Diffstat (limited to 'third_party/rust/cranelift-codegen-meta/src')
49 files changed, 28429 insertions, 0 deletions
diff --git a/third_party/rust/cranelift-codegen-meta/src/cdsl/ast.rs b/third_party/rust/cranelift-codegen-meta/src/cdsl/ast.rs new file mode 100644 index 0000000000..82cdbad762 --- /dev/null +++ b/third_party/rust/cranelift-codegen-meta/src/cdsl/ast.rs @@ -0,0 +1,753 @@ +use crate::cdsl::instructions::{InstSpec, Instruction, InstructionPredicate}; +use crate::cdsl::operands::{OperandKind, OperandKindFields}; +use crate::cdsl::types::ValueType; +use crate::cdsl::typevar::{TypeSetBuilder, TypeVar}; + +use cranelift_entity::{entity_impl, PrimaryMap, SparseMap, SparseMapValue}; + +use std::fmt; +use std::iter::IntoIterator; + +pub(crate) enum Expr { + Var(VarIndex), + Literal(Literal), +} + +impl Expr { + pub fn maybe_literal(&self) -> Option<&Literal> { + match &self { + Expr::Literal(lit) => Some(lit), + _ => None, + } + } + + pub fn maybe_var(&self) -> Option<VarIndex> { + if let Expr::Var(var) = &self { + Some(*var) + } else { + None + } + } + + pub fn unwrap_var(&self) -> VarIndex { + self.maybe_var() + .expect("tried to unwrap a non-Var content in Expr::unwrap_var") + } + + pub fn to_rust_code(&self, var_pool: &VarPool) -> String { + match self { + Expr::Var(var_index) => var_pool.get(*var_index).to_rust_code(), + Expr::Literal(literal) => literal.to_rust_code(), + } + } +} + +/// An AST definition associates a set of variables with the values produced by an expression. +pub(crate) struct Def { + pub apply: Apply, + pub defined_vars: Vec<VarIndex>, +} + +impl Def { + pub fn to_comment_string(&self, var_pool: &VarPool) -> String { + let results = self + .defined_vars + .iter() + .map(|&x| var_pool.get(x).name.as_str()) + .collect::<Vec<_>>(); + + let results = if results.len() == 1 { + results[0].to_string() + } else { + format!("({})", results.join(", ")) + }; + + format!("{} := {}", results, self.apply.to_comment_string(var_pool)) + } +} + +pub(crate) struct DefPool { + pool: PrimaryMap<DefIndex, Def>, +} + +impl DefPool { + pub fn new() -> Self { + Self { + pool: PrimaryMap::new(), + } + } + pub fn get(&self, index: DefIndex) -> &Def { + self.pool.get(index).unwrap() + } + pub fn next_index(&self) -> DefIndex { + self.pool.next_key() + } + pub fn create_inst(&mut self, apply: Apply, defined_vars: Vec<VarIndex>) -> DefIndex { + self.pool.push(Def { + apply, + defined_vars, + }) + } +} + +#[derive(Copy, Clone, PartialEq, Eq, Hash, PartialOrd, Ord)] +pub(crate) struct DefIndex(u32); +entity_impl!(DefIndex); + +/// A definition which would lead to generate a block creation. +#[derive(Clone)] +pub(crate) struct Block { + /// Instruction index after which the block entry is set. + pub location: DefIndex, + /// Variable holding the new created block. + pub name: VarIndex, +} + +pub(crate) struct BlockPool { + pool: SparseMap<DefIndex, Block>, +} + +impl SparseMapValue<DefIndex> for Block { + fn key(&self) -> DefIndex { + self.location + } +} + +impl BlockPool { + pub fn new() -> Self { + Self { + pool: SparseMap::new(), + } + } + pub fn get(&self, index: DefIndex) -> Option<&Block> { + self.pool.get(index) + } + pub fn create_block(&mut self, name: VarIndex, location: DefIndex) { + if self.pool.contains_key(location) { + panic!("Attempt to insert 2 blocks after the same instruction") + } + self.pool.insert(Block { location, name }); + } + pub fn is_empty(&self) -> bool { + self.pool.is_empty() + } +} + +// Implement IntoIterator such that we can iterate over blocks which are in the block pool. 
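// Illustrative usage (hypothetical `block_pool: BlockPool`):
//     for block in &block_pool {
//         // `block.location` is the DefIndex the block starts after,
//         // `block.name` is the VarIndex holding the new block.
//     }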
+impl<'a> IntoIterator for &'a BlockPool { + type Item = <&'a SparseMap<DefIndex, Block> as IntoIterator>::Item; + type IntoIter = <&'a SparseMap<DefIndex, Block> as IntoIterator>::IntoIter; + + fn into_iter(self) -> Self::IntoIter { + self.pool.into_iter() + } +} + +#[derive(Clone, Debug)] +pub(crate) enum Literal { + /// A value of an enumerated immediate operand. + /// + /// Some immediate operand kinds like `intcc` and `floatcc` have an enumerated range of values + /// corresponding to a Rust enum type. An `Enumerator` object is an AST leaf node representing one + /// of the values. + Enumerator { + rust_type: &'static str, + value: &'static str, + }, + + /// A bitwise value of an immediate operand, used for bitwise exact floating point constants. + Bits { rust_type: &'static str, value: u64 }, + + /// A value of an integer immediate operand. + Int(i64), + + /// A empty list of variable set of arguments. + EmptyVarArgs, +} + +impl Literal { + pub fn enumerator_for(kind: &OperandKind, value: &'static str) -> Self { + let value = match &kind.fields { + OperandKindFields::ImmEnum(values) => values.get(value).unwrap_or_else(|| { + panic!( + "nonexistent value '{}' in enumeration '{}'", + value, kind.rust_type + ) + }), + _ => panic!("enumerator is for enum values"), + }; + Literal::Enumerator { + rust_type: kind.rust_type, + value, + } + } + + pub fn bits(kind: &OperandKind, bits: u64) -> Self { + match kind.fields { + OperandKindFields::ImmValue => {} + _ => panic!("bits_of is for immediate scalar types"), + } + Literal::Bits { + rust_type: kind.rust_type, + value: bits, + } + } + + pub fn constant(kind: &OperandKind, value: i64) -> Self { + match kind.fields { + OperandKindFields::ImmValue => {} + _ => panic!("constant is for immediate scalar types"), + } + Literal::Int(value) + } + + pub fn empty_vararg() -> Self { + Literal::EmptyVarArgs + } + + pub fn to_rust_code(&self) -> String { + match self { + Literal::Enumerator { rust_type, value } => format!("{}::{}", rust_type, value), + Literal::Bits { rust_type, value } => format!("{}::with_bits({:#x})", rust_type, value), + Literal::Int(val) => val.to_string(), + Literal::EmptyVarArgs => "&[]".into(), + } + } +} + +#[derive(Clone, Copy, Debug)] +pub(crate) enum PatternPosition { + Source, + Destination, +} + +/// A free variable. +/// +/// When variables are used in `XForms` with source and destination patterns, they are classified +/// as follows: +/// +/// Input values: Uses in the source pattern with no preceding def. These may appear as inputs in +/// the destination pattern too, but no new inputs can be introduced. +/// +/// Output values: Variables that are defined in both the source and destination pattern. These +/// values may have uses outside the source pattern, and the destination pattern must compute the +/// same value. +/// +/// Intermediate values: Values that are defined in the source pattern, but not in the destination +/// pattern. These may have uses outside the source pattern, so the defining instruction can't be +/// deleted immediately. +/// +/// Temporary values are defined only in the destination pattern. +pub(crate) struct Var { + pub name: String, + + /// The `Def` defining this variable in a source pattern. + pub src_def: Option<DefIndex>, + + /// The `Def` defining this variable in a destination pattern. + pub dst_def: Option<DefIndex>, + + /// TypeVar representing the type of this variable. + type_var: Option<TypeVar>, + + /// Is this the original type variable, or has it be redefined with set_typevar? 
+ is_original_type_var: bool, +} + +impl Var { + fn new(name: String) -> Self { + Self { + name, + src_def: None, + dst_def: None, + type_var: None, + is_original_type_var: false, + } + } + + /// Is this an input value to the src pattern? + pub fn is_input(&self) -> bool { + self.src_def.is_none() && self.dst_def.is_none() + } + + /// Is this an output value, defined in both src and dst patterns? + pub fn is_output(&self) -> bool { + self.src_def.is_some() && self.dst_def.is_some() + } + + /// Is this an intermediate value, defined only in the src pattern? + pub fn is_intermediate(&self) -> bool { + self.src_def.is_some() && self.dst_def.is_none() + } + + /// Is this a temp value, defined only in the dst pattern? + pub fn is_temp(&self) -> bool { + self.src_def.is_none() && self.dst_def.is_some() + } + + /// Get the def of this variable according to the position. + pub fn get_def(&self, position: PatternPosition) -> Option<DefIndex> { + match position { + PatternPosition::Source => self.src_def, + PatternPosition::Destination => self.dst_def, + } + } + + pub fn set_def(&mut self, position: PatternPosition, def: DefIndex) { + assert!( + self.get_def(position).is_none(), + format!("redefinition of variable {}", self.name) + ); + match position { + PatternPosition::Source => { + self.src_def = Some(def); + } + PatternPosition::Destination => { + self.dst_def = Some(def); + } + } + } + + /// Get the type variable representing the type of this variable. + pub fn get_or_create_typevar(&mut self) -> TypeVar { + match &self.type_var { + Some(tv) => tv.clone(), + None => { + // Create a new type var in which we allow all types. + let tv = TypeVar::new( + format!("typeof_{}", self.name), + format!("Type of the pattern variable {:?}", self), + TypeSetBuilder::all(), + ); + self.type_var = Some(tv.clone()); + self.is_original_type_var = true; + tv + } + } + } + pub fn get_typevar(&self) -> Option<TypeVar> { + self.type_var.clone() + } + pub fn set_typevar(&mut self, tv: TypeVar) { + self.is_original_type_var = if let Some(previous_tv) = &self.type_var { + *previous_tv == tv + } else { + false + }; + self.type_var = Some(tv); + } + + /// Check if this variable has a free type variable. If not, the type of this variable is + /// computed from the type of another variable. 
+ pub fn has_free_typevar(&self) -> bool { + match &self.type_var { + Some(tv) => tv.base.is_none() && self.is_original_type_var, + None => false, + } + } + + pub fn to_rust_code(&self) -> String { + self.name.clone() + } + fn rust_type(&self) -> String { + self.type_var.as_ref().unwrap().to_rust_code() + } +} + +impl fmt::Debug for Var { + fn fmt(&self, fmt: &mut fmt::Formatter) -> Result<(), fmt::Error> { + fmt.write_fmt(format_args!( + "Var({}{}{})", + self.name, + if self.src_def.is_some() { ", src" } else { "" }, + if self.dst_def.is_some() { ", dst" } else { "" } + )) + } +} + +#[derive(Copy, Clone, Debug, PartialEq, Eq, Hash, PartialOrd, Ord)] +pub(crate) struct VarIndex(u32); +entity_impl!(VarIndex); + +pub(crate) struct VarPool { + pool: PrimaryMap<VarIndex, Var>, +} + +impl VarPool { + pub fn new() -> Self { + Self { + pool: PrimaryMap::new(), + } + } + pub fn get(&self, index: VarIndex) -> &Var { + self.pool.get(index).unwrap() + } + pub fn get_mut(&mut self, index: VarIndex) -> &mut Var { + self.pool.get_mut(index).unwrap() + } + pub fn create(&mut self, name: impl Into<String>) -> VarIndex { + self.pool.push(Var::new(name.into())) + } +} + +/// Contains constants created in the AST that must be inserted into the true [ConstantPool] when +/// the legalizer code is generated. The constant data is named in the order it is inserted; +/// inserting data using [insert] will avoid duplicates. +/// +/// [ConstantPool]: ../../../cranelift_codegen/ir/constant/struct.ConstantPool.html +/// [insert]: ConstPool::insert +pub(crate) struct ConstPool { + pool: Vec<Vec<u8>>, +} + +impl ConstPool { + /// Create an empty constant pool. + pub fn new() -> Self { + Self { pool: vec![] } + } + + /// Create a name for a constant from its position in the pool. + fn create_name(position: usize) -> String { + format!("const{}", position) + } + + /// Insert constant data into the pool, returning the name of the variable used to reference it. + /// This method will search for data that matches the new data and return the existing constant + /// name to avoid duplicates. + pub fn insert(&mut self, data: Vec<u8>) -> String { + let possible_position = self.pool.iter().position(|d| d == &data); + let position = if let Some(found_position) = possible_position { + found_position + } else { + let new_position = self.pool.len(); + self.pool.push(data); + new_position + }; + ConstPool::create_name(position) + } + + /// Iterate over the name/value pairs in the pool. + pub fn iter(&self) -> impl Iterator<Item = (String, &Vec<u8>)> { + self.pool + .iter() + .enumerate() + .map(|(i, v)| (ConstPool::create_name(i), v)) + } +} + +/// Apply an instruction to arguments. +/// +/// An `Apply` AST expression is created by using function call syntax on instructions. This +/// applies to both bound and unbound polymorphic instructions. +pub(crate) struct Apply { + pub inst: Instruction, + pub args: Vec<Expr>, + pub value_types: Vec<ValueType>, +} + +impl Apply { + pub fn new(target: InstSpec, args: Vec<Expr>) -> Self { + let (inst, value_types) = match target { + InstSpec::Inst(inst) => (inst, Vec::new()), + InstSpec::Bound(bound_inst) => (bound_inst.inst, bound_inst.value_types), + }; + + // Apply should only operate on concrete value types, not "any". + let value_types = value_types + .into_iter() + .map(|vt| vt.expect("shouldn't be Any")) + .collect(); + + // Basic check on number of arguments. 
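        // Illustrative failure mode: applying a two-operand instruction such as
        // `iadd` with only one argument expression trips this assertion when the
        // meta crate runs.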
+ assert!( + inst.operands_in.len() == args.len(), + format!("incorrect number of arguments in instruction {}", inst.name) + ); + + // Check that the kinds of Literals arguments match the expected operand. + for &imm_index in &inst.imm_opnums { + let arg = &args[imm_index]; + if let Some(literal) = arg.maybe_literal() { + let op = &inst.operands_in[imm_index]; + match &op.kind.fields { + OperandKindFields::ImmEnum(values) => { + if let Literal::Enumerator { value, .. } = literal { + assert!( + values.iter().any(|(_key, v)| v == value), + "Nonexistent enum value '{}' passed to field of kind '{}' -- \ + did you use the right enum?", + value, + op.kind.rust_type + ); + } else { + panic!( + "Passed non-enum field value {:?} to field of kind {}", + literal, op.kind.rust_type + ); + } + } + OperandKindFields::ImmValue => match &literal { + Literal::Enumerator { value, .. } => panic!( + "Expected immediate value in immediate field of kind '{}', \ + obtained enum value '{}'", + op.kind.rust_type, value + ), + Literal::Bits { .. } | Literal::Int(_) | Literal::EmptyVarArgs => {} + }, + _ => { + panic!( + "Literal passed to non-literal field of kind {}", + op.kind.rust_type + ); + } + } + } + } + + Self { + inst, + args, + value_types, + } + } + + fn to_comment_string(&self, var_pool: &VarPool) -> String { + let args = self + .args + .iter() + .map(|arg| arg.to_rust_code(var_pool)) + .collect::<Vec<_>>() + .join(", "); + + let mut inst_and_bound_types = vec![self.inst.name.to_string()]; + inst_and_bound_types.extend(self.value_types.iter().map(|vt| vt.to_string())); + let inst_name = inst_and_bound_types.join("."); + + format!("{}({})", inst_name, args) + } + + pub fn inst_predicate(&self, var_pool: &VarPool) -> InstructionPredicate { + let mut pred = InstructionPredicate::new(); + for (format_field, &op_num) in self + .inst + .format + .imm_fields + .iter() + .zip(self.inst.imm_opnums.iter()) + { + let arg = &self.args[op_num]; + if arg.maybe_var().is_some() { + // Ignore free variables for now. + continue; + } + pred = pred.and(InstructionPredicate::new_is_field_equal_ast( + &*self.inst.format, + format_field, + arg.to_rust_code(var_pool), + )); + } + + // Add checks for any bound secondary type variables. We can't check the controlling type + // variable this way since it may not appear as the type of an operand. + if self.value_types.len() > 1 { + let poly = self + .inst + .polymorphic_info + .as_ref() + .expect("must have polymorphic info if it has bounded types"); + for (bound_type, type_var) in + self.value_types[1..].iter().zip(poly.other_typevars.iter()) + { + pred = pred.and(InstructionPredicate::new_typevar_check( + &self.inst, type_var, bound_type, + )); + } + } + + pred + } + + /// Same as `inst_predicate()`, but also check the controlling type variable. 
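    /// For a bound instruction such as `iadd.i32` (illustrative), this also emits a
    /// check on the controlling type: against the typevar operand's type when
    /// `use_typevar_operand` is set, otherwise via `ctrl_typevar(inst)`.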
+ pub fn inst_predicate_with_ctrl_typevar(&self, var_pool: &VarPool) -> InstructionPredicate { + let mut pred = self.inst_predicate(var_pool); + + if !self.value_types.is_empty() { + let bound_type = &self.value_types[0]; + let poly = self.inst.polymorphic_info.as_ref().unwrap(); + let type_check = if poly.use_typevar_operand { + InstructionPredicate::new_typevar_check(&self.inst, &poly.ctrl_typevar, bound_type) + } else { + InstructionPredicate::new_ctrl_typevar_check(&bound_type) + }; + pred = pred.and(type_check); + } + + pred + } + + pub fn rust_builder(&self, defined_vars: &[VarIndex], var_pool: &VarPool) -> String { + let mut args = self + .args + .iter() + .map(|expr| expr.to_rust_code(var_pool)) + .collect::<Vec<_>>() + .join(", "); + + // Do we need to pass an explicit type argument? + if let Some(poly) = &self.inst.polymorphic_info { + if !poly.use_typevar_operand { + args = format!("{}, {}", var_pool.get(defined_vars[0]).rust_type(), args); + } + } + + format!("{}({})", self.inst.snake_name(), args) + } +} + +// Simple helpers for legalize actions construction. + +pub(crate) enum DummyExpr { + Var(DummyVar), + Literal(Literal), + Constant(DummyConstant), + Apply(InstSpec, Vec<DummyExpr>), + Block(DummyVar), +} + +#[derive(Clone)] +pub(crate) struct DummyVar { + pub name: String, +} + +impl Into<DummyExpr> for DummyVar { + fn into(self) -> DummyExpr { + DummyExpr::Var(self) + } +} +impl Into<DummyExpr> for Literal { + fn into(self) -> DummyExpr { + DummyExpr::Literal(self) + } +} + +#[derive(Clone)] +pub(crate) struct DummyConstant(pub(crate) Vec<u8>); + +pub(crate) fn constant(data: Vec<u8>) -> DummyConstant { + DummyConstant(data) +} + +impl Into<DummyExpr> for DummyConstant { + fn into(self) -> DummyExpr { + DummyExpr::Constant(self) + } +} + +pub(crate) fn var(name: &str) -> DummyVar { + DummyVar { + name: name.to_owned(), + } +} + +pub(crate) struct DummyDef { + pub expr: DummyExpr, + pub defined_vars: Vec<DummyVar>, +} + +pub(crate) struct ExprBuilder { + expr: DummyExpr, +} + +impl ExprBuilder { + pub fn apply(inst: InstSpec, args: Vec<DummyExpr>) -> Self { + let expr = DummyExpr::Apply(inst, args); + Self { expr } + } + + pub fn assign_to(self, defined_vars: Vec<DummyVar>) -> DummyDef { + DummyDef { + expr: self.expr, + defined_vars, + } + } + + pub fn block(name: DummyVar) -> Self { + let expr = DummyExpr::Block(name); + Self { expr } + } +} + +macro_rules! def_rhs { + // inst(a, b, c) + ($inst:ident($($src:expr),*)) => { + ExprBuilder::apply($inst.into(), vec![$($src.clone().into()),*]) + }; + + // inst.type(a, b, c) + ($inst:ident.$type:ident($($src:expr),*)) => { + ExprBuilder::apply($inst.bind($type).into(), vec![$($src.clone().into()),*]) + }; +} + +// Helper macro to define legalization recipes. +macro_rules! def { + // x = ... + ($dest:ident = $($tt:tt)*) => { + def_rhs!($($tt)*).assign_to(vec![$dest.clone()]) + }; + + // (x, y, ...) = ... + (($($dest:ident),*) = $($tt:tt)*) => { + def_rhs!($($tt)*).assign_to(vec![$($dest.clone()),*]) + }; + + // An instruction with no results. + ($($tt:tt)*) => { + def_rhs!($($tt)*).assign_to(Vec::new()) + } +} + +// Helper macro to define legalization recipes. +macro_rules! block { + // a basic block definition, splitting the current block in 2. 
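    // Illustrative expansions (hypothetical variable and instruction names):
    //   def!(a = iadd(x, y)) => ExprBuilder::apply(iadd.into(),
    //       vec![x.clone().into(), y.clone().into()]).assign_to(vec![a.clone()])
    //   block!(resume)       => ExprBuilder::block(resume).assign_to(Vec::new())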
+ ($block: ident) => { + ExprBuilder::block($block).assign_to(Vec::new()) + }; +} + +#[cfg(test)] +mod tests { + use crate::cdsl::ast::ConstPool; + + #[test] + fn const_pool_returns_var_names() { + let mut c = ConstPool::new(); + assert_eq!(c.insert([0, 1, 2].to_vec()), "const0"); + assert_eq!(c.insert([1, 2, 3].to_vec()), "const1"); + } + + #[test] + fn const_pool_avoids_duplicates() { + let data = [0, 1, 2].to_vec(); + let mut c = ConstPool::new(); + assert_eq!(c.pool.len(), 0); + + assert_eq!(c.insert(data.clone()), "const0"); + assert_eq!(c.pool.len(), 1); + + assert_eq!(c.insert(data), "const0"); + assert_eq!(c.pool.len(), 1); + } + + #[test] + fn const_pool_iterates() { + let mut c = ConstPool::new(); + c.insert([0, 1, 2].to_vec()); + c.insert([3, 4, 5].to_vec()); + + let mut iter = c.iter(); + assert_eq!(iter.next(), Some(("const0".to_owned(), &vec![0, 1, 2]))); + assert_eq!(iter.next(), Some(("const1".to_owned(), &vec![3, 4, 5]))); + assert_eq!(iter.next(), None); + } +} diff --git a/third_party/rust/cranelift-codegen-meta/src/cdsl/cpu_modes.rs b/third_party/rust/cranelift-codegen-meta/src/cdsl/cpu_modes.rs new file mode 100644 index 0000000000..7d119b00ce --- /dev/null +++ b/third_party/rust/cranelift-codegen-meta/src/cdsl/cpu_modes.rs @@ -0,0 +1,88 @@ +use std::collections::{hash_map, HashMap, HashSet}; +use std::iter::FromIterator; + +use crate::cdsl::encodings::Encoding; +use crate::cdsl::types::{LaneType, ValueType}; +use crate::cdsl::xform::{TransformGroup, TransformGroupIndex}; + +pub(crate) struct CpuMode { + pub name: &'static str, + default_legalize: Option<TransformGroupIndex>, + monomorphic_legalize: Option<TransformGroupIndex>, + typed_legalize: HashMap<ValueType, TransformGroupIndex>, + pub encodings: Vec<Encoding>, +} + +impl CpuMode { + pub fn new(name: &'static str) -> Self { + Self { + name, + default_legalize: None, + monomorphic_legalize: None, + typed_legalize: HashMap::new(), + encodings: Vec::new(), + } + } + + pub fn set_encodings(&mut self, encodings: Vec<Encoding>) { + assert!(self.encodings.is_empty(), "clobbering encodings"); + self.encodings = encodings; + } + + pub fn legalize_monomorphic(&mut self, group: &TransformGroup) { + assert!(self.monomorphic_legalize.is_none()); + self.monomorphic_legalize = Some(group.id); + } + pub fn legalize_default(&mut self, group: &TransformGroup) { + assert!(self.default_legalize.is_none()); + self.default_legalize = Some(group.id); + } + pub fn legalize_value_type(&mut self, lane_type: impl Into<ValueType>, group: &TransformGroup) { + assert!(self + .typed_legalize + .insert(lane_type.into(), group.id) + .is_none()); + } + pub fn legalize_type(&mut self, lane_type: impl Into<LaneType>, group: &TransformGroup) { + assert!(self + .typed_legalize + .insert(lane_type.into().into(), group.id) + .is_none()); + } + + pub fn get_default_legalize_code(&self) -> TransformGroupIndex { + self.default_legalize + .expect("a finished CpuMode must have a default legalize code") + } + pub fn get_legalize_code_for(&self, typ: &Option<ValueType>) -> TransformGroupIndex { + match typ { + Some(typ) => self + .typed_legalize + .get(typ) + .copied() + .unwrap_or_else(|| self.get_default_legalize_code()), + None => self + .monomorphic_legalize + .unwrap_or_else(|| self.get_default_legalize_code()), + } + } + pub fn get_legalized_types(&self) -> hash_map::Keys<ValueType, TransformGroupIndex> { + self.typed_legalize.keys() + } + + /// Returns a deterministically ordered, deduplicated list of TransformGroupIndex for the directly + /// 
reachable set of TransformGroup this TargetIsa uses. + pub fn direct_transform_groups(&self) -> Vec<TransformGroupIndex> { + let mut set = HashSet::new(); + if let Some(i) = &self.default_legalize { + set.insert(*i); + } + if let Some(i) = &self.monomorphic_legalize { + set.insert(*i); + } + set.extend(self.typed_legalize.values().cloned()); + let mut ret = Vec::from_iter(set); + ret.sort(); + ret + } +} diff --git a/third_party/rust/cranelift-codegen-meta/src/cdsl/encodings.rs b/third_party/rust/cranelift-codegen-meta/src/cdsl/encodings.rs new file mode 100644 index 0000000000..f66746f92f --- /dev/null +++ b/third_party/rust/cranelift-codegen-meta/src/cdsl/encodings.rs @@ -0,0 +1,179 @@ +use crate::cdsl::instructions::{ + InstSpec, Instruction, InstructionPredicate, InstructionPredicateNode, + InstructionPredicateNumber, InstructionPredicateRegistry, ValueTypeOrAny, +}; +use crate::cdsl::recipes::{EncodingRecipeNumber, Recipes}; +use crate::cdsl::settings::SettingPredicateNumber; +use crate::cdsl::types::ValueType; +use std::rc::Rc; +use std::string::ToString; + +/// Encoding for a concrete instruction. +/// +/// An `Encoding` object ties an instruction opcode with concrete type variables together with an +/// encoding recipe and encoding encbits. +/// +/// The concrete instruction can be in three different forms: +/// +/// 1. A naked opcode: `trap` for non-polymorphic instructions. +/// 2. With bound type variables: `iadd.i32` for polymorphic instructions. +/// 3. With operands providing constraints: `icmp.i32(intcc.eq, x, y)`. +/// +/// If the instruction is polymorphic, all type variables must be provided. +pub(crate) struct EncodingContent { + /// The `Instruction` or `BoundInstruction` being encoded. + inst: InstSpec, + + /// The `EncodingRecipe` to use. + pub recipe: EncodingRecipeNumber, + + /// Additional encoding bits to be interpreted by `recipe`. + pub encbits: u16, + + /// An instruction predicate that must be true to allow selecting this encoding. + pub inst_predicate: Option<InstructionPredicateNumber>, + + /// An ISA predicate that must be true to allow selecting this encoding. + pub isa_predicate: Option<SettingPredicateNumber>, + + /// The value type this encoding has been bound to, for encodings of polymorphic instructions. + pub bound_type: Option<ValueType>, +} + +impl EncodingContent { + pub fn inst(&self) -> &Instruction { + self.inst.inst() + } + pub fn to_rust_comment(&self, recipes: &Recipes) -> String { + format!("[{}#{:02x}]", recipes[self.recipe].name, self.encbits) + } +} + +pub(crate) type Encoding = Rc<EncodingContent>; + +pub(crate) struct EncodingBuilder { + inst: InstSpec, + recipe: EncodingRecipeNumber, + encbits: u16, + inst_predicate: Option<InstructionPredicate>, + isa_predicate: Option<SettingPredicateNumber>, + bound_type: Option<ValueType>, +} + +impl EncodingBuilder { + pub fn new(inst: InstSpec, recipe: EncodingRecipeNumber, encbits: u16) -> Self { + let (inst_predicate, bound_type) = match &inst { + InstSpec::Bound(inst) => { + let other_typevars = &inst.inst.polymorphic_info.as_ref().unwrap().other_typevars; + + assert_eq!( + inst.value_types.len(), + other_typevars.len() + 1, + "partially bound polymorphic instruction" + ); + + // Add secondary type variables to the instruction predicate. 
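                // (Sketch: for a hypothetical binding `inst.i32.i64`, only the
                // trailing bound type `i64` is checked here; the controlling type
                // `i32` becomes `bound_type` further below.)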
+ let value_types = &inst.value_types; + let mut inst_predicate: Option<InstructionPredicate> = None; + for (typevar, value_type) in other_typevars.iter().zip(value_types.iter().skip(1)) { + let value_type = match value_type { + ValueTypeOrAny::Any => continue, + ValueTypeOrAny::ValueType(vt) => vt, + }; + let type_predicate = + InstructionPredicate::new_typevar_check(&inst.inst, typevar, value_type); + inst_predicate = Some(type_predicate.into()); + } + + // Add immediate value predicates + for (immediate_value, immediate_operand) in inst + .immediate_values + .iter() + .zip(inst.inst.operands_in.iter().filter(|o| o.is_immediate())) + { + let immediate_predicate = InstructionPredicate::new_is_field_equal( + &inst.inst.format, + immediate_operand.kind.rust_field_name, + immediate_value.to_string(), + ); + inst_predicate = if let Some(type_predicate) = inst_predicate { + Some(type_predicate.and(immediate_predicate)) + } else { + Some(immediate_predicate.into()) + } + } + + let ctrl_type = value_types[0] + .clone() + .expect("Controlling type shouldn't be Any"); + (inst_predicate, Some(ctrl_type)) + } + + InstSpec::Inst(inst) => { + assert!( + inst.polymorphic_info.is_none(), + "unbound polymorphic instruction" + ); + (None, None) + } + }; + + Self { + inst, + recipe, + encbits, + inst_predicate, + isa_predicate: None, + bound_type, + } + } + + pub fn inst_predicate(mut self, inst_predicate: InstructionPredicateNode) -> Self { + let inst_predicate = Some(match self.inst_predicate { + Some(node) => node.and(inst_predicate), + None => inst_predicate.into(), + }); + self.inst_predicate = inst_predicate; + self + } + + pub fn isa_predicate(mut self, isa_predicate: SettingPredicateNumber) -> Self { + assert!(self.isa_predicate.is_none()); + self.isa_predicate = Some(isa_predicate); + self + } + + pub fn build( + self, + recipes: &Recipes, + inst_pred_reg: &mut InstructionPredicateRegistry, + ) -> Encoding { + let inst_predicate = self.inst_predicate.map(|pred| inst_pred_reg.insert(pred)); + + let inst = self.inst.inst(); + assert!( + Rc::ptr_eq(&inst.format, &recipes[self.recipe].format), + format!( + "Inst {} and recipe {} must have the same format!", + inst.name, recipes[self.recipe].name + ) + ); + + assert_eq!( + inst.is_branch && !inst.is_indirect_branch, + recipes[self.recipe].branch_range.is_some(), + "Inst {}'s is_branch contradicts recipe {} branch_range!", + inst.name, + recipes[self.recipe].name + ); + + Rc::new(EncodingContent { + inst: self.inst, + recipe: self.recipe, + encbits: self.encbits, + inst_predicate, + isa_predicate: self.isa_predicate, + bound_type: self.bound_type, + }) + } +} diff --git a/third_party/rust/cranelift-codegen-meta/src/cdsl/formats.rs b/third_party/rust/cranelift-codegen-meta/src/cdsl/formats.rs new file mode 100644 index 0000000000..e713a8bccb --- /dev/null +++ b/third_party/rust/cranelift-codegen-meta/src/cdsl/formats.rs @@ -0,0 +1,171 @@ +use crate::cdsl::operands::OperandKind; +use std::fmt; +use std::rc::Rc; + +/// An immediate field in an instruction format. +/// +/// This corresponds to a single member of a variant of the `InstructionData` +/// data type. +#[derive(Debug)] +pub(crate) struct FormatField { + /// Immediate operand kind. + pub kind: OperandKind, + + /// Member name in InstructionData variant. + pub member: &'static str, +} + +/// Every instruction opcode has a corresponding instruction format which determines the number of +/// operands and their kinds. 
Instruction formats are identified structurally, i.e., the format of +/// an instruction is derived from the kinds of operands used in its declaration. +/// +/// The instruction format stores two separate lists of operands: Immediates and values. Immediate +/// operands (including entity references) are represented as explicit members in the +/// `InstructionData` variants. The value operands are stored differently, depending on how many +/// there are. Beyond a certain point, instruction formats switch to an external value list for +/// storing value arguments. Value lists can hold an arbitrary number of values. +/// +/// All instruction formats must be predefined in the meta shared/formats.rs module. +#[derive(Debug)] +pub(crate) struct InstructionFormat { + /// Instruction format name in CamelCase. This is used as a Rust variant name in both the + /// `InstructionData` and `InstructionFormat` enums. + pub name: &'static str, + + pub num_value_operands: usize, + + pub has_value_list: bool, + + pub imm_fields: Vec<FormatField>, + + /// Index of the value input operand that is used to infer the controlling type variable. By + /// default, this is `0`, the first `value` operand. The index is relative to the values only, + /// ignoring immediate operands. + pub typevar_operand: Option<usize>, +} + +/// A tuple serving as a key to deduplicate InstructionFormat. +#[derive(Hash, PartialEq, Eq)] +pub(crate) struct FormatStructure { + pub num_value_operands: usize, + pub has_value_list: bool, + /// Tuples of (Rust field name / Rust type) for each immediate field. + pub imm_field_names: Vec<(&'static str, &'static str)>, +} + +impl fmt::Display for InstructionFormat { + fn fmt(&self, fmt: &mut fmt::Formatter) -> Result<(), fmt::Error> { + let imm_args = self + .imm_fields + .iter() + .map(|field| format!("{}: {}", field.member, field.kind.rust_type)) + .collect::<Vec<_>>() + .join(", "); + fmt.write_fmt(format_args!( + "{}(imms=({}), vals={})", + self.name, imm_args, self.num_value_operands + ))?; + Ok(()) + } +} + +impl InstructionFormat { + pub fn imm_by_name(&self, name: &'static str) -> &FormatField { + self.imm_fields + .iter() + .find(|&field| field.member == name) + .unwrap_or_else(|| { + panic!( + "unexpected immediate field named {} in instruction format {}", + name, self.name + ) + }) + } + + /// Returns a tuple that uniquely identifies the structure. 
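    /// Two formats with the same number of value operands, the same value-list flag,
    /// and identical (field name, Rust type) immediate lists compare equal, so the
    /// returned key can be used to deduplicate formats (e.g. as a `HashMap` key; sketch).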
+ pub fn structure(&self) -> FormatStructure { + FormatStructure { + num_value_operands: self.num_value_operands, + has_value_list: self.has_value_list, + imm_field_names: self + .imm_fields + .iter() + .map(|field| (field.kind.rust_field_name, field.kind.rust_type)) + .collect::<Vec<_>>(), + } + } +} + +pub(crate) struct InstructionFormatBuilder { + name: &'static str, + num_value_operands: usize, + has_value_list: bool, + imm_fields: Vec<FormatField>, + typevar_operand: Option<usize>, +} + +impl InstructionFormatBuilder { + pub fn new(name: &'static str) -> Self { + Self { + name, + num_value_operands: 0, + has_value_list: false, + imm_fields: Vec::new(), + typevar_operand: None, + } + } + + pub fn value(mut self) -> Self { + self.num_value_operands += 1; + self + } + + pub fn varargs(mut self) -> Self { + self.has_value_list = true; + self + } + + pub fn imm(mut self, operand_kind: &OperandKind) -> Self { + let field = FormatField { + kind: operand_kind.clone(), + member: operand_kind.rust_field_name, + }; + self.imm_fields.push(field); + self + } + + pub fn imm_with_name(mut self, member: &'static str, operand_kind: &OperandKind) -> Self { + let field = FormatField { + kind: operand_kind.clone(), + member, + }; + self.imm_fields.push(field); + self + } + + pub fn typevar_operand(mut self, operand_index: usize) -> Self { + assert!(self.typevar_operand.is_none()); + assert!(self.has_value_list || operand_index < self.num_value_operands); + self.typevar_operand = Some(operand_index); + self + } + + pub fn build(self) -> Rc<InstructionFormat> { + let typevar_operand = if self.typevar_operand.is_some() { + self.typevar_operand + } else if self.has_value_list || self.num_value_operands > 0 { + // Default to the first value operand, if there's one. + Some(0) + } else { + None + }; + + Rc::new(InstructionFormat { + name: self.name, + num_value_operands: self.num_value_operands, + has_value_list: self.has_value_list, + imm_fields: self.imm_fields, + typevar_operand, + }) + } +} diff --git a/third_party/rust/cranelift-codegen-meta/src/cdsl/instructions.rs b/third_party/rust/cranelift-codegen-meta/src/cdsl/instructions.rs new file mode 100644 index 0000000000..88a15c6038 --- /dev/null +++ b/third_party/rust/cranelift-codegen-meta/src/cdsl/instructions.rs @@ -0,0 +1,1395 @@ +use cranelift_codegen_shared::condcodes::IntCC; +use cranelift_entity::{entity_impl, PrimaryMap}; + +use std::collections::HashMap; +use std::fmt; +use std::fmt::{Display, Error, Formatter}; +use std::rc::Rc; + +use crate::cdsl::camel_case; +use crate::cdsl::formats::{FormatField, InstructionFormat}; +use crate::cdsl::operands::Operand; +use crate::cdsl::type_inference::Constraint; +use crate::cdsl::types::{LaneType, ReferenceType, ValueType, VectorType}; +use crate::cdsl::typevar::TypeVar; + +use crate::shared::formats::Formats; +use crate::shared::types::{Bool, Float, Int, Reference}; + +#[derive(Copy, Clone, Debug, PartialEq, Eq, Hash, PartialOrd, Ord)] +pub(crate) struct OpcodeNumber(u32); +entity_impl!(OpcodeNumber); + +pub(crate) type AllInstructions = PrimaryMap<OpcodeNumber, Instruction>; + +pub(crate) struct InstructionGroupBuilder<'all_inst> { + all_instructions: &'all_inst mut AllInstructions, + own_instructions: Vec<Instruction>, +} + +impl<'all_inst> InstructionGroupBuilder<'all_inst> { + pub fn new(all_instructions: &'all_inst mut AllInstructions) -> Self { + Self { + all_instructions, + own_instructions: Vec::new(), + } + } + + pub fn push(&mut self, builder: InstructionBuilder) { + let opcode_number = 
OpcodeNumber(self.all_instructions.next_key().as_u32()); + let inst = builder.build(opcode_number); + // Note this clone is cheap, since Instruction is a Rc<> wrapper for InstructionContent. + self.own_instructions.push(inst.clone()); + self.all_instructions.push(inst); + } + + pub fn build(self) -> InstructionGroup { + InstructionGroup { + instructions: self.own_instructions, + } + } +} + +/// Every instruction must belong to exactly one instruction group. A given +/// target architecture can support instructions from multiple groups, and it +/// does not necessarily support all instructions in a group. +pub(crate) struct InstructionGroup { + instructions: Vec<Instruction>, +} + +impl InstructionGroup { + pub fn by_name(&self, name: &'static str) -> &Instruction { + self.instructions + .iter() + .find(|inst| inst.name == name) + .unwrap_or_else(|| panic!("instruction with name '{}' does not exist", name)) + } +} + +/// Instructions can have parameters bound to them to specialize them for more specific encodings +/// (e.g. the encoding for adding two float types may be different than that of adding two +/// integer types) +pub(crate) trait Bindable { + /// Bind a parameter to an instruction + fn bind(&self, parameter: impl Into<BindParameter>) -> BoundInstruction; +} + +#[derive(Debug)] +pub(crate) struct PolymorphicInfo { + pub use_typevar_operand: bool, + pub ctrl_typevar: TypeVar, + pub other_typevars: Vec<TypeVar>, +} + +#[derive(Debug)] +pub(crate) struct InstructionContent { + /// Instruction mnemonic, also becomes opcode name. + pub name: String, + pub camel_name: String, + pub opcode_number: OpcodeNumber, + + /// Documentation string. + pub doc: String, + + /// Input operands. This can be a mix of SSA value operands and other operand kinds. + pub operands_in: Vec<Operand>, + /// Output operands. The output operands must be SSA values or `variable_args`. + pub operands_out: Vec<Operand>, + /// Instruction-specific TypeConstraints. + pub constraints: Vec<Constraint>, + + /// Instruction format, automatically derived from the input operands. + pub format: Rc<InstructionFormat>, + + /// One of the input or output operands is a free type variable. None if the instruction is not + /// polymorphic, set otherwise. + pub polymorphic_info: Option<PolymorphicInfo>, + + /// Indices in operands_in of input operands that are values. + pub value_opnums: Vec<usize>, + /// Indices in operands_in of input operands that are immediates or entities. + pub imm_opnums: Vec<usize>, + /// Indices in operands_out of output operands that are values. + pub value_results: Vec<usize>, + + /// True for instructions that terminate the block. + pub is_terminator: bool, + /// True for all branch or jump instructions. + pub is_branch: bool, + /// True for all indirect branch or jump instructions.', + pub is_indirect_branch: bool, + /// Is this a call instruction? + pub is_call: bool, + /// Is this a return instruction? + pub is_return: bool, + /// Is this a ghost instruction? + pub is_ghost: bool, + /// Can this instruction read from memory? + pub can_load: bool, + /// Can this instruction write to memory? + pub can_store: bool, + /// Can this instruction cause a trap? + pub can_trap: bool, + /// Does this instruction have other side effects besides can_* flags? + pub other_side_effects: bool, + /// Does this instruction write to CPU flags? + pub writes_cpu_flags: bool, + /// Should this opcode be considered to clobber all live registers, during regalloc? 
+ pub clobbers_all_regs: bool, +} + +impl InstructionContent { + pub fn snake_name(&self) -> &str { + if &self.name == "return" { + "return_" + } else { + &self.name + } + } + + pub fn all_typevars(&self) -> Vec<&TypeVar> { + match &self.polymorphic_info { + Some(poly) => { + let mut result = vec![&poly.ctrl_typevar]; + result.extend(&poly.other_typevars); + result + } + None => Vec::new(), + } + } +} + +pub(crate) type Instruction = Rc<InstructionContent>; + +impl Bindable for Instruction { + fn bind(&self, parameter: impl Into<BindParameter>) -> BoundInstruction { + BoundInstruction::new(self).bind(parameter) + } +} + +impl fmt::Display for InstructionContent { + fn fmt(&self, fmt: &mut fmt::Formatter) -> Result<(), fmt::Error> { + if !self.operands_out.is_empty() { + let operands_out = self + .operands_out + .iter() + .map(|op| op.name) + .collect::<Vec<_>>() + .join(", "); + fmt.write_str(&operands_out)?; + fmt.write_str(" = ")?; + } + + fmt.write_str(&self.name)?; + + if !self.operands_in.is_empty() { + let operands_in = self + .operands_in + .iter() + .map(|op| op.name) + .collect::<Vec<_>>() + .join(", "); + fmt.write_str(" ")?; + fmt.write_str(&operands_in)?; + } + + Ok(()) + } +} + +pub(crate) struct InstructionBuilder { + name: String, + doc: String, + format: Rc<InstructionFormat>, + operands_in: Option<Vec<Operand>>, + operands_out: Option<Vec<Operand>>, + constraints: Option<Vec<Constraint>>, + + // See Instruction comments for the meaning of these fields. + is_terminator: bool, + is_branch: bool, + is_indirect_branch: bool, + is_call: bool, + is_return: bool, + is_ghost: bool, + can_load: bool, + can_store: bool, + can_trap: bool, + other_side_effects: bool, + clobbers_all_regs: bool, +} + +impl InstructionBuilder { + pub fn new<S: Into<String>>(name: S, doc: S, format: &Rc<InstructionFormat>) -> Self { + Self { + name: name.into(), + doc: doc.into(), + format: format.clone(), + operands_in: None, + operands_out: None, + constraints: None, + + is_terminator: false, + is_branch: false, + is_indirect_branch: false, + is_call: false, + is_return: false, + is_ghost: false, + can_load: false, + can_store: false, + can_trap: false, + other_side_effects: false, + clobbers_all_regs: false, + } + } + + pub fn operands_in(mut self, operands: Vec<&Operand>) -> Self { + assert!(self.operands_in.is_none()); + self.operands_in = Some(operands.iter().map(|x| (*x).clone()).collect()); + self + } + + pub fn operands_out(mut self, operands: Vec<&Operand>) -> Self { + assert!(self.operands_out.is_none()); + self.operands_out = Some(operands.iter().map(|x| (*x).clone()).collect()); + self + } + + pub fn constraints(mut self, constraints: Vec<Constraint>) -> Self { + assert!(self.constraints.is_none()); + self.constraints = Some(constraints); + self + } + + #[allow(clippy::wrong_self_convention)] + pub fn is_terminator(mut self, val: bool) -> Self { + self.is_terminator = val; + self + } + + #[allow(clippy::wrong_self_convention)] + pub fn is_branch(mut self, val: bool) -> Self { + self.is_branch = val; + self + } + + #[allow(clippy::wrong_self_convention)] + pub fn is_indirect_branch(mut self, val: bool) -> Self { + self.is_indirect_branch = val; + self + } + + #[allow(clippy::wrong_self_convention)] + pub fn is_call(mut self, val: bool) -> Self { + self.is_call = val; + self + } + + #[allow(clippy::wrong_self_convention)] + pub fn is_return(mut self, val: bool) -> Self { + self.is_return = val; + self + } + + #[allow(clippy::wrong_self_convention)] + pub fn is_ghost(mut self, val: bool) -> Self 
{ + self.is_ghost = val; + self + } + + pub fn can_load(mut self, val: bool) -> Self { + self.can_load = val; + self + } + + pub fn can_store(mut self, val: bool) -> Self { + self.can_store = val; + self + } + + pub fn can_trap(mut self, val: bool) -> Self { + self.can_trap = val; + self + } + + pub fn other_side_effects(mut self, val: bool) -> Self { + self.other_side_effects = val; + self + } + + pub fn clobbers_all_regs(mut self, val: bool) -> Self { + self.clobbers_all_regs = val; + self + } + + fn build(self, opcode_number: OpcodeNumber) -> Instruction { + let operands_in = self.operands_in.unwrap_or_else(Vec::new); + let operands_out = self.operands_out.unwrap_or_else(Vec::new); + + let mut value_opnums = Vec::new(); + let mut imm_opnums = Vec::new(); + for (i, op) in operands_in.iter().enumerate() { + if op.is_value() { + value_opnums.push(i); + } else if op.is_immediate_or_entityref() { + imm_opnums.push(i); + } else { + assert!(op.is_varargs()); + } + } + + let value_results = operands_out + .iter() + .enumerate() + .filter_map(|(i, op)| if op.is_value() { Some(i) } else { None }) + .collect(); + + verify_format(&self.name, &operands_in, &self.format); + + let polymorphic_info = + verify_polymorphic(&operands_in, &operands_out, &self.format, &value_opnums); + + // Infer from output operands whether an instruction clobbers CPU flags or not. + let writes_cpu_flags = operands_out.iter().any(|op| op.is_cpu_flags()); + + let camel_name = camel_case(&self.name); + + Rc::new(InstructionContent { + name: self.name, + camel_name, + opcode_number, + doc: self.doc, + operands_in, + operands_out, + constraints: self.constraints.unwrap_or_else(Vec::new), + format: self.format, + polymorphic_info, + value_opnums, + value_results, + imm_opnums, + is_terminator: self.is_terminator, + is_branch: self.is_branch, + is_indirect_branch: self.is_indirect_branch, + is_call: self.is_call, + is_return: self.is_return, + is_ghost: self.is_ghost, + can_load: self.can_load, + can_store: self.can_store, + can_trap: self.can_trap, + other_side_effects: self.other_side_effects, + writes_cpu_flags, + clobbers_all_regs: self.clobbers_all_regs, + }) + } +} + +/// A thin wrapper like Option<ValueType>, but with more precise semantics. 
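/// `Any` records a slot deliberately bound as "any type" (see `BindParameter::Any`),
/// which a bare `Option::None` could not distinguish from "not bound at all";
/// `expect` converts to a concrete `ValueType` or panics.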
+#[derive(Clone)] +pub(crate) enum ValueTypeOrAny { + ValueType(ValueType), + Any, +} + +impl ValueTypeOrAny { + pub fn expect(self, msg: &str) -> ValueType { + match self { + ValueTypeOrAny::ValueType(vt) => vt, + ValueTypeOrAny::Any => panic!(format!("Unexpected Any: {}", msg)), + } + } +} + +/// The number of bits in the vector +type VectorBitWidth = u64; + +/// An parameter used for binding instructions to specific types or values +pub(crate) enum BindParameter { + Any, + Lane(LaneType), + Vector(LaneType, VectorBitWidth), + Reference(ReferenceType), + Immediate(Immediate), +} + +/// Constructor for more easily building vector parameters from any lane type +pub(crate) fn vector(parameter: impl Into<LaneType>, vector_size: VectorBitWidth) -> BindParameter { + BindParameter::Vector(parameter.into(), vector_size) +} + +impl From<Int> for BindParameter { + fn from(ty: Int) -> Self { + BindParameter::Lane(ty.into()) + } +} + +impl From<Bool> for BindParameter { + fn from(ty: Bool) -> Self { + BindParameter::Lane(ty.into()) + } +} + +impl From<Float> for BindParameter { + fn from(ty: Float) -> Self { + BindParameter::Lane(ty.into()) + } +} + +impl From<LaneType> for BindParameter { + fn from(ty: LaneType) -> Self { + BindParameter::Lane(ty) + } +} + +impl From<Reference> for BindParameter { + fn from(ty: Reference) -> Self { + BindParameter::Reference(ty.into()) + } +} + +impl From<Immediate> for BindParameter { + fn from(imm: Immediate) -> Self { + BindParameter::Immediate(imm) + } +} + +#[derive(Clone)] +pub(crate) enum Immediate { + // When needed, this enum should be expanded to include other immediate types (e.g. u8, u128). + IntCC(IntCC), +} + +impl Display for Immediate { + fn fmt(&self, f: &mut Formatter) -> Result<(), Error> { + match self { + Immediate::IntCC(x) => write!(f, "IntCC::{:?}", x), + } + } +} + +#[derive(Clone)] +pub(crate) struct BoundInstruction { + pub inst: Instruction, + pub value_types: Vec<ValueTypeOrAny>, + pub immediate_values: Vec<Immediate>, +} + +impl BoundInstruction { + /// Construct a new bound instruction (with nothing bound yet) from an instruction + fn new(inst: &Instruction) -> Self { + BoundInstruction { + inst: inst.clone(), + value_types: vec![], + immediate_values: vec![], + } + } + + /// Verify that the bindings for a BoundInstruction are correct. + fn verify_bindings(&self) -> Result<(), String> { + // Verify that binding types to the instruction does not violate the polymorphic rules. + if !self.value_types.is_empty() { + match &self.inst.polymorphic_info { + Some(poly) => { + if self.value_types.len() > 1 + poly.other_typevars.len() { + return Err(format!( + "trying to bind too many types for {}", + self.inst.name + )); + } + } + None => { + return Err(format!( + "trying to bind a type for {} which is not a polymorphic instruction", + self.inst.name + )); + } + } + } + + // Verify that only the right number of immediates are bound. 
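        // Illustrative: binding two immediates to an instruction that declares only
        // one immediate operand (say, `icmp` with its single `intcc` condition)
        // would be rejected here.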
+ let immediate_count = self + .inst + .operands_in + .iter() + .filter(|o| o.is_immediate_or_entityref()) + .count(); + if self.immediate_values.len() > immediate_count { + return Err(format!( + "trying to bind too many immediates ({}) to instruction {} which only expects {} \ + immediates", + self.immediate_values.len(), + self.inst.name, + immediate_count + )); + } + + Ok(()) + } +} + +impl Bindable for BoundInstruction { + fn bind(&self, parameter: impl Into<BindParameter>) -> BoundInstruction { + let mut modified = self.clone(); + match parameter.into() { + BindParameter::Any => modified.value_types.push(ValueTypeOrAny::Any), + BindParameter::Lane(lane_type) => modified + .value_types + .push(ValueTypeOrAny::ValueType(lane_type.into())), + BindParameter::Vector(lane_type, vector_size_in_bits) => { + let num_lanes = vector_size_in_bits / lane_type.lane_bits(); + assert!( + num_lanes >= 2, + "Minimum lane number for bind_vector is 2, found {}.", + num_lanes, + ); + let vector_type = ValueType::Vector(VectorType::new(lane_type, num_lanes)); + modified + .value_types + .push(ValueTypeOrAny::ValueType(vector_type)); + } + BindParameter::Reference(reference_type) => { + modified + .value_types + .push(ValueTypeOrAny::ValueType(reference_type.into())); + } + BindParameter::Immediate(immediate) => modified.immediate_values.push(immediate), + } + modified.verify_bindings().unwrap(); + modified + } +} + +/// Checks that the input operands actually match the given format. +fn verify_format(inst_name: &str, operands_in: &[Operand], format: &InstructionFormat) { + // A format is defined by: + // - its number of input value operands, + // - its number and names of input immediate operands, + // - whether it has a value list or not. + let mut num_values = 0; + let mut num_immediates = 0; + + for operand in operands_in.iter() { + if operand.is_varargs() { + assert!( + format.has_value_list, + "instruction {} has varargs, but its format {} doesn't have a value list; you may \ + need to use a different format.", + inst_name, format.name + ); + } + if operand.is_value() { + num_values += 1; + } + if operand.is_immediate_or_entityref() { + if let Some(format_field) = format.imm_fields.get(num_immediates) { + assert_eq!( + format_field.kind.rust_field_name, + operand.kind.rust_field_name, + "{}th operand of {} should be {} (according to format), not {} (according to \ + inst definition). You may need to use a different format.", + num_immediates, + inst_name, + format_field.kind.rust_field_name, + operand.kind.rust_field_name + ); + num_immediates += 1; + } + } + } + + assert_eq!( + num_values, format.num_value_operands, + "inst {} doesn't have as many value input operands as its format {} declares; you may need \ + to use a different format.", + inst_name, format.name + ); + + assert_eq!( + num_immediates, + format.imm_fields.len(), + "inst {} doesn't have as many immediate input \ + operands as its format {} declares; you may need to use a different format.", + inst_name, + format.name + ); +} + +/// Check if this instruction is polymorphic, and verify its use of type variables. +fn verify_polymorphic( + operands_in: &[Operand], + operands_out: &[Operand], + format: &InstructionFormat, + value_opnums: &[usize], +) -> Option<PolymorphicInfo> { + // The instruction is polymorphic if it has one free input or output operand. 
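    // (Illustrative: value operands typed by a free type variable, as in a generic
    // integer add, make the instruction polymorphic; operands whose types are fixed
    // singletons do not.)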
+ let is_polymorphic = operands_in + .iter() + .any(|op| op.is_value() && op.type_var().unwrap().free_typevar().is_some()) + || operands_out + .iter() + .any(|op| op.is_value() && op.type_var().unwrap().free_typevar().is_some()); + + if !is_polymorphic { + return None; + } + + // Verify the use of type variables. + let tv_op = format.typevar_operand; + let mut maybe_error_message = None; + if let Some(tv_op) = tv_op { + if tv_op < value_opnums.len() { + let op_num = value_opnums[tv_op]; + let tv = operands_in[op_num].type_var().unwrap(); + let free_typevar = tv.free_typevar(); + if (free_typevar.is_some() && tv == &free_typevar.unwrap()) + || tv.singleton_type().is_some() + { + match is_ctrl_typevar_candidate(tv, &operands_in, &operands_out) { + Ok(other_typevars) => { + return Some(PolymorphicInfo { + use_typevar_operand: true, + ctrl_typevar: tv.clone(), + other_typevars, + }); + } + Err(error_message) => { + maybe_error_message = Some(error_message); + } + } + } + } + }; + + // If we reached here, it means the type variable indicated as the typevar operand couldn't + // control every other input and output type variable. We need to look at the result type + // variables. + if operands_out.is_empty() { + // No result means no other possible type variable, so it's a type inference failure. + match maybe_error_message { + Some(msg) => panic!(msg), + None => panic!("typevar_operand must be a free type variable"), + } + } + + // Otherwise, try to infer the controlling type variable by looking at the first result. + let tv = operands_out[0].type_var().unwrap(); + let free_typevar = tv.free_typevar(); + if free_typevar.is_some() && tv != &free_typevar.unwrap() { + panic!("first result must be a free type variable"); + } + + // At this point, if the next unwrap() fails, it means the output type couldn't be used as a + // controlling type variable either; panicking is the right behavior. + let other_typevars = is_ctrl_typevar_candidate(tv, &operands_in, &operands_out).unwrap(); + + Some(PolymorphicInfo { + use_typevar_operand: false, + ctrl_typevar: tv.clone(), + other_typevars, + }) +} + +/// Verify that the use of TypeVars is consistent with `ctrl_typevar` as the controlling type +/// variable. +/// +/// All polymorhic inputs must either be derived from `ctrl_typevar` or be independent free type +/// variables only used once. +/// +/// All polymorphic results must be derived from `ctrl_typevar`. +/// +/// Return a vector of other type variables used, or a string explaining what went wrong. +fn is_ctrl_typevar_candidate( + ctrl_typevar: &TypeVar, + operands_in: &[Operand], + operands_out: &[Operand], +) -> Result<Vec<TypeVar>, String> { + let mut other_typevars = Vec::new(); + + // Check value inputs. + for input in operands_in { + if !input.is_value() { + continue; + } + + let typ = input.type_var().unwrap(); + let free_typevar = typ.free_typevar(); + + // Non-polymorphic or derived from ctrl_typevar is OK. + if free_typevar.is_none() { + continue; + } + let free_typevar = free_typevar.unwrap(); + if &free_typevar == ctrl_typevar { + continue; + } + + // No other derived typevars allowed. + if typ != &free_typevar { + return Err(format!( + "{:?}: type variable {} must be derived from {:?} while it is derived from {:?}", + input, typ.name, ctrl_typevar, free_typevar + )); + } + + // Other free type variables can only be used once each. 
+ for other_tv in &other_typevars { + if &free_typevar == other_tv { + return Err(format!( + "non-controlling type variable {} can't be used more than once", + free_typevar.name + )); + } + } + + other_typevars.push(free_typevar); + } + + // Check outputs. + for result in operands_out { + if !result.is_value() { + continue; + } + + let typ = result.type_var().unwrap(); + let free_typevar = typ.free_typevar(); + + // Non-polymorphic or derived from ctrl_typevar is OK. + if free_typevar.is_none() || &free_typevar.unwrap() == ctrl_typevar { + continue; + } + + return Err("type variable in output not derived from ctrl_typevar".into()); + } + + Ok(other_typevars) +} + +#[derive(Clone, Hash, PartialEq, Eq)] +pub(crate) enum FormatPredicateKind { + /// Is the field member equal to the expected value (stored here)? + IsEqual(String), + + /// Is the immediate instruction format field representable as an n-bit two's complement + /// integer? (with width: first member, scale: second member). + /// The predicate is true if the field is in the range: `-2^(width-1) -- 2^(width-1)-1` and a + /// multiple of `2^scale`. + IsSignedInt(usize, usize), + + /// Is the immediate instruction format field representable as an n-bit unsigned integer? (with + /// width: first member, scale: second member). + /// The predicate is true if the field is in the range: `0 -- 2^width - 1` and a multiple of + /// `2^scale`. + IsUnsignedInt(usize, usize), + + /// Is the immediate format field member an integer equal to zero? + IsZeroInt, + /// Is the immediate format field member equal to zero? (float32 version) + IsZero32BitFloat, + + /// Is the immediate format field member equal to zero? (float64 version) + IsZero64BitFloat, + + /// Is the immediate format field member equal zero in all lanes? + IsAllZeroes, + + /// Does the immediate format field member have ones in all bits of all lanes? + IsAllOnes, + + /// Has the value list (in member_name) the size specified in parameter? + LengthEquals(usize), + + /// Is the referenced function colocated? + IsColocatedFunc, + + /// Is the referenced data object colocated? + IsColocatedData, +} + +#[derive(Clone, Hash, PartialEq, Eq)] +pub(crate) struct FormatPredicateNode { + format_name: &'static str, + member_name: &'static str, + kind: FormatPredicateKind, +} + +impl FormatPredicateNode { + fn new( + format: &InstructionFormat, + field_name: &'static str, + kind: FormatPredicateKind, + ) -> Self { + let member_name = format.imm_by_name(field_name).member; + Self { + format_name: format.name, + member_name, + kind, + } + } + + fn new_raw( + format: &InstructionFormat, + member_name: &'static str, + kind: FormatPredicateKind, + ) -> Self { + Self { + format_name: format.name, + member_name, + kind, + } + } + + fn destructuring_member_name(&self) -> &'static str { + match &self.kind { + FormatPredicateKind::LengthEquals(_) => { + // Length operates on the argument value list. 
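                // Hence the generated destructuring pattern binds the value list by
                // reference ("ref args") rather than moving it (sketch of intent).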
+ assert!(self.member_name == "args"); + "ref args" + } + _ => self.member_name, + } + } + + fn rust_predicate(&self) -> String { + match &self.kind { + FormatPredicateKind::IsEqual(arg) => { + format!("predicates::is_equal({}, {})", self.member_name, arg) + } + FormatPredicateKind::IsSignedInt(width, scale) => format!( + "predicates::is_signed_int({}, {}, {})", + self.member_name, width, scale + ), + FormatPredicateKind::IsUnsignedInt(width, scale) => format!( + "predicates::is_unsigned_int({}, {}, {})", + self.member_name, width, scale + ), + FormatPredicateKind::IsZeroInt => { + format!("predicates::is_zero_int({})", self.member_name) + } + FormatPredicateKind::IsZero32BitFloat => { + format!("predicates::is_zero_32_bit_float({})", self.member_name) + } + FormatPredicateKind::IsZero64BitFloat => { + format!("predicates::is_zero_64_bit_float({})", self.member_name) + } + FormatPredicateKind::IsAllZeroes => format!( + "predicates::is_all_zeroes(func.dfg.constants.get({}))", + self.member_name + ), + FormatPredicateKind::IsAllOnes => format!( + "predicates::is_all_ones(func.dfg.constants.get({}))", + self.member_name + ), + FormatPredicateKind::LengthEquals(num) => format!( + "predicates::has_length_of({}, {}, func)", + self.member_name, num + ), + FormatPredicateKind::IsColocatedFunc => { + format!("predicates::is_colocated_func({}, func)", self.member_name,) + } + FormatPredicateKind::IsColocatedData => { + format!("predicates::is_colocated_data({}, func)", self.member_name) + } + } + } +} + +#[derive(Clone, Hash, PartialEq, Eq)] +pub(crate) enum TypePredicateNode { + /// Is the value argument (at the index designated by the first member) the same type as the + /// type name (second member)? + TypeVarCheck(usize, String), + + /// Is the controlling type variable the same type as the one designated by the type name + /// (only member)? + CtrlTypeVarCheck(String), +} + +impl TypePredicateNode { + fn rust_predicate(&self, func_str: &str) -> String { + match self { + TypePredicateNode::TypeVarCheck(index, value_type_name) => format!( + "{}.dfg.value_type(args[{}]) == {}", + func_str, index, value_type_name + ), + TypePredicateNode::CtrlTypeVarCheck(value_type_name) => { + format!("{}.dfg.ctrl_typevar(inst) == {}", func_str, value_type_name) + } + } + } +} + +/// A basic node in an instruction predicate: either an atom, or an AND of two conditions. +#[derive(Clone, Hash, PartialEq, Eq)] +pub(crate) enum InstructionPredicateNode { + FormatPredicate(FormatPredicateNode), + + TypePredicate(TypePredicateNode), + + /// An AND-combination of two or more other predicates. + And(Vec<InstructionPredicateNode>), + + /// An OR-combination of two or more other predicates. 
+ Or(Vec<InstructionPredicateNode>), +} + +impl InstructionPredicateNode { + fn rust_predicate(&self, func_str: &str) -> String { + match self { + InstructionPredicateNode::FormatPredicate(node) => node.rust_predicate(), + InstructionPredicateNode::TypePredicate(node) => node.rust_predicate(func_str), + InstructionPredicateNode::And(nodes) => nodes + .iter() + .map(|x| x.rust_predicate(func_str)) + .collect::<Vec<_>>() + .join(" && "), + InstructionPredicateNode::Or(nodes) => nodes + .iter() + .map(|x| x.rust_predicate(func_str)) + .collect::<Vec<_>>() + .join(" || "), + } + } + + pub fn format_destructuring_member_name(&self) -> &str { + match self { + InstructionPredicateNode::FormatPredicate(format_pred) => { + format_pred.destructuring_member_name() + } + _ => panic!("Only for leaf format predicates"), + } + } + + pub fn format_name(&self) -> &str { + match self { + InstructionPredicateNode::FormatPredicate(format_pred) => format_pred.format_name, + _ => panic!("Only for leaf format predicates"), + } + } + + pub fn is_type_predicate(&self) -> bool { + match self { + InstructionPredicateNode::FormatPredicate(_) + | InstructionPredicateNode::And(_) + | InstructionPredicateNode::Or(_) => false, + InstructionPredicateNode::TypePredicate(_) => true, + } + } + + fn collect_leaves(&self) -> Vec<&InstructionPredicateNode> { + let mut ret = Vec::new(); + match self { + InstructionPredicateNode::And(nodes) | InstructionPredicateNode::Or(nodes) => { + for node in nodes { + ret.extend(node.collect_leaves()); + } + } + _ => ret.push(self), + } + ret + } +} + +#[derive(Clone, Hash, PartialEq, Eq)] +pub(crate) struct InstructionPredicate { + node: Option<InstructionPredicateNode>, +} + +impl Into<InstructionPredicate> for InstructionPredicateNode { + fn into(self) -> InstructionPredicate { + InstructionPredicate { node: Some(self) } + } +} + +impl InstructionPredicate { + pub fn new() -> Self { + Self { node: None } + } + + pub fn unwrap(self) -> InstructionPredicateNode { + self.node.unwrap() + } + + pub fn new_typevar_check( + inst: &Instruction, + type_var: &TypeVar, + value_type: &ValueType, + ) -> InstructionPredicateNode { + let index = inst + .value_opnums + .iter() + .enumerate() + .find(|(_, &op_num)| inst.operands_in[op_num].type_var().unwrap() == type_var) + .unwrap() + .0; + InstructionPredicateNode::TypePredicate(TypePredicateNode::TypeVarCheck( + index, + value_type.rust_name(), + )) + } + + pub fn new_ctrl_typevar_check(value_type: &ValueType) -> InstructionPredicateNode { + InstructionPredicateNode::TypePredicate(TypePredicateNode::CtrlTypeVarCheck( + value_type.rust_name(), + )) + } + + pub fn new_is_field_equal( + format: &InstructionFormat, + field_name: &'static str, + imm_value: String, + ) -> InstructionPredicateNode { + InstructionPredicateNode::FormatPredicate(FormatPredicateNode::new( + format, + field_name, + FormatPredicateKind::IsEqual(imm_value), + )) + } + + /// Used only for the AST module, which directly passes in the format field. 
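+ /// Unlike `new_is_field_equal`, this doesn't look the immediate up by name in the format:
+ /// the caller already holds the resolved `FormatField`.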
+ pub fn new_is_field_equal_ast( + format: &InstructionFormat, + field: &FormatField, + imm_value: String, + ) -> InstructionPredicateNode { + InstructionPredicateNode::FormatPredicate(FormatPredicateNode::new_raw( + format, + field.member, + FormatPredicateKind::IsEqual(imm_value), + )) + } + + pub fn new_is_signed_int( + format: &InstructionFormat, + field_name: &'static str, + width: usize, + scale: usize, + ) -> InstructionPredicateNode { + InstructionPredicateNode::FormatPredicate(FormatPredicateNode::new( + format, + field_name, + FormatPredicateKind::IsSignedInt(width, scale), + )) + } + + pub fn new_is_unsigned_int( + format: &InstructionFormat, + field_name: &'static str, + width: usize, + scale: usize, + ) -> InstructionPredicateNode { + InstructionPredicateNode::FormatPredicate(FormatPredicateNode::new( + format, + field_name, + FormatPredicateKind::IsUnsignedInt(width, scale), + )) + } + + pub fn new_is_zero_int( + format: &InstructionFormat, + field_name: &'static str, + ) -> InstructionPredicateNode { + InstructionPredicateNode::FormatPredicate(FormatPredicateNode::new( + format, + field_name, + FormatPredicateKind::IsZeroInt, + )) + } + + pub fn new_is_zero_32bit_float( + format: &InstructionFormat, + field_name: &'static str, + ) -> InstructionPredicateNode { + InstructionPredicateNode::FormatPredicate(FormatPredicateNode::new( + format, + field_name, + FormatPredicateKind::IsZero32BitFloat, + )) + } + + pub fn new_is_zero_64bit_float( + format: &InstructionFormat, + field_name: &'static str, + ) -> InstructionPredicateNode { + InstructionPredicateNode::FormatPredicate(FormatPredicateNode::new( + format, + field_name, + FormatPredicateKind::IsZero64BitFloat, + )) + } + + pub fn new_is_all_zeroes( + format: &InstructionFormat, + field_name: &'static str, + ) -> InstructionPredicateNode { + InstructionPredicateNode::FormatPredicate(FormatPredicateNode::new( + format, + field_name, + FormatPredicateKind::IsAllZeroes, + )) + } + + pub fn new_is_all_ones( + format: &InstructionFormat, + field_name: &'static str, + ) -> InstructionPredicateNode { + InstructionPredicateNode::FormatPredicate(FormatPredicateNode::new( + format, + field_name, + FormatPredicateKind::IsAllOnes, + )) + } + + pub fn new_length_equals(format: &InstructionFormat, size: usize) -> InstructionPredicateNode { + assert!( + format.has_value_list, + "the format must be variadic in number of arguments" + ); + InstructionPredicateNode::FormatPredicate(FormatPredicateNode::new_raw( + format, + "args", + FormatPredicateKind::LengthEquals(size), + )) + } + + pub fn new_is_colocated_func( + format: &InstructionFormat, + field_name: &'static str, + ) -> InstructionPredicateNode { + InstructionPredicateNode::FormatPredicate(FormatPredicateNode::new( + format, + field_name, + FormatPredicateKind::IsColocatedFunc, + )) + } + + pub fn new_is_colocated_data(formats: &Formats) -> InstructionPredicateNode { + let format = &formats.unary_global_value; + InstructionPredicateNode::FormatPredicate(FormatPredicateNode::new( + &*format, + "global_value", + FormatPredicateKind::IsColocatedData, + )) + } + + pub fn and(mut self, new_node: InstructionPredicateNode) -> Self { + let node = self.node; + let mut and_nodes = match node { + Some(node) => match node { + InstructionPredicateNode::And(nodes) => nodes, + InstructionPredicateNode::Or(_) => { + panic!("Can't mix and/or without implementing operator precedence!") + } + _ => vec![node], + }, + _ => Vec::new(), + }; + and_nodes.push(new_node); + self.node = 
Some(InstructionPredicateNode::And(and_nodes)); + self + } + + pub fn or(mut self, new_node: InstructionPredicateNode) -> Self { + let node = self.node; + let mut or_nodes = match node { + Some(node) => match node { + InstructionPredicateNode::Or(nodes) => nodes, + InstructionPredicateNode::And(_) => { + panic!("Can't mix and/or without implementing operator precedence!") + } + _ => vec![node], + }, + _ => Vec::new(), + }; + or_nodes.push(new_node); + self.node = Some(InstructionPredicateNode::Or(or_nodes)); + self + } + + pub fn rust_predicate(&self, func_str: &str) -> Option<String> { + self.node.as_ref().map(|root| root.rust_predicate(func_str)) + } + + /// Returns the type predicate if this is one, or None otherwise. + pub fn type_predicate(&self, func_str: &str) -> Option<String> { + let node = self.node.as_ref().unwrap(); + if node.is_type_predicate() { + Some(node.rust_predicate(func_str)) + } else { + None + } + } + + /// Returns references to all the nodes that are leaves in the condition (i.e. by flattening + /// AND/OR). + pub fn collect_leaves(&self) -> Vec<&InstructionPredicateNode> { + self.node.as_ref().unwrap().collect_leaves() + } +} + +#[derive(Copy, Clone, PartialEq, Eq, Hash, PartialOrd, Ord)] +pub(crate) struct InstructionPredicateNumber(u32); +entity_impl!(InstructionPredicateNumber); + +pub(crate) type InstructionPredicateMap = + PrimaryMap<InstructionPredicateNumber, InstructionPredicate>; + +/// A registry of predicates to help deduplicating them, during Encodings construction. When the +/// construction process is over, it needs to be extracted with `extract` and associated to the +/// TargetIsa. +pub(crate) struct InstructionPredicateRegistry { + /// Maps a predicate number to its actual predicate. + map: InstructionPredicateMap, + + /// Inverse map: maps a predicate to its predicate number. This is used before inserting a + /// predicate, to check whether it already exists. + inverted_map: HashMap<InstructionPredicate, InstructionPredicateNumber>, +} + +impl InstructionPredicateRegistry { + pub fn new() -> Self { + Self { + map: PrimaryMap::new(), + inverted_map: HashMap::new(), + } + } + pub fn insert(&mut self, predicate: InstructionPredicate) -> InstructionPredicateNumber { + match self.inverted_map.get(&predicate) { + Some(&found) => found, + None => { + let key = self.map.push(predicate.clone()); + self.inverted_map.insert(predicate, key); + key + } + } + } + pub fn extract(self) -> InstructionPredicateMap { + self.map + } +} + +/// An instruction specification, containing an instruction that has bound types or not. 
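+///
+/// For example (names illustrative), `(&iadd).into()` yields `InstSpec::Inst`, while
+/// `iadd.bind(I32).into()` yields `InstSpec::Bound`.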
+pub(crate) enum InstSpec { + Inst(Instruction), + Bound(BoundInstruction), +} + +impl InstSpec { + pub fn inst(&self) -> &Instruction { + match &self { + InstSpec::Inst(inst) => inst, + InstSpec::Bound(bound_inst) => &bound_inst.inst, + } + } +} + +impl Bindable for InstSpec { + fn bind(&self, parameter: impl Into<BindParameter>) -> BoundInstruction { + match self { + InstSpec::Inst(inst) => inst.bind(parameter.into()), + InstSpec::Bound(inst) => inst.bind(parameter.into()), + } + } +} + +impl Into<InstSpec> for &Instruction { + fn into(self) -> InstSpec { + InstSpec::Inst(self.clone()) + } +} + +impl Into<InstSpec> for BoundInstruction { + fn into(self) -> InstSpec { + InstSpec::Bound(self) + } +} + +#[cfg(test)] +mod test { + use super::*; + use crate::cdsl::formats::InstructionFormatBuilder; + use crate::cdsl::operands::{OperandKind, OperandKindFields}; + use crate::cdsl::typevar::TypeSetBuilder; + use crate::shared::types::Int::{I32, I64}; + + fn field_to_operand(index: usize, field: OperandKindFields) -> Operand { + // Pretend the index string is &'static. + let name = Box::leak(index.to_string().into_boxed_str()); + // Format's name / rust_type don't matter here. + let kind = OperandKind::new(name, name, field); + let operand = Operand::new(name, kind); + operand + } + + fn field_to_operands(types: Vec<OperandKindFields>) -> Vec<Operand> { + types + .iter() + .enumerate() + .map(|(i, f)| field_to_operand(i, f.clone())) + .collect() + } + + fn build_fake_instruction( + inputs: Vec<OperandKindFields>, + outputs: Vec<OperandKindFields>, + ) -> Instruction { + // Setup a format from the input operands. + let mut format = InstructionFormatBuilder::new("fake"); + for (i, f) in inputs.iter().enumerate() { + match f { + OperandKindFields::TypeVar(_) => format = format.value(), + OperandKindFields::ImmValue => { + format = format.imm(&field_to_operand(i, f.clone()).kind) + } + _ => {} + }; + } + let format = format.build(); + + // Create the fake instruction. + InstructionBuilder::new("fake", "A fake instruction for testing.", &format) + .operands_in(field_to_operands(inputs).iter().collect()) + .operands_out(field_to_operands(outputs).iter().collect()) + .build(OpcodeNumber(42)) + } + + #[test] + fn ensure_bound_instructions_can_bind_lane_types() { + let type1 = TypeSetBuilder::new().ints(8..64).build(); + let in1 = OperandKindFields::TypeVar(TypeVar::new("a", "...", type1)); + let inst = build_fake_instruction(vec![in1], vec![]); + inst.bind(LaneType::Int(I32)); + } + + #[test] + fn ensure_bound_instructions_can_bind_immediates() { + let inst = build_fake_instruction(vec![OperandKindFields::ImmValue], vec![]); + let bound_inst = inst.bind(Immediate::IntCC(IntCC::Equal)); + assert!(bound_inst.verify_bindings().is_ok()); + } + + #[test] + #[should_panic] + fn ensure_instructions_fail_to_bind() { + let inst = build_fake_instruction(vec![], vec![]); + inst.bind(BindParameter::Lane(LaneType::Int(I32))); + // Trying to bind to an instruction with no inputs should fail. 
+ } + + #[test] + #[should_panic] + fn ensure_bound_instructions_fail_to_bind_too_many_types() { + let type1 = TypeSetBuilder::new().ints(8..64).build(); + let in1 = OperandKindFields::TypeVar(TypeVar::new("a", "...", type1)); + let inst = build_fake_instruction(vec![in1], vec![]); + inst.bind(LaneType::Int(I32)).bind(LaneType::Int(I64)); + } + + #[test] + #[should_panic] + fn ensure_instructions_fail_to_bind_too_many_immediates() { + let inst = build_fake_instruction(vec![OperandKindFields::ImmValue], vec![]); + inst.bind(BindParameter::Immediate(Immediate::IntCC(IntCC::Equal))) + .bind(BindParameter::Immediate(Immediate::IntCC(IntCC::Equal))); + // Trying to bind too many immediates to an instruction should fail; note that the immediate + // values are nonsensical but irrelevant to the purpose of this test. + } +} diff --git a/third_party/rust/cranelift-codegen-meta/src/cdsl/isa.rs b/third_party/rust/cranelift-codegen-meta/src/cdsl/isa.rs new file mode 100644 index 0000000000..512105d09a --- /dev/null +++ b/third_party/rust/cranelift-codegen-meta/src/cdsl/isa.rs @@ -0,0 +1,99 @@ +use std::collections::HashSet; +use std::iter::FromIterator; + +use crate::cdsl::cpu_modes::CpuMode; +use crate::cdsl::instructions::{InstructionGroup, InstructionPredicateMap}; +use crate::cdsl::recipes::Recipes; +use crate::cdsl::regs::IsaRegs; +use crate::cdsl::settings::SettingGroup; +use crate::cdsl::xform::{TransformGroupIndex, TransformGroups}; + +pub(crate) struct TargetIsa { + pub name: &'static str, + pub instructions: InstructionGroup, + pub settings: SettingGroup, + pub regs: IsaRegs, + pub recipes: Recipes, + pub cpu_modes: Vec<CpuMode>, + pub encodings_predicates: InstructionPredicateMap, + + /// TransformGroupIndex are global to all the ISAs, while we want to have indices into the + /// local array of transform groups that are directly used. We use this map to get this + /// information. + pub local_transform_groups: Vec<TransformGroupIndex>, +} + +impl TargetIsa { + pub fn new( + name: &'static str, + instructions: InstructionGroup, + settings: SettingGroup, + regs: IsaRegs, + recipes: Recipes, + cpu_modes: Vec<CpuMode>, + encodings_predicates: InstructionPredicateMap, + ) -> Self { + // Compute the local TransformGroup index. + let mut local_transform_groups = Vec::new(); + for cpu_mode in &cpu_modes { + let transform_groups = cpu_mode.direct_transform_groups(); + for group_index in transform_groups { + // find() is fine here: the number of transform group is < 5 as of June 2019. + if local_transform_groups + .iter() + .find(|&val| group_index == *val) + .is_none() + { + local_transform_groups.push(group_index); + } + } + } + + Self { + name, + instructions, + settings, + regs, + recipes, + cpu_modes, + encodings_predicates, + local_transform_groups, + } + } + + /// Returns a deterministically ordered, deduplicated list of TransformGroupIndex for the + /// transitive set of TransformGroup this TargetIsa uses. + pub fn transitive_transform_groups( + &self, + all_groups: &TransformGroups, + ) -> Vec<TransformGroupIndex> { + let mut set = HashSet::new(); + + for &root in self.local_transform_groups.iter() { + set.insert(root); + let mut base = root; + // Follow the chain of chain_with. 
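+ // For instance, if group A declares `chain_with(B)` and B in turn chains with C,
+ // requesting A pulls both B and C into the transitive set.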
+ while let Some(chain_with) = &all_groups.get(base).chain_with { + set.insert(*chain_with); + base = *chain_with; + } + } + + let mut vec = Vec::from_iter(set); + vec.sort(); + vec + } + + /// Returns a deterministically ordered, deduplicated list of TransformGroupIndex for the directly + /// reachable set of TransformGroup this TargetIsa uses. + pub fn direct_transform_groups(&self) -> &Vec<TransformGroupIndex> { + &self.local_transform_groups + } + + pub fn translate_group_index(&self, group_index: TransformGroupIndex) -> usize { + self.local_transform_groups + .iter() + .position(|&val| val == group_index) + .expect("TransformGroup unused by this TargetIsa!") + } +} diff --git a/third_party/rust/cranelift-codegen-meta/src/cdsl/mod.rs b/third_party/rust/cranelift-codegen-meta/src/cdsl/mod.rs new file mode 100644 index 0000000000..698b64dff3 --- /dev/null +++ b/third_party/rust/cranelift-codegen-meta/src/cdsl/mod.rs @@ -0,0 +1,89 @@ +//! Cranelift DSL classes. +//! +//! This module defines the classes that are used to define Cranelift +//! instructions and other entities. + +#[macro_use] +pub mod ast; +pub mod cpu_modes; +pub mod encodings; +pub mod formats; +pub mod instructions; +pub mod isa; +pub mod operands; +pub mod recipes; +pub mod regs; +pub mod settings; +pub mod type_inference; +pub mod types; +pub mod typevar; +pub mod xform; + +/// A macro that converts boolean settings into predicates to look more natural. +#[macro_export] +macro_rules! predicate { + ($a:ident && $($b:tt)*) => { + PredicateNode::And(Box::new($a.into()), Box::new(predicate!($($b)*))) + }; + (!$a:ident && $($b:tt)*) => { + PredicateNode::And( + Box::new(PredicateNode::Not(Box::new($a.into()))), + Box::new(predicate!($($b)*)) + ) + }; + (!$a:ident) => { + PredicateNode::Not(Box::new($a.into())) + }; + ($a:ident) => { + $a.into() + }; +} + +/// A macro that joins boolean settings into a list (e.g. `preset!(feature_a && feature_b)`). +#[macro_export] +macro_rules! preset { + () => { + vec![] + }; + ($($x:ident)&&*) => { + { + let mut v = Vec::new(); + $( + v.push($x.into()); + )* + v + } + }; +} + +/// Convert the string `s` to CamelCase. +pub fn camel_case(s: &str) -> String { + let mut output_chars = String::with_capacity(s.len()); + + let mut capitalize = true; + for curr_char in s.chars() { + if curr_char == '_' { + capitalize = true; + } else { + if capitalize { + output_chars.extend(curr_char.to_uppercase()); + } else { + output_chars.push(curr_char); + } + capitalize = false; + } + } + + output_chars +} + +#[cfg(test)] +mod tests { + use super::camel_case; + + #[test] + fn camel_case_works() { + assert_eq!(camel_case("x"), "X"); + assert_eq!(camel_case("camel_case"), "CamelCase"); + } +} diff --git a/third_party/rust/cranelift-codegen-meta/src/cdsl/operands.rs b/third_party/rust/cranelift-codegen-meta/src/cdsl/operands.rs new file mode 100644 index 0000000000..605df24862 --- /dev/null +++ b/third_party/rust/cranelift-codegen-meta/src/cdsl/operands.rs @@ -0,0 +1,173 @@ +use std::collections::HashMap; + +use crate::cdsl::typevar::TypeVar; + +/// An instruction operand can be an *immediate*, an *SSA value*, or an *entity reference*. The +/// type of the operand is one of: +/// +/// 1. A `ValueType` instance indicates an SSA value operand with a concrete type. +/// +/// 2. A `TypeVar` instance indicates an SSA value operand, and the instruction is polymorphic over +/// the possible concrete types that the type variable can assume. +/// +/// 3. 
An `ImmediateKind` instance indicates an immediate operand whose value is encoded in the +/// instruction itself rather than being passed as an SSA value. +/// +/// 4. An `EntityRefKind` instance indicates an operand that references another entity in the +/// function, typically something declared in the function preamble. +#[derive(Clone, Debug)] +pub(crate) struct Operand { + /// Name of the operand variable, as it appears in function parameters, legalizations, etc. + pub name: &'static str, + + /// Type of the operand. + pub kind: OperandKind, + + doc: Option<&'static str>, +} + +impl Operand { + pub fn new(name: &'static str, kind: impl Into<OperandKind>) -> Self { + Self { + name, + doc: None, + kind: kind.into(), + } + } + pub fn with_doc(mut self, doc: &'static str) -> Self { + self.doc = Some(doc); + self + } + + pub fn doc(&self) -> Option<&str> { + if let Some(doc) = &self.doc { + return Some(doc); + } + match &self.kind.fields { + OperandKindFields::TypeVar(tvar) => Some(&tvar.doc), + _ => self.kind.doc(), + } + } + + pub fn is_value(&self) -> bool { + match self.kind.fields { + OperandKindFields::TypeVar(_) => true, + _ => false, + } + } + + pub fn type_var(&self) -> Option<&TypeVar> { + match &self.kind.fields { + OperandKindFields::TypeVar(typevar) => Some(typevar), + _ => None, + } + } + + pub fn is_varargs(&self) -> bool { + match self.kind.fields { + OperandKindFields::VariableArgs => true, + _ => false, + } + } + + /// Returns true if the operand has an immediate kind or is an EntityRef. + pub fn is_immediate_or_entityref(&self) -> bool { + match self.kind.fields { + OperandKindFields::ImmEnum(_) + | OperandKindFields::ImmValue + | OperandKindFields::EntityRef => true, + _ => false, + } + } + + /// Returns true if the operand has an immediate kind. + pub fn is_immediate(&self) -> bool { + match self.kind.fields { + OperandKindFields::ImmEnum(_) | OperandKindFields::ImmValue => true, + _ => false, + } + } + + pub fn is_cpu_flags(&self) -> bool { + match &self.kind.fields { + OperandKindFields::TypeVar(type_var) + if type_var.name == "iflags" || type_var.name == "fflags" => + { + true + } + _ => false, + } + } +} + +pub type EnumValues = HashMap<&'static str, &'static str>; + +#[derive(Clone, Debug)] +pub(crate) enum OperandKindFields { + EntityRef, + VariableArgs, + ImmValue, + ImmEnum(EnumValues), + TypeVar(TypeVar), +} + +#[derive(Clone, Debug)] +pub(crate) struct OperandKind { + /// String representation of the Rust type mapping to this OperandKind. + pub rust_type: &'static str, + + /// Name of this OperandKind in the format's member field. + pub rust_field_name: &'static str, + + /// Type-specific fields for this OperandKind. 
+ pub fields: OperandKindFields, + + doc: Option<&'static str>, +} + +impl OperandKind { + pub fn new( + rust_field_name: &'static str, + rust_type: &'static str, + fields: OperandKindFields, + ) -> Self { + Self { + rust_field_name, + rust_type, + fields, + doc: None, + } + } + pub fn with_doc(mut self, doc: &'static str) -> Self { + assert!(self.doc.is_none()); + self.doc = Some(doc); + self + } + fn doc(&self) -> Option<&str> { + if let Some(doc) = &self.doc { + return Some(doc); + } + match &self.fields { + OperandKindFields::TypeVar(type_var) => Some(&type_var.doc), + OperandKindFields::ImmEnum(_) + | OperandKindFields::ImmValue + | OperandKindFields::EntityRef + | OperandKindFields::VariableArgs => None, + } + } +} + +impl Into<OperandKind> for &TypeVar { + fn into(self) -> OperandKind { + OperandKind::new( + "value", + "ir::Value", + OperandKindFields::TypeVar(self.into()), + ) + } +} +impl Into<OperandKind> for &OperandKind { + fn into(self) -> OperandKind { + self.clone() + } +} diff --git a/third_party/rust/cranelift-codegen-meta/src/cdsl/recipes.rs b/third_party/rust/cranelift-codegen-meta/src/cdsl/recipes.rs new file mode 100644 index 0000000000..dfe4cd67a5 --- /dev/null +++ b/third_party/rust/cranelift-codegen-meta/src/cdsl/recipes.rs @@ -0,0 +1,298 @@ +use std::rc::Rc; + +use cranelift_entity::{entity_impl, PrimaryMap}; + +use crate::cdsl::formats::InstructionFormat; +use crate::cdsl::instructions::InstructionPredicate; +use crate::cdsl::regs::RegClassIndex; +use crate::cdsl::settings::SettingPredicateNumber; + +/// A specific register in a register class. +/// +/// A register is identified by the top-level register class it belongs to and +/// its first register unit. +/// +/// Specific registers are used to describe constraints on instructions where +/// some operands must use a fixed register. +/// +/// Register instances can be created with the constructor, or accessed as +/// attributes on the register class: `GPR.rcx`. +#[derive(Copy, Clone, Hash, PartialEq, Eq)] +pub(crate) struct Register { + pub regclass: RegClassIndex, + pub unit: u8, +} + +impl Register { + pub fn new(regclass: RegClassIndex, unit: u8) -> Self { + Self { regclass, unit } + } +} + +/// An operand that must be in a stack slot. +/// +/// A `Stack` object can be used to indicate an operand constraint for a value +/// operand that must live in a stack slot. +#[derive(Copy, Clone, Hash, PartialEq)] +pub(crate) struct Stack { + pub regclass: RegClassIndex, +} + +impl Stack { + pub fn new(regclass: RegClassIndex) -> Self { + Self { regclass } + } + pub fn stack_base_mask(self) -> &'static str { + // TODO: Make this configurable instead of just using the SP. 
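+ // A mask value of 1 sets only the lowest bit, i.e. the SP base mentioned in the TODO
+ // above; other stack bases would need additional bits.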
+ "StackBaseMask(1)" + } +} + +#[derive(Clone, Hash, PartialEq)] +pub(crate) struct BranchRange { + pub inst_size: u64, + pub range: u64, +} + +#[derive(Copy, Clone, Hash, PartialEq)] +pub(crate) enum OperandConstraint { + RegClass(RegClassIndex), + FixedReg(Register), + TiedInput(usize), + Stack(Stack), +} + +impl Into<OperandConstraint> for RegClassIndex { + fn into(self) -> OperandConstraint { + OperandConstraint::RegClass(self) + } +} + +impl Into<OperandConstraint> for Register { + fn into(self) -> OperandConstraint { + OperandConstraint::FixedReg(self) + } +} + +impl Into<OperandConstraint> for usize { + fn into(self) -> OperandConstraint { + OperandConstraint::TiedInput(self) + } +} + +impl Into<OperandConstraint> for Stack { + fn into(self) -> OperandConstraint { + OperandConstraint::Stack(self) + } +} + +/// A recipe for encoding instructions with a given format. +/// +/// Many different instructions can be encoded by the same recipe, but they +/// must all have the same instruction format. +/// +/// The `operands_in` and `operands_out` arguments are tuples specifying the register +/// allocation constraints for the value operands and results respectively. The +/// possible constraints for an operand are: +/// +/// - A `RegClass` specifying the set of allowed registers. +/// - A `Register` specifying a fixed-register operand. +/// - An integer indicating that this result is tied to a value operand, so +/// they must use the same register. +/// - A `Stack` specifying a value in a stack slot. +/// +/// The `branch_range` argument must be provided for recipes that can encode +/// branch instructions. It is an `(origin, bits)` tuple describing the exact +/// range that can be encoded in a branch instruction. +#[derive(Clone)] +pub(crate) struct EncodingRecipe { + /// Short mnemonic name for this recipe. + pub name: String, + + /// Associated instruction format. + pub format: Rc<InstructionFormat>, + + /// Base number of bytes in the binary encoded instruction. + pub base_size: u64, + + /// Tuple of register constraints for value operands. + pub operands_in: Vec<OperandConstraint>, + + /// Tuple of register constraints for results. + pub operands_out: Vec<OperandConstraint>, + + /// Function name to use when computing actual size. + pub compute_size: &'static str, + + /// `(origin, bits)` range for branches. + pub branch_range: Option<BranchRange>, + + /// This instruction clobbers `iflags` and `fflags`; true by default. + pub clobbers_flags: bool, + + /// Instruction predicate. + pub inst_predicate: Option<InstructionPredicate>, + + /// ISA predicate. + pub isa_predicate: Option<SettingPredicateNumber>, + + /// Rust code for binary emission. + pub emit: Option<String>, +} + +// Implement PartialEq ourselves: take all the fields into account but the name. +impl PartialEq for EncodingRecipe { + fn eq(&self, other: &Self) -> bool { + Rc::ptr_eq(&self.format, &other.format) + && self.base_size == other.base_size + && self.operands_in == other.operands_in + && self.operands_out == other.operands_out + && self.compute_size == other.compute_size + && self.branch_range == other.branch_range + && self.clobbers_flags == other.clobbers_flags + && self.inst_predicate == other.inst_predicate + && self.isa_predicate == other.isa_predicate + && self.emit == other.emit + } +} + +// To allow using it in a hashmap. 
+impl Eq for EncodingRecipe {} + +#[derive(Copy, Clone, PartialEq, Eq, Hash, PartialOrd, Ord)] +pub(crate) struct EncodingRecipeNumber(u32); +entity_impl!(EncodingRecipeNumber); + +pub(crate) type Recipes = PrimaryMap<EncodingRecipeNumber, EncodingRecipe>; + +#[derive(Clone)] +pub(crate) struct EncodingRecipeBuilder { + pub name: String, + format: Rc<InstructionFormat>, + pub base_size: u64, + pub operands_in: Option<Vec<OperandConstraint>>, + pub operands_out: Option<Vec<OperandConstraint>>, + pub compute_size: Option<&'static str>, + pub branch_range: Option<BranchRange>, + pub emit: Option<String>, + clobbers_flags: Option<bool>, + inst_predicate: Option<InstructionPredicate>, + isa_predicate: Option<SettingPredicateNumber>, +} + +impl EncodingRecipeBuilder { + pub fn new(name: impl Into<String>, format: &Rc<InstructionFormat>, base_size: u64) -> Self { + Self { + name: name.into(), + format: format.clone(), + base_size, + operands_in: None, + operands_out: None, + compute_size: None, + branch_range: None, + emit: None, + clobbers_flags: None, + inst_predicate: None, + isa_predicate: None, + } + } + + // Setters. + pub fn operands_in(mut self, constraints: Vec<impl Into<OperandConstraint>>) -> Self { + assert!(self.operands_in.is_none()); + self.operands_in = Some( + constraints + .into_iter() + .map(|constr| constr.into()) + .collect(), + ); + self + } + pub fn operands_out(mut self, constraints: Vec<impl Into<OperandConstraint>>) -> Self { + assert!(self.operands_out.is_none()); + self.operands_out = Some( + constraints + .into_iter() + .map(|constr| constr.into()) + .collect(), + ); + self + } + pub fn clobbers_flags(mut self, flag: bool) -> Self { + assert!(self.clobbers_flags.is_none()); + self.clobbers_flags = Some(flag); + self + } + pub fn emit(mut self, code: impl Into<String>) -> Self { + assert!(self.emit.is_none()); + self.emit = Some(code.into()); + self + } + pub fn branch_range(mut self, range: (u64, u64)) -> Self { + assert!(self.branch_range.is_none()); + self.branch_range = Some(BranchRange { + inst_size: range.0, + range: range.1, + }); + self + } + pub fn isa_predicate(mut self, pred: SettingPredicateNumber) -> Self { + assert!(self.isa_predicate.is_none()); + self.isa_predicate = Some(pred); + self + } + pub fn inst_predicate(mut self, inst_predicate: impl Into<InstructionPredicate>) -> Self { + assert!(self.inst_predicate.is_none()); + self.inst_predicate = Some(inst_predicate.into()); + self + } + pub fn compute_size(mut self, compute_size: &'static str) -> Self { + assert!(self.compute_size.is_none()); + self.compute_size = Some(compute_size); + self + } + + pub fn build(self) -> EncodingRecipe { + let operands_in = self.operands_in.unwrap_or_default(); + let operands_out = self.operands_out.unwrap_or_default(); + + // The number of input constraints must match the number of format input operands. + if !self.format.has_value_list { + assert!( + operands_in.len() == self.format.num_value_operands, + format!( + "missing operand constraints for recipe {} (format {})", + self.name, self.format.name + ) + ); + } + + // Ensure tied inputs actually refer to existing inputs. 
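+ // A `TiedInput(n)` constraint says the operand shares its register with value input `n`,
+ // so `n` must be a valid index into `operands_in`.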
+ for constraint in operands_in.iter().chain(operands_out.iter()) { + if let OperandConstraint::TiedInput(n) = *constraint { + assert!(n < operands_in.len()); + } + } + + let compute_size = match self.compute_size { + Some(compute_size) => compute_size, + None => "base_size", + }; + + let clobbers_flags = self.clobbers_flags.unwrap_or(true); + + EncodingRecipe { + name: self.name, + format: self.format, + base_size: self.base_size, + operands_in, + operands_out, + compute_size, + branch_range: self.branch_range, + clobbers_flags, + inst_predicate: self.inst_predicate, + isa_predicate: self.isa_predicate, + emit: self.emit, + } + } +} diff --git a/third_party/rust/cranelift-codegen-meta/src/cdsl/regs.rs b/third_party/rust/cranelift-codegen-meta/src/cdsl/regs.rs new file mode 100644 index 0000000000..864826ee43 --- /dev/null +++ b/third_party/rust/cranelift-codegen-meta/src/cdsl/regs.rs @@ -0,0 +1,412 @@ +use cranelift_codegen_shared::constants; +use cranelift_entity::{entity_impl, EntityRef, PrimaryMap}; + +#[derive(Copy, Clone, PartialEq, Eq, Hash, PartialOrd, Ord)] +pub(crate) struct RegBankIndex(u32); +entity_impl!(RegBankIndex); + +pub(crate) struct RegBank { + pub name: &'static str, + pub first_unit: u8, + pub units: u8, + pub names: Vec<&'static str>, + pub prefix: &'static str, + pub pressure_tracking: bool, + pub pinned_reg: Option<u16>, + pub toprcs: Vec<RegClassIndex>, + pub classes: Vec<RegClassIndex>, +} + +impl RegBank { + pub fn new( + name: &'static str, + first_unit: u8, + units: u8, + names: Vec<&'static str>, + prefix: &'static str, + pressure_tracking: bool, + pinned_reg: Option<u16>, + ) -> Self { + RegBank { + name, + first_unit, + units, + names, + prefix, + pressure_tracking, + pinned_reg, + toprcs: Vec::new(), + classes: Vec::new(), + } + } + + fn unit_by_name(&self, name: &'static str) -> u8 { + let unit = if let Some(found) = self.names.iter().position(|®_name| reg_name == name) { + found + } else { + // Try to match without the bank prefix. + assert!(name.starts_with(self.prefix)); + let name_without_prefix = &name[self.prefix.len()..]; + if let Some(found) = self + .names + .iter() + .position(|®_name| reg_name == name_without_prefix) + { + found + } else { + // Ultimate try: try to parse a number and use this in the array, eg r15 on x86. + if let Ok(as_num) = name_without_prefix.parse::<u8>() { + assert!( + as_num < self.units, + "trying to get {}, but bank only has {} registers!", + name, + self.units + ); + as_num as usize + } else { + panic!("invalid register name {}", name); + } + } + }; + self.first_unit + (unit as u8) + } +} + +#[derive(Copy, Clone, PartialEq, Eq, Hash, PartialOrd, Ord, Debug)] +pub(crate) struct RegClassIndex(u32); +entity_impl!(RegClassIndex); + +pub(crate) struct RegClass { + pub name: &'static str, + pub index: RegClassIndex, + pub width: u8, + pub bank: RegBankIndex, + pub toprc: RegClassIndex, + pub count: u8, + pub start: u8, + pub subclasses: Vec<RegClassIndex>, +} + +impl RegClass { + pub fn new( + name: &'static str, + index: RegClassIndex, + width: u8, + bank: RegBankIndex, + toprc: RegClassIndex, + count: u8, + start: u8, + ) -> Self { + Self { + name, + index, + width, + bank, + toprc, + count, + start, + subclasses: Vec::new(), + } + } + + /// Compute a bit-mask of subclasses, including self. + pub fn subclass_mask(&self) -> u64 { + let mut m = 1 << self.index.index(); + for rc in self.subclasses.iter() { + m |= 1 << rc.index(); + } + m + } + + /// Compute a bit-mask of the register units allocated by this register class. 
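+ ///
+ /// The mask is returned as three 32-bit words covering register units 0..95. For example,
+ /// a class of 16 single-unit registers starting at unit 0 yields `[0x0000_ffff, 0, 0]`.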
+ pub fn mask(&self, bank_first_unit: u8) -> Vec<u32> { + let mut u = (self.start + bank_first_unit) as usize; + let mut out_mask = vec![0, 0, 0]; + for _ in 0..self.count { + out_mask[u / 32] |= 1 << (u % 32); + u += self.width as usize; + } + out_mask + } +} + +pub(crate) enum RegClassProto { + TopLevel(RegBankIndex), + SubClass(RegClassIndex), +} + +pub(crate) struct RegClassBuilder { + pub name: &'static str, + pub width: u8, + pub count: u8, + pub start: u8, + pub proto: RegClassProto, +} + +impl RegClassBuilder { + pub fn new_toplevel(name: &'static str, bank: RegBankIndex) -> Self { + Self { + name, + width: 1, + count: 0, + start: 0, + proto: RegClassProto::TopLevel(bank), + } + } + pub fn subclass_of( + name: &'static str, + parent_index: RegClassIndex, + start: u8, + stop: u8, + ) -> Self { + assert!(stop >= start); + Self { + name, + width: 0, + count: stop - start, + start, + proto: RegClassProto::SubClass(parent_index), + } + } + pub fn count(mut self, count: u8) -> Self { + self.count = count; + self + } + pub fn width(mut self, width: u8) -> Self { + match self.proto { + RegClassProto::TopLevel(_) => self.width = width, + RegClassProto::SubClass(_) => panic!("Subclasses inherit their parent's width."), + } + self + } +} + +pub(crate) struct RegBankBuilder { + pub name: &'static str, + pub units: u8, + pub names: Vec<&'static str>, + pub prefix: &'static str, + pub pressure_tracking: Option<bool>, + pub pinned_reg: Option<u16>, +} + +impl RegBankBuilder { + pub fn new(name: &'static str, prefix: &'static str) -> Self { + Self { + name, + units: 0, + names: vec![], + prefix, + pressure_tracking: None, + pinned_reg: None, + } + } + pub fn units(mut self, units: u8) -> Self { + self.units = units; + self + } + pub fn names(mut self, names: Vec<&'static str>) -> Self { + self.names = names; + self + } + pub fn track_pressure(mut self, track: bool) -> Self { + self.pressure_tracking = Some(track); + self + } + pub fn pinned_reg(mut self, unit: u16) -> Self { + assert!(unit < u16::from(self.units)); + self.pinned_reg = Some(unit); + self + } +} + +pub(crate) struct IsaRegsBuilder { + pub banks: PrimaryMap<RegBankIndex, RegBank>, + pub classes: PrimaryMap<RegClassIndex, RegClass>, +} + +impl IsaRegsBuilder { + pub fn new() -> Self { + Self { + banks: PrimaryMap::new(), + classes: PrimaryMap::new(), + } + } + + pub fn add_bank(&mut self, builder: RegBankBuilder) -> RegBankIndex { + let first_unit = if self.banks.is_empty() { + 0 + } else { + let last = &self.banks.last().unwrap(); + let first_available_unit = (last.first_unit + last.units) as i8; + let units = builder.units; + let align = if units.is_power_of_two() { + units + } else { + units.next_power_of_two() + } as i8; + (first_available_unit + align - 1) & -align + } as u8; + + self.banks.push(RegBank::new( + builder.name, + first_unit, + builder.units, + builder.names, + builder.prefix, + builder + .pressure_tracking + .expect("Pressure tracking must be explicitly set"), + builder.pinned_reg, + )) + } + + pub fn add_class(&mut self, builder: RegClassBuilder) -> RegClassIndex { + let class_index = self.classes.next_key(); + + // Finish delayed construction of RegClass. 
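+ // A top-level class is defined directly against its bank; a subclass inherits the bank,
+ // top-level class and register width from its parent and only offsets its start within it.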
+ let (bank, toprc, start, width) = match builder.proto { + RegClassProto::TopLevel(bank_index) => { + self.banks + .get_mut(bank_index) + .unwrap() + .toprcs + .push(class_index); + (bank_index, class_index, builder.start, builder.width) + } + RegClassProto::SubClass(parent_class_index) => { + assert!(builder.width == 0); + let (bank, toprc, start, width) = { + let parent = self.classes.get(parent_class_index).unwrap(); + (parent.bank, parent.toprc, parent.start, parent.width) + }; + for reg_class in self.classes.values_mut() { + if reg_class.toprc == toprc { + reg_class.subclasses.push(class_index); + } + } + let subclass_start = start + builder.start * width; + (bank, toprc, subclass_start, width) + } + }; + + let reg_bank_units = self.banks.get(bank).unwrap().units; + assert!(start < reg_bank_units); + + let count = if builder.count != 0 { + builder.count + } else { + reg_bank_units / width + }; + + let reg_class = RegClass::new(builder.name, class_index, width, bank, toprc, count, start); + self.classes.push(reg_class); + + let reg_bank = self.banks.get_mut(bank).unwrap(); + reg_bank.classes.push(class_index); + + class_index + } + + /// Checks that the set of register classes satisfies: + /// + /// 1. Closed under intersection: The intersection of any two register + /// classes in the set is either empty or identical to a member of the + /// set. + /// 2. There are no identical classes under different names. + /// 3. Classes are sorted topologically such that all subclasses have a + /// higher index that the superclass. + pub fn build(self) -> IsaRegs { + for reg_bank in self.banks.values() { + for i1 in reg_bank.classes.iter() { + for i2 in reg_bank.classes.iter() { + if i1 >= i2 { + continue; + } + + let rc1 = self.classes.get(*i1).unwrap(); + let rc2 = self.classes.get(*i2).unwrap(); + + let rc1_mask = rc1.mask(0); + let rc2_mask = rc2.mask(0); + + assert!( + rc1.width != rc2.width || rc1_mask != rc2_mask, + "no duplicates" + ); + if rc1.width != rc2.width { + continue; + } + + let mut intersect = Vec::new(); + for (a, b) in rc1_mask.iter().zip(rc2_mask.iter()) { + intersect.push(a & b); + } + if intersect == vec![0; intersect.len()] { + continue; + } + + // Classes must be topologically ordered, so the intersection can't be the + // superclass. + assert!(intersect != rc1_mask); + + // If the intersection is the second one, then it must be a subclass. 
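+ // (Since i1 < i2 and subclasses must come after their superclass, only rc2 can be
+ // contained in rc1 here, and it must then appear among rc1's subclasses.)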
+ if intersect == rc2_mask { + assert!(self + .classes + .get(*i1) + .unwrap() + .subclasses + .iter() + .any(|x| *x == *i2)); + } + } + } + } + + assert!( + self.classes.len() <= constants::MAX_NUM_REG_CLASSES, + "Too many register classes" + ); + + let num_toplevel = self + .classes + .values() + .filter(|x| x.toprc == x.index && self.banks.get(x.bank).unwrap().pressure_tracking) + .count(); + + assert!( + num_toplevel <= constants::MAX_TRACKED_TOP_RCS, + "Too many top-level register classes" + ); + + IsaRegs::new(self.banks, self.classes) + } +} + +pub(crate) struct IsaRegs { + pub banks: PrimaryMap<RegBankIndex, RegBank>, + pub classes: PrimaryMap<RegClassIndex, RegClass>, +} + +impl IsaRegs { + fn new( + banks: PrimaryMap<RegBankIndex, RegBank>, + classes: PrimaryMap<RegClassIndex, RegClass>, + ) -> Self { + Self { banks, classes } + } + + pub fn class_by_name(&self, name: &str) -> RegClassIndex { + self.classes + .values() + .find(|&class| class.name == name) + .unwrap_or_else(|| panic!("register class {} not found", name)) + .index + } + + pub fn regunit_by_name(&self, class_index: RegClassIndex, name: &'static str) -> u8 { + let bank_index = self.classes.get(class_index).unwrap().bank; + self.banks.get(bank_index).unwrap().unit_by_name(name) + } +} diff --git a/third_party/rust/cranelift-codegen-meta/src/cdsl/settings.rs b/third_party/rust/cranelift-codegen-meta/src/cdsl/settings.rs new file mode 100644 index 0000000000..217bad9955 --- /dev/null +++ b/third_party/rust/cranelift-codegen-meta/src/cdsl/settings.rs @@ -0,0 +1,407 @@ +use std::iter; + +#[derive(Clone, Copy, Hash, PartialEq, Eq)] +pub(crate) struct BoolSettingIndex(usize); + +#[derive(Hash, PartialEq, Eq)] +pub(crate) struct BoolSetting { + pub default: bool, + pub bit_offset: u8, + pub predicate_number: u8, +} + +#[derive(Hash, PartialEq, Eq)] +pub(crate) enum SpecificSetting { + Bool(BoolSetting), + Enum(Vec<&'static str>), + Num(u8), +} + +#[derive(Hash, PartialEq, Eq)] +pub(crate) struct Setting { + pub name: &'static str, + pub comment: &'static str, + pub specific: SpecificSetting, + pub byte_offset: u8, +} + +impl Setting { + pub fn default_byte(&self) -> u8 { + match self.specific { + SpecificSetting::Bool(BoolSetting { + default, + bit_offset, + .. + }) => { + if default { + 1 << bit_offset + } else { + 0 + } + } + SpecificSetting::Enum(_) => 0, + SpecificSetting::Num(default) => default, + } + } + + fn byte_for_value(&self, v: bool) -> u8 { + match self.specific { + SpecificSetting::Bool(BoolSetting { bit_offset, .. }) => { + if v { + 1 << bit_offset + } else { + 0 + } + } + _ => panic!("byte_for_value shouldn't be used for non-boolean settings."), + } + } + + fn byte_mask(&self) -> u8 { + match self.specific { + SpecificSetting::Bool(BoolSetting { bit_offset, .. 
}) => 1 << bit_offset, + _ => panic!("byte_for_value shouldn't be used for non-boolean settings."), + } + } +} + +#[derive(Hash, PartialEq, Eq)] +pub(crate) struct PresetIndex(usize); + +#[derive(Hash, PartialEq, Eq)] +pub(crate) enum PresetType { + BoolSetting(BoolSettingIndex), + OtherPreset(PresetIndex), +} + +impl Into<PresetType> for BoolSettingIndex { + fn into(self) -> PresetType { + PresetType::BoolSetting(self) + } +} +impl Into<PresetType> for PresetIndex { + fn into(self) -> PresetType { + PresetType::OtherPreset(self) + } +} + +#[derive(Hash, PartialEq, Eq)] +pub(crate) struct Preset { + pub name: &'static str, + values: Vec<BoolSettingIndex>, +} + +impl Preset { + pub fn layout(&self, group: &SettingGroup) -> Vec<(u8, u8)> { + let mut layout: Vec<(u8, u8)> = iter::repeat((0, 0)) + .take(group.settings_size as usize) + .collect(); + for bool_index in &self.values { + let setting = &group.settings[bool_index.0]; + let mask = setting.byte_mask(); + let val = setting.byte_for_value(true); + assert!((val & !mask) == 0); + let (ref mut l_mask, ref mut l_val) = + *layout.get_mut(setting.byte_offset as usize).unwrap(); + *l_mask |= mask; + *l_val = (*l_val & !mask) | val; + } + layout + } +} + +pub(crate) struct SettingGroup { + pub name: &'static str, + pub settings: Vec<Setting>, + pub bool_start_byte_offset: u8, + pub settings_size: u8, + pub presets: Vec<Preset>, + pub predicates: Vec<Predicate>, +} + +impl SettingGroup { + fn num_bool_settings(&self) -> u8 { + self.settings + .iter() + .filter(|s| { + if let SpecificSetting::Bool(_) = s.specific { + true + } else { + false + } + }) + .count() as u8 + } + + pub fn byte_size(&self) -> u8 { + let num_predicates = self.num_bool_settings() + (self.predicates.len() as u8); + self.bool_start_byte_offset + (num_predicates + 7) / 8 + } + + pub fn get_bool(&self, name: &'static str) -> (BoolSettingIndex, &Self) { + for (i, s) in self.settings.iter().enumerate() { + if let SpecificSetting::Bool(_) = s.specific { + if s.name == name { + return (BoolSettingIndex(i), self); + } + } + } + panic!("Should have found bool setting by name."); + } + + pub fn predicate_by_name(&self, name: &'static str) -> SettingPredicateNumber { + self.predicates + .iter() + .find(|pred| pred.name == name) + .unwrap_or_else(|| panic!("unknown predicate {}", name)) + .number + } +} + +/// This is the basic information needed to track the specific parts of a setting when building +/// them. +pub(crate) enum ProtoSpecificSetting { + Bool(bool), + Enum(Vec<&'static str>), + Num(u8), +} + +/// This is the information provided during building for a setting. 
+struct ProtoSetting { + name: &'static str, + comment: &'static str, + specific: ProtoSpecificSetting, +} + +#[derive(Hash, PartialEq, Eq)] +pub(crate) enum PredicateNode { + OwnedBool(BoolSettingIndex), + SharedBool(&'static str, &'static str), + Not(Box<PredicateNode>), + And(Box<PredicateNode>, Box<PredicateNode>), +} + +impl Into<PredicateNode> for BoolSettingIndex { + fn into(self) -> PredicateNode { + PredicateNode::OwnedBool(self) + } +} +impl<'a> Into<PredicateNode> for (BoolSettingIndex, &'a SettingGroup) { + fn into(self) -> PredicateNode { + let (index, group) = (self.0, self.1); + let setting = &group.settings[index.0]; + PredicateNode::SharedBool(group.name, setting.name) + } +} + +impl PredicateNode { + fn render(&self, group: &SettingGroup) -> String { + match *self { + PredicateNode::OwnedBool(bool_setting_index) => format!( + "{}.{}()", + group.name, group.settings[bool_setting_index.0].name + ), + PredicateNode::SharedBool(ref group_name, ref bool_name) => { + format!("{}.{}()", group_name, bool_name) + } + PredicateNode::And(ref lhs, ref rhs) => { + format!("{} && {}", lhs.render(group), rhs.render(group)) + } + PredicateNode::Not(ref node) => format!("!({})", node.render(group)), + } + } +} + +struct ProtoPredicate { + pub name: &'static str, + node: PredicateNode, +} + +pub(crate) type SettingPredicateNumber = u8; + +pub(crate) struct Predicate { + pub name: &'static str, + node: PredicateNode, + pub number: SettingPredicateNumber, +} + +impl Predicate { + pub fn render(&self, group: &SettingGroup) -> String { + self.node.render(group) + } +} + +pub(crate) struct SettingGroupBuilder { + name: &'static str, + settings: Vec<ProtoSetting>, + presets: Vec<Preset>, + predicates: Vec<ProtoPredicate>, +} + +impl SettingGroupBuilder { + pub fn new(name: &'static str) -> Self { + Self { + name, + settings: Vec::new(), + presets: Vec::new(), + predicates: Vec::new(), + } + } + + fn add_setting( + &mut self, + name: &'static str, + comment: &'static str, + specific: ProtoSpecificSetting, + ) { + self.settings.push(ProtoSetting { + name, + comment, + specific, + }) + } + + pub fn add_bool( + &mut self, + name: &'static str, + comment: &'static str, + default: bool, + ) -> BoolSettingIndex { + assert!( + self.predicates.is_empty(), + "predicates must be added after the boolean settings" + ); + self.add_setting(name, comment, ProtoSpecificSetting::Bool(default)); + BoolSettingIndex(self.settings.len() - 1) + } + + pub fn add_enum( + &mut self, + name: &'static str, + comment: &'static str, + values: Vec<&'static str>, + ) { + self.add_setting(name, comment, ProtoSpecificSetting::Enum(values)); + } + + pub fn add_num(&mut self, name: &'static str, comment: &'static str, default: u8) { + self.add_setting(name, comment, ProtoSpecificSetting::Num(default)); + } + + pub fn add_predicate(&mut self, name: &'static str, node: PredicateNode) { + self.predicates.push(ProtoPredicate { name, node }); + } + + pub fn add_preset(&mut self, name: &'static str, args: Vec<PresetType>) -> PresetIndex { + let mut values = Vec::new(); + for arg in args { + match arg { + PresetType::OtherPreset(index) => { + values.extend(self.presets[index.0].values.iter()); + } + PresetType::BoolSetting(index) => values.push(index), + } + } + self.presets.push(Preset { name, values }); + PresetIndex(self.presets.len() - 1) + } + + /// Compute the layout of the byte vector used to represent this settings + /// group. + /// + /// The byte vector contains the following entries in order: + /// + /// 1. 
Byte-sized settings like `NumSetting` and `EnumSetting`. + /// 2. `BoolSetting` settings. + /// 3. Precomputed named predicates. + /// 4. Other numbered predicates, including parent predicates that need to be accessible by + /// number. + /// + /// Set `self.settings_size` to the length of the byte vector prefix that + /// contains the settings. All bytes after that are computed, not + /// configured. + /// + /// Set `self.boolean_offset` to the beginning of the numbered predicates, + /// 2. in the list above. + /// + /// Assign `byte_offset` and `bit_offset` fields in all settings. + pub fn build(self) -> SettingGroup { + let mut group = SettingGroup { + name: self.name, + settings: Vec::new(), + bool_start_byte_offset: 0, + settings_size: 0, + presets: Vec::new(), + predicates: Vec::new(), + }; + + let mut byte_offset = 0; + + // Assign the non-boolean settings first. + for s in &self.settings { + let specific = match s.specific { + ProtoSpecificSetting::Bool(..) => continue, + ProtoSpecificSetting::Enum(ref values) => SpecificSetting::Enum(values.clone()), + ProtoSpecificSetting::Num(default) => SpecificSetting::Num(default), + }; + + group.settings.push(Setting { + name: s.name, + comment: s.comment, + byte_offset, + specific, + }); + + byte_offset += 1; + } + + group.bool_start_byte_offset = byte_offset; + + let mut predicate_number = 0; + + // Then the boolean settings. + for s in &self.settings { + let default = match s.specific { + ProtoSpecificSetting::Bool(default) => default, + ProtoSpecificSetting::Enum(_) | ProtoSpecificSetting::Num(_) => continue, + }; + group.settings.push(Setting { + name: s.name, + comment: s.comment, + byte_offset: byte_offset + predicate_number / 8, + specific: SpecificSetting::Bool(BoolSetting { + default, + bit_offset: predicate_number % 8, + predicate_number, + }), + }); + predicate_number += 1; + } + + assert!( + group.predicates.is_empty(), + "settings_size is the byte size before adding predicates" + ); + group.settings_size = group.byte_size(); + + // Sort predicates by name to ensure the same order as the Python code. + let mut predicates = self.predicates; + predicates.sort_by_key(|predicate| predicate.name); + + group + .predicates + .extend(predicates.into_iter().map(|predicate| { + let number = predicate_number; + predicate_number += 1; + Predicate { + name: predicate.name, + node: predicate.node, + number, + } + })); + + group.presets.extend(self.presets); + + group + } +} diff --git a/third_party/rust/cranelift-codegen-meta/src/cdsl/type_inference.rs b/third_party/rust/cranelift-codegen-meta/src/cdsl/type_inference.rs new file mode 100644 index 0000000000..25a07a9b84 --- /dev/null +++ b/third_party/rust/cranelift-codegen-meta/src/cdsl/type_inference.rs @@ -0,0 +1,660 @@ +use crate::cdsl::ast::{Def, DefIndex, DefPool, Var, VarIndex, VarPool}; +use crate::cdsl::typevar::{DerivedFunc, TypeSet, TypeVar}; + +use std::collections::{HashMap, HashSet}; +use std::iter::FromIterator; + +#[derive(Debug, Hash, PartialEq, Eq)] +pub(crate) enum Constraint { + /// Constraint specifying that a type var tv1 must be wider than or equal to type var tv2 at + /// runtime. This requires that: + /// 1) They have the same number of lanes + /// 2) In a lane tv1 has at least as many bits as tv2. + WiderOrEq(TypeVar, TypeVar), + + /// Constraint specifying that two derived type vars must have the same runtime type. + Eq(TypeVar, TypeVar), + + /// Constraint specifying that a type var must belong to some typeset. 
+ InTypeset(TypeVar, TypeSet), +} + +impl Constraint { + fn translate_with<F: Fn(&TypeVar) -> TypeVar>(&self, func: F) -> Constraint { + match self { + Constraint::WiderOrEq(lhs, rhs) => { + let lhs = func(&lhs); + let rhs = func(&rhs); + Constraint::WiderOrEq(lhs, rhs) + } + Constraint::Eq(lhs, rhs) => { + let lhs = func(&lhs); + let rhs = func(&rhs); + Constraint::Eq(lhs, rhs) + } + Constraint::InTypeset(tv, ts) => { + let tv = func(&tv); + Constraint::InTypeset(tv, ts.clone()) + } + } + } + + /// Creates a new constraint by replacing type vars by their hashmap equivalent. + fn translate_with_map( + &self, + original_to_own_typevar: &HashMap<&TypeVar, TypeVar>, + ) -> Constraint { + self.translate_with(|tv| substitute(original_to_own_typevar, tv)) + } + + /// Creates a new constraint by replacing type vars by their canonical equivalent. + fn translate_with_env(&self, type_env: &TypeEnvironment) -> Constraint { + self.translate_with(|tv| type_env.get_equivalent(tv)) + } + + fn is_trivial(&self) -> bool { + match self { + Constraint::WiderOrEq(lhs, rhs) => { + // Trivially true. + if lhs == rhs { + return true; + } + + let ts1 = lhs.get_typeset(); + let ts2 = rhs.get_typeset(); + + // Trivially true. + if ts1.is_wider_or_equal(&ts2) { + return true; + } + + // Trivially false. + if ts1.is_narrower(&ts2) { + return true; + } + + // Trivially false. + if (&ts1.lanes & &ts2.lanes).is_empty() { + return true; + } + + self.is_concrete() + } + Constraint::Eq(lhs, rhs) => lhs == rhs || self.is_concrete(), + Constraint::InTypeset(_, _) => { + // The way InTypeset are made, they would always be trivial if we were applying the + // same logic as the Python code did, so ignore this. + self.is_concrete() + } + } + } + + /// Returns true iff all the referenced type vars are singletons. + fn is_concrete(&self) -> bool { + match self { + Constraint::WiderOrEq(lhs, rhs) => { + lhs.singleton_type().is_some() && rhs.singleton_type().is_some() + } + Constraint::Eq(lhs, rhs) => { + lhs.singleton_type().is_some() && rhs.singleton_type().is_some() + } + Constraint::InTypeset(tv, _) => tv.singleton_type().is_some(), + } + } + + fn typevar_args(&self) -> Vec<&TypeVar> { + match self { + Constraint::WiderOrEq(lhs, rhs) => vec![lhs, rhs], + Constraint::Eq(lhs, rhs) => vec![lhs, rhs], + Constraint::InTypeset(tv, _) => vec![tv], + } + } +} + +#[derive(Clone, Copy)] +enum TypeEnvRank { + Singleton = 5, + Input = 4, + Intermediate = 3, + Output = 2, + Temp = 1, + Internal = 0, +} + +/// Class encapsulating the necessary bookkeeping for type inference. +pub(crate) struct TypeEnvironment { + vars: HashSet<VarIndex>, + ranks: HashMap<TypeVar, TypeEnvRank>, + equivalency_map: HashMap<TypeVar, TypeVar>, + pub constraints: Vec<Constraint>, +} + +impl TypeEnvironment { + fn new() -> Self { + TypeEnvironment { + vars: HashSet::new(), + ranks: HashMap::new(), + equivalency_map: HashMap::new(), + constraints: Vec::new(), + } + } + + fn register(&mut self, var_index: VarIndex, var: &mut Var) { + self.vars.insert(var_index); + let rank = if var.is_input() { + TypeEnvRank::Input + } else if var.is_intermediate() { + TypeEnvRank::Intermediate + } else if var.is_output() { + TypeEnvRank::Output + } else { + assert!(var.is_temp()); + TypeEnvRank::Temp + }; + self.ranks.insert(var.get_or_create_typevar(), rank); + } + + fn add_constraint(&mut self, constraint: Constraint) { + if self.constraints.iter().any(|item| *item == constraint) { + return; + } + + // Check extra conditions for InTypeset constraints. 
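+ // InTypeset constraints are expected to be built only for the plain `typeof_*` type vars
+ // attached to source variables, hence the two assertions below.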
+ if let Constraint::InTypeset(tv, _) = &constraint { + assert!( + tv.base.is_none(), + "type variable is {:?}, while expecting none", + tv + ); + assert!( + tv.name.starts_with("typeof_"), + "Name \"{}\" should start with \"typeof_\"", + tv.name + ); + } + + self.constraints.push(constraint); + } + + /// Returns the canonical representative of the equivalency class of the given argument, or + /// duplicates it if it's not there yet. + pub fn get_equivalent(&self, tv: &TypeVar) -> TypeVar { + let mut tv = tv; + while let Some(found) = self.equivalency_map.get(tv) { + tv = found; + } + match &tv.base { + Some(parent) => self + .get_equivalent(&parent.type_var) + .derived(parent.derived_func), + None => tv.clone(), + } + } + + /// Get the rank of tv in the partial order: + /// - TVs directly associated with a Var get their rank from the Var (see register()). + /// - Internally generated non-derived TVs implicitly get the lowest rank (0). + /// - Derived variables get their rank from their free typevar. + /// - Singletons have the highest rank. + /// - TVs associated with vars in a source pattern have a higher rank than TVs associated with + /// temporary vars. + fn rank(&self, tv: &TypeVar) -> u8 { + let actual_tv = match tv.base { + Some(_) => tv.free_typevar(), + None => Some(tv.clone()), + }; + + let rank = match actual_tv { + Some(actual_tv) => match self.ranks.get(&actual_tv) { + Some(rank) => Some(*rank), + None => { + assert!( + !actual_tv.name.starts_with("typeof_"), + format!("variable {} should be explicitly ranked", actual_tv.name) + ); + None + } + }, + None => None, + }; + + let rank = match rank { + Some(rank) => rank, + None => { + if tv.singleton_type().is_some() { + TypeEnvRank::Singleton + } else { + TypeEnvRank::Internal + } + } + }; + + rank as u8 + } + + /// Record the fact that the free tv1 is part of the same equivalence class as tv2. The + /// canonical representative of the merged class is tv2's canonical representative. + fn record_equivalent(&mut self, tv1: TypeVar, tv2: TypeVar) { + assert!(tv1.base.is_none()); + assert!(self.get_equivalent(&tv1) == tv1); + if let Some(tv2_base) = &tv2.base { + // Ensure there are no cycles. + assert!(self.get_equivalent(&tv2_base.type_var) != tv1); + } + self.equivalency_map.insert(tv1, tv2); + } + + /// Get the free typevars in the current type environment. + pub fn free_typevars(&self, var_pool: &mut VarPool) -> Vec<TypeVar> { + let mut typevars = Vec::new(); + typevars.extend(self.equivalency_map.keys().cloned()); + typevars.extend( + self.vars + .iter() + .map(|&var_index| var_pool.get_mut(var_index).get_or_create_typevar()), + ); + + let set: HashSet<TypeVar> = HashSet::from_iter( + typevars + .iter() + .map(|tv| self.get_equivalent(tv).free_typevar()) + .filter(|opt_tv| { + // Filter out singleton types. + opt_tv.is_some() + }) + .map(|tv| tv.unwrap()), + ); + Vec::from_iter(set) + } + + /// Normalize by collapsing any roots that don't correspond to a concrete type var AND have a + /// single type var derived from them or equivalent to them. + /// + /// e.g. if we have a root of the tree that looks like: + /// + /// typeof_a typeof_b + /// \\ / + /// typeof_x + /// | + /// half_width(1) + /// | + /// 1 + /// + /// we want to collapse the linear path between 1 and typeof_x. 
The resulting graph is: + /// + /// typeof_a typeof_b + /// \\ / + /// typeof_x + fn normalize(&mut self, var_pool: &mut VarPool) { + let source_tvs: HashSet<TypeVar> = HashSet::from_iter( + self.vars + .iter() + .map(|&var_index| var_pool.get_mut(var_index).get_or_create_typevar()), + ); + + let mut children: HashMap<TypeVar, HashSet<TypeVar>> = HashMap::new(); + + // Insert all the parents found by the derivation relationship. + for type_var in self.equivalency_map.values() { + if type_var.base.is_none() { + continue; + } + + let parent_tv = type_var.free_typevar(); + if parent_tv.is_none() { + // Ignore this type variable, it's a singleton. + continue; + } + let parent_tv = parent_tv.unwrap(); + + children + .entry(parent_tv) + .or_insert_with(HashSet::new) + .insert(type_var.clone()); + } + + // Insert all the explicit equivalency links. + for (equivalent_tv, canon_tv) in self.equivalency_map.iter() { + children + .entry(canon_tv.clone()) + .or_insert_with(HashSet::new) + .insert(equivalent_tv.clone()); + } + + // Remove links that are straight paths up to typevar of variables. + for free_root in self.free_typevars(var_pool) { + let mut root = &free_root; + while !source_tvs.contains(&root) + && children.contains_key(&root) + && children.get(&root).unwrap().len() == 1 + { + let child = children.get(&root).unwrap().iter().next().unwrap(); + assert_eq!(self.equivalency_map[child], root.clone()); + self.equivalency_map.remove(child); + root = child; + } + } + } + + /// Extract a clean type environment from self, that only mentions type vars associated with + /// real variables. + fn extract(self, var_pool: &mut VarPool) -> TypeEnvironment { + let vars_tv: HashSet<TypeVar> = HashSet::from_iter( + self.vars + .iter() + .map(|&var_index| var_pool.get_mut(var_index).get_or_create_typevar()), + ); + + let mut new_equivalency_map: HashMap<TypeVar, TypeVar> = HashMap::new(); + for tv in &vars_tv { + let canon_tv = self.get_equivalent(tv); + if *tv != canon_tv { + new_equivalency_map.insert(tv.clone(), canon_tv.clone()); + } + + // Sanity check: the translated type map should only refer to real variables. + assert!(vars_tv.contains(tv)); + let canon_free_tv = canon_tv.free_typevar(); + assert!(canon_free_tv.is_none() || vars_tv.contains(&canon_free_tv.unwrap())); + } + + let mut new_constraints: HashSet<Constraint> = HashSet::new(); + for constraint in &self.constraints { + let constraint = constraint.translate_with_env(&self); + if constraint.is_trivial() || new_constraints.contains(&constraint) { + continue; + } + + // Sanity check: translated constraints should refer only to real variables. + for arg in constraint.typevar_args() { + let arg_free_tv = arg.free_typevar(); + assert!(arg_free_tv.is_none() || vars_tv.contains(&arg_free_tv.unwrap())); + } + + new_constraints.insert(constraint); + } + + TypeEnvironment { + vars: self.vars, + ranks: self.ranks, + equivalency_map: new_equivalency_map, + constraints: Vec::from_iter(new_constraints), + } + } +} + +/// Replaces an external type variable according to the following rules: +/// - if a local copy is present in the map, return it. +/// - or if it's derived, create a local derived one that recursively substitutes the parent. +/// - or return itself. 
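/// 
/// (Editor's note: an illustrative sketch, not part of the original sources; the
/// variable names are hypothetical and the snippet assumes the `TypeVar` and
/// `TypeSetBuilder` APIs defined in `cdsl/typevar.rs` later in this patch.)
/// 
/// ```ignore
/// let typeof_x = TypeVar::new("typeof_x", "example", TypeSetBuilder::new().ints(16..64).build());
/// let own = TypeVar::copy_from(&typeof_x, "own_typeof_x".to_string());
/// let mut map = HashMap::new();
/// map.insert(&typeof_x, own.clone());
/// // A non-derived var is replaced by its local copy; a derived var is rebuilt on
/// // top of the substituted parent.
/// assert!(substitute(&map, &typeof_x) == own);
/// assert!(substitute(&map, &typeof_x.half_width()) == own.half_width());
/// ```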
+fn substitute(map: &HashMap<&TypeVar, TypeVar>, external_type_var: &TypeVar) -> TypeVar { + match map.get(&external_type_var) { + Some(own_type_var) => own_type_var.clone(), + None => match &external_type_var.base { + Some(parent) => { + let parent_substitute = substitute(map, &parent.type_var); + TypeVar::derived(&parent_substitute, parent.derived_func) + } + None => external_type_var.clone(), + }, + } +} + +/// Normalize a (potentially derived) typevar using the following rules: +/// +/// - vector and width derived functions commute +/// {HALF,DOUBLE}VECTOR({HALF,DOUBLE}WIDTH(base)) -> +/// {HALF,DOUBLE}WIDTH({HALF,DOUBLE}VECTOR(base)) +/// +/// - half/double pairs collapse +/// {HALF,DOUBLE}WIDTH({DOUBLE,HALF}WIDTH(base)) -> base +/// {HALF,DOUBLE}VECTOR({DOUBLE,HALF}VECTOR(base)) -> base +fn canonicalize_derivations(tv: TypeVar) -> TypeVar { + let base = match &tv.base { + Some(base) => base, + None => return tv, + }; + + let derived_func = base.derived_func; + + if let Some(base_base) = &base.type_var.base { + let base_base_tv = &base_base.type_var; + match (derived_func, base_base.derived_func) { + (DerivedFunc::HalfWidth, DerivedFunc::DoubleWidth) + | (DerivedFunc::DoubleWidth, DerivedFunc::HalfWidth) + | (DerivedFunc::HalfVector, DerivedFunc::DoubleVector) + | (DerivedFunc::DoubleVector, DerivedFunc::HalfVector) => { + // Cancelling bijective transformations. This doesn't hide any overflow issues + // since derived type sets are checked upon derivaion, and base typesets are only + // allowed to shrink. + return canonicalize_derivations(base_base_tv.clone()); + } + (DerivedFunc::HalfWidth, DerivedFunc::HalfVector) + | (DerivedFunc::HalfWidth, DerivedFunc::DoubleVector) + | (DerivedFunc::DoubleWidth, DerivedFunc::DoubleVector) + | (DerivedFunc::DoubleWidth, DerivedFunc::HalfVector) => { + // Arbitrarily put WIDTH derivations before VECTOR derivations, since they commute. + return canonicalize_derivations( + base_base_tv + .derived(derived_func) + .derived(base_base.derived_func), + ); + } + _ => {} + }; + } + + canonicalize_derivations(base.type_var.clone()).derived(derived_func) +} + +/// Given typevars tv1 and tv2 (which could be derived from one another), constrain their typesets +/// to be the same. When one is derived from the other, repeat the constrain process until +/// a fixed point is reached. +fn constrain_fixpoint(tv1: &TypeVar, tv2: &TypeVar) { + loop { + let old_tv1_ts = tv1.get_typeset().clone(); + tv2.constrain_types(tv1.clone()); + if tv1.get_typeset() == old_tv1_ts { + break; + } + } + + let old_tv2_ts = tv2.get_typeset(); + tv1.constrain_types(tv2.clone()); + // The above loop should ensure that all reference cycles have been handled. + assert!(old_tv2_ts == tv2.get_typeset()); +} + +/// Unify tv1 and tv2 in the given type environment. tv1 must have a rank greater or equal to tv2's +/// one, modulo commutations. +fn unify(tv1: &TypeVar, tv2: &TypeVar, type_env: &mut TypeEnvironment) -> Result<(), String> { + let tv1 = canonicalize_derivations(type_env.get_equivalent(tv1)); + let tv2 = canonicalize_derivations(type_env.get_equivalent(tv2)); + + if tv1 == tv2 { + // Already unified. + return Ok(()); + } + + if type_env.rank(&tv2) < type_env.rank(&tv1) { + // Make sure tv1 always has the smallest rank, since real variables have the higher rank + // and we want them to be the canonical representatives of their equivalency classes. 
+ return unify(&tv2, &tv1, type_env); + } + + constrain_fixpoint(&tv1, &tv2); + + if tv1.get_typeset().size() == 0 || tv2.get_typeset().size() == 0 { + return Err(format!( + "Error: empty type created when unifying {} and {}", + tv1.name, tv2.name + )); + } + + let base = match &tv1.base { + Some(base) => base, + None => { + type_env.record_equivalent(tv1, tv2); + return Ok(()); + } + }; + + if let Some(inverse) = base.derived_func.inverse() { + return unify(&base.type_var, &tv2.derived(inverse), type_env); + } + + type_env.add_constraint(Constraint::Eq(tv1, tv2)); + Ok(()) +} + +/// Perform type inference on one Def in the current type environment and return an updated type +/// environment or error. +/// +/// At a high level this works by creating fresh copies of each formal type var in the Def's +/// instruction's signature, and unifying the formal typevar with the corresponding actual typevar. +fn infer_definition( + def: &Def, + var_pool: &mut VarPool, + type_env: TypeEnvironment, + last_type_index: &mut usize, +) -> Result<TypeEnvironment, String> { + let apply = &def.apply; + let inst = &apply.inst; + + let mut type_env = type_env; + let free_formal_tvs = inst.all_typevars(); + + let mut original_to_own_typevar: HashMap<&TypeVar, TypeVar> = HashMap::new(); + for &tv in &free_formal_tvs { + assert!(original_to_own_typevar + .insert( + tv, + TypeVar::copy_from(tv, format!("own_{}", last_type_index)) + ) + .is_none()); + *last_type_index += 1; + } + + // Update the mapping with any explicity bound type vars: + for (i, value_type) in apply.value_types.iter().enumerate() { + let singleton = TypeVar::new_singleton(value_type.clone()); + assert!(original_to_own_typevar + .insert(free_formal_tvs[i], singleton) + .is_some()); + } + + // Get fresh copies for each typevar in the signature (both free and derived). + let mut formal_tvs = Vec::new(); + formal_tvs.extend(inst.value_results.iter().map(|&i| { + substitute( + &original_to_own_typevar, + inst.operands_out[i].type_var().unwrap(), + ) + })); + formal_tvs.extend(inst.value_opnums.iter().map(|&i| { + substitute( + &original_to_own_typevar, + inst.operands_in[i].type_var().unwrap(), + ) + })); + + // Get the list of actual vars. + let mut actual_vars = Vec::new(); + actual_vars.extend(inst.value_results.iter().map(|&i| def.defined_vars[i])); + actual_vars.extend( + inst.value_opnums + .iter() + .map(|&i| apply.args[i].unwrap_var()), + ); + + // Get the list of the actual TypeVars. + let mut actual_tvs = Vec::new(); + for var_index in actual_vars { + let var = var_pool.get_mut(var_index); + type_env.register(var_index, var); + actual_tvs.push(var.get_or_create_typevar()); + } + + // Make sure we start unifying with the control type variable first, by putting it at the + // front of both vectors. + if let Some(poly) = &inst.polymorphic_info { + let own_ctrl_tv = &original_to_own_typevar[&poly.ctrl_typevar]; + let ctrl_index = formal_tvs.iter().position(|tv| tv == own_ctrl_tv).unwrap(); + if ctrl_index != 0 { + formal_tvs.swap(0, ctrl_index); + actual_tvs.swap(0, ctrl_index); + } + } + + // Unify each actual type variable with the corresponding formal type variable. + for (actual_tv, formal_tv) in actual_tvs.iter().zip(&formal_tvs) { + if let Err(msg) = unify(actual_tv, formal_tv, &mut type_env) { + return Err(format!( + "fail ti on {} <: {}: {}", + actual_tv.name, formal_tv.name, msg + )); + } + } + + // Add any instruction specific constraints. 
+ for constraint in &inst.constraints { + type_env.add_constraint(constraint.translate_with_map(&original_to_own_typevar)); + } + + Ok(type_env) +} + +/// Perform type inference on an transformation. Return an updated type environment or error. +pub(crate) fn infer_transform( + src: DefIndex, + dst: &[DefIndex], + def_pool: &DefPool, + var_pool: &mut VarPool, +) -> Result<TypeEnvironment, String> { + let mut type_env = TypeEnvironment::new(); + let mut last_type_index = 0; + + // Execute type inference on the source pattern. + type_env = infer_definition(def_pool.get(src), var_pool, type_env, &mut last_type_index) + .map_err(|err| format!("In src pattern: {}", err))?; + + // Collect the type sets once after applying the source patterm; we'll compare the typesets + // after we've also considered the destination pattern, and will emit supplementary InTypeset + // checks if they don't match. + let src_typesets = type_env + .vars + .iter() + .map(|&var_index| { + let var = var_pool.get_mut(var_index); + let tv = type_env.get_equivalent(&var.get_or_create_typevar()); + (var_index, tv.get_typeset()) + }) + .collect::<Vec<_>>(); + + // Execute type inference on the destination pattern. + for (i, &def_index) in dst.iter().enumerate() { + let def = def_pool.get(def_index); + type_env = infer_definition(def, var_pool, type_env, &mut last_type_index) + .map_err(|err| format!("line {}: {}", i, err))?; + } + + for (var_index, src_typeset) in src_typesets { + let var = var_pool.get(var_index); + if !var.has_free_typevar() { + continue; + } + let tv = type_env.get_equivalent(&var.get_typevar().unwrap()); + let new_typeset = tv.get_typeset(); + assert!( + new_typeset.is_subset(&src_typeset), + "type sets can only get narrower" + ); + if new_typeset != src_typeset { + type_env.add_constraint(Constraint::InTypeset(tv.clone(), new_typeset.clone())); + } + } + + type_env.normalize(var_pool); + + Ok(type_env.extract(var_pool)) +} diff --git a/third_party/rust/cranelift-codegen-meta/src/cdsl/types.rs b/third_party/rust/cranelift-codegen-meta/src/cdsl/types.rs new file mode 100644 index 0000000000..7e03c873db --- /dev/null +++ b/third_party/rust/cranelift-codegen-meta/src/cdsl/types.rs @@ -0,0 +1,587 @@ +//! Cranelift ValueType hierarchy + +use std::fmt; + +use crate::shared::types as shared_types; +use cranelift_codegen_shared::constants; + +// Rust name prefix used for the `rust_name` method. +static _RUST_NAME_PREFIX: &str = "ir::types::"; + +// ValueType variants (i8, i32, ...) are provided in `shared::types.rs`. + +/// A concrete SSA value type. +/// +/// All SSA values have a type that is described by an instance of `ValueType` +/// or one of its subclasses. +#[derive(Clone, Debug, PartialEq, Eq, Hash)] +pub(crate) enum ValueType { + Lane(LaneType), + Reference(ReferenceType), + Special(SpecialType), + Vector(VectorType), +} + +impl ValueType { + /// Iterate through all of the lane types. + pub fn all_lane_types() -> LaneTypeIterator { + LaneTypeIterator::new() + } + + /// Iterate through all of the special types (neither lanes nor vectors). + pub fn all_special_types() -> SpecialTypeIterator { + SpecialTypeIterator::new() + } + + pub fn all_reference_types() -> ReferenceTypeIterator { + ReferenceTypeIterator::new() + } + + /// Return a string containing the documentation comment for this type. 
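/// 
/// (Editor's note: the example below is an illustrative sketch added for this listing,
/// not part of the original sources; it exercises the `ValueType` accessors defined in
/// this impl.)
/// 
/// ```ignore
/// let v = LaneType::int_from_bits(32).by(4); // the vector type i32x4
/// assert_eq!(v.lane_bits(), 32);
/// assert_eq!(v.lane_count(), 4);
/// assert_eq!(v.width(), 128);
/// assert_eq!(v.membytes(), 16);
/// assert_eq!(v.rust_name(), "ir::types::I32X4");
/// ```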
+ pub fn doc(&self) -> String { + match *self { + ValueType::Lane(l) => l.doc(), + ValueType::Reference(r) => r.doc(), + ValueType::Special(s) => s.doc(), + ValueType::Vector(ref v) => v.doc(), + } + } + + /// Return the number of bits in a lane. + pub fn lane_bits(&self) -> u64 { + match *self { + ValueType::Lane(l) => l.lane_bits(), + ValueType::Reference(r) => r.lane_bits(), + ValueType::Special(s) => s.lane_bits(), + ValueType::Vector(ref v) => v.lane_bits(), + } + } + + /// Return the number of lanes. + pub fn lane_count(&self) -> u64 { + match *self { + ValueType::Vector(ref v) => v.lane_count(), + _ => 1, + } + } + + /// Find the number of bytes that this type occupies in memory. + pub fn membytes(&self) -> u64 { + self.width() / 8 + } + + /// Find the unique number associated with this type. + pub fn number(&self) -> Option<u8> { + match *self { + ValueType::Lane(l) => Some(l.number()), + ValueType::Reference(r) => Some(r.number()), + ValueType::Special(s) => Some(s.number()), + ValueType::Vector(ref v) => Some(v.number()), + } + } + + /// Return the name of this type for generated Rust source files. + pub fn rust_name(&self) -> String { + format!("{}{}", _RUST_NAME_PREFIX, self.to_string().to_uppercase()) + } + + /// Return true iff: + /// 1. self and other have equal number of lanes + /// 2. each lane in self has at least as many bits as a lane in other + pub fn _wider_or_equal(&self, rhs: &ValueType) -> bool { + (self.lane_count() == rhs.lane_count()) && (self.lane_bits() >= rhs.lane_bits()) + } + + /// Return the total number of bits of an instance of this type. + pub fn width(&self) -> u64 { + self.lane_count() * self.lane_bits() + } +} + +impl fmt::Display for ValueType { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + match *self { + ValueType::Lane(l) => l.fmt(f), + ValueType::Reference(r) => r.fmt(f), + ValueType::Special(s) => s.fmt(f), + ValueType::Vector(ref v) => v.fmt(f), + } + } +} + +/// Create a ValueType from a given lane type. +impl From<LaneType> for ValueType { + fn from(lane: LaneType) -> Self { + ValueType::Lane(lane) + } +} + +/// Create a ValueType from a given reference type. +impl From<ReferenceType> for ValueType { + fn from(reference: ReferenceType) -> Self { + ValueType::Reference(reference) + } +} + +/// Create a ValueType from a given special type. +impl From<SpecialType> for ValueType { + fn from(spec: SpecialType) -> Self { + ValueType::Special(spec) + } +} + +/// Create a ValueType from a given vector type. +impl From<VectorType> for ValueType { + fn from(vector: VectorType) -> Self { + ValueType::Vector(vector) + } +} + +/// A concrete scalar type that can appear as a vector lane too. +#[derive(Clone, Copy, PartialEq, Eq, Hash)] +pub(crate) enum LaneType { + Bool(shared_types::Bool), + Float(shared_types::Float), + Int(shared_types::Int), +} + +impl LaneType { + /// Return a string containing the documentation comment for this lane type. + pub fn doc(self) -> String { + match self { + LaneType::Bool(_) => format!("A boolean type with {} bits.", self.lane_bits()), + LaneType::Float(shared_types::Float::F32) => String::from( + "A 32-bit floating point type represented in the IEEE 754-2008 + *binary32* interchange format. This corresponds to the :c:type:`float` + type in most C implementations.", + ), + LaneType::Float(shared_types::Float::F64) => String::from( + "A 64-bit floating point type represented in the IEEE 754-2008 + *binary64* interchange format. 
This corresponds to the :c:type:`double` + type in most C implementations.", + ), + LaneType::Int(_) if self.lane_bits() < 32 => format!( + "An integer type with {} bits. + WARNING: arithmetic on {}bit integers is incomplete", + self.lane_bits(), + self.lane_bits() + ), + LaneType::Int(_) => format!("An integer type with {} bits.", self.lane_bits()), + } + } + + /// Return the number of bits in a lane. + pub fn lane_bits(self) -> u64 { + match self { + LaneType::Bool(ref b) => *b as u64, + LaneType::Float(ref f) => *f as u64, + LaneType::Int(ref i) => *i as u64, + } + } + + /// Find the unique number associated with this lane type. + pub fn number(self) -> u8 { + constants::LANE_BASE + + match self { + LaneType::Bool(shared_types::Bool::B1) => 0, + LaneType::Bool(shared_types::Bool::B8) => 1, + LaneType::Bool(shared_types::Bool::B16) => 2, + LaneType::Bool(shared_types::Bool::B32) => 3, + LaneType::Bool(shared_types::Bool::B64) => 4, + LaneType::Bool(shared_types::Bool::B128) => 5, + LaneType::Int(shared_types::Int::I8) => 6, + LaneType::Int(shared_types::Int::I16) => 7, + LaneType::Int(shared_types::Int::I32) => 8, + LaneType::Int(shared_types::Int::I64) => 9, + LaneType::Int(shared_types::Int::I128) => 10, + LaneType::Float(shared_types::Float::F32) => 11, + LaneType::Float(shared_types::Float::F64) => 12, + } + } + + pub fn bool_from_bits(num_bits: u16) -> LaneType { + LaneType::Bool(match num_bits { + 1 => shared_types::Bool::B1, + 8 => shared_types::Bool::B8, + 16 => shared_types::Bool::B16, + 32 => shared_types::Bool::B32, + 64 => shared_types::Bool::B64, + 128 => shared_types::Bool::B128, + _ => unreachable!("unxpected num bits for bool"), + }) + } + + pub fn int_from_bits(num_bits: u16) -> LaneType { + LaneType::Int(match num_bits { + 8 => shared_types::Int::I8, + 16 => shared_types::Int::I16, + 32 => shared_types::Int::I32, + 64 => shared_types::Int::I64, + 128 => shared_types::Int::I128, + _ => unreachable!("unxpected num bits for int"), + }) + } + + pub fn float_from_bits(num_bits: u16) -> LaneType { + LaneType::Float(match num_bits { + 32 => shared_types::Float::F32, + 64 => shared_types::Float::F64, + _ => unreachable!("unxpected num bits for float"), + }) + } + + pub fn by(self, lanes: u16) -> ValueType { + if lanes == 1 { + self.into() + } else { + ValueType::Vector(VectorType::new(self, lanes.into())) + } + } + + pub fn is_float(self) -> bool { + match self { + LaneType::Float(_) => true, + _ => false, + } + } + + pub fn is_int(self) -> bool { + match self { + LaneType::Int(_) => true, + _ => false, + } + } +} + +impl fmt::Display for LaneType { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + match *self { + LaneType::Bool(_) => write!(f, "b{}", self.lane_bits()), + LaneType::Float(_) => write!(f, "f{}", self.lane_bits()), + LaneType::Int(_) => write!(f, "i{}", self.lane_bits()), + } + } +} + +impl fmt::Debug for LaneType { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + let inner_msg = format!("bits={}", self.lane_bits()); + write!( + f, + "{}", + match *self { + LaneType::Bool(_) => format!("BoolType({})", inner_msg), + LaneType::Float(_) => format!("FloatType({})", inner_msg), + LaneType::Int(_) => format!("IntType({})", inner_msg), + } + ) + } +} + +/// Create a LaneType from a given bool variant. +impl From<shared_types::Bool> for LaneType { + fn from(b: shared_types::Bool) -> Self { + LaneType::Bool(b) + } +} + +/// Create a LaneType from a given float variant. 
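/// 
/// (Editor's note: illustrative sketch, not part of the original sources; it exercises
/// the `LaneType` constructors and numbering defined above.)
/// 
/// ```ignore
/// let ty = LaneType::int_from_bits(32);
/// assert_eq!(ty.to_string(), "i32");
/// assert_eq!(ty.number(), constants::LANE_BASE + 8);
/// // `by` wraps the lane in a SIMD ValueType unless the lane count is 1.
/// assert_eq!(LaneType::float_from_bits(32).by(4).to_string(), "f32x4");
/// assert_eq!(LaneType::bool_from_bits(1).by(1).to_string(), "b1");
/// ```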
+impl From<shared_types::Float> for LaneType { + fn from(f: shared_types::Float) -> Self { + LaneType::Float(f) + } +} + +/// Create a LaneType from a given int variant. +impl From<shared_types::Int> for LaneType { + fn from(i: shared_types::Int) -> Self { + LaneType::Int(i) + } +} + +/// An iterator for different lane types. +pub(crate) struct LaneTypeIterator { + bool_iter: shared_types::BoolIterator, + int_iter: shared_types::IntIterator, + float_iter: shared_types::FloatIterator, +} + +impl LaneTypeIterator { + /// Create a new lane type iterator. + fn new() -> Self { + Self { + bool_iter: shared_types::BoolIterator::new(), + int_iter: shared_types::IntIterator::new(), + float_iter: shared_types::FloatIterator::new(), + } + } +} + +impl Iterator for LaneTypeIterator { + type Item = LaneType; + fn next(&mut self) -> Option<Self::Item> { + if let Some(b) = self.bool_iter.next() { + Some(LaneType::from(b)) + } else if let Some(i) = self.int_iter.next() { + Some(LaneType::from(i)) + } else if let Some(f) = self.float_iter.next() { + Some(LaneType::from(f)) + } else { + None + } + } +} + +/// A concrete SIMD vector type. +/// +/// A vector type has a lane type which is an instance of `LaneType`, +/// and a positive number of lanes. +#[derive(Clone, PartialEq, Eq, Hash)] +pub(crate) struct VectorType { + base: LaneType, + lanes: u64, +} + +impl VectorType { + /// Initialize a new integer type with `n` bits. + pub fn new(base: LaneType, lanes: u64) -> Self { + Self { base, lanes } + } + + /// Return a string containing the documentation comment for this vector type. + pub fn doc(&self) -> String { + format!( + "A SIMD vector with {} lanes containing a `{}` each.", + self.lane_count(), + self.base + ) + } + + /// Return the number of bits in a lane. + pub fn lane_bits(&self) -> u64 { + self.base.lane_bits() + } + + /// Return the number of lanes. + pub fn lane_count(&self) -> u64 { + self.lanes + } + + /// Return the lane type. + pub fn lane_type(&self) -> LaneType { + self.base + } + + /// Find the unique number associated with this vector type. + /// + /// Vector types are encoded with the lane type in the low 4 bits and + /// log2(lanes) in the high 4 bits, giving a range of 2-256 lanes. + pub fn number(&self) -> u8 { + let lanes_log_2: u32 = 63 - self.lane_count().leading_zeros(); + let base_num = u32::from(self.base.number()); + let num = (lanes_log_2 << 4) + base_num; + num as u8 + } +} + +impl fmt::Display for VectorType { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + write!(f, "{}x{}", self.base, self.lane_count()) + } +} + +impl fmt::Debug for VectorType { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + write!( + f, + "VectorType(base={}, lanes={})", + self.base, + self.lane_count() + ) + } +} + +/// A concrete scalar type that is neither a vector nor a lane type. +/// +/// Special types cannot be used to form vectors. +#[derive(Clone, Copy, PartialEq, Eq, Hash)] +pub(crate) enum SpecialType { + Flag(shared_types::Flag), + // FIXME remove once the old style backends are removed. + StructArgument, +} + +impl SpecialType { + /// Return a string containing the documentation comment for this special type. + pub fn doc(self) -> String { + match self { + SpecialType::Flag(shared_types::Flag::IFlags) => String::from( + "CPU flags representing the result of an integer comparison. 
These flags + can be tested with an :type:`intcc` condition code.", + ), + SpecialType::Flag(shared_types::Flag::FFlags) => String::from( + "CPU flags representing the result of a floating point comparison. These + flags can be tested with a :type:`floatcc` condition code.", + ), + SpecialType::StructArgument => { + String::from("After legalization sarg_t arguments will get this type.") + } + } + } + + /// Return the number of bits in a lane. + pub fn lane_bits(self) -> u64 { + match self { + SpecialType::Flag(_) => 0, + SpecialType::StructArgument => 0, + } + } + + /// Find the unique number associated with this special type. + pub fn number(self) -> u8 { + match self { + SpecialType::Flag(shared_types::Flag::IFlags) => 1, + SpecialType::Flag(shared_types::Flag::FFlags) => 2, + SpecialType::StructArgument => 3, + } + } +} + +impl fmt::Display for SpecialType { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + match *self { + SpecialType::Flag(shared_types::Flag::IFlags) => write!(f, "iflags"), + SpecialType::Flag(shared_types::Flag::FFlags) => write!(f, "fflags"), + SpecialType::StructArgument => write!(f, "sarg_t"), + } + } +} + +impl fmt::Debug for SpecialType { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + write!( + f, + "{}", + match *self { + SpecialType::Flag(_) => format!("FlagsType({})", self), + SpecialType::StructArgument => format!("StructArgument"), + } + ) + } +} + +impl From<shared_types::Flag> for SpecialType { + fn from(f: shared_types::Flag) -> Self { + SpecialType::Flag(f) + } +} + +pub(crate) struct SpecialTypeIterator { + flag_iter: shared_types::FlagIterator, + done: bool, +} + +impl SpecialTypeIterator { + fn new() -> Self { + Self { + flag_iter: shared_types::FlagIterator::new(), + done: false, + } + } +} + +impl Iterator for SpecialTypeIterator { + type Item = SpecialType; + fn next(&mut self) -> Option<Self::Item> { + if let Some(f) = self.flag_iter.next() { + Some(SpecialType::from(f)) + } else { + if !self.done { + self.done = true; + Some(SpecialType::StructArgument) + } else { + None + } + } + } +} + +/// Reference type is scalar type, but not lane type. +#[derive(Clone, Copy, PartialEq, Eq, Hash)] +pub(crate) struct ReferenceType(pub shared_types::Reference); + +impl ReferenceType { + /// Return a string containing the documentation comment for this reference type. + pub fn doc(self) -> String { + format!("An opaque reference type with {} bits.", self.lane_bits()) + } + + /// Return the number of bits in a lane. + pub fn lane_bits(self) -> u64 { + match self.0 { + shared_types::Reference::R32 => 32, + shared_types::Reference::R64 => 64, + } + } + + /// Find the unique number associated with this reference type. + pub fn number(self) -> u8 { + constants::REFERENCE_BASE + + match self { + ReferenceType(shared_types::Reference::R32) => 0, + ReferenceType(shared_types::Reference::R64) => 1, + } + } + + pub fn ref_from_bits(num_bits: u16) -> ReferenceType { + ReferenceType(match num_bits { + 32 => shared_types::Reference::R32, + 64 => shared_types::Reference::R64, + _ => unreachable!("unexpected number of bits for a reference type"), + }) + } +} + +impl fmt::Display for ReferenceType { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + write!(f, "r{}", self.lane_bits()) + } +} + +impl fmt::Debug for ReferenceType { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + write!(f, "ReferenceType(bits={})", self.lane_bits()) + } +} + +/// Create a ReferenceType from a given reference variant. 
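/// 
/// (Editor's note: illustrative sketch, not part of the original sources; it uses the
/// `ReferenceType` helpers defined above.)
/// 
/// ```ignore
/// let r = ReferenceType::ref_from_bits(64);
/// assert_eq!(r.to_string(), "r64");
/// assert_eq!(r.lane_bits(), 64);
/// assert_eq!(r.number(), constants::REFERENCE_BASE + 1);
/// ```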
+impl From<shared_types::Reference> for ReferenceType { + fn from(r: shared_types::Reference) -> Self { + ReferenceType(r) + } +} + +/// An iterator for different reference types. +pub(crate) struct ReferenceTypeIterator { + reference_iter: shared_types::ReferenceIterator, +} + +impl ReferenceTypeIterator { + /// Create a new reference type iterator. + fn new() -> Self { + Self { + reference_iter: shared_types::ReferenceIterator::new(), + } + } +} + +impl Iterator for ReferenceTypeIterator { + type Item = ReferenceType; + fn next(&mut self) -> Option<Self::Item> { + if let Some(r) = self.reference_iter.next() { + Some(ReferenceType::from(r)) + } else { + None + } + } +} diff --git a/third_party/rust/cranelift-codegen-meta/src/cdsl/typevar.rs b/third_party/rust/cranelift-codegen-meta/src/cdsl/typevar.rs new file mode 100644 index 0000000000..c1027bf847 --- /dev/null +++ b/third_party/rust/cranelift-codegen-meta/src/cdsl/typevar.rs @@ -0,0 +1,1274 @@ +use std::cell::RefCell; +use std::collections::{BTreeSet, HashSet}; +use std::fmt; +use std::hash; +use std::iter::FromIterator; +use std::ops; +use std::rc::Rc; + +use crate::cdsl::types::{LaneType, ReferenceType, SpecialType, ValueType}; + +const MAX_LANES: u16 = 256; +const MAX_BITS: u16 = 128; +const MAX_FLOAT_BITS: u16 = 64; + +/// Type variables can be used in place of concrete types when defining +/// instructions. This makes the instructions *polymorphic*. +/// +/// A type variable is restricted to vary over a subset of the value types. +/// This subset is specified by a set of flags that control the permitted base +/// types and whether the type variable can assume scalar or vector types, or +/// both. +#[derive(Debug)] +pub(crate) struct TypeVarContent { + /// Short name of type variable used in instruction descriptions. + pub name: String, + + /// Documentation string. + pub doc: String, + + /// Type set associated to the type variable. + /// This field must remain private; use `get_typeset()` or `get_raw_typeset()` to get the + /// information you want. 
+ type_set: TypeSet, + + pub base: Option<TypeVarParent>, +} + +#[derive(Clone, Debug)] +pub(crate) struct TypeVar { + content: Rc<RefCell<TypeVarContent>>, +} + +impl TypeVar { + pub fn new(name: impl Into<String>, doc: impl Into<String>, type_set: TypeSet) -> Self { + Self { + content: Rc::new(RefCell::new(TypeVarContent { + name: name.into(), + doc: doc.into(), + type_set, + base: None, + })), + } + } + + pub fn new_singleton(value_type: ValueType) -> Self { + let (name, doc) = (value_type.to_string(), value_type.doc()); + let mut builder = TypeSetBuilder::new(); + + let (scalar_type, num_lanes) = match value_type { + ValueType::Special(special_type) => { + return TypeVar::new(name, doc, builder.specials(vec![special_type]).build()); + } + ValueType::Reference(ReferenceType(reference_type)) => { + let bits = reference_type as RangeBound; + return TypeVar::new(name, doc, builder.refs(bits..bits).build()); + } + ValueType::Lane(lane_type) => (lane_type, 1), + ValueType::Vector(vec_type) => { + (vec_type.lane_type(), vec_type.lane_count() as RangeBound) + } + }; + + builder = builder.simd_lanes(num_lanes..num_lanes); + + let builder = match scalar_type { + LaneType::Int(int_type) => { + let bits = int_type as RangeBound; + builder.ints(bits..bits) + } + LaneType::Float(float_type) => { + let bits = float_type as RangeBound; + builder.floats(bits..bits) + } + LaneType::Bool(bool_type) => { + let bits = bool_type as RangeBound; + builder.bools(bits..bits) + } + }; + TypeVar::new(name, doc, builder.build()) + } + + /// Get a fresh copy of self, named after `name`. Can only be called on non-derived typevars. + pub fn copy_from(other: &TypeVar, name: String) -> TypeVar { + assert!( + other.base.is_none(), + "copy_from() can only be called on non-derived type variables" + ); + TypeVar { + content: Rc::new(RefCell::new(TypeVarContent { + name, + doc: "".into(), + type_set: other.type_set.clone(), + base: None, + })), + } + } + + /// Returns the typeset for this TV. If the TV is derived, computes it recursively from the + /// derived function and the base's typeset. + /// Note this can't be done non-lazily in the constructor, because the TypeSet of the base may + /// change over time. + pub fn get_typeset(&self) -> TypeSet { + match &self.base { + Some(base) => base.type_var.get_typeset().image(base.derived_func), + None => self.type_set.clone(), + } + } + + /// Returns this typevar's type set, assuming this type var has no parent. + pub fn get_raw_typeset(&self) -> &TypeSet { + assert_eq!(self.type_set, self.get_typeset()); + &self.type_set + } + + /// If the associated typeset has a single type return it. Otherwise return None. + pub fn singleton_type(&self) -> Option<ValueType> { + let type_set = self.get_typeset(); + if type_set.size() == 1 { + Some(type_set.get_singleton()) + } else { + None + } + } + + /// Get the free type variable controlling this one. + pub fn free_typevar(&self) -> Option<TypeVar> { + match &self.base { + Some(base) => base.type_var.free_typevar(), + None => { + match self.singleton_type() { + // A singleton type isn't a proper free variable. + Some(_) => None, + None => Some(self.clone()), + } + } + } + } + + /// Create a type variable that is a function of another. + pub fn derived(&self, derived_func: DerivedFunc) -> TypeVar { + let ts = self.get_typeset(); + + // Safety checks to avoid over/underflows. 
+ assert!(ts.specials.is_empty(), "can't derive from special types"); + match derived_func { + DerivedFunc::HalfWidth => { + assert!( + ts.ints.is_empty() || *ts.ints.iter().min().unwrap() > 8, + "can't halve all integer types" + ); + assert!( + ts.floats.is_empty() || *ts.floats.iter().min().unwrap() > 32, + "can't halve all float types" + ); + assert!( + ts.bools.is_empty() || *ts.bools.iter().min().unwrap() > 8, + "can't halve all boolean types" + ); + } + DerivedFunc::DoubleWidth => { + assert!( + ts.ints.is_empty() || *ts.ints.iter().max().unwrap() < MAX_BITS, + "can't double all integer types" + ); + assert!( + ts.floats.is_empty() || *ts.floats.iter().max().unwrap() < MAX_FLOAT_BITS, + "can't double all float types" + ); + assert!( + ts.bools.is_empty() || *ts.bools.iter().max().unwrap() < MAX_BITS, + "can't double all boolean types" + ); + } + DerivedFunc::HalfVector => { + assert!( + *ts.lanes.iter().min().unwrap() > 1, + "can't halve a scalar type" + ); + } + DerivedFunc::DoubleVector => { + assert!( + *ts.lanes.iter().max().unwrap() < MAX_LANES, + "can't double 256 lanes" + ); + } + DerivedFunc::SplitLanes => { + assert!( + ts.ints.is_empty() || *ts.ints.iter().min().unwrap() > 8, + "can't halve all integer types" + ); + assert!( + ts.floats.is_empty() || *ts.floats.iter().min().unwrap() > 32, + "can't halve all float types" + ); + assert!( + ts.bools.is_empty() || *ts.bools.iter().min().unwrap() > 8, + "can't halve all boolean types" + ); + assert!( + *ts.lanes.iter().max().unwrap() < MAX_LANES, + "can't double 256 lanes" + ); + } + DerivedFunc::MergeLanes => { + assert!( + ts.ints.is_empty() || *ts.ints.iter().max().unwrap() < MAX_BITS, + "can't double all integer types" + ); + assert!( + ts.floats.is_empty() || *ts.floats.iter().max().unwrap() < MAX_FLOAT_BITS, + "can't double all float types" + ); + assert!( + ts.bools.is_empty() || *ts.bools.iter().max().unwrap() < MAX_BITS, + "can't double all boolean types" + ); + assert!( + *ts.lanes.iter().min().unwrap() > 1, + "can't halve a scalar type" + ); + } + DerivedFunc::LaneOf | DerivedFunc::AsBool => { /* no particular assertions */ } + } + + TypeVar { + content: Rc::new(RefCell::new(TypeVarContent { + name: format!("{}({})", derived_func.name(), self.name), + doc: "".into(), + type_set: ts, + base: Some(TypeVarParent { + type_var: self.clone(), + derived_func, + }), + })), + } + } + + pub fn lane_of(&self) -> TypeVar { + self.derived(DerivedFunc::LaneOf) + } + pub fn as_bool(&self) -> TypeVar { + self.derived(DerivedFunc::AsBool) + } + pub fn half_width(&self) -> TypeVar { + self.derived(DerivedFunc::HalfWidth) + } + pub fn double_width(&self) -> TypeVar { + self.derived(DerivedFunc::DoubleWidth) + } + pub fn half_vector(&self) -> TypeVar { + self.derived(DerivedFunc::HalfVector) + } + pub fn double_vector(&self) -> TypeVar { + self.derived(DerivedFunc::DoubleVector) + } + pub fn split_lanes(&self) -> TypeVar { + self.derived(DerivedFunc::SplitLanes) + } + pub fn merge_lanes(&self) -> TypeVar { + self.derived(DerivedFunc::MergeLanes) + } + + /// Constrain the range of types this variable can assume to a subset of those in the typeset + /// ts. + /// May mutate itself if it's not derived, or its parent if it is. 
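/// 
/// (Editor's note: illustrative sketch, not part of the original sources.)
/// 
/// ```ignore
/// let x = TypeVar::new("x", "example", TypeSetBuilder::new().ints(16..64).build());
/// let half = x.half_width();
/// // Constraining the derived var narrows the parent through the preimage.
/// half.constrain_types_by_ts(TypeSetBuilder::new().ints(8..16).build());
/// assert_eq!(x.get_typeset().ints.iter().copied().collect::<Vec<_>>(), vec![16, 32]);
/// ```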
+ pub fn constrain_types_by_ts(&self, type_set: TypeSet) { + match &self.base { + Some(base) => { + base.type_var + .constrain_types_by_ts(type_set.preimage(base.derived_func)); + } + None => { + self.content + .borrow_mut() + .type_set + .inplace_intersect_with(&type_set); + } + } + } + + /// Constrain the range of types this variable can assume to a subset of those `other` can + /// assume. + /// May mutate itself if it's not derived, or its parent if it is. + pub fn constrain_types(&self, other: TypeVar) { + if self == &other { + return; + } + self.constrain_types_by_ts(other.get_typeset()); + } + + /// Get a Rust expression that computes the type of this type variable. + pub fn to_rust_code(&self) -> String { + match &self.base { + Some(base) => format!( + "{}.{}().unwrap()", + base.type_var.to_rust_code(), + base.derived_func.name() + ), + None => { + if let Some(singleton) = self.singleton_type() { + singleton.rust_name() + } else { + self.name.clone() + } + } + } + } +} + +impl Into<TypeVar> for &TypeVar { + fn into(self) -> TypeVar { + self.clone() + } +} +impl Into<TypeVar> for ValueType { + fn into(self) -> TypeVar { + TypeVar::new_singleton(self) + } +} + +// Hash TypeVars by pointers. +// There might be a better way to do this, but since TypeVar's content (namely TypeSet) can be +// mutated, it makes sense to use pointer equality/hashing here. +impl hash::Hash for TypeVar { + fn hash<H: hash::Hasher>(&self, h: &mut H) { + match &self.base { + Some(base) => { + base.type_var.hash(h); + base.derived_func.hash(h); + } + None => { + (&**self as *const TypeVarContent).hash(h); + } + } + } +} + +impl PartialEq for TypeVar { + fn eq(&self, other: &TypeVar) -> bool { + match (&self.base, &other.base) { + (Some(base1), Some(base2)) => { + base1.type_var.eq(&base2.type_var) && base1.derived_func == base2.derived_func + } + (None, None) => Rc::ptr_eq(&self.content, &other.content), + _ => false, + } + } +} + +// Allow TypeVar as map keys, based on pointer equality (see also above PartialEq impl). +impl Eq for TypeVar {} + +impl ops::Deref for TypeVar { + type Target = TypeVarContent; + fn deref(&self) -> &Self::Target { + unsafe { self.content.as_ptr().as_ref().unwrap() } + } +} + +#[derive(Clone, Copy, Debug, Hash, PartialEq)] +pub(crate) enum DerivedFunc { + LaneOf, + AsBool, + HalfWidth, + DoubleWidth, + HalfVector, + DoubleVector, + SplitLanes, + MergeLanes, +} + +impl DerivedFunc { + pub fn name(self) -> &'static str { + match self { + DerivedFunc::LaneOf => "lane_of", + DerivedFunc::AsBool => "as_bool", + DerivedFunc::HalfWidth => "half_width", + DerivedFunc::DoubleWidth => "double_width", + DerivedFunc::HalfVector => "half_vector", + DerivedFunc::DoubleVector => "double_vector", + DerivedFunc::SplitLanes => "split_lanes", + DerivedFunc::MergeLanes => "merge_lanes", + } + } + + /// Returns the inverse function of this one, if it is a bijection. + pub fn inverse(self) -> Option<DerivedFunc> { + match self { + DerivedFunc::HalfWidth => Some(DerivedFunc::DoubleWidth), + DerivedFunc::DoubleWidth => Some(DerivedFunc::HalfWidth), + DerivedFunc::HalfVector => Some(DerivedFunc::DoubleVector), + DerivedFunc::DoubleVector => Some(DerivedFunc::HalfVector), + DerivedFunc::MergeLanes => Some(DerivedFunc::SplitLanes), + DerivedFunc::SplitLanes => Some(DerivedFunc::MergeLanes), + _ => None, + } + } +} + +#[derive(Debug, Hash)] +pub(crate) struct TypeVarParent { + pub type_var: TypeVar, + pub derived_func: DerivedFunc, +} + +/// A set of types. 
+/// +/// We don't allow arbitrary subsets of types, but use a parametrized approach +/// instead. +/// +/// Objects of this class can be used as dictionary keys. +/// +/// Parametrized type sets are specified in terms of ranges: +/// - The permitted range of vector lanes, where 1 indicates a scalar type. +/// - The permitted range of integer types. +/// - The permitted range of floating point types, and +/// - The permitted range of boolean types. +/// +/// The ranges are inclusive from smallest bit-width to largest bit-width. +/// +/// Finally, a type set can contain special types (derived from `SpecialType`) +/// which can't appear as lane types. + +type RangeBound = u16; +type Range = ops::Range<RangeBound>; +type NumSet = BTreeSet<RangeBound>; + +macro_rules! num_set { + ($($expr:expr),*) => { + NumSet::from_iter(vec![$($expr),*]) + }; +} + +#[derive(Clone, PartialEq, Eq, Hash)] +pub(crate) struct TypeSet { + pub lanes: NumSet, + pub ints: NumSet, + pub floats: NumSet, + pub bools: NumSet, + pub refs: NumSet, + pub specials: Vec<SpecialType>, +} + +impl TypeSet { + fn new( + lanes: NumSet, + ints: NumSet, + floats: NumSet, + bools: NumSet, + refs: NumSet, + specials: Vec<SpecialType>, + ) -> Self { + Self { + lanes, + ints, + floats, + bools, + refs, + specials, + } + } + + /// Return the number of concrete types represented by this typeset. + pub fn size(&self) -> usize { + self.lanes.len() + * (self.ints.len() + self.floats.len() + self.bools.len() + self.refs.len()) + + self.specials.len() + } + + /// Return the image of self across the derived function func. + fn image(&self, derived_func: DerivedFunc) -> TypeSet { + match derived_func { + DerivedFunc::LaneOf => self.lane_of(), + DerivedFunc::AsBool => self.as_bool(), + DerivedFunc::HalfWidth => self.half_width(), + DerivedFunc::DoubleWidth => self.double_width(), + DerivedFunc::HalfVector => self.half_vector(), + DerivedFunc::DoubleVector => self.double_vector(), + DerivedFunc::SplitLanes => self.half_width().double_vector(), + DerivedFunc::MergeLanes => self.double_width().half_vector(), + } + } + + /// Return a TypeSet describing the image of self across lane_of. + fn lane_of(&self) -> TypeSet { + let mut copy = self.clone(); + copy.lanes = num_set![1]; + copy + } + + /// Return a TypeSet describing the image of self across as_bool. + fn as_bool(&self) -> TypeSet { + let mut copy = self.clone(); + copy.ints = NumSet::new(); + copy.floats = NumSet::new(); + copy.refs = NumSet::new(); + if !(&self.lanes - &num_set![1]).is_empty() { + copy.bools = &self.ints | &self.floats; + copy.bools = ©.bools | &self.bools; + } + if self.lanes.contains(&1) { + copy.bools.insert(1); + } + copy + } + + /// Return a TypeSet describing the image of self across halfwidth. + fn half_width(&self) -> TypeSet { + let mut copy = self.clone(); + copy.ints = NumSet::from_iter(self.ints.iter().filter(|&&x| x > 8).map(|&x| x / 2)); + copy.floats = NumSet::from_iter(self.floats.iter().filter(|&&x| x > 32).map(|&x| x / 2)); + copy.bools = NumSet::from_iter(self.bools.iter().filter(|&&x| x > 8).map(|&x| x / 2)); + copy.specials = Vec::new(); + copy + } + + /// Return a TypeSet describing the image of self across doublewidth. 
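/// 
/// (Editor's note: illustrative sketch, not part of the original sources; it mirrors
/// the unit tests further down in this file.)
/// 
/// ```ignore
/// let ts = TypeSetBuilder::new().ints(8..32).build();
/// // i8 has no narrower integer type, so it drops out of the half-width image.
/// assert_eq!(ts.half_width(), TypeSetBuilder::new().ints(8..16).build());
/// // double_width is used as the preimage of HalfWidth (and vice versa).
/// assert_eq!(ts.double_width(), TypeSetBuilder::new().ints(16..64).build());
/// ```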
+ fn double_width(&self) -> TypeSet { + let mut copy = self.clone(); + copy.ints = NumSet::from_iter(self.ints.iter().filter(|&&x| x < MAX_BITS).map(|&x| x * 2)); + copy.floats = NumSet::from_iter( + self.floats + .iter() + .filter(|&&x| x < MAX_FLOAT_BITS) + .map(|&x| x * 2), + ); + copy.bools = NumSet::from_iter( + self.bools + .iter() + .filter(|&&x| x < MAX_BITS) + .map(|&x| x * 2) + .filter(|x| legal_bool(*x)), + ); + copy.specials = Vec::new(); + copy + } + + /// Return a TypeSet describing the image of self across halfvector. + fn half_vector(&self) -> TypeSet { + let mut copy = self.clone(); + copy.lanes = NumSet::from_iter(self.lanes.iter().filter(|&&x| x > 1).map(|&x| x / 2)); + copy.specials = Vec::new(); + copy + } + + /// Return a TypeSet describing the image of self across doublevector. + fn double_vector(&self) -> TypeSet { + let mut copy = self.clone(); + copy.lanes = NumSet::from_iter( + self.lanes + .iter() + .filter(|&&x| x < MAX_LANES) + .map(|&x| x * 2), + ); + copy.specials = Vec::new(); + copy + } + + fn concrete_types(&self) -> Vec<ValueType> { + let mut ret = Vec::new(); + for &num_lanes in &self.lanes { + for &bits in &self.ints { + ret.push(LaneType::int_from_bits(bits).by(num_lanes)); + } + for &bits in &self.floats { + ret.push(LaneType::float_from_bits(bits).by(num_lanes)); + } + for &bits in &self.bools { + ret.push(LaneType::bool_from_bits(bits).by(num_lanes)); + } + for &bits in &self.refs { + ret.push(ReferenceType::ref_from_bits(bits).into()); + } + } + for &special in &self.specials { + ret.push(special.into()); + } + ret + } + + /// Return the singleton type represented by self. Can only call on typesets containing 1 type. + fn get_singleton(&self) -> ValueType { + let mut types = self.concrete_types(); + assert_eq!(types.len(), 1); + types.remove(0) + } + + /// Return the inverse image of self across the derived function func. + fn preimage(&self, func: DerivedFunc) -> TypeSet { + if self.size() == 0 { + // The inverse of the empty set is itself. + return self.clone(); + } + + match func { + DerivedFunc::LaneOf => { + let mut copy = self.clone(); + copy.lanes = + NumSet::from_iter((0..=MAX_LANES.trailing_zeros()).map(|i| u16::pow(2, i))); + copy + } + DerivedFunc::AsBool => { + let mut copy = self.clone(); + if self.bools.contains(&1) { + copy.ints = NumSet::from_iter(vec![8, 16, 32, 64, 128]); + copy.floats = NumSet::from_iter(vec![32, 64]); + } else { + copy.ints = &self.bools - &NumSet::from_iter(vec![1]); + copy.floats = &self.bools & &NumSet::from_iter(vec![32, 64]); + // If b1 is not in our typeset, than lanes=1 cannot be in the pre-image, as + // as_bool() of scalars is always b1. 
+ copy.lanes = &self.lanes - &NumSet::from_iter(vec![1]); + } + copy + } + DerivedFunc::HalfWidth => self.double_width(), + DerivedFunc::DoubleWidth => self.half_width(), + DerivedFunc::HalfVector => self.double_vector(), + DerivedFunc::DoubleVector => self.half_vector(), + DerivedFunc::SplitLanes => self.double_width().half_vector(), + DerivedFunc::MergeLanes => self.half_width().double_vector(), + } + } + + pub fn inplace_intersect_with(&mut self, other: &TypeSet) { + self.lanes = &self.lanes & &other.lanes; + self.ints = &self.ints & &other.ints; + self.floats = &self.floats & &other.floats; + self.bools = &self.bools & &other.bools; + self.refs = &self.refs & &other.refs; + + let mut new_specials = Vec::new(); + for spec in &self.specials { + if let Some(spec) = other.specials.iter().find(|&other_spec| other_spec == spec) { + new_specials.push(*spec); + } + } + self.specials = new_specials; + } + + pub fn is_subset(&self, other: &TypeSet) -> bool { + self.lanes.is_subset(&other.lanes) + && self.ints.is_subset(&other.ints) + && self.floats.is_subset(&other.floats) + && self.bools.is_subset(&other.bools) + && self.refs.is_subset(&other.refs) + && { + let specials: HashSet<SpecialType> = HashSet::from_iter(self.specials.clone()); + let other_specials = HashSet::from_iter(other.specials.clone()); + specials.is_subset(&other_specials) + } + } + + pub fn is_wider_or_equal(&self, other: &TypeSet) -> bool { + set_wider_or_equal(&self.ints, &other.ints) + && set_wider_or_equal(&self.floats, &other.floats) + && set_wider_or_equal(&self.bools, &other.bools) + && set_wider_or_equal(&self.refs, &other.refs) + } + + pub fn is_narrower(&self, other: &TypeSet) -> bool { + set_narrower(&self.ints, &other.ints) + && set_narrower(&self.floats, &other.floats) + && set_narrower(&self.bools, &other.bools) + && set_narrower(&self.refs, &other.refs) + } +} + +fn set_wider_or_equal(s1: &NumSet, s2: &NumSet) -> bool { + !s1.is_empty() && !s2.is_empty() && s1.iter().min() >= s2.iter().max() +} + +fn set_narrower(s1: &NumSet, s2: &NumSet) -> bool { + !s1.is_empty() && !s2.is_empty() && s1.iter().min() < s2.iter().max() +} + +impl fmt::Debug for TypeSet { + fn fmt(&self, fmt: &mut fmt::Formatter) -> Result<(), fmt::Error> { + write!(fmt, "TypeSet(")?; + + let mut subsets = Vec::new(); + if !self.lanes.is_empty() { + subsets.push(format!( + "lanes={{{}}}", + Vec::from_iter(self.lanes.iter().map(|x| x.to_string())).join(", ") + )); + } + if !self.ints.is_empty() { + subsets.push(format!( + "ints={{{}}}", + Vec::from_iter(self.ints.iter().map(|x| x.to_string())).join(", ") + )); + } + if !self.floats.is_empty() { + subsets.push(format!( + "floats={{{}}}", + Vec::from_iter(self.floats.iter().map(|x| x.to_string())).join(", ") + )); + } + if !self.bools.is_empty() { + subsets.push(format!( + "bools={{{}}}", + Vec::from_iter(self.bools.iter().map(|x| x.to_string())).join(", ") + )); + } + if !self.refs.is_empty() { + subsets.push(format!( + "refs={{{}}}", + Vec::from_iter(self.refs.iter().map(|x| x.to_string())).join(", ") + )); + } + if !self.specials.is_empty() { + subsets.push(format!( + "specials={{{}}}", + Vec::from_iter(self.specials.iter().map(|x| x.to_string())).join(", ") + )); + } + + write!(fmt, "{})", subsets.join(", "))?; + Ok(()) + } +} + +pub(crate) struct TypeSetBuilder { + ints: Interval, + floats: Interval, + bools: Interval, + refs: Interval, + includes_scalars: bool, + simd_lanes: Interval, + specials: Vec<SpecialType>, +} + +impl TypeSetBuilder { + pub fn new() -> Self { + Self { + ints: 
Interval::None, + floats: Interval::None, + bools: Interval::None, + refs: Interval::None, + includes_scalars: true, + simd_lanes: Interval::None, + specials: Vec::new(), + } + } + + pub fn ints(mut self, interval: impl Into<Interval>) -> Self { + assert!(self.ints == Interval::None); + self.ints = interval.into(); + self + } + pub fn floats(mut self, interval: impl Into<Interval>) -> Self { + assert!(self.floats == Interval::None); + self.floats = interval.into(); + self + } + pub fn bools(mut self, interval: impl Into<Interval>) -> Self { + assert!(self.bools == Interval::None); + self.bools = interval.into(); + self + } + pub fn refs(mut self, interval: impl Into<Interval>) -> Self { + assert!(self.refs == Interval::None); + self.refs = interval.into(); + self + } + pub fn includes_scalars(mut self, includes_scalars: bool) -> Self { + self.includes_scalars = includes_scalars; + self + } + pub fn simd_lanes(mut self, interval: impl Into<Interval>) -> Self { + assert!(self.simd_lanes == Interval::None); + self.simd_lanes = interval.into(); + self + } + pub fn specials(mut self, specials: Vec<SpecialType>) -> Self { + assert!(self.specials.is_empty()); + self.specials = specials; + self + } + + pub fn build(self) -> TypeSet { + let min_lanes = if self.includes_scalars { 1 } else { 2 }; + + let bools = range_to_set(self.bools.to_range(1..MAX_BITS, None)) + .into_iter() + .filter(|x| legal_bool(*x)) + .collect(); + + TypeSet::new( + range_to_set(self.simd_lanes.to_range(min_lanes..MAX_LANES, Some(1))), + range_to_set(self.ints.to_range(8..MAX_BITS, None)), + range_to_set(self.floats.to_range(32..64, None)), + bools, + range_to_set(self.refs.to_range(32..64, None)), + self.specials, + ) + } + + pub fn all() -> TypeSet { + TypeSetBuilder::new() + .ints(Interval::All) + .floats(Interval::All) + .bools(Interval::All) + .refs(Interval::All) + .simd_lanes(Interval::All) + .specials(ValueType::all_special_types().collect()) + .includes_scalars(true) + .build() + } +} + +#[derive(PartialEq)] +pub(crate) enum Interval { + None, + All, + Range(Range), +} + +impl Interval { + fn to_range(&self, full_range: Range, default: Option<RangeBound>) -> Option<Range> { + match self { + Interval::None => { + if let Some(default_val) = default { + Some(default_val..default_val) + } else { + None + } + } + + Interval::All => Some(full_range), + + Interval::Range(range) => { + let (low, high) = (range.start, range.end); + assert!(low.is_power_of_two()); + assert!(high.is_power_of_two()); + assert!(low <= high); + assert!(low >= full_range.start); + assert!(high <= full_range.end); + Some(low..high) + } + } + } +} + +impl Into<Interval> for Range { + fn into(self) -> Interval { + Interval::Range(self) + } +} + +fn legal_bool(bits: RangeBound) -> bool { + // Only allow legal bit widths for bool types. + bits == 1 || (bits >= 8 && bits <= MAX_BITS && bits.is_power_of_two()) +} + +/// Generates a set with all the powers of two included in the range. 
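/// 
/// (Editor's note: illustrative sketch, not part of the original sources.)
/// 
/// ```ignore
/// // Bounds are inclusive powers of two; the set contains every power of two in between.
/// assert_eq!(range_to_set(Some(8..64)), NumSet::from_iter(vec![8, 16, 32, 64]));
/// // `None` (an unset Interval) yields the empty set.
/// assert!(range_to_set(None).is_empty());
/// ```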
+fn range_to_set(range: Option<Range>) -> NumSet { + let mut set = NumSet::new(); + + let (low, high) = match range { + Some(range) => (range.start, range.end), + None => return set, + }; + + assert!(low.is_power_of_two()); + assert!(high.is_power_of_two()); + assert!(low <= high); + + for i in low.trailing_zeros()..=high.trailing_zeros() { + assert!(1 << i <= RangeBound::max_value()); + set.insert(1 << i); + } + set +} + +#[test] +fn test_typevar_builder() { + let type_set = TypeSetBuilder::new().ints(Interval::All).build(); + assert_eq!(type_set.lanes, num_set![1]); + assert!(type_set.floats.is_empty()); + assert_eq!(type_set.ints, num_set![8, 16, 32, 64, 128]); + assert!(type_set.bools.is_empty()); + assert!(type_set.specials.is_empty()); + + let type_set = TypeSetBuilder::new().bools(Interval::All).build(); + assert_eq!(type_set.lanes, num_set![1]); + assert!(type_set.floats.is_empty()); + assert!(type_set.ints.is_empty()); + assert_eq!(type_set.bools, num_set![1, 8, 16, 32, 64, 128]); + assert!(type_set.specials.is_empty()); + + let type_set = TypeSetBuilder::new().floats(Interval::All).build(); + assert_eq!(type_set.lanes, num_set![1]); + assert_eq!(type_set.floats, num_set![32, 64]); + assert!(type_set.ints.is_empty()); + assert!(type_set.bools.is_empty()); + assert!(type_set.specials.is_empty()); + + let type_set = TypeSetBuilder::new() + .floats(Interval::All) + .simd_lanes(Interval::All) + .includes_scalars(false) + .build(); + assert_eq!(type_set.lanes, num_set![2, 4, 8, 16, 32, 64, 128, 256]); + assert_eq!(type_set.floats, num_set![32, 64]); + assert!(type_set.ints.is_empty()); + assert!(type_set.bools.is_empty()); + assert!(type_set.specials.is_empty()); + + let type_set = TypeSetBuilder::new() + .floats(Interval::All) + .simd_lanes(Interval::All) + .includes_scalars(true) + .build(); + assert_eq!(type_set.lanes, num_set![1, 2, 4, 8, 16, 32, 64, 128, 256]); + assert_eq!(type_set.floats, num_set![32, 64]); + assert!(type_set.ints.is_empty()); + assert!(type_set.bools.is_empty()); + assert!(type_set.specials.is_empty()); + + let type_set = TypeSetBuilder::new().ints(16..64).build(); + assert_eq!(type_set.lanes, num_set![1]); + assert_eq!(type_set.ints, num_set![16, 32, 64]); + assert!(type_set.floats.is_empty()); + assert!(type_set.bools.is_empty()); + assert!(type_set.specials.is_empty()); +} + +#[test] +#[should_panic] +fn test_typevar_builder_too_high_bound_panic() { + TypeSetBuilder::new().ints(16..2 * MAX_BITS).build(); +} + +#[test] +#[should_panic] +fn test_typevar_builder_inverted_bounds_panic() { + TypeSetBuilder::new().ints(32..16).build(); +} + +#[test] +fn test_as_bool() { + let a = TypeSetBuilder::new() + .simd_lanes(2..8) + .ints(8..8) + .floats(32..32) + .build(); + assert_eq!( + a.lane_of(), + TypeSetBuilder::new().ints(8..8).floats(32..32).build() + ); + + // Test as_bool with disjoint intervals. + let mut a_as_bool = TypeSetBuilder::new().simd_lanes(2..8).build(); + a_as_bool.bools = num_set![8, 32]; + assert_eq!(a.as_bool(), a_as_bool); + + let b = TypeSetBuilder::new() + .simd_lanes(1..8) + .ints(8..8) + .floats(32..32) + .build(); + let mut b_as_bool = TypeSetBuilder::new().simd_lanes(1..8).build(); + b_as_bool.bools = num_set![1, 8, 32]; + assert_eq!(b.as_bool(), b_as_bool); +} + +#[test] +fn test_forward_images() { + let empty_set = TypeSetBuilder::new().build(); + + // Half vector. + assert_eq!( + TypeSetBuilder::new() + .simd_lanes(1..32) + .build() + .half_vector(), + TypeSetBuilder::new().simd_lanes(1..16).build() + ); + + // Double vector. 
+ assert_eq!( + TypeSetBuilder::new() + .simd_lanes(1..32) + .build() + .double_vector(), + TypeSetBuilder::new().simd_lanes(2..64).build() + ); + assert_eq!( + TypeSetBuilder::new() + .simd_lanes(128..256) + .build() + .double_vector(), + TypeSetBuilder::new().simd_lanes(256..256).build() + ); + + // Half width. + assert_eq!( + TypeSetBuilder::new().ints(8..32).build().half_width(), + TypeSetBuilder::new().ints(8..16).build() + ); + assert_eq!( + TypeSetBuilder::new().floats(32..32).build().half_width(), + empty_set + ); + assert_eq!( + TypeSetBuilder::new().floats(32..64).build().half_width(), + TypeSetBuilder::new().floats(32..32).build() + ); + assert_eq!( + TypeSetBuilder::new().bools(1..8).build().half_width(), + empty_set + ); + assert_eq!( + TypeSetBuilder::new().bools(1..32).build().half_width(), + TypeSetBuilder::new().bools(8..16).build() + ); + + // Double width. + assert_eq!( + TypeSetBuilder::new().ints(8..32).build().double_width(), + TypeSetBuilder::new().ints(16..64).build() + ); + assert_eq!( + TypeSetBuilder::new().ints(32..64).build().double_width(), + TypeSetBuilder::new().ints(64..128).build() + ); + assert_eq!( + TypeSetBuilder::new().floats(32..32).build().double_width(), + TypeSetBuilder::new().floats(64..64).build() + ); + assert_eq!( + TypeSetBuilder::new().floats(32..64).build().double_width(), + TypeSetBuilder::new().floats(64..64).build() + ); + assert_eq!( + TypeSetBuilder::new().bools(1..16).build().double_width(), + TypeSetBuilder::new().bools(16..32).build() + ); + assert_eq!( + TypeSetBuilder::new().bools(32..64).build().double_width(), + TypeSetBuilder::new().bools(64..128).build() + ); +} + +#[test] +fn test_backward_images() { + let empty_set = TypeSetBuilder::new().build(); + + // LaneOf. + assert_eq!( + TypeSetBuilder::new() + .simd_lanes(1..1) + .ints(8..8) + .floats(32..32) + .build() + .preimage(DerivedFunc::LaneOf), + TypeSetBuilder::new() + .simd_lanes(Interval::All) + .ints(8..8) + .floats(32..32) + .build() + ); + assert_eq!(empty_set.preimage(DerivedFunc::LaneOf), empty_set); + + // AsBool. + assert_eq!( + TypeSetBuilder::new() + .simd_lanes(1..4) + .bools(1..128) + .build() + .preimage(DerivedFunc::AsBool), + TypeSetBuilder::new() + .simd_lanes(1..4) + .ints(Interval::All) + .bools(Interval::All) + .floats(Interval::All) + .build() + ); + + // Double vector. + assert_eq!( + TypeSetBuilder::new() + .simd_lanes(1..1) + .ints(8..8) + .build() + .preimage(DerivedFunc::DoubleVector) + .size(), + 0 + ); + assert_eq!( + TypeSetBuilder::new() + .simd_lanes(1..16) + .ints(8..16) + .floats(32..32) + .build() + .preimage(DerivedFunc::DoubleVector), + TypeSetBuilder::new() + .simd_lanes(1..8) + .ints(8..16) + .floats(32..32) + .build(), + ); + + // Half vector. + assert_eq!( + TypeSetBuilder::new() + .simd_lanes(256..256) + .ints(8..8) + .build() + .preimage(DerivedFunc::HalfVector) + .size(), + 0 + ); + assert_eq!( + TypeSetBuilder::new() + .simd_lanes(64..128) + .bools(1..32) + .build() + .preimage(DerivedFunc::HalfVector), + TypeSetBuilder::new() + .simd_lanes(128..256) + .bools(1..32) + .build(), + ); + + // Half width. + assert_eq!( + TypeSetBuilder::new() + .ints(128..128) + .floats(64..64) + .bools(128..128) + .build() + .preimage(DerivedFunc::HalfWidth) + .size(), + 0 + ); + assert_eq!( + TypeSetBuilder::new() + .simd_lanes(64..256) + .bools(1..64) + .build() + .preimage(DerivedFunc::HalfWidth), + TypeSetBuilder::new() + .simd_lanes(64..256) + .bools(16..128) + .build(), + ); + + // Double width. 
+ assert_eq!( + TypeSetBuilder::new() + .ints(8..8) + .floats(32..32) + .bools(1..8) + .build() + .preimage(DerivedFunc::DoubleWidth) + .size(), + 0 + ); + assert_eq!( + TypeSetBuilder::new() + .simd_lanes(1..16) + .ints(8..16) + .floats(32..64) + .build() + .preimage(DerivedFunc::DoubleWidth), + TypeSetBuilder::new() + .simd_lanes(1..16) + .ints(8..8) + .floats(32..32) + .build() + ); +} + +#[test] +#[should_panic] +fn test_typeset_singleton_panic_nonsingleton_types() { + TypeSetBuilder::new() + .ints(8..8) + .floats(32..32) + .build() + .get_singleton(); +} + +#[test] +#[should_panic] +fn test_typeset_singleton_panic_nonsingleton_lanes() { + TypeSetBuilder::new() + .simd_lanes(1..2) + .floats(32..32) + .build() + .get_singleton(); +} + +#[test] +fn test_typeset_singleton() { + use crate::shared::types as shared_types; + assert_eq!( + TypeSetBuilder::new().ints(16..16).build().get_singleton(), + ValueType::Lane(shared_types::Int::I16.into()) + ); + assert_eq!( + TypeSetBuilder::new().floats(64..64).build().get_singleton(), + ValueType::Lane(shared_types::Float::F64.into()) + ); + assert_eq!( + TypeSetBuilder::new().bools(1..1).build().get_singleton(), + ValueType::Lane(shared_types::Bool::B1.into()) + ); + assert_eq!( + TypeSetBuilder::new() + .simd_lanes(4..4) + .ints(32..32) + .build() + .get_singleton(), + LaneType::from(shared_types::Int::I32).by(4) + ); +} + +#[test] +fn test_typevar_functions() { + let x = TypeVar::new( + "x", + "i16 and up", + TypeSetBuilder::new().ints(16..64).build(), + ); + assert_eq!(x.half_width().name, "half_width(x)"); + assert_eq!( + x.half_width().double_width().name, + "double_width(half_width(x))" + ); + + let x = TypeVar::new("x", "up to i32", TypeSetBuilder::new().ints(8..32).build()); + assert_eq!(x.double_width().name, "double_width(x)"); +} + +#[test] +fn test_typevar_singleton() { + use crate::cdsl::types::VectorType; + use crate::shared::types as shared_types; + + // Test i32. + let typevar = TypeVar::new_singleton(ValueType::Lane(LaneType::Int(shared_types::Int::I32))); + assert_eq!(typevar.name, "i32"); + assert_eq!(typevar.type_set.ints, num_set![32]); + assert!(typevar.type_set.floats.is_empty()); + assert!(typevar.type_set.bools.is_empty()); + assert!(typevar.type_set.specials.is_empty()); + assert_eq!(typevar.type_set.lanes, num_set![1]); + + // Test f32x4. + let typevar = TypeVar::new_singleton(ValueType::Vector(VectorType::new( + LaneType::Float(shared_types::Float::F32), + 4, + ))); + assert_eq!(typevar.name, "f32x4"); + assert!(typevar.type_set.ints.is_empty()); + assert_eq!(typevar.type_set.floats, num_set![32]); + assert_eq!(typevar.type_set.lanes, num_set![4]); + assert!(typevar.type_set.bools.is_empty()); + assert!(typevar.type_set.specials.is_empty()); +} diff --git a/third_party/rust/cranelift-codegen-meta/src/cdsl/xform.rs b/third_party/rust/cranelift-codegen-meta/src/cdsl/xform.rs new file mode 100644 index 0000000000..d21e93128d --- /dev/null +++ b/third_party/rust/cranelift-codegen-meta/src/cdsl/xform.rs @@ -0,0 +1,484 @@ +use crate::cdsl::ast::{ + Apply, BlockPool, ConstPool, DefIndex, DefPool, DummyDef, DummyExpr, Expr, PatternPosition, + VarIndex, VarPool, +}; +use crate::cdsl::instructions::Instruction; +use crate::cdsl::type_inference::{infer_transform, TypeEnvironment}; +use crate::cdsl::typevar::TypeVar; + +use cranelift_entity::{entity_impl, PrimaryMap}; + +use std::collections::{HashMap, HashSet}; +use std::iter::FromIterator; + +/// An instruction transformation consists of a source and destination pattern. 
+/// +/// Patterns are expressed in *register transfer language* as tuples of Def or Expr nodes. A +/// pattern may optionally have a sequence of TypeConstraints, that additionally limit the set of +/// cases when it applies. +/// +/// The source pattern can contain only a single instruction. +pub(crate) struct Transform { + pub src: DefIndex, + pub dst: Vec<DefIndex>, + pub var_pool: VarPool, + pub def_pool: DefPool, + pub block_pool: BlockPool, + pub const_pool: ConstPool, + pub type_env: TypeEnvironment, +} + +type SymbolTable = HashMap<String, VarIndex>; + +impl Transform { + fn new(src: DummyDef, dst: Vec<DummyDef>) -> Self { + let mut var_pool = VarPool::new(); + let mut def_pool = DefPool::new(); + let mut block_pool = BlockPool::new(); + let mut const_pool = ConstPool::new(); + + let mut input_vars: Vec<VarIndex> = Vec::new(); + let mut defined_vars: Vec<VarIndex> = Vec::new(); + + // Maps variable names to our own Var copies. + let mut symbol_table: SymbolTable = SymbolTable::new(); + + // Rewrite variables in src and dst using our own copies. + let src = rewrite_def_list( + PatternPosition::Source, + vec![src], + &mut symbol_table, + &mut input_vars, + &mut defined_vars, + &mut var_pool, + &mut def_pool, + &mut block_pool, + &mut const_pool, + )[0]; + + let num_src_inputs = input_vars.len(); + + let dst = rewrite_def_list( + PatternPosition::Destination, + dst, + &mut symbol_table, + &mut input_vars, + &mut defined_vars, + &mut var_pool, + &mut def_pool, + &mut block_pool, + &mut const_pool, + ); + + // Sanity checks. + for &var_index in &input_vars { + assert!( + var_pool.get(var_index).is_input(), + format!("'{:?}' used as both input and def", var_pool.get(var_index)) + ); + } + assert!( + input_vars.len() == num_src_inputs, + format!( + "extra input vars in dst pattern: {:?}", + input_vars + .iter() + .map(|&i| var_pool.get(i)) + .skip(num_src_inputs) + .collect::<Vec<_>>() + ) + ); + + // Perform type inference and cleanup. + let type_env = infer_transform(src, &dst, &def_pool, &mut var_pool).unwrap(); + + // Sanity check: the set of inferred free type variables should be a subset of the type + // variables corresponding to Vars appearing in the source pattern. + { + let free_typevars: HashSet<TypeVar> = + HashSet::from_iter(type_env.free_typevars(&mut var_pool)); + let src_tvs = HashSet::from_iter( + input_vars + .clone() + .iter() + .chain( + defined_vars + .iter() + .filter(|&&var_index| !var_pool.get(var_index).is_temp()), + ) + .map(|&var_index| var_pool.get(var_index).get_typevar()) + .filter(|maybe_var| maybe_var.is_some()) + .map(|var| var.unwrap()), + ); + if !free_typevars.is_subset(&src_tvs) { + let missing_tvs = (&free_typevars - &src_tvs) + .iter() + .map(|tv| tv.name.clone()) + .collect::<Vec<_>>() + .join(", "); + panic!("Some free vars don't appear in src: {}", missing_tvs); + } + } + + for &var_index in input_vars.iter().chain(defined_vars.iter()) { + let var = var_pool.get_mut(var_index); + let canon_tv = type_env.get_equivalent(&var.get_or_create_typevar()); + var.set_typevar(canon_tv); + } + + Self { + src, + dst, + var_pool, + def_pool, + block_pool, + const_pool, + type_env, + } + } + + fn verify_legalize(&self) { + let def = self.def_pool.get(self.src); + for &var_index in def.defined_vars.iter() { + let defined_var = self.var_pool.get(var_index); + assert!( + defined_var.is_output(), + format!("{:?} not defined in the destination pattern", defined_var) + ); + } + } +} + +/// Inserts, if not present, a name in the `symbol_table`. 
Then returns its index in the variable +/// pool `var_pool`. If the variable was not present in the symbol table, then add it to the list of +/// `defined_vars`. +fn var_index( + name: &str, + symbol_table: &mut SymbolTable, + defined_vars: &mut Vec<VarIndex>, + var_pool: &mut VarPool, +) -> VarIndex { + let name = name.to_string(); + match symbol_table.get(&name) { + Some(&existing_var) => existing_var, + None => { + // Materialize the variable. + let new_var = var_pool.create(name.clone()); + symbol_table.insert(name, new_var); + defined_vars.push(new_var); + new_var + } + } +} + +/// Given a list of symbols defined in a Def, rewrite them to local symbols. Yield the new locals. +fn rewrite_defined_vars( + position: PatternPosition, + dummy_def: &DummyDef, + def_index: DefIndex, + symbol_table: &mut SymbolTable, + defined_vars: &mut Vec<VarIndex>, + var_pool: &mut VarPool, +) -> Vec<VarIndex> { + let mut new_defined_vars = Vec::new(); + for var in &dummy_def.defined_vars { + let own_var = var_index(&var.name, symbol_table, defined_vars, var_pool); + var_pool.get_mut(own_var).set_def(position, def_index); + new_defined_vars.push(own_var); + } + new_defined_vars +} + +/// Find all uses of variables in `expr` and replace them with our own local symbols. +fn rewrite_expr( + position: PatternPosition, + dummy_expr: DummyExpr, + symbol_table: &mut SymbolTable, + input_vars: &mut Vec<VarIndex>, + var_pool: &mut VarPool, + const_pool: &mut ConstPool, +) -> Apply { + let (apply_target, dummy_args) = if let DummyExpr::Apply(apply_target, dummy_args) = dummy_expr + { + (apply_target, dummy_args) + } else { + panic!("we only rewrite apply expressions"); + }; + + assert_eq!( + apply_target.inst().operands_in.len(), + dummy_args.len(), + "number of arguments in instruction {} is incorrect\nexpected: {:?}", + apply_target.inst().name, + apply_target + .inst() + .operands_in + .iter() + .map(|operand| format!("{}: {}", operand.name, operand.kind.rust_type)) + .collect::<Vec<_>>(), + ); + + let mut args = Vec::new(); + for (i, arg) in dummy_args.into_iter().enumerate() { + match arg { + DummyExpr::Var(var) => { + let own_var = var_index(&var.name, symbol_table, input_vars, var_pool); + let var = var_pool.get(own_var); + assert!( + var.is_input() || var.get_def(position).is_some(), + format!("{:?} used as both input and def", var) + ); + args.push(Expr::Var(own_var)); + } + DummyExpr::Literal(literal) => { + assert!(!apply_target.inst().operands_in[i].is_value()); + args.push(Expr::Literal(literal)); + } + DummyExpr::Constant(constant) => { + let const_name = const_pool.insert(constant.0); + // Here we abuse var_index by passing an empty, immediately-dropped vector to + // `defined_vars`; the reason for this is that unlike the `Var` case above, + // constants will create a variable that is not an input variable (it is tracked + // instead by ConstPool). + let const_var = var_index(&const_name, symbol_table, &mut vec![], var_pool); + args.push(Expr::Var(const_var)); + } + DummyExpr::Apply(..) 
=> { + panic!("Recursive apply is not allowed."); + } + DummyExpr::Block(_block) => { + panic!("Blocks are not valid arguments."); + } + } + } + + Apply::new(apply_target, args) +} + +#[allow(clippy::too_many_arguments)] +fn rewrite_def_list( + position: PatternPosition, + dummy_defs: Vec<DummyDef>, + symbol_table: &mut SymbolTable, + input_vars: &mut Vec<VarIndex>, + defined_vars: &mut Vec<VarIndex>, + var_pool: &mut VarPool, + def_pool: &mut DefPool, + block_pool: &mut BlockPool, + const_pool: &mut ConstPool, +) -> Vec<DefIndex> { + let mut new_defs = Vec::new(); + // Register variable names of new blocks first as a block name can be used to jump forward. Thus + // the name has to be registered first to avoid misinterpreting it as an input-var. + for dummy_def in dummy_defs.iter() { + if let DummyExpr::Block(ref var) = dummy_def.expr { + var_index(&var.name, symbol_table, defined_vars, var_pool); + } + } + + // Iterate over the definitions and blocks, to map variables names to inputs or outputs. + for dummy_def in dummy_defs { + let def_index = def_pool.next_index(); + + let new_defined_vars = rewrite_defined_vars( + position, + &dummy_def, + def_index, + symbol_table, + defined_vars, + var_pool, + ); + if let DummyExpr::Block(var) = dummy_def.expr { + let var_index = *symbol_table + .get(&var.name) + .or_else(|| { + panic!( + "Block {} was not registered during the first visit", + var.name + ) + }) + .unwrap(); + var_pool.get_mut(var_index).set_def(position, def_index); + block_pool.create_block(var_index, def_index); + } else { + let new_apply = rewrite_expr( + position, + dummy_def.expr, + symbol_table, + input_vars, + var_pool, + const_pool, + ); + + assert!( + def_pool.next_index() == def_index, + "shouldn't have created new defs in the meanwhile" + ); + assert_eq!( + new_apply.inst.value_results.len(), + new_defined_vars.len(), + "number of Var results in instruction is incorrect" + ); + + new_defs.push(def_pool.create_inst(new_apply, new_defined_vars)); + } + } + new_defs +} + +/// A group of related transformations. +pub(crate) struct TransformGroup { + pub name: &'static str, + pub doc: &'static str, + pub chain_with: Option<TransformGroupIndex>, + pub isa_name: Option<&'static str>, + pub id: TransformGroupIndex, + + /// Maps Instruction camel_case names to custom legalization functions names. + pub custom_legalizes: HashMap<String, &'static str>, + pub transforms: Vec<Transform>, +} + +impl TransformGroup { + pub fn rust_name(&self) -> String { + match self.isa_name { + Some(_) => { + // This is a function in the same module as the LEGALIZE_ACTIONS table referring to + // it. 
+ self.name.to_string() + } + None => format!("crate::legalizer::{}", self.name), + } + } +} + +#[derive(Copy, Clone, PartialEq, Eq, Hash, PartialOrd, Ord)] +pub(crate) struct TransformGroupIndex(u32); +entity_impl!(TransformGroupIndex); + +pub(crate) struct TransformGroupBuilder { + name: &'static str, + doc: &'static str, + chain_with: Option<TransformGroupIndex>, + isa_name: Option<&'static str>, + pub custom_legalizes: HashMap<String, &'static str>, + pub transforms: Vec<Transform>, +} + +impl TransformGroupBuilder { + pub fn new(name: &'static str, doc: &'static str) -> Self { + Self { + name, + doc, + chain_with: None, + isa_name: None, + custom_legalizes: HashMap::new(), + transforms: Vec::new(), + } + } + + pub fn chain_with(mut self, next_id: TransformGroupIndex) -> Self { + assert!(self.chain_with.is_none()); + self.chain_with = Some(next_id); + self + } + + pub fn isa(mut self, isa_name: &'static str) -> Self { + assert!(self.isa_name.is_none()); + self.isa_name = Some(isa_name); + self + } + + /// Add a custom legalization action for `inst`. + /// + /// The `func_name` parameter is the fully qualified name of a Rust function which takes the + /// same arguments as the `isa::Legalize` actions. + /// + /// The custom function will be called to legalize `inst` and any return value is ignored. + pub fn custom_legalize(&mut self, inst: &Instruction, func_name: &'static str) { + assert!( + self.custom_legalizes + .insert(inst.camel_name.clone(), func_name) + .is_none(), + format!( + "custom legalization action for {} inserted twice", + inst.name + ) + ); + } + + /// Add a legalization pattern to this group. + pub fn legalize(&mut self, src: DummyDef, dst: Vec<DummyDef>) { + let transform = Transform::new(src, dst); + transform.verify_legalize(); + self.transforms.push(transform); + } + + pub fn build_and_add_to(self, owner: &mut TransformGroups) -> TransformGroupIndex { + let next_id = owner.next_key(); + owner.add(TransformGroup { + name: self.name, + doc: self.doc, + isa_name: self.isa_name, + id: next_id, + chain_with: self.chain_with, + custom_legalizes: self.custom_legalizes, + transforms: self.transforms, + }) + } +} + +pub(crate) struct TransformGroups { + groups: PrimaryMap<TransformGroupIndex, TransformGroup>, +} + +impl TransformGroups { + pub fn new() -> Self { + Self { + groups: PrimaryMap::new(), + } + } + pub fn add(&mut self, new_group: TransformGroup) -> TransformGroupIndex { + for group in self.groups.values() { + assert!( + group.name != new_group.name, + format!("trying to insert {} for the second time", new_group.name) + ); + } + self.groups.push(new_group) + } + pub fn get(&self, id: TransformGroupIndex) -> &TransformGroup { + &self.groups[id] + } + fn next_key(&self) -> TransformGroupIndex { + self.groups.next_key() + } + pub fn by_name(&self, name: &'static str) -> &TransformGroup { + for group in self.groups.values() { + if group.name == name { + return group; + } + } + panic!(format!("transform group with name {} not found", name)); + } +} + +#[test] +#[should_panic] +fn test_double_custom_legalization() { + use crate::cdsl::formats::InstructionFormatBuilder; + use crate::cdsl::instructions::{AllInstructions, InstructionBuilder, InstructionGroupBuilder}; + + let nullary = InstructionFormatBuilder::new("nullary").build(); + + let mut dummy_all = AllInstructions::new(); + let mut inst_group = InstructionGroupBuilder::new(&mut dummy_all); + inst_group.push(InstructionBuilder::new("dummy", "doc", &nullary)); + + let inst_group = inst_group.build(); + let 
dummy_inst = inst_group.by_name("dummy"); + + let mut transform_group = TransformGroupBuilder::new("test", "doc"); + transform_group.custom_legalize(&dummy_inst, "custom 1"); + transform_group.custom_legalize(&dummy_inst, "custom 2"); +} diff --git a/third_party/rust/cranelift-codegen-meta/src/default_map.rs b/third_party/rust/cranelift-codegen-meta/src/default_map.rs new file mode 100644 index 0000000000..3a2be05dac --- /dev/null +++ b/third_party/rust/cranelift-codegen-meta/src/default_map.rs @@ -0,0 +1,20 @@ +//! Trait for extending `HashMap` with `get_or_default`. +use std::collections::HashMap; +use std::hash::Hash; + +pub(crate) trait MapWithDefault<K, V: Default> { + fn get_or_default(&mut self, k: K) -> &mut V; +} + +impl<K: Eq + Hash, V: Default> MapWithDefault<K, V> for HashMap<K, V> { + fn get_or_default(&mut self, k: K) -> &mut V { + self.entry(k).or_insert_with(V::default) + } +} + +#[test] +fn test_default() { + let mut hash_map = HashMap::new(); + hash_map.insert(42, "hello"); + assert_eq!(*hash_map.get_or_default(43), ""); +} diff --git a/third_party/rust/cranelift-codegen-meta/src/error.rs b/third_party/rust/cranelift-codegen-meta/src/error.rs new file mode 100644 index 0000000000..4cbf3d8285 --- /dev/null +++ b/third_party/rust/cranelift-codegen-meta/src/error.rs @@ -0,0 +1,48 @@ +//! Error returned during meta code-generation. +use std::fmt; +use std::io; + +/// An error that occurred when the cranelift_codegen_meta crate was generating +/// source files for the cranelift_codegen crate. +#[derive(Debug)] +pub struct Error { + inner: Box<ErrorInner>, +} + +impl Error { + /// Create a new error object with the given message. + pub fn with_msg<S: Into<String>>(msg: S) -> Error { + Error { + inner: Box::new(ErrorInner::Msg(msg.into())), + } + } +} + +impl fmt::Display for Error { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + write!(f, "{}", self.inner) + } +} + +impl From<io::Error> for Error { + fn from(e: io::Error) -> Self { + Error { + inner: Box::new(ErrorInner::IoError(e)), + } + } +} + +#[derive(Debug)] +enum ErrorInner { + Msg(String), + IoError(io::Error), +} + +impl fmt::Display for ErrorInner { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + match *self { + ErrorInner::Msg(ref s) => write!(f, "{}", s), + ErrorInner::IoError(ref e) => write!(f, "{}", e), + } + } +} diff --git a/third_party/rust/cranelift-codegen-meta/src/gen_binemit.rs b/third_party/rust/cranelift-codegen-meta/src/gen_binemit.rs new file mode 100644 index 0000000000..f67aa9b5a9 --- /dev/null +++ b/third_party/rust/cranelift-codegen-meta/src/gen_binemit.rs @@ -0,0 +1,224 @@ +//! Generate binary emission code for each ISA. + +use cranelift_entity::EntityRef; + +use crate::error; +use crate::srcgen::Formatter; + +use crate::cdsl::recipes::{EncodingRecipe, OperandConstraint, Recipes}; + +/// Generate code to handle a single recipe. +/// +/// - Unpack the instruction data, knowing the format. +/// - Determine register locations for operands with register constraints. +/// - Determine stack slot locations for operands with stack constraints. +/// - Call hand-written code for the actual emission. +fn gen_recipe(recipe: &EncodingRecipe, fmt: &mut Formatter) { + let inst_format = &recipe.format; + let num_value_ops = inst_format.num_value_operands; + + // TODO: Set want_args to true for only MultiAry instructions instead of all formats with value + // list. 
+ let want_args = inst_format.has_value_list + || recipe.operands_in.iter().any(|c| match c { + OperandConstraint::RegClass(_) | OperandConstraint::Stack(_) => true, + OperandConstraint::FixedReg(_) | OperandConstraint::TiedInput(_) => false, + }); + assert!(!want_args || num_value_ops > 0 || inst_format.has_value_list); + + let want_outs = recipe.operands_out.iter().any(|c| match c { + OperandConstraint::RegClass(_) | OperandConstraint::Stack(_) => true, + OperandConstraint::FixedReg(_) | OperandConstraint::TiedInput(_) => false, + }); + + let is_regmove = ["RegMove", "RegSpill", "RegFill"].contains(&inst_format.name); + + // Unpack the instruction data. + fmtln!(fmt, "if let InstructionData::{} {{", inst_format.name); + fmt.indent(|fmt| { + fmt.line("opcode,"); + for f in &inst_format.imm_fields { + fmtln!(fmt, "{},", f.member); + } + if want_args { + if inst_format.has_value_list || num_value_ops > 1 { + fmt.line("ref args,"); + } else { + fmt.line("arg,"); + } + } + fmt.line(".."); + + fmt.outdented_line("} = *inst_data {"); + + // Pass recipe arguments in this order: inputs, imm_fields, outputs. + let mut args = String::new(); + + if want_args && !is_regmove { + if inst_format.has_value_list { + fmt.line("let args = args.as_slice(&func.dfg.value_lists);"); + } else if num_value_ops == 1 { + fmt.line("let args = [arg];"); + } + args += &unwrap_values(&recipe.operands_in, "in", "args", fmt); + } + + for f in &inst_format.imm_fields { + args += &format!(", {}", f.member); + } + + // Unwrap interesting output arguments. + if want_outs { + if recipe.operands_out.len() == 1 { + fmt.line("let results = [func.dfg.first_result(inst)];") + } else { + fmt.line("let results = func.dfg.inst_results(inst);"); + } + args += &unwrap_values(&recipe.operands_out, "out", "results", fmt); + } + + // Optimization: Only update the register diversion tracker for regmove instructions. + if is_regmove { + fmt.line("divert.apply(inst_data);") + } + + match &recipe.emit { + Some(emit) => { + fmt.multi_line(emit); + fmt.line("return;"); + } + None => { + fmtln!( + fmt, + "return recipe_{}(func, inst, sink, bits{});", + recipe.name.to_lowercase(), + args + ); + } + } + }); + fmt.line("}"); +} + +/// Emit code that unwraps values living in registers or stack slots. +/// +/// :param args: Input or output constraints. +/// :param prefix: Prefix to be used for the generated local variables. +/// :param values: Name of slice containing the values to be unwrapped. 
+/// :returns: Comma separated list of the generated variables +fn unwrap_values( + args: &[OperandConstraint], + prefix: &str, + values_slice: &str, + fmt: &mut Formatter, +) -> String { + let mut varlist = String::new(); + for (i, cst) in args.iter().enumerate() { + match cst { + OperandConstraint::RegClass(_reg_class) => { + let v = format!("{}_reg{}", prefix, i); + varlist += &format!(", {}", v); + fmtln!( + fmt, + "let {} = divert.reg({}[{}], &func.locations);", + v, + values_slice, + i + ); + } + OperandConstraint::Stack(stack) => { + let v = format!("{}_stk{}", prefix, i); + varlist += &format!(", {}", v); + fmtln!(fmt, "let {} = StackRef::masked(", v); + fmt.indent(|fmt| { + fmtln!( + fmt, + "divert.stack({}[{}], &func.locations),", + values_slice, + i + ); + fmt.line(format!("{},", stack.stack_base_mask())); + fmt.line("&func.stack_slots,"); + }); + fmt.line(").unwrap();"); + } + _ => {} + } + } + varlist +} + +fn gen_isa(isa_name: &str, recipes: &Recipes, fmt: &mut Formatter) { + fmt.doc_comment(format!( + "Emit binary machine code for `inst` for the {} ISA.", + isa_name + )); + + if recipes.is_empty() { + fmt.line("pub fn emit_inst<CS: CodeSink + ?Sized>("); + fmt.indent(|fmt| { + fmt.line("func: &Function,"); + fmt.line("inst: Inst,"); + fmt.line("_divert: &mut RegDiversions,"); + fmt.line("_sink: &mut CS,"); + fmt.line("_isa: &dyn TargetIsa,"); + }); + fmt.line(") {"); + fmt.indent(|fmt| { + // No encoding recipes: Emit a stub. + fmt.line("bad_encoding(func, inst)"); + }); + fmt.line("}"); + return; + } + + fmt.line("#[allow(unused_variables, unreachable_code)]"); + fmt.line("pub fn emit_inst<CS: CodeSink + ?Sized>("); + fmt.indent(|fmt| { + fmt.line("func: &Function,"); + fmt.line("inst: Inst,"); + fmt.line("divert: &mut RegDiversions,"); + fmt.line("sink: &mut CS,"); + fmt.line("isa: &dyn TargetIsa,") + }); + + fmt.line(") {"); + fmt.indent(|fmt| { + fmt.line("let encoding = func.encodings[inst];"); + fmt.line("let bits = encoding.bits();"); + fmt.line("let inst_data = &func.dfg[inst];"); + fmt.line("match encoding.recipe() {"); + fmt.indent(|fmt| { + for (i, recipe) in recipes.iter() { + fmt.comment(format!("Recipe {}", recipe.name)); + fmtln!(fmt, "{} => {{", i.index()); + fmt.indent(|fmt| { + gen_recipe(recipe, fmt); + }); + fmt.line("}"); + } + fmt.line("_ => {},"); + }); + fmt.line("}"); + + // Allow for unencoded ghost instructions. The verifier will check details. + fmt.line("if encoding.is_legal() {"); + fmt.indent(|fmt| { + fmt.line("bad_encoding(func, inst);"); + }); + fmt.line("}"); + }); + fmt.line("}"); +} + +pub(crate) fn generate( + isa_name: &str, + recipes: &Recipes, + binemit_filename: &str, + out_dir: &str, +) -> Result<(), error::Error> { + let mut fmt = Formatter::new(); + gen_isa(isa_name, recipes, &mut fmt); + fmt.update_file(binemit_filename, out_dir)?; + Ok(()) +} diff --git a/third_party/rust/cranelift-codegen-meta/src/gen_encodings.rs b/third_party/rust/cranelift-codegen-meta/src/gen_encodings.rs new file mode 100644 index 0000000000..d79dc66340 --- /dev/null +++ b/third_party/rust/cranelift-codegen-meta/src/gen_encodings.rs @@ -0,0 +1,1139 @@ +//! Generate sources for instruction encoding. +//! +//! The tables and functions generated here support the `TargetISA::encode()` function which +//! determines if a given instruction is legal, and if so, its `Encoding` data which consists of a +//! *recipe* and some *encoding* bits. +//! +//! The `encode` function doesn't actually generate the binary machine bits. Each recipe has a +//! 
corresponding hand-written function to do that after registers are allocated. +//! +//! This is the information available to us: +//! +//! - The instruction to be encoded as an `InstructionData` reference. +//! - The controlling type variable. +//! - The data-flow graph giving us access to the types of all values involved. This is needed for +//! testing any secondary type variables. +//! - A `PredicateView` reference for the ISA-specific settings for evaluating ISA predicates. +//! - The currently active CPU mode is determined by the ISA. +//! +//! ## Level 1 table lookup +//! +//! The CPU mode provides the first table. The key is the instruction's controlling type variable. +//! If the instruction is not polymorphic, use `INVALID` for the type variable. The table values +//! are level 2 tables. +//! +//! ## Level 2 table lookup +//! +//! The level 2 table is keyed by the instruction's opcode. The table values are *encoding lists*. +//! +//! The two-level table lookup allows the level 2 tables to be much smaller with good locality. +//! Code in any given function usually only uses a few different types, so many of the level 2 +//! tables will be cold. +//! +//! ## Encoding lists +//! +//! An encoding list is a non-empty sequence of list entries. Each entry has one of these forms: +//! +//! 1. Recipe + bits. Use this encoding if the recipe predicate is satisfied. +//! 2. Recipe + bits, final entry. Use this encoding if the recipe predicate is satisfied. +//! Otherwise, stop with the default legalization code. +//! 3. Stop with legalization code. +//! 4. Predicate + skip count. Test predicate and skip N entries if it is false. +//! 5. Predicate + stop. Test predicate and stop with the default legalization code if it is false. +//! +//! The instruction predicate is also used to distinguish between polymorphic instructions with +//! different types for secondary type variables. + +use std::collections::btree_map; +use std::collections::{BTreeMap, HashMap, HashSet}; +use std::convert::TryFrom; +use std::iter::FromIterator; + +use cranelift_codegen_shared::constant_hash::generate_table; +use cranelift_entity::EntityRef; + +use crate::error; +use crate::srcgen::Formatter; + +use crate::cdsl::cpu_modes::CpuMode; +use crate::cdsl::encodings::Encoding; +use crate::cdsl::instructions::{Instruction, InstructionPredicate, InstructionPredicateNumber}; +use crate::cdsl::isa::TargetIsa; +use crate::cdsl::recipes::{EncodingRecipe, OperandConstraint, Recipes, Register}; +use crate::cdsl::regs::IsaRegs; +use crate::cdsl::settings::SettingPredicateNumber; +use crate::cdsl::types::ValueType; +use crate::cdsl::xform::TransformGroupIndex; + +use crate::shared::Definitions as SharedDefinitions; + +use crate::default_map::MapWithDefault; +use crate::unique_table::UniqueSeqTable; + +/// Emit code for matching an instruction predicate against an `InstructionData` reference called +/// `inst`. +/// +/// The generated code is an `if let` pattern match that falls through if the instruction has an +/// unexpected format. This should lead to a panic. 
+fn emit_instp(instp: &InstructionPredicate, has_func: bool, fmt: &mut Formatter) { + if let Some(type_predicate) = instp.type_predicate("func") { + fmt.line("let args = inst.arguments(&func.dfg.value_lists);"); + fmt.line(type_predicate); + return; + } + + let leaves = instp.collect_leaves(); + + let mut has_type_check = false; + let mut format_name = None; + let mut field_names = HashSet::new(); + + for leaf in leaves { + if leaf.is_type_predicate() { + has_type_check = true; + } else { + field_names.insert(leaf.format_destructuring_member_name()); + let leaf_format_name = leaf.format_name(); + match format_name { + None => format_name = Some(leaf_format_name), + Some(previous_format_name) => { + assert!( + previous_format_name == leaf_format_name, + format!("Format predicate can only operate on a single InstructionFormat; trying to use both {} and {}", previous_format_name, leaf_format_name + )); + } + } + } + } + + let mut fields = Vec::from_iter(field_names); + fields.sort(); + let fields = fields.join(", "); + + let format_name = format_name.expect("There should be a format name!"); + + fmtln!( + fmt, + "if let crate::ir::InstructionData::{} {{ {}, .. }} = *inst {{", + format_name, + fields + ); + fmt.indent(|fmt| { + if has_type_check { + // We could implement this. + assert!(has_func, "recipe predicates can't check type variables."); + fmt.line("let args = inst.arguments(&func.dfg.value_lists);"); + } else if has_func { + // Silence dead argument. + fmt.line("let _ = func;"); + } + fmtln!(fmt, "return {};", instp.rust_predicate("func").unwrap()); + }); + fmtln!(fmt, "}"); + + fmt.line("unreachable!();"); +} + +/// Emit private functions for checking recipe predicates as well as a static `RECIPE_PREDICATES` +/// array indexed by recipe number. +/// +/// A recipe predicate is a combination of an ISA predicate and an instruction predicate. Many +/// recipes have identical predicates. +fn emit_recipe_predicates(isa: &TargetIsa, fmt: &mut Formatter) { + let mut predicate_names = HashMap::new(); + + fmt.comment(format!("{} recipe predicates.", isa.name)); + for recipe in isa.recipes.values() { + let (isap, instp) = match (&recipe.isa_predicate, &recipe.inst_predicate) { + (None, None) => continue, + (isap, instp) if predicate_names.contains_key(&(isap, instp)) => continue, + (isap, instp) => (isap, instp), + }; + + let func_name = format!("recipe_predicate_{}", recipe.name.to_lowercase()); + predicate_names.insert((isap, instp), func_name.clone()); + + // Generate the predicate function. + fmtln!( + fmt, + "fn {}({}: crate::settings::PredicateView, {}: &ir::InstructionData) -> bool {{", + func_name, + if isap.is_some() { "isap" } else { "_" }, + if instp.is_some() { "inst" } else { "_" } + ); + fmt.indent(|fmt| { + match (isap, instp) { + (Some(isap), None) => { + fmtln!(fmt, "isap.test({})", isap); + } + (None, Some(instp)) => { + emit_instp(instp, /* has func */ false, fmt); + } + (Some(isap), Some(instp)) => { + fmtln!(fmt, "isap.test({}) &&", isap); + emit_instp(instp, /* has func */ false, fmt); + } + _ => panic!("skipped above"), + } + }); + fmtln!(fmt, "}"); + } + fmt.empty_line(); + + // Generate the static table. + fmt.doc_comment(format!( + r#"{} recipe predicate table. 
+ + One entry per recipe, set to Some only when the recipe is guarded by a predicate."#, + isa.name + )); + fmtln!( + fmt, + "pub static RECIPE_PREDICATES: [RecipePredicate; {}] = [", + isa.recipes.len() + ); + fmt.indent(|fmt| { + for recipe in isa.recipes.values() { + match (&recipe.isa_predicate, &recipe.inst_predicate) { + (None, None) => fmt.line("None,"), + key => fmtln!(fmt, "Some({}),", predicate_names.get(&key).unwrap()), + } + } + }); + fmtln!(fmt, "];"); + fmt.empty_line(); +} + +/// Emit private functions for matching instruction predicates as well as a static +/// `INST_PREDICATES` array indexed by predicate number. +fn emit_inst_predicates(isa: &TargetIsa, fmt: &mut Formatter) { + fmt.comment(format!("{} instruction predicates.", isa.name)); + for (id, instp) in isa.encodings_predicates.iter() { + fmtln!(fmt, "fn inst_predicate_{}(func: &crate::ir::Function, inst: &crate::ir::InstructionData) -> bool {{", id.index()); + fmt.indent(|fmt| { + emit_instp(instp, /* has func */ true, fmt); + }); + fmtln!(fmt, "}"); + } + fmt.empty_line(); + + // Generate the static table. + fmt.doc_comment(format!( + r#"{} instruction predicate table. + + One entry per instruction predicate, so the encoding bytecode can embed indexes into this + table."#, + isa.name + )); + fmtln!( + fmt, + "pub static INST_PREDICATES: [InstPredicate; {}] = [", + isa.encodings_predicates.len() + ); + fmt.indent(|fmt| { + for id in isa.encodings_predicates.keys() { + fmtln!(fmt, "inst_predicate_{},", id.index()); + } + }); + fmtln!(fmt, "];"); + fmt.empty_line(); +} + +/// Emit a table of encoding recipe names keyed by recipe number. +/// +/// This is used for pretty-printing encodings. +fn emit_recipe_names(isa: &TargetIsa, fmt: &mut Formatter) { + fmt.doc_comment(format!( + r#"{} recipe names, using the same recipe index spaces as the one specified by the + corresponding binemit file."#, + isa.name + )); + fmtln!( + fmt, + "static RECIPE_NAMES: [&str; {}] = [", + isa.recipes.len() + ); + fmt.indent(|fmt| { + for recipe in isa.recipes.values() { + fmtln!(fmt, r#""{}","#, recipe.name); + } + }); + fmtln!(fmt, "];"); + fmt.empty_line(); +} + +/// Returns a set of all the registers involved in fixed register constraints. +fn get_fixed_registers(operands_in: &[OperandConstraint]) -> HashSet<Register> { + HashSet::from_iter( + operands_in + .iter() + .map(|constraint| { + if let OperandConstraint::FixedReg(reg) = &constraint { + Some(*reg) + } else { + None + } + }) + .filter(|opt| opt.is_some()) + .map(|opt| opt.unwrap()), + ) +} + +/// Emit a struct field initializer for an array of operand constraints. +/// +/// Note "fixed_registers" must refer to the other kind of operands (i.e. if we're operating on +/// inputs, fixed_registers must contain the fixed output registers). 
+fn emit_operand_constraints( + registers: &IsaRegs, + recipe: &EncodingRecipe, + constraints: &[OperandConstraint], + field_name: &'static str, + tied_operands: &HashMap<usize, usize>, + fixed_registers: &HashSet<Register>, + fmt: &mut Formatter, +) { + if constraints.is_empty() { + fmtln!(fmt, "{}: &[],", field_name); + return; + } + + fmtln!(fmt, "{}: &[", field_name); + fmt.indent(|fmt| { + for (n, constraint) in constraints.iter().enumerate() { + fmt.line("OperandConstraint {"); + fmt.indent(|fmt| { + match constraint { + OperandConstraint::RegClass(reg_class) => { + if let Some(tied_input) = tied_operands.get(&n) { + fmtln!(fmt, "kind: ConstraintKind::Tied({}),", tied_input); + } else { + fmt.line("kind: ConstraintKind::Reg,"); + } + fmtln!( + fmt, + "regclass: &{}_DATA,", + registers.classes[*reg_class].name + ); + } + OperandConstraint::FixedReg(reg) => { + assert!(!tied_operands.contains_key(&n), "can't tie fixed registers"); + let constraint_kind = if fixed_registers.contains(®) { + "FixedTied" + } else { + "FixedReg" + }; + fmtln!( + fmt, + "kind: ConstraintKind::{}({}),", + constraint_kind, + reg.unit + ); + fmtln!( + fmt, + "regclass: &{}_DATA,", + registers.classes[reg.regclass].name + ); + } + OperandConstraint::TiedInput(tied_input) => { + // This is a tied output constraint. It should never happen + // for input constraints. + assert!( + tied_input == tied_operands.get(&n).unwrap(), + "invalid tied constraint" + ); + fmtln!(fmt, "kind: ConstraintKind::Tied({}),", tied_input); + + let tied_class = if let OperandConstraint::RegClass(tied_class) = + recipe.operands_in[*tied_input] + { + tied_class + } else { + panic!("tied constraints relate only to register inputs"); + }; + + fmtln!( + fmt, + "regclass: &{}_DATA,", + registers.classes[tied_class].name + ); + } + OperandConstraint::Stack(stack) => { + assert!(!tied_operands.contains_key(&n), "can't tie stack operand"); + fmt.line("kind: ConstraintKind::Stack,"); + fmtln!( + fmt, + "regclass: &{}_DATA,", + registers.classes[stack.regclass].name + ); + } + } + }); + fmt.line("},"); + } + }); + fmtln!(fmt, "],"); +} + +/// Emit a table of encoding recipe operand constraints keyed by recipe number. +/// +/// These are used by the register allocator to pick registers that can be properly encoded. +fn emit_recipe_constraints(isa: &TargetIsa, fmt: &mut Formatter) { + fmt.doc_comment(format!( + r#"{} recipe constraints list, using the same recipe index spaces as the one + specified by the corresponding binemit file. These constraints are used by register + allocation to select the right location to use for input and output values."#, + isa.name + )); + fmtln!( + fmt, + "static RECIPE_CONSTRAINTS: [RecipeConstraints; {}] = [", + isa.recipes.len() + ); + fmt.indent(|fmt| { + for recipe in isa.recipes.values() { + // Compute a mapping of tied operands in both directions (input tied to outputs and + // conversely). + let mut tied_in_to_out = HashMap::new(); + let mut tied_out_to_in = HashMap::new(); + for (out_index, constraint) in recipe.operands_out.iter().enumerate() { + if let OperandConstraint::TiedInput(in_index) = &constraint { + tied_in_to_out.insert(*in_index, out_index); + tied_out_to_in.insert(out_index, *in_index); + } + } + + // Find the sets of registers involved in fixed register constraints. 
+ let fixed_inputs = get_fixed_registers(&recipe.operands_in); + let fixed_outputs = get_fixed_registers(&recipe.operands_out); + + fmt.comment(format!("Constraints for recipe {}:", recipe.name)); + fmt.line("RecipeConstraints {"); + fmt.indent(|fmt| { + emit_operand_constraints( + &isa.regs, + recipe, + &recipe.operands_in, + "ins", + &tied_in_to_out, + &fixed_outputs, + fmt, + ); + emit_operand_constraints( + &isa.regs, + recipe, + &recipe.operands_out, + "outs", + &tied_out_to_in, + &fixed_inputs, + fmt, + ); + fmtln!( + fmt, + "fixed_ins: {},", + if !fixed_inputs.is_empty() { + "true" + } else { + "false" + } + ); + fmtln!( + fmt, + "fixed_outs: {},", + if !fixed_outputs.is_empty() { + "true" + } else { + "false" + } + ); + fmtln!( + fmt, + "tied_ops: {},", + if !tied_in_to_out.is_empty() { + "true" + } else { + "false" + } + ); + fmtln!( + fmt, + "clobbers_flags: {},", + if recipe.clobbers_flags { + "true" + } else { + "false" + } + ); + }); + fmt.line("},"); + } + }); + fmtln!(fmt, "];"); + fmt.empty_line(); +} + +/// Emit a table of encoding recipe code size information. +fn emit_recipe_sizing(isa: &TargetIsa, fmt: &mut Formatter) { + fmt.doc_comment(format!( + r#"{} recipe sizing descriptors, using the same recipe index spaces as the one + specified by the corresponding binemit file. These are used to compute the final size of an + instruction, as well as to compute the range of branches."#, + isa.name + )); + fmtln!( + fmt, + "static RECIPE_SIZING: [RecipeSizing; {}] = [", + isa.recipes.len() + ); + fmt.indent(|fmt| { + for recipe in isa.recipes.values() { + fmt.comment(format!("Code size information for recipe {}:", recipe.name)); + fmt.line("RecipeSizing {"); + fmt.indent(|fmt| { + fmtln!(fmt, "base_size: {},", recipe.base_size); + fmtln!(fmt, "compute_size: {},", recipe.compute_size); + if let Some(range) = &recipe.branch_range { + fmtln!( + fmt, + "branch_range: Some(BranchRange {{ origin: {}, bits: {} }}),", + range.inst_size, + range.range + ); + } else { + fmt.line("branch_range: None,"); + } + }); + fmt.line("},"); + } + }); + fmtln!(fmt, "];"); + fmt.empty_line(); +} + +/// Level 1 table mapping types to `Level2` objects. +struct Level1Table<'cpu_mode> { + cpu_mode: &'cpu_mode CpuMode, + legalize_code: TransformGroupIndex, + + table_map: HashMap<Option<ValueType>, usize>, + table_vec: Vec<Level2Table>, +} + +impl<'cpu_mode> Level1Table<'cpu_mode> { + fn new(cpu_mode: &'cpu_mode CpuMode) -> Self { + Self { + cpu_mode, + legalize_code: cpu_mode.get_default_legalize_code(), + table_map: HashMap::new(), + table_vec: Vec::new(), + } + } + + /// Returns the level2 table for the given type; None means monomorphic, in this context. + fn l2table_for(&mut self, typ: Option<ValueType>) -> &mut Level2Table { + let cpu_mode = &self.cpu_mode; + let index = match self.table_map.get(&typ) { + Some(&index) => index, + None => { + let legalize_code = cpu_mode.get_legalize_code_for(&typ); + let table = Level2Table::new(typ.clone(), legalize_code); + let index = self.table_vec.len(); + self.table_map.insert(typ, index); + self.table_vec.push(table); + index + } + }; + self.table_vec.get_mut(index).unwrap() + } + + fn l2tables(&mut self) -> Vec<&mut Level2Table> { + self.table_vec + .iter_mut() + .filter(|table| !table.is_empty()) + .collect::<Vec<_>>() + } +} + +struct Level2HashTableEntry { + inst_name: String, + offset: usize, +} + +/// Level 2 table mapping instruction opcodes to `EncList` objects. 
+/// +/// A level 2 table can be completely empty if it only holds a custom legalization action for `ty`. +struct Level2Table { + typ: Option<ValueType>, + legalize_code: TransformGroupIndex, + inst_to_encodings: BTreeMap<String, EncodingList>, + hash_table_offset: Option<usize>, + hash_table_len: Option<usize>, +} + +impl Level2Table { + fn new(typ: Option<ValueType>, legalize_code: TransformGroupIndex) -> Self { + Self { + typ, + legalize_code, + inst_to_encodings: BTreeMap::new(), + hash_table_offset: None, + hash_table_len: None, + } + } + + fn enclist_for(&mut self, inst: &Instruction) -> &mut EncodingList { + let copied_typ = self.typ.clone(); + self.inst_to_encodings + .entry(inst.name.clone()) + .or_insert_with(|| EncodingList::new(inst, copied_typ)) + } + + fn enclists(&mut self) -> btree_map::ValuesMut<'_, String, EncodingList> { + self.inst_to_encodings.values_mut() + } + + fn is_empty(&self) -> bool { + self.inst_to_encodings.is_empty() + } + + fn layout_hashtable( + &mut self, + level2_hashtables: &mut Vec<Option<Level2HashTableEntry>>, + level2_doc: &mut HashMap<usize, Vec<String>>, + ) { + let hash_table = generate_table( + self.inst_to_encodings.values(), + self.inst_to_encodings.len(), + // TODO the Python code wanted opcode numbers to start from 1. + |enc_list| enc_list.inst.opcode_number.index() + 1, + ); + + let hash_table_offset = level2_hashtables.len(); + let hash_table_len = hash_table.len(); + + assert!(self.hash_table_offset.is_none()); + assert!(self.hash_table_len.is_none()); + self.hash_table_offset = Some(hash_table_offset); + self.hash_table_len = Some(hash_table_len); + + level2_hashtables.extend(hash_table.iter().map(|opt_enc_list| { + opt_enc_list.map(|enc_list| Level2HashTableEntry { + inst_name: enc_list.inst.camel_name.clone(), + offset: enc_list.offset.unwrap(), + }) + })); + + let typ_comment = match &self.typ { + Some(ty) => ty.to_string(), + None => "typeless".into(), + }; + + level2_doc.get_or_default(hash_table_offset).push(format!( + "{:06x}: {}, {} entries", + hash_table_offset, typ_comment, hash_table_len + )); + } +} + +/// The u16 values in an encoding list entry are interpreted as follows: +/// +/// NR = len(all_recipes) +/// +/// entry < 2*NR +/// Try Encoding(entry/2, next_entry) if the recipe predicate is satisfied. +/// If bit 0 is set, stop with the default legalization code. +/// If bit 0 is clear, keep going down the list. +/// entry < PRED_START +/// Stop with legalization code `entry - 2*NR`. +/// +/// Remaining entries are interpreted as (skip, pred) pairs, where: +/// +/// skip = (entry - PRED_START) >> PRED_BITS +/// pred = (entry - PRED_START) & PRED_MASK +/// +/// If the predicate is satisfied, keep going. Otherwise skip over the next +/// `skip` entries. If skip == 0, stop with the default legalization code. +/// +/// The `pred` predicate number is interpreted as an instruction predicate if it +/// is in range, otherwise an ISA predicate. + +/// Encoding lists are represented as u16 arrays. +const CODE_BITS: usize = 16; + +/// Beginning of the predicate code words. +const PRED_START: u16 = 0x1000; + +/// Number of bits used to hold a predicate number (instruction + ISA predicates). +const PRED_BITS: usize = 12; + +/// Mask for extracting the predicate number. +const PRED_MASK: usize = (1 << PRED_BITS) - 1; + +/// Encoder for the list format above. +struct Encoder { + num_instruction_predicates: usize, + + /// u16 encoding list words. + words: Vec<u16>, + + /// Documentation comments: Index into `words` + comment. 
+ docs: Vec<(usize, String)>, +} + +impl Encoder { + fn new(num_instruction_predicates: usize) -> Self { + Self { + num_instruction_predicates, + words: Vec::new(), + docs: Vec::new(), + } + } + + /// Add a recipe+bits entry to the list. + fn recipe(&mut self, recipes: &Recipes, enc: &Encoding, is_final: bool) { + let code = (2 * enc.recipe.index() + if is_final { 1 } else { 0 }) as u16; + assert!(code < PRED_START); + + let doc = format!( + "--> {}{}", + enc.to_rust_comment(recipes), + if is_final { " and stop" } else { "" } + ); + self.docs.push((self.words.len(), doc)); + + self.words.push(code); + self.words.push(enc.encbits); + } + + /// Add a predicate entry. + fn pred(&mut self, pred_comment: String, skip: usize, n: usize) { + assert!(n <= PRED_MASK); + let entry = (PRED_START as usize) + (n | (skip << PRED_BITS)); + assert!(entry < (1 << CODE_BITS)); + let entry = entry as u16; + + let doc = if skip == 0 { + "stop".to_string() + } else { + format!("skip {}", skip) + }; + let doc = format!("{} unless {}", doc, pred_comment); + + self.docs.push((self.words.len(), doc)); + self.words.push(entry); + } + + /// Add an instruction predicate entry. + fn inst_predicate(&mut self, pred: InstructionPredicateNumber, skip: usize) { + let number = pred.index(); + let pred_comment = format!("inst_predicate_{}", number); + self.pred(pred_comment, skip, number); + } + + /// Add an ISA predicate entry. + fn isa_predicate(&mut self, pred: SettingPredicateNumber, skip: usize) { + // ISA predicates follow the instruction predicates. + let n = self.num_instruction_predicates + (pred as usize); + let pred_comment = format!("PredicateView({})", pred); + self.pred(pred_comment, skip, n); + } +} + +/// List of instructions for encoding a given type + opcode pair. +/// +/// An encoding list contains a sequence of predicates and encoding recipes, all encoded as u16 +/// values. +struct EncodingList { + inst: Instruction, + typ: Option<ValueType>, + encodings: Vec<Encoding>, + offset: Option<usize>, +} + +impl EncodingList { + fn new(inst: &Instruction, typ: Option<ValueType>) -> Self { + Self { + inst: inst.clone(), + typ, + encodings: Default::default(), + offset: None, + } + } + + /// Encode this list as a sequence of u16 numbers. + /// + /// Adds the sequence to `enc_lists` and records the returned offset as + /// `self.offset`. + /// + /// Adds comment lines to `enc_lists_doc` keyed by enc_lists offsets. + fn encode( + &mut self, + isa: &TargetIsa, + cpu_mode: &CpuMode, + enc_lists: &mut UniqueSeqTable<u16>, + enc_lists_doc: &mut HashMap<usize, Vec<String>>, + ) { + assert!(!self.encodings.is_empty()); + + let mut encoder = Encoder::new(isa.encodings_predicates.len()); + + let mut index = 0; + while index < self.encodings.len() { + let encoding = &self.encodings[index]; + + // Try to see how many encodings are following and have the same ISA predicate and + // instruction predicate, so as to reduce the number of tests carried out by the + // encoding list interpreter.. + // + // Encodings with similar tests are hereby called a group. The group includes the + // current encoding we're looking at. 
+ let (isa_predicate, inst_predicate) = + (&encoding.isa_predicate, &encoding.inst_predicate); + + let group_size = { + let mut group_size = 1; + while index + group_size < self.encodings.len() { + let next_encoding = &self.encodings[index + group_size]; + if &next_encoding.inst_predicate != inst_predicate + || &next_encoding.isa_predicate != isa_predicate + { + break; + } + group_size += 1; + } + group_size + }; + + let is_last_group = index + group_size == self.encodings.len(); + + // The number of entries to skip when a predicate isn't satisfied is the size of both + // predicates + the size of the group, minus one (for this predicate). Each recipe + // entry has a size of two u16 (recipe index + bits). + let mut skip = if is_last_group { + 0 + } else { + let isap_size = match isa_predicate { + Some(_) => 1, + None => 0, + }; + let instp_size = match inst_predicate { + Some(_) => 1, + None => 0, + }; + isap_size + instp_size + group_size * 2 - 1 + }; + + if let Some(pred) = isa_predicate { + encoder.isa_predicate(*pred, skip); + if !is_last_group { + skip -= 1; + } + } + + if let Some(pred) = inst_predicate { + encoder.inst_predicate(*pred, skip); + // No need to update skip, it's dead after this point. + } + + for i in 0..group_size { + let encoding = &self.encodings[index + i]; + let is_last_encoding = index + i == self.encodings.len() - 1; + encoder.recipe(&isa.recipes, encoding, is_last_encoding); + } + + index += group_size; + } + + assert!(self.offset.is_none()); + let offset = enc_lists.add(&encoder.words); + self.offset = Some(offset); + + // Doc comments. + let recipe_typ_mode_name = format!( + "{}{} ({})", + self.inst.name, + if let Some(typ) = &self.typ { + format!(".{}", typ.to_string()) + } else { + "".into() + }, + cpu_mode.name + ); + + enc_lists_doc + .get_or_default(offset) + .push(format!("{:06x}: {}", offset, recipe_typ_mode_name)); + for (pos, doc) in encoder.docs { + enc_lists_doc.get_or_default(offset + pos).push(doc); + } + enc_lists_doc + .get_or_default(offset + encoder.words.len()) + .insert(0, format!("end of {}", recipe_typ_mode_name)); + } +} + +fn make_tables(cpu_mode: &CpuMode) -> Level1Table { + let mut table = Level1Table::new(cpu_mode); + + for encoding in &cpu_mode.encodings { + table + .l2table_for(encoding.bound_type.clone()) + .enclist_for(encoding.inst()) + .encodings + .push(encoding.clone()); + } + + // Ensure there are level 1 table entries for all types with a custom legalize action. + for value_type in cpu_mode.get_legalized_types() { + table.l2table_for(Some(value_type.clone())); + } + // ... and also for monomorphic instructions. + table.l2table_for(None); + + table +} + +/// Compute encodings and doc comments for encoding lists in `level1`. +fn encode_enclists( + isa: &TargetIsa, + cpu_mode: &CpuMode, + level1: &mut Level1Table, + enc_lists: &mut UniqueSeqTable<u16>, + enc_lists_doc: &mut HashMap<usize, Vec<String>>, +) { + for level2 in level1.l2tables() { + for enclist in level2.enclists() { + enclist.encode(isa, cpu_mode, enc_lists, enc_lists_doc); + } + } +} + +fn encode_level2_hashtables<'a>( + level1: &'a mut Level1Table, + level2_hashtables: &mut Vec<Option<Level2HashTableEntry>>, + level2_doc: &mut HashMap<usize, Vec<String>>, +) { + for level2 in level1.l2tables() { + level2.layout_hashtable(level2_hashtables, level2_doc); + } +} + +fn emit_encoding_tables(defs: &SharedDefinitions, isa: &TargetIsa, fmt: &mut Formatter) { + // Level 1 tables, one per CPU mode. 
+ let mut level1_tables: HashMap<&'static str, Level1Table> = HashMap::new(); + + // Single table containing all the level2 hash tables. + let mut level2_hashtables = Vec::new(); + let mut level2_doc: HashMap<usize, Vec<String>> = HashMap::new(); + + // Tables for encoding lists with comments. + let mut enc_lists = UniqueSeqTable::new(); + let mut enc_lists_doc = HashMap::new(); + + for cpu_mode in &isa.cpu_modes { + level2_doc + .get_or_default(level2_hashtables.len()) + .push(cpu_mode.name.into()); + + let mut level1 = make_tables(cpu_mode); + + encode_enclists( + isa, + cpu_mode, + &mut level1, + &mut enc_lists, + &mut enc_lists_doc, + ); + encode_level2_hashtables(&mut level1, &mut level2_hashtables, &mut level2_doc); + + level1_tables.insert(cpu_mode.name, level1); + } + + // Compute an appropriate Rust integer type to use for offsets into a table of the given length. + let offset_type = |length: usize| { + if length <= 0x10000 { + "u16" + } else { + assert!(u32::try_from(length).is_ok(), "table too big!"); + "u32" + } + }; + + let level1_offset_type = offset_type(level2_hashtables.len()); + let level2_offset_type = offset_type(enc_lists.len()); + + // Emit encoding lists. + fmt.doc_comment( + format!(r#"{} encoding lists. + + This contains the entire encodings bytecode for every single instruction; the encodings + interpreter knows where to start from thanks to the initial lookup in the level 1 and level 2 + table entries below."#, isa.name) + ); + fmtln!(fmt, "pub static ENCLISTS: [u16; {}] = [", enc_lists.len()); + fmt.indent(|fmt| { + let mut line = Vec::new(); + for (index, entry) in enc_lists.iter().enumerate() { + if let Some(comments) = enc_lists_doc.get(&index) { + if !line.is_empty() { + fmtln!(fmt, "{},", line.join(", ")); + line.clear(); + } + for comment in comments { + fmt.comment(comment); + } + } + line.push(format!("{:#06x}", entry)); + } + if !line.is_empty() { + fmtln!(fmt, "{},", line.join(", ")); + } + }); + fmtln!(fmt, "];"); + fmt.empty_line(); + + // Emit the full concatenation of level 2 hash tables. + fmt.doc_comment(format!( + r#"{} level 2 hash tables. + + This hash table, keyed by instruction opcode, contains all the starting offsets for the + encodings interpreter, for all the CPU modes. It is jumped to after a lookup on the + instruction's controlling type in the level 1 hash table."#, + isa.name + )); + fmtln!( + fmt, + "pub static LEVEL2: [Level2Entry<{}>; {}] = [", + level2_offset_type, + level2_hashtables.len() + ); + fmt.indent(|fmt| { + for (offset, entry) in level2_hashtables.iter().enumerate() { + if let Some(comments) = level2_doc.get(&offset) { + for comment in comments { + fmt.comment(comment); + } + } + if let Some(entry) = entry { + fmtln!( + fmt, + "Level2Entry {{ opcode: Some(crate::ir::Opcode::{}), offset: {:#08x} }},", + entry.inst_name, + entry.offset + ); + } else { + fmt.line("Level2Entry { opcode: None, offset: 0 },"); + } + } + }); + fmtln!(fmt, "];"); + fmt.empty_line(); + + // Emit a level 1 hash table for each CPU mode. + for cpu_mode in &isa.cpu_modes { + let level1 = &level1_tables.get(cpu_mode.name).unwrap(); + let hash_table = generate_table( + level1.table_vec.iter(), + level1.table_vec.len(), + |level2_table| { + if let Some(typ) = &level2_table.typ { + typ.number().expect("type without a number") as usize + } else { + 0 + } + }, + ); + + fmt.doc_comment(format!( + r#"{} level 1 hash table for the CPU mode {}. 
+ + This hash table, keyed by instruction controlling type, contains all the level 2 + hash-tables offsets for the given CPU mode, as well as a legalization identifier indicating + which legalization scheme to apply when the instruction doesn't have any valid encoding for + this CPU mode. + "#, + isa.name, cpu_mode.name + )); + fmtln!( + fmt, + "pub static LEVEL1_{}: [Level1Entry<{}>; {}] = [", + cpu_mode.name.to_uppercase(), + level1_offset_type, + hash_table.len() + ); + fmt.indent(|fmt| { + for opt_level2 in hash_table { + let level2 = match opt_level2 { + None => { + // Empty hash table entry. Include the default legalization action. + fmtln!(fmt, "Level1Entry {{ ty: ir::types::INVALID, log2len: !0, offset: 0, legalize: {} }},", + isa.translate_group_index(level1.legalize_code)); + continue; + } + Some(level2) => level2, + }; + + let legalize_comment = defs.transform_groups.get(level2.legalize_code).name; + let legalize_code = isa.translate_group_index(level2.legalize_code); + + let typ_name = if let Some(typ) = &level2.typ { + typ.rust_name() + } else { + "ir::types::INVALID".into() + }; + + if level2.is_empty() { + // Empty level 2 table: Only a specialized legalization action, no actual + // table. + // Set an offset that is out of bounds, but make sure it doesn't overflow its + // type when adding `1<<log2len`. + fmtln!(fmt, "Level1Entry {{ ty: {}, log2len: 0, offset: !0 - 1, legalize: {} }}, // {}", + typ_name, legalize_code, legalize_comment); + continue; + } + + // Proper level 2 hash table. + let l2l = (level2.hash_table_len.unwrap() as f64).log2() as i32; + assert!(l2l > 0, "Level2 hash table was too small."); + fmtln!(fmt, "Level1Entry {{ ty: {}, log2len: {}, offset: {:#08x}, legalize: {} }}, // {}", + typ_name, l2l, level2.hash_table_offset.unwrap(), legalize_code, legalize_comment); + } + }); + fmtln!(fmt, "];"); + fmt.empty_line(); + } +} + +fn gen_isa(defs: &SharedDefinitions, isa: &TargetIsa, fmt: &mut Formatter) { + // Make the `RECIPE_PREDICATES` table. + emit_recipe_predicates(isa, fmt); + + // Make the `INST_PREDICATES` table. + emit_inst_predicates(isa, fmt); + + emit_encoding_tables(defs, isa, fmt); + + emit_recipe_names(isa, fmt); + emit_recipe_constraints(isa, fmt); + emit_recipe_sizing(isa, fmt); + + // Finally, tie it all together in an `EncInfo`. + fmt.line("pub static INFO: isa::EncInfo = isa::EncInfo {"); + fmt.indent(|fmt| { + fmt.line("constraints: &RECIPE_CONSTRAINTS,"); + fmt.line("sizing: &RECIPE_SIZING,"); + fmt.line("names: &RECIPE_NAMES,"); + }); + fmt.line("};"); +} + +pub(crate) fn generate( + defs: &SharedDefinitions, + isa: &TargetIsa, + filename: &str, + out_dir: &str, +) -> Result<(), error::Error> { + let mut fmt = Formatter::new(); + gen_isa(defs, isa, &mut fmt); + fmt.update_file(filename, out_dir)?; + Ok(()) +} diff --git a/third_party/rust/cranelift-codegen-meta/src/gen_inst.rs b/third_party/rust/cranelift-codegen-meta/src/gen_inst.rs new file mode 100644 index 0000000000..a2760b34d7 --- /dev/null +++ b/third_party/rust/cranelift-codegen-meta/src/gen_inst.rs @@ -0,0 +1,1184 @@ +//! Generate instruction data (including opcodes, formats, builders, etc.). 
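// (Illustration only, with hypothetical format and opcode names: the
// generators below emit items of roughly this shape into the generated
// opcode file:
//
//   pub enum InstructionFormat { Unary, Binary, /* ... */ }
//
//   pub enum InstructionData {
//       Unary { opcode: Opcode, arg: Value },
//       Binary { opcode: Opcode, args: [Value; 2] },
//       /* ... */
//   }
//
//   pub enum Opcode { Iadd = 1, /* ... */ }
//
// plus an `InstBuilder` trait with one method per opcode and one per format.)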
+use std::fmt; + +use cranelift_codegen_shared::constant_hash; +use cranelift_entity::EntityRef; + +use crate::cdsl::camel_case; +use crate::cdsl::formats::InstructionFormat; +use crate::cdsl::instructions::{AllInstructions, Instruction}; +use crate::cdsl::operands::Operand; +use crate::cdsl::typevar::{TypeSet, TypeVar}; + +use crate::error; +use crate::srcgen::{Formatter, Match}; +use crate::unique_table::{UniqueSeqTable, UniqueTable}; + +// TypeSet indexes are encoded in 8 bits, with `0xff` reserved. +const TYPESET_LIMIT: usize = 0xff; + +/// Generate an instruction format enumeration. +fn gen_formats(formats: &[&InstructionFormat], fmt: &mut Formatter) { + fmt.doc_comment( + r#" + An instruction format + + Every opcode has a corresponding instruction format + which is represented by both the `InstructionFormat` + and the `InstructionData` enums. + "#, + ); + fmt.line("#[derive(Copy, Clone, PartialEq, Eq, Debug)]"); + fmt.line("pub enum InstructionFormat {"); + fmt.indent(|fmt| { + for format in formats { + fmt.doc_comment(format.to_string()); + fmtln!(fmt, "{},", format.name); + } + }); + fmt.line("}"); + fmt.empty_line(); + + // Emit a From<InstructionData> which also serves to verify that + // InstructionFormat and InstructionData are in sync. + fmt.line("impl<'a> From<&'a InstructionData> for InstructionFormat {"); + fmt.indent(|fmt| { + fmt.line("fn from(inst: &'a InstructionData) -> Self {"); + fmt.indent(|fmt| { + let mut m = Match::new("*inst"); + for format in formats { + m.arm( + format!("InstructionData::{}", format.name), + vec![".."], + format!("Self::{}", format.name), + ); + } + fmt.add_match(m); + }); + fmt.line("}"); + }); + fmt.line("}"); + fmt.empty_line(); +} + +/// Generate the InstructionData enum. +/// +/// Every variant must contain an `opcode` field. The size of `InstructionData` should be kept at +/// 16 bytes on 64-bit architectures. If more space is needed to represent an instruction, use a +/// `ValueList` to store the additional information out of line. +fn gen_instruction_data(formats: &[&InstructionFormat], fmt: &mut Formatter) { + fmt.line("#[derive(Clone, Debug)]"); + fmt.line("#[allow(missing_docs)]"); + fmt.line("pub enum InstructionData {"); + fmt.indent(|fmt| { + for format in formats { + fmtln!(fmt, "{} {{", format.name); + fmt.indent(|fmt| { + fmt.line("opcode: Opcode,"); + if format.typevar_operand.is_some() { + if format.has_value_list { + fmt.line("args: ValueList,"); + } else if format.num_value_operands == 1 { + fmt.line("arg: Value,"); + } else { + fmtln!(fmt, "args: [Value; {}],", format.num_value_operands); + } + } + for field in &format.imm_fields { + fmtln!(fmt, "{}: {},", field.member, field.kind.rust_type); + } + }); + fmtln!(fmt, "},"); + } + }); + fmt.line("}"); +} + +fn gen_arguments_method(formats: &[&InstructionFormat], fmt: &mut Formatter, is_mut: bool) { + let (method, mut_, rslice, as_slice) = if is_mut { + ( + "arguments_mut", + "mut ", + "core::slice::from_mut", + "as_mut_slice", + ) + } else { + ("arguments", "", "core::slice::from_ref", "as_slice") + }; + + fmtln!( + fmt, + "pub fn {}<'a>(&'a {}self, pool: &'a {}ir::ValueListPool) -> &{}[Value] {{", + method, + mut_, + mut_, + mut_ + ); + fmt.indent(|fmt| { + let mut m = Match::new("*self"); + for format in formats { + let name = format!("Self::{}", format.name); + + // Formats with a value list put all of their arguments in the list. We don't split + // them up, just return it all as variable arguments. (I expect the distinction to go + // away). 
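// (Illustration of the arms this generates for the immutable case, format
// names hypothetical:
//
//   Self::Call { ref args, .. } => args.as_slice(pool),
//   Self::Unary { ref arg, .. } => core::slice::from_ref(arg),
//   Self::Binary { args: ref args_arity2, .. } => args_arity2,
//
// The mutable variant substitutes `ref mut`, `as_mut_slice` and
// `core::slice::from_mut`.)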
+ if format.has_value_list { + m.arm( + name, + vec![format!("ref {}args", mut_), "..".to_string()], + format!("args.{}(pool)", as_slice), + ); + continue; + } + + // Fixed args. + let mut fields = Vec::new(); + let arg = if format.num_value_operands == 0 { + format!("&{}[]", mut_) + } else if format.num_value_operands == 1 { + fields.push(format!("ref {}arg", mut_)); + format!("{}(arg)", rslice) + } else { + let arg = format!("args_arity{}", format.num_value_operands); + fields.push(format!("args: ref {}{}", mut_, arg)); + arg + }; + fields.push("..".into()); + + m.arm(name, fields, arg); + } + fmt.add_match(m); + }); + fmtln!(fmt, "}"); +} + +/// Generate the boring parts of the InstructionData implementation. +/// +/// These methods in `impl InstructionData` can be generated automatically from the instruction +/// formats: +/// +/// - `pub fn opcode(&self) -> Opcode` +/// - `pub fn arguments(&self, &pool) -> &[Value]` +/// - `pub fn arguments_mut(&mut self, &pool) -> &mut [Value]` +/// - `pub fn take_value_list(&mut self) -> Option<ir::ValueList>` +/// - `pub fn put_value_list(&mut self, args: ir::ValueList>` +/// - `pub fn eq(&self, &other: Self, &pool) -> bool` +/// - `pub fn hash<H: Hasher>(&self, state: &mut H, &pool)` +fn gen_instruction_data_impl(formats: &[&InstructionFormat], fmt: &mut Formatter) { + fmt.line("impl InstructionData {"); + fmt.indent(|fmt| { + fmt.doc_comment("Get the opcode of this instruction."); + fmt.line("pub fn opcode(&self) -> Opcode {"); + fmt.indent(|fmt| { + let mut m = Match::new("*self"); + for format in formats { + m.arm(format!("Self::{}", format.name), vec!["opcode", ".."], + "opcode".to_string()); + } + fmt.add_match(m); + }); + fmt.line("}"); + fmt.empty_line(); + + fmt.doc_comment("Get the controlling type variable operand."); + fmt.line("pub fn typevar_operand(&self, pool: &ir::ValueListPool) -> Option<Value> {"); + fmt.indent(|fmt| { + let mut m = Match::new("*self"); + for format in formats { + let name = format!("Self::{}", format.name); + if format.typevar_operand.is_none() { + m.arm(name, vec![".."], "None".to_string()); + } else if format.has_value_list { + // We keep all arguments in a value list. + m.arm(name, vec!["ref args", ".."], format!("args.get({}, pool)", format.typevar_operand.unwrap())); + } else if format.num_value_operands == 1 { + m.arm(name, vec!["arg", ".."], "Some(arg)".to_string()); + } else { + // We have multiple value operands and an array `args`. + // Which `args` index to use? + let args = format!("args_arity{}", format.num_value_operands); + m.arm(name, vec![format!("args: ref {}", args), "..".to_string()], + format!("Some({}[{}])", args, format.typevar_operand.unwrap())); + } + } + fmt.add_match(m); + }); + fmt.line("}"); + fmt.empty_line(); + + fmt.doc_comment("Get the value arguments to this instruction."); + gen_arguments_method(formats, fmt, false); + fmt.empty_line(); + + fmt.doc_comment(r#"Get mutable references to the value arguments to this + instruction."#); + gen_arguments_method(formats, fmt, true); + fmt.empty_line(); + + fmt.doc_comment(r#" + Take out the value list with all the value arguments and return + it. + + This leaves the value list in the instruction empty. Use + `put_value_list` to put the value list back. 
+ "#); + fmt.line("pub fn take_value_list(&mut self) -> Option<ir::ValueList> {"); + fmt.indent(|fmt| { + let mut m = Match::new("*self"); + + for format in formats { + if format.has_value_list { + m.arm(format!("Self::{}", format.name), + vec!["ref mut args", ".."], + "Some(args.take())".to_string()); + } + } + + m.arm_no_fields("_", "None"); + + fmt.add_match(m); + }); + fmt.line("}"); + fmt.empty_line(); + + fmt.doc_comment(r#" + Put back a value list. + + After removing a value list with `take_value_list()`, use this + method to put it back. It is required that this instruction has + a format that accepts a value list, and that the existing value + list is empty. This avoids leaking list pool memory. + "#); + fmt.line("pub fn put_value_list(&mut self, vlist: ir::ValueList) {"); + fmt.indent(|fmt| { + fmt.line("let args = match *self {"); + fmt.indent(|fmt| { + for format in formats { + if format.has_value_list { + fmtln!(fmt, "Self::{} {{ ref mut args, .. }} => args,", format.name); + } + } + fmt.line("_ => panic!(\"No value list: {:?}\", self),"); + }); + fmt.line("};"); + fmt.line("debug_assert!(args.is_empty(), \"Value list already in use\");"); + fmt.line("*args = vlist;"); + }); + fmt.line("}"); + fmt.empty_line(); + + fmt.doc_comment(r#" + Compare two `InstructionData` for equality. + + This operation requires a reference to a `ValueListPool` to + determine if the contents of any `ValueLists` are equal. + "#); + fmt.line("pub fn eq(&self, other: &Self, pool: &ir::ValueListPool) -> bool {"); + fmt.indent(|fmt| { + fmt.line("if ::core::mem::discriminant(self) != ::core::mem::discriminant(other) {"); + fmt.indent(|fmt| { + fmt.line("return false;"); + }); + fmt.line("}"); + + fmt.line("match (self, other) {"); + fmt.indent(|fmt| { + for format in formats { + let name = format!("&Self::{}", format.name); + let mut members = vec!["opcode"]; + + let args_eq = if format.typevar_operand.is_none() { + None + } else if format.has_value_list { + members.push("args"); + Some("args1.as_slice(pool) == args2.as_slice(pool)") + } else if format.num_value_operands == 1 { + members.push("arg"); + Some("arg1 == arg2") + } else { + members.push("args"); + Some("args1 == args2") + }; + + for field in &format.imm_fields { + members.push(field.member); + } + + let pat1 = members.iter().map(|x| format!("{}: ref {}1", x, x)).collect::<Vec<_>>().join(", "); + let pat2 = members.iter().map(|x| format!("{}: ref {}2", x, x)).collect::<Vec<_>>().join(", "); + fmtln!(fmt, "({} {{ {} }}, {} {{ {} }}) => {{", name, pat1, name, pat2); + fmt.indent(|fmt| { + fmt.line("opcode1 == opcode2"); + for field in &format.imm_fields { + fmtln!(fmt, "&& {}1 == {}2", field.member, field.member); + } + if let Some(args_eq) = args_eq { + fmtln!(fmt, "&& {}", args_eq); + } + }); + fmtln!(fmt, "}"); + } + fmt.line("_ => unreachable!()"); + }); + fmt.line("}"); + }); + fmt.line("}"); + fmt.empty_line(); + + fmt.doc_comment(r#" + Hash an `InstructionData`. + + This operation requires a reference to a `ValueListPool` to + hash the contents of any `ValueLists`. 
+ "#); + fmt.line("pub fn hash<H: ::core::hash::Hasher>(&self, state: &mut H, pool: &ir::ValueListPool) {"); + fmt.indent(|fmt| { + fmt.line("match *self {"); + fmt.indent(|fmt| { + for format in formats { + let name = format!("Self::{}", format.name); + let mut members = vec!["opcode"]; + + let args = if format.typevar_operand.is_none() { + "&()" + } else if format.has_value_list { + members.push("ref args"); + "args.as_slice(pool)" + } else if format.num_value_operands == 1 { + members.push("ref arg"); + "arg" + } else { + members.push("ref args"); + "args" + }; + + for field in &format.imm_fields { + members.push(field.member); + } + let members = members.join(", "); + + fmtln!(fmt, "{}{{{}}} => {{", name, members ); // beware the moustaches + fmt.indent(|fmt| { + fmt.line("::core::hash::Hash::hash( &::core::mem::discriminant(self), state);"); + fmt.line("::core::hash::Hash::hash(&opcode, state);"); + for field in &format.imm_fields { + fmtln!(fmt, "::core::hash::Hash::hash(&{}, state);", field.member); + } + fmtln!(fmt, "::core::hash::Hash::hash({}, state);", args); + }); + fmtln!(fmt, "}"); + } + }); + fmt.line("}"); + }); + fmt.line("}"); + }); + fmt.line("}"); +} + +fn gen_bool_accessor<T: Fn(&Instruction) -> bool>( + all_inst: &AllInstructions, + get_attr: T, + name: &'static str, + doc: &'static str, + fmt: &mut Formatter, +) { + fmt.doc_comment(doc); + fmtln!(fmt, "pub fn {}(self) -> bool {{", name); + fmt.indent(|fmt| { + let mut m = Match::new("self"); + for inst in all_inst.values() { + if get_attr(inst) { + m.arm_no_fields(format!("Self::{}", inst.camel_name), "true"); + } + } + m.arm_no_fields("_", "false"); + fmt.add_match(m); + }); + fmtln!(fmt, "}"); + fmt.empty_line(); +} + +fn gen_opcodes(all_inst: &AllInstructions, fmt: &mut Formatter) { + fmt.doc_comment( + r#" + An instruction opcode. + + All instructions from all supported ISAs are present. + "#, + ); + fmt.line("#[repr(u16)]"); + fmt.line("#[derive(Copy, Clone, PartialEq, Eq, Debug, Hash)]"); + fmt.line( + r#"#[cfg_attr(feature = "enable-peepmatic", derive(serde::Serialize, serde::Deserialize))]"# + ); + + // We explicitly set the discriminant of the first variant to 1, which allows us to take + // advantage of the NonZero optimization, meaning that wrapping enums can use the 0 + // discriminant instead of increasing the size of the whole type, and so the size of + // Option<Opcode> is the same as Opcode's. + fmt.line("pub enum Opcode {"); + fmt.indent(|fmt| { + let mut is_first_opcode = true; + for inst in all_inst.values() { + fmt.doc_comment(format!("`{}`. ({})", inst, inst.format.name)); + + // Document polymorphism. + if let Some(poly) = &inst.polymorphic_info { + if poly.use_typevar_operand { + let op_num = inst.value_opnums[inst.format.typevar_operand.unwrap()]; + fmt.doc_comment(format!( + "Type inferred from `{}`.", + inst.operands_in[op_num].name + )); + } + } + + // Enum variant itself. + if is_first_opcode { + assert!(inst.opcode_number.index() == 0); + // TODO the python crate requires opcode numbers to start from one. 
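// (Illustration: because no variant ever uses discriminant 0, the compiler
// can use 0 as the `None` niche, so the generated enum satisfies
// `assert_eq!(size_of::<Option<Opcode>>(), size_of::<Opcode>())`.)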
+ fmtln!(fmt, "{} = 1,", inst.camel_name); + is_first_opcode = false; + } else { + fmtln!(fmt, "{},", inst.camel_name) + } + } + }); + fmt.line("}"); + fmt.empty_line(); + + fmt.line("impl Opcode {"); + fmt.indent(|fmt| { + gen_bool_accessor( + all_inst, + |inst| inst.is_terminator, + "is_terminator", + "True for instructions that terminate the block", + fmt, + ); + gen_bool_accessor( + all_inst, + |inst| inst.is_branch, + "is_branch", + "True for all branch or jump instructions.", + fmt, + ); + gen_bool_accessor( + all_inst, + |inst| inst.is_indirect_branch, + "is_indirect_branch", + "True for all indirect branch or jump instructions.", + fmt, + ); + gen_bool_accessor( + all_inst, + |inst| inst.is_call, + "is_call", + "Is this a call instruction?", + fmt, + ); + gen_bool_accessor( + all_inst, + |inst| inst.is_return, + "is_return", + "Is this a return instruction?", + fmt, + ); + gen_bool_accessor( + all_inst, + |inst| inst.is_ghost, + "is_ghost", + "Is this a ghost instruction?", + fmt, + ); + gen_bool_accessor( + all_inst, + |inst| inst.can_load, + "can_load", + "Can this instruction read from memory?", + fmt, + ); + gen_bool_accessor( + all_inst, + |inst| inst.can_store, + "can_store", + "Can this instruction write to memory?", + fmt, + ); + gen_bool_accessor( + all_inst, + |inst| inst.can_trap, + "can_trap", + "Can this instruction cause a trap?", + fmt, + ); + gen_bool_accessor( + all_inst, + |inst| inst.other_side_effects, + "other_side_effects", + "Does this instruction have other side effects besides can_* flags?", + fmt, + ); + gen_bool_accessor( + all_inst, + |inst| inst.writes_cpu_flags, + "writes_cpu_flags", + "Does this instruction write to CPU flags?", + fmt, + ); + gen_bool_accessor( + all_inst, + |inst| inst.clobbers_all_regs, + "clobbers_all_regs", + "Should this opcode be considered to clobber all the registers, during regalloc?", + fmt, + ); + }); + fmt.line("}"); + fmt.empty_line(); + + // Generate a private opcode_format table. + fmtln!( + fmt, + "const OPCODE_FORMAT: [InstructionFormat; {}] = [", + all_inst.len() + ); + fmt.indent(|fmt| { + for inst in all_inst.values() { + fmtln!( + fmt, + "InstructionFormat::{}, // {}", + inst.format.name, + inst.name + ); + } + }); + fmtln!(fmt, "];"); + fmt.empty_line(); + + // Generate a private opcode_name function. + fmt.line("fn opcode_name(opc: Opcode) -> &\'static str {"); + fmt.indent(|fmt| { + let mut m = Match::new("opc"); + for inst in all_inst.values() { + m.arm_no_fields( + format!("Opcode::{}", inst.camel_name), + format!("\"{}\"", inst.name), + ); + } + fmt.add_match(m); + }); + fmt.line("}"); + fmt.empty_line(); + + // Generate an opcode hash table for looking up opcodes by name. 
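// (Illustration, opcode names and table length hypothetical: the emitted
// table is a flat, open-addressed array of `Option<Opcode>` whose starting
// probe slot is derived from `simple_hash` of the instruction name, so a
// by-name lookup avoids a `match` over every name string:
//
//   const OPCODE_HASH_TABLE: [Option<Opcode>; 128] = [
//       None,
//       Some(Opcode::Iadd),
//       None,
//       // ...
//   ];
// )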
+ let hash_table = constant_hash::generate_table(all_inst.values(), all_inst.len(), |inst| { + constant_hash::simple_hash(&inst.name) + }); + fmtln!( + fmt, + "const OPCODE_HASH_TABLE: [Option<Opcode>; {}] = [", + hash_table.len() + ); + fmt.indent(|fmt| { + for i in hash_table { + match i { + Some(i) => fmtln!(fmt, "Some(Opcode::{}),", i.camel_name), + None => fmtln!(fmt, "None,"), + } + } + }); + fmtln!(fmt, "];"); + fmt.empty_line(); +} + +fn gen_try_from(all_inst: &AllInstructions, fmt: &mut Formatter) { + fmt.line("impl core::convert::TryFrom<u16> for Opcode {"); + fmt.indent(|fmt| { + fmt.line("type Error = ();"); + fmt.line("#[inline]"); + fmt.line("fn try_from(x: u16) -> Result<Self, ()> {"); + fmt.indent(|fmt| { + fmtln!(fmt, "if 0 < x && x <= {} {{", all_inst.len()); + fmt.indent(|fmt| fmt.line("Ok(unsafe { core::mem::transmute(x) })")); + fmt.line("} else {"); + fmt.indent(|fmt| fmt.line("Err(())")); + fmt.line("}"); + }); + fmt.line("}"); + }); + fmt.line("}"); +} + +/// Get the value type constraint for an SSA value operand, where +/// `ctrl_typevar` is the controlling type variable. +/// +/// Each operand constraint is represented as a string, one of: +/// - `Concrete(vt)`, where `vt` is a value type name. +/// - `Free(idx)` where `idx` is an index into `type_sets`. +/// - `Same`, `Lane`, `AsBool` for controlling typevar-derived constraints. +fn get_constraint<'entries, 'table>( + operand: &'entries Operand, + ctrl_typevar: Option<&TypeVar>, + type_sets: &'table mut UniqueTable<'entries, TypeSet>, +) -> String { + assert!(operand.is_value()); + let type_var = operand.type_var().unwrap(); + + if let Some(typ) = type_var.singleton_type() { + return format!("Concrete({})", typ.rust_name()); + } + + if let Some(free_typevar) = type_var.free_typevar() { + if ctrl_typevar.is_some() && free_typevar != *ctrl_typevar.unwrap() { + assert!(type_var.base.is_none()); + return format!("Free({})", type_sets.add(&type_var.get_raw_typeset())); + } + } + + if let Some(base) = &type_var.base { + assert!(base.type_var == *ctrl_typevar.unwrap()); + return camel_case(base.derived_func.name()); + } + + assert!(type_var == ctrl_typevar.unwrap()); + "Same".into() +} + +fn gen_bitset<'a, T: IntoIterator<Item = &'a u16>>( + iterable: T, + name: &'static str, + field_size: u8, + fmt: &mut Formatter, +) { + let bits = iterable.into_iter().fold(0, |acc, x| { + assert!(x.is_power_of_two()); + assert!(u32::from(*x) < (1 << u32::from(field_size))); + acc | x + }); + fmtln!(fmt, "{}: BitSet::<u{}>({}),", name, field_size, bits); +} + +fn iterable_to_string<I: fmt::Display, T: IntoIterator<Item = I>>(iterable: T) -> String { + let elems = iterable + .into_iter() + .map(|x| x.to_string()) + .collect::<Vec<_>>() + .join(", "); + format!("{{{}}}", elems) +} + +fn typeset_to_string(ts: &TypeSet) -> String { + let mut result = format!("TypeSet(lanes={}", iterable_to_string(&ts.lanes)); + if !ts.ints.is_empty() { + result += &format!(", ints={}", iterable_to_string(&ts.ints)); + } + if !ts.floats.is_empty() { + result += &format!(", floats={}", iterable_to_string(&ts.floats)); + } + if !ts.bools.is_empty() { + result += &format!(", bools={}", iterable_to_string(&ts.bools)); + } + if !ts.specials.is_empty() { + result += &format!(", specials=[{}]", iterable_to_string(&ts.specials)); + } + if !ts.refs.is_empty() { + result += &format!(", refs={}", iterable_to_string(&ts.refs)); + } + result += ")"; + result +} + +/// Generate the table of ValueTypeSets described by type_sets. 
+pub(crate) fn gen_typesets_table(type_sets: &UniqueTable<TypeSet>, fmt: &mut Formatter) { + if type_sets.len() == 0 { + return; + } + + fmt.comment("Table of value type sets."); + assert!(type_sets.len() <= TYPESET_LIMIT, "Too many type sets!"); + fmtln!( + fmt, + "const TYPE_SETS: [ir::instructions::ValueTypeSet; {}] = [", + type_sets.len() + ); + fmt.indent(|fmt| { + for ts in type_sets.iter() { + fmt.line("ir::instructions::ValueTypeSet {"); + fmt.indent(|fmt| { + fmt.comment(typeset_to_string(ts)); + gen_bitset(&ts.lanes, "lanes", 16, fmt); + gen_bitset(&ts.ints, "ints", 8, fmt); + gen_bitset(&ts.floats, "floats", 8, fmt); + gen_bitset(&ts.bools, "bools", 8, fmt); + gen_bitset(&ts.refs, "refs", 8, fmt); + }); + fmt.line("},"); + } + }); + fmtln!(fmt, "];"); +} + +/// Generate value type constraints for all instructions. +/// - Emit a compact constant table of ValueTypeSet objects. +/// - Emit a compact constant table of OperandConstraint objects. +/// - Emit an opcode-indexed table of instruction constraints. +fn gen_type_constraints(all_inst: &AllInstructions, fmt: &mut Formatter) { + // Table of TypeSet instances. + let mut type_sets = UniqueTable::new(); + + // Table of operand constraint sequences (as tuples). Each operand + // constraint is represented as a string, one of: + // - `Concrete(vt)`, where `vt` is a value type name. + // - `Free(idx)` where `idx` is an index into `type_sets`. + // - `Same`, `Lane`, `AsBool` for controlling typevar-derived constraints. + let mut operand_seqs = UniqueSeqTable::new(); + + // Preload table with constraints for typical binops. + #[allow(clippy::useless_vec)] + operand_seqs.add(&vec!["Same".to_string(); 3]); + + fmt.comment("Table of opcode constraints."); + fmtln!( + fmt, + "const OPCODE_CONSTRAINTS: [OpcodeConstraints; {}] = [", + all_inst.len() + ); + fmt.indent(|fmt| { + for inst in all_inst.values() { + let (ctrl_typevar, ctrl_typeset) = if let Some(poly) = &inst.polymorphic_info { + let index = type_sets.add(&*poly.ctrl_typevar.get_raw_typeset()); + (Some(&poly.ctrl_typevar), index) + } else { + (None, TYPESET_LIMIT) + }; + + // Collect constraints for the value results, not including `variable_args` results + // which are always special cased. + let mut constraints = Vec::new(); + for &index in &inst.value_results { + constraints.push(get_constraint(&inst.operands_out[index], ctrl_typevar, &mut type_sets)); + } + for &index in &inst.value_opnums { + constraints.push(get_constraint(&inst.operands_in[index], ctrl_typevar, &mut type_sets)); + } + + let constraint_offset = operand_seqs.add(&constraints); + + let fixed_results = inst.value_results.len(); + let fixed_values = inst.value_opnums.len(); + + // Can the controlling type variable be inferred from the designated operand? + let use_typevar_operand = if let Some(poly) = &inst.polymorphic_info { + poly.use_typevar_operand + } else { + false + }; + + // Can the controlling type variable be inferred from the result? + let use_result = fixed_results > 0 && inst.operands_out[inst.value_results[0]].type_var() == ctrl_typevar; + + // Are we required to use the designated operand instead of the result? 
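// (Yes, when the controlling type can only be recovered from the designated
// operand — for instance an instruction whose result type is fixed no matter
// what its inputs are. The `flags` value packed a few lines below uses this
// layout:
//
//   bits 0..=2  fixed_results
//   bit  3      use_typevar_operand
//   bit  4      requires_typevar_operand
//   bits 5..    fixed_values
// )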
+ let requires_typevar_operand = use_typevar_operand && !use_result; + + fmt.comment( + format!("{}: fixed_results={}, use_typevar_operand={}, requires_typevar_operand={}, fixed_values={}", + inst.camel_name, + fixed_results, + use_typevar_operand, + requires_typevar_operand, + fixed_values) + ); + fmt.comment(format!("Constraints=[{}]", constraints + .iter() + .map(|x| format!("'{}'", x)) + .collect::<Vec<_>>() + .join(", "))); + if let Some(poly) = &inst.polymorphic_info { + fmt.comment(format!("Polymorphic over {}", typeset_to_string(&poly.ctrl_typevar.get_raw_typeset()))); + } + + // Compute the bit field encoding, c.f. instructions.rs. + assert!(fixed_results < 8 && fixed_values < 8, "Bit field encoding too tight"); + let mut flags = fixed_results; // 3 bits + if use_typevar_operand { + flags |= 1<<3; // 4th bit + } + if requires_typevar_operand { + flags |= 1<<4; // 5th bit + } + flags |= fixed_values << 5; // 6th bit and more + + fmt.line("OpcodeConstraints {"); + fmt.indent(|fmt| { + fmtln!(fmt, "flags: {:#04x},", flags); + fmtln!(fmt, "typeset_offset: {},", ctrl_typeset); + fmtln!(fmt, "constraint_offset: {},", constraint_offset); + }); + fmt.line("},"); + } + }); + fmtln!(fmt, "];"); + fmt.empty_line(); + + gen_typesets_table(&type_sets, fmt); + fmt.empty_line(); + + fmt.comment("Table of operand constraint sequences."); + fmtln!( + fmt, + "const OPERAND_CONSTRAINTS: [OperandConstraint; {}] = [", + operand_seqs.len() + ); + fmt.indent(|fmt| { + for constraint in operand_seqs.iter() { + fmtln!(fmt, "OperandConstraint::{},", constraint); + } + }); + fmtln!(fmt, "];"); +} + +/// Emit member initializers for an instruction format. +fn gen_member_inits(format: &InstructionFormat, fmt: &mut Formatter) { + // Immediate operands. + // We have local variables with the same names as the members. + for f in &format.imm_fields { + fmtln!(fmt, "{},", f.member); + } + + // Value operands. + if format.has_value_list { + fmt.line("args,"); + } else if format.num_value_operands == 1 { + fmt.line("arg: arg0,"); + } else if format.num_value_operands > 1 { + let mut args = Vec::new(); + for i in 0..format.num_value_operands { + args.push(format!("arg{}", i)); + } + fmtln!(fmt, "args: [{}],", args.join(", ")); + } +} + +/// Emit a method for creating and inserting an instruction format. +/// +/// All instruction formats take an `opcode` argument and a `ctrl_typevar` argument for deducing +/// the result types. +fn gen_format_constructor(format: &InstructionFormat, fmt: &mut Formatter) { + // Construct method arguments. + let mut args = vec![ + "self".to_string(), + "opcode: Opcode".into(), + "ctrl_typevar: Type".into(), + ]; + + // Normal operand arguments. Start with the immediate operands. + for f in &format.imm_fields { + args.push(format!("{}: {}", f.member, f.kind.rust_type)); + } + + // Then the value operands. + if format.has_value_list { + // Take all value arguments as a finished value list. The value lists + // are created by the individual instruction constructors. + args.push("args: ir::ValueList".into()); + } else { + // Take a fixed number of value operands. 
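// (Illustration of a finished format constructor, assuming a hypothetical
// two-operand `Binary` format with no immediate fields:
//
//   fn Binary(self, opcode: Opcode, ctrl_typevar: Type, arg0: Value, arg1: Value)
//       -> (Inst, &'f mut ir::DataFlowGraph) {
//       let data = ir::InstructionData::Binary { opcode, args: [arg0, arg1] };
//       self.build(data, ctrl_typevar)
//   }
// )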
+ for i in 0..format.num_value_operands { + args.push(format!("arg{}: Value", i)); + } + } + + let proto = format!( + "{}({}) -> (Inst, &'f mut ir::DataFlowGraph)", + format.name, + args.join(", ") + ); + + let imms_need_sign_extension = format + .imm_fields + .iter() + .any(|f| f.kind.rust_type == "ir::immediates::Imm64"); + + fmt.doc_comment(format.to_string()); + fmt.line("#[allow(non_snake_case)]"); + fmtln!(fmt, "fn {} {{", proto); + fmt.indent(|fmt| { + // Generate the instruction data. + fmtln!( + fmt, + "let{} data = ir::InstructionData::{} {{", + if imms_need_sign_extension { " mut" } else { "" }, + format.name + ); + fmt.indent(|fmt| { + fmt.line("opcode,"); + gen_member_inits(format, fmt); + }); + fmtln!(fmt, "};"); + + if imms_need_sign_extension { + fmtln!(fmt, "data.sign_extend_immediates(ctrl_typevar);"); + } + + fmt.line("self.build(data, ctrl_typevar)"); + }); + fmtln!(fmt, "}"); +} + +/// Emit a method for generating the instruction `inst`. +/// +/// The method will create and insert an instruction, then return the result values, or the +/// instruction reference itself for instructions that don't have results. +fn gen_inst_builder(inst: &Instruction, format: &InstructionFormat, fmt: &mut Formatter) { + // Construct method arguments. + let mut args = vec![if format.has_value_list { + "mut self" + } else { + "self" + } + .to_string()]; + + let mut args_doc = Vec::new(); + let mut rets_doc = Vec::new(); + + // The controlling type variable will be inferred from the input values if + // possible. Otherwise, it is the first method argument. + if let Some(poly) = &inst.polymorphic_info { + if !poly.use_typevar_operand { + args.push(format!("{}: crate::ir::Type", poly.ctrl_typevar.name)); + args_doc.push(format!( + "- {} (controlling type variable): {}", + poly.ctrl_typevar.name, poly.ctrl_typevar.doc + )); + } + } + + let mut tmpl_types = Vec::new(); + let mut into_args = Vec::new(); + for op in &inst.operands_in { + let t = if op.is_immediate() { + let t = format!("T{}", tmpl_types.len() + 1); + tmpl_types.push(format!("{}: Into<{}>", t, op.kind.rust_type)); + into_args.push(op.name); + t + } else { + op.kind.rust_type.to_string() + }; + args.push(format!("{}: {}", op.name, t)); + args_doc.push(format!( + "- {}: {}", + op.name, + op.doc() + .expect("every instruction's input operand must be documented") + )); + } + + for op in &inst.operands_out { + rets_doc.push(format!( + "- {}: {}", + op.name, + op.doc() + .expect("every instruction's output operand must be documented") + )); + } + + let rtype = match inst.value_results.len() { + 0 => "Inst".into(), + 1 => "Value".into(), + _ => format!("({})", vec!["Value"; inst.value_results.len()].join(", ")), + }; + + let tmpl = if !tmpl_types.is_empty() { + format!("<{}>", tmpl_types.join(", ")) + } else { + "".into() + }; + + let proto = format!( + "{}{}({}) -> {}", + inst.snake_name(), + tmpl, + args.join(", "), + rtype + ); + + fmt.doc_comment(&inst.doc); + if !args_doc.is_empty() { + fmt.line("///"); + fmt.doc_comment("Inputs:"); + fmt.line("///"); + for doc_line in args_doc { + fmt.doc_comment(doc_line); + } + } + if !rets_doc.is_empty() { + fmt.line("///"); + fmt.doc_comment("Outputs:"); + fmt.line("///"); + for doc_line in rets_doc { + fmt.doc_comment(doc_line); + } + } + + fmt.line("#[allow(non_snake_case)]"); + fmtln!(fmt, "fn {} {{", proto); + fmt.indent(|fmt| { + // Convert all of the `Into<>` arguments. 
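// (Illustration of a finished per-instruction method, assuming a hypothetical
// polymorphic `iadd` with two value operands and one result:
//
//   fn iadd(self, x: Value, y: Value) -> Value {
//       let ctrl_typevar = self.data_flow_graph().value_type(x);
//       let (inst, dfg) = self.Binary(Opcode::Iadd, ctrl_typevar, x, y);
//       dfg.first_result(inst)
//   }
// )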
+ for arg in &into_args { + fmtln!(fmt, "let {} = {}.into();", arg, arg); + } + + // Arguments for instruction constructor. + let first_arg = format!("Opcode::{}", inst.camel_name); + let mut args = vec![first_arg.as_str()]; + if let Some(poly) = &inst.polymorphic_info { + if poly.use_typevar_operand { + // Infer the controlling type variable from the input operands. + let op_num = inst.value_opnums[format.typevar_operand.unwrap()]; + fmtln!( + fmt, + "let ctrl_typevar = self.data_flow_graph().value_type({});", + inst.operands_in[op_num].name + ); + + // The format constructor will resolve the result types from the type var. + args.push("ctrl_typevar"); + } else { + // This was an explicit method argument. + args.push(&poly.ctrl_typevar.name); + } + } else { + // No controlling type variable needed. + args.push("types::INVALID"); + } + + // Now add all of the immediate operands to the constructor arguments. + for &op_num in &inst.imm_opnums { + args.push(inst.operands_in[op_num].name); + } + + // Finally, the value operands. + if format.has_value_list { + // We need to build a value list with all the arguments. + fmt.line("let mut vlist = ir::ValueList::default();"); + args.push("vlist"); + fmt.line("{"); + fmt.indent(|fmt| { + fmt.line("let pool = &mut self.data_flow_graph_mut().value_lists;"); + for op in &inst.operands_in { + if op.is_value() { + fmtln!(fmt, "vlist.push({}, pool);", op.name); + } else if op.is_varargs() { + fmtln!(fmt, "vlist.extend({}.iter().cloned(), pool);", op.name); + } + } + }); + fmt.line("}"); + } else { + // With no value list, we're guaranteed to just have a set of fixed value operands. + for &op_num in &inst.value_opnums { + args.push(inst.operands_in[op_num].name); + } + } + + // Call to the format constructor, + let fcall = format!("self.{}({})", format.name, args.join(", ")); + + if inst.value_results.is_empty() { + fmtln!(fmt, "{}.0", fcall); + return; + } + + fmtln!(fmt, "let (inst, dfg) = {};", fcall); + if inst.value_results.len() == 1 { + fmt.line("dfg.first_result(inst)"); + } else { + fmtln!( + fmt, + "let results = &dfg.inst_results(inst)[0..{}];", + inst.value_results.len() + ); + fmtln!( + fmt, + "({})", + inst.value_results + .iter() + .enumerate() + .map(|(i, _)| format!("results[{}]", i)) + .collect::<Vec<_>>() + .join(", ") + ); + } + }); + fmtln!(fmt, "}") +} + +/// Generate a Builder trait with methods for all instructions. +fn gen_builder( + instructions: &AllInstructions, + formats: &[&InstructionFormat], + fmt: &mut Formatter, +) { + fmt.doc_comment( + r#" + Convenience methods for building instructions. + + The `InstBuilder` trait has one method per instruction opcode for + conveniently constructing the instruction with minimum arguments. + Polymorphic instructions infer their result types from the input + arguments when possible. In some cases, an explicit `ctrl_typevar` + argument is required. + + The opcode methods return the new instruction's result values, or + the `Inst` itself for instructions that don't have any results. + + There is also a method per instruction format. These methods all + return an `Inst`. 
+ "#, + ); + fmt.line("pub trait InstBuilder<'f>: InstBuilderBase<'f> {"); + fmt.indent(|fmt| { + for inst in instructions.values() { + gen_inst_builder(inst, &*inst.format, fmt); + fmt.empty_line(); + } + for (i, format) in formats.iter().enumerate() { + gen_format_constructor(format, fmt); + if i + 1 != formats.len() { + fmt.empty_line(); + } + } + }); + fmt.line("}"); +} + +pub(crate) fn generate( + formats: Vec<&InstructionFormat>, + all_inst: &AllInstructions, + opcode_filename: &str, + inst_builder_filename: &str, + out_dir: &str, +) -> Result<(), error::Error> { + // Opcodes. + let mut fmt = Formatter::new(); + gen_formats(&formats, &mut fmt); + gen_instruction_data(&formats, &mut fmt); + fmt.empty_line(); + gen_instruction_data_impl(&formats, &mut fmt); + fmt.empty_line(); + gen_opcodes(all_inst, &mut fmt); + fmt.empty_line(); + gen_type_constraints(all_inst, &mut fmt); + fmt.empty_line(); + gen_try_from(all_inst, &mut fmt); + fmt.update_file(opcode_filename, out_dir)?; + + // Instruction builder. + let mut fmt = Formatter::new(); + gen_builder(all_inst, &formats, &mut fmt); + fmt.update_file(inst_builder_filename, out_dir)?; + + Ok(()) +} diff --git a/third_party/rust/cranelift-codegen-meta/src/gen_legalizer.rs b/third_party/rust/cranelift-codegen-meta/src/gen_legalizer.rs new file mode 100644 index 0000000000..7b56b8db48 --- /dev/null +++ b/third_party/rust/cranelift-codegen-meta/src/gen_legalizer.rs @@ -0,0 +1,734 @@ +//! Generate transformations to legalize instructions without encodings. +use crate::cdsl::ast::{Def, DefPool, Expr, VarPool}; +use crate::cdsl::isa::TargetIsa; +use crate::cdsl::operands::Operand; +use crate::cdsl::type_inference::Constraint; +use crate::cdsl::typevar::{TypeSet, TypeVar}; +use crate::cdsl::xform::{Transform, TransformGroup, TransformGroups}; + +use crate::error; +use crate::gen_inst::gen_typesets_table; +use crate::srcgen::Formatter; +use crate::unique_table::UniqueTable; + +use std::collections::{HashMap, HashSet}; +use std::iter::FromIterator; + +/// Given a `Def` node, emit code that extracts all the instruction fields from +/// `pos.func.dfg[iref]`. +/// +/// Create local variables named after the `Var` instances in `node`. +/// +/// Also create a local variable named `predicate` with the value of the evaluated instruction +/// predicate, or `true` if the node has no predicate. +fn unwrap_inst(transform: &Transform, fmt: &mut Formatter) -> bool { + let var_pool = &transform.var_pool; + let def_pool = &transform.def_pool; + + let def = def_pool.get(transform.src); + let apply = &def.apply; + let inst = &apply.inst; + let iform = &inst.format; + + fmt.comment(format!( + "Unwrap fields from instruction format {}", + def.to_comment_string(&transform.var_pool) + )); + + // Extract the Var arguments. + let arg_names = apply + .args + .iter() + .enumerate() + .filter(|(arg_num, _)| { + // Variable args are specially handled after extracting args. + !inst.operands_in[*arg_num].is_varargs() + }) + .map(|(arg_num, arg)| match &arg { + Expr::Var(var_index) => var_pool.get(*var_index).name.as_ref(), + Expr::Literal(_) => { + let n = inst.imm_opnums.iter().position(|&i| i == arg_num).unwrap(); + iform.imm_fields[n].member + } + }) + .collect::<Vec<_>>() + .join(", "); + + // May we need "args" in the values consumed by predicates? + let emit_args = iform.num_value_operands >= 1 || iform.has_value_list; + + // We need a tuple: + // - if there's at least one value operand, then we emit a variable for the value, and the + // value list as args. 
+ // - otherwise, if there's the count of immediate operands added to the presence of a value list exceeds one. + let need_tuple = if iform.num_value_operands >= 1 { + true + } else { + let mut imm_and_varargs = inst + .operands_in + .iter() + .filter(|op| op.is_immediate_or_entityref()) + .count(); + if iform.has_value_list { + imm_and_varargs += 1; + } + imm_and_varargs > 1 + }; + + let maybe_args = if emit_args { ", args" } else { "" }; + let defined_values = format!("{}{}", arg_names, maybe_args); + + let tuple_or_value = if need_tuple { + format!("({})", defined_values) + } else { + defined_values + }; + + fmtln!( + fmt, + "let {} = if let ir::InstructionData::{} {{", + tuple_or_value, + iform.name + ); + + fmt.indent(|fmt| { + // Fields are encoded directly. + for field in &iform.imm_fields { + fmtln!(fmt, "{},", field.member); + } + + if iform.has_value_list || iform.num_value_operands > 1 { + fmt.line("ref args,"); + } else if iform.num_value_operands == 1 { + fmt.line("arg,"); + } + + fmt.line(".."); + fmt.outdented_line("} = pos.func.dfg[inst] {"); + + if iform.has_value_list { + fmt.line("let args = args.as_slice(&pos.func.dfg.value_lists);"); + } else if iform.num_value_operands == 1 { + fmt.line("let args = [arg];") + } + + // Generate the values for the tuple. + let emit_one_value = + |fmt: &mut Formatter, needs_comma: bool, op_num: usize, op: &Operand| { + let comma = if needs_comma { "," } else { "" }; + if op.is_immediate_or_entityref() { + let n = inst.imm_opnums.iter().position(|&i| i == op_num).unwrap(); + fmtln!(fmt, "{}{}", iform.imm_fields[n].member, comma); + } else if op.is_value() { + let n = inst.value_opnums.iter().position(|&i| i == op_num).unwrap(); + fmtln!(fmt, "pos.func.dfg.resolve_aliases(args[{}]),", n); + } else { + // This is a value list argument or a varargs. + assert!(iform.has_value_list || op.is_varargs()); + } + }; + + if need_tuple { + fmt.line("("); + fmt.indent(|fmt| { + for (op_num, op) in inst.operands_in.iter().enumerate() { + let needs_comma = emit_args || op_num + 1 < inst.operands_in.len(); + emit_one_value(fmt, needs_comma, op_num, op); + } + if emit_args { + fmt.line("args"); + } + }); + fmt.line(")"); + } else { + // Only one of these can be true at the same time, otherwise we'd need a tuple. + emit_one_value(fmt, false, 0, &inst.operands_in[0]); + if emit_args { + fmt.line("args"); + } + } + + fmt.outdented_line("} else {"); + fmt.line(r#"unreachable!("bad instruction format")"#); + }); + fmtln!(fmt, "};"); + fmt.empty_line(); + + assert_eq!(inst.operands_in.len(), apply.args.len()); + for (i, op) in inst.operands_in.iter().enumerate() { + if op.is_varargs() { + let name = &var_pool + .get(apply.args[i].maybe_var().expect("vararg without name")) + .name; + let n = inst + .imm_opnums + .iter() + .chain(inst.value_opnums.iter()) + .max() + .copied() + .unwrap_or(0); + fmtln!(fmt, "let {} = &Vec::from(&args[{}..]);", name, n); + } + } + + for &op_num in &inst.value_opnums { + let arg = &apply.args[op_num]; + if let Some(var_index) = arg.maybe_var() { + let var = var_pool.get(var_index); + if var.has_free_typevar() { + fmtln!( + fmt, + "let typeof_{} = pos.func.dfg.value_type({});", + var.name, + var.name + ); + } + } + } + + // If the definition creates results, detach the values and place them in locals. 
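// (Illustration of the prologue emitted so far, assuming a hypothetical
// one-value-operand `Unary` source pattern bound to variable `x`:
//
//   let (x, args) = if let ir::InstructionData::Unary { arg, .. } = pos.func.dfg[inst] {
//       let args = [arg];
//       (pos.func.dfg.resolve_aliases(args[0]), args)
//   } else {
//       unreachable!("bad instruction format")
//   };
//
// followed, when `x` has a free type variable, by
// `let typeof_x = pos.func.dfg.value_type(x);`.)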
+ let mut replace_inst = false; + if !def.defined_vars.is_empty() { + if def.defined_vars + == def_pool + .get(var_pool.get(def.defined_vars[0]).dst_def.unwrap()) + .defined_vars + { + // Special case: The instruction replacing node defines the exact same values. + fmt.comment(format!( + "Results handled by {}.", + def_pool + .get(var_pool.get(def.defined_vars[0]).dst_def.unwrap()) + .to_comment_string(var_pool) + )); + + fmt.line("let r = pos.func.dfg.inst_results(inst);"); + for (i, &var_index) in def.defined_vars.iter().enumerate() { + let var = var_pool.get(var_index); + fmtln!(fmt, "let {} = &r[{}];", var.name, i); + fmtln!( + fmt, + "let typeof_{} = pos.func.dfg.value_type(*{});", + var.name, + var.name + ); + } + + replace_inst = true; + } else { + // Boring case: Detach the result values, capture them in locals. + for &var_index in &def.defined_vars { + fmtln!(fmt, "let {};", var_pool.get(var_index).name); + } + + fmt.line("{"); + fmt.indent(|fmt| { + fmt.line("let r = pos.func.dfg.inst_results(inst);"); + for i in 0..def.defined_vars.len() { + let var = var_pool.get(def.defined_vars[i]); + fmtln!(fmt, "{} = r[{}];", var.name, i); + } + }); + fmt.line("}"); + + for &var_index in &def.defined_vars { + let var = var_pool.get(var_index); + if var.has_free_typevar() { + fmtln!( + fmt, + "let typeof_{} = pos.func.dfg.value_type({});", + var.name, + var.name + ); + } + } + } + } + replace_inst +} + +fn build_derived_expr(tv: &TypeVar) -> String { + let base = match &tv.base { + Some(base) => base, + None => { + assert!(tv.name.starts_with("typeof_")); + return format!("Some({})", tv.name); + } + }; + let base_expr = build_derived_expr(&base.type_var); + format!( + "{}.map(|t: crate::ir::Type| t.{}())", + base_expr, + base.derived_func.name() + ) +} + +/// Emit rust code for the given check. +/// +/// The emitted code is a statement redefining the `predicate` variable like this: +/// let predicate = predicate && ... +fn emit_runtime_typecheck<'a>( + constraint: &'a Constraint, + type_sets: &mut UniqueTable<'a, TypeSet>, + fmt: &mut Formatter, +) { + match constraint { + Constraint::InTypeset(tv, ts) => { + let ts_index = type_sets.add(&ts); + fmt.comment(format!( + "{} must belong to {:?}", + tv.name, + type_sets.get(ts_index) + )); + fmtln!( + fmt, + "let predicate = predicate && TYPE_SETS[{}].contains({});", + ts_index, + tv.name + ); + } + Constraint::Eq(tv1, tv2) => { + fmtln!( + fmt, + "let predicate = predicate && match ({}, {}) {{", + build_derived_expr(tv1), + build_derived_expr(tv2) + ); + fmt.indent(|fmt| { + fmt.line("(Some(a), Some(b)) => a == b,"); + fmt.comment("On overflow, constraint doesn\'t apply"); + fmt.line("_ => false,"); + }); + fmtln!(fmt, "};"); + } + Constraint::WiderOrEq(tv1, tv2) => { + fmtln!( + fmt, + "let predicate = predicate && match ({}, {}) {{", + build_derived_expr(tv1), + build_derived_expr(tv2) + ); + fmt.indent(|fmt| { + fmt.line("(Some(a), Some(b)) => a.wider_or_equal(b),"); + fmt.comment("On overflow, constraint doesn\'t apply"); + fmt.line("_ => false,"); + }); + fmtln!(fmt, "};"); + } + } +} + +/// Determine if `node` represents one of the value splitting instructions: `isplit` or `vsplit. +/// These instructions are lowered specially by the `legalize::split` module. 
+fn is_value_split(def: &Def) -> bool { + let name = &def.apply.inst.name; + name == "isplit" || name == "vsplit" +} + +fn emit_dst_inst(def: &Def, def_pool: &DefPool, var_pool: &VarPool, fmt: &mut Formatter) { + let defined_vars = { + let vars = def + .defined_vars + .iter() + .map(|&var_index| var_pool.get(var_index).name.as_ref()) + .collect::<Vec<&str>>(); + if vars.len() == 1 { + vars[0].to_string() + } else { + format!("({})", vars.join(", ")) + } + }; + + if is_value_split(def) { + // Split instructions are not emitted with the builder, but by calling special functions in + // the `legalizer::split` module. These functions will eliminate concat-split patterns. + fmt.line("let curpos = pos.position();"); + fmt.line("let srcloc = pos.srcloc();"); + fmtln!( + fmt, + "let {} = split::{}(pos.func, cfg, curpos, srcloc, {});", + defined_vars, + def.apply.inst.snake_name(), + def.apply.args[0].to_rust_code(var_pool) + ); + return; + } + + if def.defined_vars.is_empty() { + // This node doesn't define any values, so just insert the new instruction. + fmtln!( + fmt, + "pos.ins().{};", + def.apply.rust_builder(&def.defined_vars, var_pool) + ); + return; + } + + if let Some(src_def0) = var_pool.get(def.defined_vars[0]).src_def { + if def.defined_vars == def_pool.get(src_def0).defined_vars { + // The replacement instruction defines the exact same values as the source pattern. + // Unwrapping would have left the results intact. Replace the whole instruction. + fmtln!( + fmt, + "let {} = pos.func.dfg.replace(inst).{};", + defined_vars, + def.apply.rust_builder(&def.defined_vars, var_pool) + ); + + // We need to bump the cursor so following instructions are inserted *after* the + // replaced instruction. + fmt.line("if pos.current_inst() == Some(inst) {"); + fmt.indent(|fmt| { + fmt.line("pos.next_inst();"); + }); + fmt.line("}"); + return; + } + } + + // Insert a new instruction. + let mut builder = format!("let {} = pos.ins()", defined_vars); + + if def.defined_vars.len() == 1 && var_pool.get(def.defined_vars[0]).is_output() { + // Reuse the single source result value. + builder = format!( + "{}.with_result({})", + builder, + var_pool.get(def.defined_vars[0]).to_rust_code() + ); + } else if def + .defined_vars + .iter() + .any(|&var_index| var_pool.get(var_index).is_output()) + { + // There are more than one output values that can be reused. + let array = def + .defined_vars + .iter() + .map(|&var_index| { + let var = var_pool.get(var_index); + if var.is_output() { + format!("Some({})", var.name) + } else { + "None".into() + } + }) + .collect::<Vec<_>>() + .join(", "); + builder = format!("{}.with_results([{}])", builder, array); + } + + fmtln!( + fmt, + "{}.{};", + builder, + def.apply.rust_builder(&def.defined_vars, var_pool) + ); +} + +/// Emit code for `transform`, assuming that the opcode of transform's root instruction +/// has already been matched. +/// +/// `inst: Inst` is the variable to be replaced. It is pointed to by `pos: Cursor`. +/// `dfg: DataFlowGraph` is available and mutable. +fn gen_transform<'a>( + replace_inst: bool, + transform: &'a Transform, + type_sets: &mut UniqueTable<'a, TypeSet>, + fmt: &mut Formatter, +) { + // Evaluate the instruction predicate if any. 
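// (Aside on `emit_dst_inst` above, builder names hypothetical: the emitted
// destination code is either an in-place replacement,
//
//   let a = pos.func.dfg.replace(inst).iadd(x, y);
//
// when the destination defines exactly the source pattern's values, or a
// plain insertion such as
//
//   let a = pos.ins().iadd_imm(x, 1);
//
// optionally chained with `.with_result(..)` or `.with_results([..])` to
// reuse result values detached from the source instruction.)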
+ let apply = &transform.def_pool.get(transform.src).apply; + + let inst_predicate = apply + .inst_predicate_with_ctrl_typevar(&transform.var_pool) + .rust_predicate("pos.func"); + + let has_extra_constraints = !transform.type_env.constraints.is_empty(); + if has_extra_constraints { + // Extra constraints rely on the predicate being a variable that we can rebind as we add + // more constraint predicates. + if let Some(pred) = &inst_predicate { + fmt.multi_line(&format!("let predicate = {};", pred)); + } else { + fmt.line("let predicate = true;"); + } + } + + // Emit any runtime checks; these will rebind `predicate` emitted right above. + for constraint in &transform.type_env.constraints { + emit_runtime_typecheck(constraint, type_sets, fmt); + } + + let do_expand = |fmt: &mut Formatter| { + // Emit any constants that must be created before use. + for (name, value) in transform.const_pool.iter() { + fmtln!( + fmt, + "let {} = pos.func.dfg.constants.insert(vec!{:?}.into());", + name, + value + ); + } + + // If we are adding some blocks, we need to recall the original block, such that we can + // recompute it. + if !transform.block_pool.is_empty() { + fmt.line("let orig_block = pos.current_block().unwrap();"); + } + + // If we're going to delete `inst`, we need to detach its results first so they can be + // reattached during pattern expansion. + if !replace_inst { + fmt.line("pos.func.dfg.clear_results(inst);"); + } + + // Emit new block creation. + for block in &transform.block_pool { + let var = transform.var_pool.get(block.name); + fmtln!(fmt, "let {} = pos.func.dfg.make_block();", var.name); + } + + // Emit the destination pattern. + for &def_index in &transform.dst { + if let Some(block) = transform.block_pool.get(def_index) { + let var = transform.var_pool.get(block.name); + fmtln!(fmt, "pos.insert_block({});", var.name); + } + emit_dst_inst( + transform.def_pool.get(def_index), + &transform.def_pool, + &transform.var_pool, + fmt, + ); + } + + // Insert a new block after the last instruction, if needed. + let def_next_index = transform.def_pool.next_index(); + if let Some(block) = transform.block_pool.get(def_next_index) { + let var = transform.var_pool.get(block.name); + fmtln!(fmt, "pos.insert_block({});", var.name); + } + + // Delete the original instruction if we didn't have an opportunity to replace it. + if !replace_inst { + fmt.line("let removed = pos.remove_inst();"); + fmt.line("debug_assert_eq!(removed, inst);"); + } + + if transform.block_pool.is_empty() { + if transform.def_pool.get(transform.src).apply.inst.is_branch { + // A branch might have been legalized into multiple branches, so we need to recompute + // the cfg. + fmt.line("cfg.recompute_block(pos.func, pos.current_block().unwrap());"); + } + } else { + // Update CFG for the new blocks. + fmt.line("cfg.recompute_block(pos.func, orig_block);"); + for block in &transform.block_pool { + let var = transform.var_pool.get(block.name); + fmtln!(fmt, "cfg.recompute_block(pos.func, {});", var.name); + } + } + + fmt.line("return true;"); + }; + + // Guard the actual expansion by `predicate`. + if has_extra_constraints { + fmt.line("if predicate {"); + fmt.indent(|fmt| { + do_expand(fmt); + }); + fmt.line("}"); + } else if let Some(pred) = &inst_predicate { + fmt.multi_line(&format!("if {} {{", pred)); + fmt.indent(|fmt| { + do_expand(fmt); + }); + fmt.line("}"); + } else { + // Unconditional transform (there was no predicate), just emit it. 
+ do_expand(fmt); + } +} + +fn gen_transform_group<'a>( + group: &'a TransformGroup, + transform_groups: &TransformGroups, + type_sets: &mut UniqueTable<'a, TypeSet>, + fmt: &mut Formatter, +) { + fmt.doc_comment(group.doc); + fmt.line("#[allow(unused_variables,unused_assignments,unused_imports,non_snake_case)]"); + + // Function arguments. + fmtln!(fmt, "pub fn {}(", group.name); + fmt.indent(|fmt| { + fmt.line("inst: crate::ir::Inst,"); + fmt.line("func: &mut crate::ir::Function,"); + fmt.line("cfg: &mut crate::flowgraph::ControlFlowGraph,"); + fmt.line("isa: &dyn crate::isa::TargetIsa,"); + }); + fmtln!(fmt, ") -> bool {"); + + // Function body. + fmt.indent(|fmt| { + fmt.line("use crate::ir::InstBuilder;"); + fmt.line("use crate::cursor::{Cursor, FuncCursor};"); + fmt.line("let mut pos = FuncCursor::new(func).at_inst(inst);"); + fmt.line("pos.use_srcloc(inst);"); + + // Group the transforms by opcode so we can generate a big switch. + // Preserve ordering. + let mut inst_to_transforms = HashMap::new(); + for transform in &group.transforms { + let def_index = transform.src; + let inst = &transform.def_pool.get(def_index).apply.inst; + inst_to_transforms + .entry(inst.camel_name.clone()) + .or_insert_with(Vec::new) + .push(transform); + } + + let mut sorted_inst_names = Vec::from_iter(inst_to_transforms.keys()); + sorted_inst_names.sort(); + + fmt.line("{"); + fmt.indent(|fmt| { + fmt.line("match pos.func.dfg[inst].opcode() {"); + fmt.indent(|fmt| { + for camel_name in sorted_inst_names { + fmtln!(fmt, "ir::Opcode::{} => {{", camel_name); + fmt.indent(|fmt| { + let transforms = inst_to_transforms.get(camel_name).unwrap(); + + // Unwrap the source instruction, create local variables for the input variables. + let replace_inst = unwrap_inst(&transforms[0], fmt); + fmt.empty_line(); + + for (i, transform) in transforms.iter().enumerate() { + if i > 0 { + fmt.empty_line(); + } + gen_transform(replace_inst, transform, type_sets, fmt); + } + }); + fmtln!(fmt, "}"); + fmt.empty_line(); + } + + // Emit the custom transforms. The Rust compiler will complain about any overlap with + // the normal transforms. + let mut sorted_custom_legalizes = Vec::from_iter(&group.custom_legalizes); + sorted_custom_legalizes.sort(); + for (inst_camel_name, func_name) in sorted_custom_legalizes { + fmtln!(fmt, "ir::Opcode::{} => {{", inst_camel_name); + fmt.indent(|fmt| { + fmtln!(fmt, "{}(inst, func, cfg, isa);", func_name); + fmt.line("return true;"); + }); + fmtln!(fmt, "}"); + fmt.empty_line(); + } + + // We'll assume there are uncovered opcodes. + fmt.line("_ => {},"); + }); + fmt.line("}"); + }); + fmt.line("}"); + + // If we fall through, nothing was expanded; call the chain if any. + match &group.chain_with { + Some(group_id) => fmtln!( + fmt, + "{}(inst, func, cfg, isa)", + transform_groups.get(*group_id).rust_name() + ), + None => fmt.line("false"), + }; + }); + fmtln!(fmt, "}"); + fmt.empty_line(); +} + +/// Generate legalization functions for `isa` and add any shared `TransformGroup`s +/// encountered to `shared_groups`. +/// +/// Generate `TYPE_SETS` and `LEGALIZE_ACTIONS` tables. 
+fn gen_isa( + isa: &TargetIsa, + transform_groups: &TransformGroups, + shared_group_names: &mut HashSet<&'static str>, + fmt: &mut Formatter, +) { + let mut type_sets = UniqueTable::new(); + for group_index in isa.transitive_transform_groups(transform_groups) { + let group = transform_groups.get(group_index); + match group.isa_name { + Some(isa_name) => { + assert!( + isa_name == isa.name, + "ISA-specific legalizations must be used by the same ISA" + ); + gen_transform_group(group, transform_groups, &mut type_sets, fmt); + } + None => { + shared_group_names.insert(group.name); + } + } + } + + gen_typesets_table(&type_sets, fmt); + + let direct_groups = isa.direct_transform_groups(); + fmtln!( + fmt, + "pub static LEGALIZE_ACTIONS: [isa::Legalize; {}] = [", + direct_groups.len() + ); + fmt.indent(|fmt| { + for &group_index in direct_groups { + fmtln!(fmt, "{},", transform_groups.get(group_index).rust_name()); + } + }); + fmtln!(fmt, "];"); +} + +/// Generate the legalizer files. +pub(crate) fn generate( + isas: &[TargetIsa], + transform_groups: &TransformGroups, + extra_legalization_groups: &[&'static str], + filename_prefix: &str, + out_dir: &str, +) -> Result<(), error::Error> { + let mut shared_group_names = HashSet::new(); + + for isa in isas { + let mut fmt = Formatter::new(); + gen_isa(isa, transform_groups, &mut shared_group_names, &mut fmt); + fmt.update_file(format!("{}-{}.rs", filename_prefix, isa.name), out_dir)?; + } + + // Add extra legalization groups that were explicitly requested. + for group in extra_legalization_groups { + shared_group_names.insert(group); + } + + // Generate shared legalize groups. + let mut fmt = Formatter::new(); + // Generate shared legalize groups. + let mut type_sets = UniqueTable::new(); + let mut sorted_shared_group_names = Vec::from_iter(shared_group_names); + sorted_shared_group_names.sort(); + for group_name in &sorted_shared_group_names { + let group = transform_groups.by_name(group_name); + gen_transform_group(group, transform_groups, &mut type_sets, &mut fmt); + } + gen_typesets_table(&type_sets, &mut fmt); + fmt.update_file(format!("{}r.rs", filename_prefix), out_dir)?; + + Ok(()) +} diff --git a/third_party/rust/cranelift-codegen-meta/src/gen_registers.rs b/third_party/rust/cranelift-codegen-meta/src/gen_registers.rs new file mode 100644 index 0000000000..bd5ac95ae0 --- /dev/null +++ b/third_party/rust/cranelift-codegen-meta/src/gen_registers.rs @@ -0,0 +1,148 @@ +//! Generate the ISA-specific registers. 
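// (Illustration, bank and register names hypothetical: the emitted file
// contains one `INFO: RegInfo` static describing all banks and classes, a
// `<CLASS>_DATA: RegClassData` static plus a `<CLASS>: RegClass` reference
// per register class, and an `RU` enum with one variant per register unit,
// e.g.
//
//   pub enum RU { rax = 0, rcx = 1, /* ... */ }
// )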
+use crate::cdsl::isa::TargetIsa; +use crate::cdsl::regs::{RegBank, RegClass}; +use crate::error; +use crate::srcgen::Formatter; +use cranelift_entity::EntityRef; + +fn gen_regbank(fmt: &mut Formatter, reg_bank: &RegBank) { + let names = if !reg_bank.names.is_empty() { + format!(r#""{}""#, reg_bank.names.join(r#"", ""#)) + } else { + "".to_string() + }; + fmtln!(fmt, "RegBank {"); + fmt.indent(|fmt| { + fmtln!(fmt, r#"name: "{}","#, reg_bank.name); + fmtln!(fmt, "first_unit: {},", reg_bank.first_unit); + fmtln!(fmt, "units: {},", reg_bank.units); + fmtln!(fmt, "names: &[{}],", names); + fmtln!(fmt, r#"prefix: "{}","#, reg_bank.prefix); + fmtln!(fmt, "first_toprc: {},", reg_bank.toprcs[0].index()); + fmtln!(fmt, "num_toprcs: {},", reg_bank.toprcs.len()); + fmtln!( + fmt, + "pressure_tracking: {},", + if reg_bank.pressure_tracking { + "true" + } else { + "false" + } + ); + }); + fmtln!(fmt, "},"); +} + +fn gen_regclass(isa: &TargetIsa, reg_class: &RegClass, fmt: &mut Formatter) { + let reg_bank = isa.regs.banks.get(reg_class.bank).unwrap(); + + let mask: Vec<String> = reg_class + .mask(reg_bank.first_unit) + .iter() + .map(|x| format!("0x{:08x}", x)) + .collect(); + let mask = mask.join(", "); + + fmtln!( + fmt, + "pub static {}_DATA: RegClassData = RegClassData {{", + reg_class.name + ); + fmt.indent(|fmt| { + fmtln!(fmt, r#"name: "{}","#, reg_class.name); + fmtln!(fmt, "index: {},", reg_class.index.index()); + fmtln!(fmt, "width: {},", reg_class.width); + fmtln!(fmt, "bank: {},", reg_class.bank.index()); + fmtln!(fmt, "toprc: {},", reg_class.toprc.index()); + fmtln!(fmt, "first: {},", reg_bank.first_unit + reg_class.start); + fmtln!(fmt, "subclasses: {:#x},", reg_class.subclass_mask()); + fmtln!(fmt, "mask: [{}],", mask); + fmtln!( + fmt, + "pinned_reg: {:?},", + reg_bank + .pinned_reg + .map(|index| index + reg_bank.first_unit as u16 + reg_class.start as u16) + ); + fmtln!(fmt, "info: &INFO,"); + }); + fmtln!(fmt, "};"); + + fmtln!(fmt, "#[allow(dead_code)]"); + fmtln!( + fmt, + "pub static {}: RegClass = &{}_DATA;", + reg_class.name, + reg_class.name + ); +} + +fn gen_regbank_units(reg_bank: &RegBank, fmt: &mut Formatter) { + for unit in 0..reg_bank.units { + let v = unit + reg_bank.first_unit; + if (unit as usize) < reg_bank.names.len() { + fmtln!(fmt, "{} = {},", reg_bank.names[unit as usize], v); + continue; + } + fmtln!(fmt, "{}{} = {},", reg_bank.prefix, unit, v); + } +} + +fn gen_isa(isa: &TargetIsa, fmt: &mut Formatter) { + // Emit RegInfo. + fmtln!(fmt, "pub static INFO: RegInfo = RegInfo {"); + + fmt.indent(|fmt| { + fmtln!(fmt, "banks: &["); + // Bank descriptors. + fmt.indent(|fmt| { + for reg_bank in isa.regs.banks.values() { + gen_regbank(fmt, ®_bank); + } + }); + fmtln!(fmt, "],"); + // References to register classes. + fmtln!(fmt, "classes: &["); + fmt.indent(|fmt| { + for reg_class in isa.regs.classes.values() { + fmtln!(fmt, "&{}_DATA,", reg_class.name); + } + }); + fmtln!(fmt, "],"); + }); + fmtln!(fmt, "};"); + + // Register class descriptors. + for rc in isa.regs.classes.values() { + gen_regclass(&isa, rc, fmt); + } + + // Emit constants for all the register units. + fmtln!(fmt, "#[allow(dead_code, non_camel_case_types)]"); + fmtln!(fmt, "#[derive(Clone, Copy)]"); + fmtln!(fmt, "pub enum RU {"); + fmt.indent(|fmt| { + for reg_bank in isa.regs.banks.values() { + gen_regbank_units(reg_bank, fmt); + } + }); + fmtln!(fmt, "}"); + + // Emit Into conversion for the RU class. 
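As an illustration of the output shape (reviewer's addition, not part of the patch): for a hypothetical bank with prefix "r", four units, and a single named unit "zero", the unit constants emitted by gen_regbank_units above, together with the Into impl emitted just below, would look roughly like this:

    #[allow(dead_code, non_camel_case_types)]
    #[derive(Clone, Copy)]
    pub enum RU {
        zero = 0,
        r1 = 1,
        r2 = 2,
        r3 = 3,
    }

    impl Into<RegUnit> for RU {
        fn into(self) -> RegUnit {
            self as RegUnit
        }
    }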
+ fmtln!(fmt, "impl Into<RegUnit> for RU {"); + fmt.indent(|fmt| { + fmtln!(fmt, "fn into(self) -> RegUnit {"); + fmt.indent(|fmt| { + fmtln!(fmt, "self as RegUnit"); + }); + fmtln!(fmt, "}"); + }); + fmtln!(fmt, "}"); +} + +pub(crate) fn generate(isa: &TargetIsa, filename: &str, out_dir: &str) -> Result<(), error::Error> { + let mut fmt = Formatter::new(); + gen_isa(&isa, &mut fmt); + fmt.update_file(filename, out_dir)?; + Ok(()) +} diff --git a/third_party/rust/cranelift-codegen-meta/src/gen_settings.rs b/third_party/rust/cranelift-codegen-meta/src/gen_settings.rs new file mode 100644 index 0000000000..2ed5941b80 --- /dev/null +++ b/third_party/rust/cranelift-codegen-meta/src/gen_settings.rs @@ -0,0 +1,447 @@ +//! Generate the ISA-specific settings. +use std::collections::HashMap; + +use cranelift_codegen_shared::constant_hash::{generate_table, simple_hash}; + +use crate::cdsl::camel_case; +use crate::cdsl::settings::{ + BoolSetting, Predicate, Preset, Setting, SettingGroup, SpecificSetting, +}; +use crate::error; +use crate::srcgen::{Formatter, Match}; +use crate::unique_table::UniqueSeqTable; + +pub(crate) enum ParentGroup { + None, + Shared, +} + +/// Emits the constructor of the Flags structure. +fn gen_constructor(group: &SettingGroup, parent: ParentGroup, fmt: &mut Formatter) { + let args = match parent { + ParentGroup::None => "builder: Builder", + ParentGroup::Shared => "shared: &settings::Flags, builder: Builder", + }; + fmtln!(fmt, "impl Flags {"); + fmt.indent(|fmt| { + fmt.doc_comment(format!("Create flags {} settings group.", group.name)); + fmtln!(fmt, "#[allow(unused_variables)]"); + fmtln!(fmt, "pub fn new({}) -> Self {{", args); + fmt.indent(|fmt| { + fmtln!(fmt, "let bvec = builder.state_for(\"{}\");", group.name); + fmtln!( + fmt, + "let mut {} = Self {{ bytes: [0; {}] }};", + group.name, + group.byte_size() + ); + fmtln!( + fmt, + "debug_assert_eq!(bvec.len(), {});", + group.settings_size + ); + fmtln!( + fmt, + "{}.bytes[0..{}].copy_from_slice(&bvec);", + group.name, + group.settings_size + ); + + // Now compute the predicates. + for p in &group.predicates { + fmt.comment(format!("Precompute #{}.", p.number)); + fmtln!(fmt, "if {} {{", p.render(group)); + fmt.indent(|fmt| { + fmtln!( + fmt, + "{}.bytes[{}] |= 1 << {};", + group.name, + group.bool_start_byte_offset + p.number / 8, + p.number % 8 + ); + }); + fmtln!(fmt, "}"); + } + + fmtln!(fmt, group.name); + }); + fmtln!(fmt, "}"); + }); + fmtln!(fmt, "}"); +} + +/// Emit Display and FromStr implementations for enum settings. +fn gen_to_and_from_str(name: &str, values: &[&'static str], fmt: &mut Formatter) { + fmtln!(fmt, "impl fmt::Display for {} {{", name); + fmt.indent(|fmt| { + fmtln!( + fmt, + "fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {" + ); + fmt.indent(|fmt| { + fmtln!(fmt, "f.write_str(match *self {"); + fmt.indent(|fmt| { + for v in values.iter() { + fmtln!(fmt, "Self::{} => \"{}\",", camel_case(v), v); + } + }); + fmtln!(fmt, "})"); + }); + fmtln!(fmt, "}"); + }); + fmtln!(fmt, "}"); + + fmtln!(fmt, "impl str::FromStr for {} {{", name); + fmt.indent(|fmt| { + fmtln!(fmt, "type Err = ();"); + fmtln!(fmt, "fn from_str(s: &str) -> Result<Self, Self::Err> {"); + fmt.indent(|fmt| { + fmtln!(fmt, "match s {"); + fmt.indent(|fmt| { + for v in values.iter() { + fmtln!(fmt, "\"{}\" => Ok(Self::{}),", v, camel_case(v)); + } + fmtln!(fmt, "_ => Err(()),"); + }); + fmtln!(fmt, "}"); + }); + fmtln!(fmt, "}"); + }); + fmtln!(fmt, "}"); +} + +/// Emit real enum for the Enum settings. 
+fn gen_enum_types(group: &SettingGroup, fmt: &mut Formatter) { + for setting in group.settings.iter() { + let values = match setting.specific { + SpecificSetting::Bool(_) | SpecificSetting::Num(_) => continue, + SpecificSetting::Enum(ref values) => values, + }; + let name = camel_case(setting.name); + + fmt.doc_comment(format!("Values for `{}.{}`.", group.name, setting.name)); + fmtln!(fmt, "#[derive(Debug, Copy, Clone, PartialEq, Eq, Hash)]"); + fmtln!(fmt, "pub enum {} {{", name); + fmt.indent(|fmt| { + for v in values.iter() { + fmt.doc_comment(format!("`{}`.", v)); + fmtln!(fmt, "{},", camel_case(v)); + } + }); + fmtln!(fmt, "}"); + + gen_to_and_from_str(&name, values, fmt); + } +} + +/// Emit a getter function for `setting`. +fn gen_getter(setting: &Setting, fmt: &mut Formatter) { + fmt.doc_comment(setting.comment); + match setting.specific { + SpecificSetting::Bool(BoolSetting { + predicate_number, .. + }) => { + fmtln!(fmt, "pub fn {}(&self) -> bool {{", setting.name); + fmt.indent(|fmt| { + fmtln!(fmt, "self.numbered_predicate({})", predicate_number); + }); + fmtln!(fmt, "}"); + } + SpecificSetting::Enum(ref values) => { + let ty = camel_case(setting.name); + fmtln!(fmt, "pub fn {}(&self) -> {} {{", setting.name, ty); + fmt.indent(|fmt| { + let mut m = Match::new(format!("self.bytes[{}]", setting.byte_offset)); + for (i, v) in values.iter().enumerate() { + m.arm_no_fields(format!("{}", i), format!("{}::{}", ty, camel_case(v))); + } + m.arm_no_fields("_", "panic!(\"Invalid enum value\")"); + fmt.add_match(m); + }); + fmtln!(fmt, "}"); + } + SpecificSetting::Num(_) => { + fmtln!(fmt, "pub fn {}(&self) -> u8 {{", setting.name); + fmt.indent(|fmt| { + fmtln!(fmt, "self.bytes[{}]", setting.byte_offset); + }); + fmtln!(fmt, "}"); + } + } +} + +fn gen_pred_getter(predicate: &Predicate, group: &SettingGroup, fmt: &mut Formatter) { + fmt.doc_comment(format!("Computed predicate `{}`.", predicate.render(group))); + fmtln!(fmt, "pub fn {}(&self) -> bool {{", predicate.name); + fmt.indent(|fmt| { + fmtln!(fmt, "self.numbered_predicate({})", predicate.number); + }); + fmtln!(fmt, "}"); +} + +/// Emits getters for each setting value. +fn gen_getters(group: &SettingGroup, fmt: &mut Formatter) { + fmt.doc_comment("User-defined settings."); + fmtln!(fmt, "#[allow(dead_code)]"); + fmtln!(fmt, "impl Flags {"); + fmt.indent(|fmt| { + fmt.doc_comment("Get a view of the boolean predicates."); + fmtln!( + fmt, + "pub fn predicate_view(&self) -> crate::settings::PredicateView {" + ); + fmt.indent(|fmt| { + fmtln!( + fmt, + "crate::settings::PredicateView::new(&self.bytes[{}..])", + group.bool_start_byte_offset + ); + }); + fmtln!(fmt, "}"); + + if !group.settings.is_empty() { + fmt.doc_comment("Dynamic numbered predicate getter."); + fmtln!(fmt, "fn numbered_predicate(&self, p: usize) -> bool {"); + fmt.indent(|fmt| { + fmtln!( + fmt, + "self.bytes[{} + p / 8] & (1 << (p % 8)) != 0", + group.bool_start_byte_offset + ); + }); + fmtln!(fmt, "}"); + } + + for setting in &group.settings { + gen_getter(&setting, fmt); + } + for predicate in &group.predicates { + gen_pred_getter(&predicate, &group, fmt); + } + }); + fmtln!(fmt, "}"); +} + +#[derive(Hash, PartialEq, Eq)] +enum SettingOrPreset<'a> { + Setting(&'a Setting), + Preset(&'a Preset), +} + +impl<'a> SettingOrPreset<'a> { + fn name(&self) -> &str { + match *self { + SettingOrPreset::Setting(s) => s.name, + SettingOrPreset::Preset(p) => p.name, + } + } +} + +/// Emits DESCRIPTORS, ENUMERATORS, HASH_TABLE and PRESETS. 
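Before the descriptor tables below, it may help to see what the enum machinery above expands to. This is a reviewer's reconstruction (not part of the patch) of the output of gen_enum_types and gen_to_and_from_str for a hypothetical enum setting `opt_level` in a group named `shared`, with values "none", "speed", and "speed_and_size"; the actual setting definitions live elsewhere in this crate:

    /// Values for `shared.opt_level`.
    #[derive(Debug, Copy, Clone, PartialEq, Eq, Hash)]
    pub enum OptLevel {
        /// `none`.
        None,
        /// `speed`.
        Speed,
        /// `speed_and_size`.
        SpeedAndSize,
    }

    impl fmt::Display for OptLevel {
        fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
            f.write_str(match *self {
                Self::None => "none",
                Self::Speed => "speed",
                Self::SpeedAndSize => "speed_and_size",
            })
        }
    }

    impl str::FromStr for OptLevel {
        type Err = ();
        fn from_str(s: &str) -> Result<Self, Self::Err> {
            match s {
                "none" => Ok(Self::None),
                "speed" => Ok(Self::Speed),
                "speed_and_size" => Ok(Self::SpeedAndSize),
                _ => Err(()),
            }
        }
    }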
+fn gen_descriptors(group: &SettingGroup, fmt: &mut Formatter) { + let mut enum_table = UniqueSeqTable::new(); + + let mut descriptor_index_map: HashMap<SettingOrPreset, usize> = HashMap::new(); + + // Generate descriptors. + fmtln!( + fmt, + "static DESCRIPTORS: [detail::Descriptor; {}] = [", + group.settings.len() + group.presets.len() + ); + fmt.indent(|fmt| { + for (idx, setting) in group.settings.iter().enumerate() { + fmtln!(fmt, "detail::Descriptor {"); + fmt.indent(|fmt| { + fmtln!(fmt, "name: \"{}\",", setting.name); + fmtln!(fmt, "offset: {},", setting.byte_offset); + match setting.specific { + SpecificSetting::Bool(BoolSetting { bit_offset, .. }) => { + fmtln!( + fmt, + "detail: detail::Detail::Bool {{ bit: {} }},", + bit_offset + ); + } + SpecificSetting::Enum(ref values) => { + let offset = enum_table.add(values); + fmtln!( + fmt, + "detail: detail::Detail::Enum {{ last: {}, enumerators: {} }},", + values.len() - 1, + offset + ); + } + SpecificSetting::Num(_) => { + fmtln!(fmt, "detail: detail::Detail::Num,"); + } + } + + descriptor_index_map.insert(SettingOrPreset::Setting(setting), idx); + }); + fmtln!(fmt, "},"); + } + + for (idx, preset) in group.presets.iter().enumerate() { + fmtln!(fmt, "detail::Descriptor {"); + fmt.indent(|fmt| { + fmtln!(fmt, "name: \"{}\",", preset.name); + fmtln!(fmt, "offset: {},", (idx as u8) * group.settings_size); + fmtln!(fmt, "detail: detail::Detail::Preset,"); + }); + fmtln!(fmt, "},"); + + let whole_idx = idx + group.settings.len(); + descriptor_index_map.insert(SettingOrPreset::Preset(preset), whole_idx); + } + }); + fmtln!(fmt, "];"); + + // Generate enumerators. + fmtln!(fmt, "static ENUMERATORS: [&str; {}] = [", enum_table.len()); + fmt.indent(|fmt| { + for enum_val in enum_table.iter() { + fmtln!(fmt, "\"{}\",", enum_val); + } + }); + fmtln!(fmt, "];"); + + // Generate hash table. + let mut hash_entries: Vec<SettingOrPreset> = Vec::new(); + hash_entries.extend(group.settings.iter().map(|x| SettingOrPreset::Setting(x))); + hash_entries.extend(group.presets.iter().map(|x| SettingOrPreset::Preset(x))); + + let hash_table = generate_table(hash_entries.iter(), hash_entries.len(), |entry| { + simple_hash(entry.name()) + }); + fmtln!(fmt, "static HASH_TABLE: [u16; {}] = [", hash_table.len()); + fmt.indent(|fmt| { + for h in &hash_table { + match *h { + Some(setting_or_preset) => fmtln!( + fmt, + "{},", + &descriptor_index_map + .get(setting_or_preset) + .unwrap() + .to_string() + ), + None => fmtln!(fmt, "0xffff,"), + } + } + }); + fmtln!(fmt, "];"); + + // Generate presets. 
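A note on the PRESETS table emitted just below: each preset contributes `settings_size` (mask, value) byte pairs, and its descriptor's `offset` field (set to `idx * settings_size` above) is the index of its first pair. The runtime side that consumes these rows lives in cranelift-codegen's settings module; the intent is roughly the following sketch (reviewer's addition; the helper name apply_preset is invented):

    fn apply_preset(bytes: &mut [u8], rows: &[(u8, u8)]) {
        // For every settings byte, clear the bits covered by the mask and
        // substitute the preset's value bits.
        for (byte, &(mask, value)) in bytes.iter_mut().zip(rows) {
            *byte = (*byte & !mask) | (value & mask);
        }
    }

    fn main() {
        let mut bytes = [0b0000_0101u8];
        apply_preset(&mut bytes, &[(0b0000_1111, 0b0000_1010)]);
        assert_eq!(bytes, [0b0000_1010]);
    }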
+ fmtln!( + fmt, + "static PRESETS: [(u8, u8); {}] = [", + group.presets.len() * (group.settings_size as usize) + ); + fmt.indent(|fmt| { + for preset in &group.presets { + fmt.comment(preset.name); + for (mask, value) in preset.layout(&group) { + fmtln!(fmt, "(0b{:08b}, 0b{:08b}),", mask, value); + } + } + }); + fmtln!(fmt, "];"); +} + +fn gen_template(group: &SettingGroup, fmt: &mut Formatter) { + let mut default_bytes: Vec<u8> = vec![0; group.settings_size as usize]; + for setting in &group.settings { + *default_bytes.get_mut(setting.byte_offset as usize).unwrap() |= setting.default_byte(); + } + + let default_bytes: Vec<String> = default_bytes + .iter() + .map(|x| format!("{:#04x}", x)) + .collect(); + let default_bytes_str = default_bytes.join(", "); + + fmtln!( + fmt, + "static TEMPLATE: detail::Template = detail::Template {" + ); + fmt.indent(|fmt| { + fmtln!(fmt, "name: \"{}\",", group.name); + fmtln!(fmt, "descriptors: &DESCRIPTORS,"); + fmtln!(fmt, "enumerators: &ENUMERATORS,"); + fmtln!(fmt, "hash_table: &HASH_TABLE,"); + fmtln!(fmt, "defaults: &[{}],", default_bytes_str); + fmtln!(fmt, "presets: &PRESETS,"); + }); + fmtln!(fmt, "};"); + + fmt.doc_comment(format!( + "Create a `settings::Builder` for the {} settings group.", + group.name + )); + fmtln!(fmt, "pub fn builder() -> Builder {"); + fmt.indent(|fmt| { + fmtln!(fmt, "Builder::new(&TEMPLATE)"); + }); + fmtln!(fmt, "}"); +} + +fn gen_display(group: &SettingGroup, fmt: &mut Formatter) { + fmtln!(fmt, "impl fmt::Display for Flags {"); + fmt.indent(|fmt| { + fmtln!( + fmt, + "fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {" + ); + fmt.indent(|fmt| { + fmtln!(fmt, "writeln!(f, \"[{}]\")?;", group.name); + fmtln!(fmt, "for d in &DESCRIPTORS {"); + fmt.indent(|fmt| { + fmtln!(fmt, "if !d.detail.is_preset() {"); + fmt.indent(|fmt| { + fmtln!(fmt, "write!(f, \"{} = \", d.name)?;"); + fmtln!( + fmt, + "TEMPLATE.format_toml_value(d.detail, self.bytes[d.offset as usize], f)?;", + ); + fmtln!(fmt, "writeln!(f)?;"); + }); + fmtln!(fmt, "}"); + }); + fmtln!(fmt, "}"); + fmtln!(fmt, "Ok(())"); + }); + fmtln!(fmt, "}") + }); + fmtln!(fmt, "}"); +} + +fn gen_group(group: &SettingGroup, parent: ParentGroup, fmt: &mut Formatter) { + // Generate struct. + fmtln!(fmt, "#[derive(Clone)]"); + fmt.doc_comment(format!("Flags group `{}`.", group.name)); + fmtln!(fmt, "pub struct Flags {"); + fmt.indent(|fmt| { + fmtln!(fmt, "bytes: [u8; {}],", group.byte_size()); + }); + fmtln!(fmt, "}"); + + gen_constructor(group, parent, fmt); + gen_enum_types(group, fmt); + gen_getters(group, fmt); + gen_descriptors(group, fmt); + gen_template(group, fmt); + gen_display(group, fmt); +} + +pub(crate) fn generate( + settings: &SettingGroup, + parent_group: ParentGroup, + filename: &str, + out_dir: &str, +) -> Result<(), error::Error> { + let mut fmt = Formatter::new(); + gen_group(&settings, parent_group, &mut fmt); + fmt.update_file(filename, out_dir)?; + Ok(()) +} diff --git a/third_party/rust/cranelift-codegen-meta/src/gen_types.rs b/third_party/rust/cranelift-codegen-meta/src/gen_types.rs new file mode 100644 index 0000000000..6ced212b8d --- /dev/null +++ b/third_party/rust/cranelift-codegen-meta/src/gen_types.rs @@ -0,0 +1,76 @@ +//! Generate sources with type info. +//! +//! This generates a `types.rs` file which is included in +//! `cranelift-codegen/ir/types.rs`. The file provides constant definitions for the +//! most commonly used types, including all of the scalar types. +//! +//! 
This ensures that the metaprogram and the generated program see the same +//! type numbering. + +use crate::cdsl::types as cdsl_types; +use crate::error; +use crate::srcgen; + +/// Emit a constant definition of a single value type. +fn emit_type(ty: &cdsl_types::ValueType, fmt: &mut srcgen::Formatter) -> Result<(), error::Error> { + let name = ty.to_string().to_uppercase(); + let number = ty.number().ok_or_else(|| { + error::Error::with_msg(format!( + "Could not emit type `{}` which has no number.", + name + )) + })?; + + fmt.doc_comment(&ty.doc()); + fmtln!(fmt, "pub const {}: Type = Type({:#x});\n", name, number); + + Ok(()) +} + +/// Emit definition for all vector types with `bits` total size. +fn emit_vectors(bits: u64, fmt: &mut srcgen::Formatter) -> Result<(), error::Error> { + let vec_size: u64 = bits / 8; + for vec in cdsl_types::ValueType::all_lane_types() + .map(|ty| (ty, cdsl_types::ValueType::from(ty).membytes())) + .filter(|&(_, lane_size)| lane_size != 0 && lane_size < vec_size) + .map(|(ty, lane_size)| (ty, vec_size / lane_size)) + .map(|(ty, lanes)| cdsl_types::VectorType::new(ty, lanes)) + { + emit_type(&cdsl_types::ValueType::from(vec), fmt)?; + } + + Ok(()) +} + +/// Emit types using the given formatter object. +fn emit_types(fmt: &mut srcgen::Formatter) -> Result<(), error::Error> { + // Emit all of the special types, such as types for CPU flags. + for spec in cdsl_types::ValueType::all_special_types().map(cdsl_types::ValueType::from) { + emit_type(&spec, fmt)?; + } + + // Emit all of the lane types, such integers, floats, and booleans. + for ty in cdsl_types::ValueType::all_lane_types().map(cdsl_types::ValueType::from) { + emit_type(&ty, fmt)?; + } + + // Emit all reference types. + for ty in cdsl_types::ValueType::all_reference_types().map(cdsl_types::ValueType::from) { + emit_type(&ty, fmt)?; + } + + // Emit vector definitions for common SIMD sizes. + for vec_size in &[64_u64, 128, 256, 512] { + emit_vectors(*vec_size, fmt)?; + } + + Ok(()) +} + +/// Generate the types file. 
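The arithmetic in emit_vectors above is worth spelling out: for a given total width in bits, every lane type strictly smaller than the vector yields one SIMD type with `vector_bytes / lane_bytes` lanes. A tiny standalone check (reviewer's addition, not part of the patch):

    fn lanes(vector_bits: u64, lane_bytes: u64) -> u64 {
        (vector_bits / 8) / lane_bytes
    }

    fn main() {
        // 128-bit vectors: 4-byte lanes give 4 lanes (I32X4, F32X4); 1-byte lanes give 16 (I8X16).
        assert_eq!(lanes(128, 4), 4);
        assert_eq!(lanes(128, 1), 16);
        // Lanes as wide as the whole vector are excluded by the `lane_size < vec_size` filter above.
    }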
+pub(crate) fn generate(filename: &str, out_dir: &str) -> Result<(), error::Error> { + let mut fmt = srcgen::Formatter::new(); + emit_types(&mut fmt)?; + fmt.update_file(filename, out_dir)?; + Ok(()) +} diff --git a/third_party/rust/cranelift-codegen-meta/src/isa/arm32/mod.rs b/third_party/rust/cranelift-codegen-meta/src/isa/arm32/mod.rs new file mode 100644 index 0000000000..f699ece8eb --- /dev/null +++ b/third_party/rust/cranelift-codegen-meta/src/isa/arm32/mod.rs @@ -0,0 +1,88 @@ +use crate::cdsl::cpu_modes::CpuMode; +use crate::cdsl::instructions::{InstructionGroupBuilder, InstructionPredicateMap}; +use crate::cdsl::isa::TargetIsa; +use crate::cdsl::recipes::Recipes; +use crate::cdsl::regs::{IsaRegs, IsaRegsBuilder, RegBankBuilder, RegClassBuilder}; +use crate::cdsl::settings::{SettingGroup, SettingGroupBuilder}; + +use crate::shared::Definitions as SharedDefinitions; + +fn define_settings(_shared: &SettingGroup) -> SettingGroup { + let setting = SettingGroupBuilder::new("arm32"); + setting.build() +} + +fn define_regs() -> IsaRegs { + let mut regs = IsaRegsBuilder::new(); + + let builder = RegBankBuilder::new("FloatRegs", "s") + .units(64) + .track_pressure(true); + let float_regs = regs.add_bank(builder); + + let builder = RegBankBuilder::new("IntRegs", "r") + .units(16) + .track_pressure(true); + let int_regs = regs.add_bank(builder); + + let builder = RegBankBuilder::new("FlagRegs", "") + .units(1) + .names(vec!["nzcv"]) + .track_pressure(false); + let flag_reg = regs.add_bank(builder); + + let builder = RegClassBuilder::new_toplevel("S", float_regs).count(32); + regs.add_class(builder); + + let builder = RegClassBuilder::new_toplevel("D", float_regs).width(2); + regs.add_class(builder); + + let builder = RegClassBuilder::new_toplevel("Q", float_regs).width(4); + regs.add_class(builder); + + let builder = RegClassBuilder::new_toplevel("GPR", int_regs); + regs.add_class(builder); + + let builder = RegClassBuilder::new_toplevel("FLAG", flag_reg); + regs.add_class(builder); + + regs.build() +} + +pub(crate) fn define(shared_defs: &mut SharedDefinitions) -> TargetIsa { + let settings = define_settings(&shared_defs.settings); + let regs = define_regs(); + + let inst_group = InstructionGroupBuilder::new(&mut shared_defs.all_instructions).build(); + + // CPU modes for 32-bit ARM and Thumb2. + let mut a32 = CpuMode::new("A32"); + let mut t32 = CpuMode::new("T32"); + + // TODO refine these. + let narrow_flags = shared_defs.transform_groups.by_name("narrow_flags"); + a32.legalize_default(narrow_flags); + t32.legalize_default(narrow_flags); + + // Make sure that the expand code is used, thus generated. + let expand = shared_defs.transform_groups.by_name("expand"); + a32.legalize_monomorphic(expand); + + let cpu_modes = vec![a32, t32]; + + // TODO implement arm32 recipes. + let recipes = Recipes::new(); + + // TODO implement arm32 encodings and predicates. 
+ let encodings_predicates = InstructionPredicateMap::new(); + + TargetIsa::new( + "arm32", + inst_group, + settings, + regs, + recipes, + cpu_modes, + encodings_predicates, + ) +} diff --git a/third_party/rust/cranelift-codegen-meta/src/isa/arm64/mod.rs b/third_party/rust/cranelift-codegen-meta/src/isa/arm64/mod.rs new file mode 100644 index 0000000000..5d8bc76fc4 --- /dev/null +++ b/third_party/rust/cranelift-codegen-meta/src/isa/arm64/mod.rs @@ -0,0 +1,79 @@ +use crate::cdsl::cpu_modes::CpuMode; +use crate::cdsl::instructions::{InstructionGroupBuilder, InstructionPredicateMap}; +use crate::cdsl::isa::TargetIsa; +use crate::cdsl::recipes::Recipes; +use crate::cdsl::regs::{IsaRegs, IsaRegsBuilder, RegBankBuilder, RegClassBuilder}; +use crate::cdsl::settings::{SettingGroup, SettingGroupBuilder}; + +use crate::shared::Definitions as SharedDefinitions; + +fn define_settings(_shared: &SettingGroup) -> SettingGroup { + let setting = SettingGroupBuilder::new("arm64"); + setting.build() +} + +fn define_registers() -> IsaRegs { + let mut regs = IsaRegsBuilder::new(); + + // The `x31` regunit serves as the stack pointer / zero register depending on context. We + // reserve it and don't model the difference. + let builder = RegBankBuilder::new("IntRegs", "x") + .units(32) + .track_pressure(true); + let int_regs = regs.add_bank(builder); + + let builder = RegBankBuilder::new("FloatRegs", "v") + .units(32) + .track_pressure(true); + let float_regs = regs.add_bank(builder); + + let builder = RegBankBuilder::new("FlagRegs", "") + .units(1) + .names(vec!["nzcv"]) + .track_pressure(false); + let flag_reg = regs.add_bank(builder); + + let builder = RegClassBuilder::new_toplevel("GPR", int_regs); + regs.add_class(builder); + + let builder = RegClassBuilder::new_toplevel("FPR", float_regs); + regs.add_class(builder); + + let builder = RegClassBuilder::new_toplevel("FLAG", flag_reg); + regs.add_class(builder); + + regs.build() +} + +pub(crate) fn define(shared_defs: &mut SharedDefinitions) -> TargetIsa { + let settings = define_settings(&shared_defs.settings); + let regs = define_registers(); + + let inst_group = InstructionGroupBuilder::new(&mut shared_defs.all_instructions).build(); + + let mut a64 = CpuMode::new("A64"); + + // TODO refine these. + let expand_flags = shared_defs.transform_groups.by_name("expand_flags"); + let narrow_flags = shared_defs.transform_groups.by_name("narrow_flags"); + a64.legalize_monomorphic(expand_flags); + a64.legalize_default(narrow_flags); + + let cpu_modes = vec![a64]; + + // TODO implement arm64 recipes. + let recipes = Recipes::new(); + + // TODO implement arm64 encodings and predicates. + let encodings_predicates = InstructionPredicateMap::new(); + + TargetIsa::new( + "arm64", + inst_group, + settings, + regs, + recipes, + cpu_modes, + encodings_predicates, + ) +} diff --git a/third_party/rust/cranelift-codegen-meta/src/isa/mod.rs b/third_party/rust/cranelift-codegen-meta/src/isa/mod.rs new file mode 100644 index 0000000000..ed8db85f0d --- /dev/null +++ b/third_party/rust/cranelift-codegen-meta/src/isa/mod.rs @@ -0,0 +1,67 @@ +//! Define supported ISAs; includes ISA-specific instructions, encodings, registers, settings, etc. +use crate::cdsl::isa::TargetIsa; +use crate::shared::Definitions as SharedDefinitions; +use std::fmt; + +mod arm32; +mod arm64; +mod riscv; +pub(crate) mod x86; + +/// Represents known ISA target. +#[derive(PartialEq, Copy, Clone)] +pub enum Isa { + Riscv, + X86, + Arm32, + Arm64, +} + +impl Isa { + /// Creates isa target using name. 
+ pub fn from_name(name: &str) -> Option<Self> { + Isa::all() + .iter() + .cloned() + .find(|isa| isa.to_string() == name) + } + + /// Creates isa target from arch. + pub fn from_arch(arch: &str) -> Option<Self> { + match arch { + "riscv" => Some(Isa::Riscv), + "aarch64" => Some(Isa::Arm64), + x if ["x86_64", "i386", "i586", "i686"].contains(&x) => Some(Isa::X86), + x if x.starts_with("arm") || arch.starts_with("thumb") => Some(Isa::Arm32), + _ => None, + } + } + + /// Returns all supported isa targets. + pub fn all() -> &'static [Isa] { + &[Isa::Riscv, Isa::X86, Isa::Arm32, Isa::Arm64] + } +} + +impl fmt::Display for Isa { + // These names should be kept in sync with the crate features. + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + match *self { + Isa::Riscv => write!(f, "riscv"), + Isa::X86 => write!(f, "x86"), + Isa::Arm32 => write!(f, "arm32"), + Isa::Arm64 => write!(f, "arm64"), + } + } +} + +pub(crate) fn define(isas: &[Isa], shared_defs: &mut SharedDefinitions) -> Vec<TargetIsa> { + isas.iter() + .map(|isa| match isa { + Isa::Riscv => riscv::define(shared_defs), + Isa::X86 => x86::define(shared_defs), + Isa::Arm32 => arm32::define(shared_defs), + Isa::Arm64 => arm64::define(shared_defs), + }) + .collect() +} diff --git a/third_party/rust/cranelift-codegen-meta/src/isa/riscv/encodings.rs b/third_party/rust/cranelift-codegen-meta/src/isa/riscv/encodings.rs new file mode 100644 index 0000000000..c255ddb483 --- /dev/null +++ b/third_party/rust/cranelift-codegen-meta/src/isa/riscv/encodings.rs @@ -0,0 +1,431 @@ +use crate::cdsl::ast::{Apply, Expr, Literal, VarPool}; +use crate::cdsl::encodings::{Encoding, EncodingBuilder}; +use crate::cdsl::instructions::{ + Bindable, BoundInstruction, InstSpec, InstructionPredicateNode, InstructionPredicateRegistry, +}; +use crate::cdsl::recipes::{EncodingRecipeNumber, Recipes}; +use crate::cdsl::settings::SettingGroup; + +use crate::shared::types::Bool::B1; +use crate::shared::types::Float::{F32, F64}; +use crate::shared::types::Int::{I16, I32, I64, I8}; +use crate::shared::types::Reference::{R32, R64}; +use crate::shared::Definitions as SharedDefinitions; + +use super::recipes::RecipeGroup; + +pub(crate) struct PerCpuModeEncodings<'defs> { + pub inst_pred_reg: InstructionPredicateRegistry, + pub enc32: Vec<Encoding>, + pub enc64: Vec<Encoding>, + recipes: &'defs Recipes, +} + +impl<'defs> PerCpuModeEncodings<'defs> { + fn new(recipes: &'defs Recipes) -> Self { + Self { + inst_pred_reg: InstructionPredicateRegistry::new(), + enc32: Vec::new(), + enc64: Vec::new(), + recipes, + } + } + fn enc( + &self, + inst: impl Into<InstSpec>, + recipe: EncodingRecipeNumber, + bits: u16, + ) -> EncodingBuilder { + EncodingBuilder::new(inst.into(), recipe, bits) + } + fn add32(&mut self, encoding: EncodingBuilder) { + self.enc32 + .push(encoding.build(self.recipes, &mut self.inst_pred_reg)); + } + fn add64(&mut self, encoding: EncodingBuilder) { + self.enc64 + .push(encoding.build(self.recipes, &mut self.inst_pred_reg)); + } +} + +// The low 7 bits of a RISC-V instruction is the base opcode. All 32-bit instructions have 11 as +// the two low bits, with bits 6:2 determining the base opcode. +// +// Encbits for the 32-bit recipes are opcode[6:2] | (funct3 << 5) | ... +// The functions below encode the encbits. 
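To ground the bit layout described above, here is a reviewer's sketch (not part of the patch) of how the encbits produced by op_bits and friends relate to a full 32-bit R-type instruction word. The helper name r_type_word is invented; the real assembly is done by the put_* helpers in the codegen crate:

    // encbits = opcode[6:2] | (funct3 << 5) | (funct7 << 8); the fixed low bits are 0b11.
    fn r_type_word(encbits: u32, rs1: u32, rs2: u32, rd: u32) -> u32 {
        let opcode_6_2 = encbits & 0x1f;
        let funct3 = (encbits >> 5) & 0x7;
        let funct7 = encbits >> 8;
        0b11 | (opcode_6_2 << 2) | (rd << 7) | (funct3 << 12)
            | (rs1 << 15) | (rs2 << 20) | (funct7 << 25)
    }

    fn main() {
        // `add x3, x1, x2`: base opcode 0b0110011 (opcode[6:2] = 0b01100), funct3 = 0, funct7 = 0.
        assert_eq!(r_type_word(0b01100, 1, 2, 3), 0x0020_81b3);
    }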
+ +fn load_bits(funct3: u16) -> u16 { + assert!(funct3 <= 0b111); + funct3 << 5 +} + +fn store_bits(funct3: u16) -> u16 { + assert!(funct3 <= 0b111); + 0b01000 | (funct3 << 5) +} + +fn branch_bits(funct3: u16) -> u16 { + assert!(funct3 <= 0b111); + 0b11000 | (funct3 << 5) +} + +fn jalr_bits() -> u16 { + // This was previously accepting an argument funct3 of 3 bits and used the following formula: + //0b11001 | (funct3 << 5) + 0b11001 +} + +fn jal_bits() -> u16 { + 0b11011 +} + +fn opimm_bits(funct3: u16, funct7: u16) -> u16 { + assert!(funct3 <= 0b111); + 0b00100 | (funct3 << 5) | (funct7 << 8) +} + +fn opimm32_bits(funct3: u16, funct7: u16) -> u16 { + assert!(funct3 <= 0b111); + 0b00110 | (funct3 << 5) | (funct7 << 8) +} + +fn op_bits(funct3: u16, funct7: u16) -> u16 { + assert!(funct3 <= 0b111); + assert!(funct7 <= 0b111_1111); + 0b01100 | (funct3 << 5) | (funct7 << 8) +} + +fn op32_bits(funct3: u16, funct7: u16) -> u16 { + assert!(funct3 <= 0b111); + assert!(funct7 <= 0b111_1111); + 0b01110 | (funct3 << 5) | (funct7 << 8) +} + +fn lui_bits() -> u16 { + 0b01101 +} + +pub(crate) fn define<'defs>( + shared_defs: &'defs SharedDefinitions, + isa_settings: &SettingGroup, + recipes: &'defs RecipeGroup, +) -> PerCpuModeEncodings<'defs> { + // Instructions shorthands. + let shared = &shared_defs.instructions; + + let band = shared.by_name("band"); + let band_imm = shared.by_name("band_imm"); + let bor = shared.by_name("bor"); + let bor_imm = shared.by_name("bor_imm"); + let br_icmp = shared.by_name("br_icmp"); + let brz = shared.by_name("brz"); + let brnz = shared.by_name("brnz"); + let bxor = shared.by_name("bxor"); + let bxor_imm = shared.by_name("bxor_imm"); + let call = shared.by_name("call"); + let call_indirect = shared.by_name("call_indirect"); + let copy = shared.by_name("copy"); + let copy_nop = shared.by_name("copy_nop"); + let copy_to_ssa = shared.by_name("copy_to_ssa"); + let fill = shared.by_name("fill"); + let fill_nop = shared.by_name("fill_nop"); + let iadd = shared.by_name("iadd"); + let iadd_imm = shared.by_name("iadd_imm"); + let iconst = shared.by_name("iconst"); + let icmp = shared.by_name("icmp"); + let icmp_imm = shared.by_name("icmp_imm"); + let imul = shared.by_name("imul"); + let ishl = shared.by_name("ishl"); + let ishl_imm = shared.by_name("ishl_imm"); + let isub = shared.by_name("isub"); + let jump = shared.by_name("jump"); + let regmove = shared.by_name("regmove"); + let spill = shared.by_name("spill"); + let sshr = shared.by_name("sshr"); + let sshr_imm = shared.by_name("sshr_imm"); + let ushr = shared.by_name("ushr"); + let ushr_imm = shared.by_name("ushr_imm"); + let return_ = shared.by_name("return"); + + // Recipes shorthands, prefixed with r_. 
+ let r_copytossa = recipes.by_name("copytossa"); + let r_fillnull = recipes.by_name("fillnull"); + let r_icall = recipes.by_name("Icall"); + let r_icopy = recipes.by_name("Icopy"); + let r_ii = recipes.by_name("Ii"); + let r_iicmp = recipes.by_name("Iicmp"); + let r_iret = recipes.by_name("Iret"); + let r_irmov = recipes.by_name("Irmov"); + let r_iz = recipes.by_name("Iz"); + let r_gp_sp = recipes.by_name("GPsp"); + let r_gp_fi = recipes.by_name("GPfi"); + let r_r = recipes.by_name("R"); + let r_ricmp = recipes.by_name("Ricmp"); + let r_rshamt = recipes.by_name("Rshamt"); + let r_sb = recipes.by_name("SB"); + let r_sb_zero = recipes.by_name("SBzero"); + let r_stacknull = recipes.by_name("stacknull"); + let r_u = recipes.by_name("U"); + let r_uj = recipes.by_name("UJ"); + let r_uj_call = recipes.by_name("UJcall"); + + // Predicates shorthands. + let use_m = isa_settings.predicate_by_name("use_m"); + + // Definitions. + let mut e = PerCpuModeEncodings::new(&recipes.recipes); + + // Basic arithmetic binary instructions are encoded in an R-type instruction. + for &(inst, inst_imm, f3, f7) in &[ + (iadd, Some(iadd_imm), 0b000, 0b000_0000), + (isub, None, 0b000, 0b010_0000), + (bxor, Some(bxor_imm), 0b100, 0b000_0000), + (bor, Some(bor_imm), 0b110, 0b000_0000), + (band, Some(band_imm), 0b111, 0b000_0000), + ] { + e.add32(e.enc(inst.bind(I32), r_r, op_bits(f3, f7))); + e.add64(e.enc(inst.bind(I64), r_r, op_bits(f3, f7))); + + // Immediate versions for add/xor/or/and. + if let Some(inst_imm) = inst_imm { + e.add32(e.enc(inst_imm.bind(I32), r_ii, opimm_bits(f3, 0))); + e.add64(e.enc(inst_imm.bind(I64), r_ii, opimm_bits(f3, 0))); + } + } + + // 32-bit ops in RV64. + e.add64(e.enc(iadd.bind(I32), r_r, op32_bits(0b000, 0b000_0000))); + e.add64(e.enc(isub.bind(I32), r_r, op32_bits(0b000, 0b010_0000))); + // There are no andiw/oriw/xoriw variations. + e.add64(e.enc(iadd_imm.bind(I32), r_ii, opimm32_bits(0b000, 0))); + + // Use iadd_imm with %x0 to materialize constants. + e.add32(e.enc(iconst.bind(I32), r_iz, opimm_bits(0b0, 0))); + e.add64(e.enc(iconst.bind(I32), r_iz, opimm_bits(0b0, 0))); + e.add64(e.enc(iconst.bind(I64), r_iz, opimm_bits(0b0, 0))); + + // Dynamic shifts have the same masking semantics as the clif base instructions. + for &(inst, inst_imm, f3, f7) in &[ + (ishl, ishl_imm, 0b1, 0b0), + (ushr, ushr_imm, 0b101, 0b0), + (sshr, sshr_imm, 0b101, 0b10_0000), + ] { + e.add32(e.enc(inst.bind(I32).bind(I32), r_r, op_bits(f3, f7))); + e.add64(e.enc(inst.bind(I64).bind(I64), r_r, op_bits(f3, f7))); + e.add64(e.enc(inst.bind(I32).bind(I32), r_r, op32_bits(f3, f7))); + // Allow i32 shift amounts in 64-bit shifts. + e.add64(e.enc(inst.bind(I64).bind(I32), r_r, op_bits(f3, f7))); + e.add64(e.enc(inst.bind(I32).bind(I64), r_r, op32_bits(f3, f7))); + + // Immediate shifts. + e.add32(e.enc(inst_imm.bind(I32), r_rshamt, opimm_bits(f3, f7))); + e.add64(e.enc(inst_imm.bind(I64), r_rshamt, opimm_bits(f3, f7))); + e.add64(e.enc(inst_imm.bind(I32), r_rshamt, opimm32_bits(f3, f7))); + } + + // Signed and unsigned integer 'less than'. There are no 'w' variants for comparing 32-bit + // numbers in RV64. + { + let mut var_pool = VarPool::new(); + + // Helper that creates an instruction predicate for an instruction in the icmp family. 
+ let mut icmp_instp = |bound_inst: &BoundInstruction, + intcc_field: &'static str| + -> InstructionPredicateNode { + let x = var_pool.create("x"); + let y = var_pool.create("y"); + let cc = Literal::enumerator_for(&shared_defs.imm.intcc, intcc_field); + Apply::new( + bound_inst.clone().into(), + vec![Expr::Literal(cc), Expr::Var(x), Expr::Var(y)], + ) + .inst_predicate(&var_pool) + .unwrap() + }; + + let icmp_i32 = icmp.bind(I32); + let icmp_i64 = icmp.bind(I64); + e.add32( + e.enc(icmp_i32.clone(), r_ricmp, op_bits(0b010, 0b000_0000)) + .inst_predicate(icmp_instp(&icmp_i32, "slt")), + ); + e.add64( + e.enc(icmp_i64.clone(), r_ricmp, op_bits(0b010, 0b000_0000)) + .inst_predicate(icmp_instp(&icmp_i64, "slt")), + ); + + e.add32( + e.enc(icmp_i32.clone(), r_ricmp, op_bits(0b011, 0b000_0000)) + .inst_predicate(icmp_instp(&icmp_i32, "ult")), + ); + e.add64( + e.enc(icmp_i64.clone(), r_ricmp, op_bits(0b011, 0b000_0000)) + .inst_predicate(icmp_instp(&icmp_i64, "ult")), + ); + + // Immediate variants. + let icmp_i32 = icmp_imm.bind(I32); + let icmp_i64 = icmp_imm.bind(I64); + e.add32( + e.enc(icmp_i32.clone(), r_iicmp, opimm_bits(0b010, 0)) + .inst_predicate(icmp_instp(&icmp_i32, "slt")), + ); + e.add64( + e.enc(icmp_i64.clone(), r_iicmp, opimm_bits(0b010, 0)) + .inst_predicate(icmp_instp(&icmp_i64, "slt")), + ); + + e.add32( + e.enc(icmp_i32.clone(), r_iicmp, opimm_bits(0b011, 0)) + .inst_predicate(icmp_instp(&icmp_i32, "ult")), + ); + e.add64( + e.enc(icmp_i64.clone(), r_iicmp, opimm_bits(0b011, 0)) + .inst_predicate(icmp_instp(&icmp_i64, "ult")), + ); + } + + // Integer constants with the low 12 bits clear are materialized by lui. + e.add32(e.enc(iconst.bind(I32), r_u, lui_bits())); + e.add64(e.enc(iconst.bind(I32), r_u, lui_bits())); + e.add64(e.enc(iconst.bind(I64), r_u, lui_bits())); + + // "M" Standard Extension for Integer Multiplication and Division. + // Gated by the `use_m` flag. + e.add32( + e.enc(imul.bind(I32), r_r, op_bits(0b000, 0b0000_0001)) + .isa_predicate(use_m), + ); + e.add64( + e.enc(imul.bind(I64), r_r, op_bits(0b000, 0b0000_0001)) + .isa_predicate(use_m), + ); + e.add64( + e.enc(imul.bind(I32), r_r, op32_bits(0b000, 0b0000_0001)) + .isa_predicate(use_m), + ); + + // Control flow. + + // Unconditional branches. + e.add32(e.enc(jump, r_uj, jal_bits())); + e.add64(e.enc(jump, r_uj, jal_bits())); + e.add32(e.enc(call, r_uj_call, jal_bits())); + e.add64(e.enc(call, r_uj_call, jal_bits())); + + // Conditional branches. + { + let mut var_pool = VarPool::new(); + + // Helper that creates an instruction predicate for an instruction in the icmp family. 
+ let mut br_icmp_instp = |bound_inst: &BoundInstruction, + intcc_field: &'static str| + -> InstructionPredicateNode { + let x = var_pool.create("x"); + let y = var_pool.create("y"); + let dest = var_pool.create("dest"); + let args = var_pool.create("args"); + let cc = Literal::enumerator_for(&shared_defs.imm.intcc, intcc_field); + Apply::new( + bound_inst.clone().into(), + vec![ + Expr::Literal(cc), + Expr::Var(x), + Expr::Var(y), + Expr::Var(dest), + Expr::Var(args), + ], + ) + .inst_predicate(&var_pool) + .unwrap() + }; + + let br_icmp_i32 = br_icmp.bind(I32); + let br_icmp_i64 = br_icmp.bind(I64); + for &(cond, f3) in &[ + ("eq", 0b000), + ("ne", 0b001), + ("slt", 0b100), + ("sge", 0b101), + ("ult", 0b110), + ("uge", 0b111), + ] { + e.add32( + e.enc(br_icmp_i32.clone(), r_sb, branch_bits(f3)) + .inst_predicate(br_icmp_instp(&br_icmp_i32, cond)), + ); + e.add64( + e.enc(br_icmp_i64.clone(), r_sb, branch_bits(f3)) + .inst_predicate(br_icmp_instp(&br_icmp_i64, cond)), + ); + } + } + + for &(inst, f3) in &[(brz, 0b000), (brnz, 0b001)] { + e.add32(e.enc(inst.bind(I32), r_sb_zero, branch_bits(f3))); + e.add64(e.enc(inst.bind(I64), r_sb_zero, branch_bits(f3))); + e.add32(e.enc(inst.bind(B1), r_sb_zero, branch_bits(f3))); + e.add64(e.enc(inst.bind(B1), r_sb_zero, branch_bits(f3))); + } + + // Returns are a special case of jalr_bits using %x1 to hold the return address. + // The return address is provided by a special-purpose `link` return value that + // is added by legalize_signature(). + e.add32(e.enc(return_, r_iret, jalr_bits())); + e.add64(e.enc(return_, r_iret, jalr_bits())); + e.add32(e.enc(call_indirect.bind(I32), r_icall, jalr_bits())); + e.add64(e.enc(call_indirect.bind(I64), r_icall, jalr_bits())); + + // Spill and fill. + e.add32(e.enc(spill.bind(I32), r_gp_sp, store_bits(0b010))); + e.add64(e.enc(spill.bind(I32), r_gp_sp, store_bits(0b010))); + e.add64(e.enc(spill.bind(I64), r_gp_sp, store_bits(0b011))); + e.add32(e.enc(fill.bind(I32), r_gp_fi, load_bits(0b010))); + e.add64(e.enc(fill.bind(I32), r_gp_fi, load_bits(0b010))); + e.add64(e.enc(fill.bind(I64), r_gp_fi, load_bits(0b011))); + + // No-op fills, created by late-stage redundant-fill removal. + for &ty in &[I64, I32] { + e.add64(e.enc(fill_nop.bind(ty), r_fillnull, 0)); + e.add32(e.enc(fill_nop.bind(ty), r_fillnull, 0)); + } + e.add64(e.enc(fill_nop.bind(B1), r_fillnull, 0)); + e.add32(e.enc(fill_nop.bind(B1), r_fillnull, 0)); + + // Register copies. + e.add32(e.enc(copy.bind(I32), r_icopy, opimm_bits(0b000, 0))); + e.add64(e.enc(copy.bind(I64), r_icopy, opimm_bits(0b000, 0))); + e.add64(e.enc(copy.bind(I32), r_icopy, opimm32_bits(0b000, 0))); + + e.add32(e.enc(regmove.bind(I32), r_irmov, opimm_bits(0b000, 0))); + e.add64(e.enc(regmove.bind(I64), r_irmov, opimm_bits(0b000, 0))); + e.add64(e.enc(regmove.bind(I32), r_irmov, opimm32_bits(0b000, 0))); + + e.add32(e.enc(copy.bind(B1), r_icopy, opimm_bits(0b000, 0))); + e.add64(e.enc(copy.bind(B1), r_icopy, opimm_bits(0b000, 0))); + e.add32(e.enc(regmove.bind(B1), r_irmov, opimm_bits(0b000, 0))); + e.add64(e.enc(regmove.bind(B1), r_irmov, opimm_bits(0b000, 0))); + + // Stack-slot-to-the-same-stack-slot copy, which is guaranteed to turn + // into a no-op. + // The same encoding is generated for both the 64- and 32-bit architectures. 
+ for &ty in &[I64, I32, I16, I8] { + e.add32(e.enc(copy_nop.bind(ty), r_stacknull, 0)); + e.add64(e.enc(copy_nop.bind(ty), r_stacknull, 0)); + } + for &ty in &[F64, F32] { + e.add32(e.enc(copy_nop.bind(ty), r_stacknull, 0)); + e.add64(e.enc(copy_nop.bind(ty), r_stacknull, 0)); + } + + // Copy-to-SSA + e.add32(e.enc(copy_to_ssa.bind(I32), r_copytossa, opimm_bits(0b000, 0))); + e.add64(e.enc(copy_to_ssa.bind(I64), r_copytossa, opimm_bits(0b000, 0))); + e.add64(e.enc(copy_to_ssa.bind(I32), r_copytossa, opimm32_bits(0b000, 0))); + e.add32(e.enc(copy_to_ssa.bind(B1), r_copytossa, opimm_bits(0b000, 0))); + e.add64(e.enc(copy_to_ssa.bind(B1), r_copytossa, opimm_bits(0b000, 0))); + e.add32(e.enc(copy_to_ssa.bind(R32), r_copytossa, opimm_bits(0b000, 0))); + e.add64(e.enc(copy_to_ssa.bind(R64), r_copytossa, opimm_bits(0b000, 0))); + + e +} diff --git a/third_party/rust/cranelift-codegen-meta/src/isa/riscv/mod.rs b/third_party/rust/cranelift-codegen-meta/src/isa/riscv/mod.rs new file mode 100644 index 0000000000..801e61a3d2 --- /dev/null +++ b/third_party/rust/cranelift-codegen-meta/src/isa/riscv/mod.rs @@ -0,0 +1,134 @@ +use crate::cdsl::cpu_modes::CpuMode; +use crate::cdsl::instructions::InstructionGroupBuilder; +use crate::cdsl::isa::TargetIsa; +use crate::cdsl::regs::{IsaRegs, IsaRegsBuilder, RegBankBuilder, RegClassBuilder}; +use crate::cdsl::settings::{PredicateNode, SettingGroup, SettingGroupBuilder}; + +use crate::shared::types::Float::{F32, F64}; +use crate::shared::types::Int::{I32, I64}; +use crate::shared::Definitions as SharedDefinitions; + +mod encodings; +mod recipes; + +fn define_settings(shared: &SettingGroup) -> SettingGroup { + let mut setting = SettingGroupBuilder::new("riscv"); + + let supports_m = setting.add_bool( + "supports_m", + "CPU supports the 'M' extension (mul/div)", + false, + ); + let supports_a = setting.add_bool( + "supports_a", + "CPU supports the 'A' extension (atomics)", + false, + ); + let supports_f = setting.add_bool( + "supports_f", + "CPU supports the 'F' extension (float)", + false, + ); + let supports_d = setting.add_bool( + "supports_d", + "CPU supports the 'D' extension (double)", + false, + ); + + let enable_m = setting.add_bool( + "enable_m", + "Enable the use of 'M' instructions if available", + true, + ); + + setting.add_bool( + "enable_e", + "Enable the 'RV32E' instruction set with only 16 registers", + false, + ); + + let shared_enable_atomics = shared.get_bool("enable_atomics"); + let shared_enable_float = shared.get_bool("enable_float"); + let shared_enable_simd = shared.get_bool("enable_simd"); + + setting.add_predicate("use_m", predicate!(supports_m && enable_m)); + setting.add_predicate("use_a", predicate!(supports_a && shared_enable_atomics)); + setting.add_predicate("use_f", predicate!(supports_f && shared_enable_float)); + setting.add_predicate("use_d", predicate!(supports_d && shared_enable_float)); + setting.add_predicate( + "full_float", + predicate!(shared_enable_simd && supports_f && supports_d), + ); + + setting.build() +} + +fn define_registers() -> IsaRegs { + let mut regs = IsaRegsBuilder::new(); + + let builder = RegBankBuilder::new("IntRegs", "x") + .units(32) + .track_pressure(true); + let int_regs = regs.add_bank(builder); + + let builder = RegBankBuilder::new("FloatRegs", "f") + .units(32) + .track_pressure(true); + let float_regs = regs.add_bank(builder); + + let builder = RegClassBuilder::new_toplevel("GPR", int_regs); + regs.add_class(builder); + + let builder = RegClassBuilder::new_toplevel("FPR", float_regs); + 
regs.add_class(builder); + + regs.build() +} + +pub(crate) fn define(shared_defs: &mut SharedDefinitions) -> TargetIsa { + let settings = define_settings(&shared_defs.settings); + let regs = define_registers(); + + let inst_group = InstructionGroupBuilder::new(&mut shared_defs.all_instructions).build(); + + // CPU modes for 32-bit and 64-bit operation. + let mut rv_32 = CpuMode::new("RV32"); + let mut rv_64 = CpuMode::new("RV64"); + + let expand = shared_defs.transform_groups.by_name("expand"); + let narrow_no_flags = shared_defs.transform_groups.by_name("narrow_no_flags"); + + rv_32.legalize_monomorphic(expand); + rv_32.legalize_default(narrow_no_flags); + rv_32.legalize_type(I32, expand); + rv_32.legalize_type(F32, expand); + rv_32.legalize_type(F64, expand); + + rv_64.legalize_monomorphic(expand); + rv_64.legalize_default(narrow_no_flags); + rv_64.legalize_type(I32, expand); + rv_64.legalize_type(I64, expand); + rv_64.legalize_type(F32, expand); + rv_64.legalize_type(F64, expand); + + let recipes = recipes::define(shared_defs, ®s); + + let encodings = encodings::define(shared_defs, &settings, &recipes); + rv_32.set_encodings(encodings.enc32); + rv_64.set_encodings(encodings.enc64); + let encodings_predicates = encodings.inst_pred_reg.extract(); + + let recipes = recipes.collect(); + + let cpu_modes = vec![rv_32, rv_64]; + + TargetIsa::new( + "riscv", + inst_group, + settings, + regs, + recipes, + cpu_modes, + encodings_predicates, + ) +} diff --git a/third_party/rust/cranelift-codegen-meta/src/isa/riscv/recipes.rs b/third_party/rust/cranelift-codegen-meta/src/isa/riscv/recipes.rs new file mode 100644 index 0000000000..47acdbb042 --- /dev/null +++ b/third_party/rust/cranelift-codegen-meta/src/isa/riscv/recipes.rs @@ -0,0 +1,279 @@ +use std::collections::HashMap; + +use crate::cdsl::instructions::InstructionPredicate; +use crate::cdsl::recipes::{EncodingRecipeBuilder, EncodingRecipeNumber, Recipes, Stack}; +use crate::cdsl::regs::IsaRegs; +use crate::shared::Definitions as SharedDefinitions; + +/// An helper to create recipes and use them when defining the RISCV encodings. +pub(crate) struct RecipeGroup { + /// The actualy list of recipes explicitly created in this file. + pub recipes: Recipes, + + /// Provides fast lookup from a name to an encoding recipe. + name_to_recipe: HashMap<String, EncodingRecipeNumber>, +} + +impl RecipeGroup { + fn new() -> Self { + Self { + recipes: Recipes::new(), + name_to_recipe: HashMap::new(), + } + } + + fn push(&mut self, builder: EncodingRecipeBuilder) { + assert!( + self.name_to_recipe.get(&builder.name).is_none(), + format!("riscv recipe '{}' created twice", builder.name) + ); + let name = builder.name.clone(); + let number = self.recipes.push(builder.build()); + self.name_to_recipe.insert(name, number); + } + + pub fn by_name(&self, name: &str) -> EncodingRecipeNumber { + *self + .name_to_recipe + .get(name) + .unwrap_or_else(|| panic!("unknown riscv recipe name {}", name)) + } + + pub fn collect(self) -> Recipes { + self.recipes + } +} + +pub(crate) fn define(shared_defs: &SharedDefinitions, regs: &IsaRegs) -> RecipeGroup { + let formats = &shared_defs.formats; + + // Register classes shorthands. + let gpr = regs.class_by_name("GPR"); + + // Definitions. + let mut recipes = RecipeGroup::new(); + + // R-type 32-bit instructions: These are mostly binary arithmetic instructions. 
+ // The encbits are `opcode[6:2] | (funct3 << 5) | (funct7 << 8) + recipes.push( + EncodingRecipeBuilder::new("R", &formats.binary, 4) + .operands_in(vec![gpr, gpr]) + .operands_out(vec![gpr]) + .emit("put_r(bits, in_reg0, in_reg1, out_reg0, sink);"), + ); + + // R-type with an immediate shift amount instead of rs2. + recipes.push( + EncodingRecipeBuilder::new("Rshamt", &formats.binary_imm64, 4) + .operands_in(vec![gpr]) + .operands_out(vec![gpr]) + .emit("put_rshamt(bits, in_reg0, imm.into(), out_reg0, sink);"), + ); + + // R-type encoding of an integer comparison. + recipes.push( + EncodingRecipeBuilder::new("Ricmp", &formats.int_compare, 4) + .operands_in(vec![gpr, gpr]) + .operands_out(vec![gpr]) + .emit("put_r(bits, in_reg0, in_reg1, out_reg0, sink);"), + ); + + recipes.push( + EncodingRecipeBuilder::new("Ii", &formats.binary_imm64, 4) + .operands_in(vec![gpr]) + .operands_out(vec![gpr]) + .inst_predicate(InstructionPredicate::new_is_signed_int( + &*formats.binary_imm64, + "imm", + 12, + 0, + )) + .emit("put_i(bits, in_reg0, imm.into(), out_reg0, sink);"), + ); + + // I-type instruction with a hardcoded %x0 rs1. + recipes.push( + EncodingRecipeBuilder::new("Iz", &formats.unary_imm, 4) + .operands_out(vec![gpr]) + .inst_predicate(InstructionPredicate::new_is_signed_int( + &formats.unary_imm, + "imm", + 12, + 0, + )) + .emit("put_i(bits, 0, imm.into(), out_reg0, sink);"), + ); + + // I-type encoding of an integer comparison. + recipes.push( + EncodingRecipeBuilder::new("Iicmp", &formats.int_compare_imm, 4) + .operands_in(vec![gpr]) + .operands_out(vec![gpr]) + .inst_predicate(InstructionPredicate::new_is_signed_int( + &formats.int_compare_imm, + "imm", + 12, + 0, + )) + .emit("put_i(bits, in_reg0, imm.into(), out_reg0, sink);"), + ); + + // I-type encoding for `jalr` as a return instruction. We won't use the immediate offset. The + // variable return values are not encoded. + recipes.push( + EncodingRecipeBuilder::new("Iret", &formats.multiary, 4).emit( + r#" + // Return instructions are always a jalr to %x1. + // The return address is provided as a special-purpose link argument. + put_i( + bits, + 1, // rs1 = %x1 + 0, // no offset. + 0, // rd = %x0: no address written. + sink, + ); + "#, + ), + ); + + // I-type encoding for `jalr` as a call_indirect. + recipes.push( + EncodingRecipeBuilder::new("Icall", &formats.call_indirect, 4) + .operands_in(vec![gpr]) + .emit( + r#" + // call_indirect instructions are jalr with rd=%x1. + put_i( + bits, + in_reg0, + 0, // no offset. + 1, // rd = %x1: link register. + sink, + ); + "#, + ), + ); + + // Copy of a GPR is implemented as addi x, 0. + recipes.push( + EncodingRecipeBuilder::new("Icopy", &formats.unary, 4) + .operands_in(vec![gpr]) + .operands_out(vec![gpr]) + .emit("put_i(bits, in_reg0, 0, out_reg0, sink);"), + ); + + // Same for a GPR regmove. + recipes.push( + EncodingRecipeBuilder::new("Irmov", &formats.reg_move, 4) + .operands_in(vec![gpr]) + .emit("put_i(bits, src, 0, dst, sink);"), + ); + + // Same for copy-to-SSA -- GPR regmove. + recipes.push( + EncodingRecipeBuilder::new("copytossa", &formats.copy_to_ssa, 4) + // No operands_in to mention, because a source register is specified directly. + .operands_out(vec![gpr]) + .emit("put_i(bits, src, 0, out_reg0, sink);"), + ); + + // U-type instructions have a 20-bit immediate that targets bits 12-31. 
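The two immediate predicates in play here are easy to mix up: the Ii/Iz/Iicmp recipes above require a plain signed 12-bit immediate, while the U recipe pushed just below accepts a signed 32-bit value whose low 12 bits are clear. A reviewer's sketch of the two ranges (function names invented; reading new_is_signed_int(.., bits, scale) as "signed `bits`-bit value that is a multiple of 2^scale" is an assumption based on its use here):

    fn fits_i_type(imm: i64) -> bool {
        // new_is_signed_int(.., 12, 0): addi-style signed 12-bit immediate.
        imm >= -(1i64 << 11) && imm < (1i64 << 11)
    }

    fn fits_u_type(imm: i64) -> bool {
        // new_is_signed_int(.., 32, 12): lui-style, signed 32 bits with the low 12 clear.
        (imm & 0xfff) == 0 && imm >= -(1i64 << 31) && imm < (1i64 << 31)
    }

    fn main() {
        assert!(fits_i_type(-7) && !fits_i_type(4096));
        assert!(fits_u_type(0x1234_5000) && !fits_u_type(0x1234_5678));
    }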
+ recipes.push( + EncodingRecipeBuilder::new("U", &formats.unary_imm, 4) + .operands_out(vec![gpr]) + .inst_predicate(InstructionPredicate::new_is_signed_int( + &formats.unary_imm, + "imm", + 32, + 12, + )) + .emit("put_u(bits, imm.into(), out_reg0, sink);"), + ); + + // UJ-type unconditional branch instructions. + recipes.push( + EncodingRecipeBuilder::new("UJ", &formats.jump, 4) + .branch_range((0, 21)) + .emit( + r#" + let dest = i64::from(func.offsets[destination]); + let disp = dest - i64::from(sink.offset()); + put_uj(bits, disp, 0, sink); + "#, + ), + ); + + recipes.push(EncodingRecipeBuilder::new("UJcall", &formats.call, 4).emit( + r#" + sink.reloc_external(func.srclocs[inst], + Reloc::RiscvCall, + &func.dfg.ext_funcs[func_ref].name, + 0); + // rd=%x1 is the standard link register. + put_uj(bits, 0, 1, sink); + "#, + )); + + // SB-type branch instructions. + recipes.push( + EncodingRecipeBuilder::new("SB", &formats.branch_icmp, 4) + .operands_in(vec![gpr, gpr]) + .branch_range((0, 13)) + .emit( + r#" + let dest = i64::from(func.offsets[destination]); + let disp = dest - i64::from(sink.offset()); + put_sb(bits, disp, in_reg0, in_reg1, sink); + "#, + ), + ); + + // SB-type branch instruction with rs2 fixed to zero. + recipes.push( + EncodingRecipeBuilder::new("SBzero", &formats.branch, 4) + .operands_in(vec![gpr]) + .branch_range((0, 13)) + .emit( + r#" + let dest = i64::from(func.offsets[destination]); + let disp = dest - i64::from(sink.offset()); + put_sb(bits, disp, in_reg0, 0, sink); + "#, + ), + ); + + // Spill of a GPR. + recipes.push( + EncodingRecipeBuilder::new("GPsp", &formats.unary, 4) + .operands_in(vec![gpr]) + .operands_out(vec![Stack::new(gpr)]) + .emit("unimplemented!();"), + ); + + // Fill of a GPR. + recipes.push( + EncodingRecipeBuilder::new("GPfi", &formats.unary, 4) + .operands_in(vec![Stack::new(gpr)]) + .operands_out(vec![gpr]) + .emit("unimplemented!();"), + ); + + // Stack-slot to same stack-slot copy, which is guaranteed to turn into a no-op. + recipes.push( + EncodingRecipeBuilder::new("stacknull", &formats.unary, 0) + .operands_in(vec![Stack::new(gpr)]) + .operands_out(vec![Stack::new(gpr)]) + .emit(""), + ); + + // No-op fills, created by late-stage redundant-fill removal. 
+ recipes.push( + EncodingRecipeBuilder::new("fillnull", &formats.unary, 0) + .operands_in(vec![Stack::new(gpr)]) + .operands_out(vec![gpr]) + .clobbers_flags(false) + .emit(""), + ); + + recipes +} diff --git a/third_party/rust/cranelift-codegen-meta/src/isa/x86/encodings.rs b/third_party/rust/cranelift-codegen-meta/src/isa/x86/encodings.rs new file mode 100644 index 0000000000..9ee12656c0 --- /dev/null +++ b/third_party/rust/cranelift-codegen-meta/src/isa/x86/encodings.rs @@ -0,0 +1,2726 @@ +#![allow(non_snake_case)] + +use cranelift_codegen_shared::condcodes::IntCC; +use std::collections::HashMap; + +use crate::cdsl::encodings::{Encoding, EncodingBuilder}; +use crate::cdsl::instructions::{ + vector, Bindable, Immediate, InstSpec, Instruction, InstructionGroup, InstructionPredicate, + InstructionPredicateNode, InstructionPredicateRegistry, +}; +use crate::cdsl::recipes::{EncodingRecipe, EncodingRecipeNumber, Recipes}; +use crate::cdsl::settings::{SettingGroup, SettingPredicateNumber}; +use crate::cdsl::types::{LaneType, ValueType}; +use crate::shared::types::Bool::{B1, B16, B32, B64, B8}; +use crate::shared::types::Float::{F32, F64}; +use crate::shared::types::Int::{I16, I32, I64, I8}; +use crate::shared::types::Reference::{R32, R64}; +use crate::shared::Definitions as SharedDefinitions; + +use crate::isa::x86::opcodes::*; + +use super::recipes::{RecipeGroup, Template}; +use crate::cdsl::instructions::BindParameter::Any; + +pub(crate) struct PerCpuModeEncodings { + pub enc32: Vec<Encoding>, + pub enc64: Vec<Encoding>, + pub recipes: Recipes, + recipes_by_name: HashMap<String, EncodingRecipeNumber>, + pub inst_pred_reg: InstructionPredicateRegistry, +} + +impl PerCpuModeEncodings { + fn new() -> Self { + Self { + enc32: Vec::new(), + enc64: Vec::new(), + recipes: Recipes::new(), + recipes_by_name: HashMap::new(), + inst_pred_reg: InstructionPredicateRegistry::new(), + } + } + + fn add_recipe(&mut self, recipe: EncodingRecipe) -> EncodingRecipeNumber { + if let Some(found_index) = self.recipes_by_name.get(&recipe.name) { + assert!( + self.recipes[*found_index] == recipe, + format!( + "trying to insert different recipes with a same name ({})", + recipe.name + ) + ); + *found_index + } else { + let recipe_name = recipe.name.clone(); + let index = self.recipes.push(recipe); + self.recipes_by_name.insert(recipe_name, index); + index + } + } + + fn make_encoding<T>( + &mut self, + inst: InstSpec, + template: Template, + builder_closure: T, + ) -> Encoding + where + T: FnOnce(EncodingBuilder) -> EncodingBuilder, + { + let (recipe, bits) = template.build(); + let recipe_number = self.add_recipe(recipe); + let builder = EncodingBuilder::new(inst, recipe_number, bits); + builder_closure(builder).build(&self.recipes, &mut self.inst_pred_reg) + } + + fn enc32_func<T>(&mut self, inst: impl Into<InstSpec>, template: Template, builder_closure: T) + where + T: FnOnce(EncodingBuilder) -> EncodingBuilder, + { + let encoding = self.make_encoding(inst.into(), template, builder_closure); + self.enc32.push(encoding); + } + fn enc32(&mut self, inst: impl Into<InstSpec>, template: Template) { + self.enc32_func(inst, template, |x| x); + } + fn enc32_isap( + &mut self, + inst: impl Into<InstSpec>, + template: Template, + isap: SettingPredicateNumber, + ) { + self.enc32_func(inst, template, |encoding| encoding.isa_predicate(isap)); + } + fn enc32_instp( + &mut self, + inst: impl Into<InstSpec>, + template: Template, + instp: InstructionPredicateNode, + ) { + self.enc32_func(inst, template, |encoding| 
encoding.inst_predicate(instp)); + } + fn enc32_rec(&mut self, inst: impl Into<InstSpec>, recipe: &EncodingRecipe, bits: u16) { + let recipe_number = self.add_recipe(recipe.clone()); + let builder = EncodingBuilder::new(inst.into(), recipe_number, bits); + let encoding = builder.build(&self.recipes, &mut self.inst_pred_reg); + self.enc32.push(encoding); + } + + fn enc64_func<T>(&mut self, inst: impl Into<InstSpec>, template: Template, builder_closure: T) + where + T: FnOnce(EncodingBuilder) -> EncodingBuilder, + { + let encoding = self.make_encoding(inst.into(), template, builder_closure); + self.enc64.push(encoding); + } + fn enc64(&mut self, inst: impl Into<InstSpec>, template: Template) { + self.enc64_func(inst, template, |x| x); + } + fn enc64_isap( + &mut self, + inst: impl Into<InstSpec>, + template: Template, + isap: SettingPredicateNumber, + ) { + self.enc64_func(inst, template, |encoding| encoding.isa_predicate(isap)); + } + fn enc64_instp( + &mut self, + inst: impl Into<InstSpec>, + template: Template, + instp: InstructionPredicateNode, + ) { + self.enc64_func(inst, template, |encoding| encoding.inst_predicate(instp)); + } + fn enc64_rec(&mut self, inst: impl Into<InstSpec>, recipe: &EncodingRecipe, bits: u16) { + let recipe_number = self.add_recipe(recipe.clone()); + let builder = EncodingBuilder::new(inst.into(), recipe_number, bits); + let encoding = builder.build(&self.recipes, &mut self.inst_pred_reg); + self.enc64.push(encoding); + } + + /// Adds I32/I64 encodings as appropriate for a typed instruction. + /// The REX prefix is always inferred at runtime. + /// + /// Add encodings for `inst.i32` to X86_32. + /// Add encodings for `inst.i32` to X86_64 with optional, inferred REX. + /// Add encodings for `inst.i64` to X86_64 with a REX.W prefix. + fn enc_i32_i64(&mut self, inst: impl Into<InstSpec>, template: Template) { + let inst: InstSpec = inst.into(); + + // I32 on x86: no REX prefix. + self.enc32(inst.bind(I32), template.infer_rex()); + + // I32 on x86_64: REX.W unset; REX.RXB determined at runtime from registers. + self.enc64(inst.bind(I32), template.infer_rex()); + + // I64 on x86_64: REX.W set; REX.RXB determined at runtime from registers. + self.enc64(inst.bind(I64), template.rex().w()); + } + + /// Adds I32/I64 encodings as appropriate for a typed instruction. + /// All variants of REX prefix are explicitly emitted, not inferred. + /// + /// Add encodings for `inst.i32` to X86_32. + /// Add encodings for `inst.i32` to X86_64 with and without REX. + /// Add encodings for `inst.i64` to X86_64 with and without REX. + fn enc_i32_i64_explicit_rex(&mut self, inst: impl Into<InstSpec>, template: Template) { + let inst: InstSpec = inst.into(); + self.enc32(inst.bind(I32), template.nonrex()); + + // REX-less encoding must come after REX encoding so we don't use it by default. + // Otherwise reg-alloc would never use r8 and up. + self.enc64(inst.bind(I32), template.rex()); + self.enc64(inst.bind(I32), template.nonrex()); + self.enc64(inst.bind(I64), template.rex().w()); + } + + /// Adds B32/B64 encodings as appropriate for a typed instruction. + /// The REX prefix is always inferred at runtime. + /// + /// Adds encoding for `inst.b32` to X86_32. + /// Adds encoding for `inst.b32` to X86_64 with optional, inferred REX. + /// Adds encoding for `inst.b64` to X86_64 with a REX.W prefix. + fn enc_b32_b64(&mut self, inst: impl Into<InstSpec>, template: Template) { + let inst: InstSpec = inst.into(); + + // B32 on x86: no REX prefix. 
+ self.enc32(inst.bind(B32), template.infer_rex()); + + // B32 on x86_64: REX.W unset; REX.RXB determined at runtime from registers. + self.enc64(inst.bind(B32), template.infer_rex()); + + // B64 on x86_64: REX.W set; REX.RXB determined at runtime from registers. + self.enc64(inst.bind(B64), template.rex().w()); + } + + /// Add encodings for `inst.i32` to X86_32. + /// Add encodings for `inst.i32` to X86_64 with a REX prefix. + /// Add encodings for `inst.i64` to X86_64 with a REX.W prefix. + fn enc_i32_i64_rex_only(&mut self, inst: impl Into<InstSpec>, template: Template) { + let inst: InstSpec = inst.into(); + self.enc32(inst.bind(I32), template.nonrex()); + self.enc64(inst.bind(I32), template.rex()); + self.enc64(inst.bind(I64), template.rex().w()); + } + + /// Add encodings for `inst.i32` to X86_32. + /// Add encodings for `inst.i32` to X86_64 with and without REX. + /// Add encodings for `inst.i64` to X86_64 with a REX.W prefix. + fn enc_i32_i64_instp( + &mut self, + inst: &Instruction, + template: Template, + instp: InstructionPredicateNode, + ) { + self.enc32_func(inst.bind(I32), template.nonrex(), |builder| { + builder.inst_predicate(instp.clone()) + }); + + // REX-less encoding must come after REX encoding so we don't use it by default. Otherwise + // reg-alloc would never use r8 and up. + self.enc64_func(inst.bind(I32), template.rex(), |builder| { + builder.inst_predicate(instp.clone()) + }); + self.enc64_func(inst.bind(I32), template.nonrex(), |builder| { + builder.inst_predicate(instp.clone()) + }); + self.enc64_func(inst.bind(I64), template.rex().w(), |builder| { + builder.inst_predicate(instp) + }); + } + + /// Add encodings for `inst.r32` to X86_32. + /// Add encodings for `inst.r32` to X86_64 with and without REX. + /// Add encodings for `inst.r64` to X86_64 with a REX.W prefix. + fn enc_r32_r64_instp( + &mut self, + inst: &Instruction, + template: Template, + instp: InstructionPredicateNode, + ) { + self.enc32_func(inst.bind(R32), template.nonrex(), |builder| { + builder.inst_predicate(instp.clone()) + }); + + // REX-less encoding must come after REX encoding so we don't use it by default. Otherwise + // reg-alloc would never use r8 and up. + self.enc64_func(inst.bind(R32), template.rex(), |builder| { + builder.inst_predicate(instp.clone()) + }); + self.enc64_func(inst.bind(R32), template.nonrex(), |builder| { + builder.inst_predicate(instp.clone()) + }); + self.enc64_func(inst.bind(R64), template.rex().w(), |builder| { + builder.inst_predicate(instp) + }); + } + + /// Add encodings for `inst.r32` to X86_32. + /// Add encodings for `inst.r64` to X86_64 with a REX.W prefix. + fn enc_r32_r64_rex_only(&mut self, inst: impl Into<InstSpec>, template: Template) { + let inst: InstSpec = inst.into(); + self.enc32(inst.bind(R32), template.nonrex()); + self.enc64(inst.bind(R64), template.rex().w()); + } + + fn enc_r32_r64_ld_st(&mut self, inst: &Instruction, w_bit: bool, template: Template) { + self.enc32(inst.clone().bind(R32).bind(Any), template.clone()); + + // REX-less encoding must come after REX encoding so we don't use it by + // default. Otherwise reg-alloc would never use r8 and up. 
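+ // (The encoding listed first is the one used by default, and only the REX
+ // form can address r8..r15.)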
+ self.enc64(inst.clone().bind(R32).bind(Any), template.clone().rex()); + self.enc64(inst.clone().bind(R32).bind(Any), template.clone()); + + if w_bit { + self.enc64(inst.clone().bind(R64).bind(Any), template.rex().w()); + } else { + self.enc64(inst.clone().bind(R64).bind(Any), template.clone().rex()); + self.enc64(inst.clone().bind(R64).bind(Any), template); + } + } + + /// Add encodings for `inst` to X86_64 with and without a REX prefix. + fn enc_x86_64(&mut self, inst: impl Into<InstSpec> + Clone, template: Template) { + // See above comment about the ordering of rex vs non-rex encodings. + self.enc64(inst.clone(), template.rex()); + self.enc64(inst, template); + } + + /// Add encodings for `inst` to X86_64 with and without a REX prefix. + fn enc_x86_64_instp( + &mut self, + inst: impl Clone + Into<InstSpec>, + template: Template, + instp: InstructionPredicateNode, + ) { + // See above comment about the ordering of rex vs non-rex encodings. + self.enc64_func(inst.clone(), template.rex(), |builder| { + builder.inst_predicate(instp.clone()) + }); + self.enc64_func(inst, template, |builder| builder.inst_predicate(instp)); + } + fn enc_x86_64_isap( + &mut self, + inst: impl Clone + Into<InstSpec>, + template: Template, + isap: SettingPredicateNumber, + ) { + // See above comment about the ordering of rex vs non-rex encodings. + self.enc64_isap(inst.clone(), template.rex(), isap); + self.enc64_isap(inst, template, isap); + } + + /// Add all three encodings for `inst`: + /// - X86_32 + /// - X86_64 with and without the REX prefix. + fn enc_both(&mut self, inst: impl Clone + Into<InstSpec>, template: Template) { + self.enc32(inst.clone(), template.clone()); + self.enc_x86_64(inst, template); + } + fn enc_both_isap( + &mut self, + inst: impl Clone + Into<InstSpec>, + template: Template, + isap: SettingPredicateNumber, + ) { + self.enc32_isap(inst.clone(), template.clone(), isap); + self.enc_x86_64_isap(inst, template, isap); + } + fn enc_both_instp( + &mut self, + inst: impl Clone + Into<InstSpec>, + template: Template, + instp: InstructionPredicateNode, + ) { + self.enc32_instp(inst.clone(), template.clone(), instp.clone()); + self.enc_x86_64_instp(inst, template, instp); + } + + /// Add two encodings for `inst`: + /// - X86_32, no REX prefix, since this is not valid in 32-bit mode. + /// - X86_64, dynamically infer the REX prefix. + fn enc_both_inferred(&mut self, inst: impl Clone + Into<InstSpec>, template: Template) { + self.enc32(inst.clone(), template.clone()); + self.enc64(inst, template.infer_rex()); + } + fn enc_both_inferred_maybe_isap( + &mut self, + inst: impl Clone + Into<InstSpec>, + template: Template, + isap: Option<SettingPredicateNumber>, + ) { + self.enc32_maybe_isap(inst.clone(), template.clone(), isap); + self.enc64_maybe_isap(inst, template.infer_rex(), isap); + } + + /// Add two encodings for `inst`: + /// - X86_32 + /// - X86_64 with the REX prefix. + fn enc_both_rex_only(&mut self, inst: impl Clone + Into<InstSpec>, template: Template) { + self.enc32(inst.clone(), template.clone()); + self.enc64(inst, template.rex()); + } + + /// Add encodings for `inst.i32` to X86_32. + /// Add encodings for `inst.i32` to X86_64 with and without REX. + /// Add encodings for `inst.i64` to X86_64 with a REX prefix, using the `w_bit` + /// argument to determine whether or not to set the REX.W bit. 
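+ /// When `w_bit` is false, the `inst.i64` binding is emitted both with a plain
+ /// REX prefix and without one, which suits narrow loads/stores whose access
+ /// width is fixed by the opcode rather than by REX.W.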
+ fn enc_i32_i64_ld_st(&mut self, inst: &Instruction, w_bit: bool, template: Template) { + self.enc32(inst.clone().bind(I32).bind(Any), template.clone()); + + // REX-less encoding must come after REX encoding so we don't use it by + // default. Otherwise reg-alloc would never use r8 and up. + self.enc64(inst.clone().bind(I32).bind(Any), template.clone().rex()); + self.enc64(inst.clone().bind(I32).bind(Any), template.clone()); + + if w_bit { + self.enc64(inst.clone().bind(I64).bind(Any), template.rex().w()); + } else { + self.enc64(inst.clone().bind(I64).bind(Any), template.clone().rex()); + self.enc64(inst.clone().bind(I64).bind(Any), template); + } + } + + /// Add the same encoding/recipe pairing to both X86_32 and X86_64 + fn enc_32_64_rec( + &mut self, + inst: impl Clone + Into<InstSpec>, + recipe: &EncodingRecipe, + bits: u16, + ) { + self.enc32_rec(inst.clone(), recipe, bits); + self.enc64_rec(inst, recipe, bits); + } + + /// Add the same encoding to both X86_32 and X86_64; assumes configuration (e.g. REX, operand binding) has already happened + fn enc_32_64_func<T>( + &mut self, + inst: impl Clone + Into<InstSpec>, + template: Template, + builder_closure: T, + ) where + T: FnOnce(EncodingBuilder) -> EncodingBuilder, + { + let encoding = self.make_encoding(inst.into(), template, builder_closure); + self.enc32.push(encoding.clone()); + self.enc64.push(encoding); + } + + /// Add the same encoding to both X86_32 and X86_64; assumes configuration (e.g. REX, operand + /// binding) has already happened. + fn enc_32_64_maybe_isap( + &mut self, + inst: impl Clone + Into<InstSpec>, + template: Template, + isap: Option<SettingPredicateNumber>, + ) { + self.enc32_maybe_isap(inst.clone(), template.clone(), isap); + self.enc64_maybe_isap(inst, template, isap); + } + + fn enc32_maybe_isap( + &mut self, + inst: impl Into<InstSpec>, + template: Template, + isap: Option<SettingPredicateNumber>, + ) { + match isap { + None => self.enc32(inst, template), + Some(isap) => self.enc32_isap(inst, template, isap), + } + } + + fn enc64_maybe_isap( + &mut self, + inst: impl Into<InstSpec>, + template: Template, + isap: Option<SettingPredicateNumber>, + ) { + match isap { + None => self.enc64(inst, template), + Some(isap) => self.enc64_isap(inst, template, isap), + } + } +} + +// Definitions. + +#[inline(never)] +fn define_moves(e: &mut PerCpuModeEncodings, shared_defs: &SharedDefinitions, r: &RecipeGroup) { + let shared = &shared_defs.instructions; + let formats = &shared_defs.formats; + + // Shorthands for instructions. + let bconst = shared.by_name("bconst"); + let bint = shared.by_name("bint"); + let copy = shared.by_name("copy"); + let copy_special = shared.by_name("copy_special"); + let copy_to_ssa = shared.by_name("copy_to_ssa"); + let get_pinned_reg = shared.by_name("get_pinned_reg"); + let iconst = shared.by_name("iconst"); + let ireduce = shared.by_name("ireduce"); + let regmove = shared.by_name("regmove"); + let sextend = shared.by_name("sextend"); + let set_pinned_reg = shared.by_name("set_pinned_reg"); + let uextend = shared.by_name("uextend"); + let dummy_sarg_t = shared.by_name("dummy_sarg_t"); + + // Shorthands for recipes. 
+ let rec_copysp = r.template("copysp"); + let rec_furm_reg_to_ssa = r.template("furm_reg_to_ssa"); + let rec_get_pinned_reg = r.recipe("get_pinned_reg"); + let rec_null = r.recipe("null"); + let rec_pu_id = r.template("pu_id"); + let rec_pu_id_bool = r.template("pu_id_bool"); + let rec_pu_iq = r.template("pu_iq"); + let rec_rmov = r.template("rmov"); + let rec_set_pinned_reg = r.template("set_pinned_reg"); + let rec_u_id = r.template("u_id"); + let rec_u_id_z = r.template("u_id_z"); + let rec_umr = r.template("umr"); + let rec_umr_reg_to_ssa = r.template("umr_reg_to_ssa"); + let rec_urm_noflags = r.template("urm_noflags"); + let rec_urm_noflags_abcd = r.template("urm_noflags_abcd"); + let rec_dummy_sarg_t = r.recipe("dummy_sarg_t"); + + // The pinned reg is fixed to a certain value entirely user-controlled, so it generates nothing! + e.enc64_rec(get_pinned_reg.bind(I64), rec_get_pinned_reg, 0); + e.enc_x86_64( + set_pinned_reg.bind(I64), + rec_set_pinned_reg.opcodes(&MOV_STORE).rex().w(), + ); + + e.enc_i32_i64(copy, rec_umr.opcodes(&MOV_STORE)); + e.enc_r32_r64_rex_only(copy, rec_umr.opcodes(&MOV_STORE)); + e.enc_both(copy.bind(B1), rec_umr.opcodes(&MOV_STORE)); + e.enc_both(copy.bind(I8), rec_umr.opcodes(&MOV_STORE)); + e.enc_both(copy.bind(I16), rec_umr.opcodes(&MOV_STORE)); + + // TODO For x86-64, only define REX forms for now, since we can't describe the + // special regunit immediate operands with the current constraint language. + for &ty in &[I8, I16, I32] { + e.enc32(regmove.bind(ty), rec_rmov.opcodes(&MOV_STORE)); + e.enc64(regmove.bind(ty), rec_rmov.opcodes(&MOV_STORE).rex()); + } + for &ty in &[B8, B16, B32] { + e.enc32(regmove.bind(ty), rec_rmov.opcodes(&MOV_STORE)); + e.enc64(regmove.bind(ty), rec_rmov.opcodes(&MOV_STORE).rex()); + } + e.enc64(regmove.bind(I64), rec_rmov.opcodes(&MOV_STORE).rex().w()); + e.enc_both(regmove.bind(B1), rec_rmov.opcodes(&MOV_STORE)); + e.enc_both(regmove.bind(I8), rec_rmov.opcodes(&MOV_STORE)); + e.enc32(regmove.bind(R32), rec_rmov.opcodes(&MOV_STORE)); + e.enc64(regmove.bind(R32), rec_rmov.opcodes(&MOV_STORE).rex()); + e.enc64(regmove.bind(R64), rec_rmov.opcodes(&MOV_STORE).rex().w()); + + // Immediate constants. + e.enc32(iconst.bind(I32), rec_pu_id.opcodes(&MOV_IMM)); + + e.enc64(iconst.bind(I32), rec_pu_id.rex().opcodes(&MOV_IMM)); + e.enc64(iconst.bind(I32), rec_pu_id.opcodes(&MOV_IMM)); + + // The 32-bit immediate movl also zero-extends to 64 bits. + let is_unsigned_int32 = + InstructionPredicate::new_is_unsigned_int(&*formats.unary_imm, "imm", 32, 0); + + e.enc64_func( + iconst.bind(I64), + rec_pu_id.opcodes(&MOV_IMM).rex(), + |encoding| encoding.inst_predicate(is_unsigned_int32.clone()), + ); + e.enc64_func(iconst.bind(I64), rec_pu_id.opcodes(&MOV_IMM), |encoding| { + encoding.inst_predicate(is_unsigned_int32) + }); + + // Sign-extended 32-bit immediate. + e.enc64( + iconst.bind(I64), + rec_u_id.rex().opcodes(&MOV_IMM_SIGNEXTEND).rrr(0).w(), + ); + + // Finally, the MOV_IMM opcode takes an 8-byte immediate with a REX.W prefix. 
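+ // Together, these give `iconst.i64` three encodings: the zero-extending
+ // 32-bit move, the sign-extended 32-bit immediate, and the full 8-byte
+ // immediate below as the general fallback.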
+ e.enc64(iconst.bind(I64), rec_pu_iq.opcodes(&MOV_IMM).rex().w()); + + // Bool constants (uses MOV) + for &ty in &[B1, B8, B16, B32] { + e.enc_both(bconst.bind(ty), rec_pu_id_bool.opcodes(&MOV_IMM)); + } + e.enc64(bconst.bind(B64), rec_pu_id_bool.opcodes(&MOV_IMM).rex()); + + let is_zero_int = InstructionPredicate::new_is_zero_int(&formats.unary_imm, "imm"); + e.enc_both_instp( + iconst.bind(I8), + rec_u_id_z.opcodes(&XORB), + is_zero_int.clone(), + ); + + // You may expect that i16 encodings would have an 0x66 prefix on the opcode to indicate that + // encodings should be on 16-bit operands (f.ex, "xor %ax, %ax"). Cranelift currently does not + // know that it can drop the 0x66 prefix and clear the upper half of a 32-bit register in these + // scenarios, so we explicitly select a wider but permissible opcode. + // + // This effectively formalizes the i16->i32 widening that Cranelift performs when there isn't + // an appropriate i16 encoding available. + e.enc_both_instp( + iconst.bind(I16), + rec_u_id_z.opcodes(&XOR), + is_zero_int.clone(), + ); + e.enc_both_instp( + iconst.bind(I32), + rec_u_id_z.opcodes(&XOR), + is_zero_int.clone(), + ); + e.enc_x86_64_instp(iconst.bind(I64), rec_u_id_z.opcodes(&XOR), is_zero_int); + + // Numerical conversions. + + // Reducing an integer is a no-op. + e.enc32_rec(ireduce.bind(I8).bind(I16), rec_null, 0); + e.enc32_rec(ireduce.bind(I8).bind(I32), rec_null, 0); + e.enc32_rec(ireduce.bind(I16).bind(I32), rec_null, 0); + + e.enc64_rec(ireduce.bind(I8).bind(I16), rec_null, 0); + e.enc64_rec(ireduce.bind(I8).bind(I32), rec_null, 0); + e.enc64_rec(ireduce.bind(I16).bind(I32), rec_null, 0); + e.enc64_rec(ireduce.bind(I8).bind(I64), rec_null, 0); + e.enc64_rec(ireduce.bind(I16).bind(I64), rec_null, 0); + e.enc64_rec(ireduce.bind(I32).bind(I64), rec_null, 0); + + // TODO: Add encodings for cbw, cwde, cdqe, which are sign-extending + // instructions for %al/%ax/%eax to %ax/%eax/%rax. + + // movsbl + e.enc32( + sextend.bind(I32).bind(I8), + rec_urm_noflags_abcd.opcodes(&MOVSX_BYTE), + ); + e.enc64( + sextend.bind(I32).bind(I8), + rec_urm_noflags.opcodes(&MOVSX_BYTE).rex(), + ); + e.enc64( + sextend.bind(I32).bind(I8), + rec_urm_noflags_abcd.opcodes(&MOVSX_BYTE), + ); + + // movswl + e.enc32( + sextend.bind(I32).bind(I16), + rec_urm_noflags.opcodes(&MOVSX_WORD), + ); + e.enc64( + sextend.bind(I32).bind(I16), + rec_urm_noflags.opcodes(&MOVSX_WORD).rex(), + ); + e.enc64( + sextend.bind(I32).bind(I16), + rec_urm_noflags.opcodes(&MOVSX_WORD), + ); + + // movsbq + e.enc64( + sextend.bind(I64).bind(I8), + rec_urm_noflags.opcodes(&MOVSX_BYTE).rex().w(), + ); + + // movswq + e.enc64( + sextend.bind(I64).bind(I16), + rec_urm_noflags.opcodes(&MOVSX_WORD).rex().w(), + ); + + // movslq + e.enc64( + sextend.bind(I64).bind(I32), + rec_urm_noflags.opcodes(&MOVSXD).rex().w(), + ); + + // movzbl + e.enc32( + uextend.bind(I32).bind(I8), + rec_urm_noflags_abcd.opcodes(&MOVZX_BYTE), + ); + e.enc64( + uextend.bind(I32).bind(I8), + rec_urm_noflags.opcodes(&MOVZX_BYTE).rex(), + ); + e.enc64( + uextend.bind(I32).bind(I8), + rec_urm_noflags_abcd.opcodes(&MOVZX_BYTE), + ); + + // movzwl + e.enc32( + uextend.bind(I32).bind(I16), + rec_urm_noflags.opcodes(&MOVZX_WORD), + ); + e.enc64( + uextend.bind(I32).bind(I16), + rec_urm_noflags.opcodes(&MOVZX_WORD).rex(), + ); + e.enc64( + uextend.bind(I32).bind(I16), + rec_urm_noflags.opcodes(&MOVZX_WORD), + ); + + // movzbq, encoded as movzbl because it's equivalent and shorter. 
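+ // (Writing a 32-bit destination implicitly zeroes the upper 32 bits on
+ // x86-64, so the 32-bit form already yields the full 64-bit result.)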
+ e.enc64( + uextend.bind(I64).bind(I8), + rec_urm_noflags.opcodes(&MOVZX_BYTE).rex(), + ); + e.enc64( + uextend.bind(I64).bind(I8), + rec_urm_noflags_abcd.opcodes(&MOVZX_BYTE), + ); + + // movzwq, encoded as movzwl because it's equivalent and shorter + e.enc64( + uextend.bind(I64).bind(I16), + rec_urm_noflags.opcodes(&MOVZX_WORD).rex(), + ); + e.enc64( + uextend.bind(I64).bind(I16), + rec_urm_noflags.opcodes(&MOVZX_WORD), + ); + + // A 32-bit register copy clears the high 32 bits. + e.enc64( + uextend.bind(I64).bind(I32), + rec_umr.opcodes(&MOV_STORE).rex(), + ); + e.enc64(uextend.bind(I64).bind(I32), rec_umr.opcodes(&MOV_STORE)); + + // Convert bool to int. + // + // This assumes that b1 is represented as an 8-bit low register with the value 0 + // or 1. + // + // Encode movzbq as movzbl, because it's equivalent and shorter. + for &to in &[I8, I16, I32, I64] { + for &from in &[B1, B8] { + e.enc64( + bint.bind(to).bind(from), + rec_urm_noflags.opcodes(&MOVZX_BYTE).rex(), + ); + e.enc64( + bint.bind(to).bind(from), + rec_urm_noflags_abcd.opcodes(&MOVZX_BYTE), + ); + if to != I64 { + e.enc32( + bint.bind(to).bind(from), + rec_urm_noflags_abcd.opcodes(&MOVZX_BYTE), + ); + } + } + } + for (to, from) in &[(I16, B16), (I32, B32), (I64, B64)] { + e.enc_both( + bint.bind(*to).bind(*from), + rec_urm_noflags_abcd.opcodes(&MOVZX_BYTE), + ); + } + + // Copy Special + // For x86-64, only define REX forms for now, since we can't describe the + // special regunit immediate operands with the current constraint language. + e.enc64(copy_special, rec_copysp.opcodes(&MOV_STORE).rex().w()); + e.enc32(copy_special, rec_copysp.opcodes(&MOV_STORE)); + + // Copy to SSA. These have to be done with special _rex_only encoders, because the standard + // machinery for deciding whether a REX.{RXB} prefix is needed doesn't take into account + // the source register, which is specified directly in the instruction. + e.enc_i32_i64_rex_only(copy_to_ssa, rec_umr_reg_to_ssa.opcodes(&MOV_STORE)); + e.enc_r32_r64_rex_only(copy_to_ssa, rec_umr_reg_to_ssa.opcodes(&MOV_STORE)); + e.enc_both_rex_only(copy_to_ssa.bind(B1), rec_umr_reg_to_ssa.opcodes(&MOV_STORE)); + e.enc_both_rex_only(copy_to_ssa.bind(I8), rec_umr_reg_to_ssa.opcodes(&MOV_STORE)); + e.enc_both_rex_only( + copy_to_ssa.bind(I16), + rec_umr_reg_to_ssa.opcodes(&MOV_STORE), + ); + e.enc_both_rex_only( + copy_to_ssa.bind(F64), + rec_furm_reg_to_ssa.opcodes(&MOVSD_LOAD), + ); + e.enc_both_rex_only( + copy_to_ssa.bind(F32), + rec_furm_reg_to_ssa.opcodes(&MOVSS_LOAD), + ); + + e.enc_32_64_rec(dummy_sarg_t, rec_dummy_sarg_t, 0); +} + +#[inline(never)] +fn define_memory( + e: &mut PerCpuModeEncodings, + shared_defs: &SharedDefinitions, + x86: &InstructionGroup, + r: &RecipeGroup, +) { + let shared = &shared_defs.instructions; + let formats = &shared_defs.formats; + + // Shorthands for instructions. 
+ let adjust_sp_down = shared.by_name("adjust_sp_down"); + let adjust_sp_down_imm = shared.by_name("adjust_sp_down_imm"); + let adjust_sp_up_imm = shared.by_name("adjust_sp_up_imm"); + let copy_nop = shared.by_name("copy_nop"); + let fill = shared.by_name("fill"); + let fill_nop = shared.by_name("fill_nop"); + let istore16 = shared.by_name("istore16"); + let istore16_complex = shared.by_name("istore16_complex"); + let istore32 = shared.by_name("istore32"); + let istore32_complex = shared.by_name("istore32_complex"); + let istore8 = shared.by_name("istore8"); + let istore8_complex = shared.by_name("istore8_complex"); + let load = shared.by_name("load"); + let load_complex = shared.by_name("load_complex"); + let regfill = shared.by_name("regfill"); + let regspill = shared.by_name("regspill"); + let sload16 = shared.by_name("sload16"); + let sload16_complex = shared.by_name("sload16_complex"); + let sload32 = shared.by_name("sload32"); + let sload32_complex = shared.by_name("sload32_complex"); + let sload8 = shared.by_name("sload8"); + let sload8_complex = shared.by_name("sload8_complex"); + let spill = shared.by_name("spill"); + let store = shared.by_name("store"); + let store_complex = shared.by_name("store_complex"); + let uload16 = shared.by_name("uload16"); + let uload16_complex = shared.by_name("uload16_complex"); + let uload32 = shared.by_name("uload32"); + let uload32_complex = shared.by_name("uload32_complex"); + let uload8 = shared.by_name("uload8"); + let uload8_complex = shared.by_name("uload8_complex"); + let x86_pop = x86.by_name("x86_pop"); + let x86_push = x86.by_name("x86_push"); + + // Shorthands for recipes. + let rec_adjustsp = r.template("adjustsp"); + let rec_adjustsp_ib = r.template("adjustsp_ib"); + let rec_adjustsp_id = r.template("adjustsp_id"); + let rec_ffillnull = r.recipe("ffillnull"); + let rec_fillnull = r.recipe("fillnull"); + let rec_fillSib32 = r.template("fillSib32"); + let rec_ld = r.template("ld"); + let rec_ldDisp32 = r.template("ldDisp32"); + let rec_ldDisp8 = r.template("ldDisp8"); + let rec_ldWithIndex = r.template("ldWithIndex"); + let rec_ldWithIndexDisp32 = r.template("ldWithIndexDisp32"); + let rec_ldWithIndexDisp8 = r.template("ldWithIndexDisp8"); + let rec_popq = r.template("popq"); + let rec_pushq = r.template("pushq"); + let rec_regfill32 = r.template("regfill32"); + let rec_regspill32 = r.template("regspill32"); + let rec_spillSib32 = r.template("spillSib32"); + let rec_st = r.template("st"); + let rec_stacknull = r.recipe("stacknull"); + let rec_stDisp32 = r.template("stDisp32"); + let rec_stDisp32_abcd = r.template("stDisp32_abcd"); + let rec_stDisp8 = r.template("stDisp8"); + let rec_stDisp8_abcd = r.template("stDisp8_abcd"); + let rec_stWithIndex = r.template("stWithIndex"); + let rec_stWithIndexDisp32 = r.template("stWithIndexDisp32"); + let rec_stWithIndexDisp32_abcd = r.template("stWithIndexDisp32_abcd"); + let rec_stWithIndexDisp8 = r.template("stWithIndexDisp8"); + let rec_stWithIndexDisp8_abcd = r.template("stWithIndexDisp8_abcd"); + let rec_stWithIndex_abcd = r.template("stWithIndex_abcd"); + let rec_st_abcd = r.template("st_abcd"); + + // Loads and stores. 
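+ // The *_complex loads and stores take their address as a list of value
+ // operands; the length predicates below limit these encodings to the
+ // base-plus-index shape that the *WithIndex recipes encode.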
+ let is_load_complex_length_two = + InstructionPredicate::new_length_equals(&*formats.load_complex, 2); + + for recipe in &[rec_ldWithIndex, rec_ldWithIndexDisp8, rec_ldWithIndexDisp32] { + e.enc_i32_i64_instp( + load_complex, + recipe.opcodes(&MOV_LOAD), + is_load_complex_length_two.clone(), + ); + e.enc_r32_r64_instp( + load_complex, + recipe.opcodes(&MOV_LOAD), + is_load_complex_length_two.clone(), + ); + e.enc_x86_64_instp( + uload32_complex, + recipe.opcodes(&MOV_LOAD), + is_load_complex_length_two.clone(), + ); + + e.enc64_instp( + sload32_complex, + recipe.opcodes(&MOVSXD).rex().w(), + is_load_complex_length_two.clone(), + ); + + e.enc_i32_i64_instp( + uload16_complex, + recipe.opcodes(&MOVZX_WORD), + is_load_complex_length_two.clone(), + ); + e.enc_i32_i64_instp( + sload16_complex, + recipe.opcodes(&MOVSX_WORD), + is_load_complex_length_two.clone(), + ); + + e.enc_i32_i64_instp( + uload8_complex, + recipe.opcodes(&MOVZX_BYTE), + is_load_complex_length_two.clone(), + ); + + e.enc_i32_i64_instp( + sload8_complex, + recipe.opcodes(&MOVSX_BYTE), + is_load_complex_length_two.clone(), + ); + } + + let is_store_complex_length_three = + InstructionPredicate::new_length_equals(&*formats.store_complex, 3); + + for recipe in &[rec_stWithIndex, rec_stWithIndexDisp8, rec_stWithIndexDisp32] { + e.enc_i32_i64_instp( + store_complex, + recipe.opcodes(&MOV_STORE), + is_store_complex_length_three.clone(), + ); + e.enc_r32_r64_instp( + store_complex, + recipe.opcodes(&MOV_STORE), + is_store_complex_length_three.clone(), + ); + e.enc_x86_64_instp( + istore32_complex, + recipe.opcodes(&MOV_STORE), + is_store_complex_length_three.clone(), + ); + e.enc_both_instp( + istore16_complex.bind(I32), + recipe.opcodes(&MOV_STORE_16), + is_store_complex_length_three.clone(), + ); + e.enc_x86_64_instp( + istore16_complex.bind(I64), + recipe.opcodes(&MOV_STORE_16), + is_store_complex_length_three.clone(), + ); + } + + for recipe in &[ + rec_stWithIndex_abcd, + rec_stWithIndexDisp8_abcd, + rec_stWithIndexDisp32_abcd, + ] { + e.enc_both_instp( + istore8_complex.bind(I32), + recipe.opcodes(&MOV_BYTE_STORE), + is_store_complex_length_three.clone(), + ); + e.enc_x86_64_instp( + istore8_complex.bind(I64), + recipe.opcodes(&MOV_BYTE_STORE), + is_store_complex_length_three.clone(), + ); + } + + for recipe in &[rec_st, rec_stDisp8, rec_stDisp32] { + e.enc_i32_i64_ld_st(store, true, recipe.opcodes(&MOV_STORE)); + e.enc_r32_r64_ld_st(store, true, recipe.opcodes(&MOV_STORE)); + e.enc_x86_64(istore32.bind(I64).bind(Any), recipe.opcodes(&MOV_STORE)); + e.enc_i32_i64_ld_st(istore16, false, recipe.opcodes(&MOV_STORE_16)); + } + + // Byte stores are more complicated because the registers they can address + // depends of the presence of a REX prefix. The st*_abcd recipes fall back to + // the corresponding st* recipes when a REX prefix is applied. + + for recipe in &[rec_st_abcd, rec_stDisp8_abcd, rec_stDisp32_abcd] { + e.enc_both(istore8.bind(I32).bind(Any), recipe.opcodes(&MOV_BYTE_STORE)); + e.enc_x86_64(istore8.bind(I64).bind(Any), recipe.opcodes(&MOV_BYTE_STORE)); + } + + e.enc_i32_i64_explicit_rex(spill, rec_spillSib32.opcodes(&MOV_STORE)); + e.enc_i32_i64_explicit_rex(regspill, rec_regspill32.opcodes(&MOV_STORE)); + e.enc_r32_r64_rex_only(spill, rec_spillSib32.opcodes(&MOV_STORE)); + e.enc_r32_r64_rex_only(regspill, rec_regspill32.opcodes(&MOV_STORE)); + + // Use a 32-bit write for spilling `b1`, `i8` and `i16` to avoid + // constraining the permitted registers. + // See MIN_SPILL_SLOT_SIZE which makes this safe. 
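+ // (Without a REX prefix an 8-bit store can only name the ABCD registers, as
+ // the byte-store recipes above show; a 32-bit store has no such restriction,
+ // and the fills further down read the slot back with a matching 32-bit load.)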
+ + e.enc_both(spill.bind(B1), rec_spillSib32.opcodes(&MOV_STORE)); + e.enc_both(regspill.bind(B1), rec_regspill32.opcodes(&MOV_STORE)); + for &ty in &[I8, I16] { + e.enc_both(spill.bind(ty), rec_spillSib32.opcodes(&MOV_STORE)); + e.enc_both(regspill.bind(ty), rec_regspill32.opcodes(&MOV_STORE)); + } + + for recipe in &[rec_ld, rec_ldDisp8, rec_ldDisp32] { + e.enc_i32_i64_ld_st(load, true, recipe.opcodes(&MOV_LOAD)); + e.enc_r32_r64_ld_st(load, true, recipe.opcodes(&MOV_LOAD)); + e.enc_x86_64(uload32.bind(I64), recipe.opcodes(&MOV_LOAD)); + e.enc64(sload32.bind(I64), recipe.opcodes(&MOVSXD).rex().w()); + e.enc_i32_i64_ld_st(uload16, true, recipe.opcodes(&MOVZX_WORD)); + e.enc_i32_i64_ld_st(sload16, true, recipe.opcodes(&MOVSX_WORD)); + e.enc_i32_i64_ld_st(uload8, true, recipe.opcodes(&MOVZX_BYTE)); + e.enc_i32_i64_ld_st(sload8, true, recipe.opcodes(&MOVSX_BYTE)); + } + + e.enc_i32_i64_explicit_rex(fill, rec_fillSib32.opcodes(&MOV_LOAD)); + e.enc_i32_i64_explicit_rex(regfill, rec_regfill32.opcodes(&MOV_LOAD)); + e.enc_r32_r64_rex_only(fill, rec_fillSib32.opcodes(&MOV_LOAD)); + e.enc_r32_r64_rex_only(regfill, rec_regfill32.opcodes(&MOV_LOAD)); + + // No-op fills, created by late-stage redundant-fill removal. + for &ty in &[I64, I32, I16, I8] { + e.enc64_rec(fill_nop.bind(ty), rec_fillnull, 0); + e.enc32_rec(fill_nop.bind(ty), rec_fillnull, 0); + } + e.enc64_rec(fill_nop.bind(B1), rec_fillnull, 0); + e.enc32_rec(fill_nop.bind(B1), rec_fillnull, 0); + for &ty in &[F64, F32] { + e.enc64_rec(fill_nop.bind(ty), rec_ffillnull, 0); + e.enc32_rec(fill_nop.bind(ty), rec_ffillnull, 0); + } + for &ty in &[R64, R32] { + e.enc64_rec(fill_nop.bind(ty), rec_fillnull, 0); + e.enc32_rec(fill_nop.bind(ty), rec_fillnull, 0); + } + + // Load 32 bits from `b1`, `i8` and `i16` spill slots. See `spill.b1` above. + + e.enc_both(fill.bind(B1), rec_fillSib32.opcodes(&MOV_LOAD)); + e.enc_both(regfill.bind(B1), rec_regfill32.opcodes(&MOV_LOAD)); + for &ty in &[I8, I16] { + e.enc_both(fill.bind(ty), rec_fillSib32.opcodes(&MOV_LOAD)); + e.enc_both(regfill.bind(ty), rec_regfill32.opcodes(&MOV_LOAD)); + } + + // Push and Pop. + e.enc32(x86_push.bind(I32), rec_pushq.opcodes(&PUSH_REG)); + e.enc_x86_64(x86_push.bind(I64), rec_pushq.opcodes(&PUSH_REG)); + + e.enc32(x86_pop.bind(I32), rec_popq.opcodes(&POP_REG)); + e.enc_x86_64(x86_pop.bind(I64), rec_popq.opcodes(&POP_REG)); + + // Stack-slot-to-the-same-stack-slot copy, which is guaranteed to turn + // into a no-op. + // The same encoding is generated for both the 64- and 32-bit architectures. + for &ty in &[I64, I32, I16, I8] { + e.enc64_rec(copy_nop.bind(ty), rec_stacknull, 0); + e.enc32_rec(copy_nop.bind(ty), rec_stacknull, 0); + } + for &ty in &[F64, F32] { + e.enc64_rec(copy_nop.bind(ty), rec_stacknull, 0); + e.enc32_rec(copy_nop.bind(ty), rec_stacknull, 0); + } + + // Adjust SP down by a dynamic value (or up, with a negative operand). + e.enc32(adjust_sp_down.bind(I32), rec_adjustsp.opcodes(&SUB)); + e.enc64( + adjust_sp_down.bind(I64), + rec_adjustsp.opcodes(&SUB).rex().w(), + ); + + // Adjust SP up by an immediate (or down, with a negative immediate). + e.enc32(adjust_sp_up_imm, rec_adjustsp_ib.opcodes(&CMP_IMM8)); + e.enc32(adjust_sp_up_imm, rec_adjustsp_id.opcodes(&CMP_IMM)); + e.enc64( + adjust_sp_up_imm, + rec_adjustsp_ib.opcodes(&CMP_IMM8).rex().w(), + ); + e.enc64( + adjust_sp_up_imm, + rec_adjustsp_id.opcodes(&CMP_IMM).rex().w(), + ); + + // Adjust SP down by an immediate (or up, with a negative immediate). 
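+ // These reuse the same immediate-group opcodes as `adjust_sp_up_imm` above;
+ // the `.rrr(5)` below selects the SUB member of the group, so the immediate
+ // is subtracted from SP rather than added.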
+ e.enc32( + adjust_sp_down_imm, + rec_adjustsp_ib.opcodes(&CMP_IMM8).rrr(5), + ); + e.enc32(adjust_sp_down_imm, rec_adjustsp_id.opcodes(&CMP_IMM).rrr(5)); + e.enc64( + adjust_sp_down_imm, + rec_adjustsp_ib.opcodes(&CMP_IMM8).rrr(5).rex().w(), + ); + e.enc64( + adjust_sp_down_imm, + rec_adjustsp_id.opcodes(&CMP_IMM).rrr(5).rex().w(), + ); +} + +#[inline(never)] +fn define_fpu_moves(e: &mut PerCpuModeEncodings, shared_defs: &SharedDefinitions, r: &RecipeGroup) { + let shared = &shared_defs.instructions; + + // Shorthands for instructions. + let bitcast = shared.by_name("bitcast"); + let copy = shared.by_name("copy"); + let regmove = shared.by_name("regmove"); + + // Shorthands for recipes. + let rec_frmov = r.template("frmov"); + let rec_frurm = r.template("frurm"); + let rec_furm = r.template("furm"); + let rec_rfumr = r.template("rfumr"); + + // Floating-point moves. + // movd + e.enc_both( + bitcast.bind(F32).bind(I32), + rec_frurm.opcodes(&MOVD_LOAD_XMM), + ); + e.enc_both( + bitcast.bind(I32).bind(F32), + rec_rfumr.opcodes(&MOVD_STORE_XMM), + ); + + // movq + e.enc64( + bitcast.bind(F64).bind(I64), + rec_frurm.opcodes(&MOVD_LOAD_XMM).rex().w(), + ); + e.enc64( + bitcast.bind(I64).bind(F64), + rec_rfumr.opcodes(&MOVD_STORE_XMM).rex().w(), + ); + + // movaps + e.enc_both(copy.bind(F32), rec_furm.opcodes(&MOVAPS_LOAD)); + e.enc_both(copy.bind(F64), rec_furm.opcodes(&MOVAPS_LOAD)); + + // TODO For x86-64, only define REX forms for now, since we can't describe the special regunit + // immediate operands with the current constraint language. + e.enc32(regmove.bind(F32), rec_frmov.opcodes(&MOVAPS_LOAD)); + e.enc64(regmove.bind(F32), rec_frmov.opcodes(&MOVAPS_LOAD).rex()); + + // TODO For x86-64, only define REX forms for now, since we can't describe the special regunit + // immediate operands with the current constraint language. + e.enc32(regmove.bind(F64), rec_frmov.opcodes(&MOVAPS_LOAD)); + e.enc64(regmove.bind(F64), rec_frmov.opcodes(&MOVAPS_LOAD).rex()); +} + +#[inline(never)] +fn define_fpu_memory( + e: &mut PerCpuModeEncodings, + shared_defs: &SharedDefinitions, + r: &RecipeGroup, +) { + let shared = &shared_defs.instructions; + + // Shorthands for instructions. + let fill = shared.by_name("fill"); + let load = shared.by_name("load"); + let load_complex = shared.by_name("load_complex"); + let regfill = shared.by_name("regfill"); + let regspill = shared.by_name("regspill"); + let spill = shared.by_name("spill"); + let store = shared.by_name("store"); + let store_complex = shared.by_name("store_complex"); + + // Shorthands for recipes. + let rec_ffillSib32 = r.template("ffillSib32"); + let rec_fld = r.template("fld"); + let rec_fldDisp32 = r.template("fldDisp32"); + let rec_fldDisp8 = r.template("fldDisp8"); + let rec_fldWithIndex = r.template("fldWithIndex"); + let rec_fldWithIndexDisp32 = r.template("fldWithIndexDisp32"); + let rec_fldWithIndexDisp8 = r.template("fldWithIndexDisp8"); + let rec_fregfill32 = r.template("fregfill32"); + let rec_fregspill32 = r.template("fregspill32"); + let rec_fspillSib32 = r.template("fspillSib32"); + let rec_fst = r.template("fst"); + let rec_fstDisp32 = r.template("fstDisp32"); + let rec_fstDisp8 = r.template("fstDisp8"); + let rec_fstWithIndex = r.template("fstWithIndex"); + let rec_fstWithIndexDisp32 = r.template("fstWithIndexDisp32"); + let rec_fstWithIndexDisp8 = r.template("fstWithIndexDisp8"); + + // Float loads and stores. 
+ e.enc_both(load.bind(F32).bind(Any), rec_fld.opcodes(&MOVSS_LOAD)); + e.enc_both(load.bind(F32).bind(Any), rec_fldDisp8.opcodes(&MOVSS_LOAD)); + e.enc_both(load.bind(F32).bind(Any), rec_fldDisp32.opcodes(&MOVSS_LOAD)); + + e.enc_both( + load_complex.bind(F32), + rec_fldWithIndex.opcodes(&MOVSS_LOAD), + ); + e.enc_both( + load_complex.bind(F32), + rec_fldWithIndexDisp8.opcodes(&MOVSS_LOAD), + ); + e.enc_both( + load_complex.bind(F32), + rec_fldWithIndexDisp32.opcodes(&MOVSS_LOAD), + ); + + e.enc_both(load.bind(F64).bind(Any), rec_fld.opcodes(&MOVSD_LOAD)); + e.enc_both(load.bind(F64).bind(Any), rec_fldDisp8.opcodes(&MOVSD_LOAD)); + e.enc_both(load.bind(F64).bind(Any), rec_fldDisp32.opcodes(&MOVSD_LOAD)); + + e.enc_both( + load_complex.bind(F64), + rec_fldWithIndex.opcodes(&MOVSD_LOAD), + ); + e.enc_both( + load_complex.bind(F64), + rec_fldWithIndexDisp8.opcodes(&MOVSD_LOAD), + ); + e.enc_both( + load_complex.bind(F64), + rec_fldWithIndexDisp32.opcodes(&MOVSD_LOAD), + ); + + e.enc_both(store.bind(F32).bind(Any), rec_fst.opcodes(&MOVSS_STORE)); + e.enc_both( + store.bind(F32).bind(Any), + rec_fstDisp8.opcodes(&MOVSS_STORE), + ); + e.enc_both( + store.bind(F32).bind(Any), + rec_fstDisp32.opcodes(&MOVSS_STORE), + ); + + e.enc_both( + store_complex.bind(F32), + rec_fstWithIndex.opcodes(&MOVSS_STORE), + ); + e.enc_both( + store_complex.bind(F32), + rec_fstWithIndexDisp8.opcodes(&MOVSS_STORE), + ); + e.enc_both( + store_complex.bind(F32), + rec_fstWithIndexDisp32.opcodes(&MOVSS_STORE), + ); + + e.enc_both(store.bind(F64).bind(Any), rec_fst.opcodes(&MOVSD_STORE)); + e.enc_both( + store.bind(F64).bind(Any), + rec_fstDisp8.opcodes(&MOVSD_STORE), + ); + e.enc_both( + store.bind(F64).bind(Any), + rec_fstDisp32.opcodes(&MOVSD_STORE), + ); + + e.enc_both( + store_complex.bind(F64), + rec_fstWithIndex.opcodes(&MOVSD_STORE), + ); + e.enc_both( + store_complex.bind(F64), + rec_fstWithIndexDisp8.opcodes(&MOVSD_STORE), + ); + e.enc_both( + store_complex.bind(F64), + rec_fstWithIndexDisp32.opcodes(&MOVSD_STORE), + ); + + e.enc_both(fill.bind(F32), rec_ffillSib32.opcodes(&MOVSS_LOAD)); + e.enc_both(regfill.bind(F32), rec_fregfill32.opcodes(&MOVSS_LOAD)); + e.enc_both(fill.bind(F64), rec_ffillSib32.opcodes(&MOVSD_LOAD)); + e.enc_both(regfill.bind(F64), rec_fregfill32.opcodes(&MOVSD_LOAD)); + + e.enc_both(spill.bind(F32), rec_fspillSib32.opcodes(&MOVSS_STORE)); + e.enc_both(regspill.bind(F32), rec_fregspill32.opcodes(&MOVSS_STORE)); + e.enc_both(spill.bind(F64), rec_fspillSib32.opcodes(&MOVSD_STORE)); + e.enc_both(regspill.bind(F64), rec_fregspill32.opcodes(&MOVSD_STORE)); +} + +#[inline(never)] +fn define_fpu_ops( + e: &mut PerCpuModeEncodings, + shared_defs: &SharedDefinitions, + settings: &SettingGroup, + x86: &InstructionGroup, + r: &RecipeGroup, +) { + let shared = &shared_defs.instructions; + let formats = &shared_defs.formats; + + // Shorthands for instructions. 
+ let ceil = shared.by_name("ceil"); + let f32const = shared.by_name("f32const"); + let f64const = shared.by_name("f64const"); + let fadd = shared.by_name("fadd"); + let fcmp = shared.by_name("fcmp"); + let fcvt_from_sint = shared.by_name("fcvt_from_sint"); + let fdemote = shared.by_name("fdemote"); + let fdiv = shared.by_name("fdiv"); + let ffcmp = shared.by_name("ffcmp"); + let floor = shared.by_name("floor"); + let fmul = shared.by_name("fmul"); + let fpromote = shared.by_name("fpromote"); + let fsub = shared.by_name("fsub"); + let nearest = shared.by_name("nearest"); + let sqrt = shared.by_name("sqrt"); + let trunc = shared.by_name("trunc"); + let x86_cvtt2si = x86.by_name("x86_cvtt2si"); + let x86_fmax = x86.by_name("x86_fmax"); + let x86_fmin = x86.by_name("x86_fmin"); + + // Shorthands for recipes. + let rec_f32imm_z = r.template("f32imm_z"); + let rec_f64imm_z = r.template("f64imm_z"); + let rec_fa = r.template("fa"); + let rec_fcmp = r.template("fcmp"); + let rec_fcscc = r.template("fcscc"); + let rec_frurm = r.template("frurm"); + let rec_furm = r.template("furm"); + let rec_furmi_rnd = r.template("furmi_rnd"); + let rec_rfurm = r.template("rfurm"); + + // Predicates shorthands. + let use_sse41 = settings.predicate_by_name("use_sse41"); + + // Floating-point constants equal to 0.0 can be encoded using either `xorps` or `xorpd`, for + // 32-bit and 64-bit floats respectively. + let is_zero_32_bit_float = + InstructionPredicate::new_is_zero_32bit_float(&*formats.unary_ieee32, "imm"); + e.enc32_instp( + f32const, + rec_f32imm_z.opcodes(&XORPS), + is_zero_32_bit_float.clone(), + ); + + let is_zero_64_bit_float = + InstructionPredicate::new_is_zero_64bit_float(&*formats.unary_ieee64, "imm"); + e.enc32_instp( + f64const, + rec_f64imm_z.opcodes(&XORPD), + is_zero_64_bit_float.clone(), + ); + + e.enc_x86_64_instp(f32const, rec_f32imm_z.opcodes(&XORPS), is_zero_32_bit_float); + e.enc_x86_64_instp(f64const, rec_f64imm_z.opcodes(&XORPD), is_zero_64_bit_float); + + // cvtsi2ss + e.enc_i32_i64(fcvt_from_sint.bind(F32), rec_frurm.opcodes(&CVTSI2SS)); + + // cvtsi2sd + e.enc_i32_i64(fcvt_from_sint.bind(F64), rec_frurm.opcodes(&CVTSI2SD)); + + // cvtss2sd + e.enc_both(fpromote.bind(F64).bind(F32), rec_furm.opcodes(&CVTSS2SD)); + + // cvtsd2ss + e.enc_both(fdemote.bind(F32).bind(F64), rec_furm.opcodes(&CVTSD2SS)); + + // cvttss2si + e.enc_both( + x86_cvtt2si.bind(I32).bind(F32), + rec_rfurm.opcodes(&CVTTSS2SI), + ); + e.enc64( + x86_cvtt2si.bind(I64).bind(F32), + rec_rfurm.opcodes(&CVTTSS2SI).rex().w(), + ); + + // cvttsd2si + e.enc_both( + x86_cvtt2si.bind(I32).bind(F64), + rec_rfurm.opcodes(&CVTTSD2SI), + ); + e.enc64( + x86_cvtt2si.bind(I64).bind(F64), + rec_rfurm.opcodes(&CVTTSD2SI).rex().w(), + ); + + // Exact square roots. + e.enc_both(sqrt.bind(F32), rec_furm.opcodes(&SQRTSS)); + e.enc_both(sqrt.bind(F64), rec_furm.opcodes(&SQRTSD)); + + // Rounding. The recipe looks at the opcode to pick an immediate. + for inst in &[nearest, floor, ceil, trunc] { + e.enc_both_isap(inst.bind(F32), rec_furmi_rnd.opcodes(&ROUNDSS), use_sse41); + e.enc_both_isap(inst.bind(F64), rec_furmi_rnd.opcodes(&ROUNDSD), use_sse41); + } + + // Binary arithmetic ops. 
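+ // Scalar SSE arithmetic: each operation is added twice, with the *SS opcode
+ // for the F32 binding and the *SD opcode for the F64 binding, all through the
+ // same two-operand `fa` recipe.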
+ e.enc_both(fadd.bind(F32), rec_fa.opcodes(&ADDSS)); + e.enc_both(fadd.bind(F64), rec_fa.opcodes(&ADDSD)); + + e.enc_both(fsub.bind(F32), rec_fa.opcodes(&SUBSS)); + e.enc_both(fsub.bind(F64), rec_fa.opcodes(&SUBSD)); + + e.enc_both(fmul.bind(F32), rec_fa.opcodes(&MULSS)); + e.enc_both(fmul.bind(F64), rec_fa.opcodes(&MULSD)); + + e.enc_both(fdiv.bind(F32), rec_fa.opcodes(&DIVSS)); + e.enc_both(fdiv.bind(F64), rec_fa.opcodes(&DIVSD)); + + e.enc_both(x86_fmin.bind(F32), rec_fa.opcodes(&MINSS)); + e.enc_both(x86_fmin.bind(F64), rec_fa.opcodes(&MINSD)); + + e.enc_both(x86_fmax.bind(F32), rec_fa.opcodes(&MAXSS)); + e.enc_both(x86_fmax.bind(F64), rec_fa.opcodes(&MAXSD)); + + // Comparisons. + // + // This only covers the condition codes in `supported_floatccs`, the rest are + // handled by legalization patterns. + e.enc_both(fcmp.bind(F32), rec_fcscc.opcodes(&UCOMISS)); + e.enc_both(fcmp.bind(F64), rec_fcscc.opcodes(&UCOMISD)); + e.enc_both(ffcmp.bind(F32), rec_fcmp.opcodes(&UCOMISS)); + e.enc_both(ffcmp.bind(F64), rec_fcmp.opcodes(&UCOMISD)); +} + +#[inline(never)] +fn define_alu( + e: &mut PerCpuModeEncodings, + shared_defs: &SharedDefinitions, + settings: &SettingGroup, + x86: &InstructionGroup, + r: &RecipeGroup, +) { + let shared = &shared_defs.instructions; + + // Shorthands for instructions. + let clz = shared.by_name("clz"); + let ctz = shared.by_name("ctz"); + let icmp = shared.by_name("icmp"); + let icmp_imm = shared.by_name("icmp_imm"); + let ifcmp = shared.by_name("ifcmp"); + let ifcmp_imm = shared.by_name("ifcmp_imm"); + let ifcmp_sp = shared.by_name("ifcmp_sp"); + let ishl = shared.by_name("ishl"); + let ishl_imm = shared.by_name("ishl_imm"); + let popcnt = shared.by_name("popcnt"); + let rotl = shared.by_name("rotl"); + let rotl_imm = shared.by_name("rotl_imm"); + let rotr = shared.by_name("rotr"); + let rotr_imm = shared.by_name("rotr_imm"); + let selectif = shared.by_name("selectif"); + let selectif_spectre_guard = shared.by_name("selectif_spectre_guard"); + let sshr = shared.by_name("sshr"); + let sshr_imm = shared.by_name("sshr_imm"); + let trueff = shared.by_name("trueff"); + let trueif = shared.by_name("trueif"); + let ushr = shared.by_name("ushr"); + let ushr_imm = shared.by_name("ushr_imm"); + let x86_bsf = x86.by_name("x86_bsf"); + let x86_bsr = x86.by_name("x86_bsr"); + + // Shorthands for recipes. + let rec_bsf_and_bsr = r.template("bsf_and_bsr"); + let rec_cmov = r.template("cmov"); + let rec_icscc = r.template("icscc"); + let rec_icscc_ib = r.template("icscc_ib"); + let rec_icscc_id = r.template("icscc_id"); + let rec_rcmp = r.template("rcmp"); + let rec_rcmp_ib = r.template("rcmp_ib"); + let rec_rcmp_id = r.template("rcmp_id"); + let rec_rcmp_sp = r.template("rcmp_sp"); + let rec_rc = r.template("rc"); + let rec_setf_abcd = r.template("setf_abcd"); + let rec_seti_abcd = r.template("seti_abcd"); + let rec_urm = r.template("urm"); + + // Predicates shorthands. 
+ let use_popcnt = settings.predicate_by_name("use_popcnt"); + let use_lzcnt = settings.predicate_by_name("use_lzcnt"); + let use_bmi1 = settings.predicate_by_name("use_bmi1"); + + let band = shared.by_name("band"); + let band_imm = shared.by_name("band_imm"); + let band_not = shared.by_name("band_not"); + let bnot = shared.by_name("bnot"); + let bor = shared.by_name("bor"); + let bor_imm = shared.by_name("bor_imm"); + let bxor = shared.by_name("bxor"); + let bxor_imm = shared.by_name("bxor_imm"); + let iadd = shared.by_name("iadd"); + let iadd_ifcarry = shared.by_name("iadd_ifcarry"); + let iadd_ifcin = shared.by_name("iadd_ifcin"); + let iadd_ifcout = shared.by_name("iadd_ifcout"); + let iadd_imm = shared.by_name("iadd_imm"); + let imul = shared.by_name("imul"); + let isub = shared.by_name("isub"); + let isub_ifbin = shared.by_name("isub_ifbin"); + let isub_ifborrow = shared.by_name("isub_ifborrow"); + let isub_ifbout = shared.by_name("isub_ifbout"); + let x86_sdivmodx = x86.by_name("x86_sdivmodx"); + let x86_smulx = x86.by_name("x86_smulx"); + let x86_udivmodx = x86.by_name("x86_udivmodx"); + let x86_umulx = x86.by_name("x86_umulx"); + + let rec_div = r.template("div"); + let rec_fa = r.template("fa"); + let rec_fax = r.template("fax"); + let rec_mulx = r.template("mulx"); + let rec_r_ib = r.template("r_ib"); + let rec_r_id = r.template("r_id"); + let rec_rin = r.template("rin"); + let rec_rio = r.template("rio"); + let rec_rout = r.template("rout"); + let rec_rr = r.template("rr"); + let rec_rrx = r.template("rrx"); + let rec_ur = r.template("ur"); + + e.enc_i32_i64(iadd, rec_rr.opcodes(&ADD)); + e.enc_i32_i64(iadd_ifcout, rec_rout.opcodes(&ADD)); + e.enc_i32_i64(iadd_ifcin, rec_rin.opcodes(&ADC)); + e.enc_i32_i64(iadd_ifcarry, rec_rio.opcodes(&ADC)); + e.enc_i32_i64(iadd_imm, rec_r_ib.opcodes(&ADD_IMM8_SIGN_EXTEND).rrr(0)); + e.enc_i32_i64(iadd_imm, rec_r_id.opcodes(&ADD_IMM).rrr(0)); + + e.enc_i32_i64(isub, rec_rr.opcodes(&SUB)); + e.enc_i32_i64(isub_ifbout, rec_rout.opcodes(&SUB)); + e.enc_i32_i64(isub_ifbin, rec_rin.opcodes(&SBB)); + e.enc_i32_i64(isub_ifborrow, rec_rio.opcodes(&SBB)); + + e.enc_i32_i64(band, rec_rr.opcodes(&AND)); + e.enc_b32_b64(band, rec_rr.opcodes(&AND)); + + // TODO: band_imm.i64 with an unsigned 32-bit immediate can be encoded as band_imm.i32. Can + // even use the single-byte immediate for 0xffff_ffXX masks. + + e.enc_i32_i64(band_imm, rec_r_ib.opcodes(&AND_IMM8_SIGN_EXTEND).rrr(4)); + e.enc_i32_i64(band_imm, rec_r_id.opcodes(&AND_IMM).rrr(4)); + + e.enc_i32_i64(bor, rec_rr.opcodes(&OR)); + e.enc_b32_b64(bor, rec_rr.opcodes(&OR)); + e.enc_i32_i64(bor_imm, rec_r_ib.opcodes(&OR_IMM8_SIGN_EXTEND).rrr(1)); + e.enc_i32_i64(bor_imm, rec_r_id.opcodes(&OR_IMM).rrr(1)); + + e.enc_i32_i64(bxor, rec_rr.opcodes(&XOR)); + e.enc_b32_b64(bxor, rec_rr.opcodes(&XOR)); + e.enc_i32_i64(bxor_imm, rec_r_ib.opcodes(&XOR_IMM8_SIGN_EXTEND).rrr(6)); + e.enc_i32_i64(bxor_imm, rec_r_id.opcodes(&XOR_IMM).rrr(6)); + + // x86 has a bitwise not instruction NOT. + e.enc_i32_i64(bnot, rec_ur.opcodes(&NOT).rrr(2)); + e.enc_b32_b64(bnot, rec_ur.opcodes(&NOT).rrr(2)); + e.enc_both(bnot.bind(B1), rec_ur.opcodes(&NOT).rrr(2)); + + // Also add a `b1` encodings for the logic instructions. + // TODO: Should this be done with 8-bit instructions? It would improve partial register + // dependencies. 
+ e.enc_both(band.bind(B1), rec_rr.opcodes(&AND)); + e.enc_both(bor.bind(B1), rec_rr.opcodes(&OR)); + e.enc_both(bxor.bind(B1), rec_rr.opcodes(&XOR)); + + e.enc_i32_i64(imul, rec_rrx.opcodes(&IMUL)); + e.enc_i32_i64(x86_sdivmodx, rec_div.opcodes(&IDIV).rrr(7)); + e.enc_i32_i64(x86_udivmodx, rec_div.opcodes(&DIV).rrr(6)); + + e.enc_i32_i64(x86_smulx, rec_mulx.opcodes(&IMUL_RDX_RAX).rrr(5)); + e.enc_i32_i64(x86_umulx, rec_mulx.opcodes(&MUL).rrr(4)); + + // Binary bitwise ops. + // + // The F64 version is intentionally encoded using the single-precision opcode: + // the operation is identical and the encoding is one byte shorter. + e.enc_both(band.bind(F32), rec_fa.opcodes(&ANDPS)); + e.enc_both(band.bind(F64), rec_fa.opcodes(&ANDPS)); + + e.enc_both(bor.bind(F32), rec_fa.opcodes(&ORPS)); + e.enc_both(bor.bind(F64), rec_fa.opcodes(&ORPS)); + + e.enc_both(bxor.bind(F32), rec_fa.opcodes(&XORPS)); + e.enc_both(bxor.bind(F64), rec_fa.opcodes(&XORPS)); + + // The `andnps(x,y)` instruction computes `~x&y`, while band_not(x,y)` is `x&~y. + e.enc_both(band_not.bind(F32), rec_fax.opcodes(&ANDNPS)); + e.enc_both(band_not.bind(F64), rec_fax.opcodes(&ANDNPS)); + + // Shifts and rotates. + // Note that the dynamic shift amount is only masked by 5 or 6 bits; the 8-bit + // and 16-bit shifts would need explicit masking. + + for &(inst, rrr) in &[(rotl, 0), (rotr, 1), (ishl, 4), (ushr, 5), (sshr, 7)] { + // Cannot use enc_i32_i64 for this pattern because instructions require + // to bind any. + e.enc32(inst.bind(I32).bind(I8), rec_rc.opcodes(&ROTATE_CL).rrr(rrr)); + e.enc32( + inst.bind(I32).bind(I16), + rec_rc.opcodes(&ROTATE_CL).rrr(rrr), + ); + e.enc32( + inst.bind(I32).bind(I32), + rec_rc.opcodes(&ROTATE_CL).rrr(rrr), + ); + e.enc64( + inst.bind(I64).bind(Any), + rec_rc.opcodes(&ROTATE_CL).rrr(rrr).rex().w(), + ); + e.enc64( + inst.bind(I32).bind(Any), + rec_rc.opcodes(&ROTATE_CL).rrr(rrr).rex(), + ); + e.enc64( + inst.bind(I32).bind(Any), + rec_rc.opcodes(&ROTATE_CL).rrr(rrr), + ); + } + + e.enc_i32_i64(rotl_imm, rec_r_ib.opcodes(&ROTATE_IMM8).rrr(0)); + e.enc_i32_i64(rotr_imm, rec_r_ib.opcodes(&ROTATE_IMM8).rrr(1)); + e.enc_i32_i64(ishl_imm, rec_r_ib.opcodes(&ROTATE_IMM8).rrr(4)); + e.enc_i32_i64(ushr_imm, rec_r_ib.opcodes(&ROTATE_IMM8).rrr(5)); + e.enc_i32_i64(sshr_imm, rec_r_ib.opcodes(&ROTATE_IMM8).rrr(7)); + + // Population count. + e.enc32_isap(popcnt.bind(I32), rec_urm.opcodes(&POPCNT), use_popcnt); + e.enc64_isap( + popcnt.bind(I64), + rec_urm.opcodes(&POPCNT).rex().w(), + use_popcnt, + ); + e.enc64_isap(popcnt.bind(I32), rec_urm.opcodes(&POPCNT).rex(), use_popcnt); + e.enc64_isap(popcnt.bind(I32), rec_urm.opcodes(&POPCNT), use_popcnt); + + // Count leading zero bits. + e.enc32_isap(clz.bind(I32), rec_urm.opcodes(&LZCNT), use_lzcnt); + e.enc64_isap(clz.bind(I64), rec_urm.opcodes(&LZCNT).rex().w(), use_lzcnt); + e.enc64_isap(clz.bind(I32), rec_urm.opcodes(&LZCNT).rex(), use_lzcnt); + e.enc64_isap(clz.bind(I32), rec_urm.opcodes(&LZCNT), use_lzcnt); + + // Count trailing zero bits. 
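+ // TZCNT is a BMI1 instruction, hence the `use_bmi1` predicate on each of the
+ // encodings below.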
+ e.enc32_isap(ctz.bind(I32), rec_urm.opcodes(&TZCNT), use_bmi1); + e.enc64_isap(ctz.bind(I64), rec_urm.opcodes(&TZCNT).rex().w(), use_bmi1); + e.enc64_isap(ctz.bind(I32), rec_urm.opcodes(&TZCNT).rex(), use_bmi1); + e.enc64_isap(ctz.bind(I32), rec_urm.opcodes(&TZCNT), use_bmi1); + + // Bit scan forwards and reverse + e.enc_i32_i64(x86_bsf, rec_bsf_and_bsr.opcodes(&BIT_SCAN_FORWARD)); + e.enc_i32_i64(x86_bsr, rec_bsf_and_bsr.opcodes(&BIT_SCAN_REVERSE)); + + // Comparisons + e.enc_i32_i64(icmp, rec_icscc.opcodes(&CMP_REG)); + e.enc_i32_i64(icmp_imm, rec_icscc_ib.opcodes(&CMP_IMM8).rrr(7)); + e.enc_i32_i64(icmp_imm, rec_icscc_id.opcodes(&CMP_IMM).rrr(7)); + e.enc_i32_i64(ifcmp, rec_rcmp.opcodes(&CMP_REG)); + e.enc_i32_i64(ifcmp_imm, rec_rcmp_ib.opcodes(&CMP_IMM8).rrr(7)); + e.enc_i32_i64(ifcmp_imm, rec_rcmp_id.opcodes(&CMP_IMM).rrr(7)); + // TODO: We could special-case ifcmp_imm(x, 0) to TEST(x, x). + + e.enc32(ifcmp_sp.bind(I32), rec_rcmp_sp.opcodes(&CMP_REG)); + e.enc64(ifcmp_sp.bind(I64), rec_rcmp_sp.opcodes(&CMP_REG).rex().w()); + + // Convert flags to bool. + // This encodes `b1` as an 8-bit low register with the value 0 or 1. + e.enc_both(trueif, rec_seti_abcd.opcodes(&SET_BYTE_IF_OVERFLOW)); + e.enc_both(trueff, rec_setf_abcd.opcodes(&SET_BYTE_IF_OVERFLOW)); + + // Conditional move (a.k.a integer select). + e.enc_i32_i64(selectif, rec_cmov.opcodes(&CMOV_OVERFLOW)); + // A Spectre-guard integer select is exactly the same as a selectif, but + // is not associated with any other legalization rules and is not + // recognized by any optimizations, so it must arrive here unmodified + // and in its original place. + e.enc_i32_i64(selectif_spectre_guard, rec_cmov.opcodes(&CMOV_OVERFLOW)); +} + +#[inline(never)] +#[allow(clippy::cognitive_complexity)] +fn define_simd( + e: &mut PerCpuModeEncodings, + shared_defs: &SharedDefinitions, + settings: &SettingGroup, + x86: &InstructionGroup, + r: &RecipeGroup, +) { + let shared = &shared_defs.instructions; + let formats = &shared_defs.formats; + + // Shorthands for instructions. 
+ let avg_round = shared.by_name("avg_round"); + let bitcast = shared.by_name("bitcast"); + let bor = shared.by_name("bor"); + let bxor = shared.by_name("bxor"); + let copy = shared.by_name("copy"); + let copy_nop = shared.by_name("copy_nop"); + let copy_to_ssa = shared.by_name("copy_to_ssa"); + let fadd = shared.by_name("fadd"); + let fcmp = shared.by_name("fcmp"); + let fcvt_from_sint = shared.by_name("fcvt_from_sint"); + let fdiv = shared.by_name("fdiv"); + let fill = shared.by_name("fill"); + let fill_nop = shared.by_name("fill_nop"); + let fmul = shared.by_name("fmul"); + let fsub = shared.by_name("fsub"); + let iabs = shared.by_name("iabs"); + let iadd = shared.by_name("iadd"); + let icmp = shared.by_name("icmp"); + let imul = shared.by_name("imul"); + let ishl_imm = shared.by_name("ishl_imm"); + let load = shared.by_name("load"); + let load_complex = shared.by_name("load_complex"); + let raw_bitcast = shared.by_name("raw_bitcast"); + let regfill = shared.by_name("regfill"); + let regmove = shared.by_name("regmove"); + let regspill = shared.by_name("regspill"); + let sadd_sat = shared.by_name("sadd_sat"); + let scalar_to_vector = shared.by_name("scalar_to_vector"); + let sload8x8 = shared.by_name("sload8x8"); + let sload8x8_complex = shared.by_name("sload8x8_complex"); + let sload16x4 = shared.by_name("sload16x4"); + let sload16x4_complex = shared.by_name("sload16x4_complex"); + let sload32x2 = shared.by_name("sload32x2"); + let sload32x2_complex = shared.by_name("sload32x2_complex"); + let spill = shared.by_name("spill"); + let sqrt = shared.by_name("sqrt"); + let sshr_imm = shared.by_name("sshr_imm"); + let ssub_sat = shared.by_name("ssub_sat"); + let store = shared.by_name("store"); + let store_complex = shared.by_name("store_complex"); + let swiden_low = shared.by_name("swiden_low"); + let uadd_sat = shared.by_name("uadd_sat"); + let uload8x8 = shared.by_name("uload8x8"); + let uload8x8_complex = shared.by_name("uload8x8_complex"); + let uload16x4 = shared.by_name("uload16x4"); + let uload16x4_complex = shared.by_name("uload16x4_complex"); + let uload32x2 = shared.by_name("uload32x2"); + let uload32x2_complex = shared.by_name("uload32x2_complex"); + let snarrow = shared.by_name("snarrow"); + let unarrow = shared.by_name("unarrow"); + let uwiden_low = shared.by_name("uwiden_low"); + let ushr_imm = shared.by_name("ushr_imm"); + let usub_sat = shared.by_name("usub_sat"); + let vconst = shared.by_name("vconst"); + let vselect = shared.by_name("vselect"); + let x86_cvtt2si = x86.by_name("x86_cvtt2si"); + let x86_insertps = x86.by_name("x86_insertps"); + let x86_fmax = x86.by_name("x86_fmax"); + let x86_fmin = x86.by_name("x86_fmin"); + let x86_movlhps = x86.by_name("x86_movlhps"); + let x86_movsd = x86.by_name("x86_movsd"); + let x86_pblendw = x86.by_name("x86_pblendw"); + let x86_pextr = x86.by_name("x86_pextr"); + let x86_pinsr = x86.by_name("x86_pinsr"); + let x86_pmaxs = x86.by_name("x86_pmaxs"); + let x86_pmaxu = x86.by_name("x86_pmaxu"); + let x86_pmins = x86.by_name("x86_pmins"); + let x86_pminu = x86.by_name("x86_pminu"); + let x86_pmullq = x86.by_name("x86_pmullq"); + let x86_pmuludq = x86.by_name("x86_pmuludq"); + let x86_palignr = x86.by_name("x86_palignr"); + let x86_pshufb = x86.by_name("x86_pshufb"); + let x86_pshufd = x86.by_name("x86_pshufd"); + let x86_psll = x86.by_name("x86_psll"); + let x86_psra = x86.by_name("x86_psra"); + let x86_psrl = x86.by_name("x86_psrl"); + let x86_ptest = x86.by_name("x86_ptest"); + let x86_punpckh = x86.by_name("x86_punpckh"); + let 
x86_punpckl = x86.by_name("x86_punpckl"); + let x86_vcvtudq2ps = x86.by_name("x86_vcvtudq2ps"); + + // Shorthands for recipes. + let rec_blend = r.template("blend"); + let rec_evex_reg_vvvv_rm_128 = r.template("evex_reg_vvvv_rm_128"); + let rec_evex_reg_rm_128 = r.template("evex_reg_rm_128"); + let rec_f_ib = r.template("f_ib"); + let rec_fa = r.template("fa"); + let rec_fa_ib = r.template("fa_ib"); + let rec_fax = r.template("fax"); + let rec_fcmp = r.template("fcmp"); + let rec_ffillSib32 = r.template("ffillSib32"); + let rec_ffillnull = r.recipe("ffillnull"); + let rec_fld = r.template("fld"); + let rec_fldDisp32 = r.template("fldDisp32"); + let rec_fldDisp8 = r.template("fldDisp8"); + let rec_fldWithIndex = r.template("fldWithIndex"); + let rec_fldWithIndexDisp32 = r.template("fldWithIndexDisp32"); + let rec_fldWithIndexDisp8 = r.template("fldWithIndexDisp8"); + let rec_fregfill32 = r.template("fregfill32"); + let rec_fregspill32 = r.template("fregspill32"); + let rec_frmov = r.template("frmov"); + let rec_frurm = r.template("frurm"); + let rec_fspillSib32 = r.template("fspillSib32"); + let rec_fst = r.template("fst"); + let rec_fstDisp32 = r.template("fstDisp32"); + let rec_fstDisp8 = r.template("fstDisp8"); + let rec_fstWithIndex = r.template("fstWithIndex"); + let rec_fstWithIndexDisp32 = r.template("fstWithIndexDisp32"); + let rec_fstWithIndexDisp8 = r.template("fstWithIndexDisp8"); + let rec_furm = r.template("furm"); + let rec_furm_reg_to_ssa = r.template("furm_reg_to_ssa"); + let rec_icscc_fpr = r.template("icscc_fpr"); + let rec_null_fpr = r.recipe("null_fpr"); + let rec_pfcmp = r.template("pfcmp"); + let rec_r_ib_unsigned_fpr = r.template("r_ib_unsigned_fpr"); + let rec_r_ib_unsigned_gpr = r.template("r_ib_unsigned_gpr"); + let rec_r_ib_unsigned_r = r.template("r_ib_unsigned_r"); + let rec_stacknull = r.recipe("stacknull"); + let rec_vconst = r.template("vconst"); + let rec_vconst_optimized = r.template("vconst_optimized"); + + // Predicates shorthands. + settings.predicate_by_name("all_ones_funcaddrs_and_not_is_pic"); + settings.predicate_by_name("not_all_ones_funcaddrs_and_not_is_pic"); + let use_ssse3_simd = settings.predicate_by_name("use_ssse3_simd"); + let use_sse41_simd = settings.predicate_by_name("use_sse41_simd"); + let use_sse42_simd = settings.predicate_by_name("use_sse42_simd"); + let use_avx512dq_simd = settings.predicate_by_name("use_avx512dq_simd"); + let use_avx512vl_simd = settings.predicate_by_name("use_avx512vl_simd"); + + // SIMD vector size: eventually multiple vector sizes may be supported but for now only + // SSE-sized vectors are available. + let sse_vector_size: u64 = 128; + + // SIMD splat: before x86 can use vector data, it must be moved to XMM registers; see + // legalize.rs for how this is done; once there, x86_pshuf* (below) is used for broadcasting the + // value across the register. + + let allowed_simd_type = |t: &LaneType| t.lane_bits() >= 8 && t.lane_bits() < 128; + + // PSHUFB, 8-bit shuffle using two XMM registers. + for ty in ValueType::all_lane_types().filter(allowed_simd_type) { + let instruction = x86_pshufb.bind(vector(ty, sse_vector_size)); + let template = rec_fa.opcodes(&PSHUFB); + e.enc_both_inferred_maybe_isap(instruction.clone(), template.clone(), Some(use_ssse3_simd)); + } + + // PSHUFD, 32-bit shuffle using one XMM register and a u8 immediate. 
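+ // (The 32-bit-lane counterpart of PSHUFB above; as noted earlier, the
+ // x86_pshuf* instructions are what splat legalization broadcasts with.)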
+ for ty in ValueType::all_lane_types().filter(|t| t.lane_bits() == 32) { + let instruction = x86_pshufd.bind(vector(ty, sse_vector_size)); + let template = rec_r_ib_unsigned_fpr.opcodes(&PSHUFD); + e.enc_both_inferred(instruction, template); + } + + // SIMD vselect; controlling value of vselect is a boolean vector, so each lane should be + // either all ones or all zeroes - it makes it possible to always use 8-bit PBLENDVB; + // for 32/64-bit lanes we can also use BLENDVPS and BLENDVPD + for ty in ValueType::all_lane_types().filter(allowed_simd_type) { + let opcode = match ty.lane_bits() { + 32 => &BLENDVPS, + 64 => &BLENDVPD, + _ => &PBLENDVB, + }; + let instruction = vselect.bind(vector(ty, sse_vector_size)); + let template = rec_blend.opcodes(opcode); + e.enc_both_inferred_maybe_isap(instruction, template, Some(use_sse41_simd)); + } + + // PBLENDW, select lanes using a u8 immediate. + for ty in ValueType::all_lane_types().filter(|t| t.lane_bits() == 16) { + let instruction = x86_pblendw.bind(vector(ty, sse_vector_size)); + let template = rec_fa_ib.opcodes(&PBLENDW); + e.enc_both_inferred_maybe_isap(instruction, template, Some(use_sse41_simd)); + } + + // SIMD scalar_to_vector; this uses MOV to copy the scalar value to an XMM register; according + // to the Intel manual: "When the destination operand is an XMM register, the source operand is + // written to the low doubleword of the register and the register is zero-extended to 128 bits." + for ty in ValueType::all_lane_types().filter(allowed_simd_type) { + let instruction = scalar_to_vector.bind(vector(ty, sse_vector_size)); + if ty.is_float() { + // No need to move floats--they already live in XMM registers. + e.enc_32_64_rec(instruction, rec_null_fpr, 0); + } else { + let template = rec_frurm.opcodes(&MOVD_LOAD_XMM); + if ty.lane_bits() < 64 { + e.enc_both_inferred(instruction, template); + } else { + // No 32-bit encodings for 64-bit widths. + assert_eq!(ty.lane_bits(), 64); + e.enc64(instruction, template.rex().w()); + } + } + } + + // SIMD insertlane + for ty in ValueType::all_lane_types().filter(allowed_simd_type) { + let (opcode, isap): (&[_], _) = match ty.lane_bits() { + 8 => (&PINSRB, Some(use_sse41_simd)), + 16 => (&PINSRW, None), + 32 | 64 => (&PINSR, Some(use_sse41_simd)), + _ => panic!("invalid size for SIMD insertlane"), + }; + + let instruction = x86_pinsr.bind(vector(ty, sse_vector_size)); + let template = rec_r_ib_unsigned_r.opcodes(opcode); + if ty.lane_bits() < 64 { + e.enc_both_inferred_maybe_isap(instruction, template, isap); + } else { + // It turns out the 64-bit widths have REX/W encodings and only are available on + // x86_64. + e.enc64_maybe_isap(instruction, template.rex().w(), isap); + } + } + + // For legalizing insertlane with floats, INSERTPS from SSE4.1. + { + let instruction = x86_insertps.bind(vector(F32, sse_vector_size)); + let template = rec_fa_ib.opcodes(&INSERTPS); + e.enc_both_inferred_maybe_isap(instruction, template, Some(use_sse41_simd)); + } + + // For legalizing insertlane with floats, MOVSD from SSE2. + { + let instruction = x86_movsd.bind(vector(F64, sse_vector_size)); + let template = rec_fa.opcodes(&MOVSD_LOAD); + e.enc_both_inferred(instruction, template); // from SSE2 + } + + // For legalizing insertlane with floats, MOVLHPS from SSE. 
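+ // MOVLHPS copies the low 64 bits of the source XMM register into the high 64 bits of the destination and leaves the destination's low 64 bits unchanged, which is the data movement needed when an f64 is inserted into the upper lane of an f64x2 vector.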
+ { + let instruction = x86_movlhps.bind(vector(F64, sse_vector_size)); + let template = rec_fa.opcodes(&MOVLHPS); + e.enc_both_inferred(instruction, template); // from SSE + } + + // SIMD extractlane + for ty in ValueType::all_lane_types().filter(allowed_simd_type) { + let opcode = match ty.lane_bits() { + 8 => &PEXTRB, + 16 => &PEXTRW, + 32 | 64 => &PEXTR, + _ => panic!("invalid size for SIMD extractlane"), + }; + + let instruction = x86_pextr.bind(vector(ty, sse_vector_size)); + let template = rec_r_ib_unsigned_gpr.opcodes(opcode); + if ty.lane_bits() < 64 { + e.enc_both_inferred_maybe_isap(instruction, template, Some(use_sse41_simd)); + } else { + // It turns out the 64-bit widths have REX/W encodings and only are available on + // x86_64. + e.enc64_maybe_isap(instruction, template.rex().w(), Some(use_sse41_simd)); + } + } + + // SIMD packing/unpacking + for ty in ValueType::all_lane_types().filter(allowed_simd_type) { + let (high, low) = match ty.lane_bits() { + 8 => (&PUNPCKHBW, &PUNPCKLBW), + 16 => (&PUNPCKHWD, &PUNPCKLWD), + 32 => (&PUNPCKHDQ, &PUNPCKLDQ), + 64 => (&PUNPCKHQDQ, &PUNPCKLQDQ), + _ => panic!("invalid size for SIMD packing/unpacking"), + }; + + e.enc_both_inferred( + x86_punpckh.bind(vector(ty, sse_vector_size)), + rec_fa.opcodes(high), + ); + e.enc_both_inferred( + x86_punpckl.bind(vector(ty, sse_vector_size)), + rec_fa.opcodes(low), + ); + } + + // SIMD narrow/widen + for (ty, opcodes) in &[(I16, &PACKSSWB), (I32, &PACKSSDW)] { + let snarrow = snarrow.bind(vector(*ty, sse_vector_size)); + e.enc_both_inferred(snarrow, rec_fa.opcodes(*opcodes)); + } + for (ty, opcodes, isap) in &[ + (I16, &PACKUSWB[..], None), + (I32, &PACKUSDW[..], Some(use_sse41_simd)), + ] { + let unarrow = unarrow.bind(vector(*ty, sse_vector_size)); + e.enc_both_inferred_maybe_isap(unarrow, rec_fa.opcodes(*opcodes), *isap); + } + for (ty, swiden_opcode, uwiden_opcode) in &[ + (I8, &PMOVSXBW[..], &PMOVZXBW[..]), + (I16, &PMOVSXWD[..], &PMOVZXWD[..]), + ] { + let isap = Some(use_sse41_simd); + let swiden_low = swiden_low.bind(vector(*ty, sse_vector_size)); + e.enc_both_inferred_maybe_isap(swiden_low, rec_furm.opcodes(*swiden_opcode), isap); + let uwiden_low = uwiden_low.bind(vector(*ty, sse_vector_size)); + e.enc_both_inferred_maybe_isap(uwiden_low, rec_furm.opcodes(*uwiden_opcode), isap); + } + for ty in &[I8, I16, I32, I64] { + e.enc_both_inferred_maybe_isap( + x86_palignr.bind(vector(*ty, sse_vector_size)), + rec_fa_ib.opcodes(&PALIGNR[..]), + Some(use_ssse3_simd), + ); + } + + // SIMD bitcast all 128-bit vectors to each other (for legalizing splat.x16x8). + for from_type in ValueType::all_lane_types().filter(allowed_simd_type) { + for to_type in + ValueType::all_lane_types().filter(|t| allowed_simd_type(t) && *t != from_type) + { + let instruction = raw_bitcast + .bind(vector(to_type, sse_vector_size)) + .bind(vector(from_type, sse_vector_size)); + e.enc_32_64_rec(instruction, rec_null_fpr, 0); + } + } + + // SIMD raw bitcast floats to vector (and back); assumes that floats are already stored in an + // XMM register. 
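+ // Since the bits already sit in an XMM register, these raw_bitcast encodings use the null_fpr recipe below and emit no machine code; the instruction only changes the type the value is viewed as.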
+ for float_type in &[F32, F64] { + for lane_type in ValueType::all_lane_types().filter(allowed_simd_type) { + e.enc_32_64_rec( + raw_bitcast + .bind(vector(lane_type, sse_vector_size)) + .bind(*float_type), + rec_null_fpr, + 0, + ); + e.enc_32_64_rec( + raw_bitcast + .bind(*float_type) + .bind(vector(lane_type, sse_vector_size)), + rec_null_fpr, + 0, + ); + } + } + + // SIMD conversions + { + let fcvt_from_sint_32 = fcvt_from_sint + .bind(vector(F32, sse_vector_size)) + .bind(vector(I32, sse_vector_size)); + e.enc_both(fcvt_from_sint_32, rec_furm.opcodes(&CVTDQ2PS)); + + e.enc_32_64_maybe_isap( + x86_vcvtudq2ps, + rec_evex_reg_rm_128.opcodes(&VCVTUDQ2PS), + Some(use_avx512vl_simd), // TODO need an OR predicate to join with AVX512F + ); + + e.enc_both_inferred( + x86_cvtt2si + .bind(vector(I32, sse_vector_size)) + .bind(vector(F32, sse_vector_size)), + rec_furm.opcodes(&CVTTPS2DQ), + ); + } + + // SIMD vconst for special cases (all zeroes, all ones) + // this must be encoded prior to the MOVUPS implementation (below) so the compiler sees this + // encoding first + for ty in ValueType::all_lane_types().filter(allowed_simd_type) { + let instruction = vconst.bind(vector(ty, sse_vector_size)); + + let is_zero_128bit = + InstructionPredicate::new_is_all_zeroes(&*formats.unary_const, "constant_handle"); + let template = rec_vconst_optimized.opcodes(&PXOR).infer_rex(); + e.enc_32_64_func(instruction.clone(), template, |builder| { + builder.inst_predicate(is_zero_128bit) + }); + + let is_ones_128bit = + InstructionPredicate::new_is_all_ones(&*formats.unary_const, "constant_handle"); + let template = rec_vconst_optimized.opcodes(&PCMPEQB).infer_rex(); + e.enc_32_64_func(instruction, template, |builder| { + builder.inst_predicate(is_ones_128bit) + }); + } + + // SIMD vconst using MOVUPS + // TODO it would be ideal if eventually this became the more efficient MOVAPS but we would have + // to guarantee that the constants are aligned when emitted and there is currently no mechanism + // for that; alternately, constants could be loaded into XMM registers using a sequence like: + // MOVQ + MOVHPD + MOVQ + MOVLPD (this allows the constants to be immediates instead of stored + // in memory) but some performance measurements are needed. + for ty in ValueType::all_lane_types().filter(allowed_simd_type) { + let instruction = vconst.bind(vector(ty, sse_vector_size)); + let template = rec_vconst.opcodes(&MOVUPS_LOAD); + e.enc_both_inferred(instruction, template); // from SSE + } + + // SIMD register movement: store, load, spill, fill, regmove, etc. All of these use encodings of + // MOVUPS and MOVAPS from SSE (TODO ideally all of these would either use MOVAPS when we have + // alignment or type-specific encodings, see https://github.com/bytecodealliance/wasmtime/issues/1124). + // Also, it would be ideal to infer REX prefixes for all of these instructions but for the + // time being only instructions with common recipes have `infer_rex()` support. 
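+ // Concretely, the loop below gives every SSE-sized vector type: store/load encodings (plus the Disp8/Disp32 and indexed variants) based on MOVUPS, spill/fill and regspill/regfill based on MOVUPS, regmove and copy/copy_to_ssa based on MOVAPS, and no-op recipes for fill_nop and copy_nop.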
+ for ty in ValueType::all_lane_types().filter(allowed_simd_type) { + // Store + let bound_store = store.bind(vector(ty, sse_vector_size)).bind(Any); + e.enc_both_inferred(bound_store.clone(), rec_fst.opcodes(&MOVUPS_STORE)); + e.enc_both_inferred(bound_store.clone(), rec_fstDisp8.opcodes(&MOVUPS_STORE)); + e.enc_both_inferred(bound_store, rec_fstDisp32.opcodes(&MOVUPS_STORE)); + + // Store complex + let bound_store_complex = store_complex.bind(vector(ty, sse_vector_size)); + e.enc_both( + bound_store_complex.clone(), + rec_fstWithIndex.opcodes(&MOVUPS_STORE), + ); + e.enc_both( + bound_store_complex.clone(), + rec_fstWithIndexDisp8.opcodes(&MOVUPS_STORE), + ); + e.enc_both( + bound_store_complex, + rec_fstWithIndexDisp32.opcodes(&MOVUPS_STORE), + ); + + // Load + let bound_load = load.bind(vector(ty, sse_vector_size)).bind(Any); + e.enc_both_inferred(bound_load.clone(), rec_fld.opcodes(&MOVUPS_LOAD)); + e.enc_both_inferred(bound_load.clone(), rec_fldDisp8.opcodes(&MOVUPS_LOAD)); + e.enc_both_inferred(bound_load, rec_fldDisp32.opcodes(&MOVUPS_LOAD)); + + // Load complex + let bound_load_complex = load_complex.bind(vector(ty, sse_vector_size)); + e.enc_both( + bound_load_complex.clone(), + rec_fldWithIndex.opcodes(&MOVUPS_LOAD), + ); + e.enc_both( + bound_load_complex.clone(), + rec_fldWithIndexDisp8.opcodes(&MOVUPS_LOAD), + ); + e.enc_both( + bound_load_complex, + rec_fldWithIndexDisp32.opcodes(&MOVUPS_LOAD), + ); + + // Spill + let bound_spill = spill.bind(vector(ty, sse_vector_size)); + e.enc_both(bound_spill, rec_fspillSib32.opcodes(&MOVUPS_STORE)); + let bound_regspill = regspill.bind(vector(ty, sse_vector_size)); + e.enc_both(bound_regspill, rec_fregspill32.opcodes(&MOVUPS_STORE)); + + // Fill + let bound_fill = fill.bind(vector(ty, sse_vector_size)); + e.enc_both(bound_fill, rec_ffillSib32.opcodes(&MOVUPS_LOAD)); + let bound_regfill = regfill.bind(vector(ty, sse_vector_size)); + e.enc_both(bound_regfill, rec_fregfill32.opcodes(&MOVUPS_LOAD)); + let bound_fill_nop = fill_nop.bind(vector(ty, sse_vector_size)); + e.enc_32_64_rec(bound_fill_nop, rec_ffillnull, 0); + + // Regmove + let bound_regmove = regmove.bind(vector(ty, sse_vector_size)); + e.enc_both(bound_regmove, rec_frmov.opcodes(&MOVAPS_LOAD)); + + // Copy + let bound_copy = copy.bind(vector(ty, sse_vector_size)); + e.enc_both(bound_copy, rec_furm.opcodes(&MOVAPS_LOAD)); + let bound_copy_to_ssa = copy_to_ssa.bind(vector(ty, sse_vector_size)); + e.enc_both(bound_copy_to_ssa, rec_furm_reg_to_ssa.opcodes(&MOVAPS_LOAD)); + let bound_copy_nop = copy_nop.bind(vector(ty, sse_vector_size)); + e.enc_32_64_rec(bound_copy_nop, rec_stacknull, 0); + } + + // SIMD load extend + for (inst, opcodes) in &[ + (uload8x8, &PMOVZXBW), + (uload16x4, &PMOVZXWD), + (uload32x2, &PMOVZXDQ), + (sload8x8, &PMOVSXBW), + (sload16x4, &PMOVSXWD), + (sload32x2, &PMOVSXDQ), + ] { + let isap = Some(use_sse41_simd); + for recipe in &[rec_fld, rec_fldDisp8, rec_fldDisp32] { + let inst = *inst; + let template = recipe.opcodes(*opcodes); + e.enc_both_inferred_maybe_isap(inst.clone().bind(I32), template.clone(), isap); + e.enc64_maybe_isap(inst.bind(I64), template.infer_rex(), isap); + } + } + + // SIMD load extend (complex addressing) + let is_load_complex_length_two = + InstructionPredicate::new_length_equals(&*formats.load_complex, 2); + for (inst, opcodes) in &[ + (uload8x8_complex, &PMOVZXBW), + (uload16x4_complex, &PMOVZXWD), + (uload32x2_complex, &PMOVZXDQ), + (sload8x8_complex, &PMOVSXBW), + (sload16x4_complex, &PMOVSXWD), + (sload32x2_complex, &PMOVSXDQ), + ] 
{ + for recipe in &[ + rec_fldWithIndex, + rec_fldWithIndexDisp8, + rec_fldWithIndexDisp32, + ] { + let template = recipe.opcodes(*opcodes); + let predicate = |encoding: EncodingBuilder| { + encoding + .isa_predicate(use_sse41_simd) + .inst_predicate(is_load_complex_length_two.clone()) + }; + e.enc32_func(inst.clone(), template.clone(), predicate); + // No infer_rex calculator for these recipes; place REX version first as in enc_x86_64. + e.enc64_func(inst.clone(), template.rex(), predicate); + e.enc64_func(inst.clone(), template, predicate); + } + } + + // SIMD integer addition + for (ty, opcodes) in &[(I8, &PADDB), (I16, &PADDW), (I32, &PADDD), (I64, &PADDQ)] { + let iadd = iadd.bind(vector(*ty, sse_vector_size)); + e.enc_both_inferred(iadd, rec_fa.opcodes(*opcodes)); + } + + // SIMD integer saturating addition + e.enc_both_inferred( + sadd_sat.bind(vector(I8, sse_vector_size)), + rec_fa.opcodes(&PADDSB), + ); + e.enc_both_inferred( + sadd_sat.bind(vector(I16, sse_vector_size)), + rec_fa.opcodes(&PADDSW), + ); + e.enc_both_inferred( + uadd_sat.bind(vector(I8, sse_vector_size)), + rec_fa.opcodes(&PADDUSB), + ); + e.enc_both_inferred( + uadd_sat.bind(vector(I16, sse_vector_size)), + rec_fa.opcodes(&PADDUSW), + ); + + // SIMD integer subtraction + let isub = shared.by_name("isub"); + for (ty, opcodes) in &[(I8, &PSUBB), (I16, &PSUBW), (I32, &PSUBD), (I64, &PSUBQ)] { + let isub = isub.bind(vector(*ty, sse_vector_size)); + e.enc_both_inferred(isub, rec_fa.opcodes(*opcodes)); + } + + // SIMD integer saturating subtraction + e.enc_both_inferred( + ssub_sat.bind(vector(I8, sse_vector_size)), + rec_fa.opcodes(&PSUBSB), + ); + e.enc_both_inferred( + ssub_sat.bind(vector(I16, sse_vector_size)), + rec_fa.opcodes(&PSUBSW), + ); + e.enc_both_inferred( + usub_sat.bind(vector(I8, sse_vector_size)), + rec_fa.opcodes(&PSUBUSB), + ); + e.enc_both_inferred( + usub_sat.bind(vector(I16, sse_vector_size)), + rec_fa.opcodes(&PSUBUSW), + ); + + // SIMD integer multiplication: the x86 ISA does not have instructions for multiplying I8x16 + // and I64x2 and these are (at the time of writing) not necessary for WASM SIMD. + for (ty, opcodes, isap) in &[ + (I16, &PMULLW[..], None), + (I32, &PMULLD[..], Some(use_sse41_simd)), + ] { + let imul = imul.bind(vector(*ty, sse_vector_size)); + e.enc_both_inferred_maybe_isap(imul, rec_fa.opcodes(opcodes), *isap); + } + + // SIMD multiplication with lane expansion. + e.enc_both_inferred(x86_pmuludq, rec_fa.opcodes(&PMULUDQ)); + + // SIMD integer multiplication for I64x2 using a AVX512. + { + e.enc_32_64_maybe_isap( + x86_pmullq, + rec_evex_reg_vvvv_rm_128.opcodes(&VPMULLQ).w(), + Some(use_avx512dq_simd), // TODO need an OR predicate to join with AVX512VL + ); + } + + // SIMD integer average with rounding. + for (ty, opcodes) in &[(I8, &PAVGB[..]), (I16, &PAVGW[..])] { + let avgr = avg_round.bind(vector(*ty, sse_vector_size)); + e.enc_both_inferred(avgr, rec_fa.opcodes(opcodes)); + } + + // SIMD integer absolute value. 
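+ // PABSB/PABSW/PABSD are SSSE3 instructions, hence the use_ssse3_simd predicate below; a packed 64-bit absolute value (VPABSQ) only appears with AVX-512, so no iabs encoding is added for i64x2 lanes here.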
+ for (ty, opcodes) in &[(I8, &PABSB[..]), (I16, &PABSW[..]), (I32, &PABSD)] { + let iabs = iabs.bind(vector(*ty, sse_vector_size)); + e.enc_both_inferred_maybe_isap(iabs, rec_furm.opcodes(opcodes), Some(use_ssse3_simd)); + } + + // SIMD logical operations + let band = shared.by_name("band"); + let band_not = shared.by_name("band_not"); + for ty in ValueType::all_lane_types().filter(allowed_simd_type) { + // and + let band = band.bind(vector(ty, sse_vector_size)); + e.enc_both_inferred(band, rec_fa.opcodes(&PAND)); + + // and not (note flipped recipe operands to match band_not order) + let band_not = band_not.bind(vector(ty, sse_vector_size)); + e.enc_both_inferred(band_not, rec_fax.opcodes(&PANDN)); + + // or + let bor = bor.bind(vector(ty, sse_vector_size)); + e.enc_both_inferred(bor, rec_fa.opcodes(&POR)); + + // xor + let bxor = bxor.bind(vector(ty, sse_vector_size)); + e.enc_both_inferred(bxor, rec_fa.opcodes(&PXOR)); + + // ptest + let x86_ptest = x86_ptest.bind(vector(ty, sse_vector_size)); + e.enc_both_inferred_maybe_isap(x86_ptest, rec_fcmp.opcodes(&PTEST), Some(use_sse41_simd)); + } + + // SIMD bitcast from I32/I64 to the low bits of a vector (e.g. I64x2); this register movement + // allows SIMD shifts to be legalized more easily. TODO ideally this would be typed as an + // I128x1 but restrictions on the type builder prevent this; the general idea here is that + // the upper bits are all zeroed and do not form parts of any separate lane. See + // https://github.com/bytecodealliance/wasmtime/issues/1140. + e.enc_both_inferred( + bitcast.bind(vector(I64, sse_vector_size)).bind(I32), + rec_frurm.opcodes(&MOVD_LOAD_XMM), + ); + e.enc64( + bitcast.bind(vector(I64, sse_vector_size)).bind(I64), + rec_frurm.opcodes(&MOVD_LOAD_XMM).rex().w(), + ); + + // SIMD shift left + for (ty, opcodes) in &[(I16, &PSLLW), (I32, &PSLLD), (I64, &PSLLQ)] { + let x86_psll = x86_psll.bind(vector(*ty, sse_vector_size)); + e.enc_both_inferred(x86_psll, rec_fa.opcodes(*opcodes)); + } + + // SIMD shift right (logical) + for (ty, opcodes) in &[(I16, &PSRLW), (I32, &PSRLD), (I64, &PSRLQ)] { + let x86_psrl = x86_psrl.bind(vector(*ty, sse_vector_size)); + e.enc_both_inferred(x86_psrl, rec_fa.opcodes(*opcodes)); + } + + // SIMD shift right (arithmetic) + for (ty, opcodes) in &[(I16, &PSRAW), (I32, &PSRAD)] { + let x86_psra = x86_psra.bind(vector(*ty, sse_vector_size)); + e.enc_both_inferred(x86_psra, rec_fa.opcodes(*opcodes)); + } + + // SIMD immediate shift + for (ty, opcodes) in &[(I16, &PS_W_IMM), (I32, &PS_D_IMM), (I64, &PS_Q_IMM)] { + let ishl_imm = ishl_imm.bind(vector(*ty, sse_vector_size)); + e.enc_both_inferred(ishl_imm, rec_f_ib.opcodes(*opcodes).rrr(6)); + + let ushr_imm = ushr_imm.bind(vector(*ty, sse_vector_size)); + e.enc_both_inferred(ushr_imm, rec_f_ib.opcodes(*opcodes).rrr(2)); + + // One exception: PSRAQ does not exist in for 64x2 in SSE2, it requires a higher CPU feature set. 
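+ // (A packed 64-bit arithmetic right shift only arrives with AVX-512's VPSRAQ, so the i64x2 case simply gets no sshr_imm encoding in this table.)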
+ if *ty != I64 { + let sshr_imm = sshr_imm.bind(vector(*ty, sse_vector_size)); + e.enc_both_inferred(sshr_imm, rec_f_ib.opcodes(*opcodes).rrr(4)); + } + } + + // SIMD integer comparisons + { + use IntCC::*; + for (ty, cc, opcodes, isa_predicate) in &[ + (I8, Equal, &PCMPEQB[..], None), + (I16, Equal, &PCMPEQW[..], None), + (I32, Equal, &PCMPEQD[..], None), + (I64, Equal, &PCMPEQQ[..], Some(use_sse41_simd)), + (I8, SignedGreaterThan, &PCMPGTB[..], None), + (I16, SignedGreaterThan, &PCMPGTW[..], None), + (I32, SignedGreaterThan, &PCMPGTD[..], None), + (I64, SignedGreaterThan, &PCMPGTQ, Some(use_sse42_simd)), + ] { + let instruction = icmp + .bind(Immediate::IntCC(*cc)) + .bind(vector(*ty, sse_vector_size)); + let template = rec_icscc_fpr.opcodes(opcodes); + e.enc_both_inferred_maybe_isap(instruction, template, *isa_predicate); + } + } + + // SIMD min/max + for (ty, inst, opcodes, isa_predicate) in &[ + (I8, x86_pmaxs, &PMAXSB[..], Some(use_sse41_simd)), + (I16, x86_pmaxs, &PMAXSW[..], None), + (I32, x86_pmaxs, &PMAXSD[..], Some(use_sse41_simd)), + (I8, x86_pmaxu, &PMAXUB[..], None), + (I16, x86_pmaxu, &PMAXUW[..], Some(use_sse41_simd)), + (I32, x86_pmaxu, &PMAXUD[..], Some(use_sse41_simd)), + (I8, x86_pmins, &PMINSB[..], Some(use_sse41_simd)), + (I16, x86_pmins, &PMINSW[..], None), + (I32, x86_pmins, &PMINSD[..], Some(use_sse41_simd)), + (I8, x86_pminu, &PMINUB[..], None), + (I16, x86_pminu, &PMINUW[..], Some(use_sse41_simd)), + (I32, x86_pminu, &PMINUD[..], Some(use_sse41_simd)), + ] { + let inst = inst.bind(vector(*ty, sse_vector_size)); + e.enc_both_inferred_maybe_isap(inst, rec_fa.opcodes(opcodes), *isa_predicate); + } + + // SIMD float comparisons + e.enc_both_inferred( + fcmp.bind(vector(F32, sse_vector_size)), + rec_pfcmp.opcodes(&CMPPS), + ); + e.enc_both_inferred( + fcmp.bind(vector(F64, sse_vector_size)), + rec_pfcmp.opcodes(&CMPPD), + ); + + // SIMD float arithmetic + for (ty, inst, opcodes) in &[ + (F32, fadd, &ADDPS[..]), + (F64, fadd, &ADDPD[..]), + (F32, fsub, &SUBPS[..]), + (F64, fsub, &SUBPD[..]), + (F32, fmul, &MULPS[..]), + (F64, fmul, &MULPD[..]), + (F32, fdiv, &DIVPS[..]), + (F64, fdiv, &DIVPD[..]), + (F32, x86_fmin, &MINPS[..]), + (F64, x86_fmin, &MINPD[..]), + (F32, x86_fmax, &MAXPS[..]), + (F64, x86_fmax, &MAXPD[..]), + ] { + let inst = inst.bind(vector(*ty, sse_vector_size)); + e.enc_both_inferred(inst, rec_fa.opcodes(opcodes)); + } + for (ty, inst, opcodes) in &[(F32, sqrt, &SQRTPS[..]), (F64, sqrt, &SQRTPD[..])] { + let inst = inst.bind(vector(*ty, sse_vector_size)); + e.enc_both_inferred(inst, rec_furm.opcodes(opcodes)); + } +} + +#[inline(never)] +fn define_entity_ref( + e: &mut PerCpuModeEncodings, + shared_defs: &SharedDefinitions, + settings: &SettingGroup, + r: &RecipeGroup, +) { + let shared = &shared_defs.instructions; + let formats = &shared_defs.formats; + + // Shorthands for instructions. + let const_addr = shared.by_name("const_addr"); + let func_addr = shared.by_name("func_addr"); + let stack_addr = shared.by_name("stack_addr"); + let symbol_value = shared.by_name("symbol_value"); + + // Shorthands for recipes. 
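+ // (All of the shorthands below are templates, i.e. recipes that still need their opcode bytes supplied through .opcodes(..) before they can form an encoding.)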
+ let rec_allones_fnaddr4 = r.template("allones_fnaddr4"); + let rec_allones_fnaddr8 = r.template("allones_fnaddr8"); + let rec_fnaddr4 = r.template("fnaddr4"); + let rec_fnaddr8 = r.template("fnaddr8"); + let rec_const_addr = r.template("const_addr"); + let rec_got_fnaddr8 = r.template("got_fnaddr8"); + let rec_got_gvaddr8 = r.template("got_gvaddr8"); + let rec_gvaddr4 = r.template("gvaddr4"); + let rec_gvaddr8 = r.template("gvaddr8"); + let rec_pcrel_fnaddr8 = r.template("pcrel_fnaddr8"); + let rec_pcrel_gvaddr8 = r.template("pcrel_gvaddr8"); + let rec_spaddr_id = r.template("spaddr_id"); + + // Predicates shorthands. + let all_ones_funcaddrs_and_not_is_pic = + settings.predicate_by_name("all_ones_funcaddrs_and_not_is_pic"); + let is_pic = settings.predicate_by_name("is_pic"); + let not_all_ones_funcaddrs_and_not_is_pic = + settings.predicate_by_name("not_all_ones_funcaddrs_and_not_is_pic"); + let not_is_pic = settings.predicate_by_name("not_is_pic"); + + // Function addresses. + + // Non-PIC, not-all-ones funcaddresses. + e.enc32_isap( + func_addr.bind(I32), + rec_fnaddr4.opcodes(&MOV_IMM), + not_all_ones_funcaddrs_and_not_is_pic, + ); + e.enc64_isap( + func_addr.bind(I64), + rec_fnaddr8.opcodes(&MOV_IMM).rex().w(), + not_all_ones_funcaddrs_and_not_is_pic, + ); + + // Non-PIC, all-ones funcaddresses. + e.enc32_isap( + func_addr.bind(I32), + rec_allones_fnaddr4.opcodes(&MOV_IMM), + all_ones_funcaddrs_and_not_is_pic, + ); + e.enc64_isap( + func_addr.bind(I64), + rec_allones_fnaddr8.opcodes(&MOV_IMM).rex().w(), + all_ones_funcaddrs_and_not_is_pic, + ); + + // 64-bit, colocated, both PIC and non-PIC. Use the lea instruction's pc-relative field. + let is_colocated_func = + InstructionPredicate::new_is_colocated_func(&*formats.func_addr, "func_ref"); + e.enc64_instp( + func_addr.bind(I64), + rec_pcrel_fnaddr8.opcodes(&LEA).rex().w(), + is_colocated_func, + ); + + // 64-bit, non-colocated, PIC. + e.enc64_isap( + func_addr.bind(I64), + rec_got_fnaddr8.opcodes(&MOV_LOAD).rex().w(), + is_pic, + ); + + // Global addresses. + + // Non-PIC. + e.enc32_isap( + symbol_value.bind(I32), + rec_gvaddr4.opcodes(&MOV_IMM), + not_is_pic, + ); + e.enc64_isap( + symbol_value.bind(I64), + rec_gvaddr8.opcodes(&MOV_IMM).rex().w(), + not_is_pic, + ); + + // PIC, colocated. + e.enc64_func( + symbol_value.bind(I64), + rec_pcrel_gvaddr8.opcodes(&LEA).rex().w(), + |encoding| { + encoding + .isa_predicate(is_pic) + .inst_predicate(InstructionPredicate::new_is_colocated_data(formats)) + }, + ); + + // PIC, non-colocated. + e.enc64_isap( + symbol_value.bind(I64), + rec_got_gvaddr8.opcodes(&MOV_LOAD).rex().w(), + is_pic, + ); + + // Stack addresses. + // + // TODO: Add encoding rules for stack_load and stack_store, so that they + // don't get legalized to stack_addr + load/store. + e.enc64(stack_addr.bind(I64), rec_spaddr_id.opcodes(&LEA).rex().w()); + e.enc32(stack_addr.bind(I32), rec_spaddr_id.opcodes(&LEA)); + + // Constant addresses (PIC). + e.enc64(const_addr.bind(I64), rec_const_addr.opcodes(&LEA).rex().w()); + e.enc32(const_addr.bind(I32), rec_const_addr.opcodes(&LEA)); +} + +/// Control flow opcodes. +#[inline(never)] +fn define_control_flow( + e: &mut PerCpuModeEncodings, + shared_defs: &SharedDefinitions, + settings: &SettingGroup, + r: &RecipeGroup, +) { + let shared = &shared_defs.instructions; + let formats = &shared_defs.formats; + + // Shorthands for instructions.
+ let brff = shared.by_name("brff"); + let brif = shared.by_name("brif"); + let brnz = shared.by_name("brnz"); + let brz = shared.by_name("brz"); + let call = shared.by_name("call"); + let call_indirect = shared.by_name("call_indirect"); + let debugtrap = shared.by_name("debugtrap"); + let indirect_jump_table_br = shared.by_name("indirect_jump_table_br"); + let jump = shared.by_name("jump"); + let jump_table_base = shared.by_name("jump_table_base"); + let jump_table_entry = shared.by_name("jump_table_entry"); + let return_ = shared.by_name("return"); + let trap = shared.by_name("trap"); + let trapff = shared.by_name("trapff"); + let trapif = shared.by_name("trapif"); + let resumable_trap = shared.by_name("resumable_trap"); + + // Shorthands for recipes. + let rec_brfb = r.template("brfb"); + let rec_brfd = r.template("brfd"); + let rec_brib = r.template("brib"); + let rec_brid = r.template("brid"); + let rec_call_id = r.template("call_id"); + let rec_call_plt_id = r.template("call_plt_id"); + let rec_call_r = r.template("call_r"); + let rec_debugtrap = r.recipe("debugtrap"); + let rec_indirect_jmp = r.template("indirect_jmp"); + let rec_jmpb = r.template("jmpb"); + let rec_jmpd = r.template("jmpd"); + let rec_jt_base = r.template("jt_base"); + let rec_jt_entry = r.template("jt_entry"); + let rec_ret = r.template("ret"); + let rec_t8jccb_abcd = r.template("t8jccb_abcd"); + let rec_t8jccd_abcd = r.template("t8jccd_abcd"); + let rec_t8jccd_long = r.template("t8jccd_long"); + let rec_tjccb = r.template("tjccb"); + let rec_tjccd = r.template("tjccd"); + let rec_trap = r.template("trap"); + let rec_trapif = r.recipe("trapif"); + let rec_trapff = r.recipe("trapff"); + + // Predicates shorthands. + let is_pic = settings.predicate_by_name("is_pic"); + + // Call/return + + // 32-bit, both PIC and non-PIC. + e.enc32(call, rec_call_id.opcodes(&CALL_RELATIVE)); + + // 64-bit, colocated, both PIC and non-PIC. Use the call instruction's pc-relative field. + let is_colocated_func = InstructionPredicate::new_is_colocated_func(&*formats.call, "func_ref"); + e.enc64_instp(call, rec_call_id.opcodes(&CALL_RELATIVE), is_colocated_func); + + // 64-bit, non-colocated, PIC. There is no 64-bit non-colocated non-PIC version, since non-PIC + // is currently using the large model, which requires calls be lowered to + // func_addr+call_indirect. + e.enc64_isap(call, rec_call_plt_id.opcodes(&CALL_RELATIVE), is_pic); + + e.enc32( + call_indirect.bind(I32), + rec_call_r.opcodes(&JUMP_ABSOLUTE).rrr(2), + ); + e.enc64( + call_indirect.bind(I64), + rec_call_r.opcodes(&JUMP_ABSOLUTE).rrr(2).rex(), + ); + e.enc64( + call_indirect.bind(I64), + rec_call_r.opcodes(&JUMP_ABSOLUTE).rrr(2), + ); + + e.enc32(return_, rec_ret.opcodes(&RET_NEAR)); + e.enc64(return_, rec_ret.opcodes(&RET_NEAR)); + + // Branches. + e.enc32(jump, rec_jmpb.opcodes(&JUMP_SHORT)); + e.enc64(jump, rec_jmpb.opcodes(&JUMP_SHORT)); + e.enc32(jump, rec_jmpd.opcodes(&JUMP_NEAR_RELATIVE)); + e.enc64(jump, rec_jmpd.opcodes(&JUMP_NEAR_RELATIVE)); + + e.enc_both(brif, rec_brib.opcodes(&JUMP_SHORT_IF_OVERFLOW)); + e.enc_both(brif, rec_brid.opcodes(&JUMP_NEAR_IF_OVERFLOW)); + + // Not all float condition codes are legal, see `supported_floatccs`. + e.enc_both(brff, rec_brfb.opcodes(&JUMP_SHORT_IF_OVERFLOW)); + e.enc_both(brff, rec_brfd.opcodes(&JUMP_NEAR_IF_OVERFLOW)); + + // Note that the tjccd opcode will be prefixed with 0x0f. 
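+ // That prefix is why TEST_BYTE_REG and TEST_REG appear as branch opcodes below: their byte values (presumably 0x84 and 0x85, matching the TEST opcodes) become the near JE/JNE encodings once the recipe emits the 0x0f escape byte in front of them.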
+ e.enc_i32_i64_explicit_rex(brz, rec_tjccb.opcodes(&JUMP_SHORT_IF_EQUAL)); + e.enc_i32_i64_explicit_rex(brz, rec_tjccd.opcodes(&TEST_BYTE_REG)); + e.enc_i32_i64_explicit_rex(brnz, rec_tjccb.opcodes(&JUMP_SHORT_IF_NOT_EQUAL)); + e.enc_i32_i64_explicit_rex(brnz, rec_tjccd.opcodes(&TEST_REG)); + + // Branch on a b1 value in a register only looks at the low 8 bits. See also + // bint encodings below. + // + // Start with the worst-case encoding for X86_32 only. The register allocator + // can't handle a branch with an ABCD-constrained operand. + e.enc32(brz.bind(B1), rec_t8jccd_long.opcodes(&TEST_BYTE_REG)); + e.enc32(brnz.bind(B1), rec_t8jccd_long.opcodes(&TEST_REG)); + + e.enc_both(brz.bind(B1), rec_t8jccb_abcd.opcodes(&JUMP_SHORT_IF_EQUAL)); + e.enc_both(brz.bind(B1), rec_t8jccd_abcd.opcodes(&TEST_BYTE_REG)); + e.enc_both( + brnz.bind(B1), + rec_t8jccb_abcd.opcodes(&JUMP_SHORT_IF_NOT_EQUAL), + ); + e.enc_both(brnz.bind(B1), rec_t8jccd_abcd.opcodes(&TEST_REG)); + + // Jump tables. + e.enc64( + jump_table_entry.bind(I64), + rec_jt_entry.opcodes(&MOVSXD).rex().w(), + ); + e.enc32(jump_table_entry.bind(I32), rec_jt_entry.opcodes(&MOV_LOAD)); + + e.enc64( + jump_table_base.bind(I64), + rec_jt_base.opcodes(&LEA).rex().w(), + ); + e.enc32(jump_table_base.bind(I32), rec_jt_base.opcodes(&LEA)); + + e.enc_x86_64( + indirect_jump_table_br.bind(I64), + rec_indirect_jmp.opcodes(&JUMP_ABSOLUTE).rrr(4), + ); + e.enc32( + indirect_jump_table_br.bind(I32), + rec_indirect_jmp.opcodes(&JUMP_ABSOLUTE).rrr(4), + ); + + // Trap as ud2 + e.enc32(trap, rec_trap.opcodes(&UNDEFINED2)); + e.enc64(trap, rec_trap.opcodes(&UNDEFINED2)); + e.enc32(resumable_trap, rec_trap.opcodes(&UNDEFINED2)); + e.enc64(resumable_trap, rec_trap.opcodes(&UNDEFINED2)); + + // Debug trap as int3 + e.enc32_rec(debugtrap, rec_debugtrap, 0); + e.enc64_rec(debugtrap, rec_debugtrap, 0); + + e.enc32_rec(trapif, rec_trapif, 0); + e.enc64_rec(trapif, rec_trapif, 0); + e.enc32_rec(trapff, rec_trapff, 0); + e.enc64_rec(trapff, rec_trapff, 0); +} + +/// Reference type instructions. +#[inline(never)] +fn define_reftypes(e: &mut PerCpuModeEncodings, shared_defs: &SharedDefinitions, r: &RecipeGroup) { + let shared = &shared_defs.instructions; + + let is_null = shared.by_name("is_null"); + let is_invalid = shared.by_name("is_invalid"); + let null = shared.by_name("null"); + let safepoint = shared.by_name("safepoint"); + + let rec_is_zero = r.template("is_zero"); + let rec_is_invalid = r.template("is_invalid"); + let rec_pu_id_ref = r.template("pu_id_ref"); + let rec_safepoint = r.recipe("safepoint"); + + // Null references implemented as iconst 0. + e.enc32(null.bind(R32), rec_pu_id_ref.opcodes(&MOV_IMM)); + + e.enc64(null.bind(R64), rec_pu_id_ref.rex().opcodes(&MOV_IMM)); + e.enc64(null.bind(R64), rec_pu_id_ref.opcodes(&MOV_IMM)); + + // is_null, implemented by testing whether the value is 0. + e.enc_r32_r64_rex_only(is_null, rec_is_zero.opcodes(&TEST_REG)); + + // is_invalid, implemented by testing whether the value is -1. + e.enc_r32_r64_rex_only(is_invalid, rec_is_invalid.opcodes(&CMP_IMM8).rrr(7)); + + // safepoint instruction calls sink, no actual encoding. + e.enc32_rec(safepoint, rec_safepoint, 0); + e.enc64_rec(safepoint, rec_safepoint, 0); +} + +#[allow(clippy::cognitive_complexity)] +pub(crate) fn define( + shared_defs: &SharedDefinitions, + settings: &SettingGroup, + x86: &InstructionGroup, + r: &RecipeGroup, +) -> PerCpuModeEncodings { + // Definitions. 
+ let mut e = PerCpuModeEncodings::new(); + + define_moves(&mut e, shared_defs, r); + define_memory(&mut e, shared_defs, x86, r); + define_fpu_moves(&mut e, shared_defs, r); + define_fpu_memory(&mut e, shared_defs, r); + define_fpu_ops(&mut e, shared_defs, settings, x86, r); + define_alu(&mut e, shared_defs, settings, x86, r); + define_simd(&mut e, shared_defs, settings, x86, r); + define_entity_ref(&mut e, shared_defs, settings, r); + define_control_flow(&mut e, shared_defs, settings, r); + define_reftypes(&mut e, shared_defs, r); + + let x86_elf_tls_get_addr = x86.by_name("x86_elf_tls_get_addr"); + let x86_macho_tls_get_addr = x86.by_name("x86_macho_tls_get_addr"); + + let rec_elf_tls_get_addr = r.recipe("elf_tls_get_addr"); + let rec_macho_tls_get_addr = r.recipe("macho_tls_get_addr"); + + e.enc64_rec(x86_elf_tls_get_addr, rec_elf_tls_get_addr, 0); + e.enc64_rec(x86_macho_tls_get_addr, rec_macho_tls_get_addr, 0); + + e +} diff --git a/third_party/rust/cranelift-codegen-meta/src/isa/x86/instructions.rs b/third_party/rust/cranelift-codegen-meta/src/isa/x86/instructions.rs new file mode 100644 index 0000000000..7acd2e2c50 --- /dev/null +++ b/third_party/rust/cranelift-codegen-meta/src/isa/x86/instructions.rs @@ -0,0 +1,723 @@ +#![allow(non_snake_case)] + +use crate::cdsl::instructions::{ + AllInstructions, InstructionBuilder as Inst, InstructionGroup, InstructionGroupBuilder, +}; +use crate::cdsl::operands::Operand; +use crate::cdsl::types::ValueType; +use crate::cdsl::typevar::{Interval, TypeSetBuilder, TypeVar}; +use crate::shared::entities::EntityRefs; +use crate::shared::formats::Formats; +use crate::shared::immediates::Immediates; +use crate::shared::types; + +#[allow(clippy::many_single_char_names)] +pub(crate) fn define( + mut all_instructions: &mut AllInstructions, + formats: &Formats, + immediates: &Immediates, + entities: &EntityRefs, +) -> InstructionGroup { + let mut ig = InstructionGroupBuilder::new(&mut all_instructions); + + let iflags: &TypeVar = &ValueType::Special(types::Flag::IFlags.into()).into(); + + let iWord = &TypeVar::new( + "iWord", + "A scalar integer machine word", + TypeSetBuilder::new().ints(32..64).build(), + ); + let nlo = &Operand::new("nlo", iWord).with_doc("Low part of numerator"); + let nhi = &Operand::new("nhi", iWord).with_doc("High part of numerator"); + let d = &Operand::new("d", iWord).with_doc("Denominator"); + let q = &Operand::new("q", iWord).with_doc("Quotient"); + let r = &Operand::new("r", iWord).with_doc("Remainder"); + + ig.push( + Inst::new( + "x86_udivmodx", + r#" + Extended unsigned division. + + Concatenate the bits in `nhi` and `nlo` to form the numerator. + Interpret the bits as an unsigned number and divide by the unsigned + denominator `d`. Trap when `d` is zero or if the quotient is larger + than the range of the output. + + Return both quotient and remainder. + "#, + &formats.ternary, + ) + .operands_in(vec![nlo, nhi, d]) + .operands_out(vec![q, r]) + .can_trap(true), + ); + + ig.push( + Inst::new( + "x86_sdivmodx", + r#" + Extended signed division. + + Concatenate the bits in `nhi` and `nlo` to form the numerator. + Interpret the bits as a signed number and divide by the signed + denominator `d`. Trap when `d` is zero or if the quotient is outside + the range of the output. + + Return both quotient and remainder. 
+ "#, + &formats.ternary, + ) + .operands_in(vec![nlo, nhi, d]) + .operands_out(vec![q, r]) + .can_trap(true), + ); + + let argL = &Operand::new("argL", iWord); + let argR = &Operand::new("argR", iWord); + let resLo = &Operand::new("resLo", iWord); + let resHi = &Operand::new("resHi", iWord); + + ig.push( + Inst::new( + "x86_umulx", + r#" + Unsigned integer multiplication, producing a double-length result. + + Polymorphic over all scalar integer types, but does not support vector + types. + "#, + &formats.binary, + ) + .operands_in(vec![argL, argR]) + .operands_out(vec![resLo, resHi]), + ); + + ig.push( + Inst::new( + "x86_smulx", + r#" + Signed integer multiplication, producing a double-length result. + + Polymorphic over all scalar integer types, but does not support vector + types. + "#, + &formats.binary, + ) + .operands_in(vec![argL, argR]) + .operands_out(vec![resLo, resHi]), + ); + + let Float = &TypeVar::new( + "Float", + "A scalar or vector floating point number", + TypeSetBuilder::new() + .floats(Interval::All) + .simd_lanes(Interval::All) + .build(), + ); + let IntTo = &TypeVar::new( + "IntTo", + "An integer type with the same number of lanes", + TypeSetBuilder::new() + .ints(32..64) + .simd_lanes(Interval::All) + .build(), + ); + let x = &Operand::new("x", Float); + let a = &Operand::new("a", IntTo); + + ig.push( + Inst::new( + "x86_cvtt2si", + r#" + Convert with truncation floating point to signed integer. + + The source floating point operand is converted to a signed integer by + rounding towards zero. If the result can't be represented in the output + type, returns the smallest signed value the output type can represent. + + This instruction does not trap. + "#, + &formats.unary, + ) + .operands_in(vec![x]) + .operands_out(vec![a]), + ); + + let f32x4 = &TypeVar::new( + "f32x4", + "A floating point number", + TypeSetBuilder::new() + .floats(32..32) + .simd_lanes(4..4) + .build(), + ); + let i32x4 = &TypeVar::new( + "i32x4", + "An integer type with the same number of lanes", + TypeSetBuilder::new().ints(32..32).simd_lanes(4..4).build(), + ); + let x = &Operand::new("x", i32x4); + let a = &Operand::new("a", f32x4); + + ig.push( + Inst::new( + "x86_vcvtudq2ps", + r#" + Convert unsigned integer to floating point. + + Convert packed doubleword unsigned integers to packed single-precision floating-point + values. This instruction does not trap. + "#, + &formats.unary, + ) + .operands_in(vec![x]) + .operands_out(vec![a]), + ); + + let x = &Operand::new("x", Float); + let a = &Operand::new("a", Float); + let y = &Operand::new("y", Float); + + ig.push( + Inst::new( + "x86_fmin", + r#" + Floating point minimum with x86 semantics. + + This is equivalent to the C ternary operator `x < y ? x : y` which + differs from `fmin` when either operand is NaN or when comparing + +0.0 to -0.0. + + When the two operands don't compare as LT, `y` is returned unchanged, + even if it is a signalling NaN. + "#, + &formats.binary, + ) + .operands_in(vec![x, y]) + .operands_out(vec![a]), + ); + + ig.push( + Inst::new( + "x86_fmax", + r#" + Floating point maximum with x86 semantics. + + This is equivalent to the C ternary operator `x > y ? x : y` which + differs from `fmax` when either operand is NaN or when comparing + +0.0 to -0.0. + + When the two operands don't compare as GT, `y` is returned unchanged, + even if it is a signalling NaN. 
+ "#, + &formats.binary, + ) + .operands_in(vec![x, y]) + .operands_out(vec![a]), + ); + + let x = &Operand::new("x", iWord); + + ig.push( + Inst::new( + "x86_push", + r#" + Pushes a value onto the stack. + + Decrements the stack pointer and stores the specified value on to the top. + + This is polymorphic in i32 and i64. However, it is only implemented for i64 + in 64-bit mode, and only for i32 in 32-bit mode. + "#, + &formats.unary, + ) + .operands_in(vec![x]) + .other_side_effects(true) + .can_store(true), + ); + + ig.push( + Inst::new( + "x86_pop", + r#" + Pops a value from the stack. + + Loads a value from the top of the stack and then increments the stack + pointer. + + This is polymorphic in i32 and i64. However, it is only implemented for i64 + in 64-bit mode, and only for i32 in 32-bit mode. + "#, + &formats.nullary, + ) + .operands_out(vec![x]) + .other_side_effects(true) + .can_load(true), + ); + + let y = &Operand::new("y", iWord); + let rflags = &Operand::new("rflags", iflags); + + ig.push( + Inst::new( + "x86_bsr", + r#" + Bit Scan Reverse -- returns the bit-index of the most significant 1 + in the word. Result is undefined if the argument is zero. However, it + sets the Z flag depending on the argument, so it is at least easy to + detect and handle that case. + + This is polymorphic in i32 and i64. It is implemented for both i64 and + i32 in 64-bit mode, and only for i32 in 32-bit mode. + "#, + &formats.unary, + ) + .operands_in(vec![x]) + .operands_out(vec![y, rflags]), + ); + + ig.push( + Inst::new( + "x86_bsf", + r#" + Bit Scan Forwards -- returns the bit-index of the least significant 1 + in the word. Is otherwise identical to 'bsr', just above. + "#, + &formats.unary, + ) + .operands_in(vec![x]) + .operands_out(vec![y, rflags]), + ); + + let uimm8 = &immediates.uimm8; + let TxN = &TypeVar::new( + "TxN", + "A SIMD vector type", + TypeSetBuilder::new() + .ints(Interval::All) + .floats(Interval::All) + .bools(Interval::All) + .simd_lanes(Interval::All) + .includes_scalars(false) + .build(), + ); + let a = &Operand::new("a", TxN).with_doc("A vector value (i.e. held in an XMM register)"); + let b = &Operand::new("b", TxN).with_doc("A vector value (i.e. held in an XMM register)"); + let i = &Operand::new("i", uimm8).with_doc("An ordering operand controlling the copying of data from the source to the destination; see PSHUFD in Intel manual for details"); + + ig.push( + Inst::new( + "x86_pshufd", + r#" + Packed Shuffle Doublewords -- copies data from either memory or lanes in an extended + register and re-orders the data according to the passed immediate byte. + "#, + &formats.binary_imm8, + ) + .operands_in(vec![a, i]) // TODO allow copying from memory here (need more permissive type than TxN) + .operands_out(vec![a]), + ); + + ig.push( + Inst::new( + "x86_pshufb", + r#" + Packed Shuffle Bytes -- re-orders data in an extended register using a shuffle + mask from either memory or another extended register + "#, + &formats.binary, + ) + .operands_in(vec![a, b]) // TODO allow re-ordering from memory here (need more permissive type than TxN) + .operands_out(vec![a]), + ); + + let mask = &Operand::new("mask", uimm8).with_doc("mask to select lanes from b"); + ig.push( + Inst::new( + "x86_pblendw", + r#" + Blend packed words using an immediate mask. Each bit of the 8-bit immediate corresponds to a + lane in ``b``: if the bit is set, the lane is copied into ``a``. 
+ "#, + &formats.ternary_imm8, + ) + .operands_in(vec![a, b, mask]) + .operands_out(vec![a]), + ); + + let Idx = &Operand::new("Idx", uimm8).with_doc("Lane index"); + let x = &Operand::new("x", TxN); + let a = &Operand::new("a", &TxN.lane_of()); + + ig.push( + Inst::new( + "x86_pextr", + r#" + Extract lane ``Idx`` from ``x``. + The lane index, ``Idx``, is an immediate value, not an SSA value. It + must indicate a valid lane index for the type of ``x``. + "#, + &formats.binary_imm8, + ) + .operands_in(vec![x, Idx]) + .operands_out(vec![a]), + ); + + let IBxN = &TypeVar::new( + "IBxN", + "A SIMD vector type containing only booleans and integers", + TypeSetBuilder::new() + .ints(Interval::All) + .bools(Interval::All) + .simd_lanes(Interval::All) + .includes_scalars(false) + .build(), + ); + let x = &Operand::new("x", IBxN); + let y = &Operand::new("y", &IBxN.lane_of()).with_doc("New lane value"); + let a = &Operand::new("a", IBxN); + + ig.push( + Inst::new( + "x86_pinsr", + r#" + Insert ``y`` into ``x`` at lane ``Idx``. + The lane index, ``Idx``, is an immediate value, not an SSA value. It + must indicate a valid lane index for the type of ``x``. + "#, + &formats.ternary_imm8, + ) + .operands_in(vec![x, y, Idx]) + .operands_out(vec![a]), + ); + + let FxN = &TypeVar::new( + "FxN", + "A SIMD vector type containing floats", + TypeSetBuilder::new() + .floats(Interval::All) + .simd_lanes(Interval::All) + .includes_scalars(false) + .build(), + ); + let x = &Operand::new("x", FxN); + let y = &Operand::new("y", &FxN.lane_of()).with_doc("New lane value"); + let a = &Operand::new("a", FxN); + + ig.push( + Inst::new( + "x86_insertps", + r#" + Insert a lane of ``y`` into ``x`` at using ``Idx`` to encode both which lane the value is + extracted from and which it is inserted to. This is similar to x86_pinsr but inserts + floats, which are already stored in an XMM register. + "#, + &formats.ternary_imm8, + ) + .operands_in(vec![x, y, Idx]) + .operands_out(vec![a]), + ); + + let x = &Operand::new("x", TxN); + let y = &Operand::new("y", TxN); + let a = &Operand::new("a", TxN); + + ig.push( + Inst::new( + "x86_punpckh", + r#" + Unpack the high-order lanes of ``x`` and ``y`` and interleave into ``a``. With notional + i8x4 vectors, where ``x = [x3, x2, x1, x0]`` and ``y = [y3, y2, y1, y0]``, this operation + would result in ``a = [y3, x3, y2, x2]`` (using the Intel manual's right-to-left lane + ordering). + "#, + &formats.binary, + ) + .operands_in(vec![x, y]) + .operands_out(vec![a]), + ); + + ig.push( + Inst::new( + "x86_punpckl", + r#" + Unpack the low-order lanes of ``x`` and ``y`` and interleave into ``a``. With notional + i8x4 vectors, where ``x = [x3, x2, x1, x0]`` and ``y = [y3, y2, y1, y0]``, this operation + would result in ``a = [y1, x1, y0, x0]`` (using the Intel manual's right-to-left lane + ordering). 
+ "#, + &formats.binary, + ) + .operands_in(vec![x, y]) + .operands_out(vec![a]), + ); + + let x = &Operand::new("x", FxN); + let y = &Operand::new("y", FxN); + let a = &Operand::new("a", FxN); + + ig.push( + Inst::new( + "x86_movsd", + r#" + Move the low 64 bits of the float vector ``y`` to the low 64 bits of float vector ``x`` + "#, + &formats.binary, + ) + .operands_in(vec![x, y]) + .operands_out(vec![a]), + ); + + ig.push( + Inst::new( + "x86_movlhps", + r#" + Move the low 64 bits of the float vector ``y`` to the high 64 bits of float vector ``x`` + "#, + &formats.binary, + ) + .operands_in(vec![x, y]) + .operands_out(vec![a]), + ); + + let IxN = &TypeVar::new( + "IxN", + "A SIMD vector type containing integers", + TypeSetBuilder::new() + .ints(Interval::All) + .simd_lanes(Interval::All) + .includes_scalars(false) + .build(), + ); + let I128 = &TypeVar::new( + "I128", + "A SIMD vector type containing one large integer (due to Cranelift type constraints, \ + this uses the Cranelift I64X2 type but should be understood as one large value, i.e., the \ + upper lane is concatenated with the lower lane to form the integer)", + TypeSetBuilder::new() + .ints(64..64) + .simd_lanes(2..2) + .includes_scalars(false) + .build(), + ); + + let x = &Operand::new("x", IxN).with_doc("Vector value to shift"); + let y = &Operand::new("y", I128).with_doc("Number of bits to shift"); + let a = &Operand::new("a", IxN); + + ig.push( + Inst::new( + "x86_psll", + r#" + Shift Packed Data Left Logical -- This implements the behavior of the shared instruction + ``ishl`` but alters the shift operand to live in an XMM register as expected by the PSLL* + family of instructions. + "#, + &formats.binary, + ) + .operands_in(vec![x, y]) + .operands_out(vec![a]), + ); + + ig.push( + Inst::new( + "x86_psrl", + r#" + Shift Packed Data Right Logical -- This implements the behavior of the shared instruction + ``ushr`` but alters the shift operand to live in an XMM register as expected by the PSRL* + family of instructions. + "#, + &formats.binary, + ) + .operands_in(vec![x, y]) + .operands_out(vec![a]), + ); + + ig.push( + Inst::new( + "x86_psra", + r#" + Shift Packed Data Right Arithmetic -- This implements the behavior of the shared + instruction ``sshr`` but alters the shift operand to live in an XMM register as expected by + the PSRA* family of instructions. + "#, + &formats.binary, + ) + .operands_in(vec![x, y]) + .operands_out(vec![a]), + ); + + let I64x2 = &TypeVar::new( + "I64x2", + "A SIMD vector type containing two 64-bit integers", + TypeSetBuilder::new() + .ints(64..64) + .simd_lanes(2..2) + .includes_scalars(false) + .build(), + ); + + let x = &Operand::new("x", I64x2); + let y = &Operand::new("y", I64x2); + let a = &Operand::new("a", I64x2); + ig.push( + Inst::new( + "x86_pmullq", + r#" + Multiply Packed Integers -- Multiply two 64x2 integers and receive a 64x2 result with + lane-wise wrapping if the result overflows. This instruction is necessary to add distinct + encodings for CPUs with newer vector features. + "#, + &formats.binary, + ) + .operands_in(vec![x, y]) + .operands_out(vec![a]), + ); + + ig.push( + Inst::new( + "x86_pmuludq", + r#" + Multiply Packed Integers -- Using only the bottom 32 bits in each lane, multiply two 64x2 + unsigned integers and receive a 64x2 result. This instruction avoids the need for handling + overflow as in `x86_pmullq`. 
+ "#, + &formats.binary, + ) + .operands_in(vec![x, y]) + .operands_out(vec![a]), + ); + + let x = &Operand::new("x", TxN); + let y = &Operand::new("y", TxN); + let f = &Operand::new("f", iflags); + ig.push( + Inst::new( + "x86_ptest", + r#" + Logical Compare -- PTEST will set the ZF flag if all bits in the result are 0 of the + bitwise AND of the first source operand (first operand) and the second source operand + (second operand). PTEST sets the CF flag if all bits in the result are 0 of the bitwise + AND of the second source operand (second operand) and the logical NOT of the destination + operand (first operand). + "#, + &formats.binary, + ) + .operands_in(vec![x, y]) + .operands_out(vec![f]), + ); + + let x = &Operand::new("x", IxN); + let y = &Operand::new("y", IxN); + let a = &Operand::new("a", IxN); + ig.push( + Inst::new( + "x86_pmaxs", + r#" + Maximum of Packed Signed Integers -- Compare signed integers in the first and second + operand and return the maximum values. + "#, + &formats.binary, + ) + .operands_in(vec![x, y]) + .operands_out(vec![a]), + ); + + ig.push( + Inst::new( + "x86_pmaxu", + r#" + Maximum of Packed Unsigned Integers -- Compare unsigned integers in the first and second + operand and return the maximum values. + "#, + &formats.binary, + ) + .operands_in(vec![x, y]) + .operands_out(vec![a]), + ); + + ig.push( + Inst::new( + "x86_pmins", + r#" + Minimum of Packed Signed Integers -- Compare signed integers in the first and second + operand and return the minimum values. + "#, + &formats.binary, + ) + .operands_in(vec![x, y]) + .operands_out(vec![a]), + ); + + ig.push( + Inst::new( + "x86_pminu", + r#" + Minimum of Packed Unsigned Integers -- Compare unsigned integers in the first and second + operand and return the minimum values. + "#, + &formats.binary, + ) + .operands_in(vec![x, y]) + .operands_out(vec![a]), + ); + + let c = &Operand::new("c", uimm8) + .with_doc("The number of bytes to shift right; see PALIGNR in Intel manual for details"); + ig.push( + Inst::new( + "x86_palignr", + r#" + Concatenate destination and source operands, extracting a byte-aligned result shifted to + the right by `c`. + "#, + &formats.ternary_imm8, + ) + .operands_in(vec![x, y, c]) + .operands_out(vec![a]), + ); + + let i64_t = &TypeVar::new( + "i64_t", + "A scalar 64bit integer", + TypeSetBuilder::new().ints(64..64).build(), + ); + + let GV = &Operand::new("GV", &entities.global_value); + let addr = &Operand::new("addr", i64_t); + + ig.push( + Inst::new( + "x86_elf_tls_get_addr", + r#" + Elf tls get addr -- This implements the GD TLS model for ELF. The clobber output should + not be used. + "#, + &formats.unary_global_value, + ) + // This is a bit overly broad to mark as clobbering *all* the registers, because it should + // only preserve caller-saved registers. There's no way to indicate this to register + // allocation yet, though, so mark as clobbering all registers instead. + .clobbers_all_regs(true) + .operands_in(vec![GV]) + .operands_out(vec![addr]), + ); + ig.push( + Inst::new( + "x86_macho_tls_get_addr", + r#" + Mach-O tls get addr -- This implements TLS access for Mach-O. The clobber output should + not be used. + "#, + &formats.unary_global_value, + ) + // See above comment for x86_elf_tls_get_addr. 
+ .clobbers_all_regs(true) + .operands_in(vec![GV]) + .operands_out(vec![addr]), + ); + + ig.build() +} diff --git a/third_party/rust/cranelift-codegen-meta/src/isa/x86/legalize.rs b/third_party/rust/cranelift-codegen-meta/src/isa/x86/legalize.rs new file mode 100644 index 0000000000..681b3104d5 --- /dev/null +++ b/third_party/rust/cranelift-codegen-meta/src/isa/x86/legalize.rs @@ -0,0 +1,829 @@ +use crate::cdsl::ast::{constant, var, ExprBuilder, Literal}; +use crate::cdsl::instructions::{vector, Bindable, InstructionGroup}; +use crate::cdsl::types::{LaneType, ValueType}; +use crate::cdsl::xform::TransformGroupBuilder; +use crate::shared::types::Float::{F32, F64}; +use crate::shared::types::Int::{I16, I32, I64, I8}; +use crate::shared::Definitions as SharedDefinitions; + +#[allow(clippy::many_single_char_names)] +pub(crate) fn define(shared: &mut SharedDefinitions, x86_instructions: &InstructionGroup) { + let mut expand = TransformGroupBuilder::new( + "x86_expand", + r#" + Legalize instructions by expansion. + + Use x86-specific instructions if needed."#, + ) + .isa("x86") + .chain_with(shared.transform_groups.by_name("expand_flags").id); + + let mut narrow = TransformGroupBuilder::new( + "x86_narrow", + r#" + Legalize instructions by narrowing. + + Use x86-specific instructions if needed."#, + ) + .isa("x86") + .chain_with(shared.transform_groups.by_name("narrow_flags").id); + + let mut narrow_avx = TransformGroupBuilder::new( + "x86_narrow_avx", + r#" + Legalize instructions by narrowing with CPU feature checks. + + This special case converts using x86 AVX instructions where available."#, + ) + .isa("x86"); + // We cannot chain with the x86_narrow group until this group is built, see bottom of this + // function for where this is chained. + + let mut widen = TransformGroupBuilder::new( + "x86_widen", + r#" + Legalize instructions by widening. + + Use x86-specific instructions if needed."#, + ) + .isa("x86") + .chain_with(shared.transform_groups.by_name("widen").id); + + // List of instructions. 
+ let insts = &shared.instructions; + let band = insts.by_name("band"); + let bor = insts.by_name("bor"); + let clz = insts.by_name("clz"); + let ctz = insts.by_name("ctz"); + let fcmp = insts.by_name("fcmp"); + let fcvt_from_uint = insts.by_name("fcvt_from_uint"); + let fcvt_to_sint = insts.by_name("fcvt_to_sint"); + let fcvt_to_uint = insts.by_name("fcvt_to_uint"); + let fcvt_to_sint_sat = insts.by_name("fcvt_to_sint_sat"); + let fcvt_to_uint_sat = insts.by_name("fcvt_to_uint_sat"); + let fmax = insts.by_name("fmax"); + let fmin = insts.by_name("fmin"); + let iadd = insts.by_name("iadd"); + let iconst = insts.by_name("iconst"); + let imul = insts.by_name("imul"); + let ineg = insts.by_name("ineg"); + let isub = insts.by_name("isub"); + let ishl = insts.by_name("ishl"); + let ireduce = insts.by_name("ireduce"); + let popcnt = insts.by_name("popcnt"); + let sdiv = insts.by_name("sdiv"); + let selectif = insts.by_name("selectif"); + let smulhi = insts.by_name("smulhi"); + let srem = insts.by_name("srem"); + let tls_value = insts.by_name("tls_value"); + let udiv = insts.by_name("udiv"); + let umulhi = insts.by_name("umulhi"); + let ushr = insts.by_name("ushr"); + let ushr_imm = insts.by_name("ushr_imm"); + let urem = insts.by_name("urem"); + + let x86_bsf = x86_instructions.by_name("x86_bsf"); + let x86_bsr = x86_instructions.by_name("x86_bsr"); + let x86_umulx = x86_instructions.by_name("x86_umulx"); + let x86_smulx = x86_instructions.by_name("x86_smulx"); + + let imm = &shared.imm; + + // Shift by a 64-bit amount is equivalent to a shift by that amount mod 32, so we can reduce + // the size of the shift amount. This is useful for x86_32, where an I64 shift amount is + // not encodable. + let a = var("a"); + let x = var("x"); + let y = var("y"); + let z = var("z"); + + for &ty in &[I8, I16, I32] { + let ishl_by_i64 = ishl.bind(ty).bind(I64); + let ireduce = ireduce.bind(I32); + expand.legalize( + def!(a = ishl_by_i64(x, y)), + vec![def!(z = ireduce(y)), def!(a = ishl(x, z))], + ); + } + + for &ty in &[I8, I16, I32] { + let ushr_by_i64 = ushr.bind(ty).bind(I64); + let ireduce = ireduce.bind(I32); + expand.legalize( + def!(a = ushr_by_i64(x, y)), + vec![def!(z = ireduce(y)), def!(a = ushr(x, z))], + ); + } + + // Division and remainder. + // + // The srem expansion requires custom code because srem INT_MIN, -1 is not + // allowed to trap. The other ops need to check avoid_div_traps. + expand.custom_legalize(sdiv, "expand_sdivrem"); + expand.custom_legalize(srem, "expand_sdivrem"); + expand.custom_legalize(udiv, "expand_udivrem"); + expand.custom_legalize(urem, "expand_udivrem"); + + // Double length (widening) multiplication. + let a = var("a"); + let x = var("x"); + let y = var("y"); + let a1 = var("a1"); + let a2 = var("a2"); + let res_lo = var("res_lo"); + let res_hi = var("res_hi"); + + expand.legalize( + def!(res_hi = umulhi(x, y)), + vec![def!((res_lo, res_hi) = x86_umulx(x, y))], + ); + + expand.legalize( + def!(res_hi = smulhi(x, y)), + vec![def!((res_lo, res_hi) = x86_smulx(x, y))], + ); + + // Floating point condition codes. + // + // The 8 condition codes in `supported_floatccs` are directly supported by a + // `ucomiss` or `ucomisd` instruction. The remaining codes need legalization + // patterns.
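+ // For example, ucomiss sets ZF for an unordered (NaN) comparison just as it does for equality, so a bare "equal" check is not sufficient on its own; the rules below therefore rewrite eq as ord AND ueq, ne as uno OR one, and handle the remaining inequalities by swapping the operand order.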
+ + let floatcc_eq = Literal::enumerator_for(&imm.floatcc, "eq"); + let floatcc_ord = Literal::enumerator_for(&imm.floatcc, "ord"); + let floatcc_ueq = Literal::enumerator_for(&imm.floatcc, "ueq"); + let floatcc_ne = Literal::enumerator_for(&imm.floatcc, "ne"); + let floatcc_uno = Literal::enumerator_for(&imm.floatcc, "uno"); + let floatcc_one = Literal::enumerator_for(&imm.floatcc, "one"); + + // Equality needs an explicit `ord` test which checks the parity bit. + expand.legalize( + def!(a = fcmp(floatcc_eq, x, y)), + vec![ + def!(a1 = fcmp(floatcc_ord, x, y)), + def!(a2 = fcmp(floatcc_ueq, x, y)), + def!(a = band(a1, a2)), + ], + ); + expand.legalize( + def!(a = fcmp(floatcc_ne, x, y)), + vec![ + def!(a1 = fcmp(floatcc_uno, x, y)), + def!(a2 = fcmp(floatcc_one, x, y)), + def!(a = bor(a1, a2)), + ], + ); + + let floatcc_lt = &Literal::enumerator_for(&imm.floatcc, "lt"); + let floatcc_gt = &Literal::enumerator_for(&imm.floatcc, "gt"); + let floatcc_le = &Literal::enumerator_for(&imm.floatcc, "le"); + let floatcc_ge = &Literal::enumerator_for(&imm.floatcc, "ge"); + let floatcc_ugt = &Literal::enumerator_for(&imm.floatcc, "ugt"); + let floatcc_ult = &Literal::enumerator_for(&imm.floatcc, "ult"); + let floatcc_uge = &Literal::enumerator_for(&imm.floatcc, "uge"); + let floatcc_ule = &Literal::enumerator_for(&imm.floatcc, "ule"); + + // Inequalities that need to be reversed. + for &(cc, rev_cc) in &[ + (floatcc_lt, floatcc_gt), + (floatcc_le, floatcc_ge), + (floatcc_ugt, floatcc_ult), + (floatcc_uge, floatcc_ule), + ] { + expand.legalize(def!(a = fcmp(cc, x, y)), vec![def!(a = fcmp(rev_cc, y, x))]); + } + + // We need to modify the CFG for min/max legalization. + expand.custom_legalize(fmin, "expand_minmax"); + expand.custom_legalize(fmax, "expand_minmax"); + + // Conversions from unsigned need special handling. + expand.custom_legalize(fcvt_from_uint, "expand_fcvt_from_uint"); + // Conversions from float to int can trap and modify the control flow graph. 
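+    // These use custom legalization code rather than `def!` patterns because the rewrites must
+    // insert trap checks and new basic blocks, which the straight-line patterns above are not
+    // suited to express.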
+ expand.custom_legalize(fcvt_to_sint, "expand_fcvt_to_sint"); + expand.custom_legalize(fcvt_to_uint, "expand_fcvt_to_uint"); + expand.custom_legalize(fcvt_to_sint_sat, "expand_fcvt_to_sint_sat"); + expand.custom_legalize(fcvt_to_uint_sat, "expand_fcvt_to_uint_sat"); + + // Count leading and trailing zeroes, for baseline x86_64 + let c_minus_one = var("c_minus_one"); + let c_thirty_one = var("c_thirty_one"); + let c_thirty_two = var("c_thirty_two"); + let c_sixty_three = var("c_sixty_three"); + let c_sixty_four = var("c_sixty_four"); + let index1 = var("index1"); + let r2flags = var("r2flags"); + let index2 = var("index2"); + + let intcc_eq = Literal::enumerator_for(&imm.intcc, "eq"); + let imm64_minus_one = Literal::constant(&imm.imm64, -1); + let imm64_63 = Literal::constant(&imm.imm64, 63); + expand.legalize( + def!(a = clz.I64(x)), + vec![ + def!(c_minus_one = iconst(imm64_minus_one)), + def!(c_sixty_three = iconst(imm64_63)), + def!((index1, r2flags) = x86_bsr(x)), + def!(index2 = selectif(intcc_eq, r2flags, c_minus_one, index1)), + def!(a = isub(c_sixty_three, index2)), + ], + ); + + let imm64_31 = Literal::constant(&imm.imm64, 31); + expand.legalize( + def!(a = clz.I32(x)), + vec![ + def!(c_minus_one = iconst(imm64_minus_one)), + def!(c_thirty_one = iconst(imm64_31)), + def!((index1, r2flags) = x86_bsr(x)), + def!(index2 = selectif(intcc_eq, r2flags, c_minus_one, index1)), + def!(a = isub(c_thirty_one, index2)), + ], + ); + + let imm64_64 = Literal::constant(&imm.imm64, 64); + expand.legalize( + def!(a = ctz.I64(x)), + vec![ + def!(c_sixty_four = iconst(imm64_64)), + def!((index1, r2flags) = x86_bsf(x)), + def!(a = selectif(intcc_eq, r2flags, c_sixty_four, index1)), + ], + ); + + let imm64_32 = Literal::constant(&imm.imm64, 32); + expand.legalize( + def!(a = ctz.I32(x)), + vec![ + def!(c_thirty_two = iconst(imm64_32)), + def!((index1, r2flags) = x86_bsf(x)), + def!(a = selectif(intcc_eq, r2flags, c_thirty_two, index1)), + ], + ); + + // Population count for baseline x86_64 + let x = var("x"); + let r = var("r"); + + let qv3 = var("qv3"); + let qv4 = var("qv4"); + let qv5 = var("qv5"); + let qv6 = var("qv6"); + let qv7 = var("qv7"); + let qv8 = var("qv8"); + let qv9 = var("qv9"); + let qv10 = var("qv10"); + let qv11 = var("qv11"); + let qv12 = var("qv12"); + let qv13 = var("qv13"); + let qv14 = var("qv14"); + let qv15 = var("qv15"); + let qc77 = var("qc77"); + #[allow(non_snake_case)] + let qc0F = var("qc0F"); + let qc01 = var("qc01"); + + let imm64_1 = Literal::constant(&imm.imm64, 1); + let imm64_4 = Literal::constant(&imm.imm64, 4); + expand.legalize( + def!(r = popcnt.I64(x)), + vec![ + def!(qv3 = ushr_imm(x, imm64_1)), + def!(qc77 = iconst(Literal::constant(&imm.imm64, 0x7777_7777_7777_7777))), + def!(qv4 = band(qv3, qc77)), + def!(qv5 = isub(x, qv4)), + def!(qv6 = ushr_imm(qv4, imm64_1)), + def!(qv7 = band(qv6, qc77)), + def!(qv8 = isub(qv5, qv7)), + def!(qv9 = ushr_imm(qv7, imm64_1)), + def!(qv10 = band(qv9, qc77)), + def!(qv11 = isub(qv8, qv10)), + def!(qv12 = ushr_imm(qv11, imm64_4)), + def!(qv13 = iadd(qv11, qv12)), + def!(qc0F = iconst(Literal::constant(&imm.imm64, 0x0F0F_0F0F_0F0F_0F0F))), + def!(qv14 = band(qv13, qc0F)), + def!(qc01 = iconst(Literal::constant(&imm.imm64, 0x0101_0101_0101_0101))), + def!(qv15 = imul(qv14, qc01)), + def!(r = ushr_imm(qv15, Literal::constant(&imm.imm64, 56))), + ], + ); + + let lv3 = var("lv3"); + let lv4 = var("lv4"); + let lv5 = var("lv5"); + let lv6 = var("lv6"); + let lv7 = var("lv7"); + let lv8 = var("lv8"); + let lv9 = var("lv9"); + 
let lv10 = var("lv10"); + let lv11 = var("lv11"); + let lv12 = var("lv12"); + let lv13 = var("lv13"); + let lv14 = var("lv14"); + let lv15 = var("lv15"); + let lc77 = var("lc77"); + #[allow(non_snake_case)] + let lc0F = var("lc0F"); + let lc01 = var("lc01"); + + expand.legalize( + def!(r = popcnt.I32(x)), + vec![ + def!(lv3 = ushr_imm(x, imm64_1)), + def!(lc77 = iconst(Literal::constant(&imm.imm64, 0x7777_7777))), + def!(lv4 = band(lv3, lc77)), + def!(lv5 = isub(x, lv4)), + def!(lv6 = ushr_imm(lv4, imm64_1)), + def!(lv7 = band(lv6, lc77)), + def!(lv8 = isub(lv5, lv7)), + def!(lv9 = ushr_imm(lv7, imm64_1)), + def!(lv10 = band(lv9, lc77)), + def!(lv11 = isub(lv8, lv10)), + def!(lv12 = ushr_imm(lv11, imm64_4)), + def!(lv13 = iadd(lv11, lv12)), + def!(lc0F = iconst(Literal::constant(&imm.imm64, 0x0F0F_0F0F))), + def!(lv14 = band(lv13, lc0F)), + def!(lc01 = iconst(Literal::constant(&imm.imm64, 0x0101_0101))), + def!(lv15 = imul(lv14, lc01)), + def!(r = ushr_imm(lv15, Literal::constant(&imm.imm64, 24))), + ], + ); + + expand.custom_legalize(ineg, "convert_ineg"); + expand.custom_legalize(tls_value, "expand_tls_value"); + widen.custom_legalize(ineg, "convert_ineg"); + + // To reduce compilation times, separate out large blocks of legalizations by theme. + define_simd(shared, x86_instructions, &mut narrow, &mut narrow_avx); + + expand.build_and_add_to(&mut shared.transform_groups); + let narrow_id = narrow.build_and_add_to(&mut shared.transform_groups); + narrow_avx + .chain_with(narrow_id) + .build_and_add_to(&mut shared.transform_groups); + widen.build_and_add_to(&mut shared.transform_groups); +} + +fn define_simd( + shared: &mut SharedDefinitions, + x86_instructions: &InstructionGroup, + narrow: &mut TransformGroupBuilder, + narrow_avx: &mut TransformGroupBuilder, +) { + let insts = &shared.instructions; + let band = insts.by_name("band"); + let band_not = insts.by_name("band_not"); + let bitcast = insts.by_name("bitcast"); + let bitselect = insts.by_name("bitselect"); + let bor = insts.by_name("bor"); + let bnot = insts.by_name("bnot"); + let bxor = insts.by_name("bxor"); + let extractlane = insts.by_name("extractlane"); + let fabs = insts.by_name("fabs"); + let fcmp = insts.by_name("fcmp"); + let fcvt_from_uint = insts.by_name("fcvt_from_uint"); + let fcvt_to_sint_sat = insts.by_name("fcvt_to_sint_sat"); + let fcvt_to_uint_sat = insts.by_name("fcvt_to_uint_sat"); + let fmax = insts.by_name("fmax"); + let fmin = insts.by_name("fmin"); + let fneg = insts.by_name("fneg"); + let iadd_imm = insts.by_name("iadd_imm"); + let icmp = insts.by_name("icmp"); + let imax = insts.by_name("imax"); + let imin = insts.by_name("imin"); + let imul = insts.by_name("imul"); + let ineg = insts.by_name("ineg"); + let insertlane = insts.by_name("insertlane"); + let ishl = insts.by_name("ishl"); + let ishl_imm = insts.by_name("ishl_imm"); + let load_splat = insts.by_name("load_splat"); + let raw_bitcast = insts.by_name("raw_bitcast"); + let scalar_to_vector = insts.by_name("scalar_to_vector"); + let splat = insts.by_name("splat"); + let shuffle = insts.by_name("shuffle"); + let sshr = insts.by_name("sshr"); + let swizzle = insts.by_name("swizzle"); + let trueif = insts.by_name("trueif"); + let uadd_sat = insts.by_name("uadd_sat"); + let umax = insts.by_name("umax"); + let umin = insts.by_name("umin"); + let snarrow = insts.by_name("snarrow"); + let swiden_high = insts.by_name("swiden_high"); + let swiden_low = insts.by_name("swiden_low"); + let ushr_imm = insts.by_name("ushr_imm"); + let ushr = insts.by_name("ushr"); 
+ let uwiden_high = insts.by_name("uwiden_high"); + let uwiden_low = insts.by_name("uwiden_low"); + let vconst = insts.by_name("vconst"); + let vall_true = insts.by_name("vall_true"); + let vany_true = insts.by_name("vany_true"); + let vselect = insts.by_name("vselect"); + + let x86_palignr = x86_instructions.by_name("x86_palignr"); + let x86_pmaxs = x86_instructions.by_name("x86_pmaxs"); + let x86_pmaxu = x86_instructions.by_name("x86_pmaxu"); + let x86_pmins = x86_instructions.by_name("x86_pmins"); + let x86_pminu = x86_instructions.by_name("x86_pminu"); + let x86_pshufb = x86_instructions.by_name("x86_pshufb"); + let x86_pshufd = x86_instructions.by_name("x86_pshufd"); + let x86_psra = x86_instructions.by_name("x86_psra"); + let x86_ptest = x86_instructions.by_name("x86_ptest"); + let x86_punpckh = x86_instructions.by_name("x86_punpckh"); + let x86_punpckl = x86_instructions.by_name("x86_punpckl"); + + let imm = &shared.imm; + + // Set up variables and immediates. + let uimm8_zero = Literal::constant(&imm.uimm8, 0x00); + let uimm8_one = Literal::constant(&imm.uimm8, 0x01); + let uimm8_eight = Literal::constant(&imm.uimm8, 8); + let u128_zeroes = constant(vec![0x00; 16]); + let u128_ones = constant(vec![0xff; 16]); + let u128_seventies = constant(vec![0x70; 16]); + let a = var("a"); + let b = var("b"); + let c = var("c"); + let d = var("d"); + let e = var("e"); + let f = var("f"); + let g = var("g"); + let h = var("h"); + let x = var("x"); + let y = var("y"); + let z = var("z"); + + // Limit the SIMD vector size: eventually multiple vector sizes may be supported + // but for now only SSE-sized vectors are available. + let sse_vector_size: u64 = 128; + let allowed_simd_type = |t: &LaneType| t.lane_bits() >= 8 && t.lane_bits() < 128; + + // SIMD splat: 8-bits + for ty in ValueType::all_lane_types().filter(|t| t.lane_bits() == 8) { + let splat_any8x16 = splat.bind(vector(ty, sse_vector_size)); + narrow.legalize( + def!(y = splat_any8x16(x)), + vec![ + // Move into the lowest 8 bits of an XMM register. + def!(a = scalar_to_vector(x)), + // Zero out a different XMM register; the shuffle mask for moving the lowest byte + // to all other byte lanes is 0x0. + def!(b = vconst(u128_zeroes)), + // PSHUFB takes two XMM operands, one of which is a shuffle mask (i.e. b). + def!(y = x86_pshufb(a, b)), + ], + ); + } + + // SIMD splat: 16-bits + for ty in ValueType::all_lane_types().filter(|t| t.lane_bits() == 16) { + let splat_x16x8 = splat.bind(vector(ty, sse_vector_size)); + let raw_bitcast_any16x8_to_i32x4 = raw_bitcast + .bind(vector(I32, sse_vector_size)) + .bind(vector(ty, sse_vector_size)); + let raw_bitcast_i32x4_to_any16x8 = raw_bitcast + .bind(vector(ty, sse_vector_size)) + .bind(vector(I32, sse_vector_size)); + narrow.legalize( + def!(y = splat_x16x8(x)), + vec![ + // Move into the lowest 16 bits of an XMM register. + def!(a = scalar_to_vector(x)), + // Insert the value again but in the next lowest 16 bits. + def!(b = insertlane(a, x, uimm8_one)), + // No instruction emitted; pretend this is an I32x4 so we can use PSHUFD. + def!(c = raw_bitcast_any16x8_to_i32x4(b)), + // Broadcast the bytes in the XMM register with PSHUFD. + def!(d = x86_pshufd(c, uimm8_zero)), + // No instruction emitted; pretend this is an X16x8 again. 
+ def!(y = raw_bitcast_i32x4_to_any16x8(d)), + ], + ); + } + + // SIMD splat: 32-bits + for ty in ValueType::all_lane_types().filter(|t| t.lane_bits() == 32) { + let splat_any32x4 = splat.bind(vector(ty, sse_vector_size)); + narrow.legalize( + def!(y = splat_any32x4(x)), + vec![ + // Translate to an x86 MOV to get the value in an XMM register. + def!(a = scalar_to_vector(x)), + // Broadcast the bytes in the XMM register with PSHUFD. + def!(y = x86_pshufd(a, uimm8_zero)), + ], + ); + } + + // SIMD splat: 64-bits + for ty in ValueType::all_lane_types().filter(|t| t.lane_bits() == 64) { + let splat_any64x2 = splat.bind(vector(ty, sse_vector_size)); + narrow.legalize( + def!(y = splat_any64x2(x)), + vec![ + // Move into the lowest 64 bits of an XMM register. + def!(a = scalar_to_vector(x)), + // Move into the highest 64 bits of the same XMM register. + def!(y = insertlane(a, x, uimm8_one)), + ], + ); + } + + // SIMD swizzle; the following inefficient implementation is due to the Wasm SIMD spec requiring + // mask indexes greater than 15 to have the same semantics as a 0 index. For the spec discussion, + // see https://github.com/WebAssembly/simd/issues/93. + { + let swizzle = swizzle.bind(vector(I8, sse_vector_size)); + narrow.legalize( + def!(a = swizzle(x, y)), + vec![ + def!(b = vconst(u128_seventies)), + def!(c = uadd_sat(y, b)), + def!(a = x86_pshufb(x, c)), + ], + ); + } + + // SIMD bnot + for ty in ValueType::all_lane_types().filter(allowed_simd_type) { + let bnot = bnot.bind(vector(ty, sse_vector_size)); + narrow.legalize( + def!(y = bnot(x)), + vec![def!(a = vconst(u128_ones)), def!(y = bxor(a, x))], + ); + } + + // SIMD shift right (arithmetic, i16x8 and i32x4) + for ty in &[I16, I32] { + let sshr = sshr.bind(vector(*ty, sse_vector_size)); + let bitcast_i64x2 = bitcast.bind(vector(I64, sse_vector_size)); + narrow.legalize( + def!(a = sshr(x, y)), + vec![def!(b = bitcast_i64x2(y)), def!(a = x86_psra(x, b))], + ); + } + // SIMD shift right (arithmetic, i8x16) + { + let sshr = sshr.bind(vector(I8, sse_vector_size)); + let bitcast_i64x2 = bitcast.bind(vector(I64, sse_vector_size)); + let raw_bitcast_i16x8 = raw_bitcast.bind(vector(I16, sse_vector_size)); + let raw_bitcast_i16x8_again = raw_bitcast.bind(vector(I16, sse_vector_size)); + narrow.legalize( + def!(z = sshr(x, y)), + vec![ + // Since we will use the high byte of each 16x8 lane, shift an extra 8 bits. + def!(a = iadd_imm(y, uimm8_eight)), + def!(b = bitcast_i64x2(a)), + // Take the low 8 bytes of x, duplicate them in 16x8 lanes, then shift right. + def!(c = x86_punpckl(x, x)), + def!(d = raw_bitcast_i16x8(c)), + def!(e = x86_psra(d, b)), + // Take the high 8 bytes of x, duplicate them in 16x8 lanes, then shift right. + def!(f = x86_punpckh(x, x)), + def!(g = raw_bitcast_i16x8_again(f)), + def!(h = x86_psra(g, b)), + // Re-pack the vector. + def!(z = snarrow(e, h)), + ], + ); + } + // SIMD shift right (arithmetic, i64x2) + { + let sshr_vector = sshr.bind(vector(I64, sse_vector_size)); + let sshr_scalar_lane0 = sshr.bind(I64); + let sshr_scalar_lane1 = sshr.bind(I64); + narrow.legalize( + def!(z = sshr_vector(x, y)), + vec![ + // Use scalar operations to shift the first lane. + def!(a = extractlane(x, uimm8_zero)), + def!(b = sshr_scalar_lane0(a, y)), + def!(c = insertlane(x, b, uimm8_zero)), + // Do the same for the second lane. 
+ def!(d = extractlane(x, uimm8_one)), + def!(e = sshr_scalar_lane1(d, y)), + def!(z = insertlane(c, e, uimm8_one)), + ], + ); + } + + // SIMD select + for ty in ValueType::all_lane_types().filter(allowed_simd_type) { + let bitselect = bitselect.bind(vector(ty, sse_vector_size)); // must bind both x/y and c + narrow.legalize( + def!(d = bitselect(c, x, y)), + vec![ + def!(a = band(x, c)), + def!(b = band_not(y, c)), + def!(d = bor(a, b)), + ], + ); + } + + // SIMD vselect; replace with bitselect if BLEND* instructions are not available. + // This works, because each lane of boolean vector is filled with zeroes or ones. + for ty in ValueType::all_lane_types().filter(allowed_simd_type) { + let vselect = vselect.bind(vector(ty, sse_vector_size)); + let raw_bitcast = raw_bitcast.bind(vector(ty, sse_vector_size)); + narrow.legalize( + def!(d = vselect(c, x, y)), + vec![def!(a = raw_bitcast(c)), def!(d = bitselect(a, x, y))], + ); + } + + // SIMD vany_true + let ne = Literal::enumerator_for(&imm.intcc, "ne"); + for ty in ValueType::all_lane_types().filter(allowed_simd_type) { + let vany_true = vany_true.bind(vector(ty, sse_vector_size)); + narrow.legalize( + def!(y = vany_true(x)), + vec![def!(a = x86_ptest(x, x)), def!(y = trueif(ne, a))], + ); + } + + // SIMD vall_true + let eq = Literal::enumerator_for(&imm.intcc, "eq"); + for ty in ValueType::all_lane_types().filter(allowed_simd_type) { + let vall_true = vall_true.bind(vector(ty, sse_vector_size)); + if ty.is_int() { + // In the common case (Wasm's integer-only all_true), we do not require a + // bitcast. + narrow.legalize( + def!(y = vall_true(x)), + vec![ + def!(a = vconst(u128_zeroes)), + def!(c = icmp(eq, x, a)), + def!(d = x86_ptest(c, c)), + def!(y = trueif(eq, d)), + ], + ); + } else { + // However, to support other types we must bitcast them to an integer vector to + // use icmp. 
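+            // (For example, a b32x4 input is reinterpreted as an i32x4 via `raw_bitcast` below;
+            // the bitcast emits no machine instruction, it only changes the type.)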
+ let lane_type_as_int = LaneType::int_from_bits(ty.lane_bits() as u16); + let raw_bitcast_to_int = raw_bitcast.bind(vector(lane_type_as_int, sse_vector_size)); + narrow.legalize( + def!(y = vall_true(x)), + vec![ + def!(a = vconst(u128_zeroes)), + def!(b = raw_bitcast_to_int(x)), + def!(c = icmp(eq, b, a)), + def!(d = x86_ptest(c, c)), + def!(y = trueif(eq, d)), + ], + ); + } + } + + // SIMD icmp ne + let ne = Literal::enumerator_for(&imm.intcc, "ne"); + for ty in ValueType::all_lane_types().filter(|ty| allowed_simd_type(ty) && ty.is_int()) { + let icmp_ = icmp.bind(vector(ty, sse_vector_size)); + narrow.legalize( + def!(c = icmp_(ne, a, b)), + vec![def!(x = icmp(eq, a, b)), def!(c = bnot(x))], + ); + } + + // SIMD icmp greater-/less-than + let sgt = Literal::enumerator_for(&imm.intcc, "sgt"); + let ugt = Literal::enumerator_for(&imm.intcc, "ugt"); + let sge = Literal::enumerator_for(&imm.intcc, "sge"); + let uge = Literal::enumerator_for(&imm.intcc, "uge"); + let slt = Literal::enumerator_for(&imm.intcc, "slt"); + let ult = Literal::enumerator_for(&imm.intcc, "ult"); + let sle = Literal::enumerator_for(&imm.intcc, "sle"); + let ule = Literal::enumerator_for(&imm.intcc, "ule"); + for ty in &[I8, I16, I32] { + // greater-than + let icmp_ = icmp.bind(vector(*ty, sse_vector_size)); + narrow.legalize( + def!(c = icmp_(ugt, a, b)), + vec![ + def!(x = x86_pmaxu(a, b)), + def!(y = icmp(eq, x, b)), + def!(c = bnot(y)), + ], + ); + let icmp_ = icmp.bind(vector(*ty, sse_vector_size)); + narrow.legalize( + def!(c = icmp_(sge, a, b)), + vec![def!(x = x86_pmins(a, b)), def!(c = icmp(eq, x, b))], + ); + let icmp_ = icmp.bind(vector(*ty, sse_vector_size)); + narrow.legalize( + def!(c = icmp_(uge, a, b)), + vec![def!(x = x86_pminu(a, b)), def!(c = icmp(eq, x, b))], + ); + + // less-than + let icmp_ = icmp.bind(vector(*ty, sse_vector_size)); + narrow.legalize(def!(c = icmp_(slt, a, b)), vec![def!(c = icmp(sgt, b, a))]); + let icmp_ = icmp.bind(vector(*ty, sse_vector_size)); + narrow.legalize(def!(c = icmp_(ult, a, b)), vec![def!(c = icmp(ugt, b, a))]); + let icmp_ = icmp.bind(vector(*ty, sse_vector_size)); + narrow.legalize(def!(c = icmp_(sle, a, b)), vec![def!(c = icmp(sge, b, a))]); + let icmp_ = icmp.bind(vector(*ty, sse_vector_size)); + narrow.legalize(def!(c = icmp_(ule, a, b)), vec![def!(c = icmp(uge, b, a))]); + } + + // SIMD integer min/max + for ty in &[I8, I16, I32] { + let imin = imin.bind(vector(*ty, sse_vector_size)); + narrow.legalize(def!(c = imin(a, b)), vec![def!(c = x86_pmins(a, b))]); + let umin = umin.bind(vector(*ty, sse_vector_size)); + narrow.legalize(def!(c = umin(a, b)), vec![def!(c = x86_pminu(a, b))]); + let imax = imax.bind(vector(*ty, sse_vector_size)); + narrow.legalize(def!(c = imax(a, b)), vec![def!(c = x86_pmaxs(a, b))]); + let umax = umax.bind(vector(*ty, sse_vector_size)); + narrow.legalize(def!(c = umax(a, b)), vec![def!(c = x86_pmaxu(a, b))]); + } + + // SIMD fcmp greater-/less-than + let gt = Literal::enumerator_for(&imm.floatcc, "gt"); + let lt = Literal::enumerator_for(&imm.floatcc, "lt"); + let ge = Literal::enumerator_for(&imm.floatcc, "ge"); + let le = Literal::enumerator_for(&imm.floatcc, "le"); + let ugt = Literal::enumerator_for(&imm.floatcc, "ugt"); + let ult = Literal::enumerator_for(&imm.floatcc, "ult"); + let uge = Literal::enumerator_for(&imm.floatcc, "uge"); + let ule = Literal::enumerator_for(&imm.floatcc, "ule"); + for ty in &[F32, F64] { + let fcmp_ = fcmp.bind(vector(*ty, sse_vector_size)); + narrow.legalize(def!(c = fcmp_(gt, a, b)), vec![def!(c = 
fcmp(lt, b, a))]); + let fcmp_ = fcmp.bind(vector(*ty, sse_vector_size)); + narrow.legalize(def!(c = fcmp_(ge, a, b)), vec![def!(c = fcmp(le, b, a))]); + let fcmp_ = fcmp.bind(vector(*ty, sse_vector_size)); + narrow.legalize(def!(c = fcmp_(ult, a, b)), vec![def!(c = fcmp(ugt, b, a))]); + let fcmp_ = fcmp.bind(vector(*ty, sse_vector_size)); + narrow.legalize(def!(c = fcmp_(ule, a, b)), vec![def!(c = fcmp(uge, b, a))]); + } + + for ty in &[F32, F64] { + let fneg = fneg.bind(vector(*ty, sse_vector_size)); + let lane_type_as_int = LaneType::int_from_bits(LaneType::from(*ty).lane_bits() as u16); + let uimm8_shift = Literal::constant(&imm.uimm8, lane_type_as_int.lane_bits() as i64 - 1); + let vconst = vconst.bind(vector(lane_type_as_int, sse_vector_size)); + let bitcast_to_float = raw_bitcast.bind(vector(*ty, sse_vector_size)); + narrow.legalize( + def!(b = fneg(a)), + vec![ + def!(c = vconst(u128_ones)), + def!(d = ishl_imm(c, uimm8_shift)), // Create a mask of all 0s except the MSB. + def!(e = bitcast_to_float(d)), // Cast mask to the floating-point type. + def!(b = bxor(a, e)), // Flip the MSB. + ], + ); + } + + // SIMD fabs + for ty in &[F32, F64] { + let fabs = fabs.bind(vector(*ty, sse_vector_size)); + let lane_type_as_int = LaneType::int_from_bits(LaneType::from(*ty).lane_bits() as u16); + let vconst = vconst.bind(vector(lane_type_as_int, sse_vector_size)); + let bitcast_to_float = raw_bitcast.bind(vector(*ty, sse_vector_size)); + narrow.legalize( + def!(b = fabs(a)), + vec![ + def!(c = vconst(u128_ones)), + def!(d = ushr_imm(c, uimm8_one)), // Create a mask of all 1s except the MSB. + def!(e = bitcast_to_float(d)), // Cast mask to the floating-point type. + def!(b = band(a, e)), // Unset the MSB. + ], + ); + } + + // SIMD widen + for ty in &[I8, I16] { + let swiden_high = swiden_high.bind(vector(*ty, sse_vector_size)); + narrow.legalize( + def!(b = swiden_high(a)), + vec![ + def!(c = x86_palignr(a, a, uimm8_eight)), + def!(b = swiden_low(c)), + ], + ); + let uwiden_high = uwiden_high.bind(vector(*ty, sse_vector_size)); + narrow.legalize( + def!(b = uwiden_high(a)), + vec![ + def!(c = x86_palignr(a, a, uimm8_eight)), + def!(b = uwiden_low(c)), + ], + ); + } + + narrow.custom_legalize(shuffle, "convert_shuffle"); + narrow.custom_legalize(extractlane, "convert_extractlane"); + narrow.custom_legalize(insertlane, "convert_insertlane"); + narrow.custom_legalize(ineg, "convert_ineg"); + narrow.custom_legalize(ushr, "convert_ushr"); + narrow.custom_legalize(ishl, "convert_ishl"); + narrow.custom_legalize(fcvt_to_sint_sat, "expand_fcvt_to_sint_sat_vector"); + narrow.custom_legalize(fmin, "expand_minmax_vector"); + narrow.custom_legalize(fmax, "expand_minmax_vector"); + narrow.custom_legalize(load_splat, "expand_load_splat"); + + narrow_avx.custom_legalize(imul, "convert_i64x2_imul"); + narrow_avx.custom_legalize(fcvt_from_uint, "expand_fcvt_from_uint_vector"); + narrow_avx.custom_legalize(fcvt_to_uint_sat, "expand_fcvt_to_uint_sat_vector"); +} diff --git a/third_party/rust/cranelift-codegen-meta/src/isa/x86/mod.rs b/third_party/rust/cranelift-codegen-meta/src/isa/x86/mod.rs new file mode 100644 index 0000000000..a272e83900 --- /dev/null +++ b/third_party/rust/cranelift-codegen-meta/src/isa/x86/mod.rs @@ -0,0 +1,88 @@ +use crate::cdsl::cpu_modes::CpuMode; +use crate::cdsl::isa::TargetIsa; +use crate::cdsl::types::{ReferenceType, VectorType}; + +use crate::shared::types::Bool::B1; +use crate::shared::types::Float::{F32, F64}; +use crate::shared::types::Int::{I16, I32, I64, I8}; +use 
crate::shared::types::Reference::{R32, R64}; +use crate::shared::Definitions as SharedDefinitions; + +mod encodings; +mod instructions; +mod legalize; +mod opcodes; +mod recipes; +mod registers; +pub(crate) mod settings; + +pub(crate) fn define(shared_defs: &mut SharedDefinitions) -> TargetIsa { + let settings = settings::define(&shared_defs.settings); + let regs = registers::define(); + + let inst_group = instructions::define( + &mut shared_defs.all_instructions, + &shared_defs.formats, + &shared_defs.imm, + &shared_defs.entities, + ); + legalize::define(shared_defs, &inst_group); + + // CPU modes for 32-bit and 64-bit operations. + let mut x86_64 = CpuMode::new("I64"); + let mut x86_32 = CpuMode::new("I32"); + + let expand_flags = shared_defs.transform_groups.by_name("expand_flags"); + let x86_widen = shared_defs.transform_groups.by_name("x86_widen"); + let x86_narrow = shared_defs.transform_groups.by_name("x86_narrow"); + let x86_narrow_avx = shared_defs.transform_groups.by_name("x86_narrow_avx"); + let x86_expand = shared_defs.transform_groups.by_name("x86_expand"); + + x86_32.legalize_monomorphic(expand_flags); + x86_32.legalize_default(x86_narrow); + x86_32.legalize_type(B1, expand_flags); + x86_32.legalize_type(I8, x86_widen); + x86_32.legalize_type(I16, x86_widen); + x86_32.legalize_type(I32, x86_expand); + x86_32.legalize_value_type(ReferenceType(R32), x86_expand); + x86_32.legalize_type(F32, x86_expand); + x86_32.legalize_type(F64, x86_expand); + x86_32.legalize_value_type(VectorType::new(I32.into(), 4), x86_narrow_avx); + x86_32.legalize_value_type(VectorType::new(I64.into(), 2), x86_narrow_avx); + x86_32.legalize_value_type(VectorType::new(F32.into(), 4), x86_narrow_avx); + + x86_64.legalize_monomorphic(expand_flags); + x86_64.legalize_default(x86_narrow); + x86_64.legalize_type(B1, expand_flags); + x86_64.legalize_type(I8, x86_widen); + x86_64.legalize_type(I16, x86_widen); + x86_64.legalize_type(I32, x86_expand); + x86_64.legalize_type(I64, x86_expand); + x86_64.legalize_value_type(ReferenceType(R64), x86_expand); + x86_64.legalize_type(F32, x86_expand); + x86_64.legalize_type(F64, x86_expand); + x86_64.legalize_value_type(VectorType::new(I32.into(), 4), x86_narrow_avx); + x86_64.legalize_value_type(VectorType::new(I64.into(), 2), x86_narrow_avx); + x86_64.legalize_value_type(VectorType::new(F32.into(), 4), x86_narrow_avx); + + let recipes = recipes::define(shared_defs, &settings, ®s); + + let encodings = encodings::define(shared_defs, &settings, &inst_group, &recipes); + x86_32.set_encodings(encodings.enc32); + x86_64.set_encodings(encodings.enc64); + let encodings_predicates = encodings.inst_pred_reg.extract(); + + let recipes = encodings.recipes; + + let cpu_modes = vec![x86_64, x86_32]; + + TargetIsa::new( + "x86", + inst_group, + settings, + regs, + recipes, + cpu_modes, + encodings_predicates, + ) +} diff --git a/third_party/rust/cranelift-codegen-meta/src/isa/x86/opcodes.rs b/third_party/rust/cranelift-codegen-meta/src/isa/x86/opcodes.rs new file mode 100644 index 0000000000..09c07c458f --- /dev/null +++ b/third_party/rust/cranelift-codegen-meta/src/isa/x86/opcodes.rs @@ -0,0 +1,721 @@ +//! Static, named definitions of instruction opcodes. + +/// Empty opcode for use as a default. +pub static EMPTY: [u8; 0] = []; + +/// Add with carry flag r{16,32,64} to r/m of the same size. +pub static ADC: [u8; 1] = [0x11]; + +/// Add r{16,32,64} to r/m of the same size. +pub static ADD: [u8; 1] = [0x01]; + +/// Add imm{16,32} to r/m{16,32,64}, possibly sign-extended. 
+pub static ADD_IMM: [u8; 1] = [0x81]; + +/// Add sign-extended imm8 to r/m{16,32,64}. +pub static ADD_IMM8_SIGN_EXTEND: [u8; 1] = [0x83]; + +/// Add packed double-precision floating-point values from xmm2/mem to xmm1 and store result in +/// xmm1 (SSE2). +pub static ADDPD: [u8; 3] = [0x66, 0x0f, 0x58]; + +/// Add packed single-precision floating-point values from xmm2/mem to xmm1 and store result in +/// xmm1 (SSE). +pub static ADDPS: [u8; 2] = [0x0f, 0x58]; + +/// Add the low double-precision floating-point value from xmm2/mem to xmm1 +/// and store the result in xmm1. +pub static ADDSD: [u8; 3] = [0xf2, 0x0f, 0x58]; + +/// Add the low single-precision floating-point value from xmm2/mem to xmm1 +/// and store the result in xmm1. +pub static ADDSS: [u8; 3] = [0xf3, 0x0f, 0x58]; + +/// r/m{16,32,64} AND register of the same size (Intel docs have a typo). +pub static AND: [u8; 1] = [0x21]; + +/// imm{16,32} AND r/m{16,32,64}, possibly sign-extended. +pub static AND_IMM: [u8; 1] = [0x81]; + +/// r/m{16,32,64} AND sign-extended imm8. +pub static AND_IMM8_SIGN_EXTEND: [u8; 1] = [0x83]; + +/// Return the bitwise logical AND NOT of packed single-precision floating-point +/// values in xmm1 and xmm2/mem. +pub static ANDNPS: [u8; 2] = [0x0f, 0x55]; + +/// Return the bitwise logical AND of packed single-precision floating-point values +/// in xmm1 and xmm2/mem. +pub static ANDPS: [u8; 2] = [0x0f, 0x54]; + +/// Bit scan forward (stores index of first encountered 1 from the front). +pub static BIT_SCAN_FORWARD: [u8; 2] = [0x0f, 0xbc]; + +/// Bit scan reverse (stores index of first encountered 1 from the back). +pub static BIT_SCAN_REVERSE: [u8; 2] = [0x0f, 0xbd]; + +/// Select packed single-precision floating-point values from xmm1 and xmm2/m128 +/// from mask specified in XMM0 and store the values into xmm1 (SSE4.1). +pub static BLENDVPS: [u8; 4] = [0x66, 0x0f, 0x38, 0x14]; + +/// Select packed double-precision floating-point values from xmm1 and xmm2/m128 +/// from mask specified in XMM0 and store the values into xmm1 (SSE4.1). +pub static BLENDVPD: [u8; 4] = [0x66, 0x0f, 0x38, 0x15]; + +/// Call near, relative, displacement relative to next instruction (sign-extended). +pub static CALL_RELATIVE: [u8; 1] = [0xe8]; + +/// Move r/m{16,32,64} if overflow (OF=1). +pub static CMOV_OVERFLOW: [u8; 2] = [0x0f, 0x40]; + +/// Compare imm{16,32} with r/m{16,32,64} (sign-extended if 64). +pub static CMP_IMM: [u8; 1] = [0x81]; + +/// Compare imm8 with r/m{16,32,64}. +pub static CMP_IMM8: [u8; 1] = [0x83]; + +/// Compare r{16,32,64} with r/m of the same size. +pub static CMP_REG: [u8; 1] = [0x39]; + +/// Compare packed double-precision floating-point value in xmm2/m32 and xmm1 using bits 2:0 of +/// imm8 as comparison predicate (SSE2). +pub static CMPPD: [u8; 3] = [0x66, 0x0f, 0xc2]; + +/// Compare packed single-precision floating-point value in xmm2/m32 and xmm1 using bits 2:0 of +/// imm8 as comparison predicate (SSE). +pub static CMPPS: [u8; 2] = [0x0f, 0xc2]; + +/// Convert four packed signed doubleword integers from xmm2/mem to four packed single-precision +/// floating-point values in xmm1 (SSE2). +pub static CVTDQ2PS: [u8; 2] = [0x0f, 0x5b]; + +/// Convert scalar double-precision floating-point value to scalar single-precision +/// floating-point value. +pub static CVTSD2SS: [u8; 3] = [0xf2, 0x0f, 0x5a]; + +/// Convert doubleword integer to scalar double-precision floating-point value. 
+pub static CVTSI2SD: [u8; 3] = [0xf2, 0x0f, 0x2a]; + +/// Convert doubleword integer to scalar single-precision floating-point value. +pub static CVTSI2SS: [u8; 3] = [0xf3, 0x0f, 0x2a]; + +/// Convert scalar single-precision floating-point value to scalar double-precision +/// float-point value. +pub static CVTSS2SD: [u8; 3] = [0xf3, 0x0f, 0x5a]; + +/// Convert four packed single-precision floating-point values from xmm2/mem to four packed signed +/// doubleword values in xmm1 using truncation (SSE2). +pub static CVTTPS2DQ: [u8; 3] = [0xf3, 0x0f, 0x5b]; + +/// Convert with truncation scalar double-precision floating-point value to signed +/// integer. +pub static CVTTSD2SI: [u8; 3] = [0xf2, 0x0f, 0x2c]; + +/// Convert with truncation scalar single-precision floating-point value to integer. +pub static CVTTSS2SI: [u8; 3] = [0xf3, 0x0f, 0x2c]; + +/// Unsigned divide for {16,32,64}-bit. +pub static DIV: [u8; 1] = [0xf7]; + +/// Divide packed double-precision floating-point values in xmm1 by packed double-precision +/// floating-point values in xmm2/mem (SSE2). +pub static DIVPD: [u8; 3] = [0x66, 0x0f, 0x5e]; + +/// Divide packed single-precision floating-point values in xmm1 by packed single-precision +/// floating-point values in xmm2/mem (SSE). +pub static DIVPS: [u8; 2] = [0x0f, 0x5e]; + +/// Divide low double-precision floating-point value in xmm1 by low double-precision +/// floating-point value in xmm2/m64. +pub static DIVSD: [u8; 3] = [0xf2, 0x0f, 0x5e]; + +/// Divide low single-precision floating-point value in xmm1 by low single-precision +/// floating-point value in xmm2/m32. +pub static DIVSS: [u8; 3] = [0xf3, 0x0f, 0x5e]; + +/// Signed divide for {16,32,64}-bit. +pub static IDIV: [u8; 1] = [0xf7]; + +/// Signed multiply for {16,32,64}-bit, generic registers. +pub static IMUL: [u8; 2] = [0x0f, 0xaf]; + +/// Signed multiply for {16,32,64}-bit, storing into RDX:RAX. +pub static IMUL_RDX_RAX: [u8; 1] = [0xf7]; + +/// Insert scalar single-precision floating-point value. +pub static INSERTPS: [u8; 4] = [0x66, 0x0f, 0x3a, 0x21]; + +/// Either: +/// 1. Jump near, absolute indirect, RIP = 64-bit offset from register or memory. +/// 2. Jump far, absolute indirect, address given in m16:64. +pub static JUMP_ABSOLUTE: [u8; 1] = [0xff]; + +/// Jump near, relative, RIP = RIP + 32-bit displacement sign extended to 64 bits. +pub static JUMP_NEAR_RELATIVE: [u8; 1] = [0xe9]; + +/// Jump near (rel32) if overflow (OF=1). +pub static JUMP_NEAR_IF_OVERFLOW: [u8; 2] = [0x0f, 0x80]; + +/// Jump short, relative, RIP = RIP + 8-bit displacement sign extended to 64 bits. +pub static JUMP_SHORT: [u8; 1] = [0xeb]; + +/// Jump short (rel8) if equal (ZF=1). +pub static JUMP_SHORT_IF_EQUAL: [u8; 1] = [0x74]; + +/// Jump short (rel8) if not equal (ZF=0). +pub static JUMP_SHORT_IF_NOT_EQUAL: [u8; 1] = [0x75]; + +/// Jump short (rel8) if overflow (OF=1). +pub static JUMP_SHORT_IF_OVERFLOW: [u8; 1] = [0x70]; + +/// Store effective address for m in register r{16,32,64}. +pub static LEA: [u8; 1] = [0x8d]; + +/// Count the number of leading zero bits. +pub static LZCNT: [u8; 3] = [0xf3, 0x0f, 0xbd]; + +/// Return the maximum packed double-precision floating-point values between xmm1 and xmm2/m128 +/// (SSE2). +pub static MAXPD: [u8; 3] = [0x66, 0x0f, 0x5f]; + +/// Return the maximum packed single-precision floating-point values between xmm1 and xmm2/m128 +/// (SSE). +pub static MAXPS: [u8; 2] = [0x0f, 0x5f]; + +/// Return the maximum scalar double-precision floating-point value between +/// xmm2/m64 and xmm1. 
+pub static MAXSD: [u8; 3] = [0xf2, 0x0f, 0x5f]; + +/// Return the maximum scalar single-precision floating-point value between +/// xmm2/m32 and xmm1. +pub static MAXSS: [u8; 3] = [0xf3, 0x0f, 0x5f]; + +/// Return the minimum packed double-precision floating-point values between xmm1 and xmm2/m128 +/// (SSE2). +pub static MINPD: [u8; 3] = [0x66, 0x0f, 0x5d]; + +/// Return the minimum packed single-precision floating-point values between xmm1 and xmm2/m128 +/// (SSE). +pub static MINPS: [u8; 2] = [0x0f, 0x5d]; + +/// Return the minimum scalar double-precision floating-point value between +/// xmm2/m64 and xmm1. +pub static MINSD: [u8; 3] = [0xf2, 0x0f, 0x5d]; + +/// Return the minimum scalar single-precision floating-point value between +/// xmm2/m32 and xmm1. +pub static MINSS: [u8; 3] = [0xf3, 0x0f, 0x5d]; + +/// Move r8 to r/m8. +pub static MOV_BYTE_STORE: [u8; 1] = [0x88]; + +/// Move imm{16,32,64} to same-sized register. +pub static MOV_IMM: [u8; 1] = [0xb8]; + +/// Move imm{16,32} to r{16,32,64}, sign-extended if 64-bit target. +pub static MOV_IMM_SIGNEXTEND: [u8; 1] = [0xc7]; + +/// Move {r/m16, r/m32, r/m64} to same-sized register. +pub static MOV_LOAD: [u8; 1] = [0x8b]; + +/// Move r16 to r/m16. +pub static MOV_STORE_16: [u8; 2] = [0x66, 0x89]; + +/// Move {r16, r32, r64} to same-sized register or memory. +pub static MOV_STORE: [u8; 1] = [0x89]; + +/// Move aligned packed single-precision floating-point values from x/m to xmm (SSE). +pub static MOVAPS_LOAD: [u8; 2] = [0x0f, 0x28]; + +/// Move doubleword from r/m32 to xmm (SSE2). Quadword with REX prefix. +pub static MOVD_LOAD_XMM: [u8; 3] = [0x66, 0x0f, 0x6e]; + +/// Move doubleword from xmm to r/m32 (SSE2). Quadword with REX prefix. +pub static MOVD_STORE_XMM: [u8; 3] = [0x66, 0x0f, 0x7e]; + +/// Move packed single-precision floating-point values low to high (SSE). +pub static MOVLHPS: [u8; 2] = [0x0f, 0x16]; + +/// Move scalar double-precision floating-point value (from reg/mem to reg). +pub static MOVSD_LOAD: [u8; 3] = [0xf2, 0x0f, 0x10]; + +/// Move scalar double-precision floating-point value (from reg to reg/mem). +pub static MOVSD_STORE: [u8; 3] = [0xf2, 0x0f, 0x11]; + +/// Move scalar single-precision floating-point value (from reg to reg/mem). +pub static MOVSS_STORE: [u8; 3] = [0xf3, 0x0f, 0x11]; + +/// Move scalar single-precision floating-point-value (from reg/mem to reg). +pub static MOVSS_LOAD: [u8; 3] = [0xf3, 0x0f, 0x10]; + +/// Move byte to register with sign-extension. +pub static MOVSX_BYTE: [u8; 2] = [0x0f, 0xbe]; + +/// Move word to register with sign-extension. +pub static MOVSX_WORD: [u8; 2] = [0x0f, 0xbf]; + +/// Move doubleword to register with sign-extension. +pub static MOVSXD: [u8; 1] = [0x63]; + +/// Move unaligned packed single-precision floating-point from x/m to xmm (SSE). +pub static MOVUPS_LOAD: [u8; 2] = [0x0f, 0x10]; + +/// Move unaligned packed single-precision floating-point value from xmm to x/m (SSE). +pub static MOVUPS_STORE: [u8; 2] = [0x0f, 0x11]; + +/// Move byte to register with zero-extension. +pub static MOVZX_BYTE: [u8; 2] = [0x0f, 0xb6]; + +/// Move word to register with zero-extension. +pub static MOVZX_WORD: [u8; 2] = [0x0f, 0xb7]; + +/// Unsigned multiply for {16,32,64}-bit. +pub static MUL: [u8; 1] = [0xf7]; + +/// Multiply packed double-precision floating-point values from xmm2/mem to xmm1 and store result +/// in xmm1 (SSE2). 
+pub static MULPD: [u8; 3] = [0x66, 0x0f, 0x59]; + +/// Multiply packed single-precision floating-point values from xmm2/mem to xmm1 and store result +/// in xmm1 (SSE). +pub static MULPS: [u8; 2] = [0x0f, 0x59]; + +/// Multiply the low double-precision floating-point value in xmm2/m64 by the +/// low double-precision floating-point value in xmm1. +pub static MULSD: [u8; 3] = [0xf2, 0x0f, 0x59]; + +/// Multiply the low single-precision floating-point value in xmm2/m32 by the +/// low single-precision floating-point value in xmm1. +pub static MULSS: [u8; 3] = [0xf3, 0x0f, 0x59]; + +/// Reverse each bit of r/m{16,32,64}. +pub static NOT: [u8; 1] = [0xf7]; + +/// r{16,32,64} OR register of same size. +pub static OR: [u8; 1] = [0x09]; + +/// imm{16,32} OR r/m{16,32,64}, possibly sign-extended. +pub static OR_IMM: [u8; 1] = [0x81]; + +/// r/m{16,32,64} OR sign-extended imm8. +pub static OR_IMM8_SIGN_EXTEND: [u8; 1] = [0x83]; + +/// Return the bitwise logical OR of packed single-precision values in xmm and x/m (SSE). +pub static ORPS: [u8; 2] = [0x0f, 0x56]; + +/// Compute the absolute value of bytes in xmm2/m128 and store the unsigned result in xmm1 (SSSE3). +pub static PABSB: [u8; 4] = [0x66, 0x0f, 0x38, 0x1c]; + +/// Compute the absolute value of 32-bit integers in xmm2/m128 and store the unsigned result in +/// xmm1 (SSSE3). +pub static PABSD: [u8; 4] = [0x66, 0x0f, 0x38, 0x1e]; + +/// Compute the absolute value of 16-bit integers in xmm2/m128 and store the unsigned result in +/// xmm1 (SSSE3). +pub static PABSW: [u8; 4] = [0x66, 0x0f, 0x38, 0x1d]; + +/// Converts 8 packed signed word integers from xmm1 and from xmm2/m128 into 16 packed signed byte +/// integers in xmm1 using signed saturation (SSE2). +pub static PACKSSWB: [u8; 3] = [0x66, 0x0f, 0x63]; + +/// Converts 4 packed signed doubleword integers from xmm1 and from xmm2/m128 into 8 packed signed +/// word integers in xmm1 using signed saturation (SSE2). +pub static PACKSSDW: [u8; 3] = [0x66, 0x0f, 0x6b]; + +/// Converts 8 packed signed word integers from xmm1 and from xmm2/m128 into 16 packed unsigned byte +/// integers in xmm1 using unsigned saturation (SSE2). +pub static PACKUSWB: [u8; 3] = [0x66, 0x0f, 0x67]; + +/// Converts 4 packed signed doubleword integers from xmm1 and from xmm2/m128 into 8 unpacked signed +/// word integers in xmm1 using unsigned saturation (SSE4.1). +pub static PACKUSDW: [u8; 4] = [0x66, 0x0f, 0x38, 0x2b]; + +/// Add packed byte integers from xmm2/m128 and xmm1 (SSE2). +pub static PADDB: [u8; 3] = [0x66, 0x0f, 0xfc]; + +/// Add packed doubleword integers from xmm2/m128 and xmm1 (SSE2). +pub static PADDD: [u8; 3] = [0x66, 0x0f, 0xfe]; + +/// Add packed quadword integers from xmm2/m128 and xmm1 (SSE2). +pub static PADDQ: [u8; 3] = [0x66, 0x0f, 0xd4]; + +/// Add packed word integers from xmm2/m128 and xmm1 (SSE2). +pub static PADDW: [u8; 3] = [0x66, 0x0f, 0xfd]; + +/// Add packed signed byte integers from xmm2/m128 and xmm1 saturate the results (SSE). +pub static PADDSB: [u8; 3] = [0x66, 0x0f, 0xec]; + +/// Add packed signed word integers from xmm2/m128 and xmm1 saturate the results (SSE). +pub static PADDSW: [u8; 3] = [0x66, 0x0f, 0xed]; + +/// Add packed unsigned byte integers from xmm2/m128 and xmm1 saturate the results (SSE). +pub static PADDUSB: [u8; 3] = [0x66, 0x0f, 0xdc]; + +/// Add packed unsigned word integers from xmm2/m128 and xmm1 saturate the results (SSE). 
+pub static PADDUSW: [u8; 3] = [0x66, 0x0f, 0xdd]; + +/// Concatenate destination and source operands, extract a byte-aligned result into xmm1 that is +/// shifted to the right by the constant number of bytes in imm8 (SSSE3). +pub static PALIGNR: [u8; 4] = [0x66, 0x0f, 0x3a, 0x0f]; + +/// Bitwise AND of xmm2/m128 and xmm1 (SSE2). +pub static PAND: [u8; 3] = [0x66, 0x0f, 0xdb]; + +/// Bitwise AND NOT of xmm2/m128 and xmm1 (SSE2). +pub static PANDN: [u8; 3] = [0x66, 0x0f, 0xdf]; + +/// Average packed unsigned byte integers from xmm2/m128 and xmm1 with rounding (SSE2). +pub static PAVGB: [u8; 3] = [0x66, 0x0f, 0xE0]; + +/// Average packed unsigned word integers from xmm2/m128 and xmm1 with rounding (SSE2). +pub static PAVGW: [u8; 3] = [0x66, 0x0f, 0xE3]; + +/// Select byte values from xmm1 and xmm2/m128 from mask specified in the high bit of each byte +/// in XMM0 and store the values into xmm1 (SSE4.1). +pub static PBLENDVB: [u8; 4] = [0x66, 0x0f, 0x38, 0x10]; + +/// Select words from xmm1 and xmm2/m128 from mask specified in imm8 and store the values into xmm1 +/// (SSE4.1). +pub static PBLENDW: [u8; 4] = [0x66, 0x0f, 0x3a, 0x0e]; + +/// Compare packed data for equal (SSE2). +pub static PCMPEQB: [u8; 3] = [0x66, 0x0f, 0x74]; + +/// Compare packed data for equal (SSE2). +pub static PCMPEQD: [u8; 3] = [0x66, 0x0f, 0x76]; + +/// Compare packed data for equal (SSE4.1). +pub static PCMPEQQ: [u8; 4] = [0x66, 0x0f, 0x38, 0x29]; + +/// Compare packed data for equal (SSE2). +pub static PCMPEQW: [u8; 3] = [0x66, 0x0f, 0x75]; + +/// Compare packed signed byte integers for greater than (SSE2). +pub static PCMPGTB: [u8; 3] = [0x66, 0x0f, 0x64]; + +/// Compare packed signed doubleword integers for greater than (SSE2). +pub static PCMPGTD: [u8; 3] = [0x66, 0x0f, 0x66]; + +/// Compare packed signed quadword integers for greater than (SSE4.2). +pub static PCMPGTQ: [u8; 4] = [0x66, 0x0f, 0x38, 0x37]; + +/// Compare packed signed word integers for greater than (SSE2). +pub static PCMPGTW: [u8; 3] = [0x66, 0x0f, 0x65]; + +/// Extract doubleword or quadword, depending on REX.W (SSE4.1). +pub static PEXTR: [u8; 4] = [0x66, 0x0f, 0x3a, 0x16]; + +/// Extract byte (SSE4.1). +pub static PEXTRB: [u8; 4] = [0x66, 0x0f, 0x3a, 0x14]; + +/// Extract word (SSE4.1). There is a 3-byte SSE2 variant that can also move to m/16. +pub static PEXTRW: [u8; 4] = [0x66, 0x0f, 0x3a, 0x15]; + +/// Insert doubleword or quadword, depending on REX.W (SSE4.1). +pub static PINSR: [u8; 4] = [0x66, 0x0f, 0x3a, 0x22]; + +/// Insert byte (SSE4.1). +pub static PINSRB: [u8; 4] = [0x66, 0x0f, 0x3a, 0x20]; + +/// Insert word (SSE2). +pub static PINSRW: [u8; 3] = [0x66, 0x0f, 0xc4]; + +/// Compare packed signed byte integers in xmm1 and xmm2/m128 and store packed maximum values in +/// xmm1 (SSE4.1). +pub static PMAXSB: [u8; 4] = [0x66, 0x0f, 0x38, 0x3c]; + +/// Compare packed signed doubleword integers in xmm1 and xmm2/m128 and store packed maximum +/// values in xmm1 (SSE4.1). +pub static PMAXSD: [u8; 4] = [0x66, 0x0f, 0x38, 0x3d]; + +/// Compare packed signed word integers in xmm1 and xmm2/m128 and store packed maximum values in +/// xmm1 (SSE2). +pub static PMAXSW: [u8; 3] = [0x66, 0x0f, 0xee]; + +/// Compare packed unsigned byte integers in xmm1 and xmm2/m128 and store packed maximum values in +/// xmm1 (SSE2). +pub static PMAXUB: [u8; 3] = [0x66, 0x0f, 0xde]; + +/// Compare packed unsigned doubleword integers in xmm1 and xmm2/m128 and store packed maximum +/// values in xmm1 (SSE4.1). 
+pub static PMAXUD: [u8; 4] = [0x66, 0x0f, 0x38, 0x3f]; + +/// Compare packed unsigned word integers in xmm1 and xmm2/m128 and store packed maximum values in +/// xmm1 (SSE4.1). +pub static PMAXUW: [u8; 4] = [0x66, 0x0f, 0x38, 0x3e]; + +/// Compare packed signed byte integers in xmm1 and xmm2/m128 and store packed minimum values in +/// xmm1 (SSE4.1). +pub static PMINSB: [u8; 4] = [0x66, 0x0f, 0x38, 0x38]; + +/// Compare packed signed doubleword integers in xmm1 and xmm2/m128 and store packed minimum +/// values in xmm1 (SSE4.1). +pub static PMINSD: [u8; 4] = [0x66, 0x0f, 0x38, 0x39]; + +/// Compare packed signed word integers in xmm1 and xmm2/m128 and store packed minimum values in +/// xmm1 (SSE2). +pub static PMINSW: [u8; 3] = [0x66, 0x0f, 0xea]; + +/// Compare packed unsigned byte integers in xmm1 and xmm2/m128 and store packed minimum values in +/// xmm1 (SSE2). +pub static PMINUB: [u8; 3] = [0x66, 0x0f, 0xda]; + +/// Compare packed unsigned doubleword integers in xmm1 and xmm2/m128 and store packed minimum +/// values in xmm1 (SSE4.1). +pub static PMINUD: [u8; 4] = [0x66, 0x0f, 0x38, 0x3b]; + +/// Compare packed unsigned word integers in xmm1 and xmm2/m128 and store packed minimum values in +/// xmm1 (SSE4.1). +pub static PMINUW: [u8; 4] = [0x66, 0x0f, 0x38, 0x3a]; + +/// Sign extend 8 packed 8-bit integers in the low 8 bytes of xmm2/m64 to 8 packed 16-bit +/// integers in xmm1 (SSE4.1). +pub static PMOVSXBW: [u8; 4] = [0x66, 0x0f, 0x38, 0x20]; + +/// Sign extend 4 packed 16-bit integers in the low 8 bytes of xmm2/m64 to 4 packed 32-bit +/// integers in xmm1 (SSE4.1). +pub static PMOVSXWD: [u8; 4] = [0x66, 0x0f, 0x38, 0x23]; + +/// Sign extend 2 packed 32-bit integers in the low 8 bytes of xmm2/m64 to 2 packed 64-bit +/// integers in xmm1 (SSE4.1). +pub static PMOVSXDQ: [u8; 4] = [0x66, 0x0f, 0x38, 0x25]; + +/// Zero extend 8 packed 8-bit integers in the low 8 bytes of xmm2/m64 to 8 packed 16-bit +/// integers in xmm1 (SSE4.1). +pub static PMOVZXBW: [u8; 4] = [0x66, 0x0f, 0x38, 0x30]; + +/// Zero extend 4 packed 16-bit integers in the low 8 bytes of xmm2/m64 to 4 packed 32-bit +/// integers in xmm1 (SSE4.1). +pub static PMOVZXWD: [u8; 4] = [0x66, 0x0f, 0x38, 0x33]; + +/// Zero extend 2 packed 32-bit integers in the low 8 bytes of xmm2/m64 to 2 packed 64-bit +/// integers in xmm1 (SSE4.1). +pub static PMOVZXDQ: [u8; 4] = [0x66, 0x0f, 0x38, 0x35]; + +/// Multiply the packed signed word integers in xmm1 and xmm2/m128, and store the low 16 bits of +/// the results in xmm1 (SSE2). +pub static PMULLW: [u8; 3] = [0x66, 0x0f, 0xd5]; + +/// Multiply the packed doubleword signed integers in xmm1 and xmm2/m128 and store the low 32 +/// bits of each product in xmm1 (SSE4.1). +pub static PMULLD: [u8; 4] = [0x66, 0x0f, 0x38, 0x40]; + +/// Multiply the packed quadword signed integers in xmm2 and xmm3/m128 and store the low 64 +/// bits of each product in xmm1 (AVX512VL/DQ). Requires an EVEX encoding. +pub static VPMULLQ: [u8; 4] = [0x66, 0x0f, 0x38, 0x40]; + +/// Multiply packed unsigned doubleword integers in xmm1 by packed unsigned doubleword integers +/// in xmm2/m128, and store the quadword results in xmm1 (SSE2). +pub static PMULUDQ: [u8; 3] = [0x66, 0x0f, 0xf4]; + +/// Pop top of stack into r{16,32,64}; increment stack pointer. +pub static POP_REG: [u8; 1] = [0x58]; + +/// Returns the count of number of bits set to 1. +pub static POPCNT: [u8; 3] = [0xf3, 0x0f, 0xb8]; + +/// Bitwise OR of xmm2/m128 and xmm1 (SSE2). 
+pub static POR: [u8; 3] = [0x66, 0x0f, 0xeb]; + +/// Shuffle bytes in xmm1 according to contents of xmm2/m128 (SSE3). +pub static PSHUFB: [u8; 4] = [0x66, 0x0f, 0x38, 0x00]; + +/// Shuffle the doublewords in xmm2/m128 based on the encoding in imm8 and +/// store the result in xmm1 (SSE2). +pub static PSHUFD: [u8; 3] = [0x66, 0x0f, 0x70]; + +/// Shift words in xmm1 by imm8; the direction and sign-bit behavior is controlled by the RRR +/// digit used in the ModR/M byte (SSE2). +pub static PS_W_IMM: [u8; 3] = [0x66, 0x0f, 0x71]; + +/// Shift doublewords in xmm1 by imm8; the direction and sign-bit behavior is controlled by the RRR +/// digit used in the ModR/M byte (SSE2). +pub static PS_D_IMM: [u8; 3] = [0x66, 0x0f, 0x72]; + +/// Shift quadwords in xmm1 by imm8; the direction and sign-bit behavior is controlled by the RRR +/// digit used in the ModR/M byte (SSE2). +pub static PS_Q_IMM: [u8; 3] = [0x66, 0x0f, 0x73]; + +/// Shift words in xmm1 left by xmm2/m128 while shifting in 0s (SSE2). +pub static PSLLW: [u8; 3] = [0x66, 0x0f, 0xf1]; + +/// Shift doublewords in xmm1 left by xmm2/m128 while shifting in 0s (SSE2). +pub static PSLLD: [u8; 3] = [0x66, 0x0f, 0xf2]; + +/// Shift quadwords in xmm1 left by xmm2/m128 while shifting in 0s (SSE2). +pub static PSLLQ: [u8; 3] = [0x66, 0x0f, 0xf3]; + +/// Shift words in xmm1 right by xmm2/m128 while shifting in 0s (SSE2). +pub static PSRLW: [u8; 3] = [0x66, 0x0f, 0xd1]; + +/// Shift doublewords in xmm1 right by xmm2/m128 while shifting in 0s (SSE2). +pub static PSRLD: [u8; 3] = [0x66, 0x0f, 0xd2]; + +/// Shift quadwords in xmm1 right by xmm2/m128 while shifting in 0s (SSE2). +pub static PSRLQ: [u8; 3] = [0x66, 0x0f, 0xd3]; + +/// Shift words in xmm1 right by xmm2/m128 while shifting in sign bits (SSE2). +pub static PSRAW: [u8; 3] = [0x66, 0x0f, 0xe1]; + +/// Shift doublewords in xmm1 right by xmm2/m128 while shifting in sign bits (SSE2). +pub static PSRAD: [u8; 3] = [0x66, 0x0f, 0xe2]; + +/// Subtract packed byte integers in xmm2/m128 from packed byte integers in xmm1 (SSE2). +pub static PSUBB: [u8; 3] = [0x66, 0x0f, 0xf8]; + +/// Subtract packed word integers in xmm2/m128 from packed word integers in xmm1 (SSE2). +pub static PSUBW: [u8; 3] = [0x66, 0x0f, 0xf9]; + +/// Subtract packed doubleword integers in xmm2/m128 from doubleword byte integers in xmm1 (SSE2). +pub static PSUBD: [u8; 3] = [0x66, 0x0f, 0xfa]; + +/// Subtract packed quadword integers in xmm2/m128 from xmm1 (SSE2). +pub static PSUBQ: [u8; 3] = [0x66, 0x0f, 0xfb]; + +/// Subtract packed signed byte integers in xmm2/m128 from packed signed byte integers in xmm1 +/// and saturate results (SSE2). +pub static PSUBSB: [u8; 3] = [0x66, 0x0f, 0xe8]; + +/// Subtract packed signed word integers in xmm2/m128 from packed signed word integers in xmm1 +/// and saturate results (SSE2). +pub static PSUBSW: [u8; 3] = [0x66, 0x0f, 0xe9]; + +/// Subtract packed unsigned byte integers in xmm2/m128 from packed unsigned byte integers in xmm1 +/// and saturate results (SSE2). +pub static PSUBUSB: [u8; 3] = [0x66, 0x0f, 0xd8]; + +/// Subtract packed unsigned word integers in xmm2/m128 from packed unsigned word integers in xmm1 +/// and saturate results (SSE2). +pub static PSUBUSW: [u8; 3] = [0x66, 0x0f, 0xd9]; + +/// Set ZF if xmm2/m128 AND xmm1 result is all 0s; set CF if xmm2/m128 AND NOT xmm1 result is all +/// 0s (SSE4.1). +pub static PTEST: [u8; 4] = [0x66, 0x0f, 0x38, 0x17]; + +/// Unpack and interleave high-order bytes from xmm1 and xmm2/m128 into xmm1 (SSE2). 
+pub static PUNPCKHBW: [u8; 3] = [0x66, 0x0f, 0x68]; + +/// Unpack and interleave high-order words from xmm1 and xmm2/m128 into xmm1 (SSE2). +pub static PUNPCKHWD: [u8; 3] = [0x66, 0x0f, 0x69]; + +/// Unpack and interleave high-order doublewords from xmm1 and xmm2/m128 into xmm1 (SSE2). +pub static PUNPCKHDQ: [u8; 3] = [0x66, 0x0f, 0x6A]; + +/// Unpack and interleave high-order quadwords from xmm1 and xmm2/m128 into xmm1 (SSE2). +pub static PUNPCKHQDQ: [u8; 3] = [0x66, 0x0f, 0x6D]; + +/// Unpack and interleave low-order bytes from xmm1 and xmm2/m128 into xmm1 (SSE2). +pub static PUNPCKLBW: [u8; 3] = [0x66, 0x0f, 0x60]; + +/// Unpack and interleave low-order words from xmm1 and xmm2/m128 into xmm1 (SSE2). +pub static PUNPCKLWD: [u8; 3] = [0x66, 0x0f, 0x61]; + +/// Unpack and interleave low-order doublewords from xmm1 and xmm2/m128 into xmm1 (SSE2). +pub static PUNPCKLDQ: [u8; 3] = [0x66, 0x0f, 0x62]; + +/// Unpack and interleave low-order quadwords from xmm1 and xmm2/m128 into xmm1 (SSE2). +pub static PUNPCKLQDQ: [u8; 3] = [0x66, 0x0f, 0x6C]; + +/// Push r{16,32,64}. +pub static PUSH_REG: [u8; 1] = [0x50]; + +/// Logical exclusive OR (SSE2). +pub static PXOR: [u8; 3] = [0x66, 0x0f, 0xef]; + +/// Near return to calling procedure. +pub static RET_NEAR: [u8; 1] = [0xc3]; + +/// General rotation opcode. Kind of rotation depends on encoding. +pub static ROTATE_CL: [u8; 1] = [0xd3]; + +/// General rotation opcode. Kind of rotation depends on encoding. +pub static ROTATE_IMM8: [u8; 1] = [0xc1]; + +/// Round scalar doubl-precision floating-point values. +pub static ROUNDSD: [u8; 4] = [0x66, 0x0f, 0x3a, 0x0b]; + +/// Round scalar single-precision floating-point values. +pub static ROUNDSS: [u8; 4] = [0x66, 0x0f, 0x3a, 0x0a]; + +/// Subtract with borrow r{16,32,64} from r/m of the same size. +pub static SBB: [u8; 1] = [0x19]; + +/// Set byte if overflow (OF=1). +pub static SET_BYTE_IF_OVERFLOW: [u8; 2] = [0x0f, 0x90]; + +/// Compute the square root of the packed double-precision floating-point values and store the +/// result in xmm1 (SSE2). +pub static SQRTPD: [u8; 3] = [0x66, 0x0f, 0x51]; + +/// Compute the square root of the packed double-precision floating-point values and store the +/// result in xmm1 (SSE). +pub static SQRTPS: [u8; 2] = [0x0f, 0x51]; + +/// Compute square root of scalar double-precision floating-point value. +pub static SQRTSD: [u8; 3] = [0xf2, 0x0f, 0x51]; + +/// Compute square root of scalar single-precision value. +pub static SQRTSS: [u8; 3] = [0xf3, 0x0f, 0x51]; + +/// Subtract r{16,32,64} from r/m of same size. +pub static SUB: [u8; 1] = [0x29]; + +/// Subtract packed double-precision floating-point values in xmm2/mem from xmm1 and store result +/// in xmm1 (SSE2). +pub static SUBPD: [u8; 3] = [0x66, 0x0f, 0x5c]; + +/// Subtract packed single-precision floating-point values in xmm2/mem from xmm1 and store result +/// in xmm1 (SSE). +pub static SUBPS: [u8; 2] = [0x0f, 0x5c]; + +/// Subtract the low double-precision floating-point value in xmm2/m64 from xmm1 +/// and store the result in xmm1. +pub static SUBSD: [u8; 3] = [0xf2, 0x0f, 0x5c]; + +/// Subtract the low single-precision floating-point value in xmm2/m32 from xmm1 +/// and store the result in xmm1. +pub static SUBSS: [u8; 3] = [0xf3, 0x0f, 0x5c]; + +/// AND r8 with r/m8; set SF, ZF, PF according to result. +pub static TEST_BYTE_REG: [u8; 1] = [0x84]; + +/// AND {r16, r32, r64} with r/m of the same size; set SF, ZF, PF according to result. 
+pub static TEST_REG: [u8; 1] = [0x85]; + +/// Count the number of trailing zero bits. +pub static TZCNT: [u8; 3] = [0xf3, 0x0f, 0xbc]; + +/// Compare low double-precision floating-point values in xmm1 and xmm2/mem64 +/// and set the EFLAGS flags accordingly. +pub static UCOMISD: [u8; 3] = [0x66, 0x0f, 0x2e]; + +/// Compare low single-precision floating-point values in xmm1 and xmm2/mem32 +/// and set the EFLAGS flags accordingly. +pub static UCOMISS: [u8; 2] = [0x0f, 0x2e]; + +/// Raise invalid opcode instruction. +pub static UNDEFINED2: [u8; 2] = [0x0f, 0x0b]; + +/// Convert four packed unsigned doubleword integers from xmm2/m128/m32bcst to packed +/// single-precision floating-point values in xmm1 with writemask k1. Rounding behavior +/// is controlled by MXCSR but can be overriden by EVEX.L'L in static rounding mode +/// (AVX512VL, AVX512F). +pub static VCVTUDQ2PS: [u8; 3] = [0xf2, 0x0f, 0x7a]; + +/// imm{16,32} XOR r/m{16,32,64}, possibly sign-extended. +pub static XOR_IMM: [u8; 1] = [0x81]; + +/// r/m{16,32,64} XOR sign-extended imm8. +pub static XOR_IMM8_SIGN_EXTEND: [u8; 1] = [0x83]; + +/// r/m{16,32,64} XOR register of the same size. +pub static XOR: [u8; 1] = [0x31]; + +/// r/m8 XOR r8. +pub static XORB: [u8; 1] = [0x30]; + +/// Bitwise logical XOR of packed double-precision floating-point values. +pub static XORPD: [u8; 3] = [0x66, 0x0f, 0x57]; + +/// Bitwise logical XOR of packed single-precision floating-point values. +pub static XORPS: [u8; 2] = [0x0f, 0x57]; diff --git a/third_party/rust/cranelift-codegen-meta/src/isa/x86/recipes.rs b/third_party/rust/cranelift-codegen-meta/src/isa/x86/recipes.rs new file mode 100644 index 0000000000..f45f8dc673 --- /dev/null +++ b/third_party/rust/cranelift-codegen-meta/src/isa/x86/recipes.rs @@ -0,0 +1,3445 @@ +//! Encoding recipes for x86/x86_64. +use std::rc::Rc; + +use cranelift_codegen_shared::isa::x86::EncodingBits; + +use crate::cdsl::ast::Literal; +use crate::cdsl::formats::InstructionFormat; +use crate::cdsl::instructions::InstructionPredicate; +use crate::cdsl::recipes::{ + EncodingRecipe, EncodingRecipeBuilder, OperandConstraint, Register, Stack, +}; +use crate::cdsl::regs::IsaRegs; +use crate::cdsl::settings::SettingGroup; +use crate::shared::Definitions as SharedDefinitions; + +use crate::isa::x86::opcodes; + +/// Helper data structure to create recipes and template recipes. +/// It contains all the recipes and recipe templates that might be used in the encodings crate of +/// this same directory. +pub(crate) struct RecipeGroup<'builder> { + /// Memoized registers description, to pass it to builders later. + regs: &'builder IsaRegs, + + /// All the recipes explicitly created in this file. This is different from the final set of + /// recipes, which is definitive only once encodings have generated new recipes on the fly. + recipes: Vec<EncodingRecipe>, + + /// All the recipe templates created in this file. 
+ templates: Vec<Rc<Template<'builder>>>, +} + +impl<'builder> RecipeGroup<'builder> { + fn new(regs: &'builder IsaRegs) -> Self { + Self { + regs, + recipes: Vec::new(), + templates: Vec::new(), + } + } + fn add_recipe(&mut self, recipe: EncodingRecipeBuilder) { + self.recipes.push(recipe.build()); + } + fn add_template_recipe(&mut self, recipe: EncodingRecipeBuilder) -> Rc<Template<'builder>> { + let template = Rc::new(Template::new(recipe, self.regs)); + self.templates.push(template.clone()); + template + } + fn add_template_inferred( + &mut self, + recipe: EncodingRecipeBuilder, + infer_function: &'static str, + ) -> Rc<Template<'builder>> { + let template = + Rc::new(Template::new(recipe, self.regs).inferred_rex_compute_size(infer_function)); + self.templates.push(template.clone()); + template + } + fn add_template(&mut self, template: Template<'builder>) -> Rc<Template<'builder>> { + let template = Rc::new(template); + self.templates.push(template.clone()); + template + } + pub fn recipe(&self, name: &str) -> &EncodingRecipe { + self.recipes + .iter() + .find(|recipe| recipe.name == name) + .unwrap_or_else(|| panic!("unknown recipe name: {}. Try template?", name)) + } + pub fn template(&self, name: &str) -> &Template { + self.templates + .iter() + .find(|recipe| recipe.name() == name) + .unwrap_or_else(|| panic!("unknown template name: {}. Try recipe?", name)) + } +} + +// Opcode representation. +// +// Cranelift requires each recipe to have a single encoding size in bytes, and x86 opcodes are +// variable length, so we use separate recipes for different styles of opcodes and prefixes. The +// opcode format is indicated by the recipe name prefix. +// +// The match case below does not include the REX prefix which goes after the mandatory prefix. +// VEX/XOP and EVEX prefixes are not yet supported. Encodings using any of these prefixes are +// represented by separate recipes. +// +// The encoding bits are: +// +// 0-7: The opcode byte <op>. +// 8-9: pp, mandatory prefix: +// 00 none (Op*) +// 01 66 (Mp*) +// 10 F3 (Mp*) +// 11 F2 (Mp*) +// 10-11: mm, opcode map: +// 00 <op> (Op1/Mp1) +// 01 0F <op> (Op2/Mp2) +// 10 0F 38 <op> (Op3/Mp3) +// 11 0F 3A <op> (Op3/Mp3) +// 12-14 rrr, opcode bits for the ModR/M byte for certain opcodes. +// 15: REX.W bit (or VEX.W/E) +// +// There is some redundancy between bits 8-11 and the recipe names, but we have enough bits, and +// the pp+mm format is ready for supporting VEX prefixes. +// +// TODO Cranelift doesn't actually require recipe to have different encoding sizes anymore, so this +// could be simplified. + +/// Given a sequence of opcode bytes, compute the recipe name prefix and encoding bits. +fn decode_opcodes(op_bytes: &[u8], rrr: u16, w: u16) -> (&'static str, u16) { + let enc = EncodingBits::new(op_bytes, rrr, w); + (enc.prefix().recipe_name_prefix(), enc.bits()) +} + +/// Given a snippet of Rust code (or None), replace the `PUT_OP` macro with the +/// corresponding `put_*` function from the `binemit.rs` module. +fn replace_put_op(code: Option<String>, prefix: &str) -> Option<String> { + code.map(|code| code.replace("{{PUT_OP}}", &format!("put_{}", prefix.to_lowercase()))) +} + +/// Replaces constraints to a REX-prefixed register class by the equivalent non-REX register class. 
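+// Without a REX prefix, the register fields of the ModR/M byte are only three bits wide, so only
+// the first eight registers of a bank can be named. That is why the helper below narrows GPR/FPR
+// constraints to the GPR8/FPR8 classes for the non-REX variants of a recipe.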
+fn replace_nonrex_constraints( + regs: &IsaRegs, + constraints: Vec<OperandConstraint>, +) -> Vec<OperandConstraint> { + constraints + .into_iter() + .map(|constraint| match constraint { + OperandConstraint::RegClass(rc_index) => { + let new_rc_index = if rc_index == regs.class_by_name("GPR") { + regs.class_by_name("GPR8") + } else if rc_index == regs.class_by_name("FPR") { + regs.class_by_name("FPR8") + } else { + rc_index + }; + OperandConstraint::RegClass(new_rc_index) + } + _ => constraint, + }) + .collect() +} + +fn replace_evex_constraints( + _: &IsaRegs, + constraints: Vec<OperandConstraint>, +) -> Vec<OperandConstraint> { + constraints + .into_iter() + .map(|constraint| match constraint { + OperandConstraint::RegClass(rc_index) => { + // FIXME(#1306) this should be able to upgrade the register class to FPR32 as in + // `replace_nonrex_constraints` above, e.g. When FPR32 is re-added, add back in the + // rc_index conversion to FPR32. In the meantime, this is effectively a no-op + // conversion--the register class stays the same. + OperandConstraint::RegClass(rc_index) + } + _ => constraint, + }) + .collect() +} + +/// Specifies how the prefix (e.g. REX) is emitted by a Recipe. +#[derive(Copy, Clone, PartialEq)] +pub enum RecipePrefixKind { + /// The REX emission behavior is not hardcoded for the Recipe + /// and may be overridden when using the Template. + Unspecified, + + /// The Recipe must hardcode the non-emission of the REX prefix. + NeverEmitRex, + + /// The Recipe must hardcode the emission of the REX prefix. + AlwaysEmitRex, + + /// The Recipe should infer the emission of the REX.RXB bits from registers, + /// and the REX.W bit from the EncodingBits. + /// + /// Because such a Recipe has a non-constant instruction size, it must have + /// a special `compute_size` handler for the inferrable-REX case. + InferRex, + + /// The Recipe must hardcode the emission of an EVEX prefix. + Evex, +} + +impl Default for RecipePrefixKind { + fn default() -> Self { + Self::Unspecified + } +} + +/// Previously called a TailRecipe in the Python meta language, this allows to create multiple +/// variants of a single base EncodingRecipe (rex prefix, specialized w/rrr bits, different +/// opcodes). It serves as a prototype of an EncodingRecipe, which is then used when actually creating +/// Encodings, in encodings.rs. This is an idiosyncrasy of the x86 meta-language, and could be +/// reconsidered later. +#[derive(Clone)] +pub(crate) struct Template<'builder> { + /// Description of registers, used in the build() method. + regs: &'builder IsaRegs, + + /// The recipe template, which is to be specialized (by copy). + recipe: EncodingRecipeBuilder, + + /// How is the REX prefix emitted? + rex_kind: RecipePrefixKind, + + /// Function for `compute_size()` when REX is inferrable. + inferred_rex_compute_size: Option<&'static str>, + + /// Other recipe to use when REX-prefixed. + when_prefixed: Option<Rc<Template<'builder>>>, + + // Parameters passed in the EncodingBits. + /// Value of the W bit (0 or 1), stored in the EncodingBits. + w_bit: u16, + /// Value of the RRR bits (between 0 and 0b111). + rrr_bits: u16, + /// Opcode bytes. 
+ op_bytes: &'static [u8], +} + +impl<'builder> Template<'builder> { + fn new(recipe: EncodingRecipeBuilder, regs: &'builder IsaRegs) -> Self { + Self { + regs, + recipe, + rex_kind: RecipePrefixKind::default(), + inferred_rex_compute_size: None, + when_prefixed: None, + w_bit: 0, + rrr_bits: 0, + op_bytes: &opcodes::EMPTY, + } + } + + fn name(&self) -> &str { + &self.recipe.name + } + fn rex_kind(self, kind: RecipePrefixKind) -> Self { + Self { + rex_kind: kind, + ..self + } + } + fn inferred_rex_compute_size(self, function: &'static str) -> Self { + Self { + inferred_rex_compute_size: Some(function), + ..self + } + } + fn when_prefixed(self, template: Rc<Template<'builder>>) -> Self { + assert!(self.when_prefixed.is_none()); + Self { + when_prefixed: Some(template), + ..self + } + } + + // Copy setters. + pub fn opcodes(&self, op_bytes: &'static [u8]) -> Self { + assert!(!op_bytes.is_empty()); + let mut copy = self.clone(); + copy.op_bytes = op_bytes; + copy + } + pub fn w(&self) -> Self { + let mut copy = self.clone(); + copy.w_bit = 1; + copy + } + pub fn rrr(&self, value: u16) -> Self { + assert!(value <= 0b111); + let mut copy = self.clone(); + copy.rrr_bits = value; + copy + } + pub fn nonrex(&self) -> Self { + assert!( + self.rex_kind != RecipePrefixKind::AlwaysEmitRex, + "Template requires REX prefix." + ); + let mut copy = self.clone(); + copy.rex_kind = RecipePrefixKind::NeverEmitRex; + copy + } + pub fn rex(&self) -> Self { + assert!( + self.rex_kind != RecipePrefixKind::NeverEmitRex, + "Template requires no REX prefix." + ); + if let Some(prefixed) = &self.when_prefixed { + let mut ret = prefixed.rex(); + // Forward specialized parameters. + ret.op_bytes = self.op_bytes; + ret.w_bit = self.w_bit; + ret.rrr_bits = self.rrr_bits; + return ret; + } + let mut copy = self.clone(); + copy.rex_kind = RecipePrefixKind::AlwaysEmitRex; + copy + } + pub fn infer_rex(&self) -> Self { + assert!( + self.rex_kind != RecipePrefixKind::NeverEmitRex, + "Template requires no REX prefix." + ); + assert!( + self.when_prefixed.is_none(), + "infer_rex used with when_prefixed()." + ); + let mut copy = self.clone(); + copy.rex_kind = RecipePrefixKind::InferRex; + copy + } + + pub fn build(mut self) -> (EncodingRecipe, u16) { + let (opcode, bits) = decode_opcodes(&self.op_bytes, self.rrr_bits, self.w_bit); + + let (recipe_name, size_addendum) = match self.rex_kind { + RecipePrefixKind::Unspecified | RecipePrefixKind::NeverEmitRex => { + // Ensure the operands are limited to non-REX constraints. + let operands_in = self.recipe.operands_in.unwrap_or_default(); + self.recipe.operands_in = Some(replace_nonrex_constraints(self.regs, operands_in)); + let operands_out = self.recipe.operands_out.unwrap_or_default(); + self.recipe.operands_out = + Some(replace_nonrex_constraints(self.regs, operands_out)); + + (opcode.into(), self.op_bytes.len() as u64) + } + RecipePrefixKind::AlwaysEmitRex => { + ("Rex".to_string() + opcode, self.op_bytes.len() as u64 + 1) + } + RecipePrefixKind::InferRex => { + assert_eq!(self.w_bit, 0, "A REX.W bit always requires a REX prefix; avoid using `infer_rex().w()` and use `rex().w()` instead."); + // Hook up the right function for inferred compute_size(). 
+ assert!( + self.inferred_rex_compute_size.is_some(), + "InferRex recipe '{}' needs an inferred_rex_compute_size function.", + &self.recipe.name + ); + self.recipe.compute_size = self.inferred_rex_compute_size; + + ("DynRex".to_string() + opcode, self.op_bytes.len() as u64) + } + RecipePrefixKind::Evex => { + // Allow the operands to expand limits to EVEX constraints. + let operands_in = self.recipe.operands_in.unwrap_or_default(); + self.recipe.operands_in = Some(replace_evex_constraints(self.regs, operands_in)); + let operands_out = self.recipe.operands_out.unwrap_or_default(); + self.recipe.operands_out = Some(replace_evex_constraints(self.regs, operands_out)); + + ("Evex".to_string() + opcode, 4 + 1) + } + }; + + self.recipe.base_size += size_addendum; + + // Branch ranges are relative to the end of the instruction. + // For InferRex, the range should be the minimum, assuming no REX. + if let Some(range) = self.recipe.branch_range.as_mut() { + range.inst_size += size_addendum; + } + + self.recipe.emit = replace_put_op(self.recipe.emit, &recipe_name); + self.recipe.name = recipe_name + &self.recipe.name; + + (self.recipe.build(), bits) + } +} + +/// Returns a predicate checking that the "cond" field of the instruction contains one of the +/// directly supported floating point condition codes. +fn supported_floatccs_predicate( + supported_cc: &[Literal], + format: &InstructionFormat, +) -> InstructionPredicate { + supported_cc + .iter() + .fold(InstructionPredicate::new(), |pred, literal| { + pred.or(InstructionPredicate::new_is_field_equal( + format, + "cond", + literal.to_rust_code(), + )) + }) +} + +/// Return an instruction predicate that checks if `iform.imm` is a valid `scale` for a SIB byte. +fn valid_scale(format: &InstructionFormat) -> InstructionPredicate { + ["1", "2", "4", "8"] + .iter() + .fold(InstructionPredicate::new(), |pred, &literal| { + pred.or(InstructionPredicate::new_is_field_equal( + format, + "imm", + literal.into(), + )) + }) +} + +pub(crate) fn define<'shared>( + shared_defs: &'shared SharedDefinitions, + settings: &'shared SettingGroup, + regs: &'shared IsaRegs, +) -> RecipeGroup<'shared> { + // The set of floating point condition codes that are directly supported. + // Other condition codes need to be reversed or expressed as two tests. + let floatcc = &shared_defs.imm.floatcc; + let supported_floatccs: Vec<Literal> = ["ord", "uno", "one", "ueq", "gt", "ge", "ult", "ule"] + .iter() + .map(|name| Literal::enumerator_for(floatcc, name)) + .collect(); + + // Register classes shorthands. + let abcd = regs.class_by_name("ABCD"); + let gpr = regs.class_by_name("GPR"); + let fpr = regs.class_by_name("FPR"); + let flag = regs.class_by_name("FLAG"); + + // Operand constraints shorthands. + let reg_rflags = Register::new(flag, regs.regunit_by_name(flag, "rflags")); + let reg_rax = Register::new(gpr, regs.regunit_by_name(gpr, "rax")); + let reg_rcx = Register::new(gpr, regs.regunit_by_name(gpr, "rcx")); + let reg_rdx = Register::new(gpr, regs.regunit_by_name(gpr, "rdx")); + let reg_r15 = Register::new(gpr, regs.regunit_by_name(gpr, "r15")); + let reg_xmm0 = Register::new(fpr, regs.regunit_by_name(fpr, "xmm0")); + + // Stack operand with a 32-bit signed displacement from either RBP or RSP. + let stack_gpr32 = Stack::new(gpr); + let stack_fpr32 = Stack::new(fpr); + + let formats = &shared_defs.formats; + + // Predicates shorthands. + let use_sse41 = settings.predicate_by_name("use_sse41"); + + // Definitions. 
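+    // The templates below are consumed from encodings.rs, where they get specialized before
+    // being built into final recipes. As a rough sketch (using the XOR opcode from opcodes.rs),
+    // a REX-prefixed variant of the `rr` template could be derived like this:
+    //
+    //     let (recipe, bits) = recipes.template("rr")
+    //         .opcodes(&opcodes::XOR) // 0x31, single-byte opcode map => "Op1"
+    //         .rex()                  // hardcode the REX prefix => name "RexOp1rr"
+    //         .build();
+    //
+    // Following the bit layout documented above, `bits` would be 0x0031 here (op = 0x31,
+    // pp = 00, mm = 00, rrr = 0, W = 0), and build() adds the opcode length plus one REX byte
+    // to the recipe's base size.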
+ let mut recipes = RecipeGroup::new(regs); + + // A null unary instruction that takes a GPR register. Can be used for identity copies and + // no-op conversions. + recipes.add_recipe( + EncodingRecipeBuilder::new("null", &formats.unary, 0) + .operands_in(vec![gpr]) + .operands_out(vec![0]) + .emit(""), + ); + recipes.add_recipe( + EncodingRecipeBuilder::new("null_fpr", &formats.unary, 0) + .operands_in(vec![fpr]) + .operands_out(vec![0]) + .emit(""), + ); + recipes.add_recipe( + EncodingRecipeBuilder::new("stacknull", &formats.unary, 0) + .operands_in(vec![stack_gpr32]) + .operands_out(vec![stack_gpr32]) + .emit(""), + ); + + recipes.add_recipe( + EncodingRecipeBuilder::new("get_pinned_reg", &formats.nullary, 0) + .operands_out(vec![reg_r15]) + .emit(""), + ); + // umr with a fixed register output that's r15. + recipes.add_template_recipe( + EncodingRecipeBuilder::new("set_pinned_reg", &formats.unary, 1) + .operands_in(vec![gpr]) + .clobbers_flags(false) + .emit( + r#" + let r15 = RU::r15.into(); + {{PUT_OP}}(bits, rex2(r15, in_reg0), sink); + modrm_rr(r15, in_reg0, sink); + "#, + ), + ); + + // No-op fills, created by late-stage redundant-fill removal. + recipes.add_recipe( + EncodingRecipeBuilder::new("fillnull", &formats.unary, 0) + .operands_in(vec![stack_gpr32]) + .operands_out(vec![gpr]) + .clobbers_flags(false) + .emit(""), + ); + recipes.add_recipe( + EncodingRecipeBuilder::new("ffillnull", &formats.unary, 0) + .operands_in(vec![stack_gpr32]) + .operands_out(vec![fpr]) + .clobbers_flags(false) + .emit(""), + ); + + recipes.add_recipe( + EncodingRecipeBuilder::new("debugtrap", &formats.nullary, 1).emit("sink.put1(0xcc);"), + ); + + // XX opcode, no ModR/M. + recipes.add_template_recipe(EncodingRecipeBuilder::new("trap", &formats.trap, 0).emit( + r#" + sink.trap(code, func.srclocs[inst]); + {{PUT_OP}}(bits, BASE_REX, sink); + "#, + )); + + // Macro: conditional jump over a ud2. + recipes.add_recipe( + EncodingRecipeBuilder::new("trapif", &formats.int_cond_trap, 4) + .operands_in(vec![reg_rflags]) + .clobbers_flags(false) + .emit( + r#" + // Jump over a 2-byte ud2. + sink.put1(0x70 | (icc2opc(cond.inverse()) as u8)); + sink.put1(2); + // ud2. + sink.trap(code, func.srclocs[inst]); + sink.put1(0x0f); + sink.put1(0x0b); + "#, + ), + ); + + recipes.add_recipe( + EncodingRecipeBuilder::new("trapff", &formats.float_cond_trap, 4) + .operands_in(vec![reg_rflags]) + .clobbers_flags(false) + .inst_predicate(supported_floatccs_predicate( + &supported_floatccs, + &*formats.float_cond_trap, + )) + .emit( + r#" + // Jump over a 2-byte ud2. + sink.put1(0x70 | (fcc2opc(cond.inverse()) as u8)); + sink.put1(2); + // ud2. + sink.trap(code, func.srclocs[inst]); + sink.put1(0x0f); + sink.put1(0x0b); + "#, + ), + ); + + // XX /r + recipes.add_template_inferred( + EncodingRecipeBuilder::new("rr", &formats.binary, 1) + .operands_in(vec![gpr, gpr]) + .operands_out(vec![0]) + .emit( + r#" + {{PUT_OP}}(bits, rex2(in_reg0, in_reg1), sink); + modrm_rr(in_reg0, in_reg1, sink); + "#, + ), + "size_with_inferred_rex_for_inreg0_inreg1", + ); + + // XX /r with operands swapped. (RM form). + recipes.add_template_inferred( + EncodingRecipeBuilder::new("rrx", &formats.binary, 1) + .operands_in(vec![gpr, gpr]) + .operands_out(vec![0]) + .emit( + r#" + {{PUT_OP}}(bits, rex2(in_reg1, in_reg0), sink); + modrm_rr(in_reg1, in_reg0, sink); + "#, + ), + "size_with_inferred_rex_for_inreg0_inreg1", + ); + + // XX /r with FPR ins and outs. A form. 
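+    // A note on the opcode shorthand used in the recipe comments: "XX /r" means the instruction
+    // has a ModR/M byte whose reg field names a register operand, "XX /n" means the reg field
+    // carries the opcode extension `n` (supplied through the rrr bits), "XX+rd" means the
+    // register is encoded in the low three bits of the opcode byte, and ib/id/iq denote an
+    // 8/32/64-bit immediate, following the Intel manual conventions.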
+ recipes.add_template_inferred( + EncodingRecipeBuilder::new("fa", &formats.binary, 1) + .operands_in(vec![fpr, fpr]) + .operands_out(vec![0]) + .emit( + r#" + {{PUT_OP}}(bits, rex2(in_reg1, in_reg0), sink); + modrm_rr(in_reg1, in_reg0, sink); + "#, + ), + "size_with_inferred_rex_for_inreg0_inreg1", + ); + + // XX /r with FPR ins and outs. A form with input operands swapped. + recipes.add_template_inferred( + EncodingRecipeBuilder::new("fax", &formats.binary, 1) + .operands_in(vec![fpr, fpr]) + .operands_out(vec![1]) + .emit( + r#" + {{PUT_OP}}(bits, rex2(in_reg0, in_reg1), sink); + modrm_rr(in_reg0, in_reg1, sink); + "#, + ), + // The operand order does not matter for calculating whether a REX prefix is needed. + "size_with_inferred_rex_for_inreg0_inreg1", + ); + + // XX /r with FPR ins and outs. A form with a byte immediate. + { + recipes.add_template_inferred( + EncodingRecipeBuilder::new("fa_ib", &formats.ternary_imm8, 2) + .operands_in(vec![fpr, fpr]) + .operands_out(vec![0]) + .inst_predicate(InstructionPredicate::new_is_unsigned_int( + &*formats.ternary_imm8, + "imm", + 8, + 0, + )) + .emit( + r#" + {{PUT_OP}}(bits, rex2(in_reg1, in_reg0), sink); + modrm_rr(in_reg1, in_reg0, sink); + let imm: i64 = imm.into(); + sink.put1(imm as u8); + "#, + ), + "size_with_inferred_rex_for_inreg0_inreg1", + ); + } + + // XX /n for a unary operation with extension bits. + recipes.add_template( + Template::new( + EncodingRecipeBuilder::new("ur", &formats.unary, 1) + .operands_in(vec![gpr]) + .operands_out(vec![0]) + .emit( + r#" + {{PUT_OP}}(bits, rex1(in_reg0), sink); + modrm_r_bits(in_reg0, bits, sink); + "#, + ), + regs, + ) + .inferred_rex_compute_size("size_with_inferred_rex_for_inreg0"), + ); + + // XX /r, but for a unary operator with separate input/output register, like + // copies. MR form, preserving flags. + recipes.add_template( + Template::new( + EncodingRecipeBuilder::new("umr", &formats.unary, 1) + .operands_in(vec![gpr]) + .operands_out(vec![gpr]) + .clobbers_flags(false) + .emit( + r#" + {{PUT_OP}}(bits, rex2(out_reg0, in_reg0), sink); + modrm_rr(out_reg0, in_reg0, sink); + "#, + ), + regs, + ) + .inferred_rex_compute_size("size_with_inferred_rex_for_inreg0_outreg0"), + ); + + // Same as umr, but with FPR -> GPR registers. + recipes.add_template_recipe( + EncodingRecipeBuilder::new("rfumr", &formats.unary, 1) + .operands_in(vec![fpr]) + .operands_out(vec![gpr]) + .clobbers_flags(false) + .emit( + r#" + {{PUT_OP}}(bits, rex2(out_reg0, in_reg0), sink); + modrm_rr(out_reg0, in_reg0, sink); + "#, + ), + ); + + // Same as umr, but with the source register specified directly. + recipes.add_template_recipe( + EncodingRecipeBuilder::new("umr_reg_to_ssa", &formats.copy_to_ssa, 1) + // No operands_in to mention, because a source register is specified directly. + .operands_out(vec![gpr]) + .clobbers_flags(false) + .emit( + r#" + {{PUT_OP}}(bits, rex2(out_reg0, src), sink); + modrm_rr(out_reg0, src, sink); + "#, + ), + ); + + // XX /r, but for a unary operator with separate input/output register. + // RM form. Clobbers FLAGS. + recipes.add_template_recipe( + EncodingRecipeBuilder::new("urm", &formats.unary, 1) + .operands_in(vec![gpr]) + .operands_out(vec![gpr]) + .emit( + r#" + {{PUT_OP}}(bits, rex2(in_reg0, out_reg0), sink); + modrm_rr(in_reg0, out_reg0, sink); + "#, + ), + ); + + // XX /r. Same as urm, but doesn't clobber FLAGS. 
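+    // As the MR/RM annotations above indicate, `rex2(rm, reg)` and `modrm_rr(rm, reg)` take the
+    // r/m operand first and the reg-field operand second: MR-form recipes such as `rr` and `umr`
+    // pass the destination register first, while RM-form recipes such as `rrx` and `urm` pass
+    // the source first.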
+ let urm_noflags = recipes.add_template_recipe( + EncodingRecipeBuilder::new("urm_noflags", &formats.unary, 1) + .operands_in(vec![gpr]) + .operands_out(vec![gpr]) + .clobbers_flags(false) + .emit( + r#" + {{PUT_OP}}(bits, rex2(in_reg0, out_reg0), sink); + modrm_rr(in_reg0, out_reg0, sink); + "#, + ), + ); + + // XX /r. Same as urm_noflags, but input limited to ABCD. + recipes.add_template( + Template::new( + EncodingRecipeBuilder::new("urm_noflags_abcd", &formats.unary, 1) + .operands_in(vec![abcd]) + .operands_out(vec![gpr]) + .clobbers_flags(false) + .emit( + r#" + {{PUT_OP}}(bits, rex2(in_reg0, out_reg0), sink); + modrm_rr(in_reg0, out_reg0, sink); + "#, + ), + regs, + ) + .when_prefixed(urm_noflags), + ); + + // XX /r, RM form, FPR -> FPR. + recipes.add_template_inferred( + EncodingRecipeBuilder::new("furm", &formats.unary, 1) + .operands_in(vec![fpr]) + .operands_out(vec![fpr]) + .clobbers_flags(false) + .emit( + r#" + {{PUT_OP}}(bits, rex2(in_reg0, out_reg0), sink); + modrm_rr(in_reg0, out_reg0, sink); + "#, + ), + "size_with_inferred_rex_for_inreg0_outreg0", + ); + + // Same as furm, but with the source register specified directly. + recipes.add_template_recipe( + EncodingRecipeBuilder::new("furm_reg_to_ssa", &formats.copy_to_ssa, 1) + // No operands_in to mention, because a source register is specified directly. + .operands_out(vec![fpr]) + .clobbers_flags(false) + .emit( + r#" + {{PUT_OP}}(bits, rex2(src, out_reg0), sink); + modrm_rr(src, out_reg0, sink); + "#, + ), + ); + + // XX /r, RM form, GPR -> FPR. + recipes.add_template_inferred( + EncodingRecipeBuilder::new("frurm", &formats.unary, 1) + .operands_in(vec![gpr]) + .operands_out(vec![fpr]) + .clobbers_flags(false) + .emit( + r#" + {{PUT_OP}}(bits, rex2(in_reg0, out_reg0), sink); + modrm_rr(in_reg0, out_reg0, sink); + "#, + ), + "size_with_inferred_rex_for_inreg0_outreg0", + ); + + // XX /r, RM form, FPR -> GPR. + recipes.add_template_recipe( + EncodingRecipeBuilder::new("rfurm", &formats.unary, 1) + .operands_in(vec![fpr]) + .operands_out(vec![gpr]) + .clobbers_flags(false) + .emit( + r#" + {{PUT_OP}}(bits, rex2(in_reg0, out_reg0), sink); + modrm_rr(in_reg0, out_reg0, sink); + "#, + ), + ); + + // XX /r, RMI form for one of the roundXX SSE 4.1 instructions. + recipes.add_template_recipe( + EncodingRecipeBuilder::new("furmi_rnd", &formats.unary, 2) + .operands_in(vec![fpr]) + .operands_out(vec![fpr]) + .isa_predicate(use_sse41) + .emit( + r#" + {{PUT_OP}}(bits, rex2(in_reg0, out_reg0), sink); + modrm_rr(in_reg0, out_reg0, sink); + sink.put1(match opcode { + Opcode::Nearest => 0b00, + Opcode::Floor => 0b01, + Opcode::Ceil => 0b10, + Opcode::Trunc => 0b11, + x => panic!("{} unexpected for furmi_rnd", opcode), + }); + "#, + ), + ); + + // XX /r, for regmove instructions. + recipes.add_template_recipe( + EncodingRecipeBuilder::new("rmov", &formats.reg_move, 1) + .operands_in(vec![gpr]) + .clobbers_flags(false) + .emit( + r#" + {{PUT_OP}}(bits, rex2(dst, src), sink); + modrm_rr(dst, src, sink); + "#, + ), + ); + + // XX /r, for regmove instructions (FPR version, RM encoded). + recipes.add_template_recipe( + EncodingRecipeBuilder::new("frmov", &formats.reg_move, 1) + .operands_in(vec![fpr]) + .clobbers_flags(false) + .emit( + r#" + {{PUT_OP}}(bits, rex2(src, dst), sink); + modrm_rr(src, dst, sink); + "#, + ), + ); + + // XX /n with one arg in %rcx, for shifts. 
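+    // The `when_prefixed(...)` hook used by urm_noflags_abcd above (and by some store recipes
+    // below) means that requesting `.rex()` on the ABCD-restricted template forwards to its
+    // unrestricted twin: with a REX prefix the low byte of every GPR becomes encodable, so the
+    // ABCD register class is only needed for the non-REX encoding.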
+ recipes.add_template_recipe( + EncodingRecipeBuilder::new("rc", &formats.binary, 1) + .operands_in(vec![ + OperandConstraint::RegClass(gpr), + OperandConstraint::FixedReg(reg_rcx), + ]) + .operands_out(vec![0]) + .emit( + r#" + {{PUT_OP}}(bits, rex1(in_reg0), sink); + modrm_r_bits(in_reg0, bits, sink); + "#, + ), + ); + + // XX /n for division: inputs in %rax, %rdx, r. Outputs in %rax, %rdx. + recipes.add_template( + Template::new( + EncodingRecipeBuilder::new("div", &formats.ternary, 1) + .operands_in(vec![ + OperandConstraint::FixedReg(reg_rax), + OperandConstraint::FixedReg(reg_rdx), + OperandConstraint::RegClass(gpr), + ]) + .operands_out(vec![reg_rax, reg_rdx]) + .emit( + r#" + sink.trap(TrapCode::IntegerDivisionByZero, func.srclocs[inst]); + {{PUT_OP}}(bits, rex1(in_reg2), sink); + modrm_r_bits(in_reg2, bits, sink); + "#, + ), + regs, + ) + .inferred_rex_compute_size("size_with_inferred_rex_for_inreg2"), + ); + + // XX /n for {s,u}mulx: inputs in %rax, r. Outputs in %rdx(hi):%rax(lo) + recipes.add_template( + Template::new( + EncodingRecipeBuilder::new("mulx", &formats.binary, 1) + .operands_in(vec![ + OperandConstraint::FixedReg(reg_rax), + OperandConstraint::RegClass(gpr), + ]) + .operands_out(vec![ + OperandConstraint::FixedReg(reg_rax), + OperandConstraint::FixedReg(reg_rdx), + ]) + .emit( + r#" + {{PUT_OP}}(bits, rex1(in_reg1), sink); + modrm_r_bits(in_reg1, bits, sink); + "#, + ), + regs, + ) + .inferred_rex_compute_size("size_with_inferred_rex_for_inreg1"), + ); + + // XX /r for BLEND* instructions + recipes.add_template_inferred( + EncodingRecipeBuilder::new("blend", &formats.ternary, 1) + .operands_in(vec![ + OperandConstraint::FixedReg(reg_xmm0), + OperandConstraint::RegClass(fpr), + OperandConstraint::RegClass(fpr), + ]) + .operands_out(vec![2]) + .emit( + r#" + {{PUT_OP}}(bits, rex2(in_reg1, in_reg2), sink); + modrm_rr(in_reg1, in_reg2, sink); + "#, + ), + "size_with_inferred_rex_for_inreg1_inreg2", + ); + + // XX /n ib with 8-bit immediate sign-extended. + { + recipes.add_template_inferred( + EncodingRecipeBuilder::new("r_ib", &formats.binary_imm64, 2) + .operands_in(vec![gpr]) + .operands_out(vec![0]) + .inst_predicate(InstructionPredicate::new_is_signed_int( + &*formats.binary_imm64, + "imm", + 8, + 0, + )) + .emit( + r#" + {{PUT_OP}}(bits, rex1(in_reg0), sink); + modrm_r_bits(in_reg0, bits, sink); + let imm: i64 = imm.into(); + sink.put1(imm as u8); + "#, + ), + "size_with_inferred_rex_for_inreg0", + ); + + recipes.add_template_inferred( + EncodingRecipeBuilder::new("f_ib", &formats.binary_imm64, 2) + .operands_in(vec![fpr]) + .operands_out(vec![0]) + .inst_predicate(InstructionPredicate::new_is_signed_int( + &*formats.binary_imm64, + "imm", + 8, + 0, + )) + .emit( + r#" + {{PUT_OP}}(bits, rex1(in_reg0), sink); + modrm_r_bits(in_reg0, bits, sink); + let imm: i64 = imm.into(); + sink.put1(imm as u8); + "#, + ), + "size_with_inferred_rex_for_inreg0", + ); + + // XX /n id with 32-bit immediate sign-extended. + recipes.add_template( + Template::new( + EncodingRecipeBuilder::new("r_id", &formats.binary_imm64, 5) + .operands_in(vec![gpr]) + .operands_out(vec![0]) + .inst_predicate(InstructionPredicate::new_is_signed_int( + &*formats.binary_imm64, + "imm", + 32, + 0, + )) + .emit( + r#" + {{PUT_OP}}(bits, rex1(in_reg0), sink); + modrm_r_bits(in_reg0, bits, sink); + let imm: i64 = imm.into(); + sink.put4(imm as u32); + "#, + ), + regs, + ) + .inferred_rex_compute_size("size_with_inferred_rex_for_inreg0"), + ); + } + + // XX /r ib with 8-bit unsigned immediate (e.g. 
for pshufd) + { + recipes.add_template_inferred( + EncodingRecipeBuilder::new("r_ib_unsigned_fpr", &formats.binary_imm8, 2) + .operands_in(vec![fpr]) + .operands_out(vec![fpr]) + .inst_predicate(InstructionPredicate::new_is_unsigned_int( + &*formats.binary_imm8, + "imm", + 8, + 0, + )) + .emit( + r#" + {{PUT_OP}}(bits, rex2(in_reg0, out_reg0), sink); + modrm_rr(in_reg0, out_reg0, sink); + let imm: i64 = imm.into(); + sink.put1(imm as u8); + "#, + ), + "size_with_inferred_rex_for_inreg0_outreg0", + ); + } + + // XX /r ib with 8-bit unsigned immediate (e.g. for extractlane) + { + recipes.add_template_inferred( + EncodingRecipeBuilder::new("r_ib_unsigned_gpr", &formats.binary_imm8, 2) + .operands_in(vec![fpr]) + .operands_out(vec![gpr]) + .inst_predicate(InstructionPredicate::new_is_unsigned_int( + &*formats.binary_imm8, "imm", 8, 0, + )) + .emit( + r#" + {{PUT_OP}}(bits, rex2(out_reg0, in_reg0), sink); + modrm_rr(out_reg0, in_reg0, sink); // note the flipped register in the ModR/M byte + let imm: i64 = imm.into(); + sink.put1(imm as u8); + "#, + ), "size_with_inferred_rex_for_inreg0_outreg0" + ); + } + + // XX /r ib with 8-bit unsigned immediate (e.g. for insertlane) + { + recipes.add_template_inferred( + EncodingRecipeBuilder::new("r_ib_unsigned_r", &formats.ternary_imm8, 2) + .operands_in(vec![fpr, gpr]) + .operands_out(vec![0]) + .inst_predicate(InstructionPredicate::new_is_unsigned_int( + &*formats.ternary_imm8, + "imm", + 8, + 0, + )) + .emit( + r#" + {{PUT_OP}}(bits, rex2(in_reg1, in_reg0), sink); + modrm_rr(in_reg1, in_reg0, sink); + let imm: i64 = imm.into(); + sink.put1(imm as u8); + "#, + ), + "size_with_inferred_rex_for_inreg0_inreg1", + ); + } + + { + // XX /n id with 32-bit immediate sign-extended. UnaryImm version. + recipes.add_template_recipe( + EncodingRecipeBuilder::new("u_id", &formats.unary_imm, 5) + .operands_out(vec![gpr]) + .inst_predicate(InstructionPredicate::new_is_signed_int( + &*formats.unary_imm, + "imm", + 32, + 0, + )) + .emit( + r#" + {{PUT_OP}}(bits, rex1(out_reg0), sink); + modrm_r_bits(out_reg0, bits, sink); + let imm: i64 = imm.into(); + sink.put4(imm as u32); + "#, + ), + ); + } + + // XX+rd id unary with 32-bit immediate. Note no recipe predicate. + recipes.add_template_recipe( + EncodingRecipeBuilder::new("pu_id", &formats.unary_imm, 4) + .operands_out(vec![gpr]) + .emit( + r#" + // The destination register is encoded in the low bits of the opcode. + // No ModR/M. + {{PUT_OP}}(bits | (out_reg0 & 7), rex1(out_reg0), sink); + let imm: i64 = imm.into(); + sink.put4(imm as u32); + "#, + ), + ); + + // XX+rd id unary with bool immediate. Note no recipe predicate. + recipes.add_template_recipe( + EncodingRecipeBuilder::new("pu_id_bool", &formats.unary_bool, 4) + .operands_out(vec![gpr]) + .emit( + r#" + // The destination register is encoded in the low bits of the opcode. + // No ModR/M. + {{PUT_OP}}(bits | (out_reg0 & 7), rex1(out_reg0), sink); + let imm: u32 = if imm { 1 } else { 0 }; + sink.put4(imm); + "#, + ), + ); + + // XX+rd id nullary with 0 as 32-bit immediate. Note no recipe predicate. + recipes.add_template_recipe( + EncodingRecipeBuilder::new("pu_id_ref", &formats.nullary, 4) + .operands_out(vec![gpr]) + .emit( + r#" + // The destination register is encoded in the low bits of the opcode. + // No ModR/M. + {{PUT_OP}}(bits | (out_reg0 & 7), rex1(out_reg0), sink); + sink.put4(0); + "#, + ), + ); + + // XX+rd iq unary with 64-bit immediate. 
+ recipes.add_template_recipe( + EncodingRecipeBuilder::new("pu_iq", &formats.unary_imm, 8) + .operands_out(vec![gpr]) + .emit( + r#" + {{PUT_OP}}(bits | (out_reg0 & 7), rex1(out_reg0), sink); + let imm: i64 = imm.into(); + sink.put8(imm as u64); + "#, + ), + ); + + // XX+rd id unary with zero immediate. + recipes.add_template_recipe( + EncodingRecipeBuilder::new("u_id_z", &formats.unary_imm, 1) + .operands_out(vec![gpr]) + .emit( + r#" + {{PUT_OP}}(bits, rex2(out_reg0, out_reg0), sink); + modrm_rr(out_reg0, out_reg0, sink); + "#, + ), + ); + + // XX /n Unary with floating point 32-bit immediate equal to zero. + { + recipes.add_template_recipe( + EncodingRecipeBuilder::new("f32imm_z", &formats.unary_ieee32, 1) + .operands_out(vec![fpr]) + .inst_predicate(InstructionPredicate::new_is_zero_32bit_float( + &*formats.unary_ieee32, + "imm", + )) + .emit( + r#" + {{PUT_OP}}(bits, rex2(out_reg0, out_reg0), sink); + modrm_rr(out_reg0, out_reg0, sink); + "#, + ), + ); + } + + // XX /n Unary with floating point 64-bit immediate equal to zero. + { + recipes.add_template_recipe( + EncodingRecipeBuilder::new("f64imm_z", &formats.unary_ieee64, 1) + .operands_out(vec![fpr]) + .inst_predicate(InstructionPredicate::new_is_zero_64bit_float( + &*formats.unary_ieee64, + "imm", + )) + .emit( + r#" + {{PUT_OP}}(bits, rex2(out_reg0, out_reg0), sink); + modrm_rr(out_reg0, out_reg0, sink); + "#, + ), + ); + } + + recipes.add_template_recipe( + EncodingRecipeBuilder::new("pushq", &formats.unary, 0) + .operands_in(vec![gpr]) + .emit( + r#" + sink.trap(TrapCode::StackOverflow, func.srclocs[inst]); + {{PUT_OP}}(bits | (in_reg0 & 7), rex1(in_reg0), sink); + "#, + ), + ); + + recipes.add_template_recipe( + EncodingRecipeBuilder::new("popq", &formats.nullary, 0) + .operands_out(vec![gpr]) + .emit( + r#" + {{PUT_OP}}(bits | (out_reg0 & 7), rex1(out_reg0), sink); + "#, + ), + ); + + // XX /r, for regmove instructions. + recipes.add_template_recipe( + EncodingRecipeBuilder::new("copysp", &formats.copy_special, 1) + .clobbers_flags(false) + .emit( + r#" + {{PUT_OP}}(bits, rex2(dst, src), sink); + modrm_rr(dst, src, sink); + "#, + ), + ); + + recipes.add_template_recipe( + EncodingRecipeBuilder::new("adjustsp", &formats.unary, 1) + .operands_in(vec![gpr]) + .emit( + r#" + {{PUT_OP}}(bits, rex2(RU::rsp.into(), in_reg0), sink); + modrm_rr(RU::rsp.into(), in_reg0, sink); + "#, + ), + ); + + { + recipes.add_template_recipe( + EncodingRecipeBuilder::new("adjustsp_ib", &formats.unary_imm, 2) + .inst_predicate(InstructionPredicate::new_is_signed_int( + &*formats.unary_imm, + "imm", + 8, + 0, + )) + .emit( + r#" + {{PUT_OP}}(bits, rex1(RU::rsp.into()), sink); + modrm_r_bits(RU::rsp.into(), bits, sink); + let imm: i64 = imm.into(); + sink.put1(imm as u8); + "#, + ), + ); + + recipes.add_template_recipe( + EncodingRecipeBuilder::new("adjustsp_id", &formats.unary_imm, 5) + .inst_predicate(InstructionPredicate::new_is_signed_int( + &*formats.unary_imm, + "imm", + 32, + 0, + )) + .emit( + r#" + {{PUT_OP}}(bits, rex1(RU::rsp.into()), sink); + modrm_r_bits(RU::rsp.into(), bits, sink); + let imm: i64 = imm.into(); + sink.put4(imm as u32); + "#, + ), + ); + } + + recipes.add_recipe( + EncodingRecipeBuilder::new("dummy_sarg_t", &formats.nullary, 0) + .operands_out(vec![Stack::new(gpr)]) + .emit(""), + ); + + // XX+rd id with Abs4 function relocation. 
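+    // In the XX+rd recipes above and below, `bits | (out_reg0 & 7)` folds the low three bits of
+    // the destination register into the opcode byte itself (the classic B8+rd shape of
+    // `mov r32, imm32`), while `rex1(out_reg0)` supplies the fourth register bit through REX.B
+    // when one of r8-r15 is chosen.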
+ recipes.add_template_recipe( + EncodingRecipeBuilder::new("fnaddr4", &formats.func_addr, 4) + .operands_out(vec![gpr]) + .emit( + r#" + {{PUT_OP}}(bits | (out_reg0 & 7), rex1(out_reg0), sink); + sink.reloc_external(func.srclocs[inst], + Reloc::Abs4, + &func.dfg.ext_funcs[func_ref].name, + 0); + sink.put4(0); + "#, + ), + ); + + // XX+rd iq with Abs8 function relocation. + recipes.add_template_recipe( + EncodingRecipeBuilder::new("fnaddr8", &formats.func_addr, 8) + .operands_out(vec![gpr]) + .emit( + r#" + {{PUT_OP}}(bits | (out_reg0 & 7), rex1(out_reg0), sink); + sink.reloc_external(func.srclocs[inst], + Reloc::Abs8, + &func.dfg.ext_funcs[func_ref].name, + 0); + sink.put8(0); + "#, + ), + ); + + // Similar to fnaddr4, but writes !0 (this is used by BaldrMonkey). + recipes.add_template_recipe( + EncodingRecipeBuilder::new("allones_fnaddr4", &formats.func_addr, 4) + .operands_out(vec![gpr]) + .emit( + r#" + {{PUT_OP}}(bits | (out_reg0 & 7), rex1(out_reg0), sink); + sink.reloc_external(func.srclocs[inst], + Reloc::Abs4, + &func.dfg.ext_funcs[func_ref].name, + 0); + // Write the immediate as `!0` for the benefit of BaldrMonkey. + sink.put4(!0); + "#, + ), + ); + + // Similar to fnaddr8, but writes !0 (this is used by BaldrMonkey). + recipes.add_template_recipe( + EncodingRecipeBuilder::new("allones_fnaddr8", &formats.func_addr, 8) + .operands_out(vec![gpr]) + .emit( + r#" + {{PUT_OP}}(bits | (out_reg0 & 7), rex1(out_reg0), sink); + sink.reloc_external(func.srclocs[inst], + Reloc::Abs8, + &func.dfg.ext_funcs[func_ref].name, + 0); + // Write the immediate as `!0` for the benefit of BaldrMonkey. + sink.put8(!0); + "#, + ), + ); + + recipes.add_template_recipe( + EncodingRecipeBuilder::new("pcrel_fnaddr8", &formats.func_addr, 5) + .operands_out(vec![gpr]) + // rex2 gets passed 0 for r/m register because the upper bit of + // r/m doesn't get decoded when in rip-relative addressing mode. + .emit( + r#" + {{PUT_OP}}(bits, rex2(0, out_reg0), sink); + modrm_riprel(out_reg0, sink); + // The addend adjusts for the difference between the end of the + // instruction and the beginning of the immediate field. + sink.reloc_external(func.srclocs[inst], + Reloc::X86PCRel4, + &func.dfg.ext_funcs[func_ref].name, + -4); + sink.put4(0); + "#, + ), + ); + + recipes.add_template_recipe( + EncodingRecipeBuilder::new("got_fnaddr8", &formats.func_addr, 5) + .operands_out(vec![gpr]) + // rex2 gets passed 0 for r/m register because the upper bit of + // r/m doesn't get decoded when in rip-relative addressing mode. + .emit( + r#" + {{PUT_OP}}(bits, rex2(0, out_reg0), sink); + modrm_riprel(out_reg0, sink); + // The addend adjusts for the difference between the end of the + // instruction and the beginning of the immediate field. + sink.reloc_external(func.srclocs[inst], + Reloc::X86GOTPCRel4, + &func.dfg.ext_funcs[func_ref].name, + -4); + sink.put4(0); + "#, + ), + ); + + // XX+rd id with Abs4 globalsym relocation. + recipes.add_template_recipe( + EncodingRecipeBuilder::new("gvaddr4", &formats.unary_global_value, 4) + .operands_out(vec![gpr]) + .emit( + r#" + {{PUT_OP}}(bits | (out_reg0 & 7), rex1(out_reg0), sink); + sink.reloc_external(func.srclocs[inst], + Reloc::Abs4, + &func.global_values[global_value].symbol_name(), + 0); + sink.put4(0); + "#, + ), + ); + + // XX+rd iq with Abs8 globalsym relocation. 
+ recipes.add_template_recipe( + EncodingRecipeBuilder::new("gvaddr8", &formats.unary_global_value, 8) + .operands_out(vec![gpr]) + .emit( + r#" + {{PUT_OP}}(bits | (out_reg0 & 7), rex1(out_reg0), sink); + sink.reloc_external(func.srclocs[inst], + Reloc::Abs8, + &func.global_values[global_value].symbol_name(), + 0); + sink.put8(0); + "#, + ), + ); + + // XX+rd iq with PCRel4 globalsym relocation. + recipes.add_template_recipe( + EncodingRecipeBuilder::new("pcrel_gvaddr8", &formats.unary_global_value, 5) + .operands_out(vec![gpr]) + .emit( + r#" + {{PUT_OP}}(bits, rex2(0, out_reg0), sink); + modrm_rm(5, out_reg0, sink); + // The addend adjusts for the difference between the end of the + // instruction and the beginning of the immediate field. + sink.reloc_external(func.srclocs[inst], + Reloc::X86PCRel4, + &func.global_values[global_value].symbol_name(), + -4); + sink.put4(0); + "#, + ), + ); + + // XX+rd iq with Abs8 globalsym relocation. + recipes.add_template_recipe( + EncodingRecipeBuilder::new("got_gvaddr8", &formats.unary_global_value, 5) + .operands_out(vec![gpr]) + .emit( + r#" + {{PUT_OP}}(bits, rex2(0, out_reg0), sink); + modrm_rm(5, out_reg0, sink); + // The addend adjusts for the difference between the end of the + // instruction and the beginning of the immediate field. + sink.reloc_external(func.srclocs[inst], + Reloc::X86GOTPCRel4, + &func.global_values[global_value].symbol_name(), + -4); + sink.put4(0); + "#, + ), + ); + + // Stack addresses. + // + // TODO Alternative forms for 8-bit immediates, when applicable. + + recipes.add_template_recipe( + EncodingRecipeBuilder::new("spaddr_id", &formats.stack_load, 6) + .operands_out(vec![gpr]) + .emit( + r#" + let sp = StackRef::sp(stack_slot, &func.stack_slots); + let base = stk_base(sp.base); + {{PUT_OP}}(bits, rex2(base, out_reg0), sink); + modrm_sib_disp32(out_reg0, sink); + sib_noindex(base, sink); + let imm : i32 = offset.into(); + sink.put4(sp.offset.checked_add(imm).unwrap() as u32); + "#, + ), + ); + + // Constant addresses. + + recipes.add_template_recipe( + EncodingRecipeBuilder::new("const_addr", &formats.unary_const, 5) + .operands_out(vec![gpr]) + .clobbers_flags(false) + .emit( + r#" + {{PUT_OP}}(bits, rex2(0, out_reg0), sink); + modrm_riprel(out_reg0, sink); + const_disp4(constant_handle, func, sink); + "#, + ), + ); + + // Store recipes. + + { + // Simple stores. + + // A predicate asking if the offset is zero. + let has_no_offset = + InstructionPredicate::new_is_field_equal(&*formats.store, "offset", "0".into()); + + // XX /r register-indirect store with no offset. + let st = recipes.add_template_recipe( + EncodingRecipeBuilder::new("st", &formats.store, 1) + .operands_in(vec![gpr, gpr]) + .inst_predicate(has_no_offset.clone()) + .clobbers_flags(false) + .compute_size("size_plus_maybe_sib_or_offset_for_inreg_1") + .emit( + r#" + if !flags.notrap() { + sink.trap(TrapCode::HeapOutOfBounds, func.srclocs[inst]); + } + {{PUT_OP}}(bits, rex2(in_reg1, in_reg0), sink); + if needs_sib_byte(in_reg1) { + modrm_sib(in_reg0, sink); + sib_noindex(in_reg1, sink); + } else if needs_offset(in_reg1) { + modrm_disp8(in_reg1, in_reg0, sink); + sink.put1(0); + } else { + modrm_rm(in_reg1, in_reg0, sink); + } + "#, + ), + ); + + // XX /r register-indirect store with no offset. + // Only ABCD allowed for stored value. This is for byte stores with no REX. 
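+        // The needs_sib_byte()/needs_offset() checks in these store (and later load) recipes
+        // handle two ModR/M special cases: a base of RSP or R12 (r/m = 0b100) always needs an
+        // SIB byte, and a base of RBP or R13 with mod = 00 would be decoded as
+        // disp32/RIP-relative, so those bases get an explicit zero 8-bit displacement instead.
+        // The size_plus_maybe_* compute_size handlers account for the extra byte.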
+ recipes.add_template( + Template::new( + EncodingRecipeBuilder::new("st_abcd", &formats.store, 1) + .operands_in(vec![abcd, gpr]) + .inst_predicate(has_no_offset.clone()) + .clobbers_flags(false) + .compute_size("size_plus_maybe_sib_or_offset_for_inreg_1") + .emit( + r#" + if !flags.notrap() { + sink.trap(TrapCode::HeapOutOfBounds, func.srclocs[inst]); + } + {{PUT_OP}}(bits, rex2(in_reg1, in_reg0), sink); + if needs_sib_byte(in_reg1) { + modrm_sib(in_reg0, sink); + sib_noindex(in_reg1, sink); + } else if needs_offset(in_reg1) { + modrm_disp8(in_reg1, in_reg0, sink); + sink.put1(0); + } else { + modrm_rm(in_reg1, in_reg0, sink); + } + "#, + ), + regs, + ) + .when_prefixed(st), + ); + + // XX /r register-indirect store of FPR with no offset. + recipes.add_template_inferred( + EncodingRecipeBuilder::new("fst", &formats.store, 1) + .operands_in(vec![fpr, gpr]) + .inst_predicate(has_no_offset) + .clobbers_flags(false) + .compute_size("size_plus_maybe_sib_or_offset_for_inreg_1") + .emit( + r#" + if !flags.notrap() { + sink.trap(TrapCode::HeapOutOfBounds, func.srclocs[inst]); + } + {{PUT_OP}}(bits, rex2(in_reg1, in_reg0), sink); + if needs_sib_byte(in_reg1) { + modrm_sib(in_reg0, sink); + sib_noindex(in_reg1, sink); + } else if needs_offset(in_reg1) { + modrm_disp8(in_reg1, in_reg0, sink); + sink.put1(0); + } else { + modrm_rm(in_reg1, in_reg0, sink); + } + "#, + ), + "size_plus_maybe_sib_or_offset_inreg1_plus_rex_prefix_for_inreg0_inreg1", + ); + + let has_small_offset = + InstructionPredicate::new_is_signed_int(&*formats.store, "offset", 8, 0); + + // XX /r register-indirect store with 8-bit offset. + let st_disp8 = recipes.add_template_recipe( + EncodingRecipeBuilder::new("stDisp8", &formats.store, 2) + .operands_in(vec![gpr, gpr]) + .inst_predicate(has_small_offset.clone()) + .clobbers_flags(false) + .compute_size("size_plus_maybe_sib_for_inreg_1") + .emit( + r#" + if !flags.notrap() { + sink.trap(TrapCode::HeapOutOfBounds, func.srclocs[inst]); + } + {{PUT_OP}}(bits, rex2(in_reg1, in_reg0), sink); + if needs_sib_byte(in_reg1) { + modrm_sib_disp8(in_reg0, sink); + sib_noindex(in_reg1, sink); + } else { + modrm_disp8(in_reg1, in_reg0, sink); + } + let offset: i32 = offset.into(); + sink.put1(offset as u8); + "#, + ), + ); + + // XX /r register-indirect store with 8-bit offset. + // Only ABCD allowed for stored value. This is for byte stores with no REX. + recipes.add_template( + Template::new( + EncodingRecipeBuilder::new("stDisp8_abcd", &formats.store, 2) + .operands_in(vec![abcd, gpr]) + .inst_predicate(has_small_offset.clone()) + .clobbers_flags(false) + .compute_size("size_plus_maybe_sib_for_inreg_1") + .emit( + r#" + if !flags.notrap() { + sink.trap(TrapCode::HeapOutOfBounds, func.srclocs[inst]); + } + {{PUT_OP}}(bits, rex2(in_reg1, in_reg0), sink); + if needs_sib_byte(in_reg1) { + modrm_sib_disp8(in_reg0, sink); + sib_noindex(in_reg1, sink); + } else { + modrm_disp8(in_reg1, in_reg0, sink); + } + let offset: i32 = offset.into(); + sink.put1(offset as u8); + "#, + ), + regs, + ) + .when_prefixed(st_disp8), + ); + + // XX /r register-indirect store with 8-bit offset of FPR. 
+ recipes.add_template_inferred( + EncodingRecipeBuilder::new("fstDisp8", &formats.store, 2) + .operands_in(vec![fpr, gpr]) + .inst_predicate(has_small_offset) + .clobbers_flags(false) + .compute_size("size_plus_maybe_sib_for_inreg_1") + .emit( + r#" + if !flags.notrap() { + sink.trap(TrapCode::HeapOutOfBounds, func.srclocs[inst]); + } + {{PUT_OP}}(bits, rex2(in_reg1, in_reg0), sink); + if needs_sib_byte(in_reg1) { + modrm_sib_disp8(in_reg0, sink); + sib_noindex(in_reg1, sink); + } else { + modrm_disp8(in_reg1, in_reg0, sink); + } + let offset: i32 = offset.into(); + sink.put1(offset as u8); + "#, + ), + "size_plus_maybe_sib_inreg1_plus_rex_prefix_for_inreg0_inreg1", + ); + + // XX /r register-indirect store with 32-bit offset. + let st_disp32 = recipes.add_template_recipe( + EncodingRecipeBuilder::new("stDisp32", &formats.store, 5) + .operands_in(vec![gpr, gpr]) + .clobbers_flags(false) + .compute_size("size_plus_maybe_sib_for_inreg_1") + .emit( + r#" + if !flags.notrap() { + sink.trap(TrapCode::HeapOutOfBounds, func.srclocs[inst]); + } + {{PUT_OP}}(bits, rex2(in_reg1, in_reg0), sink); + if needs_sib_byte(in_reg1) { + modrm_sib_disp32(in_reg0, sink); + sib_noindex(in_reg1, sink); + } else { + modrm_disp32(in_reg1, in_reg0, sink); + } + let offset: i32 = offset.into(); + sink.put4(offset as u32); + "#, + ), + ); + + // XX /r register-indirect store with 32-bit offset. + // Only ABCD allowed for stored value. This is for byte stores with no REX. + recipes.add_template( + Template::new( + EncodingRecipeBuilder::new("stDisp32_abcd", &formats.store, 5) + .operands_in(vec![abcd, gpr]) + .clobbers_flags(false) + .compute_size("size_plus_maybe_sib_for_inreg_1") + .emit( + r#" + if !flags.notrap() { + sink.trap(TrapCode::HeapOutOfBounds, func.srclocs[inst]); + } + {{PUT_OP}}(bits, rex2(in_reg1, in_reg0), sink); + if needs_sib_byte(in_reg1) { + modrm_sib_disp32(in_reg0, sink); + sib_noindex(in_reg1, sink); + } else { + modrm_disp32(in_reg1, in_reg0, sink); + } + let offset: i32 = offset.into(); + sink.put4(offset as u32); + "#, + ), + regs, + ) + .when_prefixed(st_disp32), + ); + + // XX /r register-indirect store with 32-bit offset of FPR. + recipes.add_template_inferred( + EncodingRecipeBuilder::new("fstDisp32", &formats.store, 5) + .operands_in(vec![fpr, gpr]) + .clobbers_flags(false) + .compute_size("size_plus_maybe_sib_for_inreg_1") + .emit( + r#" + if !flags.notrap() { + sink.trap(TrapCode::HeapOutOfBounds, func.srclocs[inst]); + } + {{PUT_OP}}(bits, rex2(in_reg1, in_reg0), sink); + if needs_sib_byte(in_reg1) { + modrm_sib_disp32(in_reg0, sink); + sib_noindex(in_reg1, sink); + } else { + modrm_disp32(in_reg1, in_reg0, sink); + } + let offset: i32 = offset.into(); + sink.put4(offset as u32); + "#, + ), + "size_plus_maybe_sib_inreg1_plus_rex_prefix_for_inreg0_inreg1", + ); + } + + { + // Complex stores. + + // A predicate asking if the offset is zero. + let has_no_offset = + InstructionPredicate::new_is_field_equal(&*formats.store_complex, "offset", "0".into()); + + // XX /r register-indirect store with index and no offset. + recipes.add_template_recipe( + EncodingRecipeBuilder::new("stWithIndex", &formats.store_complex, 2) + .operands_in(vec![gpr, gpr, gpr]) + .inst_predicate(has_no_offset.clone()) + .clobbers_flags(false) + .compute_size("size_plus_maybe_offset_for_inreg_1") + .emit( + r#" + if !flags.notrap() { + sink.trap(TrapCode::HeapOutOfBounds, func.srclocs[inst]); + } + {{PUT_OP}}(bits, rex3(in_reg1, in_reg0, in_reg2), sink); + // The else branch always inserts an SIB byte. 
+ if needs_offset(in_reg1) { + modrm_sib_disp8(in_reg0, sink); + sib(0, in_reg2, in_reg1, sink); + sink.put1(0); + } else { + modrm_sib(in_reg0, sink); + sib(0, in_reg2, in_reg1, sink); + } + "#, + ), + ); + + // XX /r register-indirect store with index and no offset. + // Only ABCD allowed for stored value. This is for byte stores with no REX. + recipes.add_template_recipe( + EncodingRecipeBuilder::new("stWithIndex_abcd", &formats.store_complex, 2) + .operands_in(vec![abcd, gpr, gpr]) + .inst_predicate(has_no_offset.clone()) + .clobbers_flags(false) + .compute_size("size_plus_maybe_offset_for_inreg_1") + .emit( + r#" + if !flags.notrap() { + sink.trap(TrapCode::HeapOutOfBounds, func.srclocs[inst]); + } + {{PUT_OP}}(bits, rex3(in_reg1, in_reg0, in_reg2), sink); + // The else branch always inserts an SIB byte. + if needs_offset(in_reg1) { + modrm_sib_disp8(in_reg0, sink); + sib(0, in_reg2, in_reg1, sink); + sink.put1(0); + } else { + modrm_sib(in_reg0, sink); + sib(0, in_reg2, in_reg1, sink); + } + "#, + ), + ); + + // XX /r register-indirect store with index and no offset of FPR. + recipes.add_template_recipe( + EncodingRecipeBuilder::new("fstWithIndex", &formats.store_complex, 2) + .operands_in(vec![fpr, gpr, gpr]) + .inst_predicate(has_no_offset) + .clobbers_flags(false) + .compute_size("size_plus_maybe_offset_for_inreg_1") + .emit( + r#" + if !flags.notrap() { + sink.trap(TrapCode::HeapOutOfBounds, func.srclocs[inst]); + } + {{PUT_OP}}(bits, rex3(in_reg1, in_reg0, in_reg2), sink); + // The else branch always inserts an SIB byte. + if needs_offset(in_reg1) { + modrm_sib_disp8(in_reg0, sink); + sib(0, in_reg2, in_reg1, sink); + sink.put1(0); + } else { + modrm_sib(in_reg0, sink); + sib(0, in_reg2, in_reg1, sink); + } + "#, + ), + ); + + let has_small_offset = + InstructionPredicate::new_is_signed_int(&*formats.store_complex, "offset", 8, 0); + + // XX /r register-indirect store with index and 8-bit offset. + recipes.add_template_recipe( + EncodingRecipeBuilder::new("stWithIndexDisp8", &formats.store_complex, 3) + .operands_in(vec![gpr, gpr, gpr]) + .inst_predicate(has_small_offset.clone()) + .clobbers_flags(false) + .emit( + r#" + if !flags.notrap() { + sink.trap(TrapCode::HeapOutOfBounds, func.srclocs[inst]); + } + {{PUT_OP}}(bits, rex3(in_reg1, in_reg0, in_reg2), sink); + modrm_sib_disp8(in_reg0, sink); + sib(0, in_reg2, in_reg1, sink); + let offset: i32 = offset.into(); + sink.put1(offset as u8); + "#, + ), + ); + + // XX /r register-indirect store with index and 8-bit offset. + // Only ABCD allowed for stored value. This is for byte stores with no REX. + recipes.add_template_recipe( + EncodingRecipeBuilder::new("stWithIndexDisp8_abcd", &formats.store_complex, 3) + .operands_in(vec![abcd, gpr, gpr]) + .inst_predicate(has_small_offset.clone()) + .clobbers_flags(false) + .emit( + r#" + if !flags.notrap() { + sink.trap(TrapCode::HeapOutOfBounds, func.srclocs[inst]); + } + {{PUT_OP}}(bits, rex3(in_reg1, in_reg0, in_reg2), sink); + modrm_sib_disp8(in_reg0, sink); + sib(0, in_reg2, in_reg1, sink); + let offset: i32 = offset.into(); + sink.put1(offset as u8); + "#, + ), + ); + + // XX /r register-indirect store with index and 8-bit offset of FPR. 
+ recipes.add_template_recipe( + EncodingRecipeBuilder::new("fstWithIndexDisp8", &formats.store_complex, 3) + .operands_in(vec![fpr, gpr, gpr]) + .inst_predicate(has_small_offset) + .clobbers_flags(false) + .emit( + r#" + if !flags.notrap() { + sink.trap(TrapCode::HeapOutOfBounds, func.srclocs[inst]); + } + {{PUT_OP}}(bits, rex3(in_reg1, in_reg0, in_reg2), sink); + modrm_sib_disp8(in_reg0, sink); + sib(0, in_reg2, in_reg1, sink); + let offset: i32 = offset.into(); + sink.put1(offset as u8); + "#, + ), + ); + + let has_big_offset = + InstructionPredicate::new_is_signed_int(&*formats.store_complex, "offset", 32, 0); + + // XX /r register-indirect store with index and 32-bit offset. + recipes.add_template_recipe( + EncodingRecipeBuilder::new("stWithIndexDisp32", &formats.store_complex, 6) + .operands_in(vec![gpr, gpr, gpr]) + .inst_predicate(has_big_offset.clone()) + .clobbers_flags(false) + .emit( + r#" + if !flags.notrap() { + sink.trap(TrapCode::HeapOutOfBounds, func.srclocs[inst]); + } + {{PUT_OP}}(bits, rex3(in_reg1, in_reg0, in_reg2), sink); + modrm_sib_disp32(in_reg0, sink); + sib(0, in_reg2, in_reg1, sink); + let offset: i32 = offset.into(); + sink.put4(offset as u32); + "#, + ), + ); + + // XX /r register-indirect store with index and 32-bit offset. + // Only ABCD allowed for stored value. This is for byte stores with no REX. + recipes.add_template_recipe( + EncodingRecipeBuilder::new("stWithIndexDisp32_abcd", &formats.store_complex, 6) + .operands_in(vec![abcd, gpr, gpr]) + .inst_predicate(has_big_offset.clone()) + .clobbers_flags(false) + .emit( + r#" + if !flags.notrap() { + sink.trap(TrapCode::HeapOutOfBounds, func.srclocs[inst]); + } + {{PUT_OP}}(bits, rex3(in_reg1, in_reg0, in_reg2), sink); + modrm_sib_disp32(in_reg0, sink); + sib(0, in_reg2, in_reg1, sink); + let offset: i32 = offset.into(); + sink.put4(offset as u32); + "#, + ), + ); + + // XX /r register-indirect store with index and 32-bit offset of FPR. + recipes.add_template_recipe( + EncodingRecipeBuilder::new("fstWithIndexDisp32", &formats.store_complex, 6) + .operands_in(vec![fpr, gpr, gpr]) + .inst_predicate(has_big_offset) + .clobbers_flags(false) + .emit( + r#" + if !flags.notrap() { + sink.trap(TrapCode::HeapOutOfBounds, func.srclocs[inst]); + } + {{PUT_OP}}(bits, rex3(in_reg1, in_reg0, in_reg2), sink); + modrm_sib_disp32(in_reg0, sink); + sib(0, in_reg2, in_reg1, sink); + let offset: i32 = offset.into(); + sink.put4(offset as u32); + "#, + ), + ); + } + + // Unary spill with SIB and 32-bit displacement. + recipes.add_template_recipe( + EncodingRecipeBuilder::new("spillSib32", &formats.unary, 6) + .operands_in(vec![gpr]) + .operands_out(vec![stack_gpr32]) + .clobbers_flags(false) + .emit( + r#" + sink.trap(TrapCode::StackOverflow, func.srclocs[inst]); + let base = stk_base(out_stk0.base); + {{PUT_OP}}(bits, rex2(base, in_reg0), sink); + modrm_sib_disp32(in_reg0, sink); + sib_noindex(base, sink); + sink.put4(out_stk0.offset as u32); + "#, + ), + ); + + // Like spillSib32, but targeting an FPR rather than a GPR. + recipes.add_template_recipe( + EncodingRecipeBuilder::new("fspillSib32", &formats.unary, 6) + .operands_in(vec![fpr]) + .operands_out(vec![stack_fpr32]) + .clobbers_flags(false) + .emit( + r#" + sink.trap(TrapCode::StackOverflow, func.srclocs[inst]); + let base = stk_base(out_stk0.base); + {{PUT_OP}}(bits, rex2(base, in_reg0), sink); + modrm_sib_disp32(in_reg0, sink); + sib_noindex(base, sink); + sink.put4(out_stk0.offset as u32); + "#, + ), + ); + + // Regspill using RSP-relative addressing. 
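+    // Like the spill recipes above, the regspill recipes below always emit an SIB byte and a
+    // 32-bit displacement: the slot is addressed relative to RSP, which cannot serve as a
+    // ModR/M base without an SIB byte, and the fixed disp32 keeps the recipe's encoding size
+    // constant regardless of the slot offset.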
+ recipes.add_template_recipe( + EncodingRecipeBuilder::new("regspill32", &formats.reg_spill, 6) + .operands_in(vec![gpr]) + .clobbers_flags(false) + .emit( + r#" + sink.trap(TrapCode::StackOverflow, func.srclocs[inst]); + let dst = StackRef::sp(dst, &func.stack_slots); + let base = stk_base(dst.base); + {{PUT_OP}}(bits, rex2(base, src), sink); + modrm_sib_disp32(src, sink); + sib_noindex(base, sink); + sink.put4(dst.offset as u32); + "#, + ), + ); + + // Like regspill32, but targeting an FPR rather than a GPR. + recipes.add_template_recipe( + EncodingRecipeBuilder::new("fregspill32", &formats.reg_spill, 6) + .operands_in(vec![fpr]) + .clobbers_flags(false) + .emit( + r#" + sink.trap(TrapCode::StackOverflow, func.srclocs[inst]); + let dst = StackRef::sp(dst, &func.stack_slots); + let base = stk_base(dst.base); + {{PUT_OP}}(bits, rex2(base, src), sink); + modrm_sib_disp32(src, sink); + sib_noindex(base, sink); + sink.put4(dst.offset as u32); + "#, + ), + ); + + // Load recipes. + + { + // Simple loads. + + // A predicate asking if the offset is zero. + let has_no_offset = + InstructionPredicate::new_is_field_equal(&*formats.load, "offset", "0".into()); + + // XX /r load with no offset. + recipes.add_template_recipe( + EncodingRecipeBuilder::new("ld", &formats.load, 1) + .operands_in(vec![gpr]) + .operands_out(vec![gpr]) + .inst_predicate(has_no_offset.clone()) + .clobbers_flags(false) + .compute_size("size_plus_maybe_sib_or_offset_for_inreg_0") + .emit( + r#" + if !flags.notrap() { + sink.trap(TrapCode::HeapOutOfBounds, func.srclocs[inst]); + } + {{PUT_OP}}(bits, rex2(in_reg0, out_reg0), sink); + if needs_sib_byte(in_reg0) { + modrm_sib(out_reg0, sink); + sib_noindex(in_reg0, sink); + } else if needs_offset(in_reg0) { + modrm_disp8(in_reg0, out_reg0, sink); + sink.put1(0); + } else { + modrm_rm(in_reg0, out_reg0, sink); + } + "#, + ), + ); + + // XX /r float load with no offset. + recipes.add_template_inferred( + EncodingRecipeBuilder::new("fld", &formats.load, 1) + .operands_in(vec![gpr]) + .operands_out(vec![fpr]) + .inst_predicate(has_no_offset) + .clobbers_flags(false) + .compute_size("size_plus_maybe_sib_or_offset_for_inreg_0") + .emit( + r#" + if !flags.notrap() { + sink.trap(TrapCode::HeapOutOfBounds, func.srclocs[inst]); + } + {{PUT_OP}}(bits, rex2(in_reg0, out_reg0), sink); + if needs_sib_byte(in_reg0) { + modrm_sib(out_reg0, sink); + sib_noindex(in_reg0, sink); + } else if needs_offset(in_reg0) { + modrm_disp8(in_reg0, out_reg0, sink); + sink.put1(0); + } else { + modrm_rm(in_reg0, out_reg0, sink); + } + "#, + ), + "size_plus_maybe_sib_or_offset_for_inreg_0_plus_rex_prefix_for_inreg0_outreg0", + ); + + let has_small_offset = + InstructionPredicate::new_is_signed_int(&*formats.load, "offset", 8, 0); + + // XX /r load with 8-bit offset. + recipes.add_template_recipe( + EncodingRecipeBuilder::new("ldDisp8", &formats.load, 2) + .operands_in(vec![gpr]) + .operands_out(vec![gpr]) + .inst_predicate(has_small_offset.clone()) + .clobbers_flags(false) + .compute_size("size_plus_maybe_sib_for_inreg_0") + .emit( + r#" + if !flags.notrap() { + sink.trap(TrapCode::HeapOutOfBounds, func.srclocs[inst]); + } + {{PUT_OP}}(bits, rex2(in_reg0, out_reg0), sink); + if needs_sib_byte(in_reg0) { + modrm_sib_disp8(out_reg0, sink); + sib_noindex(in_reg0, sink); + } else { + modrm_disp8(in_reg0, out_reg0, sink); + } + let offset: i32 = offset.into(); + sink.put1(offset as u8); + "#, + ), + ); + + // XX /r float load with 8-bit offset. 
+ recipes.add_template_inferred( + EncodingRecipeBuilder::new("fldDisp8", &formats.load, 2) + .operands_in(vec![gpr]) + .operands_out(vec![fpr]) + .inst_predicate(has_small_offset) + .clobbers_flags(false) + .compute_size("size_plus_maybe_sib_for_inreg_0") + .emit( + r#" + if !flags.notrap() { + sink.trap(TrapCode::HeapOutOfBounds, func.srclocs[inst]); + } + {{PUT_OP}}(bits, rex2(in_reg0, out_reg0), sink); + if needs_sib_byte(in_reg0) { + modrm_sib_disp8(out_reg0, sink); + sib_noindex(in_reg0, sink); + } else { + modrm_disp8(in_reg0, out_reg0, sink); + } + let offset: i32 = offset.into(); + sink.put1(offset as u8); + "#, + ), + "size_plus_maybe_sib_for_inreg_0_plus_rex_prefix_for_inreg0_outreg0", + ); + + let has_big_offset = + InstructionPredicate::new_is_signed_int(&*formats.load, "offset", 32, 0); + + // XX /r load with 32-bit offset. + recipes.add_template_recipe( + EncodingRecipeBuilder::new("ldDisp32", &formats.load, 5) + .operands_in(vec![gpr]) + .operands_out(vec![gpr]) + .inst_predicate(has_big_offset.clone()) + .clobbers_flags(false) + .compute_size("size_plus_maybe_sib_for_inreg_0") + .emit( + r#" + if !flags.notrap() { + sink.trap(TrapCode::HeapOutOfBounds, func.srclocs[inst]); + } + {{PUT_OP}}(bits, rex2(in_reg0, out_reg0), sink); + if needs_sib_byte(in_reg0) { + modrm_sib_disp32(out_reg0, sink); + sib_noindex(in_reg0, sink); + } else { + modrm_disp32(in_reg0, out_reg0, sink); + } + let offset: i32 = offset.into(); + sink.put4(offset as u32); + "#, + ), + ); + + // XX /r float load with 32-bit offset. + recipes.add_template_inferred( + EncodingRecipeBuilder::new("fldDisp32", &formats.load, 5) + .operands_in(vec![gpr]) + .operands_out(vec![fpr]) + .inst_predicate(has_big_offset) + .clobbers_flags(false) + .compute_size("size_plus_maybe_sib_for_inreg_0") + .emit( + r#" + if !flags.notrap() { + sink.trap(TrapCode::HeapOutOfBounds, func.srclocs[inst]); + } + {{PUT_OP}}(bits, rex2(in_reg0, out_reg0), sink); + if needs_sib_byte(in_reg0) { + modrm_sib_disp32(out_reg0, sink); + sib_noindex(in_reg0, sink); + } else { + modrm_disp32(in_reg0, out_reg0, sink); + } + let offset: i32 = offset.into(); + sink.put4(offset as u32); + "#, + ), + "size_plus_maybe_sib_for_inreg_0_plus_rex_prefix_for_inreg0_outreg0", + ); + } + + { + // Complex loads. + + // A predicate asking if the offset is zero. + let has_no_offset = + InstructionPredicate::new_is_field_equal(&*formats.load_complex, "offset", "0".into()); + + // XX /r load with index and no offset. + recipes.add_template_recipe( + EncodingRecipeBuilder::new("ldWithIndex", &formats.load_complex, 2) + .operands_in(vec![gpr, gpr]) + .operands_out(vec![gpr]) + .inst_predicate(has_no_offset.clone()) + .clobbers_flags(false) + .compute_size("size_plus_maybe_offset_for_inreg_0") + .emit( + r#" + if !flags.notrap() { + sink.trap(TrapCode::HeapOutOfBounds, func.srclocs[inst]); + } + {{PUT_OP}}(bits, rex3(in_reg0, out_reg0, in_reg1), sink); + // The else branch always inserts an SIB byte. + if needs_offset(in_reg0) { + modrm_sib_disp8(out_reg0, sink); + sib(0, in_reg1, in_reg0, sink); + sink.put1(0); + } else { + modrm_sib(out_reg0, sink); + sib(0, in_reg1, in_reg0, sink); + } + "#, + ), + ); + + // XX /r float load with index and no offset. 
+ recipes.add_template_recipe( + EncodingRecipeBuilder::new("fldWithIndex", &formats.load_complex, 2) + .operands_in(vec![gpr, gpr]) + .operands_out(vec![fpr]) + .inst_predicate(has_no_offset) + .clobbers_flags(false) + .compute_size("size_plus_maybe_offset_for_inreg_0") + .emit( + r#" + if !flags.notrap() { + sink.trap(TrapCode::HeapOutOfBounds, func.srclocs[inst]); + } + {{PUT_OP}}(bits, rex3(in_reg0, out_reg0, in_reg1), sink); + // The else branch always inserts an SIB byte. + if needs_offset(in_reg0) { + modrm_sib_disp8(out_reg0, sink); + sib(0, in_reg1, in_reg0, sink); + sink.put1(0); + } else { + modrm_sib(out_reg0, sink); + sib(0, in_reg1, in_reg0, sink); + } + "#, + ), + ); + + let has_small_offset = + InstructionPredicate::new_is_signed_int(&*formats.load_complex, "offset", 8, 0); + + // XX /r load with index and 8-bit offset. + recipes.add_template_recipe( + EncodingRecipeBuilder::new("ldWithIndexDisp8", &formats.load_complex, 3) + .operands_in(vec![gpr, gpr]) + .operands_out(vec![gpr]) + .inst_predicate(has_small_offset.clone()) + .clobbers_flags(false) + .emit( + r#" + if !flags.notrap() { + sink.trap(TrapCode::HeapOutOfBounds, func.srclocs[inst]); + } + {{PUT_OP}}(bits, rex3(in_reg0, out_reg0, in_reg1), sink); + modrm_sib_disp8(out_reg0, sink); + sib(0, in_reg1, in_reg0, sink); + let offset: i32 = offset.into(); + sink.put1(offset as u8); + "#, + ), + ); + + // XX /r float load with 8-bit offset. + recipes.add_template_recipe( + EncodingRecipeBuilder::new("fldWithIndexDisp8", &formats.load_complex, 3) + .operands_in(vec![gpr, gpr]) + .operands_out(vec![fpr]) + .inst_predicate(has_small_offset) + .clobbers_flags(false) + .emit( + r#" + if !flags.notrap() { + sink.trap(TrapCode::HeapOutOfBounds, func.srclocs[inst]); + } + {{PUT_OP}}(bits, rex3(in_reg0, out_reg0, in_reg1), sink); + modrm_sib_disp8(out_reg0, sink); + sib(0, in_reg1, in_reg0, sink); + let offset: i32 = offset.into(); + sink.put1(offset as u8); + "#, + ), + ); + + let has_big_offset = + InstructionPredicate::new_is_signed_int(&*formats.load_complex, "offset", 32, 0); + + // XX /r load with index and 32-bit offset. + recipes.add_template_recipe( + EncodingRecipeBuilder::new("ldWithIndexDisp32", &formats.load_complex, 6) + .operands_in(vec![gpr, gpr]) + .operands_out(vec![gpr]) + .inst_predicate(has_big_offset.clone()) + .clobbers_flags(false) + .emit( + r#" + if !flags.notrap() { + sink.trap(TrapCode::HeapOutOfBounds, func.srclocs[inst]); + } + {{PUT_OP}}(bits, rex3(in_reg0, out_reg0, in_reg1), sink); + modrm_sib_disp32(out_reg0, sink); + sib(0, in_reg1, in_reg0, sink); + let offset: i32 = offset.into(); + sink.put4(offset as u32); + "#, + ), + ); + + // XX /r float load with index and 32-bit offset. + recipes.add_template_recipe( + EncodingRecipeBuilder::new("fldWithIndexDisp32", &formats.load_complex, 6) + .operands_in(vec![gpr, gpr]) + .operands_out(vec![fpr]) + .inst_predicate(has_big_offset) + .clobbers_flags(false) + .emit( + r#" + if !flags.notrap() { + sink.trap(TrapCode::HeapOutOfBounds, func.srclocs[inst]); + } + {{PUT_OP}}(bits, rex3(in_reg0, out_reg0, in_reg1), sink); + modrm_sib_disp32(out_reg0, sink); + sib(0, in_reg1, in_reg0, sink); + let offset: i32 = offset.into(); + sink.put4(offset as u32); + "#, + ), + ); + } + + // Unary fill with SIB and 32-bit displacement. 
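+    // A fill is the inverse of a spill: it reloads a value from its stack slot back into a
+    // register, using the same SIB-addressed form as `spillSib32` with the operands reversed.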
+ recipes.add_template_recipe( + EncodingRecipeBuilder::new("fillSib32", &formats.unary, 6) + .operands_in(vec![stack_gpr32]) + .operands_out(vec![gpr]) + .clobbers_flags(false) + .emit( + r#" + let base = stk_base(in_stk0.base); + {{PUT_OP}}(bits, rex2(base, out_reg0), sink); + modrm_sib_disp32(out_reg0, sink); + sib_noindex(base, sink); + sink.put4(in_stk0.offset as u32); + "#, + ), + ); + + // Like fillSib32, but targeting an FPR rather than a GPR. + recipes.add_template_recipe( + EncodingRecipeBuilder::new("ffillSib32", &formats.unary, 6) + .operands_in(vec![stack_fpr32]) + .operands_out(vec![fpr]) + .clobbers_flags(false) + .emit( + r#" + let base = stk_base(in_stk0.base); + {{PUT_OP}}(bits, rex2(base, out_reg0), sink); + modrm_sib_disp32(out_reg0, sink); + sib_noindex(base, sink); + sink.put4(in_stk0.offset as u32); + "#, + ), + ); + + // Regfill with RSP-relative 32-bit displacement. + recipes.add_template_recipe( + EncodingRecipeBuilder::new("regfill32", &formats.reg_fill, 6) + .operands_in(vec![stack_gpr32]) + .clobbers_flags(false) + .emit( + r#" + let src = StackRef::sp(src, &func.stack_slots); + let base = stk_base(src.base); + {{PUT_OP}}(bits, rex2(base, dst), sink); + modrm_sib_disp32(dst, sink); + sib_noindex(base, sink); + sink.put4(src.offset as u32); + "#, + ), + ); + + // Like regfill32, but targeting an FPR rather than a GPR. + recipes.add_template_recipe( + EncodingRecipeBuilder::new("fregfill32", &formats.reg_fill, 6) + .operands_in(vec![stack_fpr32]) + .clobbers_flags(false) + .emit( + r#" + let src = StackRef::sp(src, &func.stack_slots); + let base = stk_base(src.base); + {{PUT_OP}}(bits, rex2(base, dst), sink); + modrm_sib_disp32(dst, sink); + sib_noindex(base, sink); + sink.put4(src.offset as u32); + "#, + ), + ); + + // Call/return. + + recipes.add_template_recipe( + EncodingRecipeBuilder::new("call_id", &formats.call, 4).emit( + r#" + sink.trap(TrapCode::StackOverflow, func.srclocs[inst]); + {{PUT_OP}}(bits, BASE_REX, sink); + // The addend adjusts for the difference between the end of the + // instruction and the beginning of the immediate field. + sink.reloc_external(func.srclocs[inst], + Reloc::X86CallPCRel4, + &func.dfg.ext_funcs[func_ref].name, + -4); + sink.put4(0); + sink.add_call_site(opcode, func.srclocs[inst]); + "#, + ), + ); + + recipes.add_template_recipe( + EncodingRecipeBuilder::new("call_plt_id", &formats.call, 4).emit( + r#" + sink.trap(TrapCode::StackOverflow, func.srclocs[inst]); + {{PUT_OP}}(bits, BASE_REX, sink); + sink.reloc_external(func.srclocs[inst], + Reloc::X86CallPLTRel4, + &func.dfg.ext_funcs[func_ref].name, + -4); + sink.put4(0); + sink.add_call_site(opcode, func.srclocs[inst]); + "#, + ), + ); + + recipes.add_template_recipe( + EncodingRecipeBuilder::new("call_r", &formats.call_indirect, 1) + .operands_in(vec![gpr]) + .emit( + r#" + sink.trap(TrapCode::StackOverflow, func.srclocs[inst]); + {{PUT_OP}}(bits, rex1(in_reg0), sink); + modrm_r_bits(in_reg0, bits, sink); + sink.add_call_site(opcode, func.srclocs[inst]); + "#, + ), + ); + + recipes.add_template_recipe( + EncodingRecipeBuilder::new("ret", &formats.multiary, 0) + .emit("{{PUT_OP}}(bits, BASE_REX, sink);"), + ); + + // Branches. 
+ + recipes.add_template_recipe( + EncodingRecipeBuilder::new("jmpb", &formats.jump, 1) + .branch_range((1, 8)) + .clobbers_flags(false) + .emit( + r#" + {{PUT_OP}}(bits, BASE_REX, sink); + disp1(destination, func, sink); + "#, + ), + ); + + recipes.add_template_recipe( + EncodingRecipeBuilder::new("jmpd", &formats.jump, 4) + .branch_range((4, 32)) + .clobbers_flags(false) + .emit( + r#" + {{PUT_OP}}(bits, BASE_REX, sink); + disp4(destination, func, sink); + "#, + ), + ); + + recipes.add_template_recipe( + EncodingRecipeBuilder::new("brib", &formats.branch_int, 1) + .operands_in(vec![reg_rflags]) + .branch_range((1, 8)) + .clobbers_flags(false) + .emit( + r#" + {{PUT_OP}}(bits | icc2opc(cond), BASE_REX, sink); + disp1(destination, func, sink); + "#, + ), + ); + + recipes.add_template_recipe( + EncodingRecipeBuilder::new("brid", &formats.branch_int, 4) + .operands_in(vec![reg_rflags]) + .branch_range((4, 32)) + .clobbers_flags(false) + .emit( + r#" + {{PUT_OP}}(bits | icc2opc(cond), BASE_REX, sink); + disp4(destination, func, sink); + "#, + ), + ); + + recipes.add_template_recipe( + EncodingRecipeBuilder::new("brfb", &formats.branch_float, 1) + .operands_in(vec![reg_rflags]) + .branch_range((1, 8)) + .clobbers_flags(false) + .inst_predicate(supported_floatccs_predicate( + &supported_floatccs, + &*formats.branch_float, + )) + .emit( + r#" + {{PUT_OP}}(bits | fcc2opc(cond), BASE_REX, sink); + disp1(destination, func, sink); + "#, + ), + ); + + recipes.add_template_recipe( + EncodingRecipeBuilder::new("brfd", &formats.branch_float, 4) + .operands_in(vec![reg_rflags]) + .branch_range((4, 32)) + .clobbers_flags(false) + .inst_predicate(supported_floatccs_predicate( + &supported_floatccs, + &*formats.branch_float, + )) + .emit( + r#" + {{PUT_OP}}(bits | fcc2opc(cond), BASE_REX, sink); + disp4(destination, func, sink); + "#, + ), + ); + + recipes.add_template_recipe( + EncodingRecipeBuilder::new("indirect_jmp", &formats.indirect_jump, 1) + .operands_in(vec![gpr]) + .clobbers_flags(false) + .emit( + r#" + {{PUT_OP}}(bits, rex1(in_reg0), sink); + modrm_r_bits(in_reg0, bits, sink); + "#, + ), + ); + + recipes.add_template_recipe( + EncodingRecipeBuilder::new("jt_entry", &formats.branch_table_entry, 2) + .operands_in(vec![gpr, gpr]) + .operands_out(vec![gpr]) + .clobbers_flags(false) + .inst_predicate(valid_scale(&*formats.branch_table_entry)) + .compute_size("size_plus_maybe_offset_for_inreg_1") + .emit( + r#" + {{PUT_OP}}(bits, rex3(in_reg1, out_reg0, in_reg0), sink); + if needs_offset(in_reg1) { + modrm_sib_disp8(out_reg0, sink); + sib(imm.trailing_zeros() as u8, in_reg0, in_reg1, sink); + sink.put1(0); + } else { + modrm_sib(out_reg0, sink); + sib(imm.trailing_zeros() as u8, in_reg0, in_reg1, sink); + } + "#, + ), + ); + + recipes.add_template_inferred( + EncodingRecipeBuilder::new("vconst", &formats.unary_const, 5) + .operands_out(vec![fpr]) + .clobbers_flags(false) + .emit( + r#" + {{PUT_OP}}(bits, rex2(0, out_reg0), sink); + modrm_riprel(out_reg0, sink); + const_disp4(constant_handle, func, sink); + "#, + ), + "size_with_inferred_rex_for_outreg0", + ); + + recipes.add_template_inferred( + EncodingRecipeBuilder::new("vconst_optimized", &formats.unary_const, 1) + .operands_out(vec![fpr]) + .clobbers_flags(false) + .emit( + r#" + {{PUT_OP}}(bits, rex2(out_reg0, out_reg0), sink); + modrm_rr(out_reg0, out_reg0, sink); + "#, + ), + "size_with_inferred_rex_for_outreg0", + ); + + recipes.add_template_recipe( + EncodingRecipeBuilder::new("jt_base", &formats.branch_table_base, 5) + 
            .operands_out(vec![gpr])
+            .clobbers_flags(false)
+            .emit(
+                r#"
+                    {{PUT_OP}}(bits, rex2(0, out_reg0), sink);
+                    modrm_riprel(out_reg0, sink);
+
+                    // No reloc is needed here as the jump table is emitted directly after
+                    // the function body.
+                    jt_disp4(table, func, sink);
+                "#,
+            ),
+    );
+
+    // Test flags and set a register.
+    //
+    // These setCC instructions only set the low 8 bits, and they can only write ABCD registers
+    // without a REX prefix.
+    //
+    // Other instruction encodings accepting `b1` inputs have the same constraints and only look at
+    // the low 8 bits of the input register.
+
+    let seti = recipes.add_template(
+        Template::new(
+            EncodingRecipeBuilder::new("seti", &formats.int_cond, 1)
+                .operands_in(vec![reg_rflags])
+                .operands_out(vec![gpr])
+                .clobbers_flags(false)
+                .emit(
+                    r#"
+                        {{PUT_OP}}(bits | icc2opc(cond), rex1(out_reg0), sink);
+                        modrm_r_bits(out_reg0, bits, sink);
+                    "#,
+                ),
+            regs,
+        )
+        .rex_kind(RecipePrefixKind::AlwaysEmitRex),
+    );
+
+    recipes.add_template(
+        Template::new(
+            EncodingRecipeBuilder::new("seti_abcd", &formats.int_cond, 1)
+                .operands_in(vec![reg_rflags])
+                .operands_out(vec![abcd])
+                .clobbers_flags(false)
+                .emit(
+                    r#"
+                        {{PUT_OP}}(bits | icc2opc(cond), rex1(out_reg0), sink);
+                        modrm_r_bits(out_reg0, bits, sink);
+                    "#,
+                ),
+            regs,
+        )
+        .when_prefixed(seti),
+    );
+
+    let setf = recipes.add_template(
+        Template::new(
+            EncodingRecipeBuilder::new("setf", &formats.float_cond, 1)
+                .operands_in(vec![reg_rflags])
+                .operands_out(vec![gpr])
+                .clobbers_flags(false)
+                .emit(
+                    r#"
+                        {{PUT_OP}}(bits | fcc2opc(cond), rex1(out_reg0), sink);
+                        modrm_r_bits(out_reg0, bits, sink);
+                    "#,
+                ),
+            regs,
+        )
+        .rex_kind(RecipePrefixKind::AlwaysEmitRex),
+    );
+
+    recipes.add_template(
+        Template::new(
+            EncodingRecipeBuilder::new("setf_abcd", &formats.float_cond, 1)
+                .operands_in(vec![reg_rflags])
+                .operands_out(vec![abcd])
+                .clobbers_flags(false)
+                .emit(
+                    r#"
+                        {{PUT_OP}}(bits | fcc2opc(cond), rex1(out_reg0), sink);
+                        modrm_r_bits(out_reg0, bits, sink);
+                    "#,
+                ),
+            regs,
+        )
+        .when_prefixed(setf),
+    );
+
+    // Conditional move (a.k.a integer select)
+    // (maybe-REX.W) 0F 4x modrm(r,r)
+    // 1 byte, modrm(r,r), is after the opcode
+    recipes.add_template(
+        Template::new(
+            EncodingRecipeBuilder::new("cmov", &formats.int_select, 1)
+                .operands_in(vec![
+                    OperandConstraint::FixedReg(reg_rflags),
+                    OperandConstraint::RegClass(gpr),
+                    OperandConstraint::RegClass(gpr),
+                ])
+                .operands_out(vec![2])
+                .clobbers_flags(false)
+                .emit(
+                    r#"
+                        {{PUT_OP}}(bits | icc2opc(cond), rex2(in_reg1, in_reg2), sink);
+                        modrm_rr(in_reg1, in_reg2, sink);
+                    "#,
+                ),
+            regs,
+        )
+        .inferred_rex_compute_size("size_with_inferred_rex_for_cmov"),
+    );
+
+    // Bit scan forwards and reverse
+    recipes.add_template(
+        Template::new(
+            EncodingRecipeBuilder::new("bsf_and_bsr", &formats.unary, 1)
+                .operands_in(vec![gpr])
+                .operands_out(vec![
+                    OperandConstraint::RegClass(gpr),
+                    OperandConstraint::FixedReg(reg_rflags),
+                ])
+                .emit(
+                    r#"
+                        {{PUT_OP}}(bits, rex2(in_reg0, out_reg0), sink);
+                        modrm_rr(in_reg0, out_reg0, sink);
+                    "#,
+                ),
+            regs,
+        )
+        .inferred_rex_compute_size("size_with_inferred_rex_for_inreg0_outreg0"),
+    );
+
+    // Arithmetic with flag I/O.
+
+    // XX /r, MR form. Add two GPR registers and set carry flag.
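+    // In Intel operand-order notation, MR means ModRM.r/m holds the destination operand and
+    // ModRM.reg the source; `rout` below ties its result to input 0 and pins the produced
+    // flags to %rflags.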
+ recipes.add_template( + Template::new( + EncodingRecipeBuilder::new("rout", &formats.binary, 1) + .operands_in(vec![gpr, gpr]) + .operands_out(vec![ + OperandConstraint::TiedInput(0), + OperandConstraint::FixedReg(reg_rflags), + ]) + .clobbers_flags(true) + .emit( + r#" + {{PUT_OP}}(bits, rex2(in_reg0, in_reg1), sink); + modrm_rr(in_reg0, in_reg1, sink); + "#, + ), + regs, + ) + .inferred_rex_compute_size("size_with_inferred_rex_for_inreg0_inreg1"), + ); + + // XX /r, MR form. Add two GPR registers and get carry flag. + recipes.add_template( + Template::new( + EncodingRecipeBuilder::new("rin", &formats.ternary, 1) + .operands_in(vec![ + OperandConstraint::RegClass(gpr), + OperandConstraint::RegClass(gpr), + OperandConstraint::FixedReg(reg_rflags), + ]) + .operands_out(vec![0]) + .clobbers_flags(true) + .emit( + r#" + {{PUT_OP}}(bits, rex2(in_reg0, in_reg1), sink); + modrm_rr(in_reg0, in_reg1, sink); + "#, + ), + regs, + ) + .inferred_rex_compute_size("size_with_inferred_rex_for_inreg0_inreg1"), + ); + + // XX /r, MR form. Add two GPR registers with carry flag. + recipes.add_template( + Template::new( + EncodingRecipeBuilder::new("rio", &formats.ternary, 1) + .operands_in(vec![ + OperandConstraint::RegClass(gpr), + OperandConstraint::RegClass(gpr), + OperandConstraint::FixedReg(reg_rflags), + ]) + .operands_out(vec![ + OperandConstraint::TiedInput(0), + OperandConstraint::FixedReg(reg_rflags), + ]) + .clobbers_flags(true) + .emit( + r#" + {{PUT_OP}}(bits, rex2(in_reg0, in_reg1), sink); + modrm_rr(in_reg0, in_reg1, sink); + "#, + ), + regs, + ) + .inferred_rex_compute_size("size_with_inferred_rex_for_inreg0_inreg1"), + ); + + // Compare and set flags. + + // XX /r, MR form. Compare two GPR registers and set flags. + recipes.add_template( + Template::new( + EncodingRecipeBuilder::new("rcmp", &formats.binary, 1) + .operands_in(vec![gpr, gpr]) + .operands_out(vec![reg_rflags]) + .emit( + r#" + {{PUT_OP}}(bits, rex2(in_reg0, in_reg1), sink); + modrm_rr(in_reg0, in_reg1, sink); + "#, + ), + regs, + ) + .inferred_rex_compute_size("size_with_inferred_rex_for_inreg0_inreg1"), + ); + + // Same as rcmp, but second operand is the stack pointer. + recipes.add_template_recipe( + EncodingRecipeBuilder::new("rcmp_sp", &formats.unary, 1) + .operands_in(vec![gpr]) + .operands_out(vec![reg_rflags]) + .emit( + r#" + {{PUT_OP}}(bits, rex2(in_reg0, RU::rsp.into()), sink); + modrm_rr(in_reg0, RU::rsp.into(), sink); + "#, + ), + ); + + // XX /r, RM form. Compare two FPR registers and set flags. + recipes.add_template_inferred( + EncodingRecipeBuilder::new("fcmp", &formats.binary, 1) + .operands_in(vec![fpr, fpr]) + .operands_out(vec![reg_rflags]) + .emit( + r#" + {{PUT_OP}}(bits, rex2(in_reg1, in_reg0), sink); + modrm_rr(in_reg1, in_reg0, sink); + "#, + ), + "size_with_inferred_rex_for_inreg0_inreg1", + ); + + { + let has_small_offset = + InstructionPredicate::new_is_signed_int(&*formats.binary_imm64, "imm", 8, 0); + + // XX /n, MI form with imm8. 
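+        // `/n` means the ModRM.reg field carries an opcode extension instead of a register
+        // (hence `modrm_r_bits` taking the extension from `bits`), and MI pairs the ModRM.r/m
+        // register with an immediate operand.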
+ recipes.add_template( + Template::new( + EncodingRecipeBuilder::new("rcmp_ib", &formats.binary_imm64, 2) + .operands_in(vec![gpr]) + .operands_out(vec![reg_rflags]) + .inst_predicate(has_small_offset) + .emit( + r#" + {{PUT_OP}}(bits, rex1(in_reg0), sink); + modrm_r_bits(in_reg0, bits, sink); + let imm: i64 = imm.into(); + sink.put1(imm as u8); + "#, + ), + regs, + ) + .inferred_rex_compute_size("size_with_inferred_rex_for_inreg0"), + ); + + let has_big_offset = + InstructionPredicate::new_is_signed_int(&*formats.binary_imm64, "imm", 32, 0); + + // XX /n, MI form with imm32. + recipes.add_template( + Template::new( + EncodingRecipeBuilder::new("rcmp_id", &formats.binary_imm64, 5) + .operands_in(vec![gpr]) + .operands_out(vec![reg_rflags]) + .inst_predicate(has_big_offset) + .emit( + r#" + {{PUT_OP}}(bits, rex1(in_reg0), sink); + modrm_r_bits(in_reg0, bits, sink); + let imm: i64 = imm.into(); + sink.put4(imm as u32); + "#, + ), + regs, + ) + .inferred_rex_compute_size("size_with_inferred_rex_for_inreg0"), + ); + } + + // Test-and-branch. + // + // This recipe represents the macro fusion of a test and a conditional branch. + // This serves two purposes: + // + // 1. Guarantee that the test and branch get scheduled next to each other so + // macro fusion is guaranteed to be possible. + // 2. Hide the status flags from Cranelift which doesn't currently model flags. + // + // The encoding bits affect both the test and the branch instruction: + // + // Bits 0-7 are the Jcc opcode. + // Bits 8-15 control the test instruction which always has opcode byte 0x85. + + recipes.add_template( + Template::new( + EncodingRecipeBuilder::new("tjccb", &formats.branch, 1 + 2) + .operands_in(vec![gpr]) + .branch_range((3, 8)) + .emit( + r#" + // test r, r. + {{PUT_OP}}((bits & 0xff00) | 0x85, rex2(in_reg0, in_reg0), sink); + modrm_rr(in_reg0, in_reg0, sink); + // Jcc instruction. + sink.put1(bits as u8); + disp1(destination, func, sink); + "#, + ), + regs, + ) + .inferred_rex_compute_size("size_with_inferred_rex_for_inreg0"), + ); + + recipes.add_template( + Template::new( + EncodingRecipeBuilder::new("tjccd", &formats.branch, 1 + 6) + .operands_in(vec![gpr]) + .branch_range((7, 32)) + .emit( + r#" + // test r, r. + {{PUT_OP}}((bits & 0xff00) | 0x85, rex2(in_reg0, in_reg0), sink); + modrm_rr(in_reg0, in_reg0, sink); + // Jcc instruction. + sink.put1(0x0f); + sink.put1(bits as u8); + disp4(destination, func, sink); + "#, + ), + regs, + ) + .inferred_rex_compute_size("size_with_inferred_rex_for_inreg0"), + ); + + // 8-bit test-and-branch. + + let t8jccb = recipes.add_template( + Template::new( + EncodingRecipeBuilder::new("t8jccb", &formats.branch, 1 + 2) + .operands_in(vec![gpr]) + .branch_range((3, 8)) + .emit( + r#" + // test8 r, r. + {{PUT_OP}}((bits & 0xff00) | 0x84, rex2(in_reg0, in_reg0), sink); + modrm_rr(in_reg0, in_reg0, sink); + // Jcc instruction. + sink.put1(bits as u8); + disp1(destination, func, sink); + "#, + ), + regs, + ) + .rex_kind(RecipePrefixKind::AlwaysEmitRex), + ); + + recipes.add_template( + Template::new( + EncodingRecipeBuilder::new("t8jccb_abcd", &formats.branch, 1 + 2) + .operands_in(vec![abcd]) + .branch_range((3, 8)) + .emit( + r#" + // test8 r, r. + {{PUT_OP}}((bits & 0xff00) | 0x84, rex2(in_reg0, in_reg0), sink); + modrm_rr(in_reg0, in_reg0, sink); + // Jcc instruction. 
+                        sink.put1(bits as u8);
+                        disp1(destination, func, sink);
+                    "#,
+                ),
+            regs,
+        )
+        .when_prefixed(t8jccb),
+    );
+
+    let t8jccd = recipes.add_template(
+        Template::new(
+            EncodingRecipeBuilder::new("t8jccd", &formats.branch, 1 + 6)
+                .operands_in(vec![gpr])
+                .branch_range((7, 32))
+                .emit(
+                    r#"
+                        // test8 r, r.
+                        {{PUT_OP}}((bits & 0xff00) | 0x84, rex2(in_reg0, in_reg0), sink);
+                        modrm_rr(in_reg0, in_reg0, sink);
+                        // Jcc instruction.
+                        sink.put1(0x0f);
+                        sink.put1(bits as u8);
+                        disp4(destination, func, sink);
+                    "#,
+                ),
+            regs,
+        )
+        .rex_kind(RecipePrefixKind::AlwaysEmitRex),
+    );
+
+    recipes.add_template(
+        Template::new(
+            EncodingRecipeBuilder::new("t8jccd_abcd", &formats.branch, 1 + 6)
+                .operands_in(vec![abcd])
+                .branch_range((7, 32))
+                .emit(
+                    r#"
+                        // test8 r, r.
+                        {{PUT_OP}}((bits & 0xff00) | 0x84, rex2(in_reg0, in_reg0), sink);
+                        modrm_rr(in_reg0, in_reg0, sink);
+                        // Jcc instruction.
+                        sink.put1(0x0f);
+                        sink.put1(bits as u8);
+                        disp4(destination, func, sink);
+                    "#,
+                ),
+            regs,
+        )
+        .when_prefixed(t8jccd),
+    );
+
+    // Worst case test-and-branch recipe for brz.b1 and brnz.b1 in 32-bit mode.
+    // The register allocator can't handle a branch instruction with constrained
+    // operands like the t8jccd_abcd above. This variant can accept the b1 operand in
+    // any register, but it is larger because it uses a 32-bit test instruction with
+    // a 0xff immediate.
+
+    recipes.add_template_recipe(
+        EncodingRecipeBuilder::new("t8jccd_long", &formats.branch, 5 + 6)
+            .operands_in(vec![gpr])
+            .branch_range((11, 32))
+            .emit(
+                r#"
+                    // test32 r, 0xff.
+                    {{PUT_OP}}((bits & 0xff00) | 0xf7, rex1(in_reg0), sink);
+                    modrm_r_bits(in_reg0, bits, sink);
+                    sink.put4(0xff);
+                    // Jcc instruction.
+                    sink.put1(0x0f);
+                    sink.put1(bits as u8);
+                    disp4(destination, func, sink);
+                "#,
+            ),
+    );
+
+    // Comparison that produces a `b1` result in a GPR.
+    //
+    // This is a macro of a `cmp` instruction followed by a `setCC` instruction.
+    //
+    // TODO This is not a great solution because:
+    //
+    // - The cmp+setcc combination is not recognized by CPU's macro fusion.
+    // - The 64-bit encoding has issues with REX prefixes. The `cmp` and `setCC`
+    //   instructions may need a REX independently.
+    // - Modeling CPU flags in the type system would be better.
+    //
+    // Since the `setCC` instructions only write an 8-bit register, we use that as
+    // our `b1` representation: A `b1` value is represented as a GPR where the low 8
+    // bits are known to be 0 or 1. The high bits are undefined.
+    //
+    // This bandaid macro doesn't support a REX prefix for the final `setCC`
+    // instruction, so it is limited to the `ABCD` register class for booleans.
+    // The omission of a `when_prefixed` alternative is deliberate here.
+
+    recipes.add_template(
+        Template::new(
+            EncodingRecipeBuilder::new("icscc", &formats.int_compare, 1 + 3)
+                .operands_in(vec![gpr, gpr])
+                .operands_out(vec![abcd])
+                .emit(
+                    r#"
+                        // Comparison instruction.
+                        {{PUT_OP}}(bits, rex2(in_reg0, in_reg1), sink);
+                        modrm_rr(in_reg0, in_reg1, sink);
+                        // `setCC` instruction, no REX.
+                        let setcc = 0x90 | icc2opc(cond);
+                        sink.put1(0x0f);
+                        sink.put1(setcc as u8);
+                        modrm_rr(out_reg0, 0, sink);
+                    "#,
+                ),
+            regs,
+        )
+        .inferred_rex_compute_size("size_with_inferred_rex_for_inreg0_inreg1"),
+    );
+
+    recipes.add_template_inferred(
+        EncodingRecipeBuilder::new("icscc_fpr", &formats.int_compare, 1)
+            .operands_in(vec![fpr, fpr])
+            .operands_out(vec![0])
+            .emit(
+                r#"
+                    // Comparison instruction.
+                    {{PUT_OP}}(bits, rex2(in_reg1, in_reg0), sink);
+                    modrm_rr(in_reg1, in_reg0, sink);
+                "#,
+            ),
+        "size_with_inferred_rex_for_inreg0_inreg1",
+    );
+
+    {
+        let is_small_imm =
+            InstructionPredicate::new_is_signed_int(&*formats.int_compare_imm, "imm", 8, 0);
+
+        recipes.add_template(
+            Template::new(
+                EncodingRecipeBuilder::new("icscc_ib", &formats.int_compare_imm, 2 + 3)
+                    .operands_in(vec![gpr])
+                    .operands_out(vec![abcd])
+                    .inst_predicate(is_small_imm)
+                    .emit(
+                        r#"
+                            // Comparison instruction.
+                            {{PUT_OP}}(bits, rex1(in_reg0), sink);
+                            modrm_r_bits(in_reg0, bits, sink);
+                            let imm: i64 = imm.into();
+                            sink.put1(imm as u8);
+                            // `setCC` instruction, no REX.
+                            let setcc = 0x90 | icc2opc(cond);
+                            sink.put1(0x0f);
+                            sink.put1(setcc as u8);
+                            modrm_rr(out_reg0, 0, sink);
+                        "#,
+                    ),
+                regs,
+            )
+            .inferred_rex_compute_size("size_with_inferred_rex_for_inreg0"),
+        );
+
+        let is_big_imm =
+            InstructionPredicate::new_is_signed_int(&*formats.int_compare_imm, "imm", 32, 0);
+
+        recipes.add_template(
+            Template::new(
+                EncodingRecipeBuilder::new("icscc_id", &formats.int_compare_imm, 5 + 3)
+                    .operands_in(vec![gpr])
+                    .operands_out(vec![abcd])
+                    .inst_predicate(is_big_imm)
+                    .emit(
+                        r#"
+                            // Comparison instruction.
+                            {{PUT_OP}}(bits, rex1(in_reg0), sink);
+                            modrm_r_bits(in_reg0, bits, sink);
+                            let imm: i64 = imm.into();
+                            sink.put4(imm as u32);
+                            // `setCC` instruction, no REX.
+                            let setcc = 0x90 | icc2opc(cond);
+                            sink.put1(0x0f);
+                            sink.put1(setcc as u8);
+                            modrm_rr(out_reg0, 0, sink);
+                        "#,
+                    ),
+                regs,
+            )
+            .inferred_rex_compute_size("size_with_inferred_rex_for_inreg0"),
+        );
+    }
+
+    // Same thing for floating point: make a FloatCompare instruction predicate with the
+    // supported condition codes.
+    //
+    // The ucomiss/ucomisd instructions set the FLAGS bits ZF/PF/CF like this:
+    //
+    //    ZPC OSA
+    // UN 111 000
+    // GT 000 000
+    // LT 001 000
+    // EQ 100 000
+    //
+    // Not all floating point condition codes are supported.
+    // The omission of a `when_prefixed` alternative is deliberate here.
+
+    recipes.add_template_recipe(
+        EncodingRecipeBuilder::new("fcscc", &formats.float_compare, 1 + 3)
+            .operands_in(vec![fpr, fpr])
+            .operands_out(vec![abcd])
+            .inst_predicate(supported_floatccs_predicate(
+                &supported_floatccs,
+                &*formats.float_compare,
+            ))
+            .emit(
+                r#"
+                    // Comparison instruction.
+                    {{PUT_OP}}(bits, rex2(in_reg1, in_reg0), sink);
+                    modrm_rr(in_reg1, in_reg0, sink);
+                    // `setCC` instruction, no REX.
+ use crate::ir::condcodes::FloatCC::*; + let setcc = match cond { + Ordered => 0x9b, // EQ|LT|GT => setnp (P=0) + Unordered => 0x9a, // UN => setp (P=1) + OrderedNotEqual => 0x95, // LT|GT => setne (Z=0), + UnorderedOrEqual => 0x94, // UN|EQ => sete (Z=1) + GreaterThan => 0x97, // GT => seta (C=0&Z=0) + GreaterThanOrEqual => 0x93, // GT|EQ => setae (C=0) + UnorderedOrLessThan => 0x92, // UN|LT => setb (C=1) + UnorderedOrLessThanOrEqual => 0x96, // UN|LT|EQ => setbe (Z=1|C=1) + Equal | // EQ + NotEqual | // UN|LT|GT + LessThan | // LT + LessThanOrEqual | // LT|EQ + UnorderedOrGreaterThan | // UN|GT + UnorderedOrGreaterThanOrEqual // UN|GT|EQ + => panic!("{} not supported by fcscc", cond), + }; + sink.put1(0x0f); + sink.put1(setcc); + modrm_rr(out_reg0, 0, sink); + "#, + ), + ); + + { + let supported_floatccs: Vec<Literal> = ["eq", "lt", "le", "uno", "ne", "uge", "ugt", "ord"] + .iter() + .map(|name| Literal::enumerator_for(floatcc, name)) + .collect(); + recipes.add_template_inferred( + EncodingRecipeBuilder::new("pfcmp", &formats.float_compare, 2) + .operands_in(vec![fpr, fpr]) + .operands_out(vec![0]) + .inst_predicate(supported_floatccs_predicate( + &supported_floatccs[..], + &*formats.float_compare, + )) + .emit( + r#" + // Comparison instruction. + {{PUT_OP}}(bits, rex2(in_reg1, in_reg0), sink); + modrm_rr(in_reg1, in_reg0, sink); + // Add immediate byte indicating what type of comparison. + use crate::ir::condcodes::FloatCC::*; + let imm = match cond { + Equal => 0x00, + LessThan => 0x01, + LessThanOrEqual => 0x02, + Unordered => 0x03, + NotEqual => 0x04, + UnorderedOrGreaterThanOrEqual => 0x05, + UnorderedOrGreaterThan => 0x06, + Ordered => 0x07, + _ => panic!("{} not supported by pfcmp", cond), + }; + sink.put1(imm); + "#, + ), + "size_with_inferred_rex_for_inreg0_inreg1", + ); + } + + recipes.add_template_recipe( + EncodingRecipeBuilder::new("is_zero", &formats.unary, 2 + 2) + .operands_in(vec![gpr]) + .operands_out(vec![abcd]) + .emit( + r#" + // Test instruction. + {{PUT_OP}}(bits, rex2(in_reg0, in_reg0), sink); + modrm_rr(in_reg0, in_reg0, sink); + // Check ZF = 1 flag to see if register holds 0. + sink.put1(0x0f); + sink.put1(0x94); + modrm_rr(out_reg0, 0, sink); + "#, + ), + ); + + recipes.add_template_recipe( + EncodingRecipeBuilder::new("is_invalid", &formats.unary, 2 + 3) + .operands_in(vec![gpr]) + .operands_out(vec![abcd]) + .emit( + r#" + // Comparison instruction. + {{PUT_OP}}(bits, rex1(in_reg0), sink); + modrm_r_bits(in_reg0, bits, sink); + sink.put1(0xff); + // `setCC` instruction, no REX. + use crate::ir::condcodes::IntCC::*; + let setcc = 0x90 | icc2opc(Equal); + sink.put1(0x0f); + sink.put1(setcc as u8); + modrm_rr(out_reg0, 0, sink); + "#, + ), + ); + + recipes.add_recipe( + EncodingRecipeBuilder::new("safepoint", &formats.multiary, 0).emit( + r#" + sink.add_stack_map(args, func, isa); + "#, + ), + ); + + // Both `elf_tls_get_addr` and `macho_tls_get_addr` require all caller-saved registers to be spilled. + // This is currently special cased in `regalloc/spilling.rs` in the `visit_inst` function. + + recipes.add_recipe( + EncodingRecipeBuilder::new("elf_tls_get_addr", &formats.unary_global_value, 16) + // FIXME Correct encoding for non rax registers + .operands_out(vec![reg_rax]) + .emit( + r#" + // output %rax + // clobbers %rdi + + // Those data16 prefixes are necessary to pad to 16 bytes. 
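+                    // Each half of the sequence is 8 bytes (66 48 8d 3d <disp32>, then
+                    // 66 66 48 e8 <rel32>), matching this recipe's declared size of 16; linkers
+                    // rely on this exact padded layout when relaxing the general-dynamic TLS model.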
+ + // data16 lea gv@tlsgd(%rip),%rdi + sink.put1(0x66); // data16 + sink.put1(0b01001000); // rex.w + const LEA: u8 = 0x8d; + sink.put1(LEA); // lea + modrm_riprel(0b111/*out_reg0*/, sink); // 0x3d + sink.reloc_external(func.srclocs[inst], + Reloc::ElfX86_64TlsGd, + &func.global_values[global_value].symbol_name(), + -4); + sink.put4(0); + + // data16 data16 callq __tls_get_addr-4 + sink.put1(0x66); // data16 + sink.put1(0x66); // data16 + sink.put1(0b01001000); // rex.w + sink.put1(0xe8); // call + sink.reloc_external(func.srclocs[inst], + Reloc::X86CallPLTRel4, + &ExternalName::LibCall(LibCall::ElfTlsGetAddr), + -4); + sink.put4(0); + "#, + ), + ); + + recipes.add_recipe( + EncodingRecipeBuilder::new("macho_tls_get_addr", &formats.unary_global_value, 9) + // FIXME Correct encoding for non rax registers + .operands_out(vec![reg_rax]) + .emit( + r#" + // output %rax + // clobbers %rdi + + // movq gv@tlv(%rip), %rdi + sink.put1(0x48); // rex + sink.put1(0x8b); // mov + modrm_riprel(0b111/*out_reg0*/, sink); // 0x3d + sink.reloc_external(func.srclocs[inst], + Reloc::MachOX86_64Tlv, + &func.global_values[global_value].symbol_name(), + -4); + sink.put4(0); + + // callq *(%rdi) + sink.put1(0xff); + sink.put1(0x17); + "#, + ), + ); + + recipes.add_template( + Template::new( + EncodingRecipeBuilder::new("evex_reg_vvvv_rm_128", &formats.binary, 1) + .operands_in(vec![fpr, fpr]) + .operands_out(vec![fpr]) + .emit( + r#" + // instruction encoding operands: reg (op1, w), vvvv (op2, r), rm (op3, r) + // this maps to: out_reg0, in_reg0, in_reg1 + let context = EvexContext::Other { length: EvexVectorLength::V128 }; + let masking = EvexMasking::None; + put_evex(bits, out_reg0, in_reg0, in_reg1, context, masking, sink); // params: reg, vvvv, rm + modrm_rr(in_reg1, out_reg0, sink); // params: rm, reg + "#, + ), + regs).rex_kind(RecipePrefixKind::Evex) + ); + + recipes.add_template( + Template::new( + EncodingRecipeBuilder::new("evex_reg_rm_128", &formats.unary, 1) + .operands_in(vec![fpr]) + .operands_out(vec![fpr]) + .emit( + r#" + // instruction encoding operands: reg (op1, w), rm (op2, r) + // this maps to: out_reg0, in_reg0 + let context = EvexContext::Other { length: EvexVectorLength::V128 }; + let masking = EvexMasking::None; + put_evex(bits, out_reg0, 0, in_reg0, context, masking, sink); // params: reg, vvvv, rm + modrm_rr(in_reg0, out_reg0, sink); // params: rm, reg + "#, + ), + regs).rex_kind(RecipePrefixKind::Evex) + ); + + recipes +} diff --git a/third_party/rust/cranelift-codegen-meta/src/isa/x86/registers.rs b/third_party/rust/cranelift-codegen-meta/src/isa/x86/registers.rs new file mode 100644 index 0000000000..85a8965f89 --- /dev/null +++ b/third_party/rust/cranelift-codegen-meta/src/isa/x86/registers.rs @@ -0,0 +1,43 @@ +use crate::cdsl::regs::{IsaRegs, IsaRegsBuilder, RegBankBuilder, RegClassBuilder}; + +pub(crate) fn define() -> IsaRegs { + let mut regs = IsaRegsBuilder::new(); + + let builder = RegBankBuilder::new("FloatRegs", "xmm") + .units(16) + .track_pressure(true); + let float_regs = regs.add_bank(builder); + + let builder = RegBankBuilder::new("IntRegs", "r") + .units(16) + .names(vec!["rax", "rcx", "rdx", "rbx", "rsp", "rbp", "rsi", "rdi"]) + .track_pressure(true) + .pinned_reg(15); + let int_regs = regs.add_bank(builder); + + let builder = RegBankBuilder::new("FlagRegs", "") + .units(1) + .names(vec!["rflags"]) + .track_pressure(false); + let flag_reg = regs.add_bank(builder); + + let builder = RegClassBuilder::new_toplevel("GPR", int_regs); + let gpr = regs.add_class(builder); 
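+    // The remaining top-level classes (FPR, FLAG) and the subclasses follow: GPR8 is the subset
+    // of GPRs encodable without a REX prefix, and ABCD is the subset (rax, rcx, rdx, rbx) whose
+    // low byte can be addressed without REX.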
+ + let builder = RegClassBuilder::new_toplevel("FPR", float_regs); + let fpr = regs.add_class(builder); + + let builder = RegClassBuilder::new_toplevel("FLAG", flag_reg); + regs.add_class(builder); + + let builder = RegClassBuilder::subclass_of("GPR8", gpr, 0, 8); + let gpr8 = regs.add_class(builder); + + let builder = RegClassBuilder::subclass_of("ABCD", gpr8, 0, 4); + regs.add_class(builder); + + let builder = RegClassBuilder::subclass_of("FPR8", fpr, 0, 8); + regs.add_class(builder); + + regs.build() +} diff --git a/third_party/rust/cranelift-codegen-meta/src/isa/x86/settings.rs b/third_party/rust/cranelift-codegen-meta/src/isa/x86/settings.rs new file mode 100644 index 0000000000..dddd69abb3 --- /dev/null +++ b/third_party/rust/cranelift-codegen-meta/src/isa/x86/settings.rs @@ -0,0 +1,135 @@ +use crate::cdsl::settings::{PredicateNode, SettingGroup, SettingGroupBuilder}; + +pub(crate) fn define(shared: &SettingGroup) -> SettingGroup { + let mut settings = SettingGroupBuilder::new("x86"); + + // CPUID.01H:ECX + let has_sse3 = settings.add_bool("has_sse3", "SSE3: CPUID.01H:ECX.SSE3[bit 0]", false); + let has_ssse3 = settings.add_bool("has_ssse3", "SSSE3: CPUID.01H:ECX.SSSE3[bit 9]", false); + let has_sse41 = settings.add_bool("has_sse41", "SSE4.1: CPUID.01H:ECX.SSE4_1[bit 19]", false); + let has_sse42 = settings.add_bool("has_sse42", "SSE4.2: CPUID.01H:ECX.SSE4_2[bit 20]", false); + let has_avx = settings.add_bool("has_avx", "AVX: CPUID.01H:ECX.AVX[bit 28]", false); + let has_avx2 = settings.add_bool("has_avx2", "AVX2: CPUID.07H:EBX.AVX2[bit 5]", false); + let has_avx512dq = settings.add_bool( + "has_avx512dq", + "AVX512DQ: CPUID.07H:EBX.AVX512DQ[bit 17]", + false, + ); + let has_avx512vl = settings.add_bool( + "has_avx512vl", + "AVX512VL: CPUID.07H:EBX.AVX512VL[bit 31]", + false, + ); + let has_avx512f = settings.add_bool( + "has_avx512f", + "AVX512F: CPUID.07H:EBX.AVX512F[bit 16]", + false, + ); + let has_popcnt = settings.add_bool("has_popcnt", "POPCNT: CPUID.01H:ECX.POPCNT[bit 23]", false); + + // CPUID.(EAX=07H, ECX=0H):EBX + let has_bmi1 = settings.add_bool( + "has_bmi1", + "BMI1: CPUID.(EAX=07H, ECX=0H):EBX.BMI1[bit 3]", + false, + ); + let has_bmi2 = settings.add_bool( + "has_bmi2", + "BMI2: CPUID.(EAX=07H, ECX=0H):EBX.BMI2[bit 8]", + false, + ); + + // CPUID.EAX=80000001H:ECX + let has_lzcnt = settings.add_bool( + "has_lzcnt", + "LZCNT: CPUID.EAX=80000001H:ECX.LZCNT[bit 5]", + false, + ); + + let shared_enable_simd = shared.get_bool("enable_simd"); + + settings.add_predicate("use_ssse3", predicate!(has_ssse3)); + settings.add_predicate("use_sse41", predicate!(has_sse41)); + settings.add_predicate("use_sse42", predicate!(has_sse41 && has_sse42)); + + settings.add_predicate( + "use_ssse3_simd", + predicate!(shared_enable_simd && has_ssse3), + ); + settings.add_predicate( + "use_sse41_simd", + predicate!(shared_enable_simd && has_sse41), + ); + settings.add_predicate( + "use_sse42_simd", + predicate!(shared_enable_simd && has_sse41 && has_sse42), + ); + + settings.add_predicate("use_avx_simd", predicate!(shared_enable_simd && has_avx)); + settings.add_predicate("use_avx2_simd", predicate!(shared_enable_simd && has_avx2)); + settings.add_predicate( + "use_avx512dq_simd", + predicate!(shared_enable_simd && has_avx512dq), + ); + settings.add_predicate( + "use_avx512vl_simd", + predicate!(shared_enable_simd && has_avx512vl), + ); + settings.add_predicate( + "use_avx512f_simd", + predicate!(shared_enable_simd && has_avx512f), + ); + + settings.add_predicate("use_popcnt", 
predicate!(has_popcnt && has_sse42));
+    settings.add_predicate("use_bmi1", predicate!(has_bmi1));
+    settings.add_predicate("use_lzcnt", predicate!(has_lzcnt));
+
+    // Some shared boolean values are used in x86 instruction predicates, so we need to group them
+    // in the same TargetIsa, for compatibility with code generated by meta-python.
+    // TODO Once all the meta generation code has been migrated from Python to Rust, we can put it
+    // back in the shared SettingGroup, and use it in x86 instruction predicates.
+
+    let is_pic = shared.get_bool("is_pic");
+    let emit_all_ones_funcaddrs = shared.get_bool("emit_all_ones_funcaddrs");
+    settings.add_predicate("is_pic", predicate!(is_pic));
+    settings.add_predicate("not_is_pic", predicate!(!is_pic));
+    settings.add_predicate(
+        "all_ones_funcaddrs_and_not_is_pic",
+        predicate!(emit_all_ones_funcaddrs && !is_pic),
+    );
+    settings.add_predicate(
+        "not_all_ones_funcaddrs_and_not_is_pic",
+        predicate!(!emit_all_ones_funcaddrs && !is_pic),
+    );
+
+    // Presets corresponding to x86 CPUs.
+
+    settings.add_preset("baseline", preset!());
+    let nehalem = settings.add_preset(
+        "nehalem",
+        preset!(has_sse3 && has_ssse3 && has_sse41 && has_sse42 && has_popcnt),
+    );
+    let haswell = settings.add_preset(
+        "haswell",
+        preset!(nehalem && has_bmi1 && has_bmi2 && has_lzcnt),
+    );
+    let broadwell = settings.add_preset("broadwell", preset!(haswell));
+    let skylake = settings.add_preset("skylake", preset!(broadwell));
+    let cannonlake = settings.add_preset("cannonlake", preset!(skylake));
+    settings.add_preset("icelake", preset!(cannonlake));
+    settings.add_preset(
+        "znver1",
+        preset!(
+            has_sse3
+                && has_ssse3
+                && has_sse41
+                && has_sse42
+                && has_popcnt
+                && has_bmi1
+                && has_bmi2
+                && has_lzcnt
+        ),
+    );
+
+    settings.build()
+}
diff --git a/third_party/rust/cranelift-codegen-meta/src/lib.rs b/third_party/rust/cranelift-codegen-meta/src/lib.rs
new file mode 100644
index 0000000000..ead2c4442f
--- /dev/null
+++ b/third_party/rust/cranelift-codegen-meta/src/lib.rs
@@ -0,0 +1,124 @@
+//! This crate generates Rust sources for use by
+//! [`cranelift_codegen`](../cranelift_codegen/index.html).
+#[macro_use]
+mod cdsl;
+mod srcgen;
+
+pub mod error;
+pub mod isa;
+
+mod gen_binemit;
+mod gen_encodings;
+mod gen_inst;
+mod gen_legalizer;
+mod gen_registers;
+mod gen_settings;
+mod gen_types;
+
+mod default_map;
+mod shared;
+mod unique_table;
+
+/// Generate an ISA from an architecture string (e.g. "x86_64").
+pub fn isa_from_arch(arch: &str) -> Result<isa::Isa, String> {
+    isa::Isa::from_arch(arch).ok_or_else(|| format!("no supported isa found for arch `{}`", arch))
+}
+
+/// Generates all the Rust source files used in Cranelift from the meta-language.
+pub fn generate(
+    old_backend_isas: &[isa::Isa],
+    new_backend_isas: &[isa::Isa],
+    out_dir: &str,
+) -> Result<(), error::Error> {
+    // Create all the definitions:
+    // - common definitions.
+    let mut shared_defs = shared::define();
+
+    gen_settings::generate(
+        &shared_defs.settings,
+        gen_settings::ParentGroup::None,
+        "settings.rs",
+        &out_dir,
+    )?;
+    gen_types::generate("types.rs", &out_dir)?;
+
+    // - per ISA definitions.
+    let target_isas = isa::define(old_backend_isas, &mut shared_defs);
+
+    // At this point, all definitions are done.
+    let all_formats = shared_defs.verify_instruction_formats();
+
+    // Generate all the code.
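+    // Each gen_* call below writes one generated .rs file into `out_dir`; typically the caller is
+    // cranelift-codegen's build script, which passes its OUT_DIR and includes the results.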
+    gen_inst::generate(
+        all_formats,
+        &shared_defs.all_instructions,
+        "opcodes.rs",
+        "inst_builder.rs",
+        &out_dir,
+    )?;
+
+    let extra_legalization_groups: &[&'static str] = if !new_backend_isas.is_empty() {
+        // The new backend only requires the "expand" legalization group.
+        &["expand"]
+    } else {
+        &[]
+    };
+
+    gen_legalizer::generate(
+        &target_isas,
+        &shared_defs.transform_groups,
+        extra_legalization_groups,
+        "legalize",
+        &out_dir,
+    )?;
+
+    for isa in target_isas {
+        gen_registers::generate(&isa, &format!("registers-{}.rs", isa.name), &out_dir)?;
+
+        gen_settings::generate(
+            &isa.settings,
+            gen_settings::ParentGroup::Shared,
+            &format!("settings-{}.rs", isa.name),
+            &out_dir,
+        )?;
+
+        gen_encodings::generate(
+            &shared_defs,
+            &isa,
+            &format!("encoding-{}.rs", isa.name),
+            &out_dir,
+        )?;
+
+        gen_binemit::generate(
+            &isa.name,
+            &isa.recipes,
+            &format!("binemit-{}.rs", isa.name),
+            &out_dir,
+        )?;
+    }
+
+    for isa in new_backend_isas {
+        match isa {
+            isa::Isa::X86 => {
+                // If the old backend ISAs contained x86, this file has already been generated.
+                if old_backend_isas.iter().any(|isa| *isa == isa::Isa::X86) {
+                    continue;
+                }
+
+                let settings = crate::isa::x86::settings::define(&shared_defs.settings);
+                gen_settings::generate(
+                    &settings,
+                    gen_settings::ParentGroup::Shared,
+                    "settings-x86.rs",
+                    &out_dir,
+                )?;
+            }
+            isa::Isa::Arm64 => {
+                // aarch64 doesn't have platform-specific settings.
+            }
+            isa::Isa::Arm32 | isa::Isa::Riscv => todo!(),
+        }
+    }
+
+    Ok(())
+}
diff --git a/third_party/rust/cranelift-codegen-meta/src/shared/entities.rs b/third_party/rust/cranelift-codegen-meta/src/shared/entities.rs
new file mode 100644
index 0000000000..c3f2bc0387
--- /dev/null
+++ b/third_party/rust/cranelift-codegen-meta/src/shared/entities.rs
@@ -0,0 +1,73 @@
+use crate::cdsl::operands::{OperandKind, OperandKindFields};
+
+/// Small helper to initialize an OperandBuilder with the right kind, for a given name and doc.
+fn new(format_field_name: &'static str, rust_type: &'static str, doc: &'static str) -> OperandKind {
+    OperandKind::new(format_field_name, rust_type, OperandKindFields::EntityRef).with_doc(doc)
+}
+
+pub(crate) struct EntityRefs {
+    /// A reference to a basic block in the same function.
+    /// This is primarily used in control flow instructions.
+    pub(crate) block: OperandKind,
+
+    /// A reference to a stack slot declared in the function preamble.
+    pub(crate) stack_slot: OperandKind,
+
+    /// A reference to a global value.
+    pub(crate) global_value: OperandKind,
+
+    /// A reference to a function signature declared in the function preamble.
+    /// This is used to provide the call signature in a call_indirect instruction.
+    pub(crate) sig_ref: OperandKind,
+
+    /// A reference to an external function declared in the function preamble.
+    /// This is used to provide the callee and signature in a call instruction.
+    pub(crate) func_ref: OperandKind,
+
+    /// A reference to a jump table declared in the function preamble.
+    pub(crate) jump_table: OperandKind,
+
+    /// A reference to a heap declared in the function preamble.
+    pub(crate) heap: OperandKind,
+
+    /// A reference to a table declared in the function preamble.
+    pub(crate) table: OperandKind,
+
+    /// A variable-sized list of value operands. Use for Block and function call arguments.
+ pub(crate) varargs: OperandKind, +} + +impl EntityRefs { + pub fn new() -> Self { + Self { + block: new( + "destination", + "ir::Block", + "a basic block in the same function.", + ), + stack_slot: new("stack_slot", "ir::StackSlot", "A stack slot"), + + global_value: new("global_value", "ir::GlobalValue", "A global value."), + + sig_ref: new("sig_ref", "ir::SigRef", "A function signature."), + + func_ref: new("func_ref", "ir::FuncRef", "An external function."), + + jump_table: new("table", "ir::JumpTable", "A jump table."), + + heap: new("heap", "ir::Heap", "A heap."), + + table: new("table", "ir::Table", "A table."), + + varargs: OperandKind::new("", "&[Value]", OperandKindFields::VariableArgs).with_doc( + r#" + A variable size list of `value` operands. + + Use this to represent arguments passed to a function call, arguments + passed to a basic block, or a variable number of results + returned from an instruction. + "#, + ), + } + } +} diff --git a/third_party/rust/cranelift-codegen-meta/src/shared/formats.rs b/third_party/rust/cranelift-codegen-meta/src/shared/formats.rs new file mode 100644 index 0000000000..3d081951a5 --- /dev/null +++ b/third_party/rust/cranelift-codegen-meta/src/shared/formats.rs @@ -0,0 +1,330 @@ +use crate::cdsl::formats::{InstructionFormat, InstructionFormatBuilder as Builder}; +use crate::shared::{entities::EntityRefs, immediates::Immediates}; +use std::rc::Rc; + +pub(crate) struct Formats { + pub(crate) atomic_cas: Rc<InstructionFormat>, + pub(crate) atomic_rmw: Rc<InstructionFormat>, + pub(crate) binary: Rc<InstructionFormat>, + pub(crate) binary_imm8: Rc<InstructionFormat>, + pub(crate) binary_imm64: Rc<InstructionFormat>, + pub(crate) branch: Rc<InstructionFormat>, + pub(crate) branch_float: Rc<InstructionFormat>, + pub(crate) branch_icmp: Rc<InstructionFormat>, + pub(crate) branch_int: Rc<InstructionFormat>, + pub(crate) branch_table: Rc<InstructionFormat>, + pub(crate) branch_table_base: Rc<InstructionFormat>, + pub(crate) branch_table_entry: Rc<InstructionFormat>, + pub(crate) call: Rc<InstructionFormat>, + pub(crate) call_indirect: Rc<InstructionFormat>, + pub(crate) cond_trap: Rc<InstructionFormat>, + pub(crate) copy_special: Rc<InstructionFormat>, + pub(crate) copy_to_ssa: Rc<InstructionFormat>, + pub(crate) float_compare: Rc<InstructionFormat>, + pub(crate) float_cond: Rc<InstructionFormat>, + pub(crate) float_cond_trap: Rc<InstructionFormat>, + pub(crate) func_addr: Rc<InstructionFormat>, + pub(crate) heap_addr: Rc<InstructionFormat>, + pub(crate) indirect_jump: Rc<InstructionFormat>, + pub(crate) int_compare: Rc<InstructionFormat>, + pub(crate) int_compare_imm: Rc<InstructionFormat>, + pub(crate) int_cond: Rc<InstructionFormat>, + pub(crate) int_cond_trap: Rc<InstructionFormat>, + pub(crate) int_select: Rc<InstructionFormat>, + pub(crate) jump: Rc<InstructionFormat>, + pub(crate) load: Rc<InstructionFormat>, + pub(crate) load_complex: Rc<InstructionFormat>, + pub(crate) load_no_offset: Rc<InstructionFormat>, + pub(crate) multiary: Rc<InstructionFormat>, + pub(crate) nullary: Rc<InstructionFormat>, + pub(crate) reg_fill: Rc<InstructionFormat>, + pub(crate) reg_move: Rc<InstructionFormat>, + pub(crate) reg_spill: Rc<InstructionFormat>, + pub(crate) shuffle: Rc<InstructionFormat>, + pub(crate) stack_load: Rc<InstructionFormat>, + pub(crate) stack_store: Rc<InstructionFormat>, + pub(crate) store: Rc<InstructionFormat>, + pub(crate) store_complex: Rc<InstructionFormat>, + pub(crate) store_no_offset: Rc<InstructionFormat>, + pub(crate) table_addr: 
Rc<InstructionFormat>, + pub(crate) ternary: Rc<InstructionFormat>, + pub(crate) ternary_imm8: Rc<InstructionFormat>, + pub(crate) trap: Rc<InstructionFormat>, + pub(crate) unary: Rc<InstructionFormat>, + pub(crate) unary_bool: Rc<InstructionFormat>, + pub(crate) unary_const: Rc<InstructionFormat>, + pub(crate) unary_global_value: Rc<InstructionFormat>, + pub(crate) unary_ieee32: Rc<InstructionFormat>, + pub(crate) unary_ieee64: Rc<InstructionFormat>, + pub(crate) unary_imm: Rc<InstructionFormat>, +} + +impl Formats { + pub fn new(imm: &Immediates, entities: &EntityRefs) -> Self { + Self { + unary: Builder::new("Unary").value().build(), + + unary_imm: Builder::new("UnaryImm").imm(&imm.imm64).build(), + + unary_ieee32: Builder::new("UnaryIeee32").imm(&imm.ieee32).build(), + + unary_ieee64: Builder::new("UnaryIeee64").imm(&imm.ieee64).build(), + + unary_bool: Builder::new("UnaryBool").imm(&imm.boolean).build(), + + unary_const: Builder::new("UnaryConst").imm(&imm.pool_constant).build(), + + unary_global_value: Builder::new("UnaryGlobalValue") + .imm(&entities.global_value) + .build(), + + binary: Builder::new("Binary").value().value().build(), + + binary_imm8: Builder::new("BinaryImm8").value().imm(&imm.uimm8).build(), + + binary_imm64: Builder::new("BinaryImm64").value().imm(&imm.imm64).build(), + + // The select instructions are controlled by the second VALUE operand. + // The first VALUE operand is the controlling flag which has a derived type. + // The fma instruction has the same constraint on all inputs. + ternary: Builder::new("Ternary") + .value() + .value() + .value() + .typevar_operand(1) + .build(), + + ternary_imm8: Builder::new("TernaryImm8") + .value() + .imm(&imm.uimm8) + .value() + .build(), + + // Catch-all for instructions with many outputs and inputs and no immediate + // operands. 
+ multiary: Builder::new("MultiAry").varargs().build(), + + nullary: Builder::new("NullAry").build(), + + shuffle: Builder::new("Shuffle") + .value() + .value() + .imm_with_name("mask", &imm.uimm128) + .build(), + + int_compare: Builder::new("IntCompare") + .imm(&imm.intcc) + .value() + .value() + .build(), + + int_compare_imm: Builder::new("IntCompareImm") + .imm(&imm.intcc) + .value() + .imm(&imm.imm64) + .build(), + + int_cond: Builder::new("IntCond").imm(&imm.intcc).value().build(), + + float_compare: Builder::new("FloatCompare") + .imm(&imm.floatcc) + .value() + .value() + .build(), + + float_cond: Builder::new("FloatCond").imm(&imm.floatcc).value().build(), + + int_select: Builder::new("IntSelect") + .imm(&imm.intcc) + .value() + .value() + .value() + .build(), + + jump: Builder::new("Jump").imm(&entities.block).varargs().build(), + + branch: Builder::new("Branch") + .value() + .imm(&entities.block) + .varargs() + .build(), + + branch_int: Builder::new("BranchInt") + .imm(&imm.intcc) + .value() + .imm(&entities.block) + .varargs() + .build(), + + branch_float: Builder::new("BranchFloat") + .imm(&imm.floatcc) + .value() + .imm(&entities.block) + .varargs() + .build(), + + branch_icmp: Builder::new("BranchIcmp") + .imm(&imm.intcc) + .value() + .value() + .imm(&entities.block) + .varargs() + .build(), + + branch_table: Builder::new("BranchTable") + .value() + .imm(&entities.block) + .imm(&entities.jump_table) + .build(), + + branch_table_entry: Builder::new("BranchTableEntry") + .value() + .value() + .imm(&imm.uimm8) + .imm(&entities.jump_table) + .build(), + + branch_table_base: Builder::new("BranchTableBase") + .imm(&entities.jump_table) + .build(), + + indirect_jump: Builder::new("IndirectJump") + .value() + .imm(&entities.jump_table) + .build(), + + call: Builder::new("Call") + .imm(&entities.func_ref) + .varargs() + .build(), + + call_indirect: Builder::new("CallIndirect") + .imm(&entities.sig_ref) + .value() + .varargs() + .build(), + + func_addr: Builder::new("FuncAddr").imm(&entities.func_ref).build(), + + atomic_rmw: Builder::new("AtomicRmw") + .imm(&imm.memflags) + .imm(&imm.atomic_rmw_op) + .value() + .value() + .build(), + + atomic_cas: Builder::new("AtomicCas") + .imm(&imm.memflags) + .value() + .value() + .value() + .typevar_operand(2) + .build(), + + load: Builder::new("Load") + .imm(&imm.memflags) + .value() + .imm(&imm.offset32) + .build(), + + load_complex: Builder::new("LoadComplex") + .imm(&imm.memflags) + .varargs() + .imm(&imm.offset32) + .build(), + + load_no_offset: Builder::new("LoadNoOffset") + .imm(&imm.memflags) + .value() + .build(), + + store: Builder::new("Store") + .imm(&imm.memflags) + .value() + .value() + .imm(&imm.offset32) + .build(), + + store_complex: Builder::new("StoreComplex") + .imm(&imm.memflags) + .value() + .varargs() + .imm(&imm.offset32) + .build(), + + store_no_offset: Builder::new("StoreNoOffset") + .imm(&imm.memflags) + .value() + .value() + .build(), + + stack_load: Builder::new("StackLoad") + .imm(&entities.stack_slot) + .imm(&imm.offset32) + .build(), + + stack_store: Builder::new("StackStore") + .value() + .imm(&entities.stack_slot) + .imm(&imm.offset32) + .build(), + + // Accessing a WebAssembly heap. + heap_addr: Builder::new("HeapAddr") + .imm(&entities.heap) + .value() + .imm(&imm.uimm32) + .build(), + + // Accessing a WebAssembly table. 
+ table_addr: Builder::new("TableAddr") + .imm(&entities.table) + .value() + .imm(&imm.offset32) + .build(), + + reg_move: Builder::new("RegMove") + .value() + .imm_with_name("src", &imm.regunit) + .imm_with_name("dst", &imm.regunit) + .build(), + + copy_special: Builder::new("CopySpecial") + .imm_with_name("src", &imm.regunit) + .imm_with_name("dst", &imm.regunit) + .build(), + + copy_to_ssa: Builder::new("CopyToSsa") + .imm_with_name("src", &imm.regunit) + .build(), + + reg_spill: Builder::new("RegSpill") + .value() + .imm_with_name("src", &imm.regunit) + .imm_with_name("dst", &entities.stack_slot) + .build(), + + reg_fill: Builder::new("RegFill") + .value() + .imm_with_name("src", &entities.stack_slot) + .imm_with_name("dst", &imm.regunit) + .build(), + + trap: Builder::new("Trap").imm(&imm.trapcode).build(), + + cond_trap: Builder::new("CondTrap").value().imm(&imm.trapcode).build(), + + int_cond_trap: Builder::new("IntCondTrap") + .imm(&imm.intcc) + .value() + .imm(&imm.trapcode) + .build(), + + float_cond_trap: Builder::new("FloatCondTrap") + .imm(&imm.floatcc) + .value() + .imm(&imm.trapcode) + .build(), + } + } +} diff --git a/third_party/rust/cranelift-codegen-meta/src/shared/immediates.rs b/third_party/rust/cranelift-codegen-meta/src/shared/immediates.rs new file mode 100644 index 0000000000..0aa4129daf --- /dev/null +++ b/third_party/rust/cranelift-codegen-meta/src/shared/immediates.rs @@ -0,0 +1,175 @@ +use crate::cdsl::operands::{EnumValues, OperandKind, OperandKindFields}; + +use std::collections::HashMap; + +pub(crate) struct Immediates { + /// A 64-bit immediate integer operand. + /// + /// This type of immediate integer can interact with SSA values with any IntType type. + pub imm64: OperandKind, + + /// An unsigned 8-bit immediate integer operand. + /// + /// This small operand is used to indicate lane indexes in SIMD vectors and immediate bit + /// counts on shift instructions. + pub uimm8: OperandKind, + + /// An unsigned 32-bit immediate integer operand. + pub uimm32: OperandKind, + + /// An unsigned 128-bit immediate integer operand. + /// + /// This operand is used to pass entire 128-bit vectors as immediates to instructions like + /// const. + pub uimm128: OperandKind, + + /// A constant stored in the constant pool. + /// + /// This operand is used to pass constants to instructions like vconst while storing the + /// actual bytes in the constant pool. + pub pool_constant: OperandKind, + + /// A 32-bit immediate signed offset. + /// + /// This is used to represent an immediate address offset in load/store instructions. + pub offset32: OperandKind, + + /// A 32-bit immediate floating point operand. + /// + /// IEEE 754-2008 binary32 interchange format. + pub ieee32: OperandKind, + + /// A 64-bit immediate floating point operand. + /// + /// IEEE 754-2008 binary64 interchange format. + pub ieee64: OperandKind, + + /// An immediate boolean operand. + /// + /// This type of immediate boolean can interact with SSA values with any BoolType type. + pub boolean: OperandKind, + + /// A condition code for comparing integer values. + /// + /// This enumerated operand kind is used for the `icmp` instruction and corresponds to the + /// condcodes::IntCC` Rust type. + pub intcc: OperandKind, + + /// A condition code for comparing floating point values. + /// + /// This enumerated operand kind is used for the `fcmp` instruction and corresponds to the + /// `condcodes::FloatCC` Rust type. + pub floatcc: OperandKind, + + /// Flags for memory operations like `load` and `store`. 
+ pub memflags: OperandKind, + + /// A register unit in the current target ISA. + pub regunit: OperandKind, + + /// A trap code indicating the reason for trapping. + /// + /// The Rust enum type also has a `User(u16)` variant for user-provided trap codes. + pub trapcode: OperandKind, + + /// A code indicating the arithmetic operation to perform in an atomic_rmw memory access. + pub atomic_rmw_op: OperandKind, +} + +fn new_imm(format_field_name: &'static str, rust_type: &'static str) -> OperandKind { + OperandKind::new(format_field_name, rust_type, OperandKindFields::ImmValue) +} +fn new_enum( + format_field_name: &'static str, + rust_type: &'static str, + values: EnumValues, +) -> OperandKind { + OperandKind::new( + format_field_name, + rust_type, + OperandKindFields::ImmEnum(values), + ) +} + +impl Immediates { + pub fn new() -> Self { + Self { + imm64: new_imm("imm", "ir::immediates::Imm64").with_doc("A 64-bit immediate integer."), + uimm8: new_imm("imm", "ir::immediates::Uimm8") + .with_doc("An 8-bit immediate unsigned integer."), + uimm32: new_imm("imm", "ir::immediates::Uimm32") + .with_doc("A 32-bit immediate unsigned integer."), + uimm128: new_imm("imm", "ir::Immediate") + .with_doc("A 128-bit immediate unsigned integer."), + pool_constant: new_imm("constant_handle", "ir::Constant") + .with_doc("A constant stored in the constant pool."), + offset32: new_imm("offset", "ir::immediates::Offset32") + .with_doc("A 32-bit immediate signed offset."), + ieee32: new_imm("imm", "ir::immediates::Ieee32") + .with_doc("A 32-bit immediate floating point number."), + ieee64: new_imm("imm", "ir::immediates::Ieee64") + .with_doc("A 64-bit immediate floating point number."), + boolean: new_imm("imm", "bool").with_doc("An immediate boolean."), + intcc: { + let mut intcc_values = HashMap::new(); + intcc_values.insert("eq", "Equal"); + intcc_values.insert("ne", "NotEqual"); + intcc_values.insert("sge", "SignedGreaterThanOrEqual"); + intcc_values.insert("sgt", "SignedGreaterThan"); + intcc_values.insert("sle", "SignedLessThanOrEqual"); + intcc_values.insert("slt", "SignedLessThan"); + intcc_values.insert("uge", "UnsignedGreaterThanOrEqual"); + intcc_values.insert("ugt", "UnsignedGreaterThan"); + intcc_values.insert("ule", "UnsignedLessThanOrEqual"); + intcc_values.insert("ult", "UnsignedLessThan"); + intcc_values.insert("of", "Overflow"); + intcc_values.insert("nof", "NotOverflow"); + new_enum("cond", "ir::condcodes::IntCC", intcc_values) + .with_doc("An integer comparison condition code.") + }, + + floatcc: { + let mut floatcc_values = HashMap::new(); + floatcc_values.insert("ord", "Ordered"); + floatcc_values.insert("uno", "Unordered"); + floatcc_values.insert("eq", "Equal"); + floatcc_values.insert("ne", "NotEqual"); + floatcc_values.insert("one", "OrderedNotEqual"); + floatcc_values.insert("ueq", "UnorderedOrEqual"); + floatcc_values.insert("lt", "LessThan"); + floatcc_values.insert("le", "LessThanOrEqual"); + floatcc_values.insert("gt", "GreaterThan"); + floatcc_values.insert("ge", "GreaterThanOrEqual"); + floatcc_values.insert("ult", "UnorderedOrLessThan"); + floatcc_values.insert("ule", "UnorderedOrLessThanOrEqual"); + floatcc_values.insert("ugt", "UnorderedOrGreaterThan"); + floatcc_values.insert("uge", "UnorderedOrGreaterThanOrEqual"); + new_enum("cond", "ir::condcodes::FloatCC", floatcc_values) + .with_doc("A floating point comparison condition code") + }, + + memflags: new_imm("flags", "ir::MemFlags").with_doc("Memory operation flags"), + regunit: new_imm("regunit", "isa::RegUnit") + 
.with_doc("A register unit in the target ISA"), + trapcode: { + let mut trapcode_values = HashMap::new(); + trapcode_values.insert("stk_ovf", "StackOverflow"); + trapcode_values.insert("heap_oob", "HeapOutOfBounds"); + trapcode_values.insert("int_ovf", "IntegerOverflow"); + trapcode_values.insert("int_divz", "IntegerDivisionByZero"); + new_enum("code", "ir::TrapCode", trapcode_values).with_doc("A trap reason code.") + }, + atomic_rmw_op: { + let mut atomic_rmw_op_values = HashMap::new(); + atomic_rmw_op_values.insert("add", "Add"); + atomic_rmw_op_values.insert("sub", "Sub"); + atomic_rmw_op_values.insert("and", "And"); + atomic_rmw_op_values.insert("or", "Or"); + atomic_rmw_op_values.insert("xor", "Xor"); + atomic_rmw_op_values.insert("xchg", "Xchg"); + new_enum("op", "ir::AtomicRmwOp", atomic_rmw_op_values) + .with_doc("Atomic Read-Modify-Write Ops") + }, + } + } +} diff --git a/third_party/rust/cranelift-codegen-meta/src/shared/instructions.rs b/third_party/rust/cranelift-codegen-meta/src/shared/instructions.rs new file mode 100644 index 0000000000..bd1444d79c --- /dev/null +++ b/third_party/rust/cranelift-codegen-meta/src/shared/instructions.rs @@ -0,0 +1,4514 @@ +#![allow(non_snake_case)] + +use crate::cdsl::instructions::{ + AllInstructions, InstructionBuilder as Inst, InstructionGroup, InstructionGroupBuilder, +}; +use crate::cdsl::operands::Operand; +use crate::cdsl::type_inference::Constraint::WiderOrEq; +use crate::cdsl::types::{LaneType, ValueType}; +use crate::cdsl::typevar::{Interval, TypeSetBuilder, TypeVar}; +use crate::shared::formats::Formats; +use crate::shared::types; +use crate::shared::{entities::EntityRefs, immediates::Immediates}; + +#[inline(never)] +fn define_control_flow( + ig: &mut InstructionGroupBuilder, + formats: &Formats, + imm: &Immediates, + entities: &EntityRefs, +) { + let block = &Operand::new("block", &entities.block).with_doc("Destination basic block"); + let args = &Operand::new("args", &entities.varargs).with_doc("block arguments"); + + ig.push( + Inst::new( + "jump", + r#" + Jump. + + Unconditionally jump to a basic block, passing the specified + block arguments. The number and types of arguments must match the + destination block. + "#, + &formats.jump, + ) + .operands_in(vec![block, args]) + .is_terminator(true) + .is_branch(true), + ); + + ig.push( + Inst::new( + "fallthrough", + r#" + Fall through to the next block. + + This is the same as `jump`, except the destination block must be + the next one in the layout. + + Jumps are turned into fall-through instructions by the branch + relaxation pass. There is no reason to use this instruction outside + that pass. + "#, + &formats.jump, + ) + .operands_in(vec![block, args]) + .is_terminator(true) + .is_branch(true), + ); + + let Testable = &TypeVar::new( + "Testable", + "A scalar boolean or integer type", + TypeSetBuilder::new() + .ints(Interval::All) + .bools(Interval::All) + .build(), + ); + + { + let c = &Operand::new("c", Testable).with_doc("Controlling value to test"); + + ig.push( + Inst::new( + "brz", + r#" + Branch when zero. + + If ``c`` is a `b1` value, take the branch when ``c`` is false. If + ``c`` is an integer value, take the branch when ``c = 0``. + "#, + &formats.branch, + ) + .operands_in(vec![c, block, args]) + .is_branch(true), + ); + + ig.push( + Inst::new( + "brnz", + r#" + Branch when non-zero. + + If ``c`` is a `b1` value, take the branch when ``c`` is true. If + ``c`` is an integer value, take the branch when ``c != 0``. 
+ "#, + &formats.branch, + ) + .operands_in(vec![c, block, args]) + .is_branch(true), + ); + } + + let iB = &TypeVar::new( + "iB", + "A scalar integer type", + TypeSetBuilder::new().ints(Interval::All).build(), + ); + let iflags: &TypeVar = &ValueType::Special(types::Flag::IFlags.into()).into(); + let fflags: &TypeVar = &ValueType::Special(types::Flag::FFlags.into()).into(); + + { + let Cond = &Operand::new("Cond", &imm.intcc); + let x = &Operand::new("x", iB); + let y = &Operand::new("y", iB); + + ig.push( + Inst::new( + "br_icmp", + r#" + Compare scalar integers and branch. + + Compare ``x`` and ``y`` in the same way as the `icmp` instruction + and take the branch if the condition is true: + + ```text + br_icmp ugt v1, v2, block4(v5, v6) + ``` + + is semantically equivalent to: + + ```text + v10 = icmp ugt, v1, v2 + brnz v10, block4(v5, v6) + ``` + + Some RISC architectures like MIPS and RISC-V provide instructions that + implement all or some of the condition codes. The instruction can also + be used to represent *macro-op fusion* on architectures like Intel's. + "#, + &formats.branch_icmp, + ) + .operands_in(vec![Cond, x, y, block, args]) + .is_branch(true), + ); + + let f = &Operand::new("f", iflags); + + ig.push( + Inst::new( + "brif", + r#" + Branch when condition is true in integer CPU flags. + "#, + &formats.branch_int, + ) + .operands_in(vec![Cond, f, block, args]) + .is_branch(true), + ); + } + + { + let Cond = &Operand::new("Cond", &imm.floatcc); + + let f = &Operand::new("f", fflags); + + ig.push( + Inst::new( + "brff", + r#" + Branch when condition is true in floating point CPU flags. + "#, + &formats.branch_float, + ) + .operands_in(vec![Cond, f, block, args]) + .is_branch(true), + ); + } + + { + let x = &Operand::new("x", iB).with_doc("index into jump table"); + let JT = &Operand::new("JT", &entities.jump_table); + + ig.push( + Inst::new( + "br_table", + r#" + Indirect branch via jump table. + + Use ``x`` as an unsigned index into the jump table ``JT``. If a jump + table entry is found, branch to the corresponding block. If no entry was + found or the index is out-of-bounds, branch to the given default block. + + Note that this branch instruction can't pass arguments to the targeted + blocks. Split critical edges as needed to work around this. + + Do not confuse this with "tables" in WebAssembly. ``br_table`` is for + jump tables with destinations within the current function only -- think + of a ``match`` in Rust or a ``switch`` in C. If you want to call a + function in a dynamic library, that will typically use + ``call_indirect``. + "#, + &formats.branch_table, + ) + .operands_in(vec![x, block, JT]) + .is_terminator(true) + .is_branch(true), + ); + } + + let iAddr = &TypeVar::new( + "iAddr", + "An integer address type", + TypeSetBuilder::new().ints(32..64).refs(32..64).build(), + ); + + { + let x = &Operand::new("x", iAddr).with_doc("index into jump table"); + let addr = &Operand::new("addr", iAddr); + let Size = &Operand::new("Size", &imm.uimm8).with_doc("Size in bytes"); + let JT = &Operand::new("JT", &entities.jump_table); + let entry = &Operand::new("entry", iAddr).with_doc("entry of jump table"); + + ig.push( + Inst::new( + "jump_table_entry", + r#" + Get an entry from a jump table. + + Load a serialized ``entry`` from a jump table ``JT`` at a given index + ``addr`` with a specific ``Size``. The retrieved entry may need to be + decoded after loading, depending upon the jump table type used. 
+ + Currently, the only type supported is entries which are relative to the + base of the jump table. + "#, + &formats.branch_table_entry, + ) + .operands_in(vec![x, addr, Size, JT]) + .operands_out(vec![entry]) + .can_load(true), + ); + + ig.push( + Inst::new( + "jump_table_base", + r#" + Get the absolute base address of a jump table. + + This is used for jump tables wherein the entries are stored relative to + the base of jump table. In order to use these, generated code should first + load an entry using ``jump_table_entry``, then use this instruction to add + the relative base back to it. + "#, + &formats.branch_table_base, + ) + .operands_in(vec![JT]) + .operands_out(vec![addr]), + ); + + ig.push( + Inst::new( + "indirect_jump_table_br", + r#" + Branch indirectly via a jump table entry. + + Unconditionally jump via a jump table entry that was previously loaded + with the ``jump_table_entry`` instruction. + "#, + &formats.indirect_jump, + ) + .operands_in(vec![addr, JT]) + .is_indirect_branch(true) + .is_terminator(true) + .is_branch(true), + ); + } + + ig.push( + Inst::new( + "debugtrap", + r#" + Encodes an assembly debug trap. + "#, + &formats.nullary, + ) + .other_side_effects(true) + .can_load(true) + .can_store(true), + ); + + { + let code = &Operand::new("code", &imm.trapcode); + ig.push( + Inst::new( + "trap", + r#" + Terminate execution unconditionally. + "#, + &formats.trap, + ) + .operands_in(vec![code]) + .can_trap(true) + .is_terminator(true), + ); + + let c = &Operand::new("c", Testable).with_doc("Controlling value to test"); + ig.push( + Inst::new( + "trapz", + r#" + Trap when zero. + + if ``c`` is non-zero, execution continues at the following instruction. + "#, + &formats.cond_trap, + ) + .operands_in(vec![c, code]) + .can_trap(true), + ); + + ig.push( + Inst::new( + "resumable_trap", + r#" + A resumable trap. + + This instruction allows non-conditional traps to be used as non-terminal instructions. + "#, + &formats.trap, + ) + .operands_in(vec![code]) + .can_trap(true), + ); + + let c = &Operand::new("c", Testable).with_doc("Controlling value to test"); + ig.push( + Inst::new( + "trapnz", + r#" + Trap when non-zero. + + If ``c`` is zero, execution continues at the following instruction. + "#, + &formats.cond_trap, + ) + .operands_in(vec![c, code]) + .can_trap(true), + ); + + ig.push( + Inst::new( + "resumable_trapnz", + r#" + A resumable trap to be called when the passed condition is non-zero. + + If ``c`` is zero, execution continues at the following instruction. + "#, + &formats.cond_trap, + ) + .operands_in(vec![c, code]) + .can_trap(true), + ); + + let Cond = &Operand::new("Cond", &imm.intcc); + let f = &Operand::new("f", iflags); + ig.push( + Inst::new( + "trapif", + r#" + Trap when condition is true in integer CPU flags. + "#, + &formats.int_cond_trap, + ) + .operands_in(vec![Cond, f, code]) + .can_trap(true), + ); + + let Cond = &Operand::new("Cond", &imm.floatcc); + let f = &Operand::new("f", fflags); + let code = &Operand::new("code", &imm.trapcode); + ig.push( + Inst::new( + "trapff", + r#" + Trap when condition is true in floating point CPU flags. + "#, + &formats.float_cond_trap, + ) + .operands_in(vec![Cond, f, code]) + .can_trap(true), + ); + } + + let rvals = &Operand::new("rvals", &entities.varargs).with_doc("return values"); + ig.push( + Inst::new( + "return", + r#" + Return from the function. + + Unconditionally transfer control to the calling function, passing the + provided return values. 
The list of return values must match the + function signature's return types. + "#, + &formats.multiary, + ) + .operands_in(vec![rvals]) + .is_return(true) + .is_terminator(true), + ); + + let rvals = &Operand::new("rvals", &entities.varargs).with_doc("return values"); + ig.push( + Inst::new( + "fallthrough_return", + r#" + Return from the function by fallthrough. + + This is a specialized instruction for use where one wants to append + a custom epilogue, which will then perform the real return. This + instruction has no encoding. + "#, + &formats.multiary, + ) + .operands_in(vec![rvals]) + .is_return(true) + .is_terminator(true), + ); + + let FN = &Operand::new("FN", &entities.func_ref) + .with_doc("function to call, declared by `function`"); + let args = &Operand::new("args", &entities.varargs).with_doc("call arguments"); + let rvals = &Operand::new("rvals", &entities.varargs).with_doc("return values"); + ig.push( + Inst::new( + "call", + r#" + Direct function call. + + Call a function which has been declared in the preamble. The argument + types must match the function's signature. + "#, + &formats.call, + ) + .operands_in(vec![FN, args]) + .operands_out(vec![rvals]) + .is_call(true), + ); + + let SIG = &Operand::new("SIG", &entities.sig_ref).with_doc("function signature"); + let callee = &Operand::new("callee", iAddr).with_doc("address of function to call"); + let args = &Operand::new("args", &entities.varargs).with_doc("call arguments"); + let rvals = &Operand::new("rvals", &entities.varargs).with_doc("return values"); + ig.push( + Inst::new( + "call_indirect", + r#" + Indirect function call. + + Call the function pointed to by `callee` with the given arguments. The + called function must match the specified signature. + + Note that this is different from WebAssembly's ``call_indirect``; the + callee is a native address, rather than a table index. For WebAssembly, + `table_addr` and `load` are used to obtain a native address + from a table. + "#, + &formats.call_indirect, + ) + .operands_in(vec![SIG, callee, args]) + .operands_out(vec![rvals]) + .is_call(true), + ); + + let FN = &Operand::new("FN", &entities.func_ref) + .with_doc("function to call, declared by `function`"); + let addr = &Operand::new("addr", iAddr); + ig.push( + Inst::new( + "func_addr", + r#" + Get the address of a function. + + Compute the absolute address of a function declared in the preamble. + The returned address can be used as a ``callee`` argument to + `call_indirect`. This is also a method for calling functions that + are too far away to be addressable by a direct `call` + instruction. + "#, + &formats.func_addr, + ) + .operands_in(vec![FN]) + .operands_out(vec![addr]), + ); +} + +#[inline(never)] +fn define_simd_lane_access( + ig: &mut InstructionGroupBuilder, + formats: &Formats, + imm: &Immediates, + _: &EntityRefs, +) { + let TxN = &TypeVar::new( + "TxN", + "A SIMD vector type", + TypeSetBuilder::new() + .ints(Interval::All) + .floats(Interval::All) + .bools(Interval::All) + .simd_lanes(Interval::All) + .includes_scalars(false) + .build(), + ); + + let x = &Operand::new("x", &TxN.lane_of()).with_doc("Value to splat to all lanes"); + let a = &Operand::new("a", TxN); + + ig.push( + Inst::new( + "splat", + r#" + Vector splat. + + Return a vector whose lanes are all ``x``. 
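+ + For example, a ``splat.i32x4`` of the scalar value 7 produces a vector whose four ``i32`` lanes are all 7.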
+ "#, + &formats.unary, + ) + .operands_in(vec![x]) + .operands_out(vec![a]), + ); + + let I8x16 = &TypeVar::new( + "I8x16", + "A SIMD vector type consisting of 16 lanes of 8-bit integers", + TypeSetBuilder::new() + .ints(8..8) + .simd_lanes(16..16) + .includes_scalars(false) + .build(), + ); + let x = &Operand::new("x", I8x16).with_doc("Vector to modify by re-arranging lanes"); + let y = &Operand::new("y", I8x16).with_doc("Mask for re-arranging lanes"); + + ig.push( + Inst::new( + "swizzle", + r#" + Vector swizzle. + + Returns a new vector with byte-width lanes selected from the lanes of the first input + vector ``x`` specified in the second input vector ``s``. The indices ``i`` in range + ``[0, 15]`` select the ``i``-th element of ``x``. For indices outside of the range the + resulting lane is 0. Note that this operates on byte-width lanes. + "#, + &formats.binary, + ) + .operands_in(vec![x, y]) + .operands_out(vec![a]), + ); + + let x = &Operand::new("x", TxN).with_doc("The vector to modify"); + let y = &Operand::new("y", &TxN.lane_of()).with_doc("New lane value"); + let Idx = &Operand::new("Idx", &imm.uimm8).with_doc("Lane index"); + + ig.push( + Inst::new( + "insertlane", + r#" + Insert ``y`` as lane ``Idx`` in x. + + The lane index, ``Idx``, is an immediate value, not an SSA value. It + must indicate a valid lane index for the type of ``x``. + "#, + &formats.ternary_imm8, + ) + .operands_in(vec![x, y, Idx]) + .operands_out(vec![a]), + ); + + let x = &Operand::new("x", TxN); + let a = &Operand::new("a", &TxN.lane_of()); + + ig.push( + Inst::new( + "extractlane", + r#" + Extract lane ``Idx`` from ``x``. + + The lane index, ``Idx``, is an immediate value, not an SSA value. It + must indicate a valid lane index for the type of ``x``. Note that the upper bits of ``a`` + may or may not be zeroed depending on the ISA but the type system should prevent using + ``a`` as anything other than the extracted value. + "#, + &formats.binary_imm8, + ) + .operands_in(vec![x, Idx]) + .operands_out(vec![a]), + ); +} + +#[inline(never)] +fn define_simd_arithmetic( + ig: &mut InstructionGroupBuilder, + formats: &Formats, + _: &Immediates, + _: &EntityRefs, +) { + let Int = &TypeVar::new( + "Int", + "A scalar or vector integer type", + TypeSetBuilder::new() + .ints(Interval::All) + .simd_lanes(Interval::All) + .build(), + ); + + let a = &Operand::new("a", Int); + let x = &Operand::new("x", Int); + let y = &Operand::new("y", Int); + + ig.push( + Inst::new( + "imin", + r#" + Signed integer minimum. + "#, + &formats.binary, + ) + .operands_in(vec![x, y]) + .operands_out(vec![a]), + ); + + ig.push( + Inst::new( + "umin", + r#" + Unsigned integer minimum. + "#, + &formats.binary, + ) + .operands_in(vec![x, y]) + .operands_out(vec![a]), + ); + + ig.push( + Inst::new( + "imax", + r#" + Signed integer maximum. + "#, + &formats.binary, + ) + .operands_in(vec![x, y]) + .operands_out(vec![a]), + ); + + ig.push( + Inst::new( + "umax", + r#" + Unsigned integer maximum. 
+ "#, + &formats.binary, + ) + .operands_in(vec![x, y]) + .operands_out(vec![a]), + ); + + let IxN = &TypeVar::new( + "IxN", + "A SIMD vector type containing integers", + TypeSetBuilder::new() + .ints(Interval::All) + .simd_lanes(Interval::All) + .includes_scalars(false) + .build(), + ); + + let a = &Operand::new("a", IxN); + let x = &Operand::new("x", IxN); + let y = &Operand::new("y", IxN); + + ig.push( + Inst::new( + "avg_round", + r#" + Unsigned average with rounding: `a := (x + y + 1) // 2` + "#, + &formats.binary, + ) + .operands_in(vec![x, y]) + .operands_out(vec![a]), + ); +} + +#[allow(clippy::many_single_char_names)] +pub(crate) fn define( + all_instructions: &mut AllInstructions, + formats: &Formats, + imm: &Immediates, + entities: &EntityRefs, +) -> InstructionGroup { + let mut ig = InstructionGroupBuilder::new(all_instructions); + + define_control_flow(&mut ig, formats, imm, entities); + define_simd_lane_access(&mut ig, formats, imm, entities); + define_simd_arithmetic(&mut ig, formats, imm, entities); + + // Operand kind shorthands. + let iflags: &TypeVar = &ValueType::Special(types::Flag::IFlags.into()).into(); + let fflags: &TypeVar = &ValueType::Special(types::Flag::FFlags.into()).into(); + + let b1: &TypeVar = &ValueType::from(LaneType::from(types::Bool::B1)).into(); + let f32_: &TypeVar = &ValueType::from(LaneType::from(types::Float::F32)).into(); + let f64_: &TypeVar = &ValueType::from(LaneType::from(types::Float::F64)).into(); + + // Starting definitions. + let Int = &TypeVar::new( + "Int", + "A scalar or vector integer type", + TypeSetBuilder::new() + .ints(Interval::All) + .simd_lanes(Interval::All) + .build(), + ); + + let Bool = &TypeVar::new( + "Bool", + "A scalar or vector boolean type", + TypeSetBuilder::new() + .bools(Interval::All) + .simd_lanes(Interval::All) + .build(), + ); + + let iB = &TypeVar::new( + "iB", + "A scalar integer type", + TypeSetBuilder::new().ints(Interval::All).build(), + ); + + let iAddr = &TypeVar::new( + "iAddr", + "An integer address type", + TypeSetBuilder::new().ints(32..64).refs(32..64).build(), + ); + + let Ref = &TypeVar::new( + "Ref", + "A scalar reference type", + TypeSetBuilder::new().refs(Interval::All).build(), + ); + + let Testable = &TypeVar::new( + "Testable", + "A scalar boolean or integer type", + TypeSetBuilder::new() + .ints(Interval::All) + .bools(Interval::All) + .build(), + ); + + let TxN = &TypeVar::new( + "TxN", + "A SIMD vector type", + TypeSetBuilder::new() + .ints(Interval::All) + .floats(Interval::All) + .bools(Interval::All) + .simd_lanes(Interval::All) + .includes_scalars(false) + .build(), + ); + let Any = &TypeVar::new( + "Any", + "Any integer, float, boolean, or reference scalar or vector type", + TypeSetBuilder::new() + .ints(Interval::All) + .floats(Interval::All) + .bools(Interval::All) + .refs(Interval::All) + .simd_lanes(Interval::All) + .includes_scalars(true) + .build(), + ); + + let AnyTo = &TypeVar::copy_from(Any, "AnyTo".to_string()); + + let Mem = &TypeVar::new( + "Mem", + "Any type that can be stored in memory", + TypeSetBuilder::new() + .ints(Interval::All) + .floats(Interval::All) + .simd_lanes(Interval::All) + .refs(Interval::All) + .build(), + ); + + let MemTo = &TypeVar::copy_from(Mem, "MemTo".to_string()); + + let addr = &Operand::new("addr", iAddr); + + let SS = &Operand::new("SS", &entities.stack_slot); + let Offset = &Operand::new("Offset", &imm.offset32).with_doc("Byte offset from base address"); + let x = &Operand::new("x", Mem).with_doc("Value to be stored"); + let a = 
&Operand::new("a", Mem).with_doc("Value loaded"); + let p = &Operand::new("p", iAddr); + let MemFlags = &Operand::new("MemFlags", &imm.memflags); + let args = &Operand::new("args", &entities.varargs).with_doc("Address arguments"); + + ig.push( + Inst::new( + "load", + r#" + Load from memory at ``p + Offset``. + + This is a polymorphic instruction that can load any value type which + has a memory representation. + "#, + &formats.load, + ) + .operands_in(vec![MemFlags, p, Offset]) + .operands_out(vec![a]) + .can_load(true), + ); + + ig.push( + Inst::new( + "load_complex", + r#" + Load from memory at ``sum(args) + Offset``. + + This is a polymorphic instruction that can load any value type which + has a memory representation. + "#, + &formats.load_complex, + ) + .operands_in(vec![MemFlags, args, Offset]) + .operands_out(vec![a]) + .can_load(true), + ); + + ig.push( + Inst::new( + "store", + r#" + Store ``x`` to memory at ``p + Offset``. + + This is a polymorphic instruction that can store any value type with a + memory representation. + "#, + &formats.store, + ) + .operands_in(vec![MemFlags, x, p, Offset]) + .can_store(true), + ); + + ig.push( + Inst::new( + "store_complex", + r#" + Store ``x`` to memory at ``sum(args) + Offset``. + + This is a polymorphic instruction that can store any value type with a + memory representation. + "#, + &formats.store_complex, + ) + .operands_in(vec![MemFlags, x, args, Offset]) + .can_store(true), + ); + + let iExt8 = &TypeVar::new( + "iExt8", + "An integer type with more than 8 bits", + TypeSetBuilder::new().ints(16..64).build(), + ); + let x = &Operand::new("x", iExt8); + let a = &Operand::new("a", iExt8); + + ig.push( + Inst::new( + "uload8", + r#" + Load 8 bits from memory at ``p + Offset`` and zero-extend. + + This is equivalent to ``load.i8`` followed by ``uextend``. + "#, + &formats.load, + ) + .operands_in(vec![MemFlags, p, Offset]) + .operands_out(vec![a]) + .can_load(true), + ); + + ig.push( + Inst::new( + "uload8_complex", + r#" + Load 8 bits from memory at ``sum(args) + Offset`` and zero-extend. + + This is equivalent to ``load.i8`` followed by ``uextend``. + "#, + &formats.load_complex, + ) + .operands_in(vec![MemFlags, args, Offset]) + .operands_out(vec![a]) + .can_load(true), + ); + + ig.push( + Inst::new( + "sload8", + r#" + Load 8 bits from memory at ``p + Offset`` and sign-extend. + + This is equivalent to ``load.i8`` followed by ``sextend``. + "#, + &formats.load, + ) + .operands_in(vec![MemFlags, p, Offset]) + .operands_out(vec![a]) + .can_load(true), + ); + + ig.push( + Inst::new( + "sload8_complex", + r#" + Load 8 bits from memory at ``sum(args) + Offset`` and sign-extend. + + This is equivalent to ``load.i8`` followed by ``sextend``. + "#, + &formats.load_complex, + ) + .operands_in(vec![MemFlags, args, Offset]) + .operands_out(vec![a]) + .can_load(true), + ); + + ig.push( + Inst::new( + "istore8", + r#" + Store the low 8 bits of ``x`` to memory at ``p + Offset``. + + This is equivalent to ``ireduce.i8`` followed by ``store.i8``. + "#, + &formats.store, + ) + .operands_in(vec![MemFlags, x, p, Offset]) + .can_store(true), + ); + + ig.push( + Inst::new( + "istore8_complex", + r#" + Store the low 8 bits of ``x`` to memory at ``sum(args) + Offset``. + + This is equivalent to ``ireduce.i8`` followed by ``store.i8``. 
+ "#, + &formats.store_complex, + ) + .operands_in(vec![MemFlags, x, args, Offset]) + .can_store(true), + ); + + let iExt16 = &TypeVar::new( + "iExt16", + "An integer type with more than 16 bits", + TypeSetBuilder::new().ints(32..64).build(), + ); + let x = &Operand::new("x", iExt16); + let a = &Operand::new("a", iExt16); + + ig.push( + Inst::new( + "uload16", + r#" + Load 16 bits from memory at ``p + Offset`` and zero-extend. + + This is equivalent to ``load.i16`` followed by ``uextend``. + "#, + &formats.load, + ) + .operands_in(vec![MemFlags, p, Offset]) + .operands_out(vec![a]) + .can_load(true), + ); + + ig.push( + Inst::new( + "uload16_complex", + r#" + Load 16 bits from memory at ``sum(args) + Offset`` and zero-extend. + + This is equivalent to ``load.i16`` followed by ``uextend``. + "#, + &formats.load_complex, + ) + .operands_in(vec![MemFlags, args, Offset]) + .operands_out(vec![a]) + .can_load(true), + ); + + ig.push( + Inst::new( + "sload16", + r#" + Load 16 bits from memory at ``p + Offset`` and sign-extend. + + This is equivalent to ``load.i16`` followed by ``sextend``. + "#, + &formats.load, + ) + .operands_in(vec![MemFlags, p, Offset]) + .operands_out(vec![a]) + .can_load(true), + ); + + ig.push( + Inst::new( + "sload16_complex", + r#" + Load 16 bits from memory at ``sum(args) + Offset`` and sign-extend. + + This is equivalent to ``load.i16`` followed by ``sextend``. + "#, + &formats.load_complex, + ) + .operands_in(vec![MemFlags, args, Offset]) + .operands_out(vec![a]) + .can_load(true), + ); + + ig.push( + Inst::new( + "istore16", + r#" + Store the low 16 bits of ``x`` to memory at ``p + Offset``. + + This is equivalent to ``ireduce.i16`` followed by ``store.i16``. + "#, + &formats.store, + ) + .operands_in(vec![MemFlags, x, p, Offset]) + .can_store(true), + ); + + ig.push( + Inst::new( + "istore16_complex", + r#" + Store the low 16 bits of ``x`` to memory at ``sum(args) + Offset``. + + This is equivalent to ``ireduce.i16`` followed by ``store.i16``. + "#, + &formats.store_complex, + ) + .operands_in(vec![MemFlags, x, args, Offset]) + .can_store(true), + ); + + let iExt32 = &TypeVar::new( + "iExt32", + "An integer type with more than 32 bits", + TypeSetBuilder::new().ints(64..64).build(), + ); + let x = &Operand::new("x", iExt32); + let a = &Operand::new("a", iExt32); + + ig.push( + Inst::new( + "uload32", + r#" + Load 32 bits from memory at ``p + Offset`` and zero-extend. + + This is equivalent to ``load.i32`` followed by ``uextend``. + "#, + &formats.load, + ) + .operands_in(vec![MemFlags, p, Offset]) + .operands_out(vec![a]) + .can_load(true), + ); + + ig.push( + Inst::new( + "uload32_complex", + r#" + Load 32 bits from memory at ``sum(args) + Offset`` and zero-extend. + + This is equivalent to ``load.i32`` followed by ``uextend``. + "#, + &formats.load_complex, + ) + .operands_in(vec![MemFlags, args, Offset]) + .operands_out(vec![a]) + .can_load(true), + ); + + ig.push( + Inst::new( + "sload32", + r#" + Load 32 bits from memory at ``p + Offset`` and sign-extend. + + This is equivalent to ``load.i32`` followed by ``sextend``. + "#, + &formats.load, + ) + .operands_in(vec![MemFlags, p, Offset]) + .operands_out(vec![a]) + .can_load(true), + ); + + ig.push( + Inst::new( + "sload32_complex", + r#" + Load 32 bits from memory at ``sum(args) + Offset`` and sign-extend. + + This is equivalent to ``load.i32`` followed by ``sextend``. 
+ "#, + &formats.load_complex, + ) + .operands_in(vec![MemFlags, args, Offset]) + .operands_out(vec![a]) + .can_load(true), + ); + + ig.push( + Inst::new( + "istore32", + r#" + Store the low 32 bits of ``x`` to memory at ``p + Offset``. + + This is equivalent to ``ireduce.i32`` followed by ``store.i32``. + "#, + &formats.store, + ) + .operands_in(vec![MemFlags, x, p, Offset]) + .can_store(true), + ); + + ig.push( + Inst::new( + "istore32_complex", + r#" + Store the low 32 bits of ``x`` to memory at ``sum(args) + Offset``. + + This is equivalent to ``ireduce.i32`` followed by ``store.i32``. + "#, + &formats.store_complex, + ) + .operands_in(vec![MemFlags, x, args, Offset]) + .can_store(true), + ); + + let I16x8 = &TypeVar::new( + "I16x8", + "A SIMD vector with exactly 8 lanes of 16-bit values", + TypeSetBuilder::new() + .ints(16..16) + .simd_lanes(8..8) + .includes_scalars(false) + .build(), + ); + let a = &Operand::new("a", I16x8).with_doc("Value loaded"); + + ig.push( + Inst::new( + "uload8x8", + r#" + Load an 8x8 vector (64 bits) from memory at ``p + Offset`` and zero-extend into an i16x8 + vector. + "#, + &formats.load, + ) + .operands_in(vec![MemFlags, p, Offset]) + .operands_out(vec![a]) + .can_load(true), + ); + + ig.push( + Inst::new( + "uload8x8_complex", + r#" + Load an 8x8 vector (64 bits) from memory at ``sum(args) + Offset`` and zero-extend into an + i16x8 vector. + "#, + &formats.load_complex, + ) + .operands_in(vec![MemFlags, args, Offset]) + .operands_out(vec![a]) + .can_load(true), + ); + + ig.push( + Inst::new( + "sload8x8", + r#" + Load an 8x8 vector (64 bits) from memory at ``p + Offset`` and sign-extend into an i16x8 + vector. + "#, + &formats.load, + ) + .operands_in(vec![MemFlags, p, Offset]) + .operands_out(vec![a]) + .can_load(true), + ); + + ig.push( + Inst::new( + "sload8x8_complex", + r#" + Load an 8x8 vector (64 bits) from memory at ``sum(args) + Offset`` and sign-extend into an + i16x8 vector. + "#, + &formats.load_complex, + ) + .operands_in(vec![MemFlags, args, Offset]) + .operands_out(vec![a]) + .can_load(true), + ); + + let I32x4 = &TypeVar::new( + "I32x4", + "A SIMD vector with exactly 4 lanes of 32-bit values", + TypeSetBuilder::new() + .ints(32..32) + .simd_lanes(4..4) + .includes_scalars(false) + .build(), + ); + let a = &Operand::new("a", I32x4).with_doc("Value loaded"); + + ig.push( + Inst::new( + "uload16x4", + r#" + Load a 16x4 vector (64 bits) from memory at ``p + Offset`` and zero-extend into an i32x4 + vector. + "#, + &formats.load, + ) + .operands_in(vec![MemFlags, p, Offset]) + .operands_out(vec![a]) + .can_load(true), + ); + + ig.push( + Inst::new( + "uload16x4_complex", + r#" + Load a 16x4 vector (64 bits) from memory at ``sum(args) + Offset`` and zero-extend into an + i32x4 vector. + "#, + &formats.load_complex, + ) + .operands_in(vec![MemFlags, args, Offset]) + .operands_out(vec![a]) + .can_load(true), + ); + + ig.push( + Inst::new( + "sload16x4", + r#" + Load a 16x4 vector (64 bits) from memory at ``p + Offset`` and sign-extend into an i32x4 + vector. + "#, + &formats.load, + ) + .operands_in(vec![MemFlags, p, Offset]) + .operands_out(vec![a]) + .can_load(true), + ); + + ig.push( + Inst::new( + "sload16x4_complex", + r#" + Load a 16x4 vector (64 bits) from memory at ``sum(args) + Offset`` and sign-extend into an + i32x4 vector. 
+ "#, + &formats.load_complex, + ) + .operands_in(vec![MemFlags, args, Offset]) + .operands_out(vec![a]) + .can_load(true), + ); + + let I64x2 = &TypeVar::new( + "I64x2", + "A SIMD vector with exactly 2 lanes of 64-bit values", + TypeSetBuilder::new() + .ints(64..64) + .simd_lanes(2..2) + .includes_scalars(false) + .build(), + ); + let a = &Operand::new("a", I64x2).with_doc("Value loaded"); + + ig.push( + Inst::new( + "uload32x2", + r#" + Load an 32x2 vector (64 bits) from memory at ``p + Offset`` and zero-extend into an i64x2 + vector. + "#, + &formats.load, + ) + .operands_in(vec![MemFlags, p, Offset]) + .operands_out(vec![a]) + .can_load(true), + ); + + ig.push( + Inst::new( + "uload32x2_complex", + r#" + Load a 32x2 vector (64 bits) from memory at ``sum(args) + Offset`` and zero-extend into an + i64x2 vector. + "#, + &formats.load_complex, + ) + .operands_in(vec![MemFlags, args, Offset]) + .operands_out(vec![a]) + .can_load(true), + ); + + ig.push( + Inst::new( + "sload32x2", + r#" + Load a 32x2 vector (64 bits) from memory at ``p + Offset`` and sign-extend into an i64x2 + vector. + "#, + &formats.load, + ) + .operands_in(vec![MemFlags, p, Offset]) + .operands_out(vec![a]) + .can_load(true), + ); + + ig.push( + Inst::new( + "sload32x2_complex", + r#" + Load a 32x2 vector (64 bits) from memory at ``sum(args) + Offset`` and sign-extend into an + i64x2 vector. + "#, + &formats.load_complex, + ) + .operands_in(vec![MemFlags, args, Offset]) + .operands_out(vec![a]) + .can_load(true), + ); + + let x = &Operand::new("x", Mem).with_doc("Value to be stored"); + let a = &Operand::new("a", Mem).with_doc("Value loaded"); + let Offset = + &Operand::new("Offset", &imm.offset32).with_doc("In-bounds offset into stack slot"); + + ig.push( + Inst::new( + "stack_load", + r#" + Load a value from a stack slot at the constant offset. + + This is a polymorphic instruction that can load any value type which + has a memory representation. + + The offset is an immediate constant, not an SSA value. The memory + access cannot go out of bounds, i.e. + `sizeof(a) + Offset <= sizeof(SS)`. + "#, + &formats.stack_load, + ) + .operands_in(vec![SS, Offset]) + .operands_out(vec![a]) + .can_load(true), + ); + + ig.push( + Inst::new( + "stack_store", + r#" + Store a value to a stack slot at a constant offset. + + This is a polymorphic instruction that can store any value type with a + memory representation. + + The offset is an immediate constant, not an SSA value. The memory + access cannot go out of bounds, i.e. + `sizeof(a) + Offset <= sizeof(SS)`. + "#, + &formats.stack_store, + ) + .operands_in(vec![x, SS, Offset]) + .can_store(true), + ); + + ig.push( + Inst::new( + "stack_addr", + r#" + Get the address of a stack slot. + + Compute the absolute address of a byte in a stack slot. The offset must + refer to a byte inside the stack slot: + `0 <= Offset < sizeof(SS)`. + "#, + &formats.stack_load, + ) + .operands_in(vec![SS, Offset]) + .operands_out(vec![addr]), + ); + + let GV = &Operand::new("GV", &entities.global_value); + + ig.push( + Inst::new( + "global_value", + r#" + Compute the value of global GV. + "#, + &formats.unary_global_value, + ) + .operands_in(vec![GV]) + .operands_out(vec![a]), + ); + + ig.push( + Inst::new( + "symbol_value", + r#" + Compute the value of global GV, which is a symbolic value. 
+ "#, + &formats.unary_global_value, + ) + .operands_in(vec![GV]) + .operands_out(vec![a]), + ); + + ig.push( + Inst::new( + "tls_value", + r#" + Compute the value of global GV, which is a TLS (thread local storage) value. + "#, + &formats.unary_global_value, + ) + .operands_in(vec![GV]) + .operands_out(vec![a]), + ); + + let HeapOffset = &TypeVar::new( + "HeapOffset", + "An unsigned heap offset", + TypeSetBuilder::new().ints(32..64).build(), + ); + + let H = &Operand::new("H", &entities.heap); + let p = &Operand::new("p", HeapOffset); + let Size = &Operand::new("Size", &imm.uimm32).with_doc("Size in bytes"); + + ig.push( + Inst::new( + "heap_addr", + r#" + Bounds check and compute absolute address of heap memory. + + Verify that the offset range ``p .. p + Size - 1`` is in bounds for the + heap H, and generate an absolute address that is safe to dereference. + + 1. If ``p + Size`` is not greater than the heap bound, return an + absolute address corresponding to a byte offset of ``p`` from the + heap's base address. + 2. If ``p + Size`` is greater than the heap bound, generate a trap. + "#, + &formats.heap_addr, + ) + .operands_in(vec![H, p, Size]) + .operands_out(vec![addr]), + ); + + // Note this instruction is marked as having other side-effects, so GVN won't try to hoist it, + // which would result in it being subject to spilling. While not hoisting would generally hurt + // performance, since a computed value used many times may need to be regenerated before each + // use, it is not the case here: this instruction doesn't generate any code. That's because, + // by definition the pinned register is never used by the register allocator, but is written to + // and read explicitly and exclusively by set_pinned_reg and get_pinned_reg. + ig.push( + Inst::new( + "get_pinned_reg", + r#" + Gets the content of the pinned register, when it's enabled. + "#, + &formats.nullary, + ) + .operands_out(vec![addr]) + .other_side_effects(true), + ); + + ig.push( + Inst::new( + "set_pinned_reg", + r#" + Sets the content of the pinned register, when it's enabled. + "#, + &formats.unary, + ) + .operands_in(vec![addr]) + .other_side_effects(true), + ); + + let TableOffset = &TypeVar::new( + "TableOffset", + "An unsigned table offset", + TypeSetBuilder::new().ints(32..64).build(), + ); + let T = &Operand::new("T", &entities.table); + let p = &Operand::new("p", TableOffset); + let Offset = + &Operand::new("Offset", &imm.offset32).with_doc("Byte offset from element address"); + + ig.push( + Inst::new( + "table_addr", + r#" + Bounds check and compute absolute address of a table entry. + + Verify that the offset ``p`` is in bounds for the table T, and generate + an absolute address that is safe to dereference. + + ``Offset`` must be less than the size of a table element. + + 1. If ``p`` is not greater than the table bound, return an absolute + address corresponding to a byte offset of ``p`` from the table's + base address. + 2. If ``p`` is greater than the table bound, generate a trap. + "#, + &formats.table_addr, + ) + .operands_in(vec![T, p, Offset]) + .operands_out(vec![addr]), + ); + + let N = &Operand::new("N", &imm.imm64); + let a = &Operand::new("a", Int).with_doc("A constant integer scalar or vector value"); + + ig.push( + Inst::new( + "iconst", + r#" + Integer constant. + + Create a scalar integer SSA value with an immediate constant value, or + an integer vector where all the lanes have the same value. 
+ "#, + &formats.unary_imm, + ) + .operands_in(vec![N]) + .operands_out(vec![a]), + ); + + let N = &Operand::new("N", &imm.ieee32); + let a = &Operand::new("a", f32_).with_doc("A constant f32 scalar value"); + + ig.push( + Inst::new( + "f32const", + r#" + Floating point constant. + + Create a `f32` SSA value with an immediate constant value. + "#, + &formats.unary_ieee32, + ) + .operands_in(vec![N]) + .operands_out(vec![a]), + ); + + let N = &Operand::new("N", &imm.ieee64); + let a = &Operand::new("a", f64_).with_doc("A constant f64 scalar value"); + + ig.push( + Inst::new( + "f64const", + r#" + Floating point constant. + + Create a `f64` SSA value with an immediate constant value. + "#, + &formats.unary_ieee64, + ) + .operands_in(vec![N]) + .operands_out(vec![a]), + ); + + let N = &Operand::new("N", &imm.boolean); + let a = &Operand::new("a", Bool).with_doc("A constant boolean scalar or vector value"); + + ig.push( + Inst::new( + "bconst", + r#" + Boolean constant. + + Create a scalar boolean SSA value with an immediate constant value, or + a boolean vector where all the lanes have the same value. + "#, + &formats.unary_bool, + ) + .operands_in(vec![N]) + .operands_out(vec![a]), + ); + + let N = &Operand::new("N", &imm.pool_constant) + .with_doc("The 16 immediate bytes of a 128-bit vector"); + let a = &Operand::new("a", TxN).with_doc("A constant vector value"); + + ig.push( + Inst::new( + "vconst", + r#" + SIMD vector constant. + + Construct a vector with the given immediate bytes. + "#, + &formats.unary_const, + ) + .operands_in(vec![N]) + .operands_out(vec![a]), + ); + + let constant = + &Operand::new("constant", &imm.pool_constant).with_doc("A constant in the constant pool"); + let address = &Operand::new("address", iAddr); + ig.push( + Inst::new( + "const_addr", + r#" + Calculate the base address of a value in the constant pool. + "#, + &formats.unary_const, + ) + .operands_in(vec![constant]) + .operands_out(vec![address]), + ); + + let mask = &Operand::new("mask", &imm.uimm128) + .with_doc("The 16 immediate bytes used for selecting the elements to shuffle"); + let Tx16 = &TypeVar::new( + "Tx16", + "A SIMD vector with exactly 16 lanes of 8-bit values; eventually this may support other \ + lane counts and widths", + TypeSetBuilder::new() + .ints(8..8) + .bools(8..8) + .simd_lanes(16..16) + .includes_scalars(false) + .build(), + ); + let a = &Operand::new("a", Tx16).with_doc("A vector value"); + let b = &Operand::new("b", Tx16).with_doc("A vector value"); + + ig.push( + Inst::new( + "shuffle", + r#" + SIMD vector shuffle. + + Shuffle two vectors using the given immediate bytes. For each of the 16 bytes of the + immediate, a value i of 0-15 selects the i-th element of the first vector and a value i of + 16-31 selects the (i-16)th element of the second vector. Immediate values outside of the + 0-31 range place a 0 in the resulting vector lane. + "#, + &formats.shuffle, + ) + .operands_in(vec![a, b, mask]) + .operands_out(vec![a]), + ); + + let a = &Operand::new("a", Ref).with_doc("A constant reference null value"); + + ig.push( + Inst::new( + "null", + r#" + Null constant value for reference types. + + Create a scalar reference SSA value with a constant null value. + "#, + &formats.nullary, + ) + .operands_out(vec![a]), + ); + + ig.push(Inst::new( + "nop", + r#" + Just a dummy instruction. + + Note: this doesn't compile to a machine code nop. 
+ "#, + &formats.nullary, + )); + + let c = &Operand::new("c", Testable).with_doc("Controlling value to test"); + let x = &Operand::new("x", Any).with_doc("Value to use when `c` is true"); + let y = &Operand::new("y", Any).with_doc("Value to use when `c` is false"); + let a = &Operand::new("a", Any); + + ig.push( + Inst::new( + "select", + r#" + Conditional select. + + This instruction selects whole values. Use `vselect` for + lane-wise selection. + "#, + &formats.ternary, + ) + .operands_in(vec![c, x, y]) + .operands_out(vec![a]), + ); + + let cc = &Operand::new("cc", &imm.intcc).with_doc("Controlling condition code"); + let flags = &Operand::new("flags", iflags).with_doc("The machine's flag register"); + + ig.push( + Inst::new( + "selectif", + r#" + Conditional select, dependent on integer condition codes. + "#, + &formats.int_select, + ) + .operands_in(vec![cc, flags, x, y]) + .operands_out(vec![a]), + ); + + ig.push( + Inst::new( + "selectif_spectre_guard", + r#" + Conditional select intended for Spectre guards. + + This operation is semantically equivalent to a selectif instruction. + However, it is guaranteed to not be removed or otherwise altered by any + optimization pass, and is guaranteed to result in a conditional-move + instruction, not a branch-based lowering. As such, it is suitable + for use when producing Spectre guards. For example, a bounds-check + may guard against unsafe speculation past a bounds-check conditional + branch by passing the address or index to be accessed through a + conditional move, also gated on the same condition. Because no + Spectre-vulnerable processors are known to perform speculation on + conditional move instructions, this is guaranteed to pick the + correct input. If the selected input in case of overflow is a "safe" + value, for example a null pointer that causes an exception in the + speculative path, this ensures that no Spectre vulnerability will + exist. + "#, + &formats.int_select, + ) + .operands_in(vec![cc, flags, x, y]) + .operands_out(vec![a]) + .other_side_effects(true), + ); + + let c = &Operand::new("c", Any).with_doc("Controlling value to test"); + ig.push( + Inst::new( + "bitselect", + r#" + Conditional select of bits. + + For each bit in `c`, this instruction selects the corresponding bit from `x` if the bit + in `c` is 1 and the corresponding bit from `y` if the bit in `c` is 0. See also: + `select`, `vselect`. + "#, + &formats.ternary, + ) + .operands_in(vec![c, x, y]) + .operands_out(vec![a]), + ); + + let x = &Operand::new("x", Any); + + ig.push( + Inst::new( + "copy", + r#" + Register-register copy. + + This instruction copies its input, preserving the value type. + + A pure SSA-form program does not need to copy values, but this + instruction is useful for representing intermediate stages during + instruction transformations, and the register allocator needs a way of + representing register copies. + "#, + &formats.unary, + ) + .operands_in(vec![x]) + .operands_out(vec![a]), + ); + + ig.push( + Inst::new( + "spill", + r#" + Spill a register value to a stack slot. + + This instruction behaves exactly like `copy`, but the result + value is assigned to a spill slot. + "#, + &formats.unary, + ) + .operands_in(vec![x]) + .operands_out(vec![a]) + .can_store(true), + ); + + ig.push( + Inst::new( + "fill", + r#" + Load a register value from a stack slot. + + This instruction behaves exactly like `copy`, but creates a new + SSA value for the spilled input value. 
+ "#, + &formats.unary, + ) + .operands_in(vec![x]) + .operands_out(vec![a]) + .can_load(true), + ); + + ig.push( + Inst::new( + "fill_nop", + r#" + This is identical to `fill`, except it has no encoding, since it is a no-op. + + This instruction is created only during late-stage redundant-reload removal, after all + registers and stack slots have been assigned. It is used to replace `fill`s that have + been identified as redundant. + "#, + &formats.unary, + ) + .operands_in(vec![x]) + .operands_out(vec![a]) + .can_load(true), + ); + + let Sarg = &TypeVar::new( + "Sarg", + "Any scalar or vector type with at most 128 lanes", + TypeSetBuilder::new() + .specials(vec![crate::cdsl::types::SpecialType::StructArgument]) + .build(), + ); + let sarg_t = &Operand::new("sarg_t", Sarg); + + // FIXME remove once the old style codegen backends are removed. + ig.push( + Inst::new( + "dummy_sarg_t", + r#" + This creates a sarg_t + + This instruction is internal and should not be created by + Cranelift users. + "#, + &formats.nullary, + ) + .operands_in(vec![]) + .operands_out(vec![sarg_t]), + ); + + let src = &Operand::new("src", &imm.regunit); + let dst = &Operand::new("dst", &imm.regunit); + + ig.push( + Inst::new( + "regmove", + r#" + Temporarily divert ``x`` from ``src`` to ``dst``. + + This instruction moves the location of a value from one register to + another without creating a new SSA value. It is used by the register + allocator to temporarily rearrange register assignments in order to + satisfy instruction constraints. + + The register diversions created by this instruction must be undone + before the value leaves the block. At the entry to a new block, all live + values must be in their originally assigned registers. + "#, + &formats.reg_move, + ) + .operands_in(vec![x, src, dst]) + .other_side_effects(true), + ); + + ig.push( + Inst::new( + "copy_special", + r#" + Copies the contents of ''src'' register to ''dst'' register. + + This instructions copies the contents of one register to another + register without involving any SSA values. This is used for copying + special registers, e.g. copying the stack register to the frame + register in a function prologue. + "#, + &formats.copy_special, + ) + .operands_in(vec![src, dst]) + .other_side_effects(true), + ); + + ig.push( + Inst::new( + "copy_to_ssa", + r#" + Copies the contents of ''src'' register to ''a'' SSA name. + + This instruction copies the contents of one register, regardless of its SSA name, to + another register, creating a new SSA name. In that sense it is a one-sided version + of ''copy_special''. This instruction is internal and should not be created by + Cranelift users. + "#, + &formats.copy_to_ssa, + ) + .operands_in(vec![src]) + .operands_out(vec![a]) + .other_side_effects(true), + ); + + ig.push( + Inst::new( + "copy_nop", + r#" + Stack-slot-to-the-same-stack-slot copy, which is guaranteed to turn + into a no-op. This instruction is for use only within Cranelift itself. + + This instruction copies its input, preserving the value type. + "#, + &formats.unary, + ) + .operands_in(vec![x]) + .operands_out(vec![a]), + ); + + let delta = &Operand::new("delta", Int); + + ig.push( + Inst::new( + "adjust_sp_down", + r#" + Subtracts ``delta`` offset value from the stack pointer register. + + This instruction is used to adjust the stack pointer by a dynamic amount. 
+ "#, + &formats.unary, + ) + .operands_in(vec![delta]) + .other_side_effects(true), + ); + + let Offset = &Operand::new("Offset", &imm.imm64).with_doc("Offset from current stack pointer"); + + ig.push( + Inst::new( + "adjust_sp_up_imm", + r#" + Adds ``Offset`` immediate offset value to the stack pointer register. + + This instruction is used to adjust the stack pointer, primarily in function + prologues and epilogues. ``Offset`` is constrained to the size of a signed + 32-bit integer. + "#, + &formats.unary_imm, + ) + .operands_in(vec![Offset]) + .other_side_effects(true), + ); + + let Offset = &Operand::new("Offset", &imm.imm64).with_doc("Offset from current stack pointer"); + + ig.push( + Inst::new( + "adjust_sp_down_imm", + r#" + Subtracts ``Offset`` immediate offset value from the stack pointer + register. + + This instruction is used to adjust the stack pointer, primarily in function + prologues and epilogues. ``Offset`` is constrained to the size of a signed + 32-bit integer. + "#, + &formats.unary_imm, + ) + .operands_in(vec![Offset]) + .other_side_effects(true), + ); + + let f = &Operand::new("f", iflags); + + ig.push( + Inst::new( + "ifcmp_sp", + r#" + Compare ``addr`` with the stack pointer and set the CPU flags. + + This is like `ifcmp` where ``addr`` is the LHS operand and the stack + pointer is the RHS. + "#, + &formats.unary, + ) + .operands_in(vec![addr]) + .operands_out(vec![f]), + ); + + ig.push( + Inst::new( + "regspill", + r#" + Temporarily divert ``x`` from ``src`` to ``SS``. + + This instruction moves the location of a value from a register to a + stack slot without creating a new SSA value. It is used by the register + allocator to temporarily rearrange register assignments in order to + satisfy instruction constraints. + + See also `regmove`. + "#, + &formats.reg_spill, + ) + .operands_in(vec![x, src, SS]) + .other_side_effects(true), + ); + + ig.push( + Inst::new( + "regfill", + r#" + Temporarily divert ``x`` from ``SS`` to ``dst``. + + This instruction moves the location of a value from a stack slot to a + register without creating a new SSA value. It is used by the register + allocator to temporarily rearrange register assignments in order to + satisfy instruction constraints. + + See also `regmove`. + "#, + &formats.reg_fill, + ) + .operands_in(vec![x, SS, dst]) + .other_side_effects(true), + ); + + let N = + &Operand::new("args", &entities.varargs).with_doc("Variable number of args for StackMap"); + + ig.push( + Inst::new( + "safepoint", + r#" + This instruction will provide live reference values at a point in + the function. It can only be used by the compiler. + "#, + &formats.multiary, + ) + .operands_in(vec![N]) + .other_side_effects(true), + ); + + let x = &Operand::new("x", TxN).with_doc("Vector to split"); + let lo = &Operand::new("lo", &TxN.half_vector()).with_doc("Low-numbered lanes of `x`"); + let hi = &Operand::new("hi", &TxN.half_vector()).with_doc("High-numbered lanes of `x`"); + + ig.push( + Inst::new( + "vsplit", + r#" + Split a vector into two halves. + + Split the vector `x` into two separate values, each containing half of + the lanes from ``x``. The result may be two scalars if ``x`` only had + two lanes. 
+ "#, + &formats.unary, + ) + .operands_in(vec![x]) + .operands_out(vec![lo, hi]) + .is_ghost(true), + ); + + let Any128 = &TypeVar::new( + "Any128", + "Any scalar or vector type with as most 128 lanes", + TypeSetBuilder::new() + .ints(Interval::All) + .floats(Interval::All) + .bools(Interval::All) + .simd_lanes(1..128) + .includes_scalars(true) + .build(), + ); + + let x = &Operand::new("x", Any128).with_doc("Low-numbered lanes"); + let y = &Operand::new("y", Any128).with_doc("High-numbered lanes"); + let a = &Operand::new("a", &Any128.double_vector()).with_doc("Concatenation of `x` and `y`"); + + ig.push( + Inst::new( + "vconcat", + r#" + Vector concatenation. + + Return a vector formed by concatenating ``x`` and ``y``. The resulting + vector type has twice as many lanes as each of the inputs. The lanes of + ``x`` appear as the low-numbered lanes, and the lanes of ``y`` become + the high-numbered lanes of ``a``. + + It is possible to form a vector by concatenating two scalars. + "#, + &formats.binary, + ) + .operands_in(vec![x, y]) + .operands_out(vec![a]) + .is_ghost(true), + ); + + let c = &Operand::new("c", &TxN.as_bool()).with_doc("Controlling vector"); + let x = &Operand::new("x", TxN).with_doc("Value to use where `c` is true"); + let y = &Operand::new("y", TxN).with_doc("Value to use where `c` is false"); + let a = &Operand::new("a", TxN); + + ig.push( + Inst::new( + "vselect", + r#" + Vector lane select. + + Select lanes from ``x`` or ``y`` controlled by the lanes of the boolean + vector ``c``. + "#, + &formats.ternary, + ) + .operands_in(vec![c, x, y]) + .operands_out(vec![a]), + ); + + let s = &Operand::new("s", b1); + + ig.push( + Inst::new( + "vany_true", + r#" + Reduce a vector to a scalar boolean. + + Return a scalar boolean true if any lane in ``a`` is non-zero, false otherwise. + "#, + &formats.unary, + ) + .operands_in(vec![a]) + .operands_out(vec![s]), + ); + + ig.push( + Inst::new( + "vall_true", + r#" + Reduce a vector to a scalar boolean. + + Return a scalar boolean true if all lanes in ``i`` are non-zero, false otherwise. + "#, + &formats.unary, + ) + .operands_in(vec![a]) + .operands_out(vec![s]), + ); + + let a = &Operand::new("a", TxN); + let x = &Operand::new("x", Int); + + ig.push( + Inst::new( + "vhigh_bits", + r#" + Reduce a vector to a scalar integer. + + Return a scalar integer, consisting of the concatenation of the most significant bit + of each lane of ``a``. + "#, + &formats.unary, + ) + .operands_in(vec![a]) + .operands_out(vec![x]), + ); + + let a = &Operand::new("a", &Int.as_bool()); + let Cond = &Operand::new("Cond", &imm.intcc); + let x = &Operand::new("x", Int); + let y = &Operand::new("y", Int); + + ig.push( + Inst::new( + "icmp", + r#" + Integer comparison. + + The condition code determines if the operands are interpreted as signed + or unsigned integers. + + | Signed | Unsigned | Condition | + |--------|----------|-----------------------| + | eq | eq | Equal | + | ne | ne | Not equal | + | slt | ult | Less than | + | sge | uge | Greater than or equal | + | sgt | ugt | Greater than | + | sle | ule | Less than or equal | + | of | * | Overflow | + | nof | * | No Overflow | + + \* The unsigned version of overflow conditions have ISA-specific + semantics and thus have been kept as methods on the TargetIsa trait as + [unsigned_add_overflow_condition][isa::TargetIsa::unsigned_add_overflow_condition] and + [unsigned_sub_overflow_condition][isa::TargetIsa::unsigned_sub_overflow_condition]. 
+ + When this instruction compares integer vectors, it returns a boolean + vector of lane-wise comparisons. + "#, + &formats.int_compare, + ) + .operands_in(vec![Cond, x, y]) + .operands_out(vec![a]), + ); + + let a = &Operand::new("a", b1); + let x = &Operand::new("x", iB); + let Y = &Operand::new("Y", &imm.imm64); + + ig.push( + Inst::new( + "icmp_imm", + r#" + Compare scalar integer to a constant. + + This is the same as the `icmp` instruction, except one operand is + an immediate constant. + + This instruction can only compare scalars. Use `icmp` for + lane-wise vector comparisons. + "#, + &formats.int_compare_imm, + ) + .operands_in(vec![Cond, x, Y]) + .operands_out(vec![a]), + ); + + let f = &Operand::new("f", iflags); + let x = &Operand::new("x", iB); + let y = &Operand::new("y", iB); + + ig.push( + Inst::new( + "ifcmp", + r#" + Compare scalar integers and return flags. + + Compare two scalar integer values and return integer CPU flags + representing the result. + "#, + &formats.binary, + ) + .operands_in(vec![x, y]) + .operands_out(vec![f]), + ); + + ig.push( + Inst::new( + "ifcmp_imm", + r#" + Compare scalar integer to a constant and return flags. + + Like `icmp_imm`, but returns integer CPU flags instead of testing + a specific condition code. + "#, + &formats.binary_imm64, + ) + .operands_in(vec![x, Y]) + .operands_out(vec![f]), + ); + + let a = &Operand::new("a", Int); + let x = &Operand::new("x", Int); + let y = &Operand::new("y", Int); + + ig.push( + Inst::new( + "iadd", + r#" + Wrapping integer addition: `a := x + y \pmod{2^B}`. + + This instruction does not depend on the signed/unsigned interpretation + of the operands. + "#, + &formats.binary, + ) + .operands_in(vec![x, y]) + .operands_out(vec![a]), + ); + + ig.push( + Inst::new( + "uadd_sat", + r#" + Add with unsigned saturation. + + This is similar to `iadd` but the operands are interpreted as unsigned integers and their + summed result, instead of wrapping, will be saturated to the highest unsigned integer for + the controlling type (e.g. `0xFF` for i8). + "#, + &formats.binary, + ) + .operands_in(vec![x, y]) + .operands_out(vec![a]), + ); + + ig.push( + Inst::new( + "sadd_sat", + r#" + Add with signed saturation. + + This is similar to `iadd` but the operands are interpreted as signed integers and their + summed result, instead of wrapping, will be saturated to the lowest or highest + signed integer for the controlling type (e.g. `0x80` or `0x7F` for i8). For example, + since an `sadd_sat.i8` of `0x70` and `0x70` is greater than `0x7F`, the result will be + clamped to `0x7F`. + "#, + &formats.binary, + ) + .operands_in(vec![x, y]) + .operands_out(vec![a]), + ); + + ig.push( + Inst::new( + "isub", + r#" + Wrapping integer subtraction: `a := x - y \pmod{2^B}`. + + This instruction does not depend on the signed/unsigned interpretation + of the operands. + "#, + &formats.binary, + ) + .operands_in(vec![x, y]) + .operands_out(vec![a]), + ); + + ig.push( + Inst::new( + "usub_sat", + r#" + Subtract with unsigned saturation. + + This is similar to `isub` but the operands are interpreted as unsigned integers and their + difference, instead of wrapping, will be saturated to the lowest unsigned integer for + the controlling type (e.g. `0x00` for i8). + "#, + &formats.binary, + ) + .operands_in(vec![x, y]) + .operands_out(vec![a]), + ); + + ig.push( + Inst::new( + "ssub_sat", + r#" + Subtract with signed saturation. 
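Editor's note: the saturating variants in this group (`uadd_sat`, `sadd_sat`, `usub_sat`, and this `ssub_sat`) clamp instead of wrapping. A minimal model using Rust's built-in saturating arithmetic, with `i8`/`u8` values standing in for single lanes:

```rust
fn main() {
    // uadd_sat on an i8 lane: 0xF0 + 0x20 clamps to the unsigned maximum 0xFF.
    assert_eq!(0xF0u8.saturating_add(0x20), 0xFF);
    // sadd_sat: 0x70 + 0x70 exceeds i8::MAX, so the result clamps to 0x7F.
    assert_eq!(0x70i8.saturating_add(0x70), 0x7F);
    // ssub_sat: i8::MIN - 1 clamps to i8::MIN instead of wrapping around.
    assert_eq!((-128i8).saturating_sub(1), -128);
    // Plain wrapping isub would produce 127 here instead.
    assert_eq!((-128i8).wrapping_sub(1), 127);
}
```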
+ + This is similar to `isub` but the operands are interpreted as signed integers and their + difference, instead of wrapping, will be saturated to the lowest or highest + signed integer for the controlling type (e.g. `0x80` or `0x7F` for i8). + "#, + &formats.binary, + ) + .operands_in(vec![x, y]) + .operands_out(vec![a]), + ); + + ig.push( + Inst::new( + "ineg", + r#" + Integer negation: `a := -x \pmod{2^B}`. + "#, + &formats.unary, + ) + .operands_in(vec![x]) + .operands_out(vec![a]), + ); + + ig.push( + Inst::new( + "iabs", + r#" + Integer absolute value with wrapping: `a := |x|`. + "#, + &formats.unary, + ) + .operands_in(vec![x]) + .operands_out(vec![a]), + ); + + ig.push( + Inst::new( + "imul", + r#" + Wrapping integer multiplication: `a := x y \pmod{2^B}`. + + This instruction does not depend on the signed/unsigned interpretation + of the operands. + + Polymorphic over all integer types (vector and scalar). + "#, + &formats.binary, + ) + .operands_in(vec![x, y]) + .operands_out(vec![a]), + ); + + ig.push( + Inst::new( + "umulhi", + r#" + Unsigned integer multiplication, producing the high half of a + double-length result. + + Polymorphic over all scalar integer types, but does not support vector + types. + "#, + &formats.binary, + ) + .operands_in(vec![x, y]) + .operands_out(vec![a]), + ); + + ig.push( + Inst::new( + "smulhi", + r#" + Signed integer multiplication, producing the high half of a + double-length result. + + Polymorphic over all scalar integer types, but does not support vector + types. + "#, + &formats.binary, + ) + .operands_in(vec![x, y]) + .operands_out(vec![a]), + ); + + ig.push( + Inst::new( + "udiv", + r#" + Unsigned integer division: `a := \lfloor {x \over y} \rfloor`. + + This operation traps if the divisor is zero. + "#, + &formats.binary, + ) + .operands_in(vec![x, y]) + .operands_out(vec![a]) + .can_trap(true), + ); + + ig.push( + Inst::new( + "sdiv", + r#" + Signed integer division rounded toward zero: `a := sign(xy) + \lfloor {|x| \over |y|}\rfloor`. + + This operation traps if the divisor is zero, or if the result is not + representable in `B` bits two's complement. This only happens + when `x = -2^{B-1}, y = -1`. + "#, + &formats.binary, + ) + .operands_in(vec![x, y]) + .operands_out(vec![a]) + .can_trap(true), + ); + + ig.push( + Inst::new( + "urem", + r#" + Unsigned integer remainder. + + This operation traps if the divisor is zero. + "#, + &formats.binary, + ) + .operands_in(vec![x, y]) + .operands_out(vec![a]) + .can_trap(true), + ); + + ig.push( + Inst::new( + "srem", + r#" + Signed integer remainder. The result has the sign of the dividend. + + This operation traps if the divisor is zero. + "#, + &formats.binary, + ) + .operands_in(vec![x, y]) + .operands_out(vec![a]) + .can_trap(true), + ); + + let a = &Operand::new("a", iB); + let x = &Operand::new("x", iB); + let Y = &Operand::new("Y", &imm.imm64); + + ig.push( + Inst::new( + "iadd_imm", + r#" + Add immediate integer. + + Same as `iadd`, but one operand is an immediate constant. + + Polymorphic over all scalar integer types, but does not support vector + types. + "#, + &formats.binary_imm64, + ) + .operands_in(vec![x, Y]) + .operands_out(vec![a]), + ); + + ig.push( + Inst::new( + "imul_imm", + r#" + Integer multiplication by immediate constant. + + Polymorphic over all scalar integer types, but does not support vector + types. 
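Editor's note: for `umulhi` and `smulhi` above, the high half of the double-length product can be modelled by widening to 128 bits in plain Rust. This only illustrates the semantics, not how any backend lowers the instructions:

```rust
fn main() {
    let x: u64 = 0xDEAD_BEEF_CAFE_F00D;
    let y: u64 = 0x1234_5678_9ABC_DEF0;

    // umulhi: the high 64 bits of the full 128-bit unsigned product.
    let umulhi = ((x as u128 * y as u128) >> 64) as u64;
    // smulhi: the same, but with both operands sign-extended first.
    let smulhi = (((x as i64 as i128) * (y as i64 as i128)) >> 64) as u64;

    // The low half is what plain wrapping `imul` produces.
    assert_eq!(x.wrapping_mul(y), (x as u128 * y as u128) as u64);
    println!("umulhi = {umulhi:#x}, smulhi = {smulhi:#x}");
}
```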
+ "#, + &formats.binary_imm64, + ) + .operands_in(vec![x, Y]) + .operands_out(vec![a]), + ); + + ig.push( + Inst::new( + "udiv_imm", + r#" + Unsigned integer division by an immediate constant. + + This operation traps if the divisor is zero. + "#, + &formats.binary_imm64, + ) + .operands_in(vec![x, Y]) + .operands_out(vec![a]), + ); + + ig.push( + Inst::new( + "sdiv_imm", + r#" + Signed integer division by an immediate constant. + + This operation traps if the divisor is zero, or if the result is not + representable in `B` bits two's complement. This only happens + when `x = -2^{B-1}, Y = -1`. + "#, + &formats.binary_imm64, + ) + .operands_in(vec![x, Y]) + .operands_out(vec![a]), + ); + + ig.push( + Inst::new( + "urem_imm", + r#" + Unsigned integer remainder with immediate divisor. + + This operation traps if the divisor is zero. + "#, + &formats.binary_imm64, + ) + .operands_in(vec![x, Y]) + .operands_out(vec![a]), + ); + + ig.push( + Inst::new( + "srem_imm", + r#" + Signed integer remainder with immediate divisor. + + This operation traps if the divisor is zero. + "#, + &formats.binary_imm64, + ) + .operands_in(vec![x, Y]) + .operands_out(vec![a]), + ); + + ig.push( + Inst::new( + "irsub_imm", + r#" + Immediate reverse wrapping subtraction: `a := Y - x \pmod{2^B}`. + + Also works as integer negation when `Y = 0`. Use `iadd_imm` + with a negative immediate operand for the reverse immediate + subtraction. + + Polymorphic over all scalar integer types, but does not support vector + types. + "#, + &formats.binary_imm64, + ) + .operands_in(vec![x, Y]) + .operands_out(vec![a]), + ); + + let a = &Operand::new("a", iB); + let x = &Operand::new("x", iB); + let y = &Operand::new("y", iB); + + let c_in = &Operand::new("c_in", b1).with_doc("Input carry flag"); + let c_out = &Operand::new("c_out", b1).with_doc("Output carry flag"); + let b_in = &Operand::new("b_in", b1).with_doc("Input borrow flag"); + let b_out = &Operand::new("b_out", b1).with_doc("Output borrow flag"); + + let c_if_in = &Operand::new("c_in", iflags); + let c_if_out = &Operand::new("c_out", iflags); + let b_if_in = &Operand::new("b_in", iflags); + let b_if_out = &Operand::new("b_out", iflags); + + ig.push( + Inst::new( + "iadd_cin", + r#" + Add integers with carry in. + + Same as `iadd` with an additional carry input. Computes: + + ```text + a = x + y + c_{in} \pmod 2^B + ``` + + Polymorphic over all scalar integer types, but does not support vector + types. + "#, + &formats.ternary, + ) + .operands_in(vec![x, y, c_in]) + .operands_out(vec![a]), + ); + + ig.push( + Inst::new( + "iadd_ifcin", + r#" + Add integers with carry in. + + Same as `iadd` with an additional carry flag input. Computes: + + ```text + a = x + y + c_{in} \pmod 2^B + ``` + + Polymorphic over all scalar integer types, but does not support vector + types. + "#, + &formats.ternary, + ) + .operands_in(vec![x, y, c_if_in]) + .operands_out(vec![a]), + ); + + ig.push( + Inst::new( + "iadd_cout", + r#" + Add integers with carry out. + + Same as `iadd` with an additional carry output. + + ```text + a &= x + y \pmod 2^B \\ + c_{out} &= x+y >= 2^B + ``` + + Polymorphic over all scalar integer types, but does not support vector + types. + "#, + &formats.binary, + ) + .operands_in(vec![x, y]) + .operands_out(vec![a, c_out]), + ); + + ig.push( + Inst::new( + "iadd_ifcout", + r#" + Add integers with carry out. + + Same as `iadd` with an additional carry flag output. 
+ + ```text + a &= x + y \pmod 2^B \\ + c_{out} &= x+y >= 2^B + ``` + + Polymorphic over all scalar integer types, but does not support vector + types. + "#, + &formats.binary, + ) + .operands_in(vec![x, y]) + .operands_out(vec![a, c_if_out]), + ); + + ig.push( + Inst::new( + "iadd_carry", + r#" + Add integers with carry in and out. + + Same as `iadd` with an additional carry input and output. + + ```text + a &= x + y + c_{in} \pmod 2^B \\ + c_{out} &= x + y + c_{in} >= 2^B + ``` + + Polymorphic over all scalar integer types, but does not support vector + types. + "#, + &formats.ternary, + ) + .operands_in(vec![x, y, c_in]) + .operands_out(vec![a, c_out]), + ); + + ig.push( + Inst::new( + "iadd_ifcarry", + r#" + Add integers with carry in and out. + + Same as `iadd` with an additional carry flag input and output. + + ```text + a &= x + y + c_{in} \pmod 2^B \\ + c_{out} &= x + y + c_{in} >= 2^B + ``` + + Polymorphic over all scalar integer types, but does not support vector + types. + "#, + &formats.ternary, + ) + .operands_in(vec![x, y, c_if_in]) + .operands_out(vec![a, c_if_out]), + ); + + ig.push( + Inst::new( + "isub_bin", + r#" + Subtract integers with borrow in. + + Same as `isub` with an additional borrow flag input. Computes: + + ```text + a = x - (y + b_{in}) \pmod 2^B + ``` + + Polymorphic over all scalar integer types, but does not support vector + types. + "#, + &formats.ternary, + ) + .operands_in(vec![x, y, b_in]) + .operands_out(vec![a]), + ); + + ig.push( + Inst::new( + "isub_ifbin", + r#" + Subtract integers with borrow in. + + Same as `isub` with an additional borrow flag input. Computes: + + ```text + a = x - (y + b_{in}) \pmod 2^B + ``` + + Polymorphic over all scalar integer types, but does not support vector + types. + "#, + &formats.ternary, + ) + .operands_in(vec![x, y, b_if_in]) + .operands_out(vec![a]), + ); + + ig.push( + Inst::new( + "isub_bout", + r#" + Subtract integers with borrow out. + + Same as `isub` with an additional borrow flag output. + + ```text + a &= x - y \pmod 2^B \\ + b_{out} &= x < y + ``` + + Polymorphic over all scalar integer types, but does not support vector + types. + "#, + &formats.binary, + ) + .operands_in(vec![x, y]) + .operands_out(vec![a, b_out]), + ); + + ig.push( + Inst::new( + "isub_ifbout", + r#" + Subtract integers with borrow out. + + Same as `isub` with an additional borrow flag output. + + ```text + a &= x - y \pmod 2^B \\ + b_{out} &= x < y + ``` + + Polymorphic over all scalar integer types, but does not support vector + types. + "#, + &formats.binary, + ) + .operands_in(vec![x, y]) + .operands_out(vec![a, b_if_out]), + ); + + ig.push( + Inst::new( + "isub_borrow", + r#" + Subtract integers with borrow in and out. + + Same as `isub` with an additional borrow flag input and output. + + ```text + a &= x - (y + b_{in}) \pmod 2^B \\ + b_{out} &= x < y + b_{in} + ``` + + Polymorphic over all scalar integer types, but does not support vector + types. + "#, + &formats.ternary, + ) + .operands_in(vec![x, y, b_in]) + .operands_out(vec![a, b_out]), + ); + + ig.push( + Inst::new( + "isub_ifborrow", + r#" + Subtract integers with borrow in and out. + + Same as `isub` with an additional borrow flag input and output. + + ```text + a &= x - (y + b_{in}) \pmod 2^B \\ + b_{out} &= x < y + b_{in} + ``` + + Polymorphic over all scalar integer types, but does not support vector + types. 
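Editor's note: these borrow-propagating forms exist so a wide subtraction can be split into native-width pieces. A sketch of a 128-bit subtraction assembled from 64-bit halves and checked against Rust's native `u128` arithmetic (an illustrative model, not generated code):

```rust
fn main() {
    let x: u128 = 0x0000_0000_0000_0001_0000_0000_0000_0000; // exactly 2^64
    let y: u128 = 1;

    let (xl, xh) = (x as u64, (x >> 64) as u64);
    let (yl, yh) = (y as u64, (y >> 64) as u64);

    // isub_bout on the low halves: b_out = xl < yl.
    let (al, b_out) = xl.overflowing_sub(yl);
    // isub_bin on the high halves: ah = xh - (yh + b_in).
    let ah = xh.wrapping_sub(yh).wrapping_sub(b_out as u64);

    let a = ((ah as u128) << 64) | al as u128;
    assert_eq!(a, x.wrapping_sub(y));
}
```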
+ "#, + &formats.ternary, + ) + .operands_in(vec![x, y, b_if_in]) + .operands_out(vec![a, b_if_out]), + ); + + let bits = &TypeVar::new( + "bits", + "Any integer, float, or boolean scalar or vector type", + TypeSetBuilder::new() + .ints(Interval::All) + .floats(Interval::All) + .bools(Interval::All) + .simd_lanes(Interval::All) + .includes_scalars(true) + .build(), + ); + let x = &Operand::new("x", bits); + let y = &Operand::new("y", bits); + let a = &Operand::new("a", bits); + + ig.push( + Inst::new( + "band", + r#" + Bitwise and. + "#, + &formats.binary, + ) + .operands_in(vec![x, y]) + .operands_out(vec![a]), + ); + + ig.push( + Inst::new( + "bor", + r#" + Bitwise or. + "#, + &formats.binary, + ) + .operands_in(vec![x, y]) + .operands_out(vec![a]), + ); + + ig.push( + Inst::new( + "bxor", + r#" + Bitwise xor. + "#, + &formats.binary, + ) + .operands_in(vec![x, y]) + .operands_out(vec![a]), + ); + + ig.push( + Inst::new( + "bnot", + r#" + Bitwise not. + "#, + &formats.unary, + ) + .operands_in(vec![x]) + .operands_out(vec![a]), + ); + + ig.push( + Inst::new( + "band_not", + r#" + Bitwise and not. + + Computes `x & ~y`. + "#, + &formats.binary, + ) + .operands_in(vec![x, y]) + .operands_out(vec![a]), + ); + + ig.push( + Inst::new( + "bor_not", + r#" + Bitwise or not. + + Computes `x | ~y`. + "#, + &formats.binary, + ) + .operands_in(vec![x, y]) + .operands_out(vec![a]), + ); + + ig.push( + Inst::new( + "bxor_not", + r#" + Bitwise xor not. + + Computes `x ^ ~y`. + "#, + &formats.binary, + ) + .operands_in(vec![x, y]) + .operands_out(vec![a]), + ); + + let x = &Operand::new("x", iB); + let Y = &Operand::new("Y", &imm.imm64); + let a = &Operand::new("a", iB); + + ig.push( + Inst::new( + "band_imm", + r#" + Bitwise and with immediate. + + Same as `band`, but one operand is an immediate constant. + + Polymorphic over all scalar integer types, but does not support vector + types. + "#, + &formats.binary_imm64, + ) + .operands_in(vec![x, Y]) + .operands_out(vec![a]), + ); + + ig.push( + Inst::new( + "bor_imm", + r#" + Bitwise or with immediate. + + Same as `bor`, but one operand is an immediate constant. + + Polymorphic over all scalar integer types, but does not support vector + types. + "#, + &formats.binary_imm64, + ) + .operands_in(vec![x, Y]) + .operands_out(vec![a]), + ); + + ig.push( + Inst::new( + "bxor_imm", + r#" + Bitwise xor with immediate. + + Same as `bxor`, but one operand is an immediate constant. + + Polymorphic over all scalar integer types, but does not support vector + types. + "#, + &formats.binary_imm64, + ) + .operands_in(vec![x, Y]) + .operands_out(vec![a]), + ); + + let x = &Operand::new("x", Int).with_doc("Scalar or vector value to shift"); + let y = &Operand::new("y", iB).with_doc("Number of bits to shift"); + let Y = &Operand::new("Y", &imm.imm64); + let a = &Operand::new("a", Int); + + ig.push( + Inst::new( + "rotl", + r#" + Rotate left. + + Rotate the bits in ``x`` by ``y`` places. + "#, + &formats.binary, + ) + .operands_in(vec![x, y]) + .operands_out(vec![a]), + ); + + ig.push( + Inst::new( + "rotr", + r#" + Rotate right. + + Rotate the bits in ``x`` by ``y`` places. + "#, + &formats.binary, + ) + .operands_in(vec![x, y]) + .operands_out(vec![a]), + ); + + ig.push( + Inst::new( + "rotl_imm", + r#" + Rotate left by immediate. + "#, + &formats.binary_imm64, + ) + .operands_in(vec![x, Y]) + .operands_out(vec![a]), + ); + + ig.push( + Inst::new( + "rotr_imm", + r#" + Rotate right by immediate. 
+ "#, + &formats.binary_imm64, + ) + .operands_in(vec![x, Y]) + .operands_out(vec![a]), + ); + + ig.push( + Inst::new( + "ishl", + r#" + Integer shift left. Shift the bits in ``x`` towards the MSB by ``y`` + places. Shift in zero bits to the LSB. + + The shift amount is masked to the size of ``x``. + + When shifting a B-bits integer type, this instruction computes: + + ```text + s &:= y \pmod B, + a &:= x \cdot 2^s \pmod{2^B}. + ``` + "#, + &formats.binary, + ) + .operands_in(vec![x, y]) + .operands_out(vec![a]), + ); + + ig.push( + Inst::new( + "ushr", + r#" + Unsigned shift right. Shift bits in ``x`` towards the LSB by ``y`` + places, shifting in zero bits to the MSB. Also called a *logical + shift*. + + The shift amount is masked to the size of the register. + + When shifting a B-bits integer type, this instruction computes: + + ```text + s &:= y \pmod B, + a &:= \lfloor x \cdot 2^{-s} \rfloor. + ``` + "#, + &formats.binary, + ) + .operands_in(vec![x, y]) + .operands_out(vec![a]), + ); + + ig.push( + Inst::new( + "sshr", + r#" + Signed shift right. Shift bits in ``x`` towards the LSB by ``y`` + places, shifting in sign bits to the MSB. Also called an *arithmetic + shift*. + + The shift amount is masked to the size of the register. + "#, + &formats.binary, + ) + .operands_in(vec![x, y]) + .operands_out(vec![a]), + ); + + ig.push( + Inst::new( + "ishl_imm", + r#" + Integer shift left by immediate. + + The shift amount is masked to the size of ``x``. + "#, + &formats.binary_imm64, + ) + .operands_in(vec![x, Y]) + .operands_out(vec![a]), + ); + + ig.push( + Inst::new( + "ushr_imm", + r#" + Unsigned shift right by immediate. + + The shift amount is masked to the size of the register. + "#, + &formats.binary_imm64, + ) + .operands_in(vec![x, Y]) + .operands_out(vec![a]), + ); + + ig.push( + Inst::new( + "sshr_imm", + r#" + Signed shift right by immediate. + + The shift amount is masked to the size of the register. + "#, + &formats.binary_imm64, + ) + .operands_in(vec![x, Y]) + .operands_out(vec![a]), + ); + + let x = &Operand::new("x", iB); + let a = &Operand::new("a", iB); + + ig.push( + Inst::new( + "bitrev", + r#" + Reverse the bits of a integer. + + Reverses the bits in ``x``. + "#, + &formats.unary, + ) + .operands_in(vec![x]) + .operands_out(vec![a]), + ); + + ig.push( + Inst::new( + "clz", + r#" + Count leading zero bits. + + Starting from the MSB in ``x``, count the number of zero bits before + reaching the first one bit. When ``x`` is zero, returns the size of x + in bits. + "#, + &formats.unary, + ) + .operands_in(vec![x]) + .operands_out(vec![a]), + ); + + ig.push( + Inst::new( + "cls", + r#" + Count leading sign bits. + + Starting from the MSB after the sign bit in ``x``, count the number of + consecutive bits identical to the sign bit. When ``x`` is 0 or -1, + returns one less than the size of x in bits. + "#, + &formats.unary, + ) + .operands_in(vec![x]) + .operands_out(vec![a]), + ); + + ig.push( + Inst::new( + "ctz", + r#" + Count trailing zeros. + + Starting from the LSB in ``x``, count the number of zero bits before + reaching the first one bit. When ``x`` is zero, returns the size of x + in bits. + "#, + &formats.unary, + ) + .operands_in(vec![x]) + .operands_out(vec![a]), + ); + + ig.push( + Inst::new( + "popcnt", + r#" + Population count + + Count the number of one bits in ``x``. 
+ "#, + &formats.unary, + ) + .operands_in(vec![x]) + .operands_out(vec![a]), + ); + + let Float = &TypeVar::new( + "Float", + "A scalar or vector floating point number", + TypeSetBuilder::new() + .floats(Interval::All) + .simd_lanes(Interval::All) + .build(), + ); + let Cond = &Operand::new("Cond", &imm.floatcc); + let x = &Operand::new("x", Float); + let y = &Operand::new("y", Float); + let a = &Operand::new("a", &Float.as_bool()); + + ig.push( + Inst::new( + "fcmp", + r#" + Floating point comparison. + + Two IEEE 754-2008 floating point numbers, `x` and `y`, relate to each + other in exactly one of four ways: + + == ========================================== + UN Unordered when one or both numbers is NaN. + EQ When `x = y`. (And `0.0 = -0.0`). + LT When `x < y`. + GT When `x > y`. + == ========================================== + + The 14 `floatcc` condition codes each correspond to a subset of + the four relations, except for the empty set which would always be + false, and the full set which would always be true. + + The condition codes are divided into 7 'ordered' conditions which don't + include UN, and 7 unordered conditions which all include UN. + + +-------+------------+---------+------------+-------------------------+ + |Ordered |Unordered |Condition | + +=======+============+=========+============+=========================+ + |ord |EQ | LT | GT|uno |UN |NaNs absent / present. | + +-------+------------+---------+------------+-------------------------+ + |eq |EQ |ueq |UN | EQ |Equal | + +-------+------------+---------+------------+-------------------------+ + |one |LT | GT |ne |UN | LT | GT|Not equal | + +-------+------------+---------+------------+-------------------------+ + |lt |LT |ult |UN | LT |Less than | + +-------+------------+---------+------------+-------------------------+ + |le |LT | EQ |ule |UN | LT | EQ|Less than or equal | + +-------+------------+---------+------------+-------------------------+ + |gt |GT |ugt |UN | GT |Greater than | + +-------+------------+---------+------------+-------------------------+ + |ge |GT | EQ |uge |UN | GT | EQ|Greater than or equal | + +-------+------------+---------+------------+-------------------------+ + + The standard C comparison operators, `<, <=, >, >=`, are all ordered, + so they are false if either operand is NaN. The C equality operator, + `==`, is ordered, and since inequality is defined as the logical + inverse it is *unordered*. They map to the `floatcc` condition + codes as follows: + + ==== ====== ============ + C `Cond` Subset + ==== ====== ============ + `==` eq EQ + `!=` ne UN | LT | GT + `<` lt LT + `<=` le LT | EQ + `>` gt GT + `>=` ge GT | EQ + ==== ====== ============ + + This subset of condition codes also corresponds to the WebAssembly + floating point comparisons of the same name. + + When this instruction compares floating point vectors, it returns a + boolean vector with the results of lane-wise comparisons. + "#, + &formats.float_compare, + ) + .operands_in(vec![Cond, x, y]) + .operands_out(vec![a]), + ); + + let f = &Operand::new("f", fflags); + + ig.push( + Inst::new( + "ffcmp", + r#" + Floating point comparison returning flags. + + Compares two numbers like `fcmp`, but returns floating point CPU + flags instead of testing a specific condition. 
+ "#, + &formats.binary, + ) + .operands_in(vec![x, y]) + .operands_out(vec![f]), + ); + + let x = &Operand::new("x", Float); + let y = &Operand::new("y", Float); + let z = &Operand::new("z", Float); + let a = &Operand::new("a", Float).with_doc("Result of applying operator to each lane"); + + ig.push( + Inst::new( + "fadd", + r#" + Floating point addition. + "#, + &formats.binary, + ) + .operands_in(vec![x, y]) + .operands_out(vec![a]), + ); + + ig.push( + Inst::new( + "fsub", + r#" + Floating point subtraction. + "#, + &formats.binary, + ) + .operands_in(vec![x, y]) + .operands_out(vec![a]), + ); + + ig.push( + Inst::new( + "fmul", + r#" + Floating point multiplication. + "#, + &formats.binary, + ) + .operands_in(vec![x, y]) + .operands_out(vec![a]), + ); + + ig.push( + Inst::new( + "fdiv", + r#" + Floating point division. + + Unlike the integer division instructions ` and + `udiv`, this can't trap. Division by zero is infinity or + NaN, depending on the dividend. + "#, + &formats.binary, + ) + .operands_in(vec![x, y]) + .operands_out(vec![a]), + ); + + ig.push( + Inst::new( + "sqrt", + r#" + Floating point square root. + "#, + &formats.unary, + ) + .operands_in(vec![x]) + .operands_out(vec![a]), + ); + + ig.push( + Inst::new( + "fma", + r#" + Floating point fused multiply-and-add. + + Computes `a := xy+z` without any intermediate rounding of the + product. + "#, + &formats.ternary, + ) + .operands_in(vec![x, y, z]) + .operands_out(vec![a]), + ); + + let a = &Operand::new("a", Float).with_doc("``x`` with its sign bit inverted"); + + ig.push( + Inst::new( + "fneg", + r#" + Floating point negation. + + Note that this is a pure bitwise operation. + "#, + &formats.unary, + ) + .operands_in(vec![x]) + .operands_out(vec![a]), + ); + + let a = &Operand::new("a", Float).with_doc("``x`` with its sign bit cleared"); + + ig.push( + Inst::new( + "fabs", + r#" + Floating point absolute value. + + Note that this is a pure bitwise operation. + "#, + &formats.unary, + ) + .operands_in(vec![x]) + .operands_out(vec![a]), + ); + + let a = &Operand::new("a", Float).with_doc("``x`` with its sign bit changed to that of ``y``"); + + ig.push( + Inst::new( + "fcopysign", + r#" + Floating point copy sign. + + Note that this is a pure bitwise operation. The sign bit from ``y`` is + copied to the sign bit of ``x``. + "#, + &formats.binary, + ) + .operands_in(vec![x, y]) + .operands_out(vec![a]), + ); + + let a = &Operand::new("a", Float).with_doc("The smaller of ``x`` and ``y``"); + + ig.push( + Inst::new( + "fmin", + r#" + Floating point minimum, propagating NaNs. + + If either operand is NaN, this returns a NaN. + "#, + &formats.binary, + ) + .operands_in(vec![x, y]) + .operands_out(vec![a]), + ); + + ig.push( + Inst::new( + "fmin_pseudo", + r#" + Floating point pseudo-minimum, propagating NaNs. This behaves differently from ``fmin``. + See https://github.com/WebAssembly/simd/pull/122 for background. + + The behaviour is defined as ``fmin_pseudo(a, b) = (b < a) ? b : a``, and the behaviour + for zero or NaN inputs follows from the behaviour of ``<`` with such inputs. + "#, + &formats.binary, + ) + .operands_in(vec![x, y]) + .operands_out(vec![a]), + ); + + let a = &Operand::new("a", Float).with_doc("The larger of ``x`` and ``y``"); + + ig.push( + Inst::new( + "fmax", + r#" + Floating point maximum, propagating NaNs. + + If either operand is NaN, this returns a NaN. 
+ "#, + &formats.binary, + ) + .operands_in(vec![x, y]) + .operands_out(vec![a]), + ); + + ig.push( + Inst::new( + "fmax_pseudo", + r#" + Floating point pseudo-maximum, propagating NaNs. This behaves differently from ``fmax``. + See https://github.com/WebAssembly/simd/pull/122 for background. + + The behaviour is defined as ``fmax_pseudo(a, b) = (a < b) ? b : a``, and the behaviour + for zero or NaN inputs follows from the behaviour of ``<`` with such inputs. + "#, + &formats.binary, + ) + .operands_in(vec![x, y]) + .operands_out(vec![a]), + ); + + let a = &Operand::new("a", Float).with_doc("``x`` rounded to integral value"); + + ig.push( + Inst::new( + "ceil", + r#" + Round floating point round to integral, towards positive infinity. + "#, + &formats.unary, + ) + .operands_in(vec![x]) + .operands_out(vec![a]), + ); + + ig.push( + Inst::new( + "floor", + r#" + Round floating point round to integral, towards negative infinity. + "#, + &formats.unary, + ) + .operands_in(vec![x]) + .operands_out(vec![a]), + ); + + ig.push( + Inst::new( + "trunc", + r#" + Round floating point round to integral, towards zero. + "#, + &formats.unary, + ) + .operands_in(vec![x]) + .operands_out(vec![a]), + ); + + ig.push( + Inst::new( + "nearest", + r#" + Round floating point round to integral, towards nearest with ties to + even. + "#, + &formats.unary, + ) + .operands_in(vec![x]) + .operands_out(vec![a]), + ); + + let a = &Operand::new("a", b1); + let x = &Operand::new("x", Ref); + + ig.push( + Inst::new( + "is_null", + r#" + Reference verification. + + The condition code determines if the reference type in question is + null or not. + "#, + &formats.unary, + ) + .operands_in(vec![x]) + .operands_out(vec![a]), + ); + + let a = &Operand::new("a", b1); + let x = &Operand::new("x", Ref); + + ig.push( + Inst::new( + "is_invalid", + r#" + Reference verification. + + The condition code determines if the reference type in question is + invalid or not. + "#, + &formats.unary, + ) + .operands_in(vec![x]) + .operands_out(vec![a]), + ); + + let Cond = &Operand::new("Cond", &imm.intcc); + let f = &Operand::new("f", iflags); + let a = &Operand::new("a", b1); + + ig.push( + Inst::new( + "trueif", + r#" + Test integer CPU flags for a specific condition. + + Check the CPU flags in ``f`` against the ``Cond`` condition code and + return true when the condition code is satisfied. + "#, + &formats.int_cond, + ) + .operands_in(vec![Cond, f]) + .operands_out(vec![a]), + ); + + let Cond = &Operand::new("Cond", &imm.floatcc); + let f = &Operand::new("f", fflags); + + ig.push( + Inst::new( + "trueff", + r#" + Test floating point CPU flags for a specific condition. + + Check the CPU flags in ``f`` against the ``Cond`` condition code and + return true when the condition code is satisfied. + "#, + &formats.float_cond, + ) + .operands_in(vec![Cond, f]) + .operands_out(vec![a]), + ); + + let x = &Operand::new("x", Mem); + let a = &Operand::new("a", MemTo).with_doc("Bits of `x` reinterpreted"); + + ig.push( + Inst::new( + "bitcast", + r#" + Reinterpret the bits in `x` as a different type. + + The input and output types must be storable to memory and of the same + size. A bitcast is equivalent to storing one type and loading the other + type from the same address. 
+ "#, + &formats.unary, + ) + .operands_in(vec![x]) + .operands_out(vec![a]), + ); + + let x = &Operand::new("x", Any); + let a = &Operand::new("a", AnyTo).with_doc("Bits of `x` reinterpreted"); + + ig.push( + Inst::new( + "raw_bitcast", + r#" + Cast the bits in `x` as a different type of the same bit width. + + This instruction does not change the data's representation but allows + data in registers to be used as different types, e.g. an i32x4 as a + b8x16. The only constraint on the result `a` is that it can be + `raw_bitcast` back to the original type. Also, in a raw_bitcast between + vector types with the same number of lanes, the value of each result + lane is a raw_bitcast of the corresponding operand lane. TODO there is + currently no mechanism for enforcing the bit width constraint. + "#, + &formats.unary, + ) + .operands_in(vec![x]) + .operands_out(vec![a]), + ); + + let a = &Operand::new("a", TxN).with_doc("A vector value"); + let s = &Operand::new("s", &TxN.lane_of()).with_doc("A scalar value"); + + ig.push( + Inst::new( + "scalar_to_vector", + r#" + Copies a scalar value to a vector value. The scalar is copied into the + least significant lane of the vector, and all other lanes will be zero. + "#, + &formats.unary, + ) + .operands_in(vec![s]) + .operands_out(vec![a]), + ); + + let Bool = &TypeVar::new( + "Bool", + "A scalar or vector boolean type", + TypeSetBuilder::new() + .bools(Interval::All) + .simd_lanes(Interval::All) + .build(), + ); + + let BoolTo = &TypeVar::new( + "BoolTo", + "A smaller boolean type with the same number of lanes", + TypeSetBuilder::new() + .bools(Interval::All) + .simd_lanes(Interval::All) + .build(), + ); + + let x = &Operand::new("x", Bool); + let a = &Operand::new("a", BoolTo); + + ig.push( + Inst::new( + "breduce", + r#" + Convert `x` to a smaller boolean type in the platform-defined way. + + The result type must have the same number of vector lanes as the input, + and each lane must not have more bits that the input lanes. If the + input and output types are the same, this is a no-op. + "#, + &formats.unary, + ) + .operands_in(vec![x]) + .operands_out(vec![a]) + .constraints(vec![WiderOrEq(Bool.clone(), BoolTo.clone())]), + ); + + let BoolTo = &TypeVar::new( + "BoolTo", + "A larger boolean type with the same number of lanes", + TypeSetBuilder::new() + .bools(Interval::All) + .simd_lanes(Interval::All) + .build(), + ); + let x = &Operand::new("x", Bool); + let a = &Operand::new("a", BoolTo); + + ig.push( + Inst::new( + "bextend", + r#" + Convert `x` to a larger boolean type in the platform-defined way. + + The result type must have the same number of vector lanes as the input, + and each lane must not have fewer bits that the input lanes. If the + input and output types are the same, this is a no-op. + "#, + &formats.unary, + ) + .operands_in(vec![x]) + .operands_out(vec![a]) + .constraints(vec![WiderOrEq(BoolTo.clone(), Bool.clone())]), + ); + + let IntTo = &TypeVar::new( + "IntTo", + "An integer type with the same number of lanes", + TypeSetBuilder::new() + .ints(Interval::All) + .simd_lanes(Interval::All) + .build(), + ); + let x = &Operand::new("x", Bool); + let a = &Operand::new("a", IntTo); + + ig.push( + Inst::new( + "bint", + r#" + Convert `x` to an integer. + + True maps to 1 and false maps to 0. The result type must have the same + number of vector lanes as the input. + "#, + &formats.unary, + ) + .operands_in(vec![x]) + .operands_out(vec![a]), + ); + + ig.push( + Inst::new( + "bmask", + r#" + Convert `x` to an integer mask. 
+ + True maps to all 1s and false maps to all 0s. The result type must have + the same number of vector lanes as the input. + "#, + &formats.unary, + ) + .operands_in(vec![x]) + .operands_out(vec![a]), + ); + + let Int = &TypeVar::new( + "Int", + "A scalar or vector integer type", + TypeSetBuilder::new() + .ints(Interval::All) + .simd_lanes(Interval::All) + .build(), + ); + + let IntTo = &TypeVar::new( + "IntTo", + "A smaller integer type with the same number of lanes", + TypeSetBuilder::new() + .ints(Interval::All) + .simd_lanes(Interval::All) + .build(), + ); + let x = &Operand::new("x", Int); + let a = &Operand::new("a", IntTo); + + ig.push( + Inst::new( + "ireduce", + r#" + Convert `x` to a smaller integer type by dropping high bits. + + Each lane in `x` is converted to a smaller integer type by discarding + the most significant bits. This is the same as reducing modulo + `2^n`. + + The result type must have the same number of vector lanes as the input, + and each lane must not have more bits that the input lanes. If the + input and output types are the same, this is a no-op. + "#, + &formats.unary, + ) + .operands_in(vec![x]) + .operands_out(vec![a]) + .constraints(vec![WiderOrEq(Int.clone(), IntTo.clone())]), + ); + + let I16or32xN = &TypeVar::new( + "I16or32xN", + "A SIMD vector type containing integer lanes 16 or 32 bits wide", + TypeSetBuilder::new() + .ints(16..32) + .simd_lanes(4..8) + .includes_scalars(false) + .build(), + ); + + let x = &Operand::new("x", I16or32xN); + let y = &Operand::new("y", I16or32xN); + let a = &Operand::new("a", &I16or32xN.split_lanes()); + + ig.push( + Inst::new( + "snarrow", + r#" + Combine `x` and `y` into a vector with twice the lanes but half the integer width while + saturating overflowing values to the signed maximum and minimum. + + The lanes will be concatenated after narrowing. For example, when `x` and `y` are `i32x4` + and `x = [x3, x2, x1, x0]` and `y = [y3, y2, y1, y0]`, then after narrowing the value + returned is an `i16x8`: `a = [y3', y2', y1', y0', x3', x2', x1', x0']`. + "#, + &formats.binary, + ) + .operands_in(vec![x, y]) + .operands_out(vec![a]), + ); + + ig.push( + Inst::new( + "unarrow", + r#" + Combine `x` and `y` into a vector with twice the lanes but half the integer width while + saturating overflowing values to the unsigned maximum and minimum. + + Note that all input lanes are considered signed: any negative lanes will overflow and be + replaced with the unsigned minimum, `0x00`. + + The lanes will be concatenated after narrowing. For example, when `x` and `y` are `i32x4` + and `x = [x3, x2, x1, x0]` and `y = [y3, y2, y1, y0]`, then after narrowing the value + returned is an `i16x8`: `a = [y3', y2', y1', y0', x3', x2', x1', x0']`. + "#, + &formats.binary, + ) + .operands_in(vec![x, y]) + .operands_out(vec![a]), + ); + + let I8or16xN = &TypeVar::new( + "I8or16xN", + "A SIMD vector type containing integer lanes 8 or 16 bits wide.", + TypeSetBuilder::new() + .ints(8..16) + .simd_lanes(8..16) + .includes_scalars(false) + .build(), + ); + + let x = &Operand::new("x", I8or16xN); + let a = &Operand::new("a", &I8or16xN.merge_lanes()); + + ig.push( + Inst::new( + "swiden_low", + r#" + Widen the low lanes of `x` using signed extension. + + This will double the lane width and halve the number of lanes. + "#, + &formats.unary, + ) + .operands_in(vec![x]) + .operands_out(vec![a]), + ); + + ig.push( + Inst::new( + "swiden_high", + r#" + Widen the high lanes of `x` using signed extension. 
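Editor's note: per lane, the saturation performed by `snarrow` and `unarrow` when narrowing `i32` to 16-bit lanes looks like the sketch below. Only the clamping is modelled here; the lane concatenation order is as documented above, and the two closures are illustrative assumptions:

```rust
fn main() {
    let snarrow_lane = |v: i32| v.clamp(i16::MIN as i32, i16::MAX as i32) as i16;
    let unarrow_lane = |v: i32| v.clamp(0, u16::MAX as i32) as u16;

    assert_eq!(snarrow_lane(100_000), i16::MAX);  // overflow clamps to 0x7FFF
    assert_eq!(snarrow_lane(-100_000), i16::MIN); // underflow clamps to 0x8000
    assert_eq!(unarrow_lane(-5), 0);              // negative input clamps to 0x0000
    assert_eq!(unarrow_lane(100_000), u16::MAX);  // overflow clamps to 0xFFFF
}
```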
+ + This will double the lane width and halve the number of lanes. + "#, + &formats.unary, + ) + .operands_in(vec![x]) + .operands_out(vec![a]), + ); + + ig.push( + Inst::new( + "uwiden_low", + r#" + Widen the low lanes of `x` using unsigned extension. + + This will double the lane width and halve the number of lanes. + "#, + &formats.unary, + ) + .operands_in(vec![x]) + .operands_out(vec![a]), + ); + + ig.push( + Inst::new( + "uwiden_high", + r#" + Widen the high lanes of `x` using unsigned extension. + + This will double the lane width and halve the number of lanes. + "#, + &formats.unary, + ) + .operands_in(vec![x]) + .operands_out(vec![a]), + ); + + let I16x8 = &TypeVar::new( + "I16x8", + "A SIMD vector type containing 8 integer lanes each 16 bits wide.", + TypeSetBuilder::new() + .ints(16..16) + .simd_lanes(8..8) + .includes_scalars(false) + .build(), + ); + + let x = &Operand::new("x", I16x8); + let y = &Operand::new("y", I16x8); + let a = &Operand::new("a", &I16x8.merge_lanes()); + + ig.push( + Inst::new( + "widening_pairwise_dot_product_s", + r#" + Takes corresponding elements in `x` and `y`, performs a sign-extending length-doubling + multiplication on them, then adds adjacent pairs of elements to form the result. For + example, if the input vectors are `[x3, x2, x1, x0]` and `[y3, y2, y1, y0]`, it produces + the vector `[r1, r0]`, where `r1 = sx(x3) * sx(y3) + sx(x2) * sx(y2)` and + `r0 = sx(x1) * sx(y1) + sx(x0) * sx(y0)`, and `sx(n)` sign-extends `n` to twice its width. + + This will double the lane width and halve the number of lanes. So the resulting + vector has the same number of bits as `x` and `y` do (individually). + + See https://github.com/WebAssembly/simd/pull/127 for background info. + "#, + &formats.binary, + ) + .operands_in(vec![x, y]) + .operands_out(vec![a]), + ); + + let IntTo = &TypeVar::new( + "IntTo", + "A larger integer type with the same number of lanes", + TypeSetBuilder::new() + .ints(Interval::All) + .simd_lanes(Interval::All) + .build(), + ); + let x = &Operand::new("x", Int); + let a = &Operand::new("a", IntTo); + + ig.push( + Inst::new( + "uextend", + r#" + Convert `x` to a larger integer type by zero-extending. + + Each lane in `x` is converted to a larger integer type by adding + zeroes. The result has the same numerical value as `x` when both are + interpreted as unsigned integers. + + The result type must have the same number of vector lanes as the input, + and each lane must not have fewer bits that the input lanes. If the + input and output types are the same, this is a no-op. + "#, + &formats.unary, + ) + .operands_in(vec![x]) + .operands_out(vec![a]) + .constraints(vec![WiderOrEq(IntTo.clone(), Int.clone())]), + ); + + ig.push( + Inst::new( + "sextend", + r#" + Convert `x` to a larger integer type by sign-extending. + + Each lane in `x` is converted to a larger integer type by replicating + the sign bit. The result has the same numerical value as `x` when both + are interpreted as signed integers. + + The result type must have the same number of vector lanes as the input, + and each lane must not have fewer bits that the input lanes. If the + input and output types are the same, this is a no-op. 
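Editor's note, looking back at `widening_pairwise_dot_product_s`: one output lane is formed from two adjacent input lanes per operand, sign-extended before multiplying so the products cannot overflow. A scalar model of a single result lane; the `pairwise_dot` closure is an illustrative assumption, not a Cranelift helper:

```rust
fn main() {
    let pairwise_dot = |x: [i16; 2], y: [i16; 2]| -> i32 {
        (x[0] as i32) * (y[0] as i32) + (x[1] as i32) * (y[1] as i32)
    };

    assert_eq!(pairwise_dot([3, -4], [5, 6]), 3 * 5 + (-4) * 6); // = -9
    // The widening matters: i16 * i16 can exceed i16::MAX without overflowing i32.
    assert_eq!(pairwise_dot([i16::MAX, i16::MAX], [2, 2]), 4 * i16::MAX as i32);
}
```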
+ "#, + &formats.unary, + ) + .operands_in(vec![x]) + .operands_out(vec![a]) + .constraints(vec![WiderOrEq(IntTo.clone(), Int.clone())]), + ); + + let FloatTo = &TypeVar::new( + "FloatTo", + "A scalar or vector floating point number", + TypeSetBuilder::new() + .floats(Interval::All) + .simd_lanes(Interval::All) + .build(), + ); + let x = &Operand::new("x", Float); + let a = &Operand::new("a", FloatTo); + + ig.push( + Inst::new( + "fpromote", + r#" + Convert `x` to a larger floating point format. + + Each lane in `x` is converted to the destination floating point format. + This is an exact operation. + + Cranelift currently only supports two floating point formats + - `f32` and `f64`. This may change in the future. + + The result type must have the same number of vector lanes as the input, + and the result lanes must not have fewer bits than the input lanes. If + the input and output types are the same, this is a no-op. + "#, + &formats.unary, + ) + .operands_in(vec![x]) + .operands_out(vec![a]) + .constraints(vec![WiderOrEq(FloatTo.clone(), Float.clone())]), + ); + + ig.push( + Inst::new( + "fdemote", + r#" + Convert `x` to a smaller floating point format. + + Each lane in `x` is converted to the destination floating point format + by rounding to nearest, ties to even. + + Cranelift currently only supports two floating point formats + - `f32` and `f64`. This may change in the future. + + The result type must have the same number of vector lanes as the input, + and the result lanes must not have more bits than the input lanes. If + the input and output types are the same, this is a no-op. + "#, + &formats.unary, + ) + .operands_in(vec![x]) + .operands_out(vec![a]) + .constraints(vec![WiderOrEq(Float.clone(), FloatTo.clone())]), + ); + + let x = &Operand::new("x", Float); + let a = &Operand::new("a", IntTo); + + ig.push( + Inst::new( + "fcvt_to_uint", + r#" + Convert floating point to unsigned integer. + + Each lane in `x` is converted to an unsigned integer by rounding + towards zero. If `x` is NaN or if the unsigned integral value cannot be + represented in the result type, this instruction traps. + + The result type must have the same number of vector lanes as the input. + "#, + &formats.unary, + ) + .operands_in(vec![x]) + .operands_out(vec![a]) + .can_trap(true), + ); + + ig.push( + Inst::new( + "fcvt_to_uint_sat", + r#" + Convert floating point to unsigned integer as fcvt_to_uint does, but + saturates the input instead of trapping. NaN and negative values are + converted to 0. + "#, + &formats.unary, + ) + .operands_in(vec![x]) + .operands_out(vec![a]), + ); + + ig.push( + Inst::new( + "fcvt_to_sint", + r#" + Convert floating point to signed integer. + + Each lane in `x` is converted to a signed integer by rounding towards + zero. If `x` is NaN or if the signed integral value cannot be + represented in the result type, this instruction traps. + + The result type must have the same number of vector lanes as the input. + "#, + &formats.unary, + ) + .operands_in(vec![x]) + .operands_out(vec![a]) + .can_trap(true), + ); + + ig.push( + Inst::new( + "fcvt_to_sint_sat", + r#" + Convert floating point to signed integer as fcvt_to_sint does, but + saturates the input instead of trapping. NaN values are converted to 0. + "#, + &formats.unary, + ) + .operands_in(vec![x]) + .operands_out(vec![a]), + ); + + let x = &Operand::new("x", Int); + let a = &Operand::new("a", FloatTo); + + ig.push( + Inst::new( + "fcvt_from_uint", + r#" + Convert unsigned integer to floating point. 
+ + Each lane in `x` is interpreted as an unsigned integer and converted to + floating point using round to nearest, ties to even. + + The result type must have the same number of vector lanes as the input. + "#, + &formats.unary, + ) + .operands_in(vec![x]) + .operands_out(vec![a]), + ); + + ig.push( + Inst::new( + "fcvt_from_sint", + r#" + Convert signed integer to floating point. + + Each lane in `x` is interpreted as a signed integer and converted to + floating point using round to nearest, ties to even. + + The result type must have the same number of vector lanes as the input. + "#, + &formats.unary, + ) + .operands_in(vec![x]) + .operands_out(vec![a]), + ); + + let WideInt = &TypeVar::new( + "WideInt", + "An integer type with lanes from `i16` upwards", + TypeSetBuilder::new() + .ints(16..128) + .simd_lanes(Interval::All) + .build(), + ); + let x = &Operand::new("x", WideInt); + let lo = &Operand::new("lo", &WideInt.half_width()).with_doc("The low bits of `x`"); + let hi = &Operand::new("hi", &WideInt.half_width()).with_doc("The high bits of `x`"); + + ig.push( + Inst::new( + "isplit", + r#" + Split an integer into low and high parts. + + Vectors of integers are split lane-wise, so the results have the same + number of lanes as the input, but the lanes are half the size. + + Returns the low half of `x` and the high half of `x` as two independent + values. + "#, + &formats.unary, + ) + .operands_in(vec![x]) + .operands_out(vec![lo, hi]) + .is_ghost(true), + ); + + let NarrowInt = &TypeVar::new( + "NarrowInt", + "An integer type with lanes type to `i64`", + TypeSetBuilder::new() + .ints(8..64) + .simd_lanes(Interval::All) + .build(), + ); + + let lo = &Operand::new("lo", NarrowInt); + let hi = &Operand::new("hi", NarrowInt); + let a = &Operand::new("a", &NarrowInt.double_width()) + .with_doc("The concatenation of `lo` and `hi`"); + + ig.push( + Inst::new( + "iconcat", + r#" + Concatenate low and high bits to form a larger integer type. + + Vectors of integers are concatenated lane-wise such that the result has + the same number of lanes as the inputs, but the lanes are twice the + size. + "#, + &formats.binary, + ) + .operands_in(vec![lo, hi]) + .operands_out(vec![a]) + .is_ghost(true), + ); + + // Instructions relating to atomic memory accesses and fences + let AtomicMem = &TypeVar::new( + "AtomicMem", + "Any type that can be stored in memory, which can be used in an atomic operation", + TypeSetBuilder::new().ints(8..64).build(), + ); + let x = &Operand::new("x", AtomicMem).with_doc("Value to be atomically stored"); + let a = &Operand::new("a", AtomicMem).with_doc("Value atomically loaded"); + let e = &Operand::new("e", AtomicMem).with_doc("Expected value in CAS"); + let p = &Operand::new("p", iAddr); + let MemFlags = &Operand::new("MemFlags", &imm.memflags); + let AtomicRmwOp = &Operand::new("AtomicRmwOp", &imm.atomic_rmw_op); + + ig.push( + Inst::new( + "atomic_rmw", + r#" + Atomically read-modify-write memory at `p`, with second operand `x`. The old value is + returned. `p` has the type of the target word size, and `x` may be an integer type of + 8, 16, 32 or 64 bits, even on a 32-bit target. The type of the returned value is the + same as the type of `x`. This operation is sequentially consistent and creates + happens-before edges that order normal (non-atomic) loads and stores. 
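Editor's note: in terms of observable behaviour, an `atomic_rmw` add is close to `AtomicU64::fetch_add` with sequentially consistent ordering: memory is updated and the old value is returned. This is a sketch of the semantics, not a statement about how backends implement it:

```rust
use std::sync::atomic::{AtomicU64, Ordering};

fn main() {
    let p = AtomicU64::new(40);
    // The memory at `p` is updated and the *old* value is returned.
    let old = p.fetch_add(2, Ordering::SeqCst);
    assert_eq!(old, 40);
    assert_eq!(p.load(Ordering::SeqCst), 42);
}
```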
+ "#, + &formats.atomic_rmw, + ) + .operands_in(vec![MemFlags, AtomicRmwOp, p, x]) + .operands_out(vec![a]) + .can_load(true) + .can_store(true) + .other_side_effects(true), + ); + + ig.push( + Inst::new( + "atomic_cas", + r#" + Perform an atomic compare-and-swap operation on memory at `p`, with expected value `e`, + storing `x` if the value at `p` equals `e`. The old value at `p` is returned, + regardless of whether the operation succeeds or fails. `p` has the type of the target + word size, and `x` and `e` must have the same type and the same size, which may be an + integer type of 8, 16, 32 or 64 bits, even on a 32-bit target. The type of the returned + value is the same as the type of `x` and `e`. This operation is sequentially + consistent and creates happens-before edges that order normal (non-atomic) loads and + stores. + "#, + &formats.atomic_cas, + ) + .operands_in(vec![MemFlags, p, e, x]) + .operands_out(vec![a]) + .can_load(true) + .can_store(true) + .other_side_effects(true), + ); + + ig.push( + Inst::new( + "atomic_load", + r#" + Atomically load from memory at `p`. + + This is a polymorphic instruction that can load any value type which has a memory + representation. It should only be used for integer types with 8, 16, 32 or 64 bits. + This operation is sequentially consistent and creates happens-before edges that order + normal (non-atomic) loads and stores. + "#, + &formats.load_no_offset, + ) + .operands_in(vec![MemFlags, p]) + .operands_out(vec![a]) + .can_load(true) + .other_side_effects(true), + ); + + ig.push( + Inst::new( + "atomic_store", + r#" + Atomically store `x` to memory at `p`. + + This is a polymorphic instruction that can store any value type with a memory + representation. It should only be used for integer types with 8, 16, 32 or 64 bits. + This operation is sequentially consistent and creates happens-before edges that order + normal (non-atomic) loads and stores. + "#, + &formats.store_no_offset, + ) + .operands_in(vec![MemFlags, x, p]) + .can_store(true) + .other_side_effects(true), + ); + + ig.push( + Inst::new( + "fence", + r#" + A memory fence. This must provide ordering to ensure that, at a minimum, neither loads + nor stores of any kind may move forwards or backwards across the fence. This operation + is sequentially consistent. + "#, + &formats.nullary, + ) + .other_side_effects(true), + ); + + let Offset = &Operand::new("Offset", &imm.offset32).with_doc("Byte offset from base address"); + let a = &Operand::new("a", TxN); + + ig.push( + Inst::new( + "load_splat", + r#" + Load an element from memory at ``p + Offset`` and return a vector + whose lanes are all set to that element. + + This is equivalent to ``load`` followed by ``splat``. 
+ "#, + &formats.load, + ) + .operands_in(vec![MemFlags, p, Offset]) + .operands_out(vec![a]) + .can_load(true), + ); + + ig.build() +} diff --git a/third_party/rust/cranelift-codegen-meta/src/shared/legalize.rs b/third_party/rust/cranelift-codegen-meta/src/shared/legalize.rs new file mode 100644 index 0000000000..9a0d6cffde --- /dev/null +++ b/third_party/rust/cranelift-codegen-meta/src/shared/legalize.rs @@ -0,0 +1,1087 @@ +use crate::cdsl::ast::{var, ExprBuilder, Literal}; +use crate::cdsl::instructions::{Bindable, Instruction, InstructionGroup}; +use crate::cdsl::xform::{TransformGroupBuilder, TransformGroups}; + +use crate::shared::immediates::Immediates; +use crate::shared::types::Float::{F32, F64}; +use crate::shared::types::Int::{I128, I16, I32, I64, I8}; +use cranelift_codegen_shared::condcodes::{CondCode, IntCC}; + +#[allow(clippy::many_single_char_names, clippy::cognitive_complexity)] +pub(crate) fn define(insts: &InstructionGroup, imm: &Immediates) -> TransformGroups { + let mut narrow = TransformGroupBuilder::new( + "narrow", + r#" + Legalize instructions by narrowing. + + The transformations in the 'narrow' group work by expressing + instructions in terms of smaller types. Operations on vector types are + expressed in terms of vector types with fewer lanes, and integer + operations are expressed in terms of smaller integer types. + "#, + ); + + let mut widen = TransformGroupBuilder::new( + "widen", + r#" + Legalize instructions by widening. + + The transformations in the 'widen' group work by expressing + instructions in terms of larger types. + "#, + ); + + let mut expand = TransformGroupBuilder::new( + "expand", + r#" + Legalize instructions by expansion. + + Rewrite instructions in terms of other instructions, generally + operating on the same types as the original instructions. + "#, + ); + + // List of instructions. 
+ let band = insts.by_name("band"); + let band_imm = insts.by_name("band_imm"); + let band_not = insts.by_name("band_not"); + let bint = insts.by_name("bint"); + let bitrev = insts.by_name("bitrev"); + let bnot = insts.by_name("bnot"); + let bor = insts.by_name("bor"); + let bor_imm = insts.by_name("bor_imm"); + let bor_not = insts.by_name("bor_not"); + let brnz = insts.by_name("brnz"); + let brz = insts.by_name("brz"); + let br_icmp = insts.by_name("br_icmp"); + let br_table = insts.by_name("br_table"); + let bxor = insts.by_name("bxor"); + let bxor_imm = insts.by_name("bxor_imm"); + let bxor_not = insts.by_name("bxor_not"); + let cls = insts.by_name("cls"); + let clz = insts.by_name("clz"); + let ctz = insts.by_name("ctz"); + let copy = insts.by_name("copy"); + let fabs = insts.by_name("fabs"); + let f32const = insts.by_name("f32const"); + let f64const = insts.by_name("f64const"); + let fcopysign = insts.by_name("fcopysign"); + let fcvt_from_sint = insts.by_name("fcvt_from_sint"); + let fneg = insts.by_name("fneg"); + let iadd = insts.by_name("iadd"); + let iadd_cin = insts.by_name("iadd_cin"); + let iadd_cout = insts.by_name("iadd_cout"); + let iadd_carry = insts.by_name("iadd_carry"); + let iadd_ifcin = insts.by_name("iadd_ifcin"); + let iadd_ifcout = insts.by_name("iadd_ifcout"); + let iadd_imm = insts.by_name("iadd_imm"); + let icmp = insts.by_name("icmp"); + let icmp_imm = insts.by_name("icmp_imm"); + let iconcat = insts.by_name("iconcat"); + let iconst = insts.by_name("iconst"); + let ifcmp = insts.by_name("ifcmp"); + let ifcmp_imm = insts.by_name("ifcmp_imm"); + let imul = insts.by_name("imul"); + let imul_imm = insts.by_name("imul_imm"); + let ireduce = insts.by_name("ireduce"); + let irsub_imm = insts.by_name("irsub_imm"); + let ishl = insts.by_name("ishl"); + let ishl_imm = insts.by_name("ishl_imm"); + let isplit = insts.by_name("isplit"); + let istore8 = insts.by_name("istore8"); + let istore16 = insts.by_name("istore16"); + let isub = insts.by_name("isub"); + let isub_bin = insts.by_name("isub_bin"); + let isub_bout = insts.by_name("isub_bout"); + let isub_borrow = insts.by_name("isub_borrow"); + let isub_ifbin = insts.by_name("isub_ifbin"); + let isub_ifbout = insts.by_name("isub_ifbout"); + let jump = insts.by_name("jump"); + let load = insts.by_name("load"); + let popcnt = insts.by_name("popcnt"); + let resumable_trapnz = insts.by_name("resumable_trapnz"); + let rotl = insts.by_name("rotl"); + let rotl_imm = insts.by_name("rotl_imm"); + let rotr = insts.by_name("rotr"); + let rotr_imm = insts.by_name("rotr_imm"); + let sdiv = insts.by_name("sdiv"); + let sdiv_imm = insts.by_name("sdiv_imm"); + let select = insts.by_name("select"); + let sextend = insts.by_name("sextend"); + let sshr = insts.by_name("sshr"); + let sshr_imm = insts.by_name("sshr_imm"); + let srem = insts.by_name("srem"); + let srem_imm = insts.by_name("srem_imm"); + let store = insts.by_name("store"); + let udiv = insts.by_name("udiv"); + let udiv_imm = insts.by_name("udiv_imm"); + let uextend = insts.by_name("uextend"); + let uload8 = insts.by_name("uload8"); + let uload16 = insts.by_name("uload16"); + let umulhi = insts.by_name("umulhi"); + let ushr = insts.by_name("ushr"); + let ushr_imm = insts.by_name("ushr_imm"); + let urem = insts.by_name("urem"); + let urem_imm = insts.by_name("urem_imm"); + let trapif = insts.by_name("trapif"); + let trapnz = insts.by_name("trapnz"); + let trapz = insts.by_name("trapz"); + + // Custom expansions for memory objects. 
+ expand.custom_legalize(insts.by_name("global_value"), "expand_global_value"); + expand.custom_legalize(insts.by_name("heap_addr"), "expand_heap_addr"); + expand.custom_legalize(insts.by_name("table_addr"), "expand_table_addr"); + + // Custom expansions for calls. + expand.custom_legalize(insts.by_name("call"), "expand_call"); + + // Custom expansions that need to change the CFG. + // TODO: Add sufficient XForm syntax that we don't need to hand-code these. + expand.custom_legalize(trapz, "expand_cond_trap"); + expand.custom_legalize(trapnz, "expand_cond_trap"); + expand.custom_legalize(resumable_trapnz, "expand_cond_trap"); + expand.custom_legalize(br_table, "expand_br_table"); + expand.custom_legalize(select, "expand_select"); + widen.custom_legalize(select, "expand_select"); // small ints + + // Custom expansions for floating point constants. + // These expansions require bit-casting or creating constant pool entries. + expand.custom_legalize(f32const, "expand_fconst"); + expand.custom_legalize(f64const, "expand_fconst"); + + // Custom expansions for stack memory accesses. + expand.custom_legalize(insts.by_name("stack_load"), "expand_stack_load"); + expand.custom_legalize(insts.by_name("stack_store"), "expand_stack_store"); + + // Custom expansions for small stack memory acccess. + widen.custom_legalize(insts.by_name("stack_load"), "expand_stack_load"); + widen.custom_legalize(insts.by_name("stack_store"), "expand_stack_store"); + + // List of variables to reuse in patterns. + let x = var("x"); + let y = var("y"); + let z = var("z"); + let a = var("a"); + let a1 = var("a1"); + let a2 = var("a2"); + let a3 = var("a3"); + let a4 = var("a4"); + let b = var("b"); + let b1 = var("b1"); + let b2 = var("b2"); + let b3 = var("b3"); + let b4 = var("b4"); + let b_in = var("b_in"); + let b_int = var("b_int"); + let c = var("c"); + let c1 = var("c1"); + let c2 = var("c2"); + let c3 = var("c3"); + let c4 = var("c4"); + let c_in = var("c_in"); + let c_int = var("c_int"); + let d = var("d"); + let d1 = var("d1"); + let d2 = var("d2"); + let d3 = var("d3"); + let d4 = var("d4"); + let e = var("e"); + let e1 = var("e1"); + let e2 = var("e2"); + let e3 = var("e3"); + let e4 = var("e4"); + let f = var("f"); + let f1 = var("f1"); + let f2 = var("f2"); + let xl = var("xl"); + let xh = var("xh"); + let yl = var("yl"); + let yh = var("yh"); + let al = var("al"); + let ah = var("ah"); + let cc = var("cc"); + let block = var("block"); + let ptr = var("ptr"); + let flags = var("flags"); + let offset = var("off"); + let vararg = var("vararg"); + + narrow.custom_legalize(load, "narrow_load"); + narrow.custom_legalize(store, "narrow_store"); + + // iconst.i64 can't be legalized in the meta langage (because integer literals can't be + // embedded as part of arguments), so use a custom legalization for now. 
+ narrow.custom_legalize(iconst, "narrow_iconst"); + + for &(ty, ty_half) in &[(I128, I64), (I64, I32)] { + let inst = uextend.bind(ty).bind(ty_half); + narrow.legalize( + def!(a = inst(x)), + vec![ + def!(ah = iconst(Literal::constant(&imm.imm64, 0))), + def!(a = iconcat(x, ah)), + ], + ); + } + + for &(ty, ty_half, shift) in &[(I128, I64, 63), (I64, I32, 31)] { + let inst = sextend.bind(ty).bind(ty_half); + narrow.legalize( + def!(a = inst(x)), + vec![ + def!(ah = sshr_imm(x, Literal::constant(&imm.imm64, shift))), // splat sign bit to whole number + def!(a = iconcat(x, ah)), + ], + ); + } + + for &bin_op in &[band, bor, bxor, band_not, bor_not, bxor_not] { + narrow.legalize( + def!(a = bin_op(x, y)), + vec![ + def!((xl, xh) = isplit(x)), + def!((yl, yh) = isplit(y)), + def!(al = bin_op(xl, yl)), + def!(ah = bin_op(xh, yh)), + def!(a = iconcat(al, ah)), + ], + ); + } + + narrow.legalize( + def!(a = bnot(x)), + vec![ + def!((xl, xh) = isplit(x)), + def!(al = bnot(xl)), + def!(ah = bnot(xh)), + def!(a = iconcat(al, ah)), + ], + ); + + narrow.legalize( + def!(a = select(c, x, y)), + vec![ + def!((xl, xh) = isplit(x)), + def!((yl, yh) = isplit(y)), + def!(al = select(c, xl, yl)), + def!(ah = select(c, xh, yh)), + def!(a = iconcat(al, ah)), + ], + ); + + for &ty in &[I128, I64] { + let block = var("block"); + let block1 = var("block1"); + let block2 = var("block2"); + + narrow.legalize( + def!(brz.ty(x, block, vararg)), + vec![ + def!((xl, xh) = isplit(x)), + def!( + a = icmp_imm( + Literal::enumerator_for(&imm.intcc, "eq"), + xl, + Literal::constant(&imm.imm64, 0) + ) + ), + def!( + b = icmp_imm( + Literal::enumerator_for(&imm.intcc, "eq"), + xh, + Literal::constant(&imm.imm64, 0) + ) + ), + def!(c = band(a, b)), + def!(brnz(c, block, vararg)), + ], + ); + + narrow.legalize( + def!(brnz.ty(x, block1, vararg)), + vec![ + def!((xl, xh) = isplit(x)), + def!(brnz(xl, block1, vararg)), + def!(jump(block2, Literal::empty_vararg())), + block!(block2), + def!(brnz(xh, block1, vararg)), + ], + ); + } + + narrow.legalize( + def!(a = popcnt.I128(x)), + vec![ + def!((xl, xh) = isplit(x)), + def!(e1 = popcnt(xl)), + def!(e2 = popcnt(xh)), + def!(e3 = iadd(e1, e2)), + def!(a = uextend(e3)), + ], + ); + + // TODO(ryzokuken): benchmark this and decide if branching is a faster + // approach than evaluating boolean expressions. 
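As a plain-Rust sanity check of the split-halves scheme used by these narrowings (a sketch of the arithmetic, not the meta-language or generated code): sign extension splats the sign bit into the new high half, and bitwise ops simply apply per half.

    // Sketch only: mirrors the `sextend` narrowing (sshr_imm by 63 splats the
    // sign bit) and the per-half bitwise narrowing above, on plain integers.
    fn sextend_i64_to_i128(x: i64) -> i128 {
        let lo = x as u64;
        let hi = (x >> 63) as u64; // 0 for non-negative x, all ones for negative x
        (((hi as u128) << 64) | lo as u128) as i128 // iconcat(lo, hi)
    }

    fn band_i128_via_halves(x: u128, y: u128) -> u128 {
        let (xl, xh) = (x as u64, (x >> 64) as u64); // isplit(x)
        let (yl, yh) = (y as u64, (y >> 64) as u64); // isplit(y)
        ((xh & yh) as u128) << 64 | (xl & yl) as u128 // band per half, then iconcat
    }

    #[test]
    fn narrowing_sketches_agree_with_native_ops() {
        assert_eq!(sextend_i64_to_i128(-5), -5_i128);
        assert_eq!(sextend_i64_to_i128(7), 7_i128);
        assert_eq!(band_i128_via_halves(u128::MAX, 0xffee), 0xffee);
    }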
+ + narrow.custom_legalize(icmp_imm, "narrow_icmp_imm"); + + let intcc_eq = Literal::enumerator_for(&imm.intcc, "eq"); + let intcc_ne = Literal::enumerator_for(&imm.intcc, "ne"); + for &(int_ty, int_ty_half) in &[(I64, I32), (I128, I64)] { + narrow.legalize( + def!(b = icmp.int_ty(intcc_eq, x, y)), + vec![ + def!((xl, xh) = isplit(x)), + def!((yl, yh) = isplit(y)), + def!(b1 = icmp.int_ty_half(intcc_eq, xl, yl)), + def!(b2 = icmp.int_ty_half(intcc_eq, xh, yh)), + def!(b = band(b1, b2)), + ], + ); + + narrow.legalize( + def!(b = icmp.int_ty(intcc_ne, x, y)), + vec![ + def!((xl, xh) = isplit(x)), + def!((yl, yh) = isplit(y)), + def!(b1 = icmp.int_ty_half(intcc_ne, xl, yl)), + def!(b2 = icmp.int_ty_half(intcc_ne, xh, yh)), + def!(b = bor(b1, b2)), + ], + ); + + use IntCC::*; + for cc in &[ + SignedGreaterThan, + SignedGreaterThanOrEqual, + SignedLessThan, + SignedLessThanOrEqual, + UnsignedGreaterThan, + UnsignedGreaterThanOrEqual, + UnsignedLessThan, + UnsignedLessThanOrEqual, + ] { + let intcc_cc = Literal::enumerator_for(&imm.intcc, cc.to_static_str()); + let cc1 = Literal::enumerator_for(&imm.intcc, cc.without_equal().to_static_str()); + let cc2 = + Literal::enumerator_for(&imm.intcc, cc.inverse().without_equal().to_static_str()); + let cc3 = Literal::enumerator_for(&imm.intcc, cc.unsigned().to_static_str()); + narrow.legalize( + def!(b = icmp.int_ty(intcc_cc, x, y)), + vec![ + def!((xl, xh) = isplit(x)), + def!((yl, yh) = isplit(y)), + // X = cc1 || (!cc2 && cc3) + def!(b1 = icmp.int_ty_half(cc1, xh, yh)), + def!(b2 = icmp.int_ty_half(cc2, xh, yh)), + def!(b3 = icmp.int_ty_half(cc3, xl, yl)), + def!(c1 = bnot(b2)), + def!(c2 = band(c1, b3)), + def!(b = bor(b1, c2)), + ], + ); + } + } + + // TODO(ryzokuken): explore the perf diff w/ x86_umulx and consider have a + // separate legalization for x86. + for &ty in &[I64, I128] { + narrow.legalize( + def!(a = imul.ty(x, y)), + vec![ + def!((xl, xh) = isplit(x)), + def!((yl, yh) = isplit(y)), + def!(a1 = imul(xh, yl)), + def!(a2 = imul(xl, yh)), + def!(a3 = iadd(a1, a2)), + def!(a4 = umulhi(xl, yl)), + def!(ah = iadd(a3, a4)), + def!(al = imul(xl, yl)), + def!(a = iconcat(al, ah)), + ], + ); + } + + let zero = Literal::constant(&imm.imm64, 0); + narrow.legalize( + def!(a = iadd_imm.I128(x, c)), + vec![ + def!(yh = iconst.I64(zero)), + def!(yl = iconst.I64(c)), + def!(y = iconcat.I64(yh, yl)), + def!(a = iadd(x, y)), + ], + ); + + // Widen instructions with one input operand. + for &op in &[bnot, popcnt] { + for &int_ty in &[I8, I16] { + widen.legalize( + def!(a = op.int_ty(b)), + vec![ + def!(x = uextend.I32(b)), + def!(z = op.I32(x)), + def!(a = ireduce.int_ty(z)), + ], + ); + } + } + + // Widen instructions with two input operands. + let mut widen_two_arg = |signed: bool, op: &Instruction| { + for &int_ty in &[I8, I16] { + let sign_ext_op = if signed { sextend } else { uextend }; + widen.legalize( + def!(a = op.int_ty(b, c)), + vec![ + def!(x = sign_ext_op.I32(b)), + def!(y = sign_ext_op.I32(c)), + def!(z = op.I32(x, y)), + def!(a = ireduce.int_ty(z)), + ], + ); + } + }; + + for bin_op in &[ + iadd, isub, imul, udiv, urem, band, bor, bxor, band_not, bor_not, bxor_not, + ] { + widen_two_arg(false, bin_op); + } + for bin_op in &[sdiv, srem] { + widen_two_arg(true, bin_op); + } + + // Widen instructions using immediate operands. 
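(The immediate-operand widenings that follow reuse the same extend, operate, reduce shape as the two-operand helper above.) For the 128-bit multiply narrowing earlier in this group, here is a plain-Rust sketch of the same half-product arithmetic; it keeps only the low 128 bits of the product, matching the wrapping behaviour of `imul`:

    // Sketch only: the split-halves product mirrors the `imul`/`umulhi` pattern above.
    fn imul_i128_via_halves(x: u128, y: u128) -> u128 {
        let (xl, xh) = (x as u64, (x >> 64) as u64); // isplit(x)
        let (yl, yh) = (y as u64, (y >> 64) as u64); // isplit(y)
        let a1 = xh.wrapping_mul(yl);                // imul(xh, yl)
        let a2 = xl.wrapping_mul(yh);                // imul(xl, yh)
        let a3 = a1.wrapping_add(a2);                // iadd(a1, a2)
        let a4 = ((xl as u128 * yl as u128) >> 64) as u64; // umulhi(xl, yl)
        let ah = a3.wrapping_add(a4);                // iadd(a3, a4)
        let al = xl.wrapping_mul(yl);                // imul(xl, yl)
        ((ah as u128) << 64) | al as u128            // iconcat(al, ah)
    }

    #[test]
    fn imul_sketch_agrees_with_wrapping_mul() {
        let x = 0xfeed_face_dead_beef_u128 * 3 + 11;
        let y = 0x0123_4567_89ab_cdef_u128 << 40 | 0x42;
        assert_eq!(imul_i128_via_halves(x, y), x.wrapping_mul(y));
    }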
+ let mut widen_imm = |signed: bool, op: &Instruction| { + for &int_ty in &[I8, I16] { + let sign_ext_op = if signed { sextend } else { uextend }; + widen.legalize( + def!(a = op.int_ty(b, c)), + vec![ + def!(x = sign_ext_op.I32(b)), + def!(z = op.I32(x, c)), + def!(a = ireduce.int_ty(z)), + ], + ); + } + }; + + for bin_op in &[ + iadd_imm, imul_imm, udiv_imm, urem_imm, band_imm, bor_imm, bxor_imm, irsub_imm, + ] { + widen_imm(false, bin_op); + } + for bin_op in &[sdiv_imm, srem_imm] { + widen_imm(true, bin_op); + } + + for &(int_ty, num) in &[(I8, 24), (I16, 16)] { + let imm = Literal::constant(&imm.imm64, -num); + + widen.legalize( + def!(a = clz.int_ty(b)), + vec![ + def!(c = uextend.I32(b)), + def!(d = clz.I32(c)), + def!(e = iadd_imm(d, imm)), + def!(a = ireduce.int_ty(e)), + ], + ); + + widen.legalize( + def!(a = cls.int_ty(b)), + vec![ + def!(c = sextend.I32(b)), + def!(d = cls.I32(c)), + def!(e = iadd_imm(d, imm)), + def!(a = ireduce.int_ty(e)), + ], + ); + } + + for &(int_ty, num) in &[(I8, 1 << 8), (I16, 1 << 16)] { + let num = Literal::constant(&imm.imm64, num); + widen.legalize( + def!(a = ctz.int_ty(b)), + vec![ + def!(c = uextend.I32(b)), + // When `b` is zero, returns the size of x in bits. + def!(d = bor_imm(c, num)), + def!(e = ctz.I32(d)), + def!(a = ireduce.int_ty(e)), + ], + ); + } + + // iconst + for &int_ty in &[I8, I16] { + widen.legalize( + def!(a = iconst.int_ty(b)), + vec![def!(c = iconst.I32(b)), def!(a = ireduce.int_ty(c))], + ); + } + + for &extend_op in &[uextend, sextend] { + // The sign extension operators have two typevars: the result has one and controls the + // instruction, then the input has one. + let bound = extend_op.bind(I16).bind(I8); + widen.legalize( + def!(a = bound(b)), + vec![def!(c = extend_op.I32(b)), def!(a = ireduce(c))], + ); + } + + widen.legalize( + def!(store.I8(flags, a, ptr, offset)), + vec![ + def!(b = uextend.I32(a)), + def!(istore8(flags, b, ptr, offset)), + ], + ); + + widen.legalize( + def!(store.I16(flags, a, ptr, offset)), + vec![ + def!(b = uextend.I32(a)), + def!(istore16(flags, b, ptr, offset)), + ], + ); + + widen.legalize( + def!(a = load.I8(flags, ptr, offset)), + vec![ + def!(b = uload8.I32(flags, ptr, offset)), + def!(a = ireduce(b)), + ], + ); + + widen.legalize( + def!(a = load.I16(flags, ptr, offset)), + vec![ + def!(b = uload16.I32(flags, ptr, offset)), + def!(a = ireduce(b)), + ], + ); + + for &int_ty in &[I8, I16] { + widen.legalize( + def!(br_table.int_ty(x, y, z)), + vec![def!(b = uextend.I32(x)), def!(br_table(b, y, z))], + ); + } + + for &int_ty in &[I8, I16] { + widen.legalize( + def!(a = bint.int_ty(b)), + vec![def!(x = bint.I32(b)), def!(a = ireduce.int_ty(x))], + ); + } + + for &int_ty in &[I8, I16] { + for &op in &[ishl, ishl_imm, ushr, ushr_imm] { + widen.legalize( + def!(a = op.int_ty(b, c)), + vec![ + def!(x = uextend.I32(b)), + def!(z = op.I32(x, c)), + def!(a = ireduce.int_ty(z)), + ], + ); + } + + for &op in &[sshr, sshr_imm] { + widen.legalize( + def!(a = op.int_ty(b, c)), + vec![ + def!(x = sextend.I32(b)), + def!(z = op.I32(x, c)), + def!(a = ireduce.int_ty(z)), + ], + ); + } + + for cc in &["eq", "ne", "ugt", "ult", "uge", "ule"] { + let w_cc = Literal::enumerator_for(&imm.intcc, cc); + widen.legalize( + def!(a = icmp_imm.int_ty(w_cc, b, c)), + vec![def!(x = uextend.I32(b)), def!(a = icmp_imm(w_cc, x, c))], + ); + widen.legalize( + def!(a = icmp.int_ty(w_cc, b, c)), + vec![ + def!(x = uextend.I32(b)), + def!(y = uextend.I32(c)), + def!(a = icmp.I32(w_cc, x, y)), + ], + ); + } + + for cc in 
&["sgt", "slt", "sge", "sle"] { + let w_cc = Literal::enumerator_for(&imm.intcc, cc); + widen.legalize( + def!(a = icmp_imm.int_ty(w_cc, b, c)), + vec![def!(x = sextend.I32(b)), def!(a = icmp_imm(w_cc, x, c))], + ); + + widen.legalize( + def!(a = icmp.int_ty(w_cc, b, c)), + vec![ + def!(x = sextend.I32(b)), + def!(y = sextend.I32(c)), + def!(a = icmp(w_cc, x, y)), + ], + ); + } + } + + for &ty in &[I8, I16] { + widen.legalize( + def!(brz.ty(x, block, vararg)), + vec![def!(a = uextend.I32(x)), def!(brz(a, block, vararg))], + ); + + widen.legalize( + def!(brnz.ty(x, block, vararg)), + vec![def!(a = uextend.I32(x)), def!(brnz(a, block, vararg))], + ); + } + + for &(ty_half, ty) in &[(I64, I128), (I32, I64)] { + let inst = ireduce.bind(ty_half).bind(ty); + expand.legalize( + def!(a = inst(x)), + vec![def!((b, c) = isplit(x)), def!(a = copy(b))], + ); + } + + // Expand integer operations with carry for RISC architectures that don't have + // the flags. + let intcc_ult = Literal::enumerator_for(&imm.intcc, "ult"); + expand.legalize( + def!((a, c) = iadd_cout(x, y)), + vec![def!(a = iadd(x, y)), def!(c = icmp(intcc_ult, a, x))], + ); + + let intcc_ugt = Literal::enumerator_for(&imm.intcc, "ugt"); + expand.legalize( + def!((a, b) = isub_bout(x, y)), + vec![def!(a = isub(x, y)), def!(b = icmp(intcc_ugt, a, x))], + ); + + expand.legalize( + def!(a = iadd_cin(x, y, c)), + vec![ + def!(a1 = iadd(x, y)), + def!(c_int = bint(c)), + def!(a = iadd(a1, c_int)), + ], + ); + + expand.legalize( + def!(a = isub_bin(x, y, b)), + vec![ + def!(a1 = isub(x, y)), + def!(b_int = bint(b)), + def!(a = isub(a1, b_int)), + ], + ); + + expand.legalize( + def!((a, c) = iadd_carry(x, y, c_in)), + vec![ + def!((a1, c1) = iadd_cout(x, y)), + def!(c_int = bint(c_in)), + def!((a, c2) = iadd_cout(a1, c_int)), + def!(c = bor(c1, c2)), + ], + ); + + expand.legalize( + def!((a, b) = isub_borrow(x, y, b_in)), + vec![ + def!((a1, b1) = isub_bout(x, y)), + def!(b_int = bint(b_in)), + def!((a, b2) = isub_bout(a1, b_int)), + def!(b = bor(b1, b2)), + ], + ); + + // Expansion for fcvt_from_sint for smaller integer types. + // This uses expand and not widen because the controlling type variable for + // this instruction is f32/f64, which is legalized as part of the expand + // group. + for &dest_ty in &[F32, F64] { + for &src_ty in &[I8, I16] { + let bound_inst = fcvt_from_sint.bind(dest_ty).bind(src_ty); + expand.legalize( + def!(a = bound_inst(b)), + vec![ + def!(x = sextend.I32(b)), + def!(a = fcvt_from_sint.dest_ty(x)), + ], + ); + } + } + + // Expansions for immediate operands that are out of range. + for &(inst_imm, inst) in &[ + (iadd_imm, iadd), + (imul_imm, imul), + (sdiv_imm, sdiv), + (udiv_imm, udiv), + (srem_imm, srem), + (urem_imm, urem), + (band_imm, band), + (bor_imm, bor), + (bxor_imm, bxor), + (ifcmp_imm, ifcmp), + ] { + expand.legalize( + def!(a = inst_imm(x, y)), + vec![def!(a1 = iconst(y)), def!(a = inst(x, a1))], + ); + } + + expand.legalize( + def!(a = irsub_imm(y, x)), + vec![def!(a1 = iconst(x)), def!(a = isub(a1, y))], + ); + + // Rotates and shifts. + for &(inst_imm, inst) in &[ + (rotl_imm, rotl), + (rotr_imm, rotr), + (ishl_imm, ishl), + (sshr_imm, sshr), + (ushr_imm, ushr), + ] { + expand.legalize( + def!(a = inst_imm(x, y)), + vec![def!(a1 = iconst.I32(y)), def!(a = inst(x, a1))], + ); + } + + expand.legalize( + def!(a = icmp_imm(cc, x, y)), + vec![def!(a1 = iconst(y)), def!(a = icmp(cc, x, a1))], + ); + + //# Expansions for *_not variants of bitwise ops. 
+ for &(inst_not, inst) in &[(band_not, band), (bor_not, bor), (bxor_not, bxor)] { + expand.legalize( + def!(a = inst_not(x, y)), + vec![def!(a1 = bnot(y)), def!(a = inst(x, a1))], + ); + } + + //# Expand bnot using xor. + let minus_one = Literal::constant(&imm.imm64, -1); + expand.legalize( + def!(a = bnot(x)), + vec![def!(y = iconst(minus_one)), def!(a = bxor(x, y))], + ); + + //# Expand bitrev + //# Adapted from Stack Overflow. + //# https://stackoverflow.com/questions/746171/most-efficient-algorithm-for-bit-reversal-from-msb-lsb-to-lsb-msb-in-c + let imm64_1 = Literal::constant(&imm.imm64, 1); + let imm64_2 = Literal::constant(&imm.imm64, 2); + let imm64_4 = Literal::constant(&imm.imm64, 4); + + widen.legalize( + def!(a = bitrev.I8(x)), + vec![ + def!(a1 = band_imm(x, Literal::constant(&imm.imm64, 0xaa))), + def!(a2 = ushr_imm(a1, imm64_1)), + def!(a3 = band_imm(x, Literal::constant(&imm.imm64, 0x55))), + def!(a4 = ishl_imm(a3, imm64_1)), + def!(b = bor(a2, a4)), + def!(b1 = band_imm(b, Literal::constant(&imm.imm64, 0xcc))), + def!(b2 = ushr_imm(b1, imm64_2)), + def!(b3 = band_imm(b, Literal::constant(&imm.imm64, 0x33))), + def!(b4 = ishl_imm(b3, imm64_2)), + def!(c = bor(b2, b4)), + def!(c1 = band_imm(c, Literal::constant(&imm.imm64, 0xf0))), + def!(c2 = ushr_imm(c1, imm64_4)), + def!(c3 = band_imm(c, Literal::constant(&imm.imm64, 0x0f))), + def!(c4 = ishl_imm(c3, imm64_4)), + def!(a = bor(c2, c4)), + ], + ); + + let imm64_8 = Literal::constant(&imm.imm64, 8); + + widen.legalize( + def!(a = bitrev.I16(x)), + vec![ + def!(a1 = band_imm(x, Literal::constant(&imm.imm64, 0xaaaa))), + def!(a2 = ushr_imm(a1, imm64_1)), + def!(a3 = band_imm(x, Literal::constant(&imm.imm64, 0x5555))), + def!(a4 = ishl_imm(a3, imm64_1)), + def!(b = bor(a2, a4)), + def!(b1 = band_imm(b, Literal::constant(&imm.imm64, 0xcccc))), + def!(b2 = ushr_imm(b1, imm64_2)), + def!(b3 = band_imm(b, Literal::constant(&imm.imm64, 0x3333))), + def!(b4 = ishl_imm(b3, imm64_2)), + def!(c = bor(b2, b4)), + def!(c1 = band_imm(c, Literal::constant(&imm.imm64, 0xf0f0))), + def!(c2 = ushr_imm(c1, imm64_4)), + def!(c3 = band_imm(c, Literal::constant(&imm.imm64, 0x0f0f))), + def!(c4 = ishl_imm(c3, imm64_4)), + def!(d = bor(c2, c4)), + def!(d1 = band_imm(d, Literal::constant(&imm.imm64, 0xff00))), + def!(d2 = ushr_imm(d1, imm64_8)), + def!(d3 = band_imm(d, Literal::constant(&imm.imm64, 0x00ff))), + def!(d4 = ishl_imm(d3, imm64_8)), + def!(a = bor(d2, d4)), + ], + ); + + let imm64_16 = Literal::constant(&imm.imm64, 16); + + expand.legalize( + def!(a = bitrev.I32(x)), + vec![ + def!(a1 = band_imm(x, Literal::constant(&imm.imm64, 0xaaaa_aaaa))), + def!(a2 = ushr_imm(a1, imm64_1)), + def!(a3 = band_imm(x, Literal::constant(&imm.imm64, 0x5555_5555))), + def!(a4 = ishl_imm(a3, imm64_1)), + def!(b = bor(a2, a4)), + def!(b1 = band_imm(b, Literal::constant(&imm.imm64, 0xcccc_cccc))), + def!(b2 = ushr_imm(b1, imm64_2)), + def!(b3 = band_imm(b, Literal::constant(&imm.imm64, 0x3333_3333))), + def!(b4 = ishl_imm(b3, imm64_2)), + def!(c = bor(b2, b4)), + def!(c1 = band_imm(c, Literal::constant(&imm.imm64, 0xf0f0_f0f0))), + def!(c2 = ushr_imm(c1, imm64_4)), + def!(c3 = band_imm(c, Literal::constant(&imm.imm64, 0x0f0f_0f0f))), + def!(c4 = ishl_imm(c3, imm64_4)), + def!(d = bor(c2, c4)), + def!(d1 = band_imm(d, Literal::constant(&imm.imm64, 0xff00_ff00))), + def!(d2 = ushr_imm(d1, imm64_8)), + def!(d3 = band_imm(d, Literal::constant(&imm.imm64, 0x00ff_00ff))), + def!(d4 = ishl_imm(d3, imm64_8)), + def!(e = bor(d2, d4)), + def!(e1 = ushr_imm(e, 
imm64_16)), + def!(e2 = ishl_imm(e, imm64_16)), + def!(a = bor(e1, e2)), + ], + ); + + #[allow(overflowing_literals)] + let imm64_0xaaaaaaaaaaaaaaaa = Literal::constant(&imm.imm64, 0xaaaa_aaaa_aaaa_aaaa); + let imm64_0x5555555555555555 = Literal::constant(&imm.imm64, 0x5555_5555_5555_5555); + #[allow(overflowing_literals)] + let imm64_0xcccccccccccccccc = Literal::constant(&imm.imm64, 0xcccc_cccc_cccc_cccc); + let imm64_0x3333333333333333 = Literal::constant(&imm.imm64, 0x3333_3333_3333_3333); + #[allow(overflowing_literals)] + let imm64_0xf0f0f0f0f0f0f0f0 = Literal::constant(&imm.imm64, 0xf0f0_f0f0_f0f0_f0f0); + let imm64_0x0f0f0f0f0f0f0f0f = Literal::constant(&imm.imm64, 0x0f0f_0f0f_0f0f_0f0f); + #[allow(overflowing_literals)] + let imm64_0xff00ff00ff00ff00 = Literal::constant(&imm.imm64, 0xff00_ff00_ff00_ff00); + let imm64_0x00ff00ff00ff00ff = Literal::constant(&imm.imm64, 0x00ff_00ff_00ff_00ff); + #[allow(overflowing_literals)] + let imm64_0xffff0000ffff0000 = Literal::constant(&imm.imm64, 0xffff_0000_ffff_0000); + let imm64_0x0000ffff0000ffff = Literal::constant(&imm.imm64, 0x0000_ffff_0000_ffff); + let imm64_32 = Literal::constant(&imm.imm64, 32); + + expand.legalize( + def!(a = bitrev.I64(x)), + vec![ + def!(a1 = band_imm(x, imm64_0xaaaaaaaaaaaaaaaa)), + def!(a2 = ushr_imm(a1, imm64_1)), + def!(a3 = band_imm(x, imm64_0x5555555555555555)), + def!(a4 = ishl_imm(a3, imm64_1)), + def!(b = bor(a2, a4)), + def!(b1 = band_imm(b, imm64_0xcccccccccccccccc)), + def!(b2 = ushr_imm(b1, imm64_2)), + def!(b3 = band_imm(b, imm64_0x3333333333333333)), + def!(b4 = ishl_imm(b3, imm64_2)), + def!(c = bor(b2, b4)), + def!(c1 = band_imm(c, imm64_0xf0f0f0f0f0f0f0f0)), + def!(c2 = ushr_imm(c1, imm64_4)), + def!(c3 = band_imm(c, imm64_0x0f0f0f0f0f0f0f0f)), + def!(c4 = ishl_imm(c3, imm64_4)), + def!(d = bor(c2, c4)), + def!(d1 = band_imm(d, imm64_0xff00ff00ff00ff00)), + def!(d2 = ushr_imm(d1, imm64_8)), + def!(d3 = band_imm(d, imm64_0x00ff00ff00ff00ff)), + def!(d4 = ishl_imm(d3, imm64_8)), + def!(e = bor(d2, d4)), + def!(e1 = band_imm(e, imm64_0xffff0000ffff0000)), + def!(e2 = ushr_imm(e1, imm64_16)), + def!(e3 = band_imm(e, imm64_0x0000ffff0000ffff)), + def!(e4 = ishl_imm(e3, imm64_16)), + def!(f = bor(e2, e4)), + def!(f1 = ushr_imm(f, imm64_32)), + def!(f2 = ishl_imm(f, imm64_32)), + def!(a = bor(f1, f2)), + ], + ); + + narrow.legalize( + def!(a = bitrev.I128(x)), + vec![ + def!((xl, xh) = isplit(x)), + def!(yh = bitrev(xl)), + def!(yl = bitrev(xh)), + def!(a = iconcat(yl, yh)), + ], + ); + + // Floating-point sign manipulations. + for &(ty, const_inst, minus_zero) in &[ + (F32, f32const, &Literal::bits(&imm.ieee32, 0x8000_0000)), + ( + F64, + f64const, + &Literal::bits(&imm.ieee64, 0x8000_0000_0000_0000), + ), + ] { + expand.legalize( + def!(a = fabs.ty(x)), + vec![def!(b = const_inst(minus_zero)), def!(a = band_not(x, b))], + ); + + expand.legalize( + def!(a = fneg.ty(x)), + vec![def!(b = const_inst(minus_zero)), def!(a = bxor(x, b))], + ); + + expand.legalize( + def!(a = fcopysign.ty(x, y)), + vec![ + def!(b = const_inst(minus_zero)), + def!(a1 = band_not(x, b)), + def!(a2 = band(y, b)), + def!(a = bor(a1, a2)), + ], + ); + } + + expand.custom_legalize(br_icmp, "expand_br_icmp"); + + let mut groups = TransformGroups::new(); + + let narrow_id = narrow.build_and_add_to(&mut groups); + let expand_id = expand.build_and_add_to(&mut groups); + + // Expansions using CPU flags. + let mut expand_flags = TransformGroupBuilder::new( + "expand_flags", + r#" + Instruction expansions for architectures with flags. 
+ + Expand some instructions using CPU flags, then fall back to the normal + expansions. Not all architectures support CPU flags, so these patterns + are kept separate. + "#, + ) + .chain_with(expand_id); + + let imm64_0 = Literal::constant(&imm.imm64, 0); + let intcc_ne = Literal::enumerator_for(&imm.intcc, "ne"); + let intcc_eq = Literal::enumerator_for(&imm.intcc, "eq"); + + expand_flags.legalize( + def!(trapnz(x, c)), + vec![ + def!(a = ifcmp_imm(x, imm64_0)), + def!(trapif(intcc_ne, a, c)), + ], + ); + + expand_flags.legalize( + def!(trapz(x, c)), + vec![ + def!(a = ifcmp_imm(x, imm64_0)), + def!(trapif(intcc_eq, a, c)), + ], + ); + + expand_flags.build_and_add_to(&mut groups); + + // Narrow legalizations using CPU flags. + let mut narrow_flags = TransformGroupBuilder::new( + "narrow_flags", + r#" + Narrow instructions for architectures with flags. + + Narrow some instructions using CPU flags, then fall back to the normal + legalizations. Not all architectures support CPU flags, so these + patterns are kept separate. + "#, + ) + .chain_with(narrow_id); + + narrow_flags.legalize( + def!(a = iadd(x, y)), + vec![ + def!((xl, xh) = isplit(x)), + def!((yl, yh) = isplit(y)), + def!((al, c) = iadd_ifcout(xl, yl)), + def!(ah = iadd_ifcin(xh, yh, c)), + def!(a = iconcat(al, ah)), + ], + ); + + narrow_flags.legalize( + def!(a = isub(x, y)), + vec![ + def!((xl, xh) = isplit(x)), + def!((yl, yh) = isplit(y)), + def!((al, b) = isub_ifbout(xl, yl)), + def!(ah = isub_ifbin(xh, yh, b)), + def!(a = iconcat(al, ah)), + ], + ); + + narrow_flags.build_and_add_to(&mut groups); + + // TODO(ryzokuken): figure out a way to legalize iadd_c* to iadd_ifc* (and + // similarly isub_b* to isub_ifb*) on expand_flags so that this isn't required. + // Narrow legalizations for ISAs that don't have CPU flags. + let mut narrow_no_flags = TransformGroupBuilder::new( + "narrow_no_flags", + r#" + Narrow instructions for architectures without flags. + + Narrow some instructions avoiding the use of CPU flags, then fall back + to the normal legalizations. Not all architectures support CPU flags, + so these patterns are kept separate. + "#, + ) + .chain_with(narrow_id); + + narrow_no_flags.legalize( + def!(a = iadd(x, y)), + vec![ + def!((xl, xh) = isplit(x)), + def!((yl, yh) = isplit(y)), + def!((al, c) = iadd_cout(xl, yl)), + def!(ah = iadd_cin(xh, yh, c)), + def!(a = iconcat(al, ah)), + ], + ); + + narrow_no_flags.legalize( + def!(a = isub(x, y)), + vec![ + def!((xl, xh) = isplit(x)), + def!((yl, yh) = isplit(y)), + def!((al, b) = isub_bout(xl, yl)), + def!(ah = isub_bin(xh, yh, b)), + def!(a = iconcat(al, ah)), + ], + ); + + narrow_no_flags.build_and_add_to(&mut groups); + + // TODO The order of declarations unfortunately matters to be compatible with the Python code. + // When it's all migrated, we can put this next to the narrow/expand build_and_add_to calls + // above. + widen.build_and_add_to(&mut groups); + + groups +} diff --git a/third_party/rust/cranelift-codegen-meta/src/shared/mod.rs b/third_party/rust/cranelift-codegen-meta/src/shared/mod.rs new file mode 100644 index 0000000000..b185262ccd --- /dev/null +++ b/third_party/rust/cranelift-codegen-meta/src/shared/mod.rs @@ -0,0 +1,101 @@ +//! Shared definitions for the Cranelift intermediate language. 
+ +pub mod entities; +pub mod formats; +pub mod immediates; +pub mod instructions; +pub mod legalize; +pub mod settings; +pub mod types; + +use crate::cdsl::formats::{FormatStructure, InstructionFormat}; +use crate::cdsl::instructions::{AllInstructions, InstructionGroup}; +use crate::cdsl::settings::SettingGroup; +use crate::cdsl::xform::TransformGroups; + +use crate::shared::entities::EntityRefs; +use crate::shared::formats::Formats; +use crate::shared::immediates::Immediates; + +use std::collections::HashMap; +use std::iter::FromIterator; +use std::rc::Rc; + +pub(crate) struct Definitions { + pub settings: SettingGroup, + pub all_instructions: AllInstructions, + pub instructions: InstructionGroup, + pub imm: Immediates, + pub formats: Formats, + pub transform_groups: TransformGroups, + pub entities: EntityRefs, +} + +pub(crate) fn define() -> Definitions { + let mut all_instructions = AllInstructions::new(); + + let immediates = Immediates::new(); + let entities = EntityRefs::new(); + let formats = Formats::new(&immediates, &entities); + let instructions = + instructions::define(&mut all_instructions, &formats, &immediates, &entities); + let transform_groups = legalize::define(&instructions, &immediates); + + Definitions { + settings: settings::define(), + all_instructions, + instructions, + imm: immediates, + formats, + transform_groups, + entities, + } +} + +impl Definitions { + /// Verifies certain properties of formats. + /// + /// - Formats must be uniquely named: if two formats have the same name, they must refer to the + /// same data. Otherwise, two format variants in the codegen crate would have the same name. + /// - Formats must be structurally different from each other. Otherwise, this would lead to + /// code duplicate in the codegen crate. + /// + /// Returns a list of all the instruction formats effectively used. + pub fn verify_instruction_formats(&self) -> Vec<&InstructionFormat> { + let mut format_names: HashMap<&'static str, &Rc<InstructionFormat>> = HashMap::new(); + + // A structure is: number of input value operands / whether there's varargs or not / names + // of immediate fields. + let mut format_structures: HashMap<FormatStructure, &InstructionFormat> = HashMap::new(); + + for inst in self.all_instructions.values() { + // Check name. + if let Some(existing_format) = format_names.get(&inst.format.name) { + assert!( + Rc::ptr_eq(&existing_format, &inst.format), + "formats must uniquely named; there's a\ + conflict on the name '{}', please make sure it is used only once.", + existing_format.name + ); + } else { + format_names.insert(inst.format.name, &inst.format); + } + + // Check structure. 
+ let key = inst.format.structure(); + if let Some(existing_format) = format_structures.get(&key) { + assert_eq!( + existing_format.name, inst.format.name, + "duplicate instruction formats {} and {}; please remove one.", + existing_format.name, inst.format.name + ); + } else { + format_structures.insert(key, &inst.format); + } + } + + let mut result = Vec::from_iter(format_structures.into_iter().map(|(_, v)| v)); + result.sort_by_key(|format| format.name); + result + } +} diff --git a/third_party/rust/cranelift-codegen-meta/src/shared/settings.rs b/third_party/rust/cranelift-codegen-meta/src/shared/settings.rs new file mode 100644 index 0000000000..1ddc445927 --- /dev/null +++ b/third_party/rust/cranelift-codegen-meta/src/shared/settings.rs @@ -0,0 +1,287 @@ +use crate::cdsl::settings::{SettingGroup, SettingGroupBuilder}; + +pub(crate) fn define() -> SettingGroup { + let mut settings = SettingGroupBuilder::new("shared"); + + settings.add_enum( + "regalloc", + r#"Register allocator to use with the MachInst backend. + + This selects the register allocator as an option among those offered by the `regalloc.rs` + crate. Please report register allocation bugs to the maintainers of this crate whenever + possible. + + Note: this only applies to target that use the MachInst backend. As of 2020-04-17, this + means the x86_64 backend doesn't use this yet. + + Possible values: + + - `backtracking` is a greedy, backtracking register allocator as implemented in + Spidermonkey's optimizing tier IonMonkey. It may take more time to allocate registers, but + it should generate better code in general, resulting in better throughput of generated + code. + - `backtracking_checked` is the backtracking allocator with additional self checks that may + take some time to run, and thus these checks are disabled by default. + - `experimental_linear_scan` is an experimental linear scan allocator. It may take less + time to allocate registers, but generated code's quality may be inferior. As of + 2020-04-17, it is still experimental and it should not be used in production settings. + - `experimental_linear_scan_checked` is the linear scan allocator with additional self + checks that may take some time to run, and thus these checks are disabled by default. + "#, + vec![ + "backtracking", + "backtracking_checked", + "experimental_linear_scan", + "experimental_linear_scan_checked", + ], + ); + + settings.add_enum( + "opt_level", + r#" + Optimization level: + + - none: Minimise compile time by disabling most optimizations. + - speed: Generate the fastest possible code + - speed_and_size: like "speed", but also perform transformations + aimed at reducing code size. + "#, + vec!["none", "speed", "speed_and_size"], + ); + + settings.add_bool( + "enable_verifier", + r#" + Run the Cranelift IR verifier at strategic times during compilation. + + This makes compilation slower but catches many bugs. The verifier is always enabled by + default, which is useful during development. + "#, + true, + ); + + // Note that Cranelift doesn't currently need an is_pie flag, because PIE is + // just PIC where symbols can't be pre-empted, which can be expressed with the + // `colocated` flag on external functions and global values. + settings.add_bool( + "is_pic", + "Enable Position-Independent Code generation", + false, + ); + + settings.add_bool( + "use_colocated_libcalls", + r#" + Use colocated libcalls. 
+ + Generate code that assumes that libcalls can be declared "colocated", + meaning they will be defined along with the current function, such that + they can use more efficient addressing. + "#, + false, + ); + + settings.add_bool( + "avoid_div_traps", + r#" + Generate explicit checks around native division instructions to avoid + their trapping. + + This is primarily used by SpiderMonkey which doesn't install a signal + handler for SIGFPE, but expects a SIGILL trap for division by zero. + + On ISAs like ARM where the native division instructions don't trap, + this setting has no effect - explicit checks are always inserted. + "#, + false, + ); + + settings.add_bool( + "enable_float", + r#" + Enable the use of floating-point instructions + + Disabling use of floating-point instructions is not yet implemented. + "#, + true, + ); + + settings.add_bool( + "enable_nan_canonicalization", + r#" + Enable NaN canonicalization + + This replaces NaNs with a single canonical value, for users requiring + entirely deterministic WebAssembly computation. This is not required + by the WebAssembly spec, so it is not enabled by default. + "#, + false, + ); + + settings.add_bool( + "enable_pinned_reg", + r#"Enable the use of the pinned register. + + This register is excluded from register allocation, and is completely under the control of + the end-user. It is possible to read it via the get_pinned_reg instruction, and to set it + with the set_pinned_reg instruction. + "#, + false, + ); + + settings.add_bool( + "use_pinned_reg_as_heap_base", + r#"Use the pinned register as the heap base. + + Enabling this requires the enable_pinned_reg setting to be set to true. It enables a custom + legalization of the `heap_addr` instruction so it will use the pinned register as the heap + base, instead of fetching it from a global value. + + Warning! Enabling this means that the pinned register *must* be maintained to contain the + heap base address at all times, during the lifetime of a function. Using the pinned + register for other purposes when this is set is very likely to cause crashes. + "#, + false, + ); + + settings.add_bool("enable_simd", "Enable the use of SIMD instructions.", false); + + settings.add_bool( + "enable_atomics", + "Enable the use of atomic instructions", + true, + ); + + settings.add_bool( + "enable_safepoints", + r#" + Enable safepoint instruction insertions. + + This will allow the emit_stack_maps() function to insert the safepoint + instruction on top of calls and interrupt traps in order to display the + live reference values at that point in the program. + "#, + false, + ); + + settings.add_enum( + "tls_model", + r#" + Defines the model used to perform TLS accesses. + "#, + vec!["none", "elf_gd", "macho", "coff"], + ); + + // Settings specific to the `baldrdash` calling convention. + + settings.add_enum( + "libcall_call_conv", + r#" + Defines the calling convention to use for LibCalls call expansion, + since it may be different from the ISA default calling convention. + + The default value is to use the same calling convention as the ISA + default calling convention. + + This list should be kept in sync with the list of calling + conventions available in isa/call_conv.rs. + "#, + vec![ + "isa_default", + "fast", + "cold", + "system_v", + "windows_fastcall", + "baldrdash_system_v", + "baldrdash_windows", + "baldrdash_2020", + "probestack", + ], + ); + + settings.add_num( + "baldrdash_prologue_words", + r#" + Number of pointer-sized words pushed by the baldrdash prologue. 
+ + Functions with the `baldrdash` calling convention don't generate their + own prologue and epilogue. They depend on externally generated code + that pushes a fixed number of words in the prologue and restores them + in the epilogue. + + This setting configures the number of pointer-sized words pushed on the + stack when the Cranelift-generated code is entered. This includes the + pushed return address on x86. + "#, + 0, + ); + + // BaldrMonkey requires that not-yet-relocated function addresses be encoded + // as all-ones bitpatterns. + settings.add_bool( + "emit_all_ones_funcaddrs", + "Emit not-yet-relocated function addresses as all-ones bit patterns.", + false, + ); + + // Stack probing options. + + settings.add_bool( + "enable_probestack", + r#" + Enable the use of stack probes, for calling conventions which support this + functionality. + "#, + true, + ); + + settings.add_bool( + "probestack_func_adjusts_sp", + r#" + Set this to true of the stack probe function modifies the stack pointer + itself. + "#, + false, + ); + + settings.add_num( + "probestack_size_log2", + r#" + The log2 of the size of the stack guard region. + + Stack frames larger than this size will have stack overflow checked + by calling the probestack function. + + The default is 12, which translates to a size of 4096. + "#, + 12, + ); + + // Jump table options. + + settings.add_bool( + "enable_jump_tables", + "Enable the use of jump tables in generated machine code.", + true, + ); + + // Spectre options. + + settings.add_bool( + "enable_heap_access_spectre_mitigation", + r#" + Enable Spectre mitigation on heap bounds checks. + + This is a no-op for any heap that needs no bounds checks; e.g., + if the limit is static and the guard region is large enough that + the index cannot reach past it. + + This option is enabled by default because it is highly + recommended for secure sandboxing. The embedder should consider + the security implications carefully before disabling this option. + "#, + true, + ); + + settings.build() +} diff --git a/third_party/rust/cranelift-codegen-meta/src/shared/types.rs b/third_party/rust/cranelift-codegen-meta/src/shared/types.rs new file mode 100644 index 0000000000..631e5433e9 --- /dev/null +++ b/third_party/rust/cranelift-codegen-meta/src/shared/types.rs @@ -0,0 +1,236 @@ +//! This module predefines all the Cranelift scalar types. + +#[derive(Debug, Clone, Copy, Eq, PartialEq, Hash)] +pub(crate) enum Bool { + /// 1-bit bool. + B1 = 1, + /// 8-bit bool. + B8 = 8, + /// 16-bit bool. + B16 = 16, + /// 32-bit bool. + B32 = 32, + /// 64-bit bool. + B64 = 64, + /// 128-bit bool. + B128 = 128, +} + +/// This provides an iterator through all of the supported bool variants. +pub(crate) struct BoolIterator { + index: u8, +} + +impl BoolIterator { + pub fn new() -> Self { + Self { index: 0 } + } +} + +impl Iterator for BoolIterator { + type Item = Bool; + fn next(&mut self) -> Option<Self::Item> { + let res = match self.index { + 0 => Some(Bool::B1), + 1 => Some(Bool::B8), + 2 => Some(Bool::B16), + 3 => Some(Bool::B32), + 4 => Some(Bool::B64), + 5 => Some(Bool::B128), + _ => return None, + }; + self.index += 1; + res + } +} + +#[derive(Debug, Clone, Copy, Eq, PartialEq, Hash)] +pub(crate) enum Int { + /// 8-bit int. + I8 = 8, + /// 16-bit int. + I16 = 16, + /// 32-bit int. + I32 = 32, + /// 64-bit int. + I64 = 64, + /// 128-bit int. + I128 = 128, +} + +/// This provides an iterator through all of the supported int variants. 
+pub(crate) struct IntIterator { + index: u8, +} + +impl IntIterator { + pub fn new() -> Self { + Self { index: 0 } + } +} + +impl Iterator for IntIterator { + type Item = Int; + fn next(&mut self) -> Option<Self::Item> { + let res = match self.index { + 0 => Some(Int::I8), + 1 => Some(Int::I16), + 2 => Some(Int::I32), + 3 => Some(Int::I64), + 4 => Some(Int::I128), + _ => return None, + }; + self.index += 1; + res + } +} + +#[derive(Debug, Clone, Copy, Eq, PartialEq, Hash)] +pub(crate) enum Float { + F32 = 32, + F64 = 64, +} + +/// Iterator through the variants of the Float enum. +pub(crate) struct FloatIterator { + index: u8, +} + +impl FloatIterator { + pub fn new() -> Self { + Self { index: 0 } + } +} + +/// This provides an iterator through all of the supported float variants. +impl Iterator for FloatIterator { + type Item = Float; + fn next(&mut self) -> Option<Self::Item> { + let res = match self.index { + 0 => Some(Float::F32), + 1 => Some(Float::F64), + _ => return None, + }; + self.index += 1; + res + } +} + +/// A type representing CPU flags. +/// +/// Flags can't be stored in memory. +#[derive(Debug, Clone, Copy, Eq, PartialEq, Hash)] +pub(crate) enum Flag { + /// CPU flags from an integer comparison. + IFlags, + /// CPU flags from a floating point comparison. + FFlags, +} + +/// Iterator through the variants of the Flag enum. +pub(crate) struct FlagIterator { + index: u8, +} + +impl FlagIterator { + pub fn new() -> Self { + Self { index: 0 } + } +} + +impl Iterator for FlagIterator { + type Item = Flag; + fn next(&mut self) -> Option<Self::Item> { + let res = match self.index { + 0 => Some(Flag::IFlags), + 1 => Some(Flag::FFlags), + _ => return None, + }; + self.index += 1; + res + } +} + +#[derive(Debug, Clone, Copy, Eq, PartialEq, Hash)] +pub(crate) enum Reference { + /// 32-bit reference. + R32 = 32, + /// 64-bit reference. + R64 = 64, +} + +/// This provides an iterator through all of the supported reference variants. 
+pub(crate) struct ReferenceIterator { + index: u8, +} + +impl ReferenceIterator { + pub fn new() -> Self { + Self { index: 0 } + } +} + +impl Iterator for ReferenceIterator { + type Item = Reference; + fn next(&mut self) -> Option<Self::Item> { + let res = match self.index { + 0 => Some(Reference::R32), + 1 => Some(Reference::R64), + _ => return None, + }; + self.index += 1; + res + } +} + +#[cfg(test)] +mod iter_tests { + use super::*; + + #[test] + fn bool_iter_works() { + let mut bool_iter = BoolIterator::new(); + assert_eq!(bool_iter.next(), Some(Bool::B1)); + assert_eq!(bool_iter.next(), Some(Bool::B8)); + assert_eq!(bool_iter.next(), Some(Bool::B16)); + assert_eq!(bool_iter.next(), Some(Bool::B32)); + assert_eq!(bool_iter.next(), Some(Bool::B64)); + assert_eq!(bool_iter.next(), Some(Bool::B128)); + assert_eq!(bool_iter.next(), None); + } + + #[test] + fn int_iter_works() { + let mut int_iter = IntIterator::new(); + assert_eq!(int_iter.next(), Some(Int::I8)); + assert_eq!(int_iter.next(), Some(Int::I16)); + assert_eq!(int_iter.next(), Some(Int::I32)); + assert_eq!(int_iter.next(), Some(Int::I64)); + assert_eq!(int_iter.next(), Some(Int::I128)); + assert_eq!(int_iter.next(), None); + } + + #[test] + fn float_iter_works() { + let mut float_iter = FloatIterator::new(); + assert_eq!(float_iter.next(), Some(Float::F32)); + assert_eq!(float_iter.next(), Some(Float::F64)); + assert_eq!(float_iter.next(), None); + } + + #[test] + fn flag_iter_works() { + let mut flag_iter = FlagIterator::new(); + assert_eq!(flag_iter.next(), Some(Flag::IFlags)); + assert_eq!(flag_iter.next(), Some(Flag::FFlags)); + assert_eq!(flag_iter.next(), None); + } + + #[test] + fn reference_iter_works() { + let mut reference_iter = ReferenceIterator::new(); + assert_eq!(reference_iter.next(), Some(Reference::R32)); + assert_eq!(reference_iter.next(), Some(Reference::R64)); + assert_eq!(reference_iter.next(), None); + } +} diff --git a/third_party/rust/cranelift-codegen-meta/src/srcgen.rs b/third_party/rust/cranelift-codegen-meta/src/srcgen.rs new file mode 100644 index 0000000000..ad8db175d7 --- /dev/null +++ b/third_party/rust/cranelift-codegen-meta/src/srcgen.rs @@ -0,0 +1,484 @@ +//! Source code generator. +//! +//! The `srcgen` module contains generic helper routines and classes for +//! generating source code. + +#![macro_use] + +use std::cmp; +use std::collections::{BTreeMap, BTreeSet}; +use std::fs; +use std::io::Write; +use std::path; + +use crate::error; + +static SHIFTWIDTH: usize = 4; + +/// A macro that simplifies the usage of the Formatter by allowing format +/// strings. +macro_rules! fmtln { + ($fmt:ident, $fmtstring:expr, $($fmtargs:expr),*) => { + $fmt.line(format!($fmtstring, $($fmtargs),*)); + }; + + ($fmt:ident, $arg:expr) => { + $fmt.line($arg); + }; + + ($_:tt, $($args:expr),+) => { + compile_error!("This macro requires at least two arguments: the Formatter instance and a format string."); + }; + + ($_:tt) => { + compile_error!("This macro requires at least two arguments: the Formatter instance and a format string."); + }; +} + +pub(crate) struct Formatter { + indent: usize, + lines: Vec<String>, +} + +impl Formatter { + /// Source code formatter class. Used to collect source code to be written + /// to a file, and keep track of indentation. + pub fn new() -> Self { + Self { + indent: 0, + lines: Vec::new(), + } + } + + /// Increase current indentation level by one. + pub fn indent_push(&mut self) { + self.indent += 1; + } + + /// Decrease indentation by one level. 
+ pub fn indent_pop(&mut self) { + assert!(self.indent > 0, "Already at top level indentation"); + self.indent -= 1; + } + + pub fn indent<T, F: FnOnce(&mut Formatter) -> T>(&mut self, f: F) -> T { + self.indent_push(); + let ret = f(self); + self.indent_pop(); + ret + } + + /// Get the current whitespace indentation in the form of a String. + fn get_indent(&self) -> String { + if self.indent == 0 { + String::new() + } else { + format!("{:-1$}", " ", self.indent * SHIFTWIDTH) + } + } + + /// Get a string containing whitespace outdented one level. Used for + /// lines of code that are inside a single indented block. + fn get_outdent(&mut self) -> String { + self.indent_pop(); + let s = self.get_indent(); + self.indent_push(); + s + } + + /// Add an indented line. + pub fn line(&mut self, contents: impl AsRef<str>) { + let indented_line = format!("{}{}\n", self.get_indent(), contents.as_ref()); + self.lines.push(indented_line); + } + + /// Pushes an empty line. + pub fn empty_line(&mut self) { + self.lines.push("\n".to_string()); + } + + /// Emit a line outdented one level. + pub fn outdented_line(&mut self, s: &str) { + let new_line = format!("{}{}\n", self.get_outdent(), s); + self.lines.push(new_line); + } + + /// Write `self.lines` to a file. + pub fn update_file( + &self, + filename: impl AsRef<str>, + directory: &str, + ) -> Result<(), error::Error> { + #[cfg(target_family = "windows")] + let path_str = format!("{}\\{}", directory, filename.as_ref()); + #[cfg(not(target_family = "windows"))] + let path_str = format!("{}/{}", directory, filename.as_ref()); + + let path = path::Path::new(&path_str); + let mut f = fs::File::create(path)?; + + for l in self.lines.iter().map(|l| l.as_bytes()) { + f.write_all(l)?; + } + + Ok(()) + } + + /// Add one or more lines after stripping common indentation. + pub fn multi_line(&mut self, s: &str) { + parse_multiline(s).into_iter().for_each(|l| self.line(&l)); + } + + /// Add a comment line. + pub fn comment(&mut self, s: impl AsRef<str>) { + fmtln!(self, "// {}", s.as_ref()); + } + + /// Add a (multi-line) documentation comment. + pub fn doc_comment(&mut self, contents: impl AsRef<str>) { + parse_multiline(contents.as_ref()) + .iter() + .map(|l| { + if l.is_empty() { + "///".into() + } else { + format!("/// {}", l) + } + }) + .for_each(|s| self.line(s.as_str())); + } + + /// Add a match expression. + pub fn add_match(&mut self, m: Match) { + fmtln!(self, "match {} {{", m.expr); + self.indent(|fmt| { + for (&(ref fields, ref body), ref names) in m.arms.iter() { + // name { fields } | name { fields } => { body } + let conditions = names + .iter() + .map(|name| { + if !fields.is_empty() { + format!("{} {{ {} }}", name, fields.join(", ")) + } else { + name.clone() + } + }) + .collect::<Vec<_>>() + .join(" |\n") + + " => {"; + + fmt.multi_line(&conditions); + fmt.indent(|fmt| { + fmt.line(body); + }); + fmt.line("}"); + } + + // Make sure to include the catch all clause last. + if let Some(body) = m.catch_all { + fmt.line("_ => {"); + fmt.indent(|fmt| { + fmt.line(body); + }); + fmt.line("}"); + } + }); + self.line("}"); + } +} + +/// Compute the indentation of s, or None of an empty line. +fn _indent(s: &str) -> Option<usize> { + if s.is_empty() { + None + } else { + let t = s.trim_start(); + Some(s.len() - t.len()) + } +} + +/// Given a multi-line string, split it into a sequence of lines after +/// stripping a common indentation. This is useful for strings defined with +/// doc strings. 
+fn parse_multiline(s: &str) -> Vec<String> { + // Convert tabs into spaces. + let expanded_tab = format!("{:-1$}", " ", SHIFTWIDTH); + let lines: Vec<String> = s.lines().map(|l| l.replace("\t", &expanded_tab)).collect(); + + // Determine minimum indentation, ignoring the first line and empty lines. + let indent = lines + .iter() + .skip(1) + .filter(|l| !l.trim().is_empty()) + .map(|l| l.len() - l.trim_start().len()) + .min(); + + // Strip off leading blank lines. + let mut lines_iter = lines.iter().skip_while(|l| l.is_empty()); + let mut trimmed = Vec::with_capacity(lines.len()); + + // Remove indentation (first line is special) + if let Some(s) = lines_iter.next().map(|l| l.trim()).map(|l| l.to_string()) { + trimmed.push(s); + } + + // Remove trailing whitespace from other lines. + let mut other_lines = if let Some(indent) = indent { + // Note that empty lines may have fewer than `indent` chars. + lines_iter + .map(|l| &l[cmp::min(indent, l.len())..]) + .map(|l| l.trim_end()) + .map(|l| l.to_string()) + .collect::<Vec<_>>() + } else { + lines_iter + .map(|l| l.trim_end()) + .map(|l| l.to_string()) + .collect::<Vec<_>>() + }; + + trimmed.append(&mut other_lines); + + // Strip off trailing blank lines. + while let Some(s) = trimmed.pop() { + if s.is_empty() { + continue; + } else { + trimmed.push(s); + break; + } + } + + trimmed +} + +/// Match formatting class. +/// +/// Match objects collect all the information needed to emit a Rust `match` +/// expression, automatically deduplicating overlapping identical arms. +/// +/// Note that this class is ignorant of Rust types, and considers two fields +/// with the same name to be equivalent. BTreeMap/BTreeSet are used to +/// represent the arms in order to make the order deterministic. +pub(crate) struct Match { + expr: String, + arms: BTreeMap<(Vec<String>, String), BTreeSet<String>>, + /// The clause for the placeholder pattern _. + catch_all: Option<String>, +} + +impl Match { + /// Create a new match statement on `expr`. + pub fn new(expr: impl Into<String>) -> Self { + Self { + expr: expr.into(), + arms: BTreeMap::new(), + catch_all: None, + } + } + + fn set_catch_all(&mut self, clause: String) { + assert!(self.catch_all.is_none()); + self.catch_all = Some(clause); + } + + /// Add an arm that reads fields to the Match statement. + pub fn arm<T: Into<String>, S: Into<String>>(&mut self, name: T, fields: Vec<S>, body: T) { + let name = name.into(); + assert!( + name != "_", + "catch all clause can't extract fields, use arm_no_fields instead." + ); + + let body = body.into(); + let fields = fields.into_iter().map(|x| x.into()).collect(); + let match_arm = self + .arms + .entry((fields, body)) + .or_insert_with(BTreeSet::new); + match_arm.insert(name); + } + + /// Adds an arm that doesn't read anythings from the fields to the Match statement. 
+ pub fn arm_no_fields(&mut self, name: impl Into<String>, body: impl Into<String>) { + let body = body.into(); + + let name = name.into(); + if name == "_" { + self.set_catch_all(body); + return; + } + + let match_arm = self + .arms + .entry((Vec::new(), body)) + .or_insert_with(BTreeSet::new); + match_arm.insert(name); + } +} + +#[cfg(test)] +mod srcgen_tests { + use super::parse_multiline; + use super::Formatter; + use super::Match; + + fn from_raw_string<S: Into<String>>(s: S) -> Vec<String> { + s.into() + .trim() + .split("\n") + .into_iter() + .map(|x| format!("{}\n", x)) + .collect() + } + + #[test] + fn adding_arms_works() { + let mut m = Match::new("x"); + m.arm("Orange", vec!["a", "b"], "some body"); + m.arm("Yellow", vec!["a", "b"], "some body"); + m.arm("Green", vec!["a", "b"], "different body"); + m.arm("Blue", vec!["x", "y"], "some body"); + assert_eq!(m.arms.len(), 3); + + let mut fmt = Formatter::new(); + fmt.add_match(m); + + let expected_lines = from_raw_string( + r#" +match x { + Green { a, b } => { + different body + } + Orange { a, b } | + Yellow { a, b } => { + some body + } + Blue { x, y } => { + some body + } +} + "#, + ); + assert_eq!(fmt.lines, expected_lines); + } + + #[test] + fn match_with_catchall_order() { + // The catchall placeholder must be placed after other clauses. + let mut m = Match::new("x"); + m.arm("Orange", vec!["a", "b"], "some body"); + m.arm("Green", vec!["a", "b"], "different body"); + m.arm_no_fields("_", "unreachable!()"); + assert_eq!(m.arms.len(), 2); // catchall is not counted + + let mut fmt = Formatter::new(); + fmt.add_match(m); + + let expected_lines = from_raw_string( + r#" +match x { + Green { a, b } => { + different body + } + Orange { a, b } => { + some body + } + _ => { + unreachable!() + } +} + "#, + ); + assert_eq!(fmt.lines, expected_lines); + } + + #[test] + fn parse_multiline_works() { + let input = "\n hello\n world\n"; + let expected = vec!["hello", "world"]; + let output = parse_multiline(input); + assert_eq!(output, expected); + } + + #[test] + fn formatter_basic_example_works() { + let mut fmt = Formatter::new(); + fmt.line("Hello line 1"); + fmt.indent_push(); + fmt.comment("Nested comment"); + fmt.indent_pop(); + fmt.line("Back home again"); + let expected_lines = vec![ + "Hello line 1\n", + " // Nested comment\n", + "Back home again\n", + ]; + assert_eq!(fmt.lines, expected_lines); + } + + #[test] + fn get_indent_works() { + let mut fmt = Formatter::new(); + let expected_results = vec!["", " ", " ", ""]; + + let actual_results = Vec::with_capacity(4); + (0..3).for_each(|_| { + fmt.get_indent(); + fmt.indent_push(); + }); + (0..3).for_each(|_| fmt.indent_pop()); + fmt.get_indent(); + + actual_results + .into_iter() + .zip(expected_results.into_iter()) + .for_each(|(actual, expected): (String, &str)| assert_eq!(&actual, expected)); + } + + #[test] + fn fmt_can_add_type_to_lines() { + let mut fmt = Formatter::new(); + fmt.line(format!("pub const {}: Type = Type({:#x});", "example", 0,)); + let expected_lines = vec!["pub const example: Type = Type(0x0);\n"]; + assert_eq!(fmt.lines, expected_lines); + } + + #[test] + fn fmt_can_add_indented_line() { + let mut fmt = Formatter::new(); + fmt.line("hello"); + fmt.indent_push(); + fmt.line("world"); + let expected_lines = vec!["hello\n", " world\n"]; + assert_eq!(fmt.lines, expected_lines); + } + + #[test] + fn fmt_can_add_doc_comments() { + let mut fmt = Formatter::new(); + fmt.doc_comment("documentation\nis\ngood"); + let expected_lines = vec!["/// documentation\n", "/// 
is\n", "/// good\n"]; + assert_eq!(fmt.lines, expected_lines); + } + + #[test] + fn fmt_can_add_doc_comments_with_empty_lines() { + let mut fmt = Formatter::new(); + fmt.doc_comment( + r#"documentation + can be really good. + + If you stick to writing it. +"#, + ); + let expected_lines = from_raw_string( + r#" +/// documentation +/// can be really good. +/// +/// If you stick to writing it."#, + ); + assert_eq!(fmt.lines, expected_lines); + } +} diff --git a/third_party/rust/cranelift-codegen-meta/src/unique_table.rs b/third_party/rust/cranelift-codegen-meta/src/unique_table.rs new file mode 100644 index 0000000000..65ef7e8b4a --- /dev/null +++ b/third_party/rust/cranelift-codegen-meta/src/unique_table.rs @@ -0,0 +1,141 @@ +//! An index-accessed table implementation that avoids duplicate entries. +use std::collections::HashMap; +use std::hash::Hash; +use std::slice; + +/// Collect items into the `table` list, removing duplicates. +pub(crate) struct UniqueTable<'entries, T: Eq + Hash> { + table: Vec<&'entries T>, + map: HashMap<&'entries T, usize>, +} + +impl<'entries, T: Eq + Hash> UniqueTable<'entries, T> { + pub fn new() -> Self { + Self { + table: Vec::new(), + map: HashMap::new(), + } + } + + pub fn add(&mut self, entry: &'entries T) -> usize { + match self.map.get(&entry) { + None => { + let i = self.table.len(); + self.table.push(entry); + self.map.insert(entry, i); + i + } + Some(&i) => i, + } + } + + pub fn len(&self) -> usize { + self.table.len() + } + pub fn get(&self, index: usize) -> &T { + self.table[index] + } + pub fn iter(&self) -> slice::Iter<&'entries T> { + self.table.iter() + } +} + +/// A table of sequences which tries to avoid common subsequences. +pub(crate) struct UniqueSeqTable<T: PartialEq + Clone> { + table: Vec<T>, +} + +impl<T: PartialEq + Clone> UniqueSeqTable<T> { + pub fn new() -> Self { + Self { table: Vec::new() } + } + pub fn add(&mut self, values: &[T]) -> usize { + if values.is_empty() { + return 0; + } + if let Some(offset) = find_subsequence(values, &self.table) { + offset + } else { + let table_len = self.table.len(); + + // Try to put in common the last elements of the table if they're a prefix of the new + // sequence. + // + // We know there wasn't a full match, so the best prefix we can hope to find contains + // all the values but the last one. + let mut start_from = usize::min(table_len, values.len() - 1); + while start_from != 0 { + // Loop invariant: start_from <= table_len, so table_len - start_from >= 0. + if values[0..start_from] == self.table[table_len - start_from..table_len] { + break; + } + start_from -= 1; + } + + self.table + .extend(values[start_from..values.len()].iter().cloned()); + table_len - start_from + } + } + pub fn len(&self) -> usize { + self.table.len() + } + pub fn iter(&self) -> slice::Iter<T> { + self.table.iter() + } +} + +/// Try to find the subsequence `sub` in the `whole` sequence. Returns None if +/// it's not been found, or Some(index) if it has been. Naive implementation +/// until proven we need something better. +fn find_subsequence<T: PartialEq>(sub: &[T], whole: &[T]) -> Option<usize> { + assert!(!sub.is_empty()); + // We want i + sub.len() <= whole.len(), i.e. i < whole.len() + 1 - sub.len(). + if whole.len() < sub.len() { + return None; + } + let max = whole.len() - sub.len(); + for i in 0..=max { + if whole[i..i + sub.len()] == sub[..] 
{ + return Some(i); + } + } + None +} + +#[test] +fn test_find_subsequence() { + assert_eq!(find_subsequence(&vec![1], &vec![4]), None); + assert_eq!(find_subsequence(&vec![1], &vec![1]), Some(0)); + assert_eq!(find_subsequence(&vec![1, 2], &vec![1]), None); + assert_eq!(find_subsequence(&vec![1, 2], &vec![1, 2]), Some(0)); + assert_eq!(find_subsequence(&vec![1, 2], &vec![1, 3]), None); + assert_eq!(find_subsequence(&vec![1, 2], &vec![0, 1, 2]), Some(1)); + assert_eq!(find_subsequence(&vec![1, 2], &vec![0, 1, 3, 1]), None); + assert_eq!(find_subsequence(&vec![1, 2], &vec![0, 1, 3, 1, 2]), Some(3)); + assert_eq!( + find_subsequence(&vec![1, 1, 3], &vec![1, 1, 1, 3, 3]), + Some(1) + ); +} + +#[test] +fn test_optimal_add() { + let mut seq_table = UniqueSeqTable::new(); + // [0, 1, 2, 3] + assert_eq!(seq_table.add(&vec![0, 1, 2, 3]), 0); + assert_eq!(seq_table.add(&vec![0, 1, 2, 3]), 0); + assert_eq!(seq_table.add(&vec![1, 2, 3]), 1); + assert_eq!(seq_table.add(&vec![2, 3]), 2); + assert_eq!(seq_table.len(), 4); + // [0, 1, 2, 3, 4] + assert_eq!(seq_table.add(&vec![2, 3, 4]), 2); + assert_eq!(seq_table.len(), 5); + // [0, 1, 2, 3, 4, 6, 5, 7] + assert_eq!(seq_table.add(&vec![4, 6, 5, 7]), 4); + assert_eq!(seq_table.len(), 8); + // [0, 1, 2, 3, 4, 6, 5, 7, 8, 2, 3, 4] + assert_eq!(seq_table.add(&vec![8, 2, 3, 4]), 8); + assert_eq!(seq_table.add(&vec![8]), 8); + assert_eq!(seq_table.len(), 12); +} |
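A hypothetical usage sketch of `UniqueTable` (the type is `pub(crate)`, so this is illustrative rather than an external API): adding a duplicate entry returns the index of the first occurrence instead of growing the table.

    #[test]
    fn unique_table_usage_sketch() {
        let band = "band".to_string();
        let bor = "bor".to_string();

        let mut table = UniqueTable::new();
        let i_band = table.add(&band);
        let i_bor = table.add(&bor);

        // Duplicate entries are not stored again; the original index comes back.
        assert_eq!(table.add(&band), i_band);
        assert_eq!(table.get(i_bor), &bor);
        assert_eq!(table.len(), 2);
    }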