Diffstat (limited to 'third_party/rust/cranelift-codegen-meta/src')
49 files changed, 28429 insertions, 0 deletions
diff --git a/third_party/rust/cranelift-codegen-meta/src/cdsl/ast.rs b/third_party/rust/cranelift-codegen-meta/src/cdsl/ast.rs new file mode 100644 index 0000000000..82cdbad762 --- /dev/null +++ b/third_party/rust/cranelift-codegen-meta/src/cdsl/ast.rs @@ -0,0 +1,753 @@ +use crate::cdsl::instructions::{InstSpec, Instruction, InstructionPredicate}; +use crate::cdsl::operands::{OperandKind, OperandKindFields}; +use crate::cdsl::types::ValueType; +use crate::cdsl::typevar::{TypeSetBuilder, TypeVar}; + +use cranelift_entity::{entity_impl, PrimaryMap, SparseMap, SparseMapValue}; + +use std::fmt; +use std::iter::IntoIterator; + +pub(crate) enum Expr { + Var(VarIndex), + Literal(Literal), +} + +impl Expr { + pub fn maybe_literal(&self) -> Option<&Literal> { + match &self { + Expr::Literal(lit) => Some(lit), + _ => None, + } + } + + pub fn maybe_var(&self) -> Option<VarIndex> { + if let Expr::Var(var) = &self { + Some(*var) + } else { + None + } + } + + pub fn unwrap_var(&self) -> VarIndex { + self.maybe_var() + .expect("tried to unwrap a non-Var content in Expr::unwrap_var") + } + + pub fn to_rust_code(&self, var_pool: &VarPool) -> String { + match self { + Expr::Var(var_index) => var_pool.get(*var_index).to_rust_code(), + Expr::Literal(literal) => literal.to_rust_code(), + } + } +} + +/// An AST definition associates a set of variables with the values produced by an expression. +pub(crate) struct Def { + pub apply: Apply, + pub defined_vars: Vec<VarIndex>, +} + +impl Def { + pub fn to_comment_string(&self, var_pool: &VarPool) -> String { + let results = self + .defined_vars + .iter() + .map(|&x| var_pool.get(x).name.as_str()) + .collect::<Vec<_>>(); + + let results = if results.len() == 1 { + results[0].to_string() + } else { + format!("({})", results.join(", ")) + }; + + format!("{} := {}", results, self.apply.to_comment_string(var_pool)) + } +} + +pub(crate) struct DefPool { + pool: PrimaryMap<DefIndex, Def>, +} + +impl DefPool { + pub fn new() -> Self { + Self { + pool: PrimaryMap::new(), + } + } + pub fn get(&self, index: DefIndex) -> &Def { + self.pool.get(index).unwrap() + } + pub fn next_index(&self) -> DefIndex { + self.pool.next_key() + } + pub fn create_inst(&mut self, apply: Apply, defined_vars: Vec<VarIndex>) -> DefIndex { + self.pool.push(Def { + apply, + defined_vars, + }) + } +} + +#[derive(Copy, Clone, PartialEq, Eq, Hash, PartialOrd, Ord)] +pub(crate) struct DefIndex(u32); +entity_impl!(DefIndex); + +/// A definition which would lead to generate a block creation. +#[derive(Clone)] +pub(crate) struct Block { + /// Instruction index after which the block entry is set. + pub location: DefIndex, + /// Variable holding the new created block. + pub name: VarIndex, +} + +pub(crate) struct BlockPool { + pool: SparseMap<DefIndex, Block>, +} + +impl SparseMapValue<DefIndex> for Block { + fn key(&self) -> DefIndex { + self.location + } +} + +impl BlockPool { + pub fn new() -> Self { + Self { + pool: SparseMap::new(), + } + } + pub fn get(&self, index: DefIndex) -> Option<&Block> { + self.pool.get(index) + } + pub fn create_block(&mut self, name: VarIndex, location: DefIndex) { + if self.pool.contains_key(location) { + panic!("Attempt to insert 2 blocks after the same instruction") + } + self.pool.insert(Block { location, name }); + } + pub fn is_empty(&self) -> bool { + self.pool.is_empty() + } +} + +// Implement IntoIterator such that we can iterate over blocks which are in the block pool. 
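// Illustrative usage (hypothetical `block_pool: BlockPool`):
//     for block in &block_pool {
//         // `block.location` is the DefIndex the block starts after,
//         // `block.name` is the VarIndex holding the new block.
//     }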
+impl<'a> IntoIterator for &'a BlockPool { + type Item = <&'a SparseMap<DefIndex, Block> as IntoIterator>::Item; + type IntoIter = <&'a SparseMap<DefIndex, Block> as IntoIterator>::IntoIter; + + fn into_iter(self) -> Self::IntoIter { + self.pool.into_iter() + } +} + +#[derive(Clone, Debug)] +pub(crate) enum Literal { + /// A value of an enumerated immediate operand. + /// + /// Some immediate operand kinds like `intcc` and `floatcc` have an enumerated range of values + /// corresponding to a Rust enum type. An `Enumerator` object is an AST leaf node representing one + /// of the values. + Enumerator { + rust_type: &'static str, + value: &'static str, + }, + + /// A bitwise value of an immediate operand, used for bitwise exact floating point constants. + Bits { rust_type: &'static str, value: u64 }, + + /// A value of an integer immediate operand. + Int(i64), + + /// A empty list of variable set of arguments. + EmptyVarArgs, +} + +impl Literal { + pub fn enumerator_for(kind: &OperandKind, value: &'static str) -> Self { + let value = match &kind.fields { + OperandKindFields::ImmEnum(values) => values.get(value).unwrap_or_else(|| { + panic!( + "nonexistent value '{}' in enumeration '{}'", + value, kind.rust_type + ) + }), + _ => panic!("enumerator is for enum values"), + }; + Literal::Enumerator { + rust_type: kind.rust_type, + value, + } + } + + pub fn bits(kind: &OperandKind, bits: u64) -> Self { + match kind.fields { + OperandKindFields::ImmValue => {} + _ => panic!("bits_of is for immediate scalar types"), + } + Literal::Bits { + rust_type: kind.rust_type, + value: bits, + } + } + + pub fn constant(kind: &OperandKind, value: i64) -> Self { + match kind.fields { + OperandKindFields::ImmValue => {} + _ => panic!("constant is for immediate scalar types"), + } + Literal::Int(value) + } + + pub fn empty_vararg() -> Self { + Literal::EmptyVarArgs + } + + pub fn to_rust_code(&self) -> String { + match self { + Literal::Enumerator { rust_type, value } => format!("{}::{}", rust_type, value), + Literal::Bits { rust_type, value } => format!("{}::with_bits({:#x})", rust_type, value), + Literal::Int(val) => val.to_string(), + Literal::EmptyVarArgs => "&[]".into(), + } + } +} + +#[derive(Clone, Copy, Debug)] +pub(crate) enum PatternPosition { + Source, + Destination, +} + +/// A free variable. +/// +/// When variables are used in `XForms` with source and destination patterns, they are classified +/// as follows: +/// +/// Input values: Uses in the source pattern with no preceding def. These may appear as inputs in +/// the destination pattern too, but no new inputs can be introduced. +/// +/// Output values: Variables that are defined in both the source and destination pattern. These +/// values may have uses outside the source pattern, and the destination pattern must compute the +/// same value. +/// +/// Intermediate values: Values that are defined in the source pattern, but not in the destination +/// pattern. These may have uses outside the source pattern, so the defining instruction can't be +/// deleted immediately. +/// +/// Temporary values are defined only in the destination pattern. +pub(crate) struct Var { + pub name: String, + + /// The `Def` defining this variable in a source pattern. + pub src_def: Option<DefIndex>, + + /// The `Def` defining this variable in a destination pattern. + pub dst_def: Option<DefIndex>, + + /// TypeVar representing the type of this variable. + type_var: Option<TypeVar>, + + /// Is this the original type variable, or has it be redefined with set_typevar? 
+ is_original_type_var: bool, +} + +impl Var { + fn new(name: String) -> Self { + Self { + name, + src_def: None, + dst_def: None, + type_var: None, + is_original_type_var: false, + } + } + + /// Is this an input value to the src pattern? + pub fn is_input(&self) -> bool { + self.src_def.is_none() && self.dst_def.is_none() + } + + /// Is this an output value, defined in both src and dst patterns? + pub fn is_output(&self) -> bool { + self.src_def.is_some() && self.dst_def.is_some() + } + + /// Is this an intermediate value, defined only in the src pattern? + pub fn is_intermediate(&self) -> bool { + self.src_def.is_some() && self.dst_def.is_none() + } + + /// Is this a temp value, defined only in the dst pattern? + pub fn is_temp(&self) -> bool { + self.src_def.is_none() && self.dst_def.is_some() + } + + /// Get the def of this variable according to the position. + pub fn get_def(&self, position: PatternPosition) -> Option<DefIndex> { + match position { + PatternPosition::Source => self.src_def, + PatternPosition::Destination => self.dst_def, + } + } + + pub fn set_def(&mut self, position: PatternPosition, def: DefIndex) { + assert!( + self.get_def(position).is_none(), + format!("redefinition of variable {}", self.name) + ); + match position { + PatternPosition::Source => { + self.src_def = Some(def); + } + PatternPosition::Destination => { + self.dst_def = Some(def); + } + } + } + + /// Get the type variable representing the type of this variable. + pub fn get_or_create_typevar(&mut self) -> TypeVar { + match &self.type_var { + Some(tv) => tv.clone(), + None => { + // Create a new type var in which we allow all types. + let tv = TypeVar::new( + format!("typeof_{}", self.name), + format!("Type of the pattern variable {:?}", self), + TypeSetBuilder::all(), + ); + self.type_var = Some(tv.clone()); + self.is_original_type_var = true; + tv + } + } + } + pub fn get_typevar(&self) -> Option<TypeVar> { + self.type_var.clone() + } + pub fn set_typevar(&mut self, tv: TypeVar) { + self.is_original_type_var = if let Some(previous_tv) = &self.type_var { + *previous_tv == tv + } else { + false + }; + self.type_var = Some(tv); + } + + /// Check if this variable has a free type variable. If not, the type of this variable is + /// computed from the type of another variable. 
+ pub fn has_free_typevar(&self) -> bool { + match &self.type_var { + Some(tv) => tv.base.is_none() && self.is_original_type_var, + None => false, + } + } + + pub fn to_rust_code(&self) -> String { + self.name.clone() + } + fn rust_type(&self) -> String { + self.type_var.as_ref().unwrap().to_rust_code() + } +} + +impl fmt::Debug for Var { + fn fmt(&self, fmt: &mut fmt::Formatter) -> Result<(), fmt::Error> { + fmt.write_fmt(format_args!( + "Var({}{}{})", + self.name, + if self.src_def.is_some() { ", src" } else { "" }, + if self.dst_def.is_some() { ", dst" } else { "" } + )) + } +} + +#[derive(Copy, Clone, Debug, PartialEq, Eq, Hash, PartialOrd, Ord)] +pub(crate) struct VarIndex(u32); +entity_impl!(VarIndex); + +pub(crate) struct VarPool { + pool: PrimaryMap<VarIndex, Var>, +} + +impl VarPool { + pub fn new() -> Self { + Self { + pool: PrimaryMap::new(), + } + } + pub fn get(&self, index: VarIndex) -> &Var { + self.pool.get(index).unwrap() + } + pub fn get_mut(&mut self, index: VarIndex) -> &mut Var { + self.pool.get_mut(index).unwrap() + } + pub fn create(&mut self, name: impl Into<String>) -> VarIndex { + self.pool.push(Var::new(name.into())) + } +} + +/// Contains constants created in the AST that must be inserted into the true [ConstantPool] when +/// the legalizer code is generated. The constant data is named in the order it is inserted; +/// inserting data using [insert] will avoid duplicates. +/// +/// [ConstantPool]: ../../../cranelift_codegen/ir/constant/struct.ConstantPool.html +/// [insert]: ConstPool::insert +pub(crate) struct ConstPool { + pool: Vec<Vec<u8>>, +} + +impl ConstPool { + /// Create an empty constant pool. + pub fn new() -> Self { + Self { pool: vec![] } + } + + /// Create a name for a constant from its position in the pool. + fn create_name(position: usize) -> String { + format!("const{}", position) + } + + /// Insert constant data into the pool, returning the name of the variable used to reference it. + /// This method will search for data that matches the new data and return the existing constant + /// name to avoid duplicates. + pub fn insert(&mut self, data: Vec<u8>) -> String { + let possible_position = self.pool.iter().position(|d| d == &data); + let position = if let Some(found_position) = possible_position { + found_position + } else { + let new_position = self.pool.len(); + self.pool.push(data); + new_position + }; + ConstPool::create_name(position) + } + + /// Iterate over the name/value pairs in the pool. + pub fn iter(&self) -> impl Iterator<Item = (String, &Vec<u8>)> { + self.pool + .iter() + .enumerate() + .map(|(i, v)| (ConstPool::create_name(i), v)) + } +} + +/// Apply an instruction to arguments. +/// +/// An `Apply` AST expression is created by using function call syntax on instructions. This +/// applies to both bound and unbound polymorphic instructions. +pub(crate) struct Apply { + pub inst: Instruction, + pub args: Vec<Expr>, + pub value_types: Vec<ValueType>, +} + +impl Apply { + pub fn new(target: InstSpec, args: Vec<Expr>) -> Self { + let (inst, value_types) = match target { + InstSpec::Inst(inst) => (inst, Vec::new()), + InstSpec::Bound(bound_inst) => (bound_inst.inst, bound_inst.value_types), + }; + + // Apply should only operate on concrete value types, not "any". + let value_types = value_types + .into_iter() + .map(|vt| vt.expect("shouldn't be Any")) + .collect(); + + // Basic check on number of arguments. 
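        // Illustrative failure mode: applying a two-operand instruction such as
        // `iadd` with only one argument expression trips this assertion when the
        // meta crate runs.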
+ assert!( + inst.operands_in.len() == args.len(), + format!("incorrect number of arguments in instruction {}", inst.name) + ); + + // Check that the kinds of Literals arguments match the expected operand. + for &imm_index in &inst.imm_opnums { + let arg = &args[imm_index]; + if let Some(literal) = arg.maybe_literal() { + let op = &inst.operands_in[imm_index]; + match &op.kind.fields { + OperandKindFields::ImmEnum(values) => { + if let Literal::Enumerator { value, .. } = literal { + assert!( + values.iter().any(|(_key, v)| v == value), + "Nonexistent enum value '{}' passed to field of kind '{}' -- \ + did you use the right enum?", + value, + op.kind.rust_type + ); + } else { + panic!( + "Passed non-enum field value {:?} to field of kind {}", + literal, op.kind.rust_type + ); + } + } + OperandKindFields::ImmValue => match &literal { + Literal::Enumerator { value, .. } => panic!( + "Expected immediate value in immediate field of kind '{}', \ + obtained enum value '{}'", + op.kind.rust_type, value + ), + Literal::Bits { .. } | Literal::Int(_) | Literal::EmptyVarArgs => {} + }, + _ => { + panic!( + "Literal passed to non-literal field of kind {}", + op.kind.rust_type + ); + } + } + } + } + + Self { + inst, + args, + value_types, + } + } + + fn to_comment_string(&self, var_pool: &VarPool) -> String { + let args = self + .args + .iter() + .map(|arg| arg.to_rust_code(var_pool)) + .collect::<Vec<_>>() + .join(", "); + + let mut inst_and_bound_types = vec![self.inst.name.to_string()]; + inst_and_bound_types.extend(self.value_types.iter().map(|vt| vt.to_string())); + let inst_name = inst_and_bound_types.join("."); + + format!("{}({})", inst_name, args) + } + + pub fn inst_predicate(&self, var_pool: &VarPool) -> InstructionPredicate { + let mut pred = InstructionPredicate::new(); + for (format_field, &op_num) in self + .inst + .format + .imm_fields + .iter() + .zip(self.inst.imm_opnums.iter()) + { + let arg = &self.args[op_num]; + if arg.maybe_var().is_some() { + // Ignore free variables for now. + continue; + } + pred = pred.and(InstructionPredicate::new_is_field_equal_ast( + &*self.inst.format, + format_field, + arg.to_rust_code(var_pool), + )); + } + + // Add checks for any bound secondary type variables. We can't check the controlling type + // variable this way since it may not appear as the type of an operand. + if self.value_types.len() > 1 { + let poly = self + .inst + .polymorphic_info + .as_ref() + .expect("must have polymorphic info if it has bounded types"); + for (bound_type, type_var) in + self.value_types[1..].iter().zip(poly.other_typevars.iter()) + { + pred = pred.and(InstructionPredicate::new_typevar_check( + &self.inst, type_var, bound_type, + )); + } + } + + pred + } + + /// Same as `inst_predicate()`, but also check the controlling type variable. 
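    /// For a bound instruction such as `iadd.i32` (illustrative), this also emits a
    /// check on the controlling type: against the typevar operand's type when
    /// `use_typevar_operand` is set, otherwise via `ctrl_typevar(inst)`.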
+ pub fn inst_predicate_with_ctrl_typevar(&self, var_pool: &VarPool) -> InstructionPredicate { + let mut pred = self.inst_predicate(var_pool); + + if !self.value_types.is_empty() { + let bound_type = &self.value_types[0]; + let poly = self.inst.polymorphic_info.as_ref().unwrap(); + let type_check = if poly.use_typevar_operand { + InstructionPredicate::new_typevar_check(&self.inst, &poly.ctrl_typevar, bound_type) + } else { + InstructionPredicate::new_ctrl_typevar_check(&bound_type) + }; + pred = pred.and(type_check); + } + + pred + } + + pub fn rust_builder(&self, defined_vars: &[VarIndex], var_pool: &VarPool) -> String { + let mut args = self + .args + .iter() + .map(|expr| expr.to_rust_code(var_pool)) + .collect::<Vec<_>>() + .join(", "); + + // Do we need to pass an explicit type argument? + if let Some(poly) = &self.inst.polymorphic_info { + if !poly.use_typevar_operand { + args = format!("{}, {}", var_pool.get(defined_vars[0]).rust_type(), args); + } + } + + format!("{}({})", self.inst.snake_name(), args) + } +} + +// Simple helpers for legalize actions construction. + +pub(crate) enum DummyExpr { + Var(DummyVar), + Literal(Literal), + Constant(DummyConstant), + Apply(InstSpec, Vec<DummyExpr>), + Block(DummyVar), +} + +#[derive(Clone)] +pub(crate) struct DummyVar { + pub name: String, +} + +impl Into<DummyExpr> for DummyVar { + fn into(self) -> DummyExpr { + DummyExpr::Var(self) + } +} +impl Into<DummyExpr> for Literal { + fn into(self) -> DummyExpr { + DummyExpr::Literal(self) + } +} + +#[derive(Clone)] +pub(crate) struct DummyConstant(pub(crate) Vec<u8>); + +pub(crate) fn constant(data: Vec<u8>) -> DummyConstant { + DummyConstant(data) +} + +impl Into<DummyExpr> for DummyConstant { + fn into(self) -> DummyExpr { + DummyExpr::Constant(self) + } +} + +pub(crate) fn var(name: &str) -> DummyVar { + DummyVar { + name: name.to_owned(), + } +} + +pub(crate) struct DummyDef { + pub expr: DummyExpr, + pub defined_vars: Vec<DummyVar>, +} + +pub(crate) struct ExprBuilder { + expr: DummyExpr, +} + +impl ExprBuilder { + pub fn apply(inst: InstSpec, args: Vec<DummyExpr>) -> Self { + let expr = DummyExpr::Apply(inst, args); + Self { expr } + } + + pub fn assign_to(self, defined_vars: Vec<DummyVar>) -> DummyDef { + DummyDef { + expr: self.expr, + defined_vars, + } + } + + pub fn block(name: DummyVar) -> Self { + let expr = DummyExpr::Block(name); + Self { expr } + } +} + +macro_rules! def_rhs { + // inst(a, b, c) + ($inst:ident($($src:expr),*)) => { + ExprBuilder::apply($inst.into(), vec![$($src.clone().into()),*]) + }; + + // inst.type(a, b, c) + ($inst:ident.$type:ident($($src:expr),*)) => { + ExprBuilder::apply($inst.bind($type).into(), vec![$($src.clone().into()),*]) + }; +} + +// Helper macro to define legalization recipes. +macro_rules! def { + // x = ... + ($dest:ident = $($tt:tt)*) => { + def_rhs!($($tt)*).assign_to(vec![$dest.clone()]) + }; + + // (x, y, ...) = ... + (($($dest:ident),*) = $($tt:tt)*) => { + def_rhs!($($tt)*).assign_to(vec![$($dest.clone()),*]) + }; + + // An instruction with no results. + ($($tt:tt)*) => { + def_rhs!($($tt)*).assign_to(Vec::new()) + } +} + +// Helper macro to define legalization recipes. +macro_rules! block { + // a basic block definition, splitting the current block in 2. 
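    // Illustrative expansions (hypothetical variable and instruction names):
    //   def!(a = iadd(x, y)) => ExprBuilder::apply(iadd.into(),
    //       vec![x.clone().into(), y.clone().into()]).assign_to(vec![a.clone()])
    //   block!(resume)       => ExprBuilder::block(resume).assign_to(Vec::new())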
+ ($block: ident) => { + ExprBuilder::block($block).assign_to(Vec::new()) + }; +} + +#[cfg(test)] +mod tests { + use crate::cdsl::ast::ConstPool; + + #[test] + fn const_pool_returns_var_names() { + let mut c = ConstPool::new(); + assert_eq!(c.insert([0, 1, 2].to_vec()), "const0"); + assert_eq!(c.insert([1, 2, 3].to_vec()), "const1"); + } + + #[test] + fn const_pool_avoids_duplicates() { + let data = [0, 1, 2].to_vec(); + let mut c = ConstPool::new(); + assert_eq!(c.pool.len(), 0); + + assert_eq!(c.insert(data.clone()), "const0"); + assert_eq!(c.pool.len(), 1); + + assert_eq!(c.insert(data), "const0"); + assert_eq!(c.pool.len(), 1); + } + + #[test] + fn const_pool_iterates() { + let mut c = ConstPool::new(); + c.insert([0, 1, 2].to_vec()); + c.insert([3, 4, 5].to_vec()); + + let mut iter = c.iter(); + assert_eq!(iter.next(), Some(("const0".to_owned(), &vec![0, 1, 2]))); + assert_eq!(iter.next(), Some(("const1".to_owned(), &vec![3, 4, 5]))); + assert_eq!(iter.next(), None); + } +} diff --git a/third_party/rust/cranelift-codegen-meta/src/cdsl/cpu_modes.rs b/third_party/rust/cranelift-codegen-meta/src/cdsl/cpu_modes.rs new file mode 100644 index 0000000000..7d119b00ce --- /dev/null +++ b/third_party/rust/cranelift-codegen-meta/src/cdsl/cpu_modes.rs @@ -0,0 +1,88 @@ +use std::collections::{hash_map, HashMap, HashSet}; +use std::iter::FromIterator; + +use crate::cdsl::encodings::Encoding; +use crate::cdsl::types::{LaneType, ValueType}; +use crate::cdsl::xform::{TransformGroup, TransformGroupIndex}; + +pub(crate) struct CpuMode { + pub name: &'static str, + default_legalize: Option<TransformGroupIndex>, + monomorphic_legalize: Option<TransformGroupIndex>, + typed_legalize: HashMap<ValueType, TransformGroupIndex>, + pub encodings: Vec<Encoding>, +} + +impl CpuMode { + pub fn new(name: &'static str) -> Self { + Self { + name, + default_legalize: None, + monomorphic_legalize: None, + typed_legalize: HashMap::new(), + encodings: Vec::new(), + } + } + + pub fn set_encodings(&mut self, encodings: Vec<Encoding>) { + assert!(self.encodings.is_empty(), "clobbering encodings"); + self.encodings = encodings; + } + + pub fn legalize_monomorphic(&mut self, group: &TransformGroup) { + assert!(self.monomorphic_legalize.is_none()); + self.monomorphic_legalize = Some(group.id); + } + pub fn legalize_default(&mut self, group: &TransformGroup) { + assert!(self.default_legalize.is_none()); + self.default_legalize = Some(group.id); + } + pub fn legalize_value_type(&mut self, lane_type: impl Into<ValueType>, group: &TransformGroup) { + assert!(self + .typed_legalize + .insert(lane_type.into(), group.id) + .is_none()); + } + pub fn legalize_type(&mut self, lane_type: impl Into<LaneType>, group: &TransformGroup) { + assert!(self + .typed_legalize + .insert(lane_type.into().into(), group.id) + .is_none()); + } + + pub fn get_default_legalize_code(&self) -> TransformGroupIndex { + self.default_legalize + .expect("a finished CpuMode must have a default legalize code") + } + pub fn get_legalize_code_for(&self, typ: &Option<ValueType>) -> TransformGroupIndex { + match typ { + Some(typ) => self + .typed_legalize + .get(typ) + .copied() + .unwrap_or_else(|| self.get_default_legalize_code()), + None => self + .monomorphic_legalize + .unwrap_or_else(|| self.get_default_legalize_code()), + } + } + pub fn get_legalized_types(&self) -> hash_map::Keys<ValueType, TransformGroupIndex> { + self.typed_legalize.keys() + } + + /// Returns a deterministically ordered, deduplicated list of TransformGroupIndex for the directly + /// 
reachable set of TransformGroup this TargetIsa uses. + pub fn direct_transform_groups(&self) -> Vec<TransformGroupIndex> { + let mut set = HashSet::new(); + if let Some(i) = &self.default_legalize { + set.insert(*i); + } + if let Some(i) = &self.monomorphic_legalize { + set.insert(*i); + } + set.extend(self.typed_legalize.values().cloned()); + let mut ret = Vec::from_iter(set); + ret.sort(); + ret + } +} diff --git a/third_party/rust/cranelift-codegen-meta/src/cdsl/encodings.rs b/third_party/rust/cranelift-codegen-meta/src/cdsl/encodings.rs new file mode 100644 index 0000000000..f66746f92f --- /dev/null +++ b/third_party/rust/cranelift-codegen-meta/src/cdsl/encodings.rs @@ -0,0 +1,179 @@ +use crate::cdsl::instructions::{ + InstSpec, Instruction, InstructionPredicate, InstructionPredicateNode, + InstructionPredicateNumber, InstructionPredicateRegistry, ValueTypeOrAny, +}; +use crate::cdsl::recipes::{EncodingRecipeNumber, Recipes}; +use crate::cdsl::settings::SettingPredicateNumber; +use crate::cdsl::types::ValueType; +use std::rc::Rc; +use std::string::ToString; + +/// Encoding for a concrete instruction. +/// +/// An `Encoding` object ties an instruction opcode with concrete type variables together with an +/// encoding recipe and encoding encbits. +/// +/// The concrete instruction can be in three different forms: +/// +/// 1. A naked opcode: `trap` for non-polymorphic instructions. +/// 2. With bound type variables: `iadd.i32` for polymorphic instructions. +/// 3. With operands providing constraints: `icmp.i32(intcc.eq, x, y)`. +/// +/// If the instruction is polymorphic, all type variables must be provided. +pub(crate) struct EncodingContent { + /// The `Instruction` or `BoundInstruction` being encoded. + inst: InstSpec, + + /// The `EncodingRecipe` to use. + pub recipe: EncodingRecipeNumber, + + /// Additional encoding bits to be interpreted by `recipe`. + pub encbits: u16, + + /// An instruction predicate that must be true to allow selecting this encoding. + pub inst_predicate: Option<InstructionPredicateNumber>, + + /// An ISA predicate that must be true to allow selecting this encoding. + pub isa_predicate: Option<SettingPredicateNumber>, + + /// The value type this encoding has been bound to, for encodings of polymorphic instructions. + pub bound_type: Option<ValueType>, +} + +impl EncodingContent { + pub fn inst(&self) -> &Instruction { + self.inst.inst() + } + pub fn to_rust_comment(&self, recipes: &Recipes) -> String { + format!("[{}#{:02x}]", recipes[self.recipe].name, self.encbits) + } +} + +pub(crate) type Encoding = Rc<EncodingContent>; + +pub(crate) struct EncodingBuilder { + inst: InstSpec, + recipe: EncodingRecipeNumber, + encbits: u16, + inst_predicate: Option<InstructionPredicate>, + isa_predicate: Option<SettingPredicateNumber>, + bound_type: Option<ValueType>, +} + +impl EncodingBuilder { + pub fn new(inst: InstSpec, recipe: EncodingRecipeNumber, encbits: u16) -> Self { + let (inst_predicate, bound_type) = match &inst { + InstSpec::Bound(inst) => { + let other_typevars = &inst.inst.polymorphic_info.as_ref().unwrap().other_typevars; + + assert_eq!( + inst.value_types.len(), + other_typevars.len() + 1, + "partially bound polymorphic instruction" + ); + + // Add secondary type variables to the instruction predicate. 
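                // (Sketch: for a hypothetical binding `inst.i32.i64`, only the
                // trailing bound type `i64` is checked here; the controlling type
                // `i32` becomes `bound_type` further below.)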
+ let value_types = &inst.value_types; + let mut inst_predicate: Option<InstructionPredicate> = None; + for (typevar, value_type) in other_typevars.iter().zip(value_types.iter().skip(1)) { + let value_type = match value_type { + ValueTypeOrAny::Any => continue, + ValueTypeOrAny::ValueType(vt) => vt, + }; + let type_predicate = + InstructionPredicate::new_typevar_check(&inst.inst, typevar, value_type); + inst_predicate = Some(type_predicate.into()); + } + + // Add immediate value predicates + for (immediate_value, immediate_operand) in inst + .immediate_values + .iter() + .zip(inst.inst.operands_in.iter().filter(|o| o.is_immediate())) + { + let immediate_predicate = InstructionPredicate::new_is_field_equal( + &inst.inst.format, + immediate_operand.kind.rust_field_name, + immediate_value.to_string(), + ); + inst_predicate = if let Some(type_predicate) = inst_predicate { + Some(type_predicate.and(immediate_predicate)) + } else { + Some(immediate_predicate.into()) + } + } + + let ctrl_type = value_types[0] + .clone() + .expect("Controlling type shouldn't be Any"); + (inst_predicate, Some(ctrl_type)) + } + + InstSpec::Inst(inst) => { + assert!( + inst.polymorphic_info.is_none(), + "unbound polymorphic instruction" + ); + (None, None) + } + }; + + Self { + inst, + recipe, + encbits, + inst_predicate, + isa_predicate: None, + bound_type, + } + } + + pub fn inst_predicate(mut self, inst_predicate: InstructionPredicateNode) -> Self { + let inst_predicate = Some(match self.inst_predicate { + Some(node) => node.and(inst_predicate), + None => inst_predicate.into(), + }); + self.inst_predicate = inst_predicate; + self + } + + pub fn isa_predicate(mut self, isa_predicate: SettingPredicateNumber) -> Self { + assert!(self.isa_predicate.is_none()); + self.isa_predicate = Some(isa_predicate); + self + } + + pub fn build( + self, + recipes: &Recipes, + inst_pred_reg: &mut InstructionPredicateRegistry, + ) -> Encoding { + let inst_predicate = self.inst_predicate.map(|pred| inst_pred_reg.insert(pred)); + + let inst = self.inst.inst(); + assert!( + Rc::ptr_eq(&inst.format, &recipes[self.recipe].format), + format!( + "Inst {} and recipe {} must have the same format!", + inst.name, recipes[self.recipe].name + ) + ); + + assert_eq!( + inst.is_branch && !inst.is_indirect_branch, + recipes[self.recipe].branch_range.is_some(), + "Inst {}'s is_branch contradicts recipe {} branch_range!", + inst.name, + recipes[self.recipe].name + ); + + Rc::new(EncodingContent { + inst: self.inst, + recipe: self.recipe, + encbits: self.encbits, + inst_predicate, + isa_predicate: self.isa_predicate, + bound_type: self.bound_type, + }) + } +} diff --git a/third_party/rust/cranelift-codegen-meta/src/cdsl/formats.rs b/third_party/rust/cranelift-codegen-meta/src/cdsl/formats.rs new file mode 100644 index 0000000000..e713a8bccb --- /dev/null +++ b/third_party/rust/cranelift-codegen-meta/src/cdsl/formats.rs @@ -0,0 +1,171 @@ +use crate::cdsl::operands::OperandKind; +use std::fmt; +use std::rc::Rc; + +/// An immediate field in an instruction format. +/// +/// This corresponds to a single member of a variant of the `InstructionData` +/// data type. +#[derive(Debug)] +pub(crate) struct FormatField { + /// Immediate operand kind. + pub kind: OperandKind, + + /// Member name in InstructionData variant. + pub member: &'static str, +} + +/// Every instruction opcode has a corresponding instruction format which determines the number of +/// operands and their kinds. 
Instruction formats are identified structurally, i.e., the format of +/// an instruction is derived from the kinds of operands used in its declaration. +/// +/// The instruction format stores two separate lists of operands: Immediates and values. Immediate +/// operands (including entity references) are represented as explicit members in the +/// `InstructionData` variants. The value operands are stored differently, depending on how many +/// there are. Beyond a certain point, instruction formats switch to an external value list for +/// storing value arguments. Value lists can hold an arbitrary number of values. +/// +/// All instruction formats must be predefined in the meta shared/formats.rs module. +#[derive(Debug)] +pub(crate) struct InstructionFormat { + /// Instruction format name in CamelCase. This is used as a Rust variant name in both the + /// `InstructionData` and `InstructionFormat` enums. + pub name: &'static str, + + pub num_value_operands: usize, + + pub has_value_list: bool, + + pub imm_fields: Vec<FormatField>, + + /// Index of the value input operand that is used to infer the controlling type variable. By + /// default, this is `0`, the first `value` operand. The index is relative to the values only, + /// ignoring immediate operands. + pub typevar_operand: Option<usize>, +} + +/// A tuple serving as a key to deduplicate InstructionFormat. +#[derive(Hash, PartialEq, Eq)] +pub(crate) struct FormatStructure { + pub num_value_operands: usize, + pub has_value_list: bool, + /// Tuples of (Rust field name / Rust type) for each immediate field. + pub imm_field_names: Vec<(&'static str, &'static str)>, +} + +impl fmt::Display for InstructionFormat { + fn fmt(&self, fmt: &mut fmt::Formatter) -> Result<(), fmt::Error> { + let imm_args = self + .imm_fields + .iter() + .map(|field| format!("{}: {}", field.member, field.kind.rust_type)) + .collect::<Vec<_>>() + .join(", "); + fmt.write_fmt(format_args!( + "{}(imms=({}), vals={})", + self.name, imm_args, self.num_value_operands + ))?; + Ok(()) + } +} + +impl InstructionFormat { + pub fn imm_by_name(&self, name: &'static str) -> &FormatField { + self.imm_fields + .iter() + .find(|&field| field.member == name) + .unwrap_or_else(|| { + panic!( + "unexpected immediate field named {} in instruction format {}", + name, self.name + ) + }) + } + + /// Returns a tuple that uniquely identifies the structure. 
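    /// Two formats with the same number of value operands, the same value-list flag,
    /// and identical (field name, Rust type) immediate lists compare equal, so the
    /// returned key can be used to deduplicate formats (e.g. as a `HashMap` key; sketch).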
+ pub fn structure(&self) -> FormatStructure { + FormatStructure { + num_value_operands: self.num_value_operands, + has_value_list: self.has_value_list, + imm_field_names: self + .imm_fields + .iter() + .map(|field| (field.kind.rust_field_name, field.kind.rust_type)) + .collect::<Vec<_>>(), + } + } +} + +pub(crate) struct InstructionFormatBuilder { + name: &'static str, + num_value_operands: usize, + has_value_list: bool, + imm_fields: Vec<FormatField>, + typevar_operand: Option<usize>, +} + +impl InstructionFormatBuilder { + pub fn new(name: &'static str) -> Self { + Self { + name, + num_value_operands: 0, + has_value_list: false, + imm_fields: Vec::new(), + typevar_operand: None, + } + } + + pub fn value(mut self) -> Self { + self.num_value_operands += 1; + self + } + + pub fn varargs(mut self) -> Self { + self.has_value_list = true; + self + } + + pub fn imm(mut self, operand_kind: &OperandKind) -> Self { + let field = FormatField { + kind: operand_kind.clone(), + member: operand_kind.rust_field_name, + }; + self.imm_fields.push(field); + self + } + + pub fn imm_with_name(mut self, member: &'static str, operand_kind: &OperandKind) -> Self { + let field = FormatField { + kind: operand_kind.clone(), + member, + }; + self.imm_fields.push(field); + self + } + + pub fn typevar_operand(mut self, operand_index: usize) -> Self { + assert!(self.typevar_operand.is_none()); + assert!(self.has_value_list || operand_index < self.num_value_operands); + self.typevar_operand = Some(operand_index); + self + } + + pub fn build(self) -> Rc<InstructionFormat> { + let typevar_operand = if self.typevar_operand.is_some() { + self.typevar_operand + } else if self.has_value_list || self.num_value_operands > 0 { + // Default to the first value operand, if there's one. + Some(0) + } else { + None + }; + + Rc::new(InstructionFormat { + name: self.name, + num_value_operands: self.num_value_operands, + has_value_list: self.has_value_list, + imm_fields: self.imm_fields, + typevar_operand, + }) + } +} diff --git a/third_party/rust/cranelift-codegen-meta/src/cdsl/instructions.rs b/third_party/rust/cranelift-codegen-meta/src/cdsl/instructions.rs new file mode 100644 index 0000000000..88a15c6038 --- /dev/null +++ b/third_party/rust/cranelift-codegen-meta/src/cdsl/instructions.rs @@ -0,0 +1,1395 @@ +use cranelift_codegen_shared::condcodes::IntCC; +use cranelift_entity::{entity_impl, PrimaryMap}; + +use std::collections::HashMap; +use std::fmt; +use std::fmt::{Display, Error, Formatter}; +use std::rc::Rc; + +use crate::cdsl::camel_case; +use crate::cdsl::formats::{FormatField, InstructionFormat}; +use crate::cdsl::operands::Operand; +use crate::cdsl::type_inference::Constraint; +use crate::cdsl::types::{LaneType, ReferenceType, ValueType, VectorType}; +use crate::cdsl::typevar::TypeVar; + +use crate::shared::formats::Formats; +use crate::shared::types::{Bool, Float, Int, Reference}; + +#[derive(Copy, Clone, Debug, PartialEq, Eq, Hash, PartialOrd, Ord)] +pub(crate) struct OpcodeNumber(u32); +entity_impl!(OpcodeNumber); + +pub(crate) type AllInstructions = PrimaryMap<OpcodeNumber, Instruction>; + +pub(crate) struct InstructionGroupBuilder<'all_inst> { + all_instructions: &'all_inst mut AllInstructions, + own_instructions: Vec<Instruction>, +} + +impl<'all_inst> InstructionGroupBuilder<'all_inst> { + pub fn new(all_instructions: &'all_inst mut AllInstructions) -> Self { + Self { + all_instructions, + own_instructions: Vec::new(), + } + } + + pub fn push(&mut self, builder: InstructionBuilder) { + let opcode_number = 
OpcodeNumber(self.all_instructions.next_key().as_u32()); + let inst = builder.build(opcode_number); + // Note this clone is cheap, since Instruction is a Rc<> wrapper for InstructionContent. + self.own_instructions.push(inst.clone()); + self.all_instructions.push(inst); + } + + pub fn build(self) -> InstructionGroup { + InstructionGroup { + instructions: self.own_instructions, + } + } +} + +/// Every instruction must belong to exactly one instruction group. A given +/// target architecture can support instructions from multiple groups, and it +/// does not necessarily support all instructions in a group. +pub(crate) struct InstructionGroup { + instructions: Vec<Instruction>, +} + +impl InstructionGroup { + pub fn by_name(&self, name: &'static str) -> &Instruction { + self.instructions + .iter() + .find(|inst| inst.name == name) + .unwrap_or_else(|| panic!("instruction with name '{}' does not exist", name)) + } +} + +/// Instructions can have parameters bound to them to specialize them for more specific encodings +/// (e.g. the encoding for adding two float types may be different than that of adding two +/// integer types) +pub(crate) trait Bindable { + /// Bind a parameter to an instruction + fn bind(&self, parameter: impl Into<BindParameter>) -> BoundInstruction; +} + +#[derive(Debug)] +pub(crate) struct PolymorphicInfo { + pub use_typevar_operand: bool, + pub ctrl_typevar: TypeVar, + pub other_typevars: Vec<TypeVar>, +} + +#[derive(Debug)] +pub(crate) struct InstructionContent { + /// Instruction mnemonic, also becomes opcode name. + pub name: String, + pub camel_name: String, + pub opcode_number: OpcodeNumber, + + /// Documentation string. + pub doc: String, + + /// Input operands. This can be a mix of SSA value operands and other operand kinds. + pub operands_in: Vec<Operand>, + /// Output operands. The output operands must be SSA values or `variable_args`. + pub operands_out: Vec<Operand>, + /// Instruction-specific TypeConstraints. + pub constraints: Vec<Constraint>, + + /// Instruction format, automatically derived from the input operands. + pub format: Rc<InstructionFormat>, + + /// One of the input or output operands is a free type variable. None if the instruction is not + /// polymorphic, set otherwise. + pub polymorphic_info: Option<PolymorphicInfo>, + + /// Indices in operands_in of input operands that are values. + pub value_opnums: Vec<usize>, + /// Indices in operands_in of input operands that are immediates or entities. + pub imm_opnums: Vec<usize>, + /// Indices in operands_out of output operands that are values. + pub value_results: Vec<usize>, + + /// True for instructions that terminate the block. + pub is_terminator: bool, + /// True for all branch or jump instructions. + pub is_branch: bool, + /// True for all indirect branch or jump instructions.', + pub is_indirect_branch: bool, + /// Is this a call instruction? + pub is_call: bool, + /// Is this a return instruction? + pub is_return: bool, + /// Is this a ghost instruction? + pub is_ghost: bool, + /// Can this instruction read from memory? + pub can_load: bool, + /// Can this instruction write to memory? + pub can_store: bool, + /// Can this instruction cause a trap? + pub can_trap: bool, + /// Does this instruction have other side effects besides can_* flags? + pub other_side_effects: bool, + /// Does this instruction write to CPU flags? + pub writes_cpu_flags: bool, + /// Should this opcode be considered to clobber all live registers, during regalloc? 
+ pub clobbers_all_regs: bool, +} + +impl InstructionContent { + pub fn snake_name(&self) -> &str { + if &self.name == "return" { + "return_" + } else { + &self.name + } + } + + pub fn all_typevars(&self) -> Vec<&TypeVar> { + match &self.polymorphic_info { + Some(poly) => { + let mut result = vec![&poly.ctrl_typevar]; + result.extend(&poly.other_typevars); + result + } + None => Vec::new(), + } + } +} + +pub(crate) type Instruction = Rc<InstructionContent>; + +impl Bindable for Instruction { + fn bind(&self, parameter: impl Into<BindParameter>) -> BoundInstruction { + BoundInstruction::new(self).bind(parameter) + } +} + +impl fmt::Display for InstructionContent { + fn fmt(&self, fmt: &mut fmt::Formatter) -> Result<(), fmt::Error> { + if !self.operands_out.is_empty() { + let operands_out = self + .operands_out + .iter() + .map(|op| op.name) + .collect::<Vec<_>>() + .join(", "); + fmt.write_str(&operands_out)?; + fmt.write_str(" = ")?; + } + + fmt.write_str(&self.name)?; + + if !self.operands_in.is_empty() { + let operands_in = self + .operands_in + .iter() + .map(|op| op.name) + .collect::<Vec<_>>() + .join(", "); + fmt.write_str(" ")?; + fmt.write_str(&operands_in)?; + } + + Ok(()) + } +} + +pub(crate) struct InstructionBuilder { + name: String, + doc: String, + format: Rc<InstructionFormat>, + operands_in: Option<Vec<Operand>>, + operands_out: Option<Vec<Operand>>, + constraints: Option<Vec<Constraint>>, + + // See Instruction comments for the meaning of these fields. + is_terminator: bool, + is_branch: bool, + is_indirect_branch: bool, + is_call: bool, + is_return: bool, + is_ghost: bool, + can_load: bool, + can_store: bool, + can_trap: bool, + other_side_effects: bool, + clobbers_all_regs: bool, +} + +impl InstructionBuilder { + pub fn new<S: Into<String>>(name: S, doc: S, format: &Rc<InstructionFormat>) -> Self { + Self { + name: name.into(), + doc: doc.into(), + format: format.clone(), + operands_in: None, + operands_out: None, + constraints: None, + + is_terminator: false, + is_branch: false, + is_indirect_branch: false, + is_call: false, + is_return: false, + is_ghost: false, + can_load: false, + can_store: false, + can_trap: false, + other_side_effects: false, + clobbers_all_regs: false, + } + } + + pub fn operands_in(mut self, operands: Vec<&Operand>) -> Self { + assert!(self.operands_in.is_none()); + self.operands_in = Some(operands.iter().map(|x| (*x).clone()).collect()); + self + } + + pub fn operands_out(mut self, operands: Vec<&Operand>) -> Self { + assert!(self.operands_out.is_none()); + self.operands_out = Some(operands.iter().map(|x| (*x).clone()).collect()); + self + } + + pub fn constraints(mut self, constraints: Vec<Constraint>) -> Self { + assert!(self.constraints.is_none()); + self.constraints = Some(constraints); + self + } + + #[allow(clippy::wrong_self_convention)] + pub fn is_terminator(mut self, val: bool) -> Self { + self.is_terminator = val; + self + } + + #[allow(clippy::wrong_self_convention)] + pub fn is_branch(mut self, val: bool) -> Self { + self.is_branch = val; + self + } + + #[allow(clippy::wrong_self_convention)] + pub fn is_indirect_branch(mut self, val: bool) -> Self { + self.is_indirect_branch = val; + self + } + + #[allow(clippy::wrong_self_convention)] + pub fn is_call(mut self, val: bool) -> Self { + self.is_call = val; + self + } + + #[allow(clippy::wrong_self_convention)] + pub fn is_return(mut self, val: bool) -> Self { + self.is_return = val; + self + } + + #[allow(clippy::wrong_self_convention)] + pub fn is_ghost(mut self, val: bool) -> Self 
{ + self.is_ghost = val; + self + } + + pub fn can_load(mut self, val: bool) -> Self { + self.can_load = val; + self + } + + pub fn can_store(mut self, val: bool) -> Self { + self.can_store = val; + self + } + + pub fn can_trap(mut self, val: bool) -> Self { + self.can_trap = val; + self + } + + pub fn other_side_effects(mut self, val: bool) -> Self { + self.other_side_effects = val; + self + } + + pub fn clobbers_all_regs(mut self, val: bool) -> Self { + self.clobbers_all_regs = val; + self + } + + fn build(self, opcode_number: OpcodeNumber) -> Instruction { + let operands_in = self.operands_in.unwrap_or_else(Vec::new); + let operands_out = self.operands_out.unwrap_or_else(Vec::new); + + let mut value_opnums = Vec::new(); + let mut imm_opnums = Vec::new(); + for (i, op) in operands_in.iter().enumerate() { + if op.is_value() { + value_opnums.push(i); + } else if op.is_immediate_or_entityref() { + imm_opnums.push(i); + } else { + assert!(op.is_varargs()); + } + } + + let value_results = operands_out + .iter() + .enumerate() + .filter_map(|(i, op)| if op.is_value() { Some(i) } else { None }) + .collect(); + + verify_format(&self.name, &operands_in, &self.format); + + let polymorphic_info = + verify_polymorphic(&operands_in, &operands_out, &self.format, &value_opnums); + + // Infer from output operands whether an instruction clobbers CPU flags or not. + let writes_cpu_flags = operands_out.iter().any(|op| op.is_cpu_flags()); + + let camel_name = camel_case(&self.name); + + Rc::new(InstructionContent { + name: self.name, + camel_name, + opcode_number, + doc: self.doc, + operands_in, + operands_out, + constraints: self.constraints.unwrap_or_else(Vec::new), + format: self.format, + polymorphic_info, + value_opnums, + value_results, + imm_opnums, + is_terminator: self.is_terminator, + is_branch: self.is_branch, + is_indirect_branch: self.is_indirect_branch, + is_call: self.is_call, + is_return: self.is_return, + is_ghost: self.is_ghost, + can_load: self.can_load, + can_store: self.can_store, + can_trap: self.can_trap, + other_side_effects: self.other_side_effects, + writes_cpu_flags, + clobbers_all_regs: self.clobbers_all_regs, + }) + } +} + +/// A thin wrapper like Option<ValueType>, but with more precise semantics. 
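/// `Any` records a slot deliberately bound as "any type" (see `BindParameter::Any`),
/// which a bare `Option::None` could not distinguish from "not bound at all";
/// `expect` converts to a concrete `ValueType` or panics.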
+#[derive(Clone)] +pub(crate) enum ValueTypeOrAny { + ValueType(ValueType), + Any, +} + +impl ValueTypeOrAny { + pub fn expect(self, msg: &str) -> ValueType { + match self { + ValueTypeOrAny::ValueType(vt) => vt, + ValueTypeOrAny::Any => panic!(format!("Unexpected Any: {}", msg)), + } + } +} + +/// The number of bits in the vector +type VectorBitWidth = u64; + +/// An parameter used for binding instructions to specific types or values +pub(crate) enum BindParameter { + Any, + Lane(LaneType), + Vector(LaneType, VectorBitWidth), + Reference(ReferenceType), + Immediate(Immediate), +} + +/// Constructor for more easily building vector parameters from any lane type +pub(crate) fn vector(parameter: impl Into<LaneType>, vector_size: VectorBitWidth) -> BindParameter { + BindParameter::Vector(parameter.into(), vector_size) +} + +impl From<Int> for BindParameter { + fn from(ty: Int) -> Self { + BindParameter::Lane(ty.into()) + } +} + +impl From<Bool> for BindParameter { + fn from(ty: Bool) -> Self { + BindParameter::Lane(ty.into()) + } +} + +impl From<Float> for BindParameter { + fn from(ty: Float) -> Self { + BindParameter::Lane(ty.into()) + } +} + +impl From<LaneType> for BindParameter { + fn from(ty: LaneType) -> Self { + BindParameter::Lane(ty) + } +} + +impl From<Reference> for BindParameter { + fn from(ty: Reference) -> Self { + BindParameter::Reference(ty.into()) + } +} + +impl From<Immediate> for BindParameter { + fn from(imm: Immediate) -> Self { + BindParameter::Immediate(imm) + } +} + +#[derive(Clone)] +pub(crate) enum Immediate { + // When needed, this enum should be expanded to include other immediate types (e.g. u8, u128). + IntCC(IntCC), +} + +impl Display for Immediate { + fn fmt(&self, f: &mut Formatter) -> Result<(), Error> { + match self { + Immediate::IntCC(x) => write!(f, "IntCC::{:?}", x), + } + } +} + +#[derive(Clone)] +pub(crate) struct BoundInstruction { + pub inst: Instruction, + pub value_types: Vec<ValueTypeOrAny>, + pub immediate_values: Vec<Immediate>, +} + +impl BoundInstruction { + /// Construct a new bound instruction (with nothing bound yet) from an instruction + fn new(inst: &Instruction) -> Self { + BoundInstruction { + inst: inst.clone(), + value_types: vec![], + immediate_values: vec![], + } + } + + /// Verify that the bindings for a BoundInstruction are correct. + fn verify_bindings(&self) -> Result<(), String> { + // Verify that binding types to the instruction does not violate the polymorphic rules. + if !self.value_types.is_empty() { + match &self.inst.polymorphic_info { + Some(poly) => { + if self.value_types.len() > 1 + poly.other_typevars.len() { + return Err(format!( + "trying to bind too many types for {}", + self.inst.name + )); + } + } + None => { + return Err(format!( + "trying to bind a type for {} which is not a polymorphic instruction", + self.inst.name + )); + } + } + } + + // Verify that only the right number of immediates are bound. 
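        // Illustrative: binding two immediates to an instruction that declares only
        // one immediate operand (say, `icmp` with its single `intcc` condition)
        // would be rejected here.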
+ let immediate_count = self + .inst + .operands_in + .iter() + .filter(|o| o.is_immediate_or_entityref()) + .count(); + if self.immediate_values.len() > immediate_count { + return Err(format!( + "trying to bind too many immediates ({}) to instruction {} which only expects {} \ + immediates", + self.immediate_values.len(), + self.inst.name, + immediate_count + )); + } + + Ok(()) + } +} + +impl Bindable for BoundInstruction { + fn bind(&self, parameter: impl Into<BindParameter>) -> BoundInstruction { + let mut modified = self.clone(); + match parameter.into() { + BindParameter::Any => modified.value_types.push(ValueTypeOrAny::Any), + BindParameter::Lane(lane_type) => modified + .value_types + .push(ValueTypeOrAny::ValueType(lane_type.into())), + BindParameter::Vector(lane_type, vector_size_in_bits) => { + let num_lanes = vector_size_in_bits / lane_type.lane_bits(); + assert!( + num_lanes >= 2, + "Minimum lane number for bind_vector is 2, found {}.", + num_lanes, + ); + let vector_type = ValueType::Vector(VectorType::new(lane_type, num_lanes)); + modified + .value_types + .push(ValueTypeOrAny::ValueType(vector_type)); + } + BindParameter::Reference(reference_type) => { + modified + .value_types + .push(ValueTypeOrAny::ValueType(reference_type.into())); + } + BindParameter::Immediate(immediate) => modified.immediate_values.push(immediate), + } + modified.verify_bindings().unwrap(); + modified + } +} + +/// Checks that the input operands actually match the given format. +fn verify_format(inst_name: &str, operands_in: &[Operand], format: &InstructionFormat) { + // A format is defined by: + // - its number of input value operands, + // - its number and names of input immediate operands, + // - whether it has a value list or not. + let mut num_values = 0; + let mut num_immediates = 0; + + for operand in operands_in.iter() { + if operand.is_varargs() { + assert!( + format.has_value_list, + "instruction {} has varargs, but its format {} doesn't have a value list; you may \ + need to use a different format.", + inst_name, format.name + ); + } + if operand.is_value() { + num_values += 1; + } + if operand.is_immediate_or_entityref() { + if let Some(format_field) = format.imm_fields.get(num_immediates) { + assert_eq!( + format_field.kind.rust_field_name, + operand.kind.rust_field_name, + "{}th operand of {} should be {} (according to format), not {} (according to \ + inst definition). You may need to use a different format.", + num_immediates, + inst_name, + format_field.kind.rust_field_name, + operand.kind.rust_field_name + ); + num_immediates += 1; + } + } + } + + assert_eq!( + num_values, format.num_value_operands, + "inst {} doesn't have as many value input operands as its format {} declares; you may need \ + to use a different format.", + inst_name, format.name + ); + + assert_eq!( + num_immediates, + format.imm_fields.len(), + "inst {} doesn't have as many immediate input \ + operands as its format {} declares; you may need to use a different format.", + inst_name, + format.name + ); +} + +/// Check if this instruction is polymorphic, and verify its use of type variables. +fn verify_polymorphic( + operands_in: &[Operand], + operands_out: &[Operand], + format: &InstructionFormat, + value_opnums: &[usize], +) -> Option<PolymorphicInfo> { + // The instruction is polymorphic if it has one free input or output operand. 
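    // (Illustrative: value operands typed by a free type variable, as in a generic
    // integer add, make the instruction polymorphic; operands whose types are fixed
    // singletons do not.)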
+ let is_polymorphic = operands_in + .iter() + .any(|op| op.is_value() && op.type_var().unwrap().free_typevar().is_some()) + || operands_out + .iter() + .any(|op| op.is_value() && op.type_var().unwrap().free_typevar().is_some()); + + if !is_polymorphic { + return None; + } + + // Verify the use of type variables. + let tv_op = format.typevar_operand; + let mut maybe_error_message = None; + if let Some(tv_op) = tv_op { + if tv_op < value_opnums.len() { + let op_num = value_opnums[tv_op]; + let tv = operands_in[op_num].type_var().unwrap(); + let free_typevar = tv.free_typevar(); + if (free_typevar.is_some() && tv == &free_typevar.unwrap()) + || tv.singleton_type().is_some() + { + match is_ctrl_typevar_candidate(tv, &operands_in, &operands_out) { + Ok(other_typevars) => { + return Some(PolymorphicInfo { + use_typevar_operand: true, + ctrl_typevar: tv.clone(), + other_typevars, + }); + } + Err(error_message) => { + maybe_error_message = Some(error_message); + } + } + } + } + }; + + // If we reached here, it means the type variable indicated as the typevar operand couldn't + // control every other input and output type variable. We need to look at the result type + // variables. + if operands_out.is_empty() { + // No result means no other possible type variable, so it's a type inference failure. + match maybe_error_message { + Some(msg) => panic!(msg), + None => panic!("typevar_operand must be a free type variable"), + } + } + + // Otherwise, try to infer the controlling type variable by looking at the first result. + let tv = operands_out[0].type_var().unwrap(); + let free_typevar = tv.free_typevar(); + if free_typevar.is_some() && tv != &free_typevar.unwrap() { + panic!("first result must be a free type variable"); + } + + // At this point, if the next unwrap() fails, it means the output type couldn't be used as a + // controlling type variable either; panicking is the right behavior. + let other_typevars = is_ctrl_typevar_candidate(tv, &operands_in, &operands_out).unwrap(); + + Some(PolymorphicInfo { + use_typevar_operand: false, + ctrl_typevar: tv.clone(), + other_typevars, + }) +} + +/// Verify that the use of TypeVars is consistent with `ctrl_typevar` as the controlling type +/// variable. +/// +/// All polymorhic inputs must either be derived from `ctrl_typevar` or be independent free type +/// variables only used once. +/// +/// All polymorphic results must be derived from `ctrl_typevar`. +/// +/// Return a vector of other type variables used, or a string explaining what went wrong. +fn is_ctrl_typevar_candidate( + ctrl_typevar: &TypeVar, + operands_in: &[Operand], + operands_out: &[Operand], +) -> Result<Vec<TypeVar>, String> { + let mut other_typevars = Vec::new(); + + // Check value inputs. + for input in operands_in { + if !input.is_value() { + continue; + } + + let typ = input.type_var().unwrap(); + let free_typevar = typ.free_typevar(); + + // Non-polymorphic or derived from ctrl_typevar is OK. + if free_typevar.is_none() { + continue; + } + let free_typevar = free_typevar.unwrap(); + if &free_typevar == ctrl_typevar { + continue; + } + + // No other derived typevars allowed. + if typ != &free_typevar { + return Err(format!( + "{:?}: type variable {} must be derived from {:?} while it is derived from {:?}", + input, typ.name, ctrl_typevar, free_typevar + )); + } + + // Other free type variables can only be used once each. 
+ for other_tv in &other_typevars { + if &free_typevar == other_tv { + return Err(format!( + "non-controlling type variable {} can't be used more than once", + free_typevar.name + )); + } + } + + other_typevars.push(free_typevar); + } + + // Check outputs. + for result in operands_out { + if !result.is_value() { + continue; + } + + let typ = result.type_var().unwrap(); + let free_typevar = typ.free_typevar(); + + // Non-polymorphic or derived from ctrl_typevar is OK. + if free_typevar.is_none() || &free_typevar.unwrap() == ctrl_typevar { + continue; + } + + return Err("type variable in output not derived from ctrl_typevar".into()); + } + + Ok(other_typevars) +} + +#[derive(Clone, Hash, PartialEq, Eq)] +pub(crate) enum FormatPredicateKind { + /// Is the field member equal to the expected value (stored here)? + IsEqual(String), + + /// Is the immediate instruction format field representable as an n-bit two's complement + /// integer? (with width: first member, scale: second member). + /// The predicate is true if the field is in the range: `-2^(width-1) -- 2^(width-1)-1` and a + /// multiple of `2^scale`. + IsSignedInt(usize, usize), + + /// Is the immediate instruction format field representable as an n-bit unsigned integer? (with + /// width: first member, scale: second member). + /// The predicate is true if the field is in the range: `0 -- 2^width - 1` and a multiple of + /// `2^scale`. + IsUnsignedInt(usize, usize), + + /// Is the immediate format field member an integer equal to zero? + IsZeroInt, + /// Is the immediate format field member equal to zero? (float32 version) + IsZero32BitFloat, + + /// Is the immediate format field member equal to zero? (float64 version) + IsZero64BitFloat, + + /// Is the immediate format field member equal zero in all lanes? + IsAllZeroes, + + /// Does the immediate format field member have ones in all bits of all lanes? + IsAllOnes, + + /// Has the value list (in member_name) the size specified in parameter? + LengthEquals(usize), + + /// Is the referenced function colocated? + IsColocatedFunc, + + /// Is the referenced data object colocated? + IsColocatedData, +} + +#[derive(Clone, Hash, PartialEq, Eq)] +pub(crate) struct FormatPredicateNode { + format_name: &'static str, + member_name: &'static str, + kind: FormatPredicateKind, +} + +impl FormatPredicateNode { + fn new( + format: &InstructionFormat, + field_name: &'static str, + kind: FormatPredicateKind, + ) -> Self { + let member_name = format.imm_by_name(field_name).member; + Self { + format_name: format.name, + member_name, + kind, + } + } + + fn new_raw( + format: &InstructionFormat, + member_name: &'static str, + kind: FormatPredicateKind, + ) -> Self { + Self { + format_name: format.name, + member_name, + kind, + } + } + + fn destructuring_member_name(&self) -> &'static str { + match &self.kind { + FormatPredicateKind::LengthEquals(_) => { + // Length operates on the argument value list. 
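                // Hence the generated destructuring pattern binds the value list by
                // reference ("ref args") rather than moving it (sketch of intent).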
+ assert!(self.member_name == "args"); + "ref args" + } + _ => self.member_name, + } + } + + fn rust_predicate(&self) -> String { + match &self.kind { + FormatPredicateKind::IsEqual(arg) => { + format!("predicates::is_equal({}, {})", self.member_name, arg) + } + FormatPredicateKind::IsSignedInt(width, scale) => format!( + "predicates::is_signed_int({}, {}, {})", + self.member_name, width, scale + ), + FormatPredicateKind::IsUnsignedInt(width, scale) => format!( + "predicates::is_unsigned_int({}, {}, {})", + self.member_name, width, scale + ), + FormatPredicateKind::IsZeroInt => { + format!("predicates::is_zero_int({})", self.member_name) + } + FormatPredicateKind::IsZero32BitFloat => { + format!("predicates::is_zero_32_bit_float({})", self.member_name) + } + FormatPredicateKind::IsZero64BitFloat => { + format!("predicates::is_zero_64_bit_float({})", self.member_name) + } + FormatPredicateKind::IsAllZeroes => format!( + "predicates::is_all_zeroes(func.dfg.constants.get({}))", + self.member_name + ), + FormatPredicateKind::IsAllOnes => format!( + "predicates::is_all_ones(func.dfg.constants.get({}))", + self.member_name + ), + FormatPredicateKind::LengthEquals(num) => format!( + "predicates::has_length_of({}, {}, func)", + self.member_name, num + ), + FormatPredicateKind::IsColocatedFunc => { + format!("predicates::is_colocated_func({}, func)", self.member_name,) + } + FormatPredicateKind::IsColocatedData => { + format!("predicates::is_colocated_data({}, func)", self.member_name) + } + } + } +} + +#[derive(Clone, Hash, PartialEq, Eq)] +pub(crate) enum TypePredicateNode { + /// Is the value argument (at the index designated by the first member) the same type as the + /// type name (second member)? + TypeVarCheck(usize, String), + + /// Is the controlling type variable the same type as the one designated by the type name + /// (only member)? + CtrlTypeVarCheck(String), +} + +impl TypePredicateNode { + fn rust_predicate(&self, func_str: &str) -> String { + match self { + TypePredicateNode::TypeVarCheck(index, value_type_name) => format!( + "{}.dfg.value_type(args[{}]) == {}", + func_str, index, value_type_name + ), + TypePredicateNode::CtrlTypeVarCheck(value_type_name) => { + format!("{}.dfg.ctrl_typevar(inst) == {}", func_str, value_type_name) + } + } + } +} + +/// A basic node in an instruction predicate: either an atom, or an AND of two conditions. +#[derive(Clone, Hash, PartialEq, Eq)] +pub(crate) enum InstructionPredicateNode { + FormatPredicate(FormatPredicateNode), + + TypePredicate(TypePredicateNode), + + /// An AND-combination of two or more other predicates. + And(Vec<InstructionPredicateNode>), + + /// An OR-combination of two or more other predicates. 
+ Or(Vec<InstructionPredicateNode>), +} + +impl InstructionPredicateNode { + fn rust_predicate(&self, func_str: &str) -> String { + match self { + InstructionPredicateNode::FormatPredicate(node) => node.rust_predicate(), + InstructionPredicateNode::TypePredicate(node) => node.rust_predicate(func_str), + InstructionPredicateNode::And(nodes) => nodes + .iter() + .map(|x| x.rust_predicate(func_str)) + .collect::<Vec<_>>() + .join(" && "), + InstructionPredicateNode::Or(nodes) => nodes + .iter() + .map(|x| x.rust_predicate(func_str)) + .collect::<Vec<_>>() + .join(" || "), + } + } + + pub fn format_destructuring_member_name(&self) -> &str { + match self { + InstructionPredicateNode::FormatPredicate(format_pred) => { + format_pred.destructuring_member_name() + } + _ => panic!("Only for leaf format predicates"), + } + } + + pub fn format_name(&self) -> &str { + match self { + InstructionPredicateNode::FormatPredicate(format_pred) => format_pred.format_name, + _ => panic!("Only for leaf format predicates"), + } + } + + pub fn is_type_predicate(&self) -> bool { + match self { + InstructionPredicateNode::FormatPredicate(_) + | InstructionPredicateNode::And(_) + | InstructionPredicateNode::Or(_) => false, + InstructionPredicateNode::TypePredicate(_) => true, + } + } + + fn collect_leaves(&self) -> Vec<&InstructionPredicateNode> { + let mut ret = Vec::new(); + match self { + InstructionPredicateNode::And(nodes) | InstructionPredicateNode::Or(nodes) => { + for node in nodes { + ret.extend(node.collect_leaves()); + } + } + _ => ret.push(self), + } + ret + } +} + +#[derive(Clone, Hash, PartialEq, Eq)] +pub(crate) struct InstructionPredicate { + node: Option<InstructionPredicateNode>, +} + +impl Into<InstructionPredicate> for InstructionPredicateNode { + fn into(self) -> InstructionPredicate { + InstructionPredicate { node: Some(self) } + } +} + +impl InstructionPredicate { + pub fn new() -> Self { + Self { node: None } + } + + pub fn unwrap(self) -> InstructionPredicateNode { + self.node.unwrap() + } + + pub fn new_typevar_check( + inst: &Instruction, + type_var: &TypeVar, + value_type: &ValueType, + ) -> InstructionPredicateNode { + let index = inst + .value_opnums + .iter() + .enumerate() + .find(|(_, &op_num)| inst.operands_in[op_num].type_var().unwrap() == type_var) + .unwrap() + .0; + InstructionPredicateNode::TypePredicate(TypePredicateNode::TypeVarCheck( + index, + value_type.rust_name(), + )) + } + + pub fn new_ctrl_typevar_check(value_type: &ValueType) -> InstructionPredicateNode { + InstructionPredicateNode::TypePredicate(TypePredicateNode::CtrlTypeVarCheck( + value_type.rust_name(), + )) + } + + pub fn new_is_field_equal( + format: &InstructionFormat, + field_name: &'static str, + imm_value: String, + ) -> InstructionPredicateNode { + InstructionPredicateNode::FormatPredicate(FormatPredicateNode::new( + format, + field_name, + FormatPredicateKind::IsEqual(imm_value), + )) + } + + /// Used only for the AST module, which directly passes in the format field. 
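+ /// Unlike `new_is_field_equal`, this doesn't look the immediate up by name in the format:
+ /// the caller already holds the resolved `FormatField`.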
+ pub fn new_is_field_equal_ast( + format: &InstructionFormat, + field: &FormatField, + imm_value: String, + ) -> InstructionPredicateNode { + InstructionPredicateNode::FormatPredicate(FormatPredicateNode::new_raw( + format, + field.member, + FormatPredicateKind::IsEqual(imm_value), + )) + } + + pub fn new_is_signed_int( + format: &InstructionFormat, + field_name: &'static str, + width: usize, + scale: usize, + ) -> InstructionPredicateNode { + InstructionPredicateNode::FormatPredicate(FormatPredicateNode::new( + format, + field_name, + FormatPredicateKind::IsSignedInt(width, scale), + )) + } + + pub fn new_is_unsigned_int( + format: &InstructionFormat, + field_name: &'static str, + width: usize, + scale: usize, + ) -> InstructionPredicateNode { + InstructionPredicateNode::FormatPredicate(FormatPredicateNode::new( + format, + field_name, + FormatPredicateKind::IsUnsignedInt(width, scale), + )) + } + + pub fn new_is_zero_int( + format: &InstructionFormat, + field_name: &'static str, + ) -> InstructionPredicateNode { + InstructionPredicateNode::FormatPredicate(FormatPredicateNode::new( + format, + field_name, + FormatPredicateKind::IsZeroInt, + )) + } + + pub fn new_is_zero_32bit_float( + format: &InstructionFormat, + field_name: &'static str, + ) -> InstructionPredicateNode { + InstructionPredicateNode::FormatPredicate(FormatPredicateNode::new( + format, + field_name, + FormatPredicateKind::IsZero32BitFloat, + )) + } + + pub fn new_is_zero_64bit_float( + format: &InstructionFormat, + field_name: &'static str, + ) -> InstructionPredicateNode { + InstructionPredicateNode::FormatPredicate(FormatPredicateNode::new( + format, + field_name, + FormatPredicateKind::IsZero64BitFloat, + )) + } + + pub fn new_is_all_zeroes( + format: &InstructionFormat, + field_name: &'static str, + ) -> InstructionPredicateNode { + InstructionPredicateNode::FormatPredicate(FormatPredicateNode::new( + format, + field_name, + FormatPredicateKind::IsAllZeroes, + )) + } + + pub fn new_is_all_ones( + format: &InstructionFormat, + field_name: &'static str, + ) -> InstructionPredicateNode { + InstructionPredicateNode::FormatPredicate(FormatPredicateNode::new( + format, + field_name, + FormatPredicateKind::IsAllOnes, + )) + } + + pub fn new_length_equals(format: &InstructionFormat, size: usize) -> InstructionPredicateNode { + assert!( + format.has_value_list, + "the format must be variadic in number of arguments" + ); + InstructionPredicateNode::FormatPredicate(FormatPredicateNode::new_raw( + format, + "args", + FormatPredicateKind::LengthEquals(size), + )) + } + + pub fn new_is_colocated_func( + format: &InstructionFormat, + field_name: &'static str, + ) -> InstructionPredicateNode { + InstructionPredicateNode::FormatPredicate(FormatPredicateNode::new( + format, + field_name, + FormatPredicateKind::IsColocatedFunc, + )) + } + + pub fn new_is_colocated_data(formats: &Formats) -> InstructionPredicateNode { + let format = &formats.unary_global_value; + InstructionPredicateNode::FormatPredicate(FormatPredicateNode::new( + &*format, + "global_value", + FormatPredicateKind::IsColocatedData, + )) + } + + pub fn and(mut self, new_node: InstructionPredicateNode) -> Self { + let node = self.node; + let mut and_nodes = match node { + Some(node) => match node { + InstructionPredicateNode::And(nodes) => nodes, + InstructionPredicateNode::Or(_) => { + panic!("Can't mix and/or without implementing operator precedence!") + } + _ => vec![node], + }, + _ => Vec::new(), + }; + and_nodes.push(new_node); + self.node = 
Some(InstructionPredicateNode::And(and_nodes)); + self + } + + pub fn or(mut self, new_node: InstructionPredicateNode) -> Self { + let node = self.node; + let mut or_nodes = match node { + Some(node) => match node { + InstructionPredicateNode::Or(nodes) => nodes, + InstructionPredicateNode::And(_) => { + panic!("Can't mix and/or without implementing operator precedence!") + } + _ => vec![node], + }, + _ => Vec::new(), + }; + or_nodes.push(new_node); + self.node = Some(InstructionPredicateNode::Or(or_nodes)); + self + } + + pub fn rust_predicate(&self, func_str: &str) -> Option<String> { + self.node.as_ref().map(|root| root.rust_predicate(func_str)) + } + + /// Returns the type predicate if this is one, or None otherwise. + pub fn type_predicate(&self, func_str: &str) -> Option<String> { + let node = self.node.as_ref().unwrap(); + if node.is_type_predicate() { + Some(node.rust_predicate(func_str)) + } else { + None + } + } + + /// Returns references to all the nodes that are leaves in the condition (i.e. by flattening + /// AND/OR). + pub fn collect_leaves(&self) -> Vec<&InstructionPredicateNode> { + self.node.as_ref().unwrap().collect_leaves() + } +} + +#[derive(Copy, Clone, PartialEq, Eq, Hash, PartialOrd, Ord)] +pub(crate) struct InstructionPredicateNumber(u32); +entity_impl!(InstructionPredicateNumber); + +pub(crate) type InstructionPredicateMap = + PrimaryMap<InstructionPredicateNumber, InstructionPredicate>; + +/// A registry of predicates to help deduplicating them, during Encodings construction. When the +/// construction process is over, it needs to be extracted with `extract` and associated to the +/// TargetIsa. +pub(crate) struct InstructionPredicateRegistry { + /// Maps a predicate number to its actual predicate. + map: InstructionPredicateMap, + + /// Inverse map: maps a predicate to its predicate number. This is used before inserting a + /// predicate, to check whether it already exists. + inverted_map: HashMap<InstructionPredicate, InstructionPredicateNumber>, +} + +impl InstructionPredicateRegistry { + pub fn new() -> Self { + Self { + map: PrimaryMap::new(), + inverted_map: HashMap::new(), + } + } + pub fn insert(&mut self, predicate: InstructionPredicate) -> InstructionPredicateNumber { + match self.inverted_map.get(&predicate) { + Some(&found) => found, + None => { + let key = self.map.push(predicate.clone()); + self.inverted_map.insert(predicate, key); + key + } + } + } + pub fn extract(self) -> InstructionPredicateMap { + self.map + } +} + +/// An instruction specification, containing an instruction that has bound types or not. 
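+///
+/// For example (names illustrative), `(&iadd).into()` yields `InstSpec::Inst`, while
+/// `iadd.bind(I32).into()` yields `InstSpec::Bound`.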
+pub(crate) enum InstSpec { + Inst(Instruction), + Bound(BoundInstruction), +} + +impl InstSpec { + pub fn inst(&self) -> &Instruction { + match &self { + InstSpec::Inst(inst) => inst, + InstSpec::Bound(bound_inst) => &bound_inst.inst, + } + } +} + +impl Bindable for InstSpec { + fn bind(&self, parameter: impl Into<BindParameter>) -> BoundInstruction { + match self { + InstSpec::Inst(inst) => inst.bind(parameter.into()), + InstSpec::Bound(inst) => inst.bind(parameter.into()), + } + } +} + +impl Into<InstSpec> for &Instruction { + fn into(self) -> InstSpec { + InstSpec::Inst(self.clone()) + } +} + +impl Into<InstSpec> for BoundInstruction { + fn into(self) -> InstSpec { + InstSpec::Bound(self) + } +} + +#[cfg(test)] +mod test { + use super::*; + use crate::cdsl::formats::InstructionFormatBuilder; + use crate::cdsl::operands::{OperandKind, OperandKindFields}; + use crate::cdsl::typevar::TypeSetBuilder; + use crate::shared::types::Int::{I32, I64}; + + fn field_to_operand(index: usize, field: OperandKindFields) -> Operand { + // Pretend the index string is &'static. + let name = Box::leak(index.to_string().into_boxed_str()); + // Format's name / rust_type don't matter here. + let kind = OperandKind::new(name, name, field); + let operand = Operand::new(name, kind); + operand + } + + fn field_to_operands(types: Vec<OperandKindFields>) -> Vec<Operand> { + types + .iter() + .enumerate() + .map(|(i, f)| field_to_operand(i, f.clone())) + .collect() + } + + fn build_fake_instruction( + inputs: Vec<OperandKindFields>, + outputs: Vec<OperandKindFields>, + ) -> Instruction { + // Setup a format from the input operands. + let mut format = InstructionFormatBuilder::new("fake"); + for (i, f) in inputs.iter().enumerate() { + match f { + OperandKindFields::TypeVar(_) => format = format.value(), + OperandKindFields::ImmValue => { + format = format.imm(&field_to_operand(i, f.clone()).kind) + } + _ => {} + }; + } + let format = format.build(); + + // Create the fake instruction. + InstructionBuilder::new("fake", "A fake instruction for testing.", &format) + .operands_in(field_to_operands(inputs).iter().collect()) + .operands_out(field_to_operands(outputs).iter().collect()) + .build(OpcodeNumber(42)) + } + + #[test] + fn ensure_bound_instructions_can_bind_lane_types() { + let type1 = TypeSetBuilder::new().ints(8..64).build(); + let in1 = OperandKindFields::TypeVar(TypeVar::new("a", "...", type1)); + let inst = build_fake_instruction(vec![in1], vec![]); + inst.bind(LaneType::Int(I32)); + } + + #[test] + fn ensure_bound_instructions_can_bind_immediates() { + let inst = build_fake_instruction(vec![OperandKindFields::ImmValue], vec![]); + let bound_inst = inst.bind(Immediate::IntCC(IntCC::Equal)); + assert!(bound_inst.verify_bindings().is_ok()); + } + + #[test] + #[should_panic] + fn ensure_instructions_fail_to_bind() { + let inst = build_fake_instruction(vec![], vec![]); + inst.bind(BindParameter::Lane(LaneType::Int(I32))); + // Trying to bind to an instruction with no inputs should fail. 
+ } + + #[test] + #[should_panic] + fn ensure_bound_instructions_fail_to_bind_too_many_types() { + let type1 = TypeSetBuilder::new().ints(8..64).build(); + let in1 = OperandKindFields::TypeVar(TypeVar::new("a", "...", type1)); + let inst = build_fake_instruction(vec![in1], vec![]); + inst.bind(LaneType::Int(I32)).bind(LaneType::Int(I64)); + } + + #[test] + #[should_panic] + fn ensure_instructions_fail_to_bind_too_many_immediates() { + let inst = build_fake_instruction(vec![OperandKindFields::ImmValue], vec![]); + inst.bind(BindParameter::Immediate(Immediate::IntCC(IntCC::Equal))) + .bind(BindParameter::Immediate(Immediate::IntCC(IntCC::Equal))); + // Trying to bind too many immediates to an instruction should fail; note that the immediate + // values are nonsensical but irrelevant to the purpose of this test. + } +} diff --git a/third_party/rust/cranelift-codegen-meta/src/cdsl/isa.rs b/third_party/rust/cranelift-codegen-meta/src/cdsl/isa.rs new file mode 100644 index 0000000000..512105d09a --- /dev/null +++ b/third_party/rust/cranelift-codegen-meta/src/cdsl/isa.rs @@ -0,0 +1,99 @@ +use std::collections::HashSet; +use std::iter::FromIterator; + +use crate::cdsl::cpu_modes::CpuMode; +use crate::cdsl::instructions::{InstructionGroup, InstructionPredicateMap}; +use crate::cdsl::recipes::Recipes; +use crate::cdsl::regs::IsaRegs; +use crate::cdsl::settings::SettingGroup; +use crate::cdsl::xform::{TransformGroupIndex, TransformGroups}; + +pub(crate) struct TargetIsa { + pub name: &'static str, + pub instructions: InstructionGroup, + pub settings: SettingGroup, + pub regs: IsaRegs, + pub recipes: Recipes, + pub cpu_modes: Vec<CpuMode>, + pub encodings_predicates: InstructionPredicateMap, + + /// TransformGroupIndex are global to all the ISAs, while we want to have indices into the + /// local array of transform groups that are directly used. We use this map to get this + /// information. + pub local_transform_groups: Vec<TransformGroupIndex>, +} + +impl TargetIsa { + pub fn new( + name: &'static str, + instructions: InstructionGroup, + settings: SettingGroup, + regs: IsaRegs, + recipes: Recipes, + cpu_modes: Vec<CpuMode>, + encodings_predicates: InstructionPredicateMap, + ) -> Self { + // Compute the local TransformGroup index. + let mut local_transform_groups = Vec::new(); + for cpu_mode in &cpu_modes { + let transform_groups = cpu_mode.direct_transform_groups(); + for group_index in transform_groups { + // find() is fine here: the number of transform group is < 5 as of June 2019. + if local_transform_groups + .iter() + .find(|&val| group_index == *val) + .is_none() + { + local_transform_groups.push(group_index); + } + } + } + + Self { + name, + instructions, + settings, + regs, + recipes, + cpu_modes, + encodings_predicates, + local_transform_groups, + } + } + + /// Returns a deterministically ordered, deduplicated list of TransformGroupIndex for the + /// transitive set of TransformGroup this TargetIsa uses. + pub fn transitive_transform_groups( + &self, + all_groups: &TransformGroups, + ) -> Vec<TransformGroupIndex> { + let mut set = HashSet::new(); + + for &root in self.local_transform_groups.iter() { + set.insert(root); + let mut base = root; + // Follow the chain of chain_with. 
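+ // For instance, if group A declares `chain_with(B)` and B in turn chains with C,
+ // requesting A pulls both B and C into the transitive set.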
+ while let Some(chain_with) = &all_groups.get(base).chain_with { + set.insert(*chain_with); + base = *chain_with; + } + } + + let mut vec = Vec::from_iter(set); + vec.sort(); + vec + } + + /// Returns a deterministically ordered, deduplicated list of TransformGroupIndex for the directly + /// reachable set of TransformGroup this TargetIsa uses. + pub fn direct_transform_groups(&self) -> &Vec<TransformGroupIndex> { + &self.local_transform_groups + } + + pub fn translate_group_index(&self, group_index: TransformGroupIndex) -> usize { + self.local_transform_groups + .iter() + .position(|&val| val == group_index) + .expect("TransformGroup unused by this TargetIsa!") + } +} diff --git a/third_party/rust/cranelift-codegen-meta/src/cdsl/mod.rs b/third_party/rust/cranelift-codegen-meta/src/cdsl/mod.rs new file mode 100644 index 0000000000..698b64dff3 --- /dev/null +++ b/third_party/rust/cranelift-codegen-meta/src/cdsl/mod.rs @@ -0,0 +1,89 @@ +//! Cranelift DSL classes. +//! +//! This module defines the classes that are used to define Cranelift +//! instructions and other entities. + +#[macro_use] +pub mod ast; +pub mod cpu_modes; +pub mod encodings; +pub mod formats; +pub mod instructions; +pub mod isa; +pub mod operands; +pub mod recipes; +pub mod regs; +pub mod settings; +pub mod type_inference; +pub mod types; +pub mod typevar; +pub mod xform; + +/// A macro that converts boolean settings into predicates to look more natural. +#[macro_export] +macro_rules! predicate { + ($a:ident && $($b:tt)*) => { + PredicateNode::And(Box::new($a.into()), Box::new(predicate!($($b)*))) + }; + (!$a:ident && $($b:tt)*) => { + PredicateNode::And( + Box::new(PredicateNode::Not(Box::new($a.into()))), + Box::new(predicate!($($b)*)) + ) + }; + (!$a:ident) => { + PredicateNode::Not(Box::new($a.into())) + }; + ($a:ident) => { + $a.into() + }; +} + +/// A macro that joins boolean settings into a list (e.g. `preset!(feature_a && feature_b)`). +#[macro_export] +macro_rules! preset { + () => { + vec![] + }; + ($($x:ident)&&*) => { + { + let mut v = Vec::new(); + $( + v.push($x.into()); + )* + v + } + }; +} + +/// Convert the string `s` to CamelCase. +pub fn camel_case(s: &str) -> String { + let mut output_chars = String::with_capacity(s.len()); + + let mut capitalize = true; + for curr_char in s.chars() { + if curr_char == '_' { + capitalize = true; + } else { + if capitalize { + output_chars.extend(curr_char.to_uppercase()); + } else { + output_chars.push(curr_char); + } + capitalize = false; + } + } + + output_chars +} + +#[cfg(test)] +mod tests { + use super::camel_case; + + #[test] + fn camel_case_works() { + assert_eq!(camel_case("x"), "X"); + assert_eq!(camel_case("camel_case"), "CamelCase"); + } +} diff --git a/third_party/rust/cranelift-codegen-meta/src/cdsl/operands.rs b/third_party/rust/cranelift-codegen-meta/src/cdsl/operands.rs new file mode 100644 index 0000000000..605df24862 --- /dev/null +++ b/third_party/rust/cranelift-codegen-meta/src/cdsl/operands.rs @@ -0,0 +1,173 @@ +use std::collections::HashMap; + +use crate::cdsl::typevar::TypeVar; + +/// An instruction operand can be an *immediate*, an *SSA value*, or an *entity reference*. The +/// type of the operand is one of: +/// +/// 1. A `ValueType` instance indicates an SSA value operand with a concrete type. +/// +/// 2. A `TypeVar` instance indicates an SSA value operand, and the instruction is polymorphic over +/// the possible concrete types that the type variable can assume. +/// +/// 3. 
An `ImmediateKind` instance indicates an immediate operand whose value is encoded in the +/// instruction itself rather than being passed as an SSA value. +/// +/// 4. An `EntityRefKind` instance indicates an operand that references another entity in the +/// function, typically something declared in the function preamble. +#[derive(Clone, Debug)] +pub(crate) struct Operand { + /// Name of the operand variable, as it appears in function parameters, legalizations, etc. + pub name: &'static str, + + /// Type of the operand. + pub kind: OperandKind, + + doc: Option<&'static str>, +} + +impl Operand { + pub fn new(name: &'static str, kind: impl Into<OperandKind>) -> Self { + Self { + name, + doc: None, + kind: kind.into(), + } + } + pub fn with_doc(mut self, doc: &'static str) -> Self { + self.doc = Some(doc); + self + } + + pub fn doc(&self) -> Option<&str> { + if let Some(doc) = &self.doc { + return Some(doc); + } + match &self.kind.fields { + OperandKindFields::TypeVar(tvar) => Some(&tvar.doc), + _ => self.kind.doc(), + } + } + + pub fn is_value(&self) -> bool { + match self.kind.fields { + OperandKindFields::TypeVar(_) => true, + _ => false, + } + } + + pub fn type_var(&self) -> Option<&TypeVar> { + match &self.kind.fields { + OperandKindFields::TypeVar(typevar) => Some(typevar), + _ => None, + } + } + + pub fn is_varargs(&self) -> bool { + match self.kind.fields { + OperandKindFields::VariableArgs => true, + _ => false, + } + } + + /// Returns true if the operand has an immediate kind or is an EntityRef. + pub fn is_immediate_or_entityref(&self) -> bool { + match self.kind.fields { + OperandKindFields::ImmEnum(_) + | OperandKindFields::ImmValue + | OperandKindFields::EntityRef => true, + _ => false, + } + } + + /// Returns true if the operand has an immediate kind. + pub fn is_immediate(&self) -> bool { + match self.kind.fields { + OperandKindFields::ImmEnum(_) | OperandKindFields::ImmValue => true, + _ => false, + } + } + + pub fn is_cpu_flags(&self) -> bool { + match &self.kind.fields { + OperandKindFields::TypeVar(type_var) + if type_var.name == "iflags" || type_var.name == "fflags" => + { + true + } + _ => false, + } + } +} + +pub type EnumValues = HashMap<&'static str, &'static str>; + +#[derive(Clone, Debug)] +pub(crate) enum OperandKindFields { + EntityRef, + VariableArgs, + ImmValue, + ImmEnum(EnumValues), + TypeVar(TypeVar), +} + +#[derive(Clone, Debug)] +pub(crate) struct OperandKind { + /// String representation of the Rust type mapping to this OperandKind. + pub rust_type: &'static str, + + /// Name of this OperandKind in the format's member field. + pub rust_field_name: &'static str, + + /// Type-specific fields for this OperandKind. 
+ pub fields: OperandKindFields, + + doc: Option<&'static str>, +} + +impl OperandKind { + pub fn new( + rust_field_name: &'static str, + rust_type: &'static str, + fields: OperandKindFields, + ) -> Self { + Self { + rust_field_name, + rust_type, + fields, + doc: None, + } + } + pub fn with_doc(mut self, doc: &'static str) -> Self { + assert!(self.doc.is_none()); + self.doc = Some(doc); + self + } + fn doc(&self) -> Option<&str> { + if let Some(doc) = &self.doc { + return Some(doc); + } + match &self.fields { + OperandKindFields::TypeVar(type_var) => Some(&type_var.doc), + OperandKindFields::ImmEnum(_) + | OperandKindFields::ImmValue + | OperandKindFields::EntityRef + | OperandKindFields::VariableArgs => None, + } + } +} + +impl Into<OperandKind> for &TypeVar { + fn into(self) -> OperandKind { + OperandKind::new( + "value", + "ir::Value", + OperandKindFields::TypeVar(self.into()), + ) + } +} +impl Into<OperandKind> for &OperandKind { + fn into(self) -> OperandKind { + self.clone() + } +} diff --git a/third_party/rust/cranelift-codegen-meta/src/cdsl/recipes.rs b/third_party/rust/cranelift-codegen-meta/src/cdsl/recipes.rs new file mode 100644 index 0000000000..dfe4cd67a5 --- /dev/null +++ b/third_party/rust/cranelift-codegen-meta/src/cdsl/recipes.rs @@ -0,0 +1,298 @@ +use std::rc::Rc; + +use cranelift_entity::{entity_impl, PrimaryMap}; + +use crate::cdsl::formats::InstructionFormat; +use crate::cdsl::instructions::InstructionPredicate; +use crate::cdsl::regs::RegClassIndex; +use crate::cdsl::settings::SettingPredicateNumber; + +/// A specific register in a register class. +/// +/// A register is identified by the top-level register class it belongs to and +/// its first register unit. +/// +/// Specific registers are used to describe constraints on instructions where +/// some operands must use a fixed register. +/// +/// Register instances can be created with the constructor, or accessed as +/// attributes on the register class: `GPR.rcx`. +#[derive(Copy, Clone, Hash, PartialEq, Eq)] +pub(crate) struct Register { + pub regclass: RegClassIndex, + pub unit: u8, +} + +impl Register { + pub fn new(regclass: RegClassIndex, unit: u8) -> Self { + Self { regclass, unit } + } +} + +/// An operand that must be in a stack slot. +/// +/// A `Stack` object can be used to indicate an operand constraint for a value +/// operand that must live in a stack slot. +#[derive(Copy, Clone, Hash, PartialEq)] +pub(crate) struct Stack { + pub regclass: RegClassIndex, +} + +impl Stack { + pub fn new(regclass: RegClassIndex) -> Self { + Self { regclass } + } + pub fn stack_base_mask(self) -> &'static str { + // TODO: Make this configurable instead of just using the SP. 
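+ // A mask value of 1 sets only the lowest bit, i.e. the SP base mentioned in the TODO
+ // above; other stack bases would need additional bits.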
+ "StackBaseMask(1)" + } +} + +#[derive(Clone, Hash, PartialEq)] +pub(crate) struct BranchRange { + pub inst_size: u64, + pub range: u64, +} + +#[derive(Copy, Clone, Hash, PartialEq)] +pub(crate) enum OperandConstraint { + RegClass(RegClassIndex), + FixedReg(Register), + TiedInput(usize), + Stack(Stack), +} + +impl Into<OperandConstraint> for RegClassIndex { + fn into(self) -> OperandConstraint { + OperandConstraint::RegClass(self) + } +} + +impl Into<OperandConstraint> for Register { + fn into(self) -> OperandConstraint { + OperandConstraint::FixedReg(self) + } +} + +impl Into<OperandConstraint> for usize { + fn into(self) -> OperandConstraint { + OperandConstraint::TiedInput(self) + } +} + +impl Into<OperandConstraint> for Stack { + fn into(self) -> OperandConstraint { + OperandConstraint::Stack(self) + } +} + +/// A recipe for encoding instructions with a given format. +/// +/// Many different instructions can be encoded by the same recipe, but they +/// must all have the same instruction format. +/// +/// The `operands_in` and `operands_out` arguments are tuples specifying the register +/// allocation constraints for the value operands and results respectively. The +/// possible constraints for an operand are: +/// +/// - A `RegClass` specifying the set of allowed registers. +/// - A `Register` specifying a fixed-register operand. +/// - An integer indicating that this result is tied to a value operand, so +/// they must use the same register. +/// - A `Stack` specifying a value in a stack slot. +/// +/// The `branch_range` argument must be provided for recipes that can encode +/// branch instructions. It is an `(origin, bits)` tuple describing the exact +/// range that can be encoded in a branch instruction. +#[derive(Clone)] +pub(crate) struct EncodingRecipe { + /// Short mnemonic name for this recipe. + pub name: String, + + /// Associated instruction format. + pub format: Rc<InstructionFormat>, + + /// Base number of bytes in the binary encoded instruction. + pub base_size: u64, + + /// Tuple of register constraints for value operands. + pub operands_in: Vec<OperandConstraint>, + + /// Tuple of register constraints for results. + pub operands_out: Vec<OperandConstraint>, + + /// Function name to use when computing actual size. + pub compute_size: &'static str, + + /// `(origin, bits)` range for branches. + pub branch_range: Option<BranchRange>, + + /// This instruction clobbers `iflags` and `fflags`; true by default. + pub clobbers_flags: bool, + + /// Instruction predicate. + pub inst_predicate: Option<InstructionPredicate>, + + /// ISA predicate. + pub isa_predicate: Option<SettingPredicateNumber>, + + /// Rust code for binary emission. + pub emit: Option<String>, +} + +// Implement PartialEq ourselves: take all the fields into account but the name. +impl PartialEq for EncodingRecipe { + fn eq(&self, other: &Self) -> bool { + Rc::ptr_eq(&self.format, &other.format) + && self.base_size == other.base_size + && self.operands_in == other.operands_in + && self.operands_out == other.operands_out + && self.compute_size == other.compute_size + && self.branch_range == other.branch_range + && self.clobbers_flags == other.clobbers_flags + && self.inst_predicate == other.inst_predicate + && self.isa_predicate == other.isa_predicate + && self.emit == other.emit + } +} + +// To allow using it in a hashmap. 
+impl Eq for EncodingRecipe {} + +#[derive(Copy, Clone, PartialEq, Eq, Hash, PartialOrd, Ord)] +pub(crate) struct EncodingRecipeNumber(u32); +entity_impl!(EncodingRecipeNumber); + +pub(crate) type Recipes = PrimaryMap<EncodingRecipeNumber, EncodingRecipe>; + +#[derive(Clone)] +pub(crate) struct EncodingRecipeBuilder { + pub name: String, + format: Rc<InstructionFormat>, + pub base_size: u64, + pub operands_in: Option<Vec<OperandConstraint>>, + pub operands_out: Option<Vec<OperandConstraint>>, + pub compute_size: Option<&'static str>, + pub branch_range: Option<BranchRange>, + pub emit: Option<String>, + clobbers_flags: Option<bool>, + inst_predicate: Option<InstructionPredicate>, + isa_predicate: Option<SettingPredicateNumber>, +} + +impl EncodingRecipeBuilder { + pub fn new(name: impl Into<String>, format: &Rc<InstructionFormat>, base_size: u64) -> Self { + Self { + name: name.into(), + format: format.clone(), + base_size, + operands_in: None, + operands_out: None, + compute_size: None, + branch_range: None, + emit: None, + clobbers_flags: None, + inst_predicate: None, + isa_predicate: None, + } + } + + // Setters. + pub fn operands_in(mut self, constraints: Vec<impl Into<OperandConstraint>>) -> Self { + assert!(self.operands_in.is_none()); + self.operands_in = Some( + constraints + .into_iter() + .map(|constr| constr.into()) + .collect(), + ); + self + } + pub fn operands_out(mut self, constraints: Vec<impl Into<OperandConstraint>>) -> Self { + assert!(self.operands_out.is_none()); + self.operands_out = Some( + constraints + .into_iter() + .map(|constr| constr.into()) + .collect(), + ); + self + } + pub fn clobbers_flags(mut self, flag: bool) -> Self { + assert!(self.clobbers_flags.is_none()); + self.clobbers_flags = Some(flag); + self + } + pub fn emit(mut self, code: impl Into<String>) -> Self { + assert!(self.emit.is_none()); + self.emit = Some(code.into()); + self + } + pub fn branch_range(mut self, range: (u64, u64)) -> Self { + assert!(self.branch_range.is_none()); + self.branch_range = Some(BranchRange { + inst_size: range.0, + range: range.1, + }); + self + } + pub fn isa_predicate(mut self, pred: SettingPredicateNumber) -> Self { + assert!(self.isa_predicate.is_none()); + self.isa_predicate = Some(pred); + self + } + pub fn inst_predicate(mut self, inst_predicate: impl Into<InstructionPredicate>) -> Self { + assert!(self.inst_predicate.is_none()); + self.inst_predicate = Some(inst_predicate.into()); + self + } + pub fn compute_size(mut self, compute_size: &'static str) -> Self { + assert!(self.compute_size.is_none()); + self.compute_size = Some(compute_size); + self + } + + pub fn build(self) -> EncodingRecipe { + let operands_in = self.operands_in.unwrap_or_default(); + let operands_out = self.operands_out.unwrap_or_default(); + + // The number of input constraints must match the number of format input operands. + if !self.format.has_value_list { + assert!( + operands_in.len() == self.format.num_value_operands, + format!( + "missing operand constraints for recipe {} (format {})", + self.name, self.format.name + ) + ); + } + + // Ensure tied inputs actually refer to existing inputs. 
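+ // A `TiedInput(n)` constraint says the operand shares its register with value input `n`,
+ // so `n` must be a valid index into `operands_in`.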
+ for constraint in operands_in.iter().chain(operands_out.iter()) { + if let OperandConstraint::TiedInput(n) = *constraint { + assert!(n < operands_in.len()); + } + } + + let compute_size = match self.compute_size { + Some(compute_size) => compute_size, + None => "base_size", + }; + + let clobbers_flags = self.clobbers_flags.unwrap_or(true); + + EncodingRecipe { + name: self.name, + format: self.format, + base_size: self.base_size, + operands_in, + operands_out, + compute_size, + branch_range: self.branch_range, + clobbers_flags, + inst_predicate: self.inst_predicate, + isa_predicate: self.isa_predicate, + emit: self.emit, + } + } +} diff --git a/third_party/rust/cranelift-codegen-meta/src/cdsl/regs.rs b/third_party/rust/cranelift-codegen-meta/src/cdsl/regs.rs new file mode 100644 index 0000000000..864826ee43 --- /dev/null +++ b/third_party/rust/cranelift-codegen-meta/src/cdsl/regs.rs @@ -0,0 +1,412 @@ +use cranelift_codegen_shared::constants; +use cranelift_entity::{entity_impl, EntityRef, PrimaryMap}; + +#[derive(Copy, Clone, PartialEq, Eq, Hash, PartialOrd, Ord)] +pub(crate) struct RegBankIndex(u32); +entity_impl!(RegBankIndex); + +pub(crate) struct RegBank { + pub name: &'static str, + pub first_unit: u8, + pub units: u8, + pub names: Vec<&'static str>, + pub prefix: &'static str, + pub pressure_tracking: bool, + pub pinned_reg: Option<u16>, + pub toprcs: Vec<RegClassIndex>, + pub classes: Vec<RegClassIndex>, +} + +impl RegBank { + pub fn new( + name: &'static str, + first_unit: u8, + units: u8, + names: Vec<&'static str>, + prefix: &'static str, + pressure_tracking: bool, + pinned_reg: Option<u16>, + ) -> Self { + RegBank { + name, + first_unit, + units, + names, + prefix, + pressure_tracking, + pinned_reg, + toprcs: Vec::new(), + classes: Vec::new(), + } + } + + fn unit_by_name(&self, name: &'static str) -> u8 { + let unit = if let Some(found) = self.names.iter().position(|®_name| reg_name == name) { + found + } else { + // Try to match without the bank prefix. + assert!(name.starts_with(self.prefix)); + let name_without_prefix = &name[self.prefix.len()..]; + if let Some(found) = self + .names + .iter() + .position(|®_name| reg_name == name_without_prefix) + { + found + } else { + // Ultimate try: try to parse a number and use this in the array, eg r15 on x86. + if let Ok(as_num) = name_without_prefix.parse::<u8>() { + assert!( + as_num < self.units, + "trying to get {}, but bank only has {} registers!", + name, + self.units + ); + as_num as usize + } else { + panic!("invalid register name {}", name); + } + } + }; + self.first_unit + (unit as u8) + } +} + +#[derive(Copy, Clone, PartialEq, Eq, Hash, PartialOrd, Ord, Debug)] +pub(crate) struct RegClassIndex(u32); +entity_impl!(RegClassIndex); + +pub(crate) struct RegClass { + pub name: &'static str, + pub index: RegClassIndex, + pub width: u8, + pub bank: RegBankIndex, + pub toprc: RegClassIndex, + pub count: u8, + pub start: u8, + pub subclasses: Vec<RegClassIndex>, +} + +impl RegClass { + pub fn new( + name: &'static str, + index: RegClassIndex, + width: u8, + bank: RegBankIndex, + toprc: RegClassIndex, + count: u8, + start: u8, + ) -> Self { + Self { + name, + index, + width, + bank, + toprc, + count, + start, + subclasses: Vec::new(), + } + } + + /// Compute a bit-mask of subclasses, including self. + pub fn subclass_mask(&self) -> u64 { + let mut m = 1 << self.index.index(); + for rc in self.subclasses.iter() { + m |= 1 << rc.index(); + } + m + } + + /// Compute a bit-mask of the register units allocated by this register class. 
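+ ///
+ /// The mask is returned as three 32-bit words covering register units 0..95. For example,
+ /// a class of 16 single-unit registers starting at unit 0 yields `[0x0000_ffff, 0, 0]`.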
+ pub fn mask(&self, bank_first_unit: u8) -> Vec<u32> { + let mut u = (self.start + bank_first_unit) as usize; + let mut out_mask = vec![0, 0, 0]; + for _ in 0..self.count { + out_mask[u / 32] |= 1 << (u % 32); + u += self.width as usize; + } + out_mask + } +} + +pub(crate) enum RegClassProto { + TopLevel(RegBankIndex), + SubClass(RegClassIndex), +} + +pub(crate) struct RegClassBuilder { + pub name: &'static str, + pub width: u8, + pub count: u8, + pub start: u8, + pub proto: RegClassProto, +} + +impl RegClassBuilder { + pub fn new_toplevel(name: &'static str, bank: RegBankIndex) -> Self { + Self { + name, + width: 1, + count: 0, + start: 0, + proto: RegClassProto::TopLevel(bank), + } + } + pub fn subclass_of( + name: &'static str, + parent_index: RegClassIndex, + start: u8, + stop: u8, + ) -> Self { + assert!(stop >= start); + Self { + name, + width: 0, + count: stop - start, + start, + proto: RegClassProto::SubClass(parent_index), + } + } + pub fn count(mut self, count: u8) -> Self { + self.count = count; + self + } + pub fn width(mut self, width: u8) -> Self { + match self.proto { + RegClassProto::TopLevel(_) => self.width = width, + RegClassProto::SubClass(_) => panic!("Subclasses inherit their parent's width."), + } + self + } +} + +pub(crate) struct RegBankBuilder { + pub name: &'static str, + pub units: u8, + pub names: Vec<&'static str>, + pub prefix: &'static str, + pub pressure_tracking: Option<bool>, + pub pinned_reg: Option<u16>, +} + +impl RegBankBuilder { + pub fn new(name: &'static str, prefix: &'static str) -> Self { + Self { + name, + units: 0, + names: vec![], + prefix, + pressure_tracking: None, + pinned_reg: None, + } + } + pub fn units(mut self, units: u8) -> Self { + self.units = units; + self + } + pub fn names(mut self, names: Vec<&'static str>) -> Self { + self.names = names; + self + } + pub fn track_pressure(mut self, track: bool) -> Self { + self.pressure_tracking = Some(track); + self + } + pub fn pinned_reg(mut self, unit: u16) -> Self { + assert!(unit < u16::from(self.units)); + self.pinned_reg = Some(unit); + self + } +} + +pub(crate) struct IsaRegsBuilder { + pub banks: PrimaryMap<RegBankIndex, RegBank>, + pub classes: PrimaryMap<RegClassIndex, RegClass>, +} + +impl IsaRegsBuilder { + pub fn new() -> Self { + Self { + banks: PrimaryMap::new(), + classes: PrimaryMap::new(), + } + } + + pub fn add_bank(&mut self, builder: RegBankBuilder) -> RegBankIndex { + let first_unit = if self.banks.is_empty() { + 0 + } else { + let last = &self.banks.last().unwrap(); + let first_available_unit = (last.first_unit + last.units) as i8; + let units = builder.units; + let align = if units.is_power_of_two() { + units + } else { + units.next_power_of_two() + } as i8; + (first_available_unit + align - 1) & -align + } as u8; + + self.banks.push(RegBank::new( + builder.name, + first_unit, + builder.units, + builder.names, + builder.prefix, + builder + .pressure_tracking + .expect("Pressure tracking must be explicitly set"), + builder.pinned_reg, + )) + } + + pub fn add_class(&mut self, builder: RegClassBuilder) -> RegClassIndex { + let class_index = self.classes.next_key(); + + // Finish delayed construction of RegClass. 
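+ // A top-level class is defined directly against its bank; a subclass inherits the bank,
+ // top-level class and register width from its parent and only offsets its start within it.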
+ let (bank, toprc, start, width) = match builder.proto { + RegClassProto::TopLevel(bank_index) => { + self.banks + .get_mut(bank_index) + .unwrap() + .toprcs + .push(class_index); + (bank_index, class_index, builder.start, builder.width) + } + RegClassProto::SubClass(parent_class_index) => { + assert!(builder.width == 0); + let (bank, toprc, start, width) = { + let parent = self.classes.get(parent_class_index).unwrap(); + (parent.bank, parent.toprc, parent.start, parent.width) + }; + for reg_class in self.classes.values_mut() { + if reg_class.toprc == toprc { + reg_class.subclasses.push(class_index); + } + } + let subclass_start = start + builder.start * width; + (bank, toprc, subclass_start, width) + } + }; + + let reg_bank_units = self.banks.get(bank).unwrap().units; + assert!(start < reg_bank_units); + + let count = if builder.count != 0 { + builder.count + } else { + reg_bank_units / width + }; + + let reg_class = RegClass::new(builder.name, class_index, width, bank, toprc, count, start); + self.classes.push(reg_class); + + let reg_bank = self.banks.get_mut(bank).unwrap(); + reg_bank.classes.push(class_index); + + class_index + } + + /// Checks that the set of register classes satisfies: + /// + /// 1. Closed under intersection: The intersection of any two register + /// classes in the set is either empty or identical to a member of the + /// set. + /// 2. There are no identical classes under different names. + /// 3. Classes are sorted topologically such that all subclasses have a + /// higher index that the superclass. + pub fn build(self) -> IsaRegs { + for reg_bank in self.banks.values() { + for i1 in reg_bank.classes.iter() { + for i2 in reg_bank.classes.iter() { + if i1 >= i2 { + continue; + } + + let rc1 = self.classes.get(*i1).unwrap(); + let rc2 = self.classes.get(*i2).unwrap(); + + let rc1_mask = rc1.mask(0); + let rc2_mask = rc2.mask(0); + + assert!( + rc1.width != rc2.width || rc1_mask != rc2_mask, + "no duplicates" + ); + if rc1.width != rc2.width { + continue; + } + + let mut intersect = Vec::new(); + for (a, b) in rc1_mask.iter().zip(rc2_mask.iter()) { + intersect.push(a & b); + } + if intersect == vec![0; intersect.len()] { + continue; + } + + // Classes must be topologically ordered, so the intersection can't be the + // superclass. + assert!(intersect != rc1_mask); + + // If the intersection is the second one, then it must be a subclass. 
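+ // (Since i1 < i2 and subclasses must come after their superclass, only rc2 can be
+ // contained in rc1 here, and it must then appear among rc1's subclasses.)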
+ if intersect == rc2_mask { + assert!(self + .classes + .get(*i1) + .unwrap() + .subclasses + .iter() + .any(|x| *x == *i2)); + } + } + } + } + + assert!( + self.classes.len() <= constants::MAX_NUM_REG_CLASSES, + "Too many register classes" + ); + + let num_toplevel = self + .classes + .values() + .filter(|x| x.toprc == x.index && self.banks.get(x.bank).unwrap().pressure_tracking) + .count(); + + assert!( + num_toplevel <= constants::MAX_TRACKED_TOP_RCS, + "Too many top-level register classes" + ); + + IsaRegs::new(self.banks, self.classes) + } +} + +pub(crate) struct IsaRegs { + pub banks: PrimaryMap<RegBankIndex, RegBank>, + pub classes: PrimaryMap<RegClassIndex, RegClass>, +} + +impl IsaRegs { + fn new( + banks: PrimaryMap<RegBankIndex, RegBank>, + classes: PrimaryMap<RegClassIndex, RegClass>, + ) -> Self { + Self { banks, classes } + } + + pub fn class_by_name(&self, name: &str) -> RegClassIndex { + self.classes + .values() + .find(|&class| class.name == name) + .unwrap_or_else(|| panic!("register class {} not found", name)) + .index + } + + pub fn regunit_by_name(&self, class_index: RegClassIndex, name: &'static str) -> u8 { + let bank_index = self.classes.get(class_index).unwrap().bank; + self.banks.get(bank_index).unwrap().unit_by_name(name) + } +} diff --git a/third_party/rust/cranelift-codegen-meta/src/cdsl/settings.rs b/third_party/rust/cranelift-codegen-meta/src/cdsl/settings.rs new file mode 100644 index 0000000000..217bad9955 --- /dev/null +++ b/third_party/rust/cranelift-codegen-meta/src/cdsl/settings.rs @@ -0,0 +1,407 @@ +use std::iter; + +#[derive(Clone, Copy, Hash, PartialEq, Eq)] +pub(crate) struct BoolSettingIndex(usize); + +#[derive(Hash, PartialEq, Eq)] +pub(crate) struct BoolSetting { + pub default: bool, + pub bit_offset: u8, + pub predicate_number: u8, +} + +#[derive(Hash, PartialEq, Eq)] +pub(crate) enum SpecificSetting { + Bool(BoolSetting), + Enum(Vec<&'static str>), + Num(u8), +} + +#[derive(Hash, PartialEq, Eq)] +pub(crate) struct Setting { + pub name: &'static str, + pub comment: &'static str, + pub specific: SpecificSetting, + pub byte_offset: u8, +} + +impl Setting { + pub fn default_byte(&self) -> u8 { + match self.specific { + SpecificSetting::Bool(BoolSetting { + default, + bit_offset, + .. + }) => { + if default { + 1 << bit_offset + } else { + 0 + } + } + SpecificSetting::Enum(_) => 0, + SpecificSetting::Num(default) => default, + } + } + + fn byte_for_value(&self, v: bool) -> u8 { + match self.specific { + SpecificSetting::Bool(BoolSetting { bit_offset, .. }) => { + if v { + 1 << bit_offset + } else { + 0 + } + } + _ => panic!("byte_for_value shouldn't be used for non-boolean settings."), + } + } + + fn byte_mask(&self) -> u8 { + match self.specific { + SpecificSetting::Bool(BoolSetting { bit_offset, .. 
}) => 1 << bit_offset, + _ => panic!("byte_for_value shouldn't be used for non-boolean settings."), + } + } +} + +#[derive(Hash, PartialEq, Eq)] +pub(crate) struct PresetIndex(usize); + +#[derive(Hash, PartialEq, Eq)] +pub(crate) enum PresetType { + BoolSetting(BoolSettingIndex), + OtherPreset(PresetIndex), +} + +impl Into<PresetType> for BoolSettingIndex { + fn into(self) -> PresetType { + PresetType::BoolSetting(self) + } +} +impl Into<PresetType> for PresetIndex { + fn into(self) -> PresetType { + PresetType::OtherPreset(self) + } +} + +#[derive(Hash, PartialEq, Eq)] +pub(crate) struct Preset { + pub name: &'static str, + values: Vec<BoolSettingIndex>, +} + +impl Preset { + pub fn layout(&self, group: &SettingGroup) -> Vec<(u8, u8)> { + let mut layout: Vec<(u8, u8)> = iter::repeat((0, 0)) + .take(group.settings_size as usize) + .collect(); + for bool_index in &self.values { + let setting = &group.settings[bool_index.0]; + let mask = setting.byte_mask(); + let val = setting.byte_for_value(true); + assert!((val & !mask) == 0); + let (ref mut l_mask, ref mut l_val) = + *layout.get_mut(setting.byte_offset as usize).unwrap(); + *l_mask |= mask; + *l_val = (*l_val & !mask) | val; + } + layout + } +} + +pub(crate) struct SettingGroup { + pub name: &'static str, + pub settings: Vec<Setting>, + pub bool_start_byte_offset: u8, + pub settings_size: u8, + pub presets: Vec<Preset>, + pub predicates: Vec<Predicate>, +} + +impl SettingGroup { + fn num_bool_settings(&self) -> u8 { + self.settings + .iter() + .filter(|s| { + if let SpecificSetting::Bool(_) = s.specific { + true + } else { + false + } + }) + .count() as u8 + } + + pub fn byte_size(&self) -> u8 { + let num_predicates = self.num_bool_settings() + (self.predicates.len() as u8); + self.bool_start_byte_offset + (num_predicates + 7) / 8 + } + + pub fn get_bool(&self, name: &'static str) -> (BoolSettingIndex, &Self) { + for (i, s) in self.settings.iter().enumerate() { + if let SpecificSetting::Bool(_) = s.specific { + if s.name == name { + return (BoolSettingIndex(i), self); + } + } + } + panic!("Should have found bool setting by name."); + } + + pub fn predicate_by_name(&self, name: &'static str) -> SettingPredicateNumber { + self.predicates + .iter() + .find(|pred| pred.name == name) + .unwrap_or_else(|| panic!("unknown predicate {}", name)) + .number + } +} + +/// This is the basic information needed to track the specific parts of a setting when building +/// them. +pub(crate) enum ProtoSpecificSetting { + Bool(bool), + Enum(Vec<&'static str>), + Num(u8), +} + +/// This is the information provided during building for a setting. 
+struct ProtoSetting { + name: &'static str, + comment: &'static str, + specific: ProtoSpecificSetting, +} + +#[derive(Hash, PartialEq, Eq)] +pub(crate) enum PredicateNode { + OwnedBool(BoolSettingIndex), + SharedBool(&'static str, &'static str), + Not(Box<PredicateNode>), + And(Box<PredicateNode>, Box<PredicateNode>), +} + +impl Into<PredicateNode> for BoolSettingIndex { + fn into(self) -> PredicateNode { + PredicateNode::OwnedBool(self) + } +} +impl<'a> Into<PredicateNode> for (BoolSettingIndex, &'a SettingGroup) { + fn into(self) -> PredicateNode { + let (index, group) = (self.0, self.1); + let setting = &group.settings[index.0]; + PredicateNode::SharedBool(group.name, setting.name) + } +} + +impl PredicateNode { + fn render(&self, group: &SettingGroup) -> String { + match *self { + PredicateNode::OwnedBool(bool_setting_index) => format!( + "{}.{}()", + group.name, group.settings[bool_setting_index.0].name + ), + PredicateNode::SharedBool(ref group_name, ref bool_name) => { + format!("{}.{}()", group_name, bool_name) + } + PredicateNode::And(ref lhs, ref rhs) => { + format!("{} && {}", lhs.render(group), rhs.render(group)) + } + PredicateNode::Not(ref node) => format!("!({})", node.render(group)), + } + } +} + +struct ProtoPredicate { + pub name: &'static str, + node: PredicateNode, +} + +pub(crate) type SettingPredicateNumber = u8; + +pub(crate) struct Predicate { + pub name: &'static str, + node: PredicateNode, + pub number: SettingPredicateNumber, +} + +impl Predicate { + pub fn render(&self, group: &SettingGroup) -> String { + self.node.render(group) + } +} + +pub(crate) struct SettingGroupBuilder { + name: &'static str, + settings: Vec<ProtoSetting>, + presets: Vec<Preset>, + predicates: Vec<ProtoPredicate>, +} + +impl SettingGroupBuilder { + pub fn new(name: &'static str) -> Self { + Self { + name, + settings: Vec::new(), + presets: Vec::new(), + predicates: Vec::new(), + } + } + + fn add_setting( + &mut self, + name: &'static str, + comment: &'static str, + specific: ProtoSpecificSetting, + ) { + self.settings.push(ProtoSetting { + name, + comment, + specific, + }) + } + + pub fn add_bool( + &mut self, + name: &'static str, + comment: &'static str, + default: bool, + ) -> BoolSettingIndex { + assert!( + self.predicates.is_empty(), + "predicates must be added after the boolean settings" + ); + self.add_setting(name, comment, ProtoSpecificSetting::Bool(default)); + BoolSettingIndex(self.settings.len() - 1) + } + + pub fn add_enum( + &mut self, + name: &'static str, + comment: &'static str, + values: Vec<&'static str>, + ) { + self.add_setting(name, comment, ProtoSpecificSetting::Enum(values)); + } + + pub fn add_num(&mut self, name: &'static str, comment: &'static str, default: u8) { + self.add_setting(name, comment, ProtoSpecificSetting::Num(default)); + } + + pub fn add_predicate(&mut self, name: &'static str, node: PredicateNode) { + self.predicates.push(ProtoPredicate { name, node }); + } + + pub fn add_preset(&mut self, name: &'static str, args: Vec<PresetType>) -> PresetIndex { + let mut values = Vec::new(); + for arg in args { + match arg { + PresetType::OtherPreset(index) => { + values.extend(self.presets[index.0].values.iter()); + } + PresetType::BoolSetting(index) => values.push(index), + } + } + self.presets.push(Preset { name, values }); + PresetIndex(self.presets.len() - 1) + } + + /// Compute the layout of the byte vector used to represent this settings + /// group. + /// + /// The byte vector contains the following entries in order: + /// + /// 1. 
Byte-sized settings like `NumSetting` and `EnumSetting`. + /// 2. `BoolSetting` settings. + /// 3. Precomputed named predicates. + /// 4. Other numbered predicates, including parent predicates that need to be accessible by + /// number. + /// + /// Set `self.settings_size` to the length of the byte vector prefix that + /// contains the settings. All bytes after that are computed, not + /// configured. + /// + /// Set `self.boolean_offset` to the beginning of the numbered predicates, + /// 2. in the list above. + /// + /// Assign `byte_offset` and `bit_offset` fields in all settings. + pub fn build(self) -> SettingGroup { + let mut group = SettingGroup { + name: self.name, + settings: Vec::new(), + bool_start_byte_offset: 0, + settings_size: 0, + presets: Vec::new(), + predicates: Vec::new(), + }; + + let mut byte_offset = 0; + + // Assign the non-boolean settings first. + for s in &self.settings { + let specific = match s.specific { + ProtoSpecificSetting::Bool(..) => continue, + ProtoSpecificSetting::Enum(ref values) => SpecificSetting::Enum(values.clone()), + ProtoSpecificSetting::Num(default) => SpecificSetting::Num(default), + }; + + group.settings.push(Setting { + name: s.name, + comment: s.comment, + byte_offset, + specific, + }); + + byte_offset += 1; + } + + group.bool_start_byte_offset = byte_offset; + + let mut predicate_number = 0; + + // Then the boolean settings. + for s in &self.settings { + let default = match s.specific { + ProtoSpecificSetting::Bool(default) => default, + ProtoSpecificSetting::Enum(_) | ProtoSpecificSetting::Num(_) => continue, + }; + group.settings.push(Setting { + name: s.name, + comment: s.comment, + byte_offset: byte_offset + predicate_number / 8, + specific: SpecificSetting::Bool(BoolSetting { + default, + bit_offset: predicate_number % 8, + predicate_number, + }), + }); + predicate_number += 1; + } + + assert!( + group.predicates.is_empty(), + "settings_size is the byte size before adding predicates" + ); + group.settings_size = group.byte_size(); + + // Sort predicates by name to ensure the same order as the Python code. + let mut predicates = self.predicates; + predicates.sort_by_key(|predicate| predicate.name); + + group + .predicates + .extend(predicates.into_iter().map(|predicate| { + let number = predicate_number; + predicate_number += 1; + Predicate { + name: predicate.name, + node: predicate.node, + number, + } + })); + + group.presets.extend(self.presets); + + group + } +} diff --git a/third_party/rust/cranelift-codegen-meta/src/cdsl/type_inference.rs b/third_party/rust/cranelift-codegen-meta/src/cdsl/type_inference.rs new file mode 100644 index 0000000000..25a07a9b84 --- /dev/null +++ b/third_party/rust/cranelift-codegen-meta/src/cdsl/type_inference.rs @@ -0,0 +1,660 @@ +use crate::cdsl::ast::{Def, DefIndex, DefPool, Var, VarIndex, VarPool}; +use crate::cdsl::typevar::{DerivedFunc, TypeSet, TypeVar}; + +use std::collections::{HashMap, HashSet}; +use std::iter::FromIterator; + +#[derive(Debug, Hash, PartialEq, Eq)] +pub(crate) enum Constraint { + /// Constraint specifying that a type var tv1 must be wider than or equal to type var tv2 at + /// runtime. This requires that: + /// 1) They have the same number of lanes + /// 2) In a lane tv1 has at least as many bits as tv2. + WiderOrEq(TypeVar, TypeVar), + + /// Constraint specifying that two derived type vars must have the same runtime type. + Eq(TypeVar, TypeVar), + + /// Constraint specifying that a type var must belong to some typeset. 
+ InTypeset(TypeVar, TypeSet), +} + +impl Constraint { + fn translate_with<F: Fn(&TypeVar) -> TypeVar>(&self, func: F) -> Constraint { + match self { + Constraint::WiderOrEq(lhs, rhs) => { + let lhs = func(&lhs); + let rhs = func(&rhs); + Constraint::WiderOrEq(lhs, rhs) + } + Constraint::Eq(lhs, rhs) => { + let lhs = func(&lhs); + let rhs = func(&rhs); + Constraint::Eq(lhs, rhs) + } + Constraint::InTypeset(tv, ts) => { + let tv = func(&tv); + Constraint::InTypeset(tv, ts.clone()) + } + } + } + + /// Creates a new constraint by replacing type vars by their hashmap equivalent. + fn translate_with_map( + &self, + original_to_own_typevar: &HashMap<&TypeVar, TypeVar>, + ) -> Constraint { + self.translate_with(|tv| substitute(original_to_own_typevar, tv)) + } + + /// Creates a new constraint by replacing type vars by their canonical equivalent. + fn translate_with_env(&self, type_env: &TypeEnvironment) -> Constraint { + self.translate_with(|tv| type_env.get_equivalent(tv)) + } + + fn is_trivial(&self) -> bool { + match self { + Constraint::WiderOrEq(lhs, rhs) => { + // Trivially true. + if lhs == rhs { + return true; + } + + let ts1 = lhs.get_typeset(); + let ts2 = rhs.get_typeset(); + + // Trivially true. + if ts1.is_wider_or_equal(&ts2) { + return true; + } + + // Trivially false. + if ts1.is_narrower(&ts2) { + return true; + } + + // Trivially false. + if (&ts1.lanes & &ts2.lanes).is_empty() { + return true; + } + + self.is_concrete() + } + Constraint::Eq(lhs, rhs) => lhs == rhs || self.is_concrete(), + Constraint::InTypeset(_, _) => { + // The way InTypeset are made, they would always be trivial if we were applying the + // same logic as the Python code did, so ignore this. + self.is_concrete() + } + } + } + + /// Returns true iff all the referenced type vars are singletons. + fn is_concrete(&self) -> bool { + match self { + Constraint::WiderOrEq(lhs, rhs) => { + lhs.singleton_type().is_some() && rhs.singleton_type().is_some() + } + Constraint::Eq(lhs, rhs) => { + lhs.singleton_type().is_some() && rhs.singleton_type().is_some() + } + Constraint::InTypeset(tv, _) => tv.singleton_type().is_some(), + } + } + + fn typevar_args(&self) -> Vec<&TypeVar> { + match self { + Constraint::WiderOrEq(lhs, rhs) => vec![lhs, rhs], + Constraint::Eq(lhs, rhs) => vec![lhs, rhs], + Constraint::InTypeset(tv, _) => vec![tv], + } + } +} + +#[derive(Clone, Copy)] +enum TypeEnvRank { + Singleton = 5, + Input = 4, + Intermediate = 3, + Output = 2, + Temp = 1, + Internal = 0, +} + +/// Class encapsulating the necessary bookkeeping for type inference. +pub(crate) struct TypeEnvironment { + vars: HashSet<VarIndex>, + ranks: HashMap<TypeVar, TypeEnvRank>, + equivalency_map: HashMap<TypeVar, TypeVar>, + pub constraints: Vec<Constraint>, +} + +impl TypeEnvironment { + fn new() -> Self { + TypeEnvironment { + vars: HashSet::new(), + ranks: HashMap::new(), + equivalency_map: HashMap::new(), + constraints: Vec::new(), + } + } + + fn register(&mut self, var_index: VarIndex, var: &mut Var) { + self.vars.insert(var_index); + let rank = if var.is_input() { + TypeEnvRank::Input + } else if var.is_intermediate() { + TypeEnvRank::Intermediate + } else if var.is_output() { + TypeEnvRank::Output + } else { + assert!(var.is_temp()); + TypeEnvRank::Temp + }; + self.ranks.insert(var.get_or_create_typevar(), rank); + } + + fn add_constraint(&mut self, constraint: Constraint) { + if self.constraints.iter().any(|item| *item == constraint) { + return; + } + + // Check extra conditions for InTypeset constraints. 
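+ // InTypeset constraints are expected to be built only for the plain `typeof_*` type vars
+ // attached to source variables, hence the two assertions below.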
+ if let Constraint::InTypeset(tv, _) = &constraint { + assert!( + tv.base.is_none(), + "type variable is {:?}, while expecting none", + tv + ); + assert!( + tv.name.starts_with("typeof_"), + "Name \"{}\" should start with \"typeof_\"", + tv.name + ); + } + + self.constraints.push(constraint); + } + + /// Returns the canonical representative of the equivalency class of the given argument, or + /// duplicates it if it's not there yet. + pub fn get_equivalent(&self, tv: &TypeVar) -> TypeVar { + let mut tv = tv; + while let Some(found) = self.equivalency_map.get(tv) { + tv = found; + } + match &tv.base { + Some(parent) => self + .get_equivalent(&parent.type_var) + .derived(parent.derived_func), + None => tv.clone(), + } + } + + /// Get the rank of tv in the partial order: + /// - TVs directly associated with a Var get their rank from the Var (see register()). + /// - Internally generated non-derived TVs implicitly get the lowest rank (0). + /// - Derived variables get their rank from their free typevar. + /// - Singletons have the highest rank. + /// - TVs associated with vars in a source pattern have a higher rank than TVs associated with + /// temporary vars. + fn rank(&self, tv: &TypeVar) -> u8 { + let actual_tv = match tv.base { + Some(_) => tv.free_typevar(), + None => Some(tv.clone()), + }; + + let rank = match actual_tv { + Some(actual_tv) => match self.ranks.get(&actual_tv) { + Some(rank) => Some(*rank), + None => { + assert!( + !actual_tv.name.starts_with("typeof_"), + format!("variable {} should be explicitly ranked", actual_tv.name) + ); + None + } + }, + None => None, + }; + + let rank = match rank { + Some(rank) => rank, + None => { + if tv.singleton_type().is_some() { + TypeEnvRank::Singleton + } else { + TypeEnvRank::Internal + } + } + }; + + rank as u8 + } + + /// Record the fact that the free tv1 is part of the same equivalence class as tv2. The + /// canonical representative of the merged class is tv2's canonical representative. + fn record_equivalent(&mut self, tv1: TypeVar, tv2: TypeVar) { + assert!(tv1.base.is_none()); + assert!(self.get_equivalent(&tv1) == tv1); + if let Some(tv2_base) = &tv2.base { + // Ensure there are no cycles. + assert!(self.get_equivalent(&tv2_base.type_var) != tv1); + } + self.equivalency_map.insert(tv1, tv2); + } + + /// Get the free typevars in the current type environment. + pub fn free_typevars(&self, var_pool: &mut VarPool) -> Vec<TypeVar> { + let mut typevars = Vec::new(); + typevars.extend(self.equivalency_map.keys().cloned()); + typevars.extend( + self.vars + .iter() + .map(|&var_index| var_pool.get_mut(var_index).get_or_create_typevar()), + ); + + let set: HashSet<TypeVar> = HashSet::from_iter( + typevars + .iter() + .map(|tv| self.get_equivalent(tv).free_typevar()) + .filter(|opt_tv| { + // Filter out singleton types. + opt_tv.is_some() + }) + .map(|tv| tv.unwrap()), + ); + Vec::from_iter(set) + } + + /// Normalize by collapsing any roots that don't correspond to a concrete type var AND have a + /// single type var derived from them or equivalent to them. + /// + /// e.g. if we have a root of the tree that looks like: + /// + /// typeof_a typeof_b + /// \\ / + /// typeof_x + /// | + /// half_width(1) + /// | + /// 1 + /// + /// we want to collapse the linear path between 1 and typeof_x. 
The resulting graph is: + /// + /// typeof_a typeof_b + /// \\ / + /// typeof_x + fn normalize(&mut self, var_pool: &mut VarPool) { + let source_tvs: HashSet<TypeVar> = HashSet::from_iter( + self.vars + .iter() + .map(|&var_index| var_pool.get_mut(var_index).get_or_create_typevar()), + ); + + let mut children: HashMap<TypeVar, HashSet<TypeVar>> = HashMap::new(); + + // Insert all the parents found by the derivation relationship. + for type_var in self.equivalency_map.values() { + if type_var.base.is_none() { + continue; + } + + let parent_tv = type_var.free_typevar(); + if parent_tv.is_none() { + // Ignore this type variable, it's a singleton. + continue; + } + let parent_tv = parent_tv.unwrap(); + + children + .entry(parent_tv) + .or_insert_with(HashSet::new) + .insert(type_var.clone()); + } + + // Insert all the explicit equivalency links. + for (equivalent_tv, canon_tv) in self.equivalency_map.iter() { + children + .entry(canon_tv.clone()) + .or_insert_with(HashSet::new) + .insert(equivalent_tv.clone()); + } + + // Remove links that are straight paths up to typevar of variables. + for free_root in self.free_typevars(var_pool) { + let mut root = &free_root; + while !source_tvs.contains(&root) + && children.contains_key(&root) + && children.get(&root).unwrap().len() == 1 + { + let child = children.get(&root).unwrap().iter().next().unwrap(); + assert_eq!(self.equivalency_map[child], root.clone()); + self.equivalency_map.remove(child); + root = child; + } + } + } + + /// Extract a clean type environment from self, that only mentions type vars associated with + /// real variables. + fn extract(self, var_pool: &mut VarPool) -> TypeEnvironment { + let vars_tv: HashSet<TypeVar> = HashSet::from_iter( + self.vars + .iter() + .map(|&var_index| var_pool.get_mut(var_index).get_or_create_typevar()), + ); + + let mut new_equivalency_map: HashMap<TypeVar, TypeVar> = HashMap::new(); + for tv in &vars_tv { + let canon_tv = self.get_equivalent(tv); + if *tv != canon_tv { + new_equivalency_map.insert(tv.clone(), canon_tv.clone()); + } + + // Sanity check: the translated type map should only refer to real variables. + assert!(vars_tv.contains(tv)); + let canon_free_tv = canon_tv.free_typevar(); + assert!(canon_free_tv.is_none() || vars_tv.contains(&canon_free_tv.unwrap())); + } + + let mut new_constraints: HashSet<Constraint> = HashSet::new(); + for constraint in &self.constraints { + let constraint = constraint.translate_with_env(&self); + if constraint.is_trivial() || new_constraints.contains(&constraint) { + continue; + } + + // Sanity check: translated constraints should refer only to real variables. + for arg in constraint.typevar_args() { + let arg_free_tv = arg.free_typevar(); + assert!(arg_free_tv.is_none() || vars_tv.contains(&arg_free_tv.unwrap())); + } + + new_constraints.insert(constraint); + } + + TypeEnvironment { + vars: self.vars, + ranks: self.ranks, + equivalency_map: new_equivalency_map, + constraints: Vec::from_iter(new_constraints), + } + } +} + +/// Replaces an external type variable according to the following rules: +/// - if a local copy is present in the map, return it. +/// - or if it's derived, create a local derived one that recursively substitutes the parent. +/// - or return itself. 
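/// 
/// (Editor's note: an illustrative sketch, not part of the original sources; the
/// variable names are hypothetical and the snippet assumes the `TypeVar` and
/// `TypeSetBuilder` APIs defined in `cdsl/typevar.rs` later in this patch.)
/// 
/// ```ignore
/// let typeof_x = TypeVar::new("typeof_x", "example", TypeSetBuilder::new().ints(16..64).build());
/// let own = TypeVar::copy_from(&typeof_x, "own_typeof_x".to_string());
/// let mut map = HashMap::new();
/// map.insert(&typeof_x, own.clone());
/// // A non-derived var is replaced by its local copy; a derived var is rebuilt on
/// // top of the substituted parent.
/// assert!(substitute(&map, &typeof_x) == own);
/// assert!(substitute(&map, &typeof_x.half_width()) == own.half_width());
/// ```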
+fn substitute(map: &HashMap<&TypeVar, TypeVar>, external_type_var: &TypeVar) -> TypeVar { + match map.get(&external_type_var) { + Some(own_type_var) => own_type_var.clone(), + None => match &external_type_var.base { + Some(parent) => { + let parent_substitute = substitute(map, &parent.type_var); + TypeVar::derived(&parent_substitute, parent.derived_func) + } + None => external_type_var.clone(), + }, + } +} + +/// Normalize a (potentially derived) typevar using the following rules: +/// +/// - vector and width derived functions commute +/// {HALF,DOUBLE}VECTOR({HALF,DOUBLE}WIDTH(base)) -> +/// {HALF,DOUBLE}WIDTH({HALF,DOUBLE}VECTOR(base)) +/// +/// - half/double pairs collapse +/// {HALF,DOUBLE}WIDTH({DOUBLE,HALF}WIDTH(base)) -> base +/// {HALF,DOUBLE}VECTOR({DOUBLE,HALF}VECTOR(base)) -> base +fn canonicalize_derivations(tv: TypeVar) -> TypeVar { + let base = match &tv.base { + Some(base) => base, + None => return tv, + }; + + let derived_func = base.derived_func; + + if let Some(base_base) = &base.type_var.base { + let base_base_tv = &base_base.type_var; + match (derived_func, base_base.derived_func) { + (DerivedFunc::HalfWidth, DerivedFunc::DoubleWidth) + | (DerivedFunc::DoubleWidth, DerivedFunc::HalfWidth) + | (DerivedFunc::HalfVector, DerivedFunc::DoubleVector) + | (DerivedFunc::DoubleVector, DerivedFunc::HalfVector) => { + // Cancelling bijective transformations. This doesn't hide any overflow issues + // since derived type sets are checked upon derivaion, and base typesets are only + // allowed to shrink. + return canonicalize_derivations(base_base_tv.clone()); + } + (DerivedFunc::HalfWidth, DerivedFunc::HalfVector) + | (DerivedFunc::HalfWidth, DerivedFunc::DoubleVector) + | (DerivedFunc::DoubleWidth, DerivedFunc::DoubleVector) + | (DerivedFunc::DoubleWidth, DerivedFunc::HalfVector) => { + // Arbitrarily put WIDTH derivations before VECTOR derivations, since they commute. + return canonicalize_derivations( + base_base_tv + .derived(derived_func) + .derived(base_base.derived_func), + ); + } + _ => {} + }; + } + + canonicalize_derivations(base.type_var.clone()).derived(derived_func) +} + +/// Given typevars tv1 and tv2 (which could be derived from one another), constrain their typesets +/// to be the same. When one is derived from the other, repeat the constrain process until +/// a fixed point is reached. +fn constrain_fixpoint(tv1: &TypeVar, tv2: &TypeVar) { + loop { + let old_tv1_ts = tv1.get_typeset().clone(); + tv2.constrain_types(tv1.clone()); + if tv1.get_typeset() == old_tv1_ts { + break; + } + } + + let old_tv2_ts = tv2.get_typeset(); + tv1.constrain_types(tv2.clone()); + // The above loop should ensure that all reference cycles have been handled. + assert!(old_tv2_ts == tv2.get_typeset()); +} + +/// Unify tv1 and tv2 in the given type environment. tv1 must have a rank greater or equal to tv2's +/// one, modulo commutations. +fn unify(tv1: &TypeVar, tv2: &TypeVar, type_env: &mut TypeEnvironment) -> Result<(), String> { + let tv1 = canonicalize_derivations(type_env.get_equivalent(tv1)); + let tv2 = canonicalize_derivations(type_env.get_equivalent(tv2)); + + if tv1 == tv2 { + // Already unified. + return Ok(()); + } + + if type_env.rank(&tv2) < type_env.rank(&tv1) { + // Make sure tv1 always has the smallest rank, since real variables have the higher rank + // and we want them to be the canonical representatives of their equivalency classes. 
+ return unify(&tv2, &tv1, type_env); + } + + constrain_fixpoint(&tv1, &tv2); + + if tv1.get_typeset().size() == 0 || tv2.get_typeset().size() == 0 { + return Err(format!( + "Error: empty type created when unifying {} and {}", + tv1.name, tv2.name + )); + } + + let base = match &tv1.base { + Some(base) => base, + None => { + type_env.record_equivalent(tv1, tv2); + return Ok(()); + } + }; + + if let Some(inverse) = base.derived_func.inverse() { + return unify(&base.type_var, &tv2.derived(inverse), type_env); + } + + type_env.add_constraint(Constraint::Eq(tv1, tv2)); + Ok(()) +} + +/// Perform type inference on one Def in the current type environment and return an updated type +/// environment or error. +/// +/// At a high level this works by creating fresh copies of each formal type var in the Def's +/// instruction's signature, and unifying the formal typevar with the corresponding actual typevar. +fn infer_definition( + def: &Def, + var_pool: &mut VarPool, + type_env: TypeEnvironment, + last_type_index: &mut usize, +) -> Result<TypeEnvironment, String> { + let apply = &def.apply; + let inst = &apply.inst; + + let mut type_env = type_env; + let free_formal_tvs = inst.all_typevars(); + + let mut original_to_own_typevar: HashMap<&TypeVar, TypeVar> = HashMap::new(); + for &tv in &free_formal_tvs { + assert!(original_to_own_typevar + .insert( + tv, + TypeVar::copy_from(tv, format!("own_{}", last_type_index)) + ) + .is_none()); + *last_type_index += 1; + } + + // Update the mapping with any explicity bound type vars: + for (i, value_type) in apply.value_types.iter().enumerate() { + let singleton = TypeVar::new_singleton(value_type.clone()); + assert!(original_to_own_typevar + .insert(free_formal_tvs[i], singleton) + .is_some()); + } + + // Get fresh copies for each typevar in the signature (both free and derived). + let mut formal_tvs = Vec::new(); + formal_tvs.extend(inst.value_results.iter().map(|&i| { + substitute( + &original_to_own_typevar, + inst.operands_out[i].type_var().unwrap(), + ) + })); + formal_tvs.extend(inst.value_opnums.iter().map(|&i| { + substitute( + &original_to_own_typevar, + inst.operands_in[i].type_var().unwrap(), + ) + })); + + // Get the list of actual vars. + let mut actual_vars = Vec::new(); + actual_vars.extend(inst.value_results.iter().map(|&i| def.defined_vars[i])); + actual_vars.extend( + inst.value_opnums + .iter() + .map(|&i| apply.args[i].unwrap_var()), + ); + + // Get the list of the actual TypeVars. + let mut actual_tvs = Vec::new(); + for var_index in actual_vars { + let var = var_pool.get_mut(var_index); + type_env.register(var_index, var); + actual_tvs.push(var.get_or_create_typevar()); + } + + // Make sure we start unifying with the control type variable first, by putting it at the + // front of both vectors. + if let Some(poly) = &inst.polymorphic_info { + let own_ctrl_tv = &original_to_own_typevar[&poly.ctrl_typevar]; + let ctrl_index = formal_tvs.iter().position(|tv| tv == own_ctrl_tv).unwrap(); + if ctrl_index != 0 { + formal_tvs.swap(0, ctrl_index); + actual_tvs.swap(0, ctrl_index); + } + } + + // Unify each actual type variable with the corresponding formal type variable. + for (actual_tv, formal_tv) in actual_tvs.iter().zip(&formal_tvs) { + if let Err(msg) = unify(actual_tv, formal_tv, &mut type_env) { + return Err(format!( + "fail ti on {} <: {}: {}", + actual_tv.name, formal_tv.name, msg + )); + } + } + + // Add any instruction specific constraints. 
+ for constraint in &inst.constraints { + type_env.add_constraint(constraint.translate_with_map(&original_to_own_typevar)); + } + + Ok(type_env) +} + +/// Perform type inference on an transformation. Return an updated type environment or error. +pub(crate) fn infer_transform( + src: DefIndex, + dst: &[DefIndex], + def_pool: &DefPool, + var_pool: &mut VarPool, +) -> Result<TypeEnvironment, String> { + let mut type_env = TypeEnvironment::new(); + let mut last_type_index = 0; + + // Execute type inference on the source pattern. + type_env = infer_definition(def_pool.get(src), var_pool, type_env, &mut last_type_index) + .map_err(|err| format!("In src pattern: {}", err))?; + + // Collect the type sets once after applying the source patterm; we'll compare the typesets + // after we've also considered the destination pattern, and will emit supplementary InTypeset + // checks if they don't match. + let src_typesets = type_env + .vars + .iter() + .map(|&var_index| { + let var = var_pool.get_mut(var_index); + let tv = type_env.get_equivalent(&var.get_or_create_typevar()); + (var_index, tv.get_typeset()) + }) + .collect::<Vec<_>>(); + + // Execute type inference on the destination pattern. + for (i, &def_index) in dst.iter().enumerate() { + let def = def_pool.get(def_index); + type_env = infer_definition(def, var_pool, type_env, &mut last_type_index) + .map_err(|err| format!("line {}: {}", i, err))?; + } + + for (var_index, src_typeset) in src_typesets { + let var = var_pool.get(var_index); + if !var.has_free_typevar() { + continue; + } + let tv = type_env.get_equivalent(&var.get_typevar().unwrap()); + let new_typeset = tv.get_typeset(); + assert!( + new_typeset.is_subset(&src_typeset), + "type sets can only get narrower" + ); + if new_typeset != src_typeset { + type_env.add_constraint(Constraint::InTypeset(tv.clone(), new_typeset.clone())); + } + } + + type_env.normalize(var_pool); + + Ok(type_env.extract(var_pool)) +} diff --git a/third_party/rust/cranelift-codegen-meta/src/cdsl/types.rs b/third_party/rust/cranelift-codegen-meta/src/cdsl/types.rs new file mode 100644 index 0000000000..7e03c873db --- /dev/null +++ b/third_party/rust/cranelift-codegen-meta/src/cdsl/types.rs @@ -0,0 +1,587 @@ +//! Cranelift ValueType hierarchy + +use std::fmt; + +use crate::shared::types as shared_types; +use cranelift_codegen_shared::constants; + +// Rust name prefix used for the `rust_name` method. +static _RUST_NAME_PREFIX: &str = "ir::types::"; + +// ValueType variants (i8, i32, ...) are provided in `shared::types.rs`. + +/// A concrete SSA value type. +/// +/// All SSA values have a type that is described by an instance of `ValueType` +/// or one of its subclasses. +#[derive(Clone, Debug, PartialEq, Eq, Hash)] +pub(crate) enum ValueType { + Lane(LaneType), + Reference(ReferenceType), + Special(SpecialType), + Vector(VectorType), +} + +impl ValueType { + /// Iterate through all of the lane types. + pub fn all_lane_types() -> LaneTypeIterator { + LaneTypeIterator::new() + } + + /// Iterate through all of the special types (neither lanes nor vectors). + pub fn all_special_types() -> SpecialTypeIterator { + SpecialTypeIterator::new() + } + + pub fn all_reference_types() -> ReferenceTypeIterator { + ReferenceTypeIterator::new() + } + + /// Return a string containing the documentation comment for this type. 
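/// 
/// (Editor's note: the example below is an illustrative sketch added for this listing,
/// not part of the original sources; it exercises the `ValueType` accessors defined in
/// this impl.)
/// 
/// ```ignore
/// let v = LaneType::int_from_bits(32).by(4); // the vector type i32x4
/// assert_eq!(v.lane_bits(), 32);
/// assert_eq!(v.lane_count(), 4);
/// assert_eq!(v.width(), 128);
/// assert_eq!(v.membytes(), 16);
/// assert_eq!(v.rust_name(), "ir::types::I32X4");
/// ```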
+ pub fn doc(&self) -> String { + match *self { + ValueType::Lane(l) => l.doc(), + ValueType::Reference(r) => r.doc(), + ValueType::Special(s) => s.doc(), + ValueType::Vector(ref v) => v.doc(), + } + } + + /// Return the number of bits in a lane. + pub fn lane_bits(&self) -> u64 { + match *self { + ValueType::Lane(l) => l.lane_bits(), + ValueType::Reference(r) => r.lane_bits(), + ValueType::Special(s) => s.lane_bits(), + ValueType::Vector(ref v) => v.lane_bits(), + } + } + + /// Return the number of lanes. + pub fn lane_count(&self) -> u64 { + match *self { + ValueType::Vector(ref v) => v.lane_count(), + _ => 1, + } + } + + /// Find the number of bytes that this type occupies in memory. + pub fn membytes(&self) -> u64 { + self.width() / 8 + } + + /// Find the unique number associated with this type. + pub fn number(&self) -> Option<u8> { + match *self { + ValueType::Lane(l) => Some(l.number()), + ValueType::Reference(r) => Some(r.number()), + ValueType::Special(s) => Some(s.number()), + ValueType::Vector(ref v) => Some(v.number()), + } + } + + /// Return the name of this type for generated Rust source files. + pub fn rust_name(&self) -> String { + format!("{}{}", _RUST_NAME_PREFIX, self.to_string().to_uppercase()) + } + + /// Return true iff: + /// 1. self and other have equal number of lanes + /// 2. each lane in self has at least as many bits as a lane in other + pub fn _wider_or_equal(&self, rhs: &ValueType) -> bool { + (self.lane_count() == rhs.lane_count()) && (self.lane_bits() >= rhs.lane_bits()) + } + + /// Return the total number of bits of an instance of this type. + pub fn width(&self) -> u64 { + self.lane_count() * self.lane_bits() + } +} + +impl fmt::Display for ValueType { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + match *self { + ValueType::Lane(l) => l.fmt(f), + ValueType::Reference(r) => r.fmt(f), + ValueType::Special(s) => s.fmt(f), + ValueType::Vector(ref v) => v.fmt(f), + } + } +} + +/// Create a ValueType from a given lane type. +impl From<LaneType> for ValueType { + fn from(lane: LaneType) -> Self { + ValueType::Lane(lane) + } +} + +/// Create a ValueType from a given reference type. +impl From<ReferenceType> for ValueType { + fn from(reference: ReferenceType) -> Self { + ValueType::Reference(reference) + } +} + +/// Create a ValueType from a given special type. +impl From<SpecialType> for ValueType { + fn from(spec: SpecialType) -> Self { + ValueType::Special(spec) + } +} + +/// Create a ValueType from a given vector type. +impl From<VectorType> for ValueType { + fn from(vector: VectorType) -> Self { + ValueType::Vector(vector) + } +} + +/// A concrete scalar type that can appear as a vector lane too. +#[derive(Clone, Copy, PartialEq, Eq, Hash)] +pub(crate) enum LaneType { + Bool(shared_types::Bool), + Float(shared_types::Float), + Int(shared_types::Int), +} + +impl LaneType { + /// Return a string containing the documentation comment for this lane type. + pub fn doc(self) -> String { + match self { + LaneType::Bool(_) => format!("A boolean type with {} bits.", self.lane_bits()), + LaneType::Float(shared_types::Float::F32) => String::from( + "A 32-bit floating point type represented in the IEEE 754-2008 + *binary32* interchange format. This corresponds to the :c:type:`float` + type in most C implementations.", + ), + LaneType::Float(shared_types::Float::F64) => String::from( + "A 64-bit floating point type represented in the IEEE 754-2008 + *binary64* interchange format. 
This corresponds to the :c:type:`double` + type in most C implementations.", + ), + LaneType::Int(_) if self.lane_bits() < 32 => format!( + "An integer type with {} bits. + WARNING: arithmetic on {}bit integers is incomplete", + self.lane_bits(), + self.lane_bits() + ), + LaneType::Int(_) => format!("An integer type with {} bits.", self.lane_bits()), + } + } + + /// Return the number of bits in a lane. + pub fn lane_bits(self) -> u64 { + match self { + LaneType::Bool(ref b) => *b as u64, + LaneType::Float(ref f) => *f as u64, + LaneType::Int(ref i) => *i as u64, + } + } + + /// Find the unique number associated with this lane type. + pub fn number(self) -> u8 { + constants::LANE_BASE + + match self { + LaneType::Bool(shared_types::Bool::B1) => 0, + LaneType::Bool(shared_types::Bool::B8) => 1, + LaneType::Bool(shared_types::Bool::B16) => 2, + LaneType::Bool(shared_types::Bool::B32) => 3, + LaneType::Bool(shared_types::Bool::B64) => 4, + LaneType::Bool(shared_types::Bool::B128) => 5, + LaneType::Int(shared_types::Int::I8) => 6, + LaneType::Int(shared_types::Int::I16) => 7, + LaneType::Int(shared_types::Int::I32) => 8, + LaneType::Int(shared_types::Int::I64) => 9, + LaneType::Int(shared_types::Int::I128) => 10, + LaneType::Float(shared_types::Float::F32) => 11, + LaneType::Float(shared_types::Float::F64) => 12, + } + } + + pub fn bool_from_bits(num_bits: u16) -> LaneType { + LaneType::Bool(match num_bits { + 1 => shared_types::Bool::B1, + 8 => shared_types::Bool::B8, + 16 => shared_types::Bool::B16, + 32 => shared_types::Bool::B32, + 64 => shared_types::Bool::B64, + 128 => shared_types::Bool::B128, + _ => unreachable!("unxpected num bits for bool"), + }) + } + + pub fn int_from_bits(num_bits: u16) -> LaneType { + LaneType::Int(match num_bits { + 8 => shared_types::Int::I8, + 16 => shared_types::Int::I16, + 32 => shared_types::Int::I32, + 64 => shared_types::Int::I64, + 128 => shared_types::Int::I128, + _ => unreachable!("unxpected num bits for int"), + }) + } + + pub fn float_from_bits(num_bits: u16) -> LaneType { + LaneType::Float(match num_bits { + 32 => shared_types::Float::F32, + 64 => shared_types::Float::F64, + _ => unreachable!("unxpected num bits for float"), + }) + } + + pub fn by(self, lanes: u16) -> ValueType { + if lanes == 1 { + self.into() + } else { + ValueType::Vector(VectorType::new(self, lanes.into())) + } + } + + pub fn is_float(self) -> bool { + match self { + LaneType::Float(_) => true, + _ => false, + } + } + + pub fn is_int(self) -> bool { + match self { + LaneType::Int(_) => true, + _ => false, + } + } +} + +impl fmt::Display for LaneType { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + match *self { + LaneType::Bool(_) => write!(f, "b{}", self.lane_bits()), + LaneType::Float(_) => write!(f, "f{}", self.lane_bits()), + LaneType::Int(_) => write!(f, "i{}", self.lane_bits()), + } + } +} + +impl fmt::Debug for LaneType { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + let inner_msg = format!("bits={}", self.lane_bits()); + write!( + f, + "{}", + match *self { + LaneType::Bool(_) => format!("BoolType({})", inner_msg), + LaneType::Float(_) => format!("FloatType({})", inner_msg), + LaneType::Int(_) => format!("IntType({})", inner_msg), + } + ) + } +} + +/// Create a LaneType from a given bool variant. +impl From<shared_types::Bool> for LaneType { + fn from(b: shared_types::Bool) -> Self { + LaneType::Bool(b) + } +} + +/// Create a LaneType from a given float variant. 
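/// 
/// (Editor's note: illustrative sketch, not part of the original sources; it exercises
/// the `LaneType` constructors and numbering defined above.)
/// 
/// ```ignore
/// let ty = LaneType::int_from_bits(32);
/// assert_eq!(ty.to_string(), "i32");
/// assert_eq!(ty.number(), constants::LANE_BASE + 8);
/// // `by` wraps the lane in a SIMD ValueType unless the lane count is 1.
/// assert_eq!(LaneType::float_from_bits(32).by(4).to_string(), "f32x4");
/// assert_eq!(LaneType::bool_from_bits(1).by(1).to_string(), "b1");
/// ```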
+impl From<shared_types::Float> for LaneType { + fn from(f: shared_types::Float) -> Self { + LaneType::Float(f) + } +} + +/// Create a LaneType from a given int variant. +impl From<shared_types::Int> for LaneType { + fn from(i: shared_types::Int) -> Self { + LaneType::Int(i) + } +} + +/// An iterator for different lane types. +pub(crate) struct LaneTypeIterator { + bool_iter: shared_types::BoolIterator, + int_iter: shared_types::IntIterator, + float_iter: shared_types::FloatIterator, +} + +impl LaneTypeIterator { + /// Create a new lane type iterator. + fn new() -> Self { + Self { + bool_iter: shared_types::BoolIterator::new(), + int_iter: shared_types::IntIterator::new(), + float_iter: shared_types::FloatIterator::new(), + } + } +} + +impl Iterator for LaneTypeIterator { + type Item = LaneType; + fn next(&mut self) -> Option<Self::Item> { + if let Some(b) = self.bool_iter.next() { + Some(LaneType::from(b)) + } else if let Some(i) = self.int_iter.next() { + Some(LaneType::from(i)) + } else if let Some(f) = self.float_iter.next() { + Some(LaneType::from(f)) + } else { + None + } + } +} + +/// A concrete SIMD vector type. +/// +/// A vector type has a lane type which is an instance of `LaneType`, +/// and a positive number of lanes. +#[derive(Clone, PartialEq, Eq, Hash)] +pub(crate) struct VectorType { + base: LaneType, + lanes: u64, +} + +impl VectorType { + /// Initialize a new integer type with `n` bits. + pub fn new(base: LaneType, lanes: u64) -> Self { + Self { base, lanes } + } + + /// Return a string containing the documentation comment for this vector type. + pub fn doc(&self) -> String { + format!( + "A SIMD vector with {} lanes containing a `{}` each.", + self.lane_count(), + self.base + ) + } + + /// Return the number of bits in a lane. + pub fn lane_bits(&self) -> u64 { + self.base.lane_bits() + } + + /// Return the number of lanes. + pub fn lane_count(&self) -> u64 { + self.lanes + } + + /// Return the lane type. + pub fn lane_type(&self) -> LaneType { + self.base + } + + /// Find the unique number associated with this vector type. + /// + /// Vector types are encoded with the lane type in the low 4 bits and + /// log2(lanes) in the high 4 bits, giving a range of 2-256 lanes. + pub fn number(&self) -> u8 { + let lanes_log_2: u32 = 63 - self.lane_count().leading_zeros(); + let base_num = u32::from(self.base.number()); + let num = (lanes_log_2 << 4) + base_num; + num as u8 + } +} + +impl fmt::Display for VectorType { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + write!(f, "{}x{}", self.base, self.lane_count()) + } +} + +impl fmt::Debug for VectorType { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + write!( + f, + "VectorType(base={}, lanes={})", + self.base, + self.lane_count() + ) + } +} + +/// A concrete scalar type that is neither a vector nor a lane type. +/// +/// Special types cannot be used to form vectors. +#[derive(Clone, Copy, PartialEq, Eq, Hash)] +pub(crate) enum SpecialType { + Flag(shared_types::Flag), + // FIXME remove once the old style backends are removed. + StructArgument, +} + +impl SpecialType { + /// Return a string containing the documentation comment for this special type. + pub fn doc(self) -> String { + match self { + SpecialType::Flag(shared_types::Flag::IFlags) => String::from( + "CPU flags representing the result of an integer comparison. 
These flags + can be tested with an :type:`intcc` condition code.", + ), + SpecialType::Flag(shared_types::Flag::FFlags) => String::from( + "CPU flags representing the result of a floating point comparison. These + flags can be tested with a :type:`floatcc` condition code.", + ), + SpecialType::StructArgument => { + String::from("After legalization sarg_t arguments will get this type.") + } + } + } + + /// Return the number of bits in a lane. + pub fn lane_bits(self) -> u64 { + match self { + SpecialType::Flag(_) => 0, + SpecialType::StructArgument => 0, + } + } + + /// Find the unique number associated with this special type. + pub fn number(self) -> u8 { + match self { + SpecialType::Flag(shared_types::Flag::IFlags) => 1, + SpecialType::Flag(shared_types::Flag::FFlags) => 2, + SpecialType::StructArgument => 3, + } + } +} + +impl fmt::Display for SpecialType { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + match *self { + SpecialType::Flag(shared_types::Flag::IFlags) => write!(f, "iflags"), + SpecialType::Flag(shared_types::Flag::FFlags) => write!(f, "fflags"), + SpecialType::StructArgument => write!(f, "sarg_t"), + } + } +} + +impl fmt::Debug for SpecialType { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + write!( + f, + "{}", + match *self { + SpecialType::Flag(_) => format!("FlagsType({})", self), + SpecialType::StructArgument => format!("StructArgument"), + } + ) + } +} + +impl From<shared_types::Flag> for SpecialType { + fn from(f: shared_types::Flag) -> Self { + SpecialType::Flag(f) + } +} + +pub(crate) struct SpecialTypeIterator { + flag_iter: shared_types::FlagIterator, + done: bool, +} + +impl SpecialTypeIterator { + fn new() -> Self { + Self { + flag_iter: shared_types::FlagIterator::new(), + done: false, + } + } +} + +impl Iterator for SpecialTypeIterator { + type Item = SpecialType; + fn next(&mut self) -> Option<Self::Item> { + if let Some(f) = self.flag_iter.next() { + Some(SpecialType::from(f)) + } else { + if !self.done { + self.done = true; + Some(SpecialType::StructArgument) + } else { + None + } + } + } +} + +/// Reference type is scalar type, but not lane type. +#[derive(Clone, Copy, PartialEq, Eq, Hash)] +pub(crate) struct ReferenceType(pub shared_types::Reference); + +impl ReferenceType { + /// Return a string containing the documentation comment for this reference type. + pub fn doc(self) -> String { + format!("An opaque reference type with {} bits.", self.lane_bits()) + } + + /// Return the number of bits in a lane. + pub fn lane_bits(self) -> u64 { + match self.0 { + shared_types::Reference::R32 => 32, + shared_types::Reference::R64 => 64, + } + } + + /// Find the unique number associated with this reference type. + pub fn number(self) -> u8 { + constants::REFERENCE_BASE + + match self { + ReferenceType(shared_types::Reference::R32) => 0, + ReferenceType(shared_types::Reference::R64) => 1, + } + } + + pub fn ref_from_bits(num_bits: u16) -> ReferenceType { + ReferenceType(match num_bits { + 32 => shared_types::Reference::R32, + 64 => shared_types::Reference::R64, + _ => unreachable!("unexpected number of bits for a reference type"), + }) + } +} + +impl fmt::Display for ReferenceType { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + write!(f, "r{}", self.lane_bits()) + } +} + +impl fmt::Debug for ReferenceType { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + write!(f, "ReferenceType(bits={})", self.lane_bits()) + } +} + +/// Create a ReferenceType from a given reference variant. 
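/// 
/// (Editor's note: illustrative sketch, not part of the original sources; it uses the
/// `ReferenceType` helpers defined above.)
/// 
/// ```ignore
/// let r = ReferenceType::ref_from_bits(64);
/// assert_eq!(r.to_string(), "r64");
/// assert_eq!(r.lane_bits(), 64);
/// assert_eq!(r.number(), constants::REFERENCE_BASE + 1);
/// ```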
+impl From<shared_types::Reference> for ReferenceType { + fn from(r: shared_types::Reference) -> Self { + ReferenceType(r) + } +} + +/// An iterator for different reference types. +pub(crate) struct ReferenceTypeIterator { + reference_iter: shared_types::ReferenceIterator, +} + +impl ReferenceTypeIterator { + /// Create a new reference type iterator. + fn new() -> Self { + Self { + reference_iter: shared_types::ReferenceIterator::new(), + } + } +} + +impl Iterator for ReferenceTypeIterator { + type Item = ReferenceType; + fn next(&mut self) -> Option<Self::Item> { + if let Some(r) = self.reference_iter.next() { + Some(ReferenceType::from(r)) + } else { + None + } + } +} diff --git a/third_party/rust/cranelift-codegen-meta/src/cdsl/typevar.rs b/third_party/rust/cranelift-codegen-meta/src/cdsl/typevar.rs new file mode 100644 index 0000000000..c1027bf847 --- /dev/null +++ b/third_party/rust/cranelift-codegen-meta/src/cdsl/typevar.rs @@ -0,0 +1,1274 @@ +use std::cell::RefCell; +use std::collections::{BTreeSet, HashSet}; +use std::fmt; +use std::hash; +use std::iter::FromIterator; +use std::ops; +use std::rc::Rc; + +use crate::cdsl::types::{LaneType, ReferenceType, SpecialType, ValueType}; + +const MAX_LANES: u16 = 256; +const MAX_BITS: u16 = 128; +const MAX_FLOAT_BITS: u16 = 64; + +/// Type variables can be used in place of concrete types when defining +/// instructions. This makes the instructions *polymorphic*. +/// +/// A type variable is restricted to vary over a subset of the value types. +/// This subset is specified by a set of flags that control the permitted base +/// types and whether the type variable can assume scalar or vector types, or +/// both. +#[derive(Debug)] +pub(crate) struct TypeVarContent { + /// Short name of type variable used in instruction descriptions. + pub name: String, + + /// Documentation string. + pub doc: String, + + /// Type set associated to the type variable. + /// This field must remain private; use `get_typeset()` or `get_raw_typeset()` to get the + /// information you want. 
+ type_set: TypeSet, + + pub base: Option<TypeVarParent>, +} + +#[derive(Clone, Debug)] +pub(crate) struct TypeVar { + content: Rc<RefCell<TypeVarContent>>, +} + +impl TypeVar { + pub fn new(name: impl Into<String>, doc: impl Into<String>, type_set: TypeSet) -> Self { + Self { + content: Rc::new(RefCell::new(TypeVarContent { + name: name.into(), + doc: doc.into(), + type_set, + base: None, + })), + } + } + + pub fn new_singleton(value_type: ValueType) -> Self { + let (name, doc) = (value_type.to_string(), value_type.doc()); + let mut builder = TypeSetBuilder::new(); + + let (scalar_type, num_lanes) = match value_type { + ValueType::Special(special_type) => { + return TypeVar::new(name, doc, builder.specials(vec![special_type]).build()); + } + ValueType::Reference(ReferenceType(reference_type)) => { + let bits = reference_type as RangeBound; + return TypeVar::new(name, doc, builder.refs(bits..bits).build()); + } + ValueType::Lane(lane_type) => (lane_type, 1), + ValueType::Vector(vec_type) => { + (vec_type.lane_type(), vec_type.lane_count() as RangeBound) + } + }; + + builder = builder.simd_lanes(num_lanes..num_lanes); + + let builder = match scalar_type { + LaneType::Int(int_type) => { + let bits = int_type as RangeBound; + builder.ints(bits..bits) + } + LaneType::Float(float_type) => { + let bits = float_type as RangeBound; + builder.floats(bits..bits) + } + LaneType::Bool(bool_type) => { + let bits = bool_type as RangeBound; + builder.bools(bits..bits) + } + }; + TypeVar::new(name, doc, builder.build()) + } + + /// Get a fresh copy of self, named after `name`. Can only be called on non-derived typevars. + pub fn copy_from(other: &TypeVar, name: String) -> TypeVar { + assert!( + other.base.is_none(), + "copy_from() can only be called on non-derived type variables" + ); + TypeVar { + content: Rc::new(RefCell::new(TypeVarContent { + name, + doc: "".into(), + type_set: other.type_set.clone(), + base: None, + })), + } + } + + /// Returns the typeset for this TV. If the TV is derived, computes it recursively from the + /// derived function and the base's typeset. + /// Note this can't be done non-lazily in the constructor, because the TypeSet of the base may + /// change over time. + pub fn get_typeset(&self) -> TypeSet { + match &self.base { + Some(base) => base.type_var.get_typeset().image(base.derived_func), + None => self.type_set.clone(), + } + } + + /// Returns this typevar's type set, assuming this type var has no parent. + pub fn get_raw_typeset(&self) -> &TypeSet { + assert_eq!(self.type_set, self.get_typeset()); + &self.type_set + } + + /// If the associated typeset has a single type return it. Otherwise return None. + pub fn singleton_type(&self) -> Option<ValueType> { + let type_set = self.get_typeset(); + if type_set.size() == 1 { + Some(type_set.get_singleton()) + } else { + None + } + } + + /// Get the free type variable controlling this one. + pub fn free_typevar(&self) -> Option<TypeVar> { + match &self.base { + Some(base) => base.type_var.free_typevar(), + None => { + match self.singleton_type() { + // A singleton type isn't a proper free variable. + Some(_) => None, + None => Some(self.clone()), + } + } + } + } + + /// Create a type variable that is a function of another. + pub fn derived(&self, derived_func: DerivedFunc) -> TypeVar { + let ts = self.get_typeset(); + + // Safety checks to avoid over/underflows. 
+ assert!(ts.specials.is_empty(), "can't derive from special types"); + match derived_func { + DerivedFunc::HalfWidth => { + assert!( + ts.ints.is_empty() || *ts.ints.iter().min().unwrap() > 8, + "can't halve all integer types" + ); + assert!( + ts.floats.is_empty() || *ts.floats.iter().min().unwrap() > 32, + "can't halve all float types" + ); + assert!( + ts.bools.is_empty() || *ts.bools.iter().min().unwrap() > 8, + "can't halve all boolean types" + ); + } + DerivedFunc::DoubleWidth => { + assert!( + ts.ints.is_empty() || *ts.ints.iter().max().unwrap() < MAX_BITS, + "can't double all integer types" + ); + assert!( + ts.floats.is_empty() || *ts.floats.iter().max().unwrap() < MAX_FLOAT_BITS, + "can't double all float types" + ); + assert!( + ts.bools.is_empty() || *ts.bools.iter().max().unwrap() < MAX_BITS, + "can't double all boolean types" + ); + } + DerivedFunc::HalfVector => { + assert!( + *ts.lanes.iter().min().unwrap() > 1, + "can't halve a scalar type" + ); + } + DerivedFunc::DoubleVector => { + assert!( + *ts.lanes.iter().max().unwrap() < MAX_LANES, + "can't double 256 lanes" + ); + } + DerivedFunc::SplitLanes => { + assert!( + ts.ints.is_empty() || *ts.ints.iter().min().unwrap() > 8, + "can't halve all integer types" + ); + assert!( + ts.floats.is_empty() || *ts.floats.iter().min().unwrap() > 32, + "can't halve all float types" + ); + assert!( + ts.bools.is_empty() || *ts.bools.iter().min().unwrap() > 8, + "can't halve all boolean types" + ); + assert!( + *ts.lanes.iter().max().unwrap() < MAX_LANES, + "can't double 256 lanes" + ); + } + DerivedFunc::MergeLanes => { + assert!( + ts.ints.is_empty() || *ts.ints.iter().max().unwrap() < MAX_BITS, + "can't double all integer types" + ); + assert!( + ts.floats.is_empty() || *ts.floats.iter().max().unwrap() < MAX_FLOAT_BITS, + "can't double all float types" + ); + assert!( + ts.bools.is_empty() || *ts.bools.iter().max().unwrap() < MAX_BITS, + "can't double all boolean types" + ); + assert!( + *ts.lanes.iter().min().unwrap() > 1, + "can't halve a scalar type" + ); + } + DerivedFunc::LaneOf | DerivedFunc::AsBool => { /* no particular assertions */ } + } + + TypeVar { + content: Rc::new(RefCell::new(TypeVarContent { + name: format!("{}({})", derived_func.name(), self.name), + doc: "".into(), + type_set: ts, + base: Some(TypeVarParent { + type_var: self.clone(), + derived_func, + }), + })), + } + } + + pub fn lane_of(&self) -> TypeVar { + self.derived(DerivedFunc::LaneOf) + } + pub fn as_bool(&self) -> TypeVar { + self.derived(DerivedFunc::AsBool) + } + pub fn half_width(&self) -> TypeVar { + self.derived(DerivedFunc::HalfWidth) + } + pub fn double_width(&self) -> TypeVar { + self.derived(DerivedFunc::DoubleWidth) + } + pub fn half_vector(&self) -> TypeVar { + self.derived(DerivedFunc::HalfVector) + } + pub fn double_vector(&self) -> TypeVar { + self.derived(DerivedFunc::DoubleVector) + } + pub fn split_lanes(&self) -> TypeVar { + self.derived(DerivedFunc::SplitLanes) + } + pub fn merge_lanes(&self) -> TypeVar { + self.derived(DerivedFunc::MergeLanes) + } + + /// Constrain the range of types this variable can assume to a subset of those in the typeset + /// ts. + /// May mutate itself if it's not derived, or its parent if it is. 
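/// 
/// (Editor's note: illustrative sketch, not part of the original sources.)
/// 
/// ```ignore
/// let x = TypeVar::new("x", "example", TypeSetBuilder::new().ints(16..64).build());
/// let half = x.half_width();
/// // Constraining the derived var narrows the parent through the preimage.
/// half.constrain_types_by_ts(TypeSetBuilder::new().ints(8..16).build());
/// assert_eq!(x.get_typeset().ints.iter().copied().collect::<Vec<_>>(), vec![16, 32]);
/// ```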
+ pub fn constrain_types_by_ts(&self, type_set: TypeSet) { + match &self.base { + Some(base) => { + base.type_var + .constrain_types_by_ts(type_set.preimage(base.derived_func)); + } + None => { + self.content + .borrow_mut() + .type_set + .inplace_intersect_with(&type_set); + } + } + } + + /// Constrain the range of types this variable can assume to a subset of those `other` can + /// assume. + /// May mutate itself if it's not derived, or its parent if it is. + pub fn constrain_types(&self, other: TypeVar) { + if self == &other { + return; + } + self.constrain_types_by_ts(other.get_typeset()); + } + + /// Get a Rust expression that computes the type of this type variable. + pub fn to_rust_code(&self) -> String { + match &self.base { + Some(base) => format!( + "{}.{}().unwrap()", + base.type_var.to_rust_code(), + base.derived_func.name() + ), + None => { + if let Some(singleton) = self.singleton_type() { + singleton.rust_name() + } else { + self.name.clone() + } + } + } + } +} + +impl Into<TypeVar> for &TypeVar { + fn into(self) -> TypeVar { + self.clone() + } +} +impl Into<TypeVar> for ValueType { + fn into(self) -> TypeVar { + TypeVar::new_singleton(self) + } +} + +// Hash TypeVars by pointers. +// There might be a better way to do this, but since TypeVar's content (namely TypeSet) can be +// mutated, it makes sense to use pointer equality/hashing here. +impl hash::Hash for TypeVar { + fn hash<H: hash::Hasher>(&self, h: &mut H) { + match &self.base { + Some(base) => { + base.type_var.hash(h); + base.derived_func.hash(h); + } + None => { + (&**self as *const TypeVarContent).hash(h); + } + } + } +} + +impl PartialEq for TypeVar { + fn eq(&self, other: &TypeVar) -> bool { + match (&self.base, &other.base) { + (Some(base1), Some(base2)) => { + base1.type_var.eq(&base2.type_var) && base1.derived_func == base2.derived_func + } + (None, None) => Rc::ptr_eq(&self.content, &other.content), + _ => false, + } + } +} + +// Allow TypeVar as map keys, based on pointer equality (see also above PartialEq impl). +impl Eq for TypeVar {} + +impl ops::Deref for TypeVar { + type Target = TypeVarContent; + fn deref(&self) -> &Self::Target { + unsafe { self.content.as_ptr().as_ref().unwrap() } + } +} + +#[derive(Clone, Copy, Debug, Hash, PartialEq)] +pub(crate) enum DerivedFunc { + LaneOf, + AsBool, + HalfWidth, + DoubleWidth, + HalfVector, + DoubleVector, + SplitLanes, + MergeLanes, +} + +impl DerivedFunc { + pub fn name(self) -> &'static str { + match self { + DerivedFunc::LaneOf => "lane_of", + DerivedFunc::AsBool => "as_bool", + DerivedFunc::HalfWidth => "half_width", + DerivedFunc::DoubleWidth => "double_width", + DerivedFunc::HalfVector => "half_vector", + DerivedFunc::DoubleVector => "double_vector", + DerivedFunc::SplitLanes => "split_lanes", + DerivedFunc::MergeLanes => "merge_lanes", + } + } + + /// Returns the inverse function of this one, if it is a bijection. + pub fn inverse(self) -> Option<DerivedFunc> { + match self { + DerivedFunc::HalfWidth => Some(DerivedFunc::DoubleWidth), + DerivedFunc::DoubleWidth => Some(DerivedFunc::HalfWidth), + DerivedFunc::HalfVector => Some(DerivedFunc::DoubleVector), + DerivedFunc::DoubleVector => Some(DerivedFunc::HalfVector), + DerivedFunc::MergeLanes => Some(DerivedFunc::SplitLanes), + DerivedFunc::SplitLanes => Some(DerivedFunc::MergeLanes), + _ => None, + } + } +} + +#[derive(Debug, Hash)] +pub(crate) struct TypeVarParent { + pub type_var: TypeVar, + pub derived_func: DerivedFunc, +} + +/// A set of types. 
+/// +/// We don't allow arbitrary subsets of types, but use a parametrized approach +/// instead. +/// +/// Objects of this class can be used as dictionary keys. +/// +/// Parametrized type sets are specified in terms of ranges: +/// - The permitted range of vector lanes, where 1 indicates a scalar type. +/// - The permitted range of integer types. +/// - The permitted range of floating point types, and +/// - The permitted range of boolean types. +/// +/// The ranges are inclusive from smallest bit-width to largest bit-width. +/// +/// Finally, a type set can contain special types (derived from `SpecialType`) +/// which can't appear as lane types. + +type RangeBound = u16; +type Range = ops::Range<RangeBound>; +type NumSet = BTreeSet<RangeBound>; + +macro_rules! num_set { + ($($expr:expr),*) => { + NumSet::from_iter(vec![$($expr),*]) + }; +} + +#[derive(Clone, PartialEq, Eq, Hash)] +pub(crate) struct TypeSet { + pub lanes: NumSet, + pub ints: NumSet, + pub floats: NumSet, + pub bools: NumSet, + pub refs: NumSet, + pub specials: Vec<SpecialType>, +} + +impl TypeSet { + fn new( + lanes: NumSet, + ints: NumSet, + floats: NumSet, + bools: NumSet, + refs: NumSet, + specials: Vec<SpecialType>, + ) -> Self { + Self { + lanes, + ints, + floats, + bools, + refs, + specials, + } + } + + /// Return the number of concrete types represented by this typeset. + pub fn size(&self) -> usize { + self.lanes.len() + * (self.ints.len() + self.floats.len() + self.bools.len() + self.refs.len()) + + self.specials.len() + } + + /// Return the image of self across the derived function func. + fn image(&self, derived_func: DerivedFunc) -> TypeSet { + match derived_func { + DerivedFunc::LaneOf => self.lane_of(), + DerivedFunc::AsBool => self.as_bool(), + DerivedFunc::HalfWidth => self.half_width(), + DerivedFunc::DoubleWidth => self.double_width(), + DerivedFunc::HalfVector => self.half_vector(), + DerivedFunc::DoubleVector => self.double_vector(), + DerivedFunc::SplitLanes => self.half_width().double_vector(), + DerivedFunc::MergeLanes => self.double_width().half_vector(), + } + } + + /// Return a TypeSet describing the image of self across lane_of. + fn lane_of(&self) -> TypeSet { + let mut copy = self.clone(); + copy.lanes = num_set![1]; + copy + } + + /// Return a TypeSet describing the image of self across as_bool. + fn as_bool(&self) -> TypeSet { + let mut copy = self.clone(); + copy.ints = NumSet::new(); + copy.floats = NumSet::new(); + copy.refs = NumSet::new(); + if !(&self.lanes - &num_set![1]).is_empty() { + copy.bools = &self.ints | &self.floats; + copy.bools = ©.bools | &self.bools; + } + if self.lanes.contains(&1) { + copy.bools.insert(1); + } + copy + } + + /// Return a TypeSet describing the image of self across halfwidth. + fn half_width(&self) -> TypeSet { + let mut copy = self.clone(); + copy.ints = NumSet::from_iter(self.ints.iter().filter(|&&x| x > 8).map(|&x| x / 2)); + copy.floats = NumSet::from_iter(self.floats.iter().filter(|&&x| x > 32).map(|&x| x / 2)); + copy.bools = NumSet::from_iter(self.bools.iter().filter(|&&x| x > 8).map(|&x| x / 2)); + copy.specials = Vec::new(); + copy + } + + /// Return a TypeSet describing the image of self across doublewidth. 
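/// 
/// (Editor's note: illustrative sketch, not part of the original sources; it mirrors
/// the unit tests further down in this file.)
/// 
/// ```ignore
/// let ts = TypeSetBuilder::new().ints(8..32).build();
/// // i8 has no narrower integer type, so it drops out of the half-width image.
/// assert_eq!(ts.half_width(), TypeSetBuilder::new().ints(8..16).build());
/// // double_width is used as the preimage of HalfWidth (and vice versa).
/// assert_eq!(ts.double_width(), TypeSetBuilder::new().ints(16..64).build());
/// ```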
+ fn double_width(&self) -> TypeSet { + let mut copy = self.clone(); + copy.ints = NumSet::from_iter(self.ints.iter().filter(|&&x| x < MAX_BITS).map(|&x| x * 2)); + copy.floats = NumSet::from_iter( + self.floats + .iter() + .filter(|&&x| x < MAX_FLOAT_BITS) + .map(|&x| x * 2), + ); + copy.bools = NumSet::from_iter( + self.bools + .iter() + .filter(|&&x| x < MAX_BITS) + .map(|&x| x * 2) + .filter(|x| legal_bool(*x)), + ); + copy.specials = Vec::new(); + copy + } + + /// Return a TypeSet describing the image of self across halfvector. + fn half_vector(&self) -> TypeSet { + let mut copy = self.clone(); + copy.lanes = NumSet::from_iter(self.lanes.iter().filter(|&&x| x > 1).map(|&x| x / 2)); + copy.specials = Vec::new(); + copy + } + + /// Return a TypeSet describing the image of self across doublevector. + fn double_vector(&self) -> TypeSet { + let mut copy = self.clone(); + copy.lanes = NumSet::from_iter( + self.lanes + .iter() + .filter(|&&x| x < MAX_LANES) + .map(|&x| x * 2), + ); + copy.specials = Vec::new(); + copy + } + + fn concrete_types(&self) -> Vec<ValueType> { + let mut ret = Vec::new(); + for &num_lanes in &self.lanes { + for &bits in &self.ints { + ret.push(LaneType::int_from_bits(bits).by(num_lanes)); + } + for &bits in &self.floats { + ret.push(LaneType::float_from_bits(bits).by(num_lanes)); + } + for &bits in &self.bools { + ret.push(LaneType::bool_from_bits(bits).by(num_lanes)); + } + for &bits in &self.refs { + ret.push(ReferenceType::ref_from_bits(bits).into()); + } + } + for &special in &self.specials { + ret.push(special.into()); + } + ret + } + + /// Return the singleton type represented by self. Can only call on typesets containing 1 type. + fn get_singleton(&self) -> ValueType { + let mut types = self.concrete_types(); + assert_eq!(types.len(), 1); + types.remove(0) + } + + /// Return the inverse image of self across the derived function func. + fn preimage(&self, func: DerivedFunc) -> TypeSet { + if self.size() == 0 { + // The inverse of the empty set is itself. + return self.clone(); + } + + match func { + DerivedFunc::LaneOf => { + let mut copy = self.clone(); + copy.lanes = + NumSet::from_iter((0..=MAX_LANES.trailing_zeros()).map(|i| u16::pow(2, i))); + copy + } + DerivedFunc::AsBool => { + let mut copy = self.clone(); + if self.bools.contains(&1) { + copy.ints = NumSet::from_iter(vec![8, 16, 32, 64, 128]); + copy.floats = NumSet::from_iter(vec![32, 64]); + } else { + copy.ints = &self.bools - &NumSet::from_iter(vec![1]); + copy.floats = &self.bools & &NumSet::from_iter(vec![32, 64]); + // If b1 is not in our typeset, than lanes=1 cannot be in the pre-image, as + // as_bool() of scalars is always b1. 
+ copy.lanes = &self.lanes - &NumSet::from_iter(vec![1]); + } + copy + } + DerivedFunc::HalfWidth => self.double_width(), + DerivedFunc::DoubleWidth => self.half_width(), + DerivedFunc::HalfVector => self.double_vector(), + DerivedFunc::DoubleVector => self.half_vector(), + DerivedFunc::SplitLanes => self.double_width().half_vector(), + DerivedFunc::MergeLanes => self.half_width().double_vector(), + } + } + + pub fn inplace_intersect_with(&mut self, other: &TypeSet) { + self.lanes = &self.lanes & &other.lanes; + self.ints = &self.ints & &other.ints; + self.floats = &self.floats & &other.floats; + self.bools = &self.bools & &other.bools; + self.refs = &self.refs & &other.refs; + + let mut new_specials = Vec::new(); + for spec in &self.specials { + if let Some(spec) = other.specials.iter().find(|&other_spec| other_spec == spec) { + new_specials.push(*spec); + } + } + self.specials = new_specials; + } + + pub fn is_subset(&self, other: &TypeSet) -> bool { + self.lanes.is_subset(&other.lanes) + && self.ints.is_subset(&other.ints) + && self.floats.is_subset(&other.floats) + && self.bools.is_subset(&other.bools) + && self.refs.is_subset(&other.refs) + && { + let specials: HashSet<SpecialType> = HashSet::from_iter(self.specials.clone()); + let other_specials = HashSet::from_iter(other.specials.clone()); + specials.is_subset(&other_specials) + } + } + + pub fn is_wider_or_equal(&self, other: &TypeSet) -> bool { + set_wider_or_equal(&self.ints, &other.ints) + && set_wider_or_equal(&self.floats, &other.floats) + && set_wider_or_equal(&self.bools, &other.bools) + && set_wider_or_equal(&self.refs, &other.refs) + } + + pub fn is_narrower(&self, other: &TypeSet) -> bool { + set_narrower(&self.ints, &other.ints) + && set_narrower(&self.floats, &other.floats) + && set_narrower(&self.bools, &other.bools) + && set_narrower(&self.refs, &other.refs) + } +} + +fn set_wider_or_equal(s1: &NumSet, s2: &NumSet) -> bool { + !s1.is_empty() && !s2.is_empty() && s1.iter().min() >= s2.iter().max() +} + +fn set_narrower(s1: &NumSet, s2: &NumSet) -> bool { + !s1.is_empty() && !s2.is_empty() && s1.iter().min() < s2.iter().max() +} + +impl fmt::Debug for TypeSet { + fn fmt(&self, fmt: &mut fmt::Formatter) -> Result<(), fmt::Error> { + write!(fmt, "TypeSet(")?; + + let mut subsets = Vec::new(); + if !self.lanes.is_empty() { + subsets.push(format!( + "lanes={{{}}}", + Vec::from_iter(self.lanes.iter().map(|x| x.to_string())).join(", ") + )); + } + if !self.ints.is_empty() { + subsets.push(format!( + "ints={{{}}}", + Vec::from_iter(self.ints.iter().map(|x| x.to_string())).join(", ") + )); + } + if !self.floats.is_empty() { + subsets.push(format!( + "floats={{{}}}", + Vec::from_iter(self.floats.iter().map(|x| x.to_string())).join(", ") + )); + } + if !self.bools.is_empty() { + subsets.push(format!( + "bools={{{}}}", + Vec::from_iter(self.bools.iter().map(|x| x.to_string())).join(", ") + )); + } + if !self.refs.is_empty() { + subsets.push(format!( + "refs={{{}}}", + Vec::from_iter(self.refs.iter().map(|x| x.to_string())).join(", ") + )); + } + if !self.specials.is_empty() { + subsets.push(format!( + "specials={{{}}}", + Vec::from_iter(self.specials.iter().map(|x| x.to_string())).join(", ") + )); + } + + write!(fmt, "{})", subsets.join(", "))?; + Ok(()) + } +} + +pub(crate) struct TypeSetBuilder { + ints: Interval, + floats: Interval, + bools: Interval, + refs: Interval, + includes_scalars: bool, + simd_lanes: Interval, + specials: Vec<SpecialType>, +} + +impl TypeSetBuilder { + pub fn new() -> Self { + Self { + ints: 
Interval::None, + floats: Interval::None, + bools: Interval::None, + refs: Interval::None, + includes_scalars: true, + simd_lanes: Interval::None, + specials: Vec::new(), + } + } + + pub fn ints(mut self, interval: impl Into<Interval>) -> Self { + assert!(self.ints == Interval::None); + self.ints = interval.into(); + self + } + pub fn floats(mut self, interval: impl Into<Interval>) -> Self { + assert!(self.floats == Interval::None); + self.floats = interval.into(); + self + } + pub fn bools(mut self, interval: impl Into<Interval>) -> Self { + assert!(self.bools == Interval::None); + self.bools = interval.into(); + self + } + pub fn refs(mut self, interval: impl Into<Interval>) -> Self { + assert!(self.refs == Interval::None); + self.refs = interval.into(); + self + } + pub fn includes_scalars(mut self, includes_scalars: bool) -> Self { + self.includes_scalars = includes_scalars; + self + } + pub fn simd_lanes(mut self, interval: impl Into<Interval>) -> Self { + assert!(self.simd_lanes == Interval::None); + self.simd_lanes = interval.into(); + self + } + pub fn specials(mut self, specials: Vec<SpecialType>) -> Self { + assert!(self.specials.is_empty()); + self.specials = specials; + self + } + + pub fn build(self) -> TypeSet { + let min_lanes = if self.includes_scalars { 1 } else { 2 }; + + let bools = range_to_set(self.bools.to_range(1..MAX_BITS, None)) + .into_iter() + .filter(|x| legal_bool(*x)) + .collect(); + + TypeSet::new( + range_to_set(self.simd_lanes.to_range(min_lanes..MAX_LANES, Some(1))), + range_to_set(self.ints.to_range(8..MAX_BITS, None)), + range_to_set(self.floats.to_range(32..64, None)), + bools, + range_to_set(self.refs.to_range(32..64, None)), + self.specials, + ) + } + + pub fn all() -> TypeSet { + TypeSetBuilder::new() + .ints(Interval::All) + .floats(Interval::All) + .bools(Interval::All) + .refs(Interval::All) + .simd_lanes(Interval::All) + .specials(ValueType::all_special_types().collect()) + .includes_scalars(true) + .build() + } +} + +#[derive(PartialEq)] +pub(crate) enum Interval { + None, + All, + Range(Range), +} + +impl Interval { + fn to_range(&self, full_range: Range, default: Option<RangeBound>) -> Option<Range> { + match self { + Interval::None => { + if let Some(default_val) = default { + Some(default_val..default_val) + } else { + None + } + } + + Interval::All => Some(full_range), + + Interval::Range(range) => { + let (low, high) = (range.start, range.end); + assert!(low.is_power_of_two()); + assert!(high.is_power_of_two()); + assert!(low <= high); + assert!(low >= full_range.start); + assert!(high <= full_range.end); + Some(low..high) + } + } + } +} + +impl Into<Interval> for Range { + fn into(self) -> Interval { + Interval::Range(self) + } +} + +fn legal_bool(bits: RangeBound) -> bool { + // Only allow legal bit widths for bool types. + bits == 1 || (bits >= 8 && bits <= MAX_BITS && bits.is_power_of_two()) +} + +/// Generates a set with all the powers of two included in the range. 
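/// 
/// (Editor's note: illustrative sketch, not part of the original sources.)
/// 
/// ```ignore
/// // Bounds are inclusive powers of two; the set contains every power of two in between.
/// assert_eq!(range_to_set(Some(8..64)), NumSet::from_iter(vec![8, 16, 32, 64]));
/// // `None` (an unset Interval) yields the empty set.
/// assert!(range_to_set(None).is_empty());
/// ```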
+fn range_to_set(range: Option<Range>) -> NumSet { + let mut set = NumSet::new(); + + let (low, high) = match range { + Some(range) => (range.start, range.end), + None => return set, + }; + + assert!(low.is_power_of_two()); + assert!(high.is_power_of_two()); + assert!(low <= high); + + for i in low.trailing_zeros()..=high.trailing_zeros() { + assert!(1 << i <= RangeBound::max_value()); + set.insert(1 << i); + } + set +} + +#[test] +fn test_typevar_builder() { + let type_set = TypeSetBuilder::new().ints(Interval::All).build(); + assert_eq!(type_set.lanes, num_set![1]); + assert!(type_set.floats.is_empty()); + assert_eq!(type_set.ints, num_set![8, 16, 32, 64, 128]); + assert!(type_set.bools.is_empty()); + assert!(type_set.specials.is_empty()); + + let type_set = TypeSetBuilder::new().bools(Interval::All).build(); + assert_eq!(type_set.lanes, num_set![1]); + assert!(type_set.floats.is_empty()); + assert!(type_set.ints.is_empty()); + assert_eq!(type_set.bools, num_set![1, 8, 16, 32, 64, 128]); + assert!(type_set.specials.is_empty()); + + let type_set = TypeSetBuilder::new().floats(Interval::All).build(); + assert_eq!(type_set.lanes, num_set![1]); + assert_eq!(type_set.floats, num_set![32, 64]); + assert!(type_set.ints.is_empty()); + assert!(type_set.bools.is_empty()); + assert!(type_set.specials.is_empty()); + + let type_set = TypeSetBuilder::new() + .floats(Interval::All) + .simd_lanes(Interval::All) + .includes_scalars(false) + .build(); + assert_eq!(type_set.lanes, num_set![2, 4, 8, 16, 32, 64, 128, 256]); + assert_eq!(type_set.floats, num_set![32, 64]); + assert!(type_set.ints.is_empty()); + assert!(type_set.bools.is_empty()); + assert!(type_set.specials.is_empty()); + + let type_set = TypeSetBuilder::new() + .floats(Interval::All) + .simd_lanes(Interval::All) + .includes_scalars(true) + .build(); + assert_eq!(type_set.lanes, num_set![1, 2, 4, 8, 16, 32, 64, 128, 256]); + assert_eq!(type_set.floats, num_set![32, 64]); + assert!(type_set.ints.is_empty()); + assert!(type_set.bools.is_empty()); + assert!(type_set.specials.is_empty()); + + let type_set = TypeSetBuilder::new().ints(16..64).build(); + assert_eq!(type_set.lanes, num_set![1]); + assert_eq!(type_set.ints, num_set![16, 32, 64]); + assert!(type_set.floats.is_empty()); + assert!(type_set.bools.is_empty()); + assert!(type_set.specials.is_empty()); +} + +#[test] +#[should_panic] +fn test_typevar_builder_too_high_bound_panic() { + TypeSetBuilder::new().ints(16..2 * MAX_BITS).build(); +} + +#[test] +#[should_panic] +fn test_typevar_builder_inverted_bounds_panic() { + TypeSetBuilder::new().ints(32..16).build(); +} + +#[test] +fn test_as_bool() { + let a = TypeSetBuilder::new() + .simd_lanes(2..8) + .ints(8..8) + .floats(32..32) + .build(); + assert_eq!( + a.lane_of(), + TypeSetBuilder::new().ints(8..8).floats(32..32).build() + ); + + // Test as_bool with disjoint intervals. + let mut a_as_bool = TypeSetBuilder::new().simd_lanes(2..8).build(); + a_as_bool.bools = num_set![8, 32]; + assert_eq!(a.as_bool(), a_as_bool); + + let b = TypeSetBuilder::new() + .simd_lanes(1..8) + .ints(8..8) + .floats(32..32) + .build(); + let mut b_as_bool = TypeSetBuilder::new().simd_lanes(1..8).build(); + b_as_bool.bools = num_set![1, 8, 32]; + assert_eq!(b.as_bool(), b_as_bool); +} + +#[test] +fn test_forward_images() { + let empty_set = TypeSetBuilder::new().build(); + + // Half vector. + assert_eq!( + TypeSetBuilder::new() + .simd_lanes(1..32) + .build() + .half_vector(), + TypeSetBuilder::new().simd_lanes(1..16).build() + ); + + // Double vector. 
+ assert_eq!( + TypeSetBuilder::new() + .simd_lanes(1..32) + .build() + .double_vector(), + TypeSetBuilder::new().simd_lanes(2..64).build() + ); + assert_eq!( + TypeSetBuilder::new() + .simd_lanes(128..256) + .build() + .double_vector(), + TypeSetBuilder::new().simd_lanes(256..256).build() + ); + + // Half width. + assert_eq!( + TypeSetBuilder::new().ints(8..32).build().half_width(), + TypeSetBuilder::new().ints(8..16).build() + ); + assert_eq!( + TypeSetBuilder::new().floats(32..32).build().half_width(), + empty_set + ); + assert_eq!( + TypeSetBuilder::new().floats(32..64).build().half_width(), + TypeSetBuilder::new().floats(32..32).build() + ); + assert_eq!( + TypeSetBuilder::new().bools(1..8).build().half_width(), + empty_set + ); + assert_eq!( + TypeSetBuilder::new().bools(1..32).build().half_width(), + TypeSetBuilder::new().bools(8..16).build() + ); + + // Double width. + assert_eq!( + TypeSetBuilder::new().ints(8..32).build().double_width(), + TypeSetBuilder::new().ints(16..64).build() + ); + assert_eq!( + TypeSetBuilder::new().ints(32..64).build().double_width(), + TypeSetBuilder::new().ints(64..128).build() + ); + assert_eq!( + TypeSetBuilder::new().floats(32..32).build().double_width(), + TypeSetBuilder::new().floats(64..64).build() + ); + assert_eq!( + TypeSetBuilder::new().floats(32..64).build().double_width(), + TypeSetBuilder::new().floats(64..64).build() + ); + assert_eq!( + TypeSetBuilder::new().bools(1..16).build().double_width(), + TypeSetBuilder::new().bools(16..32).build() + ); + assert_eq!( + TypeSetBuilder::new().bools(32..64).build().double_width(), + TypeSetBuilder::new().bools(64..128).build() + ); +} + +#[test] +fn test_backward_images() { + let empty_set = TypeSetBuilder::new().build(); + + // LaneOf. + assert_eq!( + TypeSetBuilder::new() + .simd_lanes(1..1) + .ints(8..8) + .floats(32..32) + .build() + .preimage(DerivedFunc::LaneOf), + TypeSetBuilder::new() + .simd_lanes(Interval::All) + .ints(8..8) + .floats(32..32) + .build() + ); + assert_eq!(empty_set.preimage(DerivedFunc::LaneOf), empty_set); + + // AsBool. + assert_eq!( + TypeSetBuilder::new() + .simd_lanes(1..4) + .bools(1..128) + .build() + .preimage(DerivedFunc::AsBool), + TypeSetBuilder::new() + .simd_lanes(1..4) + .ints(Interval::All) + .bools(Interval::All) + .floats(Interval::All) + .build() + ); + + // Double vector. + assert_eq!( + TypeSetBuilder::new() + .simd_lanes(1..1) + .ints(8..8) + .build() + .preimage(DerivedFunc::DoubleVector) + .size(), + 0 + ); + assert_eq!( + TypeSetBuilder::new() + .simd_lanes(1..16) + .ints(8..16) + .floats(32..32) + .build() + .preimage(DerivedFunc::DoubleVector), + TypeSetBuilder::new() + .simd_lanes(1..8) + .ints(8..16) + .floats(32..32) + .build(), + ); + + // Half vector. + assert_eq!( + TypeSetBuilder::new() + .simd_lanes(256..256) + .ints(8..8) + .build() + .preimage(DerivedFunc::HalfVector) + .size(), + 0 + ); + assert_eq!( + TypeSetBuilder::new() + .simd_lanes(64..128) + .bools(1..32) + .build() + .preimage(DerivedFunc::HalfVector), + TypeSetBuilder::new() + .simd_lanes(128..256) + .bools(1..32) + .build(), + ); + + // Half width. + assert_eq!( + TypeSetBuilder::new() + .ints(128..128) + .floats(64..64) + .bools(128..128) + .build() + .preimage(DerivedFunc::HalfWidth) + .size(), + 0 + ); + assert_eq!( + TypeSetBuilder::new() + .simd_lanes(64..256) + .bools(1..64) + .build() + .preimage(DerivedFunc::HalfWidth), + TypeSetBuilder::new() + .simd_lanes(64..256) + .bools(16..128) + .build(), + ); + + // Double width. 
+ assert_eq!( + TypeSetBuilder::new() + .ints(8..8) + .floats(32..32) + .bools(1..8) + .build() + .preimage(DerivedFunc::DoubleWidth) + .size(), + 0 + ); + assert_eq!( + TypeSetBuilder::new() + .simd_lanes(1..16) + .ints(8..16) + .floats(32..64) + .build() + .preimage(DerivedFunc::DoubleWidth), + TypeSetBuilder::new() + .simd_lanes(1..16) + .ints(8..8) + .floats(32..32) + .build() + ); +} + +#[test] +#[should_panic] +fn test_typeset_singleton_panic_nonsingleton_types() { + TypeSetBuilder::new() + .ints(8..8) + .floats(32..32) + .build() + .get_singleton(); +} + +#[test] +#[should_panic] +fn test_typeset_singleton_panic_nonsingleton_lanes() { + TypeSetBuilder::new() + .simd_lanes(1..2) + .floats(32..32) + .build() + .get_singleton(); +} + +#[test] +fn test_typeset_singleton() { + use crate::shared::types as shared_types; + assert_eq!( + TypeSetBuilder::new().ints(16..16).build().get_singleton(), + ValueType::Lane(shared_types::Int::I16.into()) + ); + assert_eq!( + TypeSetBuilder::new().floats(64..64).build().get_singleton(), + ValueType::Lane(shared_types::Float::F64.into()) + ); + assert_eq!( + TypeSetBuilder::new().bools(1..1).build().get_singleton(), + ValueType::Lane(shared_types::Bool::B1.into()) + ); + assert_eq!( + TypeSetBuilder::new() + .simd_lanes(4..4) + .ints(32..32) + .build() + .get_singleton(), + LaneType::from(shared_types::Int::I32).by(4) + ); +} + +#[test] +fn test_typevar_functions() { + let x = TypeVar::new( + "x", + "i16 and up", + TypeSetBuilder::new().ints(16..64).build(), + ); + assert_eq!(x.half_width().name, "half_width(x)"); + assert_eq!( + x.half_width().double_width().name, + "double_width(half_width(x))" + ); + + let x = TypeVar::new("x", "up to i32", TypeSetBuilder::new().ints(8..32).build()); + assert_eq!(x.double_width().name, "double_width(x)"); +} + +#[test] +fn test_typevar_singleton() { + use crate::cdsl::types::VectorType; + use crate::shared::types as shared_types; + + // Test i32. + let typevar = TypeVar::new_singleton(ValueType::Lane(LaneType::Int(shared_types::Int::I32))); + assert_eq!(typevar.name, "i32"); + assert_eq!(typevar.type_set.ints, num_set![32]); + assert!(typevar.type_set.floats.is_empty()); + assert!(typevar.type_set.bools.is_empty()); + assert!(typevar.type_set.specials.is_empty()); + assert_eq!(typevar.type_set.lanes, num_set![1]); + + // Test f32x4. + let typevar = TypeVar::new_singleton(ValueType::Vector(VectorType::new( + LaneType::Float(shared_types::Float::F32), + 4, + ))); + assert_eq!(typevar.name, "f32x4"); + assert!(typevar.type_set.ints.is_empty()); + assert_eq!(typevar.type_set.floats, num_set![32]); + assert_eq!(typevar.type_set.lanes, num_set![4]); + assert!(typevar.type_set.bools.is_empty()); + assert!(typevar.type_set.specials.is_empty()); +} diff --git a/third_party/rust/cranelift-codegen-meta/src/cdsl/xform.rs b/third_party/rust/cranelift-codegen-meta/src/cdsl/xform.rs new file mode 100644 index 0000000000..d21e93128d --- /dev/null +++ b/third_party/rust/cranelift-codegen-meta/src/cdsl/xform.rs @@ -0,0 +1,484 @@ +use crate::cdsl::ast::{ + Apply, BlockPool, ConstPool, DefIndex, DefPool, DummyDef, DummyExpr, Expr, PatternPosition, + VarIndex, VarPool, +}; +use crate::cdsl::instructions::Instruction; +use crate::cdsl::type_inference::{infer_transform, TypeEnvironment}; +use crate::cdsl::typevar::TypeVar; + +use cranelift_entity::{entity_impl, PrimaryMap}; + +use std::collections::{HashMap, HashSet}; +use std::iter::FromIterator; + +/// An instruction transformation consists of a source and destination pattern. 
+/// +/// Patterns are expressed in *register transfer language* as tuples of Def or Expr nodes. A +/// pattern may optionally have a sequence of TypeConstraints, that additionally limit the set of +/// cases when it applies. +/// +/// The source pattern can contain only a single instruction. +pub(crate) struct Transform { + pub src: DefIndex, + pub dst: Vec<DefIndex>, + pub var_pool: VarPool, + pub def_pool: DefPool, + pub block_pool: BlockPool, + pub const_pool: ConstPool, + pub type_env: TypeEnvironment, +} + +type SymbolTable = HashMap<String, VarIndex>; + +impl Transform { + fn new(src: DummyDef, dst: Vec<DummyDef>) -> Self { + let mut var_pool = VarPool::new(); + let mut def_pool = DefPool::new(); + let mut block_pool = BlockPool::new(); + let mut const_pool = ConstPool::new(); + + let mut input_vars: Vec<VarIndex> = Vec::new(); + let mut defined_vars: Vec<VarIndex> = Vec::new(); + + // Maps variable names to our own Var copies. + let mut symbol_table: SymbolTable = SymbolTable::new(); + + // Rewrite variables in src and dst using our own copies. + let src = rewrite_def_list( + PatternPosition::Source, + vec![src], + &mut symbol_table, + &mut input_vars, + &mut defined_vars, + &mut var_pool, + &mut def_pool, + &mut block_pool, + &mut const_pool, + )[0]; + + let num_src_inputs = input_vars.len(); + + let dst = rewrite_def_list( + PatternPosition::Destination, + dst, + &mut symbol_table, + &mut input_vars, + &mut defined_vars, + &mut var_pool, + &mut def_pool, + &mut block_pool, + &mut const_pool, + ); + + // Sanity checks. + for &var_index in &input_vars { + assert!( + var_pool.get(var_index).is_input(), + format!("'{:?}' used as both input and def", var_pool.get(var_index)) + ); + } + assert!( + input_vars.len() == num_src_inputs, + format!( + "extra input vars in dst pattern: {:?}", + input_vars + .iter() + .map(|&i| var_pool.get(i)) + .skip(num_src_inputs) + .collect::<Vec<_>>() + ) + ); + + // Perform type inference and cleanup. + let type_env = infer_transform(src, &dst, &def_pool, &mut var_pool).unwrap(); + + // Sanity check: the set of inferred free type variables should be a subset of the type + // variables corresponding to Vars appearing in the source pattern. + { + let free_typevars: HashSet<TypeVar> = + HashSet::from_iter(type_env.free_typevars(&mut var_pool)); + let src_tvs = HashSet::from_iter( + input_vars + .clone() + .iter() + .chain( + defined_vars + .iter() + .filter(|&&var_index| !var_pool.get(var_index).is_temp()), + ) + .map(|&var_index| var_pool.get(var_index).get_typevar()) + .filter(|maybe_var| maybe_var.is_some()) + .map(|var| var.unwrap()), + ); + if !free_typevars.is_subset(&src_tvs) { + let missing_tvs = (&free_typevars - &src_tvs) + .iter() + .map(|tv| tv.name.clone()) + .collect::<Vec<_>>() + .join(", "); + panic!("Some free vars don't appear in src: {}", missing_tvs); + } + } + + for &var_index in input_vars.iter().chain(defined_vars.iter()) { + let var = var_pool.get_mut(var_index); + let canon_tv = type_env.get_equivalent(&var.get_or_create_typevar()); + var.set_typevar(canon_tv); + } + + Self { + src, + dst, + var_pool, + def_pool, + block_pool, + const_pool, + type_env, + } + } + + fn verify_legalize(&self) { + let def = self.def_pool.get(self.src); + for &var_index in def.defined_vars.iter() { + let defined_var = self.var_pool.get(var_index); + assert!( + defined_var.is_output(), + format!("{:?} not defined in the destination pattern", defined_var) + ); + } + } +} + +/// Inserts, if not present, a name in the `symbol_table`. 
Then returns its index in the variable +/// pool `var_pool`. If the variable was not present in the symbol table, then add it to the list of +/// `defined_vars`. +fn var_index( + name: &str, + symbol_table: &mut SymbolTable, + defined_vars: &mut Vec<VarIndex>, + var_pool: &mut VarPool, +) -> VarIndex { + let name = name.to_string(); + match symbol_table.get(&name) { + Some(&existing_var) => existing_var, + None => { + // Materialize the variable. + let new_var = var_pool.create(name.clone()); + symbol_table.insert(name, new_var); + defined_vars.push(new_var); + new_var + } + } +} + +/// Given a list of symbols defined in a Def, rewrite them to local symbols. Yield the new locals. +fn rewrite_defined_vars( + position: PatternPosition, + dummy_def: &DummyDef, + def_index: DefIndex, + symbol_table: &mut SymbolTable, + defined_vars: &mut Vec<VarIndex>, + var_pool: &mut VarPool, +) -> Vec<VarIndex> { + let mut new_defined_vars = Vec::new(); + for var in &dummy_def.defined_vars { + let own_var = var_index(&var.name, symbol_table, defined_vars, var_pool); + var_pool.get_mut(own_var).set_def(position, def_index); + new_defined_vars.push(own_var); + } + new_defined_vars +} + +/// Find all uses of variables in `expr` and replace them with our own local symbols. +fn rewrite_expr( + position: PatternPosition, + dummy_expr: DummyExpr, + symbol_table: &mut SymbolTable, + input_vars: &mut Vec<VarIndex>, + var_pool: &mut VarPool, + const_pool: &mut ConstPool, +) -> Apply { + let (apply_target, dummy_args) = if let DummyExpr::Apply(apply_target, dummy_args) = dummy_expr + { + (apply_target, dummy_args) + } else { + panic!("we only rewrite apply expressions"); + }; + + assert_eq!( + apply_target.inst().operands_in.len(), + dummy_args.len(), + "number of arguments in instruction {} is incorrect\nexpected: {:?}", + apply_target.inst().name, + apply_target + .inst() + .operands_in + .iter() + .map(|operand| format!("{}: {}", operand.name, operand.kind.rust_type)) + .collect::<Vec<_>>(), + ); + + let mut args = Vec::new(); + for (i, arg) in dummy_args.into_iter().enumerate() { + match arg { + DummyExpr::Var(var) => { + let own_var = var_index(&var.name, symbol_table, input_vars, var_pool); + let var = var_pool.get(own_var); + assert!( + var.is_input() || var.get_def(position).is_some(), + format!("{:?} used as both input and def", var) + ); + args.push(Expr::Var(own_var)); + } + DummyExpr::Literal(literal) => { + assert!(!apply_target.inst().operands_in[i].is_value()); + args.push(Expr::Literal(literal)); + } + DummyExpr::Constant(constant) => { + let const_name = const_pool.insert(constant.0); + // Here we abuse var_index by passing an empty, immediately-dropped vector to + // `defined_vars`; the reason for this is that unlike the `Var` case above, + // constants will create a variable that is not an input variable (it is tracked + // instead by ConstPool). + let const_var = var_index(&const_name, symbol_table, &mut vec![], var_pool); + args.push(Expr::Var(const_var)); + } + DummyExpr::Apply(..) 
=> { + panic!("Recursive apply is not allowed."); + } + DummyExpr::Block(_block) => { + panic!("Blocks are not valid arguments."); + } + } + } + + Apply::new(apply_target, args) +} + +#[allow(clippy::too_many_arguments)] +fn rewrite_def_list( + position: PatternPosition, + dummy_defs: Vec<DummyDef>, + symbol_table: &mut SymbolTable, + input_vars: &mut Vec<VarIndex>, + defined_vars: &mut Vec<VarIndex>, + var_pool: &mut VarPool, + def_pool: &mut DefPool, + block_pool: &mut BlockPool, + const_pool: &mut ConstPool, +) -> Vec<DefIndex> { + let mut new_defs = Vec::new(); + // Register variable names of new blocks first as a block name can be used to jump forward. Thus + // the name has to be registered first to avoid misinterpreting it as an input-var. + for dummy_def in dummy_defs.iter() { + if let DummyExpr::Block(ref var) = dummy_def.expr { + var_index(&var.name, symbol_table, defined_vars, var_pool); + } + } + + // Iterate over the definitions and blocks, to map variables names to inputs or outputs. + for dummy_def in dummy_defs { + let def_index = def_pool.next_index(); + + let new_defined_vars = rewrite_defined_vars( + position, + &dummy_def, + def_index, + symbol_table, + defined_vars, + var_pool, + ); + if let DummyExpr::Block(var) = dummy_def.expr { + let var_index = *symbol_table + .get(&var.name) + .or_else(|| { + panic!( + "Block {} was not registered during the first visit", + var.name + ) + }) + .unwrap(); + var_pool.get_mut(var_index).set_def(position, def_index); + block_pool.create_block(var_index, def_index); + } else { + let new_apply = rewrite_expr( + position, + dummy_def.expr, + symbol_table, + input_vars, + var_pool, + const_pool, + ); + + assert!( + def_pool.next_index() == def_index, + "shouldn't have created new defs in the meanwhile" + ); + assert_eq!( + new_apply.inst.value_results.len(), + new_defined_vars.len(), + "number of Var results in instruction is incorrect" + ); + + new_defs.push(def_pool.create_inst(new_apply, new_defined_vars)); + } + } + new_defs +} + +/// A group of related transformations. +pub(crate) struct TransformGroup { + pub name: &'static str, + pub doc: &'static str, + pub chain_with: Option<TransformGroupIndex>, + pub isa_name: Option<&'static str>, + pub id: TransformGroupIndex, + + /// Maps Instruction camel_case names to custom legalization functions names. + pub custom_legalizes: HashMap<String, &'static str>, + pub transforms: Vec<Transform>, +} + +impl TransformGroup { + pub fn rust_name(&self) -> String { + match self.isa_name { + Some(_) => { + // This is a function in the same module as the LEGALIZE_ACTIONS table referring to + // it. 
+ self.name.to_string() + } + None => format!("crate::legalizer::{}", self.name), + } + } +} + +#[derive(Copy, Clone, PartialEq, Eq, Hash, PartialOrd, Ord)] +pub(crate) struct TransformGroupIndex(u32); +entity_impl!(TransformGroupIndex); + +pub(crate) struct TransformGroupBuilder { + name: &'static str, + doc: &'static str, + chain_with: Option<TransformGroupIndex>, + isa_name: Option<&'static str>, + pub custom_legalizes: HashMap<String, &'static str>, + pub transforms: Vec<Transform>, +} + +impl TransformGroupBuilder { + pub fn new(name: &'static str, doc: &'static str) -> Self { + Self { + name, + doc, + chain_with: None, + isa_name: None, + custom_legalizes: HashMap::new(), + transforms: Vec::new(), + } + } + + pub fn chain_with(mut self, next_id: TransformGroupIndex) -> Self { + assert!(self.chain_with.is_none()); + self.chain_with = Some(next_id); + self + } + + pub fn isa(mut self, isa_name: &'static str) -> Self { + assert!(self.isa_name.is_none()); + self.isa_name = Some(isa_name); + self + } + + /// Add a custom legalization action for `inst`. + /// + /// The `func_name` parameter is the fully qualified name of a Rust function which takes the + /// same arguments as the `isa::Legalize` actions. + /// + /// The custom function will be called to legalize `inst` and any return value is ignored. + pub fn custom_legalize(&mut self, inst: &Instruction, func_name: &'static str) { + assert!( + self.custom_legalizes + .insert(inst.camel_name.clone(), func_name) + .is_none(), + format!( + "custom legalization action for {} inserted twice", + inst.name + ) + ); + } + + /// Add a legalization pattern to this group. + pub fn legalize(&mut self, src: DummyDef, dst: Vec<DummyDef>) { + let transform = Transform::new(src, dst); + transform.verify_legalize(); + self.transforms.push(transform); + } + + pub fn build_and_add_to(self, owner: &mut TransformGroups) -> TransformGroupIndex { + let next_id = owner.next_key(); + owner.add(TransformGroup { + name: self.name, + doc: self.doc, + isa_name: self.isa_name, + id: next_id, + chain_with: self.chain_with, + custom_legalizes: self.custom_legalizes, + transforms: self.transforms, + }) + } +} + +pub(crate) struct TransformGroups { + groups: PrimaryMap<TransformGroupIndex, TransformGroup>, +} + +impl TransformGroups { + pub fn new() -> Self { + Self { + groups: PrimaryMap::new(), + } + } + pub fn add(&mut self, new_group: TransformGroup) -> TransformGroupIndex { + for group in self.groups.values() { + assert!( + group.name != new_group.name, + format!("trying to insert {} for the second time", new_group.name) + ); + } + self.groups.push(new_group) + } + pub fn get(&self, id: TransformGroupIndex) -> &TransformGroup { + &self.groups[id] + } + fn next_key(&self) -> TransformGroupIndex { + self.groups.next_key() + } + pub fn by_name(&self, name: &'static str) -> &TransformGroup { + for group in self.groups.values() { + if group.name == name { + return group; + } + } + panic!(format!("transform group with name {} not found", name)); + } +} + +#[test] +#[should_panic] +fn test_double_custom_legalization() { + use crate::cdsl::formats::InstructionFormatBuilder; + use crate::cdsl::instructions::{AllInstructions, InstructionBuilder, InstructionGroupBuilder}; + + let nullary = InstructionFormatBuilder::new("nullary").build(); + + let mut dummy_all = AllInstructions::new(); + let mut inst_group = InstructionGroupBuilder::new(&mut dummy_all); + inst_group.push(InstructionBuilder::new("dummy", "doc", &nullary)); + + let inst_group = inst_group.build(); + let 
dummy_inst = inst_group.by_name("dummy"); + + let mut transform_group = TransformGroupBuilder::new("test", "doc"); + transform_group.custom_legalize(&dummy_inst, "custom 1"); + transform_group.custom_legalize(&dummy_inst, "custom 2"); +} diff --git a/third_party/rust/cranelift-codegen-meta/src/default_map.rs b/third_party/rust/cranelift-codegen-meta/src/default_map.rs new file mode 100644 index 0000000000..3a2be05dac --- /dev/null +++ b/third_party/rust/cranelift-codegen-meta/src/default_map.rs @@ -0,0 +1,20 @@ +//! Trait for extending `HashMap` with `get_or_default`. +use std::collections::HashMap; +use std::hash::Hash; + +pub(crate) trait MapWithDefault<K, V: Default> { + fn get_or_default(&mut self, k: K) -> &mut V; +} + +impl<K: Eq + Hash, V: Default> MapWithDefault<K, V> for HashMap<K, V> { + fn get_or_default(&mut self, k: K) -> &mut V { + self.entry(k).or_insert_with(V::default) + } +} + +#[test] +fn test_default() { + let mut hash_map = HashMap::new(); + hash_map.insert(42, "hello"); + assert_eq!(*hash_map.get_or_default(43), ""); +} diff --git a/third_party/rust/cranelift-codegen-meta/src/error.rs b/third_party/rust/cranelift-codegen-meta/src/error.rs new file mode 100644 index 0000000000..4cbf3d8285 --- /dev/null +++ b/third_party/rust/cranelift-codegen-meta/src/error.rs @@ -0,0 +1,48 @@ +//! Error returned during meta code-generation. +use std::fmt; +use std::io; + +/// An error that occurred when the cranelift_codegen_meta crate was generating +/// source files for the cranelift_codegen crate. +#[derive(Debug)] +pub struct Error { + inner: Box<ErrorInner>, +} + +impl Error { + /// Create a new error object with the given message. + pub fn with_msg<S: Into<String>>(msg: S) -> Error { + Error { + inner: Box::new(ErrorInner::Msg(msg.into())), + } + } +} + +impl fmt::Display for Error { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + write!(f, "{}", self.inner) + } +} + +impl From<io::Error> for Error { + fn from(e: io::Error) -> Self { + Error { + inner: Box::new(ErrorInner::IoError(e)), + } + } +} + +#[derive(Debug)] +enum ErrorInner { + Msg(String), + IoError(io::Error), +} + +impl fmt::Display for ErrorInner { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + match *self { + ErrorInner::Msg(ref s) => write!(f, "{}", s), + ErrorInner::IoError(ref e) => write!(f, "{}", e), + } + } +} diff --git a/third_party/rust/cranelift-codegen-meta/src/gen_binemit.rs b/third_party/rust/cranelift-codegen-meta/src/gen_binemit.rs new file mode 100644 index 0000000000..f67aa9b5a9 --- /dev/null +++ b/third_party/rust/cranelift-codegen-meta/src/gen_binemit.rs @@ -0,0 +1,224 @@ +//! Generate binary emission code for each ISA. + +use cranelift_entity::EntityRef; + +use crate::error; +use crate::srcgen::Formatter; + +use crate::cdsl::recipes::{EncodingRecipe, OperandConstraint, Recipes}; + +/// Generate code to handle a single recipe. +/// +/// - Unpack the instruction data, knowing the format. +/// - Determine register locations for operands with register constraints. +/// - Determine stack slot locations for operands with stack constraints. +/// - Call hand-written code for the actual emission. +fn gen_recipe(recipe: &EncodingRecipe, fmt: &mut Formatter) { + let inst_format = &recipe.format; + let num_value_ops = inst_format.num_value_operands; + + // TODO: Set want_args to true for only MultiAry instructions instead of all formats with value + // list. 
+ let want_args = inst_format.has_value_list + || recipe.operands_in.iter().any(|c| match c { + OperandConstraint::RegClass(_) | OperandConstraint::Stack(_) => true, + OperandConstraint::FixedReg(_) | OperandConstraint::TiedInput(_) => false, + }); + assert!(!want_args || num_value_ops > 0 || inst_format.has_value_list); + + let want_outs = recipe.operands_out.iter().any(|c| match c { + OperandConstraint::RegClass(_) | OperandConstraint::Stack(_) => true, + OperandConstraint::FixedReg(_) | OperandConstraint::TiedInput(_) => false, + }); + + let is_regmove = ["RegMove", "RegSpill", "RegFill"].contains(&inst_format.name); + + // Unpack the instruction data. + fmtln!(fmt, "if let InstructionData::{} {{", inst_format.name); + fmt.indent(|fmt| { + fmt.line("opcode,"); + for f in &inst_format.imm_fields { + fmtln!(fmt, "{},", f.member); + } + if want_args { + if inst_format.has_value_list || num_value_ops > 1 { + fmt.line("ref args,"); + } else { + fmt.line("arg,"); + } + } + fmt.line(".."); + + fmt.outdented_line("} = *inst_data {"); + + // Pass recipe arguments in this order: inputs, imm_fields, outputs. + let mut args = String::new(); + + if want_args && !is_regmove { + if inst_format.has_value_list { + fmt.line("let args = args.as_slice(&func.dfg.value_lists);"); + } else if num_value_ops == 1 { + fmt.line("let args = [arg];"); + } + args += &unwrap_values(&recipe.operands_in, "in", "args", fmt); + } + + for f in &inst_format.imm_fields { + args += &format!(", {}", f.member); + } + + // Unwrap interesting output arguments. + if want_outs { + if recipe.operands_out.len() == 1 { + fmt.line("let results = [func.dfg.first_result(inst)];") + } else { + fmt.line("let results = func.dfg.inst_results(inst);"); + } + args += &unwrap_values(&recipe.operands_out, "out", "results", fmt); + } + + // Optimization: Only update the register diversion tracker for regmove instructions. + if is_regmove { + fmt.line("divert.apply(inst_data);") + } + + match &recipe.emit { + Some(emit) => { + fmt.multi_line(emit); + fmt.line("return;"); + } + None => { + fmtln!( + fmt, + "return recipe_{}(func, inst, sink, bits{});", + recipe.name.to_lowercase(), + args + ); + } + } + }); + fmt.line("}"); +} + +/// Emit code that unwraps values living in registers or stack slots. +/// +/// :param args: Input or output constraints. +/// :param prefix: Prefix to be used for the generated local variables. +/// :param values: Name of slice containing the values to be unwrapped. 
+/// :returns: Comma separated list of the generated variables +fn unwrap_values( + args: &[OperandConstraint], + prefix: &str, + values_slice: &str, + fmt: &mut Formatter, +) -> String { + let mut varlist = String::new(); + for (i, cst) in args.iter().enumerate() { + match cst { + OperandConstraint::RegClass(_reg_class) => { + let v = format!("{}_reg{}", prefix, i); + varlist += &format!(", {}", v); + fmtln!( + fmt, + "let {} = divert.reg({}[{}], &func.locations);", + v, + values_slice, + i + ); + } + OperandConstraint::Stack(stack) => { + let v = format!("{}_stk{}", prefix, i); + varlist += &format!(", {}", v); + fmtln!(fmt, "let {} = StackRef::masked(", v); + fmt.indent(|fmt| { + fmtln!( + fmt, + "divert.stack({}[{}], &func.locations),", + values_slice, + i + ); + fmt.line(format!("{},", stack.stack_base_mask())); + fmt.line("&func.stack_slots,"); + }); + fmt.line(").unwrap();"); + } + _ => {} + } + } + varlist +} + +fn gen_isa(isa_name: &str, recipes: &Recipes, fmt: &mut Formatter) { + fmt.doc_comment(format!( + "Emit binary machine code for `inst` for the {} ISA.", + isa_name + )); + + if recipes.is_empty() { + fmt.line("pub fn emit_inst<CS: CodeSink + ?Sized>("); + fmt.indent(|fmt| { + fmt.line("func: &Function,"); + fmt.line("inst: Inst,"); + fmt.line("_divert: &mut RegDiversions,"); + fmt.line("_sink: &mut CS,"); + fmt.line("_isa: &dyn TargetIsa,"); + }); + fmt.line(") {"); + fmt.indent(|fmt| { + // No encoding recipes: Emit a stub. + fmt.line("bad_encoding(func, inst)"); + }); + fmt.line("}"); + return; + } + + fmt.line("#[allow(unused_variables, unreachable_code)]"); + fmt.line("pub fn emit_inst<CS: CodeSink + ?Sized>("); + fmt.indent(|fmt| { + fmt.line("func: &Function,"); + fmt.line("inst: Inst,"); + fmt.line("divert: &mut RegDiversions,"); + fmt.line("sink: &mut CS,"); + fmt.line("isa: &dyn TargetIsa,") + }); + + fmt.line(") {"); + fmt.indent(|fmt| { + fmt.line("let encoding = func.encodings[inst];"); + fmt.line("let bits = encoding.bits();"); + fmt.line("let inst_data = &func.dfg[inst];"); + fmt.line("match encoding.recipe() {"); + fmt.indent(|fmt| { + for (i, recipe) in recipes.iter() { + fmt.comment(format!("Recipe {}", recipe.name)); + fmtln!(fmt, "{} => {{", i.index()); + fmt.indent(|fmt| { + gen_recipe(recipe, fmt); + }); + fmt.line("}"); + } + fmt.line("_ => {},"); + }); + fmt.line("}"); + + // Allow for unencoded ghost instructions. The verifier will check details. + fmt.line("if encoding.is_legal() {"); + fmt.indent(|fmt| { + fmt.line("bad_encoding(func, inst);"); + }); + fmt.line("}"); + }); + fmt.line("}"); +} + +pub(crate) fn generate( + isa_name: &str, + recipes: &Recipes, + binemit_filename: &str, + out_dir: &str, +) -> Result<(), error::Error> { + let mut fmt = Formatter::new(); + gen_isa(isa_name, recipes, &mut fmt); + fmt.update_file(binemit_filename, out_dir)?; + Ok(()) +} diff --git a/third_party/rust/cranelift-codegen-meta/src/gen_encodings.rs b/third_party/rust/cranelift-codegen-meta/src/gen_encodings.rs new file mode 100644 index 0000000000..d79dc66340 --- /dev/null +++ b/third_party/rust/cranelift-codegen-meta/src/gen_encodings.rs @@ -0,0 +1,1139 @@ +//! Generate sources for instruction encoding. +//! +//! The tables and functions generated here support the `TargetISA::encode()` function which +//! determines if a given instruction is legal, and if so, its `Encoding` data which consists of a +//! *recipe* and some *encoding* bits. +//! +//! The `encode` function doesn't actually generate the binary machine bits. Each recipe has a +//! 
corresponding hand-written function to do that after registers are allocated. +//! +//! This is the information available to us: +//! +//! - The instruction to be encoded as an `InstructionData` reference. +//! - The controlling type variable. +//! - The data-flow graph giving us access to the types of all values involved. This is needed for +//! testing any secondary type variables. +//! - A `PredicateView` reference for the ISA-specific settings for evaluating ISA predicates. +//! - The currently active CPU mode is determined by the ISA. +//! +//! ## Level 1 table lookup +//! +//! The CPU mode provides the first table. The key is the instruction's controlling type variable. +//! If the instruction is not polymorphic, use `INVALID` for the type variable. The table values +//! are level 2 tables. +//! +//! ## Level 2 table lookup +//! +//! The level 2 table is keyed by the instruction's opcode. The table values are *encoding lists*. +//! +//! The two-level table lookup allows the level 2 tables to be much smaller with good locality. +//! Code in any given function usually only uses a few different types, so many of the level 2 +//! tables will be cold. +//! +//! ## Encoding lists +//! +//! An encoding list is a non-empty sequence of list entries. Each entry has one of these forms: +//! +//! 1. Recipe + bits. Use this encoding if the recipe predicate is satisfied. +//! 2. Recipe + bits, final entry. Use this encoding if the recipe predicate is satisfied. +//! Otherwise, stop with the default legalization code. +//! 3. Stop with legalization code. +//! 4. Predicate + skip count. Test predicate and skip N entries if it is false. +//! 5. Predicate + stop. Test predicate and stop with the default legalization code if it is false. +//! +//! The instruction predicate is also used to distinguish between polymorphic instructions with +//! different types for secondary type variables. + +use std::collections::btree_map; +use std::collections::{BTreeMap, HashMap, HashSet}; +use std::convert::TryFrom; +use std::iter::FromIterator; + +use cranelift_codegen_shared::constant_hash::generate_table; +use cranelift_entity::EntityRef; + +use crate::error; +use crate::srcgen::Formatter; + +use crate::cdsl::cpu_modes::CpuMode; +use crate::cdsl::encodings::Encoding; +use crate::cdsl::instructions::{Instruction, InstructionPredicate, InstructionPredicateNumber}; +use crate::cdsl::isa::TargetIsa; +use crate::cdsl::recipes::{EncodingRecipe, OperandConstraint, Recipes, Register}; +use crate::cdsl::regs::IsaRegs; +use crate::cdsl::settings::SettingPredicateNumber; +use crate::cdsl::types::ValueType; +use crate::cdsl::xform::TransformGroupIndex; + +use crate::shared::Definitions as SharedDefinitions; + +use crate::default_map::MapWithDefault; +use crate::unique_table::UniqueSeqTable; + +/// Emit code for matching an instruction predicate against an `InstructionData` reference called +/// `inst`. +/// +/// The generated code is an `if let` pattern match that falls through if the instruction has an +/// unexpected format. This should lead to a panic. 
+fn emit_instp(instp: &InstructionPredicate, has_func: bool, fmt: &mut Formatter) { + if let Some(type_predicate) = instp.type_predicate("func") { + fmt.line("let args = inst.arguments(&func.dfg.value_lists);"); + fmt.line(type_predicate); + return; + } + + let leaves = instp.collect_leaves(); + + let mut has_type_check = false; + let mut format_name = None; + let mut field_names = HashSet::new(); + + for leaf in leaves { + if leaf.is_type_predicate() { + has_type_check = true; + } else { + field_names.insert(leaf.format_destructuring_member_name()); + let leaf_format_name = leaf.format_name(); + match format_name { + None => format_name = Some(leaf_format_name), + Some(previous_format_name) => { + assert!( + previous_format_name == leaf_format_name, + format!("Format predicate can only operate on a single InstructionFormat; trying to use both {} and {}", previous_format_name, leaf_format_name + )); + } + } + } + } + + let mut fields = Vec::from_iter(field_names); + fields.sort(); + let fields = fields.join(", "); + + let format_name = format_name.expect("There should be a format name!"); + + fmtln!( + fmt, + "if let crate::ir::InstructionData::{} {{ {}, .. }} = *inst {{", + format_name, + fields + ); + fmt.indent(|fmt| { + if has_type_check { + // We could implement this. + assert!(has_func, "recipe predicates can't check type variables."); + fmt.line("let args = inst.arguments(&func.dfg.value_lists);"); + } else if has_func { + // Silence dead argument. + fmt.line("let _ = func;"); + } + fmtln!(fmt, "return {};", instp.rust_predicate("func").unwrap()); + }); + fmtln!(fmt, "}"); + + fmt.line("unreachable!();"); +} + +/// Emit private functions for checking recipe predicates as well as a static `RECIPE_PREDICATES` +/// array indexed by recipe number. +/// +/// A recipe predicate is a combination of an ISA predicate and an instruction predicate. Many +/// recipes have identical predicates. +fn emit_recipe_predicates(isa: &TargetIsa, fmt: &mut Formatter) { + let mut predicate_names = HashMap::new(); + + fmt.comment(format!("{} recipe predicates.", isa.name)); + for recipe in isa.recipes.values() { + let (isap, instp) = match (&recipe.isa_predicate, &recipe.inst_predicate) { + (None, None) => continue, + (isap, instp) if predicate_names.contains_key(&(isap, instp)) => continue, + (isap, instp) => (isap, instp), + }; + + let func_name = format!("recipe_predicate_{}", recipe.name.to_lowercase()); + predicate_names.insert((isap, instp), func_name.clone()); + + // Generate the predicate function. + fmtln!( + fmt, + "fn {}({}: crate::settings::PredicateView, {}: &ir::InstructionData) -> bool {{", + func_name, + if isap.is_some() { "isap" } else { "_" }, + if instp.is_some() { "inst" } else { "_" } + ); + fmt.indent(|fmt| { + match (isap, instp) { + (Some(isap), None) => { + fmtln!(fmt, "isap.test({})", isap); + } + (None, Some(instp)) => { + emit_instp(instp, /* has func */ false, fmt); + } + (Some(isap), Some(instp)) => { + fmtln!(fmt, "isap.test({}) &&", isap); + emit_instp(instp, /* has func */ false, fmt); + } + _ => panic!("skipped above"), + } + }); + fmtln!(fmt, "}"); + } + fmt.empty_line(); + + // Generate the static table. + fmt.doc_comment(format!( + r#"{} recipe predicate table. 
+ + One entry per recipe, set to Some only when the recipe is guarded by a predicate."#, + isa.name + )); + fmtln!( + fmt, + "pub static RECIPE_PREDICATES: [RecipePredicate; {}] = [", + isa.recipes.len() + ); + fmt.indent(|fmt| { + for recipe in isa.recipes.values() { + match (&recipe.isa_predicate, &recipe.inst_predicate) { + (None, None) => fmt.line("None,"), + key => fmtln!(fmt, "Some({}),", predicate_names.get(&key).unwrap()), + } + } + }); + fmtln!(fmt, "];"); + fmt.empty_line(); +} + +/// Emit private functions for matching instruction predicates as well as a static +/// `INST_PREDICATES` array indexed by predicate number. +fn emit_inst_predicates(isa: &TargetIsa, fmt: &mut Formatter) { + fmt.comment(format!("{} instruction predicates.", isa.name)); + for (id, instp) in isa.encodings_predicates.iter() { + fmtln!(fmt, "fn inst_predicate_{}(func: &crate::ir::Function, inst: &crate::ir::InstructionData) -> bool {{", id.index()); + fmt.indent(|fmt| { + emit_instp(instp, /* has func */ true, fmt); + }); + fmtln!(fmt, "}"); + } + fmt.empty_line(); + + // Generate the static table. + fmt.doc_comment(format!( + r#"{} instruction predicate table. + + One entry per instruction predicate, so the encoding bytecode can embed indexes into this + table."#, + isa.name + )); + fmtln!( + fmt, + "pub static INST_PREDICATES: [InstPredicate; {}] = [", + isa.encodings_predicates.len() + ); + fmt.indent(|fmt| { + for id in isa.encodings_predicates.keys() { + fmtln!(fmt, "inst_predicate_{},", id.index()); + } + }); + fmtln!(fmt, "];"); + fmt.empty_line(); +} + +/// Emit a table of encoding recipe names keyed by recipe number. +/// +/// This is used for pretty-printing encodings. +fn emit_recipe_names(isa: &TargetIsa, fmt: &mut Formatter) { + fmt.doc_comment(format!( + r#"{} recipe names, using the same recipe index spaces as the one specified by the + corresponding binemit file."#, + isa.name + )); + fmtln!( + fmt, + "static RECIPE_NAMES: [&str; {}] = [", + isa.recipes.len() + ); + fmt.indent(|fmt| { + for recipe in isa.recipes.values() { + fmtln!(fmt, r#""{}","#, recipe.name); + } + }); + fmtln!(fmt, "];"); + fmt.empty_line(); +} + +/// Returns a set of all the registers involved in fixed register constraints. +fn get_fixed_registers(operands_in: &[OperandConstraint]) -> HashSet<Register> { + HashSet::from_iter( + operands_in + .iter() + .map(|constraint| { + if let OperandConstraint::FixedReg(reg) = &constraint { + Some(*reg) + } else { + None + } + }) + .filter(|opt| opt.is_some()) + .map(|opt| opt.unwrap()), + ) +} + +/// Emit a struct field initializer for an array of operand constraints. +/// +/// Note "fixed_registers" must refer to the other kind of operands (i.e. if we're operating on +/// inputs, fixed_registers must contain the fixed output registers). 
+fn emit_operand_constraints( + registers: &IsaRegs, + recipe: &EncodingRecipe, + constraints: &[OperandConstraint], + field_name: &'static str, + tied_operands: &HashMap<usize, usize>, + fixed_registers: &HashSet<Register>, + fmt: &mut Formatter, +) { + if constraints.is_empty() { + fmtln!(fmt, "{}: &[],", field_name); + return; + } + + fmtln!(fmt, "{}: &[", field_name); + fmt.indent(|fmt| { + for (n, constraint) in constraints.iter().enumerate() { + fmt.line("OperandConstraint {"); + fmt.indent(|fmt| { + match constraint { + OperandConstraint::RegClass(reg_class) => { + if let Some(tied_input) = tied_operands.get(&n) { + fmtln!(fmt, "kind: ConstraintKind::Tied({}),", tied_input); + } else { + fmt.line("kind: ConstraintKind::Reg,"); + } + fmtln!( + fmt, + "regclass: &{}_DATA,", + registers.classes[*reg_class].name + ); + } + OperandConstraint::FixedReg(reg) => { + assert!(!tied_operands.contains_key(&n), "can't tie fixed registers"); + let constraint_kind = if fixed_registers.contains(®) { + "FixedTied" + } else { + "FixedReg" + }; + fmtln!( + fmt, + "kind: ConstraintKind::{}({}),", + constraint_kind, + reg.unit + ); + fmtln!( + fmt, + "regclass: &{}_DATA,", + registers.classes[reg.regclass].name + ); + } + OperandConstraint::TiedInput(tied_input) => { + // This is a tied output constraint. It should never happen + // for input constraints. + assert!( + tied_input == tied_operands.get(&n).unwrap(), + "invalid tied constraint" + ); + fmtln!(fmt, "kind: ConstraintKind::Tied({}),", tied_input); + + let tied_class = if let OperandConstraint::RegClass(tied_class) = + recipe.operands_in[*tied_input] + { + tied_class + } else { + panic!("tied constraints relate only to register inputs"); + }; + + fmtln!( + fmt, + "regclass: &{}_DATA,", + registers.classes[tied_class].name + ); + } + OperandConstraint::Stack(stack) => { + assert!(!tied_operands.contains_key(&n), "can't tie stack operand"); + fmt.line("kind: ConstraintKind::Stack,"); + fmtln!( + fmt, + "regclass: &{}_DATA,", + registers.classes[stack.regclass].name + ); + } + } + }); + fmt.line("},"); + } + }); + fmtln!(fmt, "],"); +} + +/// Emit a table of encoding recipe operand constraints keyed by recipe number. +/// +/// These are used by the register allocator to pick registers that can be properly encoded. +fn emit_recipe_constraints(isa: &TargetIsa, fmt: &mut Formatter) { + fmt.doc_comment(format!( + r#"{} recipe constraints list, using the same recipe index spaces as the one + specified by the corresponding binemit file. These constraints are used by register + allocation to select the right location to use for input and output values."#, + isa.name + )); + fmtln!( + fmt, + "static RECIPE_CONSTRAINTS: [RecipeConstraints; {}] = [", + isa.recipes.len() + ); + fmt.indent(|fmt| { + for recipe in isa.recipes.values() { + // Compute a mapping of tied operands in both directions (input tied to outputs and + // conversely). + let mut tied_in_to_out = HashMap::new(); + let mut tied_out_to_in = HashMap::new(); + for (out_index, constraint) in recipe.operands_out.iter().enumerate() { + if let OperandConstraint::TiedInput(in_index) = &constraint { + tied_in_to_out.insert(*in_index, out_index); + tied_out_to_in.insert(out_index, *in_index); + } + } + + // Find the sets of registers involved in fixed register constraints. 
+ let fixed_inputs = get_fixed_registers(&recipe.operands_in); + let fixed_outputs = get_fixed_registers(&recipe.operands_out); + + fmt.comment(format!("Constraints for recipe {}:", recipe.name)); + fmt.line("RecipeConstraints {"); + fmt.indent(|fmt| { + emit_operand_constraints( + &isa.regs, + recipe, + &recipe.operands_in, + "ins", + &tied_in_to_out, + &fixed_outputs, + fmt, + ); + emit_operand_constraints( + &isa.regs, + recipe, + &recipe.operands_out, + "outs", + &tied_out_to_in, + &fixed_inputs, + fmt, + ); + fmtln!( + fmt, + "fixed_ins: {},", + if !fixed_inputs.is_empty() { + "true" + } else { + "false" + } + ); + fmtln!( + fmt, + "fixed_outs: {},", + if !fixed_outputs.is_empty() { + "true" + } else { + "false" + } + ); + fmtln!( + fmt, + "tied_ops: {},", + if !tied_in_to_out.is_empty() { + "true" + } else { + "false" + } + ); + fmtln!( + fmt, + "clobbers_flags: {},", + if recipe.clobbers_flags { + "true" + } else { + "false" + } + ); + }); + fmt.line("},"); + } + }); + fmtln!(fmt, "];"); + fmt.empty_line(); +} + +/// Emit a table of encoding recipe code size information. +fn emit_recipe_sizing(isa: &TargetIsa, fmt: &mut Formatter) { + fmt.doc_comment(format!( + r#"{} recipe sizing descriptors, using the same recipe index spaces as the one + specified by the corresponding binemit file. These are used to compute the final size of an + instruction, as well as to compute the range of branches."#, + isa.name + )); + fmtln!( + fmt, + "static RECIPE_SIZING: [RecipeSizing; {}] = [", + isa.recipes.len() + ); + fmt.indent(|fmt| { + for recipe in isa.recipes.values() { + fmt.comment(format!("Code size information for recipe {}:", recipe.name)); + fmt.line("RecipeSizing {"); + fmt.indent(|fmt| { + fmtln!(fmt, "base_size: {},", recipe.base_size); + fmtln!(fmt, "compute_size: {},", recipe.compute_size); + if let Some(range) = &recipe.branch_range { + fmtln!( + fmt, + "branch_range: Some(BranchRange {{ origin: {}, bits: {} }}),", + range.inst_size, + range.range + ); + } else { + fmt.line("branch_range: None,"); + } + }); + fmt.line("},"); + } + }); + fmtln!(fmt, "];"); + fmt.empty_line(); +} + +/// Level 1 table mapping types to `Level2` objects. +struct Level1Table<'cpu_mode> { + cpu_mode: &'cpu_mode CpuMode, + legalize_code: TransformGroupIndex, + + table_map: HashMap<Option<ValueType>, usize>, + table_vec: Vec<Level2Table>, +} + +impl<'cpu_mode> Level1Table<'cpu_mode> { + fn new(cpu_mode: &'cpu_mode CpuMode) -> Self { + Self { + cpu_mode, + legalize_code: cpu_mode.get_default_legalize_code(), + table_map: HashMap::new(), + table_vec: Vec::new(), + } + } + + /// Returns the level2 table for the given type; None means monomorphic, in this context. + fn l2table_for(&mut self, typ: Option<ValueType>) -> &mut Level2Table { + let cpu_mode = &self.cpu_mode; + let index = match self.table_map.get(&typ) { + Some(&index) => index, + None => { + let legalize_code = cpu_mode.get_legalize_code_for(&typ); + let table = Level2Table::new(typ.clone(), legalize_code); + let index = self.table_vec.len(); + self.table_map.insert(typ, index); + self.table_vec.push(table); + index + } + }; + self.table_vec.get_mut(index).unwrap() + } + + fn l2tables(&mut self) -> Vec<&mut Level2Table> { + self.table_vec + .iter_mut() + .filter(|table| !table.is_empty()) + .collect::<Vec<_>>() + } +} + +struct Level2HashTableEntry { + inst_name: String, + offset: usize, +} + +/// Level 2 table mapping instruction opcodes to `EncList` objects. 
+/// +/// A level 2 table can be completely empty if it only holds a custom legalization action for `ty`. +struct Level2Table { + typ: Option<ValueType>, + legalize_code: TransformGroupIndex, + inst_to_encodings: BTreeMap<String, EncodingList>, + hash_table_offset: Option<usize>, + hash_table_len: Option<usize>, +} + +impl Level2Table { + fn new(typ: Option<ValueType>, legalize_code: TransformGroupIndex) -> Self { + Self { + typ, + legalize_code, + inst_to_encodings: BTreeMap::new(), + hash_table_offset: None, + hash_table_len: None, + } + } + + fn enclist_for(&mut self, inst: &Instruction) -> &mut EncodingList { + let copied_typ = self.typ.clone(); + self.inst_to_encodings + .entry(inst.name.clone()) + .or_insert_with(|| EncodingList::new(inst, copied_typ)) + } + + fn enclists(&mut self) -> btree_map::ValuesMut<'_, String, EncodingList> { + self.inst_to_encodings.values_mut() + } + + fn is_empty(&self) -> bool { + self.inst_to_encodings.is_empty() + } + + fn layout_hashtable( + &mut self, + level2_hashtables: &mut Vec<Option<Level2HashTableEntry>>, + level2_doc: &mut HashMap<usize, Vec<String>>, + ) { + let hash_table = generate_table( + self.inst_to_encodings.values(), + self.inst_to_encodings.len(), + // TODO the Python code wanted opcode numbers to start from 1. + |enc_list| enc_list.inst.opcode_number.index() + 1, + ); + + let hash_table_offset = level2_hashtables.len(); + let hash_table_len = hash_table.len(); + + assert!(self.hash_table_offset.is_none()); + assert!(self.hash_table_len.is_none()); + self.hash_table_offset = Some(hash_table_offset); + self.hash_table_len = Some(hash_table_len); + + level2_hashtables.extend(hash_table.iter().map(|opt_enc_list| { + opt_enc_list.map(|enc_list| Level2HashTableEntry { + inst_name: enc_list.inst.camel_name.clone(), + offset: enc_list.offset.unwrap(), + }) + })); + + let typ_comment = match &self.typ { + Some(ty) => ty.to_string(), + None => "typeless".into(), + }; + + level2_doc.get_or_default(hash_table_offset).push(format!( + "{:06x}: {}, {} entries", + hash_table_offset, typ_comment, hash_table_len + )); + } +} + +/// The u16 values in an encoding list entry are interpreted as follows: +/// +/// NR = len(all_recipes) +/// +/// entry < 2*NR +/// Try Encoding(entry/2, next_entry) if the recipe predicate is satisfied. +/// If bit 0 is set, stop with the default legalization code. +/// If bit 0 is clear, keep going down the list. +/// entry < PRED_START +/// Stop with legalization code `entry - 2*NR`. +/// +/// Remaining entries are interpreted as (skip, pred) pairs, where: +/// +/// skip = (entry - PRED_START) >> PRED_BITS +/// pred = (entry - PRED_START) & PRED_MASK +/// +/// If the predicate is satisfied, keep going. Otherwise skip over the next +/// `skip` entries. If skip == 0, stop with the default legalization code. +/// +/// The `pred` predicate number is interpreted as an instruction predicate if it +/// is in range, otherwise an ISA predicate. + +/// Encoding lists are represented as u16 arrays. +const CODE_BITS: usize = 16; + +/// Beginning of the predicate code words. +const PRED_START: u16 = 0x1000; + +/// Number of bits used to hold a predicate number (instruction + ISA predicates). +const PRED_BITS: usize = 12; + +/// Mask for extracting the predicate number. +const PRED_MASK: usize = (1 << PRED_BITS) - 1; + +/// Encoder for the list format above. +struct Encoder { + num_instruction_predicates: usize, + + /// u16 encoding list words. + words: Vec<u16>, + + /// Documentation comments: Index into `words` + comment. 
+ docs: Vec<(usize, String)>, +} + +impl Encoder { + fn new(num_instruction_predicates: usize) -> Self { + Self { + num_instruction_predicates, + words: Vec::new(), + docs: Vec::new(), + } + } + + /// Add a recipe+bits entry to the list. + fn recipe(&mut self, recipes: &Recipes, enc: &Encoding, is_final: bool) { + let code = (2 * enc.recipe.index() + if is_final { 1 } else { 0 }) as u16; + assert!(code < PRED_START); + + let doc = format!( + "--> {}{}", + enc.to_rust_comment(recipes), + if is_final { " and stop" } else { "" } + ); + self.docs.push((self.words.len(), doc)); + + self.words.push(code); + self.words.push(enc.encbits); + } + + /// Add a predicate entry. + fn pred(&mut self, pred_comment: String, skip: usize, n: usize) { + assert!(n <= PRED_MASK); + let entry = (PRED_START as usize) + (n | (skip << PRED_BITS)); + assert!(entry < (1 << CODE_BITS)); + let entry = entry as u16; + + let doc = if skip == 0 { + "stop".to_string() + } else { + format!("skip {}", skip) + }; + let doc = format!("{} unless {}", doc, pred_comment); + + self.docs.push((self.words.len(), doc)); + self.words.push(entry); + } + + /// Add an instruction predicate entry. + fn inst_predicate(&mut self, pred: InstructionPredicateNumber, skip: usize) { + let number = pred.index(); + let pred_comment = format!("inst_predicate_{}", number); + self.pred(pred_comment, skip, number); + } + + /// Add an ISA predicate entry. + fn isa_predicate(&mut self, pred: SettingPredicateNumber, skip: usize) { + // ISA predicates follow the instruction predicates. + let n = self.num_instruction_predicates + (pred as usize); + let pred_comment = format!("PredicateView({})", pred); + self.pred(pred_comment, skip, n); + } +} + +/// List of instructions for encoding a given type + opcode pair. +/// +/// An encoding list contains a sequence of predicates and encoding recipes, all encoded as u16 +/// values. +struct EncodingList { + inst: Instruction, + typ: Option<ValueType>, + encodings: Vec<Encoding>, + offset: Option<usize>, +} + +impl EncodingList { + fn new(inst: &Instruction, typ: Option<ValueType>) -> Self { + Self { + inst: inst.clone(), + typ, + encodings: Default::default(), + offset: None, + } + } + + /// Encode this list as a sequence of u16 numbers. + /// + /// Adds the sequence to `enc_lists` and records the returned offset as + /// `self.offset`. + /// + /// Adds comment lines to `enc_lists_doc` keyed by enc_lists offsets. + fn encode( + &mut self, + isa: &TargetIsa, + cpu_mode: &CpuMode, + enc_lists: &mut UniqueSeqTable<u16>, + enc_lists_doc: &mut HashMap<usize, Vec<String>>, + ) { + assert!(!self.encodings.is_empty()); + + let mut encoder = Encoder::new(isa.encodings_predicates.len()); + + let mut index = 0; + while index < self.encodings.len() { + let encoding = &self.encodings[index]; + + // Try to see how many encodings are following and have the same ISA predicate and + // instruction predicate, so as to reduce the number of tests carried out by the + // encoding list interpreter.. + // + // Encodings with similar tests are hereby called a group. The group includes the + // current encoding we're looking at. 
+ let (isa_predicate, inst_predicate) = + (&encoding.isa_predicate, &encoding.inst_predicate); + + let group_size = { + let mut group_size = 1; + while index + group_size < self.encodings.len() { + let next_encoding = &self.encodings[index + group_size]; + if &next_encoding.inst_predicate != inst_predicate + || &next_encoding.isa_predicate != isa_predicate + { + break; + } + group_size += 1; + } + group_size + }; + + let is_last_group = index + group_size == self.encodings.len(); + + // The number of entries to skip when a predicate isn't satisfied is the size of both + // predicates + the size of the group, minus one (for this predicate). Each recipe + // entry has a size of two u16 (recipe index + bits). + let mut skip = if is_last_group { + 0 + } else { + let isap_size = match isa_predicate { + Some(_) => 1, + None => 0, + }; + let instp_size = match inst_predicate { + Some(_) => 1, + None => 0, + }; + isap_size + instp_size + group_size * 2 - 1 + }; + + if let Some(pred) = isa_predicate { + encoder.isa_predicate(*pred, skip); + if !is_last_group { + skip -= 1; + } + } + + if let Some(pred) = inst_predicate { + encoder.inst_predicate(*pred, skip); + // No need to update skip, it's dead after this point. + } + + for i in 0..group_size { + let encoding = &self.encodings[index + i]; + let is_last_encoding = index + i == self.encodings.len() - 1; + encoder.recipe(&isa.recipes, encoding, is_last_encoding); + } + + index += group_size; + } + + assert!(self.offset.is_none()); + let offset = enc_lists.add(&encoder.words); + self.offset = Some(offset); + + // Doc comments. + let recipe_typ_mode_name = format!( + "{}{} ({})", + self.inst.name, + if let Some(typ) = &self.typ { + format!(".{}", typ.to_string()) + } else { + "".into() + }, + cpu_mode.name + ); + + enc_lists_doc + .get_or_default(offset) + .push(format!("{:06x}: {}", offset, recipe_typ_mode_name)); + for (pos, doc) in encoder.docs { + enc_lists_doc.get_or_default(offset + pos).push(doc); + } + enc_lists_doc + .get_or_default(offset + encoder.words.len()) + .insert(0, format!("end of {}", recipe_typ_mode_name)); + } +} + +fn make_tables(cpu_mode: &CpuMode) -> Level1Table { + let mut table = Level1Table::new(cpu_mode); + + for encoding in &cpu_mode.encodings { + table + .l2table_for(encoding.bound_type.clone()) + .enclist_for(encoding.inst()) + .encodings + .push(encoding.clone()); + } + + // Ensure there are level 1 table entries for all types with a custom legalize action. + for value_type in cpu_mode.get_legalized_types() { + table.l2table_for(Some(value_type.clone())); + } + // ... and also for monomorphic instructions. + table.l2table_for(None); + + table +} + +/// Compute encodings and doc comments for encoding lists in `level1`. +fn encode_enclists( + isa: &TargetIsa, + cpu_mode: &CpuMode, + level1: &mut Level1Table, + enc_lists: &mut UniqueSeqTable<u16>, + enc_lists_doc: &mut HashMap<usize, Vec<String>>, +) { + for level2 in level1.l2tables() { + for enclist in level2.enclists() { + enclist.encode(isa, cpu_mode, enc_lists, enc_lists_doc); + } + } +} + +fn encode_level2_hashtables<'a>( + level1: &'a mut Level1Table, + level2_hashtables: &mut Vec<Option<Level2HashTableEntry>>, + level2_doc: &mut HashMap<usize, Vec<String>>, +) { + for level2 in level1.l2tables() { + level2.layout_hashtable(level2_hashtables, level2_doc); + } +} + +fn emit_encoding_tables(defs: &SharedDefinitions, isa: &TargetIsa, fmt: &mut Formatter) { + // Level 1 tables, one per CPU mode. 
+ let mut level1_tables: HashMap<&'static str, Level1Table> = HashMap::new(); + + // Single table containing all the level2 hash tables. + let mut level2_hashtables = Vec::new(); + let mut level2_doc: HashMap<usize, Vec<String>> = HashMap::new(); + + // Tables for encoding lists with comments. + let mut enc_lists = UniqueSeqTable::new(); + let mut enc_lists_doc = HashMap::new(); + + for cpu_mode in &isa.cpu_modes { + level2_doc + .get_or_default(level2_hashtables.len()) + .push(cpu_mode.name.into()); + + let mut level1 = make_tables(cpu_mode); + + encode_enclists( + isa, + cpu_mode, + &mut level1, + &mut enc_lists, + &mut enc_lists_doc, + ); + encode_level2_hashtables(&mut level1, &mut level2_hashtables, &mut level2_doc); + + level1_tables.insert(cpu_mode.name, level1); + } + + // Compute an appropriate Rust integer type to use for offsets into a table of the given length. + let offset_type = |length: usize| { + if length <= 0x10000 { + "u16" + } else { + assert!(u32::try_from(length).is_ok(), "table too big!"); + "u32" + } + }; + + let level1_offset_type = offset_type(level2_hashtables.len()); + let level2_offset_type = offset_type(enc_lists.len()); + + // Emit encoding lists. + fmt.doc_comment( + format!(r#"{} encoding lists. + + This contains the entire encodings bytecode for every single instruction; the encodings + interpreter knows where to start from thanks to the initial lookup in the level 1 and level 2 + table entries below."#, isa.name) + ); + fmtln!(fmt, "pub static ENCLISTS: [u16; {}] = [", enc_lists.len()); + fmt.indent(|fmt| { + let mut line = Vec::new(); + for (index, entry) in enc_lists.iter().enumerate() { + if let Some(comments) = enc_lists_doc.get(&index) { + if !line.is_empty() { + fmtln!(fmt, "{},", line.join(", ")); + line.clear(); + } + for comment in comments { + fmt.comment(comment); + } + } + line.push(format!("{:#06x}", entry)); + } + if !line.is_empty() { + fmtln!(fmt, "{},", line.join(", ")); + } + }); + fmtln!(fmt, "];"); + fmt.empty_line(); + + // Emit the full concatenation of level 2 hash tables. + fmt.doc_comment(format!( + r#"{} level 2 hash tables. + + This hash table, keyed by instruction opcode, contains all the starting offsets for the + encodings interpreter, for all the CPU modes. It is jumped to after a lookup on the + instruction's controlling type in the level 1 hash table."#, + isa.name + )); + fmtln!( + fmt, + "pub static LEVEL2: [Level2Entry<{}>; {}] = [", + level2_offset_type, + level2_hashtables.len() + ); + fmt.indent(|fmt| { + for (offset, entry) in level2_hashtables.iter().enumerate() { + if let Some(comments) = level2_doc.get(&offset) { + for comment in comments { + fmt.comment(comment); + } + } + if let Some(entry) = entry { + fmtln!( + fmt, + "Level2Entry {{ opcode: Some(crate::ir::Opcode::{}), offset: {:#08x} }},", + entry.inst_name, + entry.offset + ); + } else { + fmt.line("Level2Entry { opcode: None, offset: 0 },"); + } + } + }); + fmtln!(fmt, "];"); + fmt.empty_line(); + + // Emit a level 1 hash table for each CPU mode. + for cpu_mode in &isa.cpu_modes { + let level1 = &level1_tables.get(cpu_mode.name).unwrap(); + let hash_table = generate_table( + level1.table_vec.iter(), + level1.table_vec.len(), + |level2_table| { + if let Some(typ) = &level2_table.typ { + typ.number().expect("type without a number") as usize + } else { + 0 + } + }, + ); + + fmt.doc_comment(format!( + r#"{} level 1 hash table for the CPU mode {}. 
+ + This hash table, keyed by instruction controlling type, contains all the level 2 + hash-tables offsets for the given CPU mode, as well as a legalization identifier indicating + which legalization scheme to apply when the instruction doesn't have any valid encoding for + this CPU mode. + "#, + isa.name, cpu_mode.name + )); + fmtln!( + fmt, + "pub static LEVEL1_{}: [Level1Entry<{}>; {}] = [", + cpu_mode.name.to_uppercase(), + level1_offset_type, + hash_table.len() + ); + fmt.indent(|fmt| { + for opt_level2 in hash_table { + let level2 = match opt_level2 { + None => { + // Empty hash table entry. Include the default legalization action. + fmtln!(fmt, "Level1Entry {{ ty: ir::types::INVALID, log2len: !0, offset: 0, legalize: {} }},", + isa.translate_group_index(level1.legalize_code)); + continue; + } + Some(level2) => level2, + }; + + let legalize_comment = defs.transform_groups.get(level2.legalize_code).name; + let legalize_code = isa.translate_group_index(level2.legalize_code); + + let typ_name = if let Some(typ) = &level2.typ { + typ.rust_name() + } else { + "ir::types::INVALID".into() + }; + + if level2.is_empty() { + // Empty level 2 table: Only a specialized legalization action, no actual + // table. + // Set an offset that is out of bounds, but make sure it doesn't overflow its + // type when adding `1<<log2len`. + fmtln!(fmt, "Level1Entry {{ ty: {}, log2len: 0, offset: !0 - 1, legalize: {} }}, // {}", + typ_name, legalize_code, legalize_comment); + continue; + } + + // Proper level 2 hash table. + let l2l = (level2.hash_table_len.unwrap() as f64).log2() as i32; + assert!(l2l > 0, "Level2 hash table was too small."); + fmtln!(fmt, "Level1Entry {{ ty: {}, log2len: {}, offset: {:#08x}, legalize: {} }}, // {}", + typ_name, l2l, level2.hash_table_offset.unwrap(), legalize_code, legalize_comment); + } + }); + fmtln!(fmt, "];"); + fmt.empty_line(); + } +} + +fn gen_isa(defs: &SharedDefinitions, isa: &TargetIsa, fmt: &mut Formatter) { + // Make the `RECIPE_PREDICATES` table. + emit_recipe_predicates(isa, fmt); + + // Make the `INST_PREDICATES` table. + emit_inst_predicates(isa, fmt); + + emit_encoding_tables(defs, isa, fmt); + + emit_recipe_names(isa, fmt); + emit_recipe_constraints(isa, fmt); + emit_recipe_sizing(isa, fmt); + + // Finally, tie it all together in an `EncInfo`. + fmt.line("pub static INFO: isa::EncInfo = isa::EncInfo {"); + fmt.indent(|fmt| { + fmt.line("constraints: &RECIPE_CONSTRAINTS,"); + fmt.line("sizing: &RECIPE_SIZING,"); + fmt.line("names: &RECIPE_NAMES,"); + }); + fmt.line("};"); +} + +pub(crate) fn generate( + defs: &SharedDefinitions, + isa: &TargetIsa, + filename: &str, + out_dir: &str, +) -> Result<(), error::Error> { + let mut fmt = Formatter::new(); + gen_isa(defs, isa, &mut fmt); + fmt.update_file(filename, out_dir)?; + Ok(()) +} diff --git a/third_party/rust/cranelift-codegen-meta/src/gen_inst.rs b/third_party/rust/cranelift-codegen-meta/src/gen_inst.rs new file mode 100644 index 0000000000..a2760b34d7 --- /dev/null +++ b/third_party/rust/cranelift-codegen-meta/src/gen_inst.rs @@ -0,0 +1,1184 @@ +//! Generate instruction data (including opcodes, formats, builders, etc.). 
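// (Illustration only, with hypothetical format and opcode names: the
// generators below emit items of roughly this shape into the generated
// opcode file:
//
//   pub enum InstructionFormat { Unary, Binary, /* ... */ }
//
//   pub enum InstructionData {
//       Unary { opcode: Opcode, arg: Value },
//       Binary { opcode: Opcode, args: [Value; 2] },
//       /* ... */
//   }
//
//   pub enum Opcode { Iadd = 1, /* ... */ }
//
// plus an `InstBuilder` trait with one method per opcode and one per format.)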
+use std::fmt; + +use cranelift_codegen_shared::constant_hash; +use cranelift_entity::EntityRef; + +use crate::cdsl::camel_case; +use crate::cdsl::formats::InstructionFormat; +use crate::cdsl::instructions::{AllInstructions, Instruction}; +use crate::cdsl::operands::Operand; +use crate::cdsl::typevar::{TypeSet, TypeVar}; + +use crate::error; +use crate::srcgen::{Formatter, Match}; +use crate::unique_table::{UniqueSeqTable, UniqueTable}; + +// TypeSet indexes are encoded in 8 bits, with `0xff` reserved. +const TYPESET_LIMIT: usize = 0xff; + +/// Generate an instruction format enumeration. +fn gen_formats(formats: &[&InstructionFormat], fmt: &mut Formatter) { + fmt.doc_comment( + r#" + An instruction format + + Every opcode has a corresponding instruction format + which is represented by both the `InstructionFormat` + and the `InstructionData` enums. + "#, + ); + fmt.line("#[derive(Copy, Clone, PartialEq, Eq, Debug)]"); + fmt.line("pub enum InstructionFormat {"); + fmt.indent(|fmt| { + for format in formats { + fmt.doc_comment(format.to_string()); + fmtln!(fmt, "{},", format.name); + } + }); + fmt.line("}"); + fmt.empty_line(); + + // Emit a From<InstructionData> which also serves to verify that + // InstructionFormat and InstructionData are in sync. + fmt.line("impl<'a> From<&'a InstructionData> for InstructionFormat {"); + fmt.indent(|fmt| { + fmt.line("fn from(inst: &'a InstructionData) -> Self {"); + fmt.indent(|fmt| { + let mut m = Match::new("*inst"); + for format in formats { + m.arm( + format!("InstructionData::{}", format.name), + vec![".."], + format!("Self::{}", format.name), + ); + } + fmt.add_match(m); + }); + fmt.line("}"); + }); + fmt.line("}"); + fmt.empty_line(); +} + +/// Generate the InstructionData enum. +/// +/// Every variant must contain an `opcode` field. The size of `InstructionData` should be kept at +/// 16 bytes on 64-bit architectures. If more space is needed to represent an instruction, use a +/// `ValueList` to store the additional information out of line. +fn gen_instruction_data(formats: &[&InstructionFormat], fmt: &mut Formatter) { + fmt.line("#[derive(Clone, Debug)]"); + fmt.line("#[allow(missing_docs)]"); + fmt.line("pub enum InstructionData {"); + fmt.indent(|fmt| { + for format in formats { + fmtln!(fmt, "{} {{", format.name); + fmt.indent(|fmt| { + fmt.line("opcode: Opcode,"); + if format.typevar_operand.is_some() { + if format.has_value_list { + fmt.line("args: ValueList,"); + } else if format.num_value_operands == 1 { + fmt.line("arg: Value,"); + } else { + fmtln!(fmt, "args: [Value; {}],", format.num_value_operands); + } + } + for field in &format.imm_fields { + fmtln!(fmt, "{}: {},", field.member, field.kind.rust_type); + } + }); + fmtln!(fmt, "},"); + } + }); + fmt.line("}"); +} + +fn gen_arguments_method(formats: &[&InstructionFormat], fmt: &mut Formatter, is_mut: bool) { + let (method, mut_, rslice, as_slice) = if is_mut { + ( + "arguments_mut", + "mut ", + "core::slice::from_mut", + "as_mut_slice", + ) + } else { + ("arguments", "", "core::slice::from_ref", "as_slice") + }; + + fmtln!( + fmt, + "pub fn {}<'a>(&'a {}self, pool: &'a {}ir::ValueListPool) -> &{}[Value] {{", + method, + mut_, + mut_, + mut_ + ); + fmt.indent(|fmt| { + let mut m = Match::new("*self"); + for format in formats { + let name = format!("Self::{}", format.name); + + // Formats with a value list put all of their arguments in the list. We don't split + // them up, just return it all as variable arguments. (I expect the distinction to go + // away). 
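// (Illustration of the arms this generates for the immutable case, format
// names hypothetical:
//
//   Self::Call { ref args, .. } => args.as_slice(pool),
//   Self::Unary { ref arg, .. } => core::slice::from_ref(arg),
//   Self::Binary { args: ref args_arity2, .. } => args_arity2,
//
// The mutable variant substitutes `ref mut`, `as_mut_slice` and
// `core::slice::from_mut`.)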
+ if format.has_value_list { + m.arm( + name, + vec![format!("ref {}args", mut_), "..".to_string()], + format!("args.{}(pool)", as_slice), + ); + continue; + } + + // Fixed args. + let mut fields = Vec::new(); + let arg = if format.num_value_operands == 0 { + format!("&{}[]", mut_) + } else if format.num_value_operands == 1 { + fields.push(format!("ref {}arg", mut_)); + format!("{}(arg)", rslice) + } else { + let arg = format!("args_arity{}", format.num_value_operands); + fields.push(format!("args: ref {}{}", mut_, arg)); + arg + }; + fields.push("..".into()); + + m.arm(name, fields, arg); + } + fmt.add_match(m); + }); + fmtln!(fmt, "}"); +} + +/// Generate the boring parts of the InstructionData implementation. +/// +/// These methods in `impl InstructionData` can be generated automatically from the instruction +/// formats: +/// +/// - `pub fn opcode(&self) -> Opcode` +/// - `pub fn arguments(&self, &pool) -> &[Value]` +/// - `pub fn arguments_mut(&mut self, &pool) -> &mut [Value]` +/// - `pub fn take_value_list(&mut self) -> Option<ir::ValueList>` +/// - `pub fn put_value_list(&mut self, args: ir::ValueList>` +/// - `pub fn eq(&self, &other: Self, &pool) -> bool` +/// - `pub fn hash<H: Hasher>(&self, state: &mut H, &pool)` +fn gen_instruction_data_impl(formats: &[&InstructionFormat], fmt: &mut Formatter) { + fmt.line("impl InstructionData {"); + fmt.indent(|fmt| { + fmt.doc_comment("Get the opcode of this instruction."); + fmt.line("pub fn opcode(&self) -> Opcode {"); + fmt.indent(|fmt| { + let mut m = Match::new("*self"); + for format in formats { + m.arm(format!("Self::{}", format.name), vec!["opcode", ".."], + "opcode".to_string()); + } + fmt.add_match(m); + }); + fmt.line("}"); + fmt.empty_line(); + + fmt.doc_comment("Get the controlling type variable operand."); + fmt.line("pub fn typevar_operand(&self, pool: &ir::ValueListPool) -> Option<Value> {"); + fmt.indent(|fmt| { + let mut m = Match::new("*self"); + for format in formats { + let name = format!("Self::{}", format.name); + if format.typevar_operand.is_none() { + m.arm(name, vec![".."], "None".to_string()); + } else if format.has_value_list { + // We keep all arguments in a value list. + m.arm(name, vec!["ref args", ".."], format!("args.get({}, pool)", format.typevar_operand.unwrap())); + } else if format.num_value_operands == 1 { + m.arm(name, vec!["arg", ".."], "Some(arg)".to_string()); + } else { + // We have multiple value operands and an array `args`. + // Which `args` index to use? + let args = format!("args_arity{}", format.num_value_operands); + m.arm(name, vec![format!("args: ref {}", args), "..".to_string()], + format!("Some({}[{}])", args, format.typevar_operand.unwrap())); + } + } + fmt.add_match(m); + }); + fmt.line("}"); + fmt.empty_line(); + + fmt.doc_comment("Get the value arguments to this instruction."); + gen_arguments_method(formats, fmt, false); + fmt.empty_line(); + + fmt.doc_comment(r#"Get mutable references to the value arguments to this + instruction."#); + gen_arguments_method(formats, fmt, true); + fmt.empty_line(); + + fmt.doc_comment(r#" + Take out the value list with all the value arguments and return + it. + + This leaves the value list in the instruction empty. Use + `put_value_list` to put the value list back. 
+ "#); + fmt.line("pub fn take_value_list(&mut self) -> Option<ir::ValueList> {"); + fmt.indent(|fmt| { + let mut m = Match::new("*self"); + + for format in formats { + if format.has_value_list { + m.arm(format!("Self::{}", format.name), + vec!["ref mut args", ".."], + "Some(args.take())".to_string()); + } + } + + m.arm_no_fields("_", "None"); + + fmt.add_match(m); + }); + fmt.line("}"); + fmt.empty_line(); + + fmt.doc_comment(r#" + Put back a value list. + + After removing a value list with `take_value_list()`, use this + method to put it back. It is required that this instruction has + a format that accepts a value list, and that the existing value + list is empty. This avoids leaking list pool memory. + "#); + fmt.line("pub fn put_value_list(&mut self, vlist: ir::ValueList) {"); + fmt.indent(|fmt| { + fmt.line("let args = match *self {"); + fmt.indent(|fmt| { + for format in formats { + if format.has_value_list { + fmtln!(fmt, "Self::{} {{ ref mut args, .. }} => args,", format.name); + } + } + fmt.line("_ => panic!(\"No value list: {:?}\", self),"); + }); + fmt.line("};"); + fmt.line("debug_assert!(args.is_empty(), \"Value list already in use\");"); + fmt.line("*args = vlist;"); + }); + fmt.line("}"); + fmt.empty_line(); + + fmt.doc_comment(r#" + Compare two `InstructionData` for equality. + + This operation requires a reference to a `ValueListPool` to + determine if the contents of any `ValueLists` are equal. + "#); + fmt.line("pub fn eq(&self, other: &Self, pool: &ir::ValueListPool) -> bool {"); + fmt.indent(|fmt| { + fmt.line("if ::core::mem::discriminant(self) != ::core::mem::discriminant(other) {"); + fmt.indent(|fmt| { + fmt.line("return false;"); + }); + fmt.line("}"); + + fmt.line("match (self, other) {"); + fmt.indent(|fmt| { + for format in formats { + let name = format!("&Self::{}", format.name); + let mut members = vec!["opcode"]; + + let args_eq = if format.typevar_operand.is_none() { + None + } else if format.has_value_list { + members.push("args"); + Some("args1.as_slice(pool) == args2.as_slice(pool)") + } else if format.num_value_operands == 1 { + members.push("arg"); + Some("arg1 == arg2") + } else { + members.push("args"); + Some("args1 == args2") + }; + + for field in &format.imm_fields { + members.push(field.member); + } + + let pat1 = members.iter().map(|x| format!("{}: ref {}1", x, x)).collect::<Vec<_>>().join(", "); + let pat2 = members.iter().map(|x| format!("{}: ref {}2", x, x)).collect::<Vec<_>>().join(", "); + fmtln!(fmt, "({} {{ {} }}, {} {{ {} }}) => {{", name, pat1, name, pat2); + fmt.indent(|fmt| { + fmt.line("opcode1 == opcode2"); + for field in &format.imm_fields { + fmtln!(fmt, "&& {}1 == {}2", field.member, field.member); + } + if let Some(args_eq) = args_eq { + fmtln!(fmt, "&& {}", args_eq); + } + }); + fmtln!(fmt, "}"); + } + fmt.line("_ => unreachable!()"); + }); + fmt.line("}"); + }); + fmt.line("}"); + fmt.empty_line(); + + fmt.doc_comment(r#" + Hash an `InstructionData`. + + This operation requires a reference to a `ValueListPool` to + hash the contents of any `ValueLists`. 
+ "#); + fmt.line("pub fn hash<H: ::core::hash::Hasher>(&self, state: &mut H, pool: &ir::ValueListPool) {"); + fmt.indent(|fmt| { + fmt.line("match *self {"); + fmt.indent(|fmt| { + for format in formats { + let name = format!("Self::{}", format.name); + let mut members = vec!["opcode"]; + + let args = if format.typevar_operand.is_none() { + "&()" + } else if format.has_value_list { + members.push("ref args"); + "args.as_slice(pool)" + } else if format.num_value_operands == 1 { + members.push("ref arg"); + "arg" + } else { + members.push("ref args"); + "args" + }; + + for field in &format.imm_fields { + members.push(field.member); + } + let members = members.join(", "); + + fmtln!(fmt, "{}{{{}}} => {{", name, members ); // beware the moustaches + fmt.indent(|fmt| { + fmt.line("::core::hash::Hash::hash( &::core::mem::discriminant(self), state);"); + fmt.line("::core::hash::Hash::hash(&opcode, state);"); + for field in &format.imm_fields { + fmtln!(fmt, "::core::hash::Hash::hash(&{}, state);", field.member); + } + fmtln!(fmt, "::core::hash::Hash::hash({}, state);", args); + }); + fmtln!(fmt, "}"); + } + }); + fmt.line("}"); + }); + fmt.line("}"); + }); + fmt.line("}"); +} + +fn gen_bool_accessor<T: Fn(&Instruction) -> bool>( + all_inst: &AllInstructions, + get_attr: T, + name: &'static str, + doc: &'static str, + fmt: &mut Formatter, +) { + fmt.doc_comment(doc); + fmtln!(fmt, "pub fn {}(self) -> bool {{", name); + fmt.indent(|fmt| { + let mut m = Match::new("self"); + for inst in all_inst.values() { + if get_attr(inst) { + m.arm_no_fields(format!("Self::{}", inst.camel_name), "true"); + } + } + m.arm_no_fields("_", "false"); + fmt.add_match(m); + }); + fmtln!(fmt, "}"); + fmt.empty_line(); +} + +fn gen_opcodes(all_inst: &AllInstructions, fmt: &mut Formatter) { + fmt.doc_comment( + r#" + An instruction opcode. + + All instructions from all supported ISAs are present. + "#, + ); + fmt.line("#[repr(u16)]"); + fmt.line("#[derive(Copy, Clone, PartialEq, Eq, Debug, Hash)]"); + fmt.line( + r#"#[cfg_attr(feature = "enable-peepmatic", derive(serde::Serialize, serde::Deserialize))]"# + ); + + // We explicitly set the discriminant of the first variant to 1, which allows us to take + // advantage of the NonZero optimization, meaning that wrapping enums can use the 0 + // discriminant instead of increasing the size of the whole type, and so the size of + // Option<Opcode> is the same as Opcode's. + fmt.line("pub enum Opcode {"); + fmt.indent(|fmt| { + let mut is_first_opcode = true; + for inst in all_inst.values() { + fmt.doc_comment(format!("`{}`. ({})", inst, inst.format.name)); + + // Document polymorphism. + if let Some(poly) = &inst.polymorphic_info { + if poly.use_typevar_operand { + let op_num = inst.value_opnums[inst.format.typevar_operand.unwrap()]; + fmt.doc_comment(format!( + "Type inferred from `{}`.", + inst.operands_in[op_num].name + )); + } + } + + // Enum variant itself. + if is_first_opcode { + assert!(inst.opcode_number.index() == 0); + // TODO the python crate requires opcode numbers to start from one. 
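// (Illustration: because no variant ever uses discriminant 0, the compiler
// can use 0 as the `None` niche, so the generated enum satisfies
// `assert_eq!(size_of::<Option<Opcode>>(), size_of::<Opcode>())`.)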
+ fmtln!(fmt, "{} = 1,", inst.camel_name); + is_first_opcode = false; + } else { + fmtln!(fmt, "{},", inst.camel_name) + } + } + }); + fmt.line("}"); + fmt.empty_line(); + + fmt.line("impl Opcode {"); + fmt.indent(|fmt| { + gen_bool_accessor( + all_inst, + |inst| inst.is_terminator, + "is_terminator", + "True for instructions that terminate the block", + fmt, + ); + gen_bool_accessor( + all_inst, + |inst| inst.is_branch, + "is_branch", + "True for all branch or jump instructions.", + fmt, + ); + gen_bool_accessor( + all_inst, + |inst| inst.is_indirect_branch, + "is_indirect_branch", + "True for all indirect branch or jump instructions.", + fmt, + ); + gen_bool_accessor( + all_inst, + |inst| inst.is_call, + "is_call", + "Is this a call instruction?", + fmt, + ); + gen_bool_accessor( + all_inst, + |inst| inst.is_return, + "is_return", + "Is this a return instruction?", + fmt, + ); + gen_bool_accessor( + all_inst, + |inst| inst.is_ghost, + "is_ghost", + "Is this a ghost instruction?", + fmt, + ); + gen_bool_accessor( + all_inst, + |inst| inst.can_load, + "can_load", + "Can this instruction read from memory?", + fmt, + ); + gen_bool_accessor( + all_inst, + |inst| inst.can_store, + "can_store", + "Can this instruction write to memory?", + fmt, + ); + gen_bool_accessor( + all_inst, + |inst| inst.can_trap, + "can_trap", + "Can this instruction cause a trap?", + fmt, + ); + gen_bool_accessor( + all_inst, + |inst| inst.other_side_effects, + "other_side_effects", + "Does this instruction have other side effects besides can_* flags?", + fmt, + ); + gen_bool_accessor( + all_inst, + |inst| inst.writes_cpu_flags, + "writes_cpu_flags", + "Does this instruction write to CPU flags?", + fmt, + ); + gen_bool_accessor( + all_inst, + |inst| inst.clobbers_all_regs, + "clobbers_all_regs", + "Should this opcode be considered to clobber all the registers, during regalloc?", + fmt, + ); + }); + fmt.line("}"); + fmt.empty_line(); + + // Generate a private opcode_format table. + fmtln!( + fmt, + "const OPCODE_FORMAT: [InstructionFormat; {}] = [", + all_inst.len() + ); + fmt.indent(|fmt| { + for inst in all_inst.values() { + fmtln!( + fmt, + "InstructionFormat::{}, // {}", + inst.format.name, + inst.name + ); + } + }); + fmtln!(fmt, "];"); + fmt.empty_line(); + + // Generate a private opcode_name function. + fmt.line("fn opcode_name(opc: Opcode) -> &\'static str {"); + fmt.indent(|fmt| { + let mut m = Match::new("opc"); + for inst in all_inst.values() { + m.arm_no_fields( + format!("Opcode::{}", inst.camel_name), + format!("\"{}\"", inst.name), + ); + } + fmt.add_match(m); + }); + fmt.line("}"); + fmt.empty_line(); + + // Generate an opcode hash table for looking up opcodes by name. 
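// (Illustration, opcode names and table length hypothetical: the emitted
// table is a flat, open-addressed array of `Option<Opcode>` whose starting
// probe slot is derived from `simple_hash` of the instruction name, so a
// by-name lookup avoids a `match` over every name string:
//
//   const OPCODE_HASH_TABLE: [Option<Opcode>; 128] = [
//       None,
//       Some(Opcode::Iadd),
//       None,
//       // ...
//   ];
// )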
+ let hash_table = constant_hash::generate_table(all_inst.values(), all_inst.len(), |inst| { + constant_hash::simple_hash(&inst.name) + }); + fmtln!( + fmt, + "const OPCODE_HASH_TABLE: [Option<Opcode>; {}] = [", + hash_table.len() + ); + fmt.indent(|fmt| { + for i in hash_table { + match i { + Some(i) => fmtln!(fmt, "Some(Opcode::{}),", i.camel_name), + None => fmtln!(fmt, "None,"), + } + } + }); + fmtln!(fmt, "];"); + fmt.empty_line(); +} + +fn gen_try_from(all_inst: &AllInstructions, fmt: &mut Formatter) { + fmt.line("impl core::convert::TryFrom<u16> for Opcode {"); + fmt.indent(|fmt| { + fmt.line("type Error = ();"); + fmt.line("#[inline]"); + fmt.line("fn try_from(x: u16) -> Result<Self, ()> {"); + fmt.indent(|fmt| { + fmtln!(fmt, "if 0 < x && x <= {} {{", all_inst.len()); + fmt.indent(|fmt| fmt.line("Ok(unsafe { core::mem::transmute(x) })")); + fmt.line("} else {"); + fmt.indent(|fmt| fmt.line("Err(())")); + fmt.line("}"); + }); + fmt.line("}"); + }); + fmt.line("}"); +} + +/// Get the value type constraint for an SSA value operand, where +/// `ctrl_typevar` is the controlling type variable. +/// +/// Each operand constraint is represented as a string, one of: +/// - `Concrete(vt)`, where `vt` is a value type name. +/// - `Free(idx)` where `idx` is an index into `type_sets`. +/// - `Same`, `Lane`, `AsBool` for controlling typevar-derived constraints. +fn get_constraint<'entries, 'table>( + operand: &'entries Operand, + ctrl_typevar: Option<&TypeVar>, + type_sets: &'table mut UniqueTable<'entries, TypeSet>, +) -> String { + assert!(operand.is_value()); + let type_var = operand.type_var().unwrap(); + + if let Some(typ) = type_var.singleton_type() { + return format!("Concrete({})", typ.rust_name()); + } + + if let Some(free_typevar) = type_var.free_typevar() { + if ctrl_typevar.is_some() && free_typevar != *ctrl_typevar.unwrap() { + assert!(type_var.base.is_none()); + return format!("Free({})", type_sets.add(&type_var.get_raw_typeset())); + } + } + + if let Some(base) = &type_var.base { + assert!(base.type_var == *ctrl_typevar.unwrap()); + return camel_case(base.derived_func.name()); + } + + assert!(type_var == ctrl_typevar.unwrap()); + "Same".into() +} + +fn gen_bitset<'a, T: IntoIterator<Item = &'a u16>>( + iterable: T, + name: &'static str, + field_size: u8, + fmt: &mut Formatter, +) { + let bits = iterable.into_iter().fold(0, |acc, x| { + assert!(x.is_power_of_two()); + assert!(u32::from(*x) < (1 << u32::from(field_size))); + acc | x + }); + fmtln!(fmt, "{}: BitSet::<u{}>({}),", name, field_size, bits); +} + +fn iterable_to_string<I: fmt::Display, T: IntoIterator<Item = I>>(iterable: T) -> String { + let elems = iterable + .into_iter() + .map(|x| x.to_string()) + .collect::<Vec<_>>() + .join(", "); + format!("{{{}}}", elems) +} + +fn typeset_to_string(ts: &TypeSet) -> String { + let mut result = format!("TypeSet(lanes={}", iterable_to_string(&ts.lanes)); + if !ts.ints.is_empty() { + result += &format!(", ints={}", iterable_to_string(&ts.ints)); + } + if !ts.floats.is_empty() { + result += &format!(", floats={}", iterable_to_string(&ts.floats)); + } + if !ts.bools.is_empty() { + result += &format!(", bools={}", iterable_to_string(&ts.bools)); + } + if !ts.specials.is_empty() { + result += &format!(", specials=[{}]", iterable_to_string(&ts.specials)); + } + if !ts.refs.is_empty() { + result += &format!(", refs={}", iterable_to_string(&ts.refs)); + } + result += ")"; + result +} + +/// Generate the table of ValueTypeSets described by type_sets. 
+pub(crate) fn gen_typesets_table(type_sets: &UniqueTable<TypeSet>, fmt: &mut Formatter) { + if type_sets.len() == 0 { + return; + } + + fmt.comment("Table of value type sets."); + assert!(type_sets.len() <= TYPESET_LIMIT, "Too many type sets!"); + fmtln!( + fmt, + "const TYPE_SETS: [ir::instructions::ValueTypeSet; {}] = [", + type_sets.len() + ); + fmt.indent(|fmt| { + for ts in type_sets.iter() { + fmt.line("ir::instructions::ValueTypeSet {"); + fmt.indent(|fmt| { + fmt.comment(typeset_to_string(ts)); + gen_bitset(&ts.lanes, "lanes", 16, fmt); + gen_bitset(&ts.ints, "ints", 8, fmt); + gen_bitset(&ts.floats, "floats", 8, fmt); + gen_bitset(&ts.bools, "bools", 8, fmt); + gen_bitset(&ts.refs, "refs", 8, fmt); + }); + fmt.line("},"); + } + }); + fmtln!(fmt, "];"); +} + +/// Generate value type constraints for all instructions. +/// - Emit a compact constant table of ValueTypeSet objects. +/// - Emit a compact constant table of OperandConstraint objects. +/// - Emit an opcode-indexed table of instruction constraints. +fn gen_type_constraints(all_inst: &AllInstructions, fmt: &mut Formatter) { + // Table of TypeSet instances. + let mut type_sets = UniqueTable::new(); + + // Table of operand constraint sequences (as tuples). Each operand + // constraint is represented as a string, one of: + // - `Concrete(vt)`, where `vt` is a value type name. + // - `Free(idx)` where `idx` is an index into `type_sets`. + // - `Same`, `Lane`, `AsBool` for controlling typevar-derived constraints. + let mut operand_seqs = UniqueSeqTable::new(); + + // Preload table with constraints for typical binops. + #[allow(clippy::useless_vec)] + operand_seqs.add(&vec!["Same".to_string(); 3]); + + fmt.comment("Table of opcode constraints."); + fmtln!( + fmt, + "const OPCODE_CONSTRAINTS: [OpcodeConstraints; {}] = [", + all_inst.len() + ); + fmt.indent(|fmt| { + for inst in all_inst.values() { + let (ctrl_typevar, ctrl_typeset) = if let Some(poly) = &inst.polymorphic_info { + let index = type_sets.add(&*poly.ctrl_typevar.get_raw_typeset()); + (Some(&poly.ctrl_typevar), index) + } else { + (None, TYPESET_LIMIT) + }; + + // Collect constraints for the value results, not including `variable_args` results + // which are always special cased. + let mut constraints = Vec::new(); + for &index in &inst.value_results { + constraints.push(get_constraint(&inst.operands_out[index], ctrl_typevar, &mut type_sets)); + } + for &index in &inst.value_opnums { + constraints.push(get_constraint(&inst.operands_in[index], ctrl_typevar, &mut type_sets)); + } + + let constraint_offset = operand_seqs.add(&constraints); + + let fixed_results = inst.value_results.len(); + let fixed_values = inst.value_opnums.len(); + + // Can the controlling type variable be inferred from the designated operand? + let use_typevar_operand = if let Some(poly) = &inst.polymorphic_info { + poly.use_typevar_operand + } else { + false + }; + + // Can the controlling type variable be inferred from the result? + let use_result = fixed_results > 0 && inst.operands_out[inst.value_results[0]].type_var() == ctrl_typevar; + + // Are we required to use the designated operand instead of the result? 
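// (Yes, when the controlling type can only be recovered from the designated
// operand — for instance an instruction whose result type is fixed no matter
// what its inputs are. The `flags` value packed a few lines below uses this
// layout:
//
//   bits 0..=2  fixed_results
//   bit  3      use_typevar_operand
//   bit  4      requires_typevar_operand
//   bits 5..    fixed_values
// )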
+ let requires_typevar_operand = use_typevar_operand && !use_result; + + fmt.comment( + format!("{}: fixed_results={}, use_typevar_operand={}, requires_typevar_operand={}, fixed_values={}", + inst.camel_name, + fixed_results, + use_typevar_operand, + requires_typevar_operand, + fixed_values) + ); + fmt.comment(format!("Constraints=[{}]", constraints + .iter() + .map(|x| format!("'{}'", x)) + .collect::<Vec<_>>() + .join(", "))); + if let Some(poly) = &inst.polymorphic_info { + fmt.comment(format!("Polymorphic over {}", typeset_to_string(&poly.ctrl_typevar.get_raw_typeset()))); + } + + // Compute the bit field encoding, c.f. instructions.rs. + assert!(fixed_results < 8 && fixed_values < 8, "Bit field encoding too tight"); + let mut flags = fixed_results; // 3 bits + if use_typevar_operand { + flags |= 1<<3; // 4th bit + } + if requires_typevar_operand { + flags |= 1<<4; // 5th bit + } + flags |= fixed_values << 5; // 6th bit and more + + fmt.line("OpcodeConstraints {"); + fmt.indent(|fmt| { + fmtln!(fmt, "flags: {:#04x},", flags); + fmtln!(fmt, "typeset_offset: {},", ctrl_typeset); + fmtln!(fmt, "constraint_offset: {},", constraint_offset); + }); + fmt.line("},"); + } + }); + fmtln!(fmt, "];"); + fmt.empty_line(); + + gen_typesets_table(&type_sets, fmt); + fmt.empty_line(); + + fmt.comment("Table of operand constraint sequences."); + fmtln!( + fmt, + "const OPERAND_CONSTRAINTS: [OperandConstraint; {}] = [", + operand_seqs.len() + ); + fmt.indent(|fmt| { + for constraint in operand_seqs.iter() { + fmtln!(fmt, "OperandConstraint::{},", constraint); + } + }); + fmtln!(fmt, "];"); +} + +/// Emit member initializers for an instruction format. +fn gen_member_inits(format: &InstructionFormat, fmt: &mut Formatter) { + // Immediate operands. + // We have local variables with the same names as the members. + for f in &format.imm_fields { + fmtln!(fmt, "{},", f.member); + } + + // Value operands. + if format.has_value_list { + fmt.line("args,"); + } else if format.num_value_operands == 1 { + fmt.line("arg: arg0,"); + } else if format.num_value_operands > 1 { + let mut args = Vec::new(); + for i in 0..format.num_value_operands { + args.push(format!("arg{}", i)); + } + fmtln!(fmt, "args: [{}],", args.join(", ")); + } +} + +/// Emit a method for creating and inserting an instruction format. +/// +/// All instruction formats take an `opcode` argument and a `ctrl_typevar` argument for deducing +/// the result types. +fn gen_format_constructor(format: &InstructionFormat, fmt: &mut Formatter) { + // Construct method arguments. + let mut args = vec![ + "self".to_string(), + "opcode: Opcode".into(), + "ctrl_typevar: Type".into(), + ]; + + // Normal operand arguments. Start with the immediate operands. + for f in &format.imm_fields { + args.push(format!("{}: {}", f.member, f.kind.rust_type)); + } + + // Then the value operands. + if format.has_value_list { + // Take all value arguments as a finished value list. The value lists + // are created by the individual instruction constructors. + args.push("args: ir::ValueList".into()); + } else { + // Take a fixed number of value operands. 
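// (Illustration of a finished format constructor, assuming a hypothetical
// two-operand `Binary` format with no immediate fields:
//
//   fn Binary(self, opcode: Opcode, ctrl_typevar: Type, arg0: Value, arg1: Value)
//       -> (Inst, &'f mut ir::DataFlowGraph) {
//       let data = ir::InstructionData::Binary { opcode, args: [arg0, arg1] };
//       self.build(data, ctrl_typevar)
//   }
// )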
+ for i in 0..format.num_value_operands { + args.push(format!("arg{}: Value", i)); + } + } + + let proto = format!( + "{}({}) -> (Inst, &'f mut ir::DataFlowGraph)", + format.name, + args.join(", ") + ); + + let imms_need_sign_extension = format + .imm_fields + .iter() + .any(|f| f.kind.rust_type == "ir::immediates::Imm64"); + + fmt.doc_comment(format.to_string()); + fmt.line("#[allow(non_snake_case)]"); + fmtln!(fmt, "fn {} {{", proto); + fmt.indent(|fmt| { + // Generate the instruction data. + fmtln!( + fmt, + "let{} data = ir::InstructionData::{} {{", + if imms_need_sign_extension { " mut" } else { "" }, + format.name + ); + fmt.indent(|fmt| { + fmt.line("opcode,"); + gen_member_inits(format, fmt); + }); + fmtln!(fmt, "};"); + + if imms_need_sign_extension { + fmtln!(fmt, "data.sign_extend_immediates(ctrl_typevar);"); + } + + fmt.line("self.build(data, ctrl_typevar)"); + }); + fmtln!(fmt, "}"); +} + +/// Emit a method for generating the instruction `inst`. +/// +/// The method will create and insert an instruction, then return the result values, or the +/// instruction reference itself for instructions that don't have results. +fn gen_inst_builder(inst: &Instruction, format: &InstructionFormat, fmt: &mut Formatter) { + // Construct method arguments. + let mut args = vec![if format.has_value_list { + "mut self" + } else { + "self" + } + .to_string()]; + + let mut args_doc = Vec::new(); + let mut rets_doc = Vec::new(); + + // The controlling type variable will be inferred from the input values if + // possible. Otherwise, it is the first method argument. + if let Some(poly) = &inst.polymorphic_info { + if !poly.use_typevar_operand { + args.push(format!("{}: crate::ir::Type", poly.ctrl_typevar.name)); + args_doc.push(format!( + "- {} (controlling type variable): {}", + poly.ctrl_typevar.name, poly.ctrl_typevar.doc + )); + } + } + + let mut tmpl_types = Vec::new(); + let mut into_args = Vec::new(); + for op in &inst.operands_in { + let t = if op.is_immediate() { + let t = format!("T{}", tmpl_types.len() + 1); + tmpl_types.push(format!("{}: Into<{}>", t, op.kind.rust_type)); + into_args.push(op.name); + t + } else { + op.kind.rust_type.to_string() + }; + args.push(format!("{}: {}", op.name, t)); + args_doc.push(format!( + "- {}: {}", + op.name, + op.doc() + .expect("every instruction's input operand must be documented") + )); + } + + for op in &inst.operands_out { + rets_doc.push(format!( + "- {}: {}", + op.name, + op.doc() + .expect("every instruction's output operand must be documented") + )); + } + + let rtype = match inst.value_results.len() { + 0 => "Inst".into(), + 1 => "Value".into(), + _ => format!("({})", vec!["Value"; inst.value_results.len()].join(", ")), + }; + + let tmpl = if !tmpl_types.is_empty() { + format!("<{}>", tmpl_types.join(", ")) + } else { + "".into() + }; + + let proto = format!( + "{}{}({}) -> {}", + inst.snake_name(), + tmpl, + args.join(", "), + rtype + ); + + fmt.doc_comment(&inst.doc); + if !args_doc.is_empty() { + fmt.line("///"); + fmt.doc_comment("Inputs:"); + fmt.line("///"); + for doc_line in args_doc { + fmt.doc_comment(doc_line); + } + } + if !rets_doc.is_empty() { + fmt.line("///"); + fmt.doc_comment("Outputs:"); + fmt.line("///"); + for doc_line in rets_doc { + fmt.doc_comment(doc_line); + } + } + + fmt.line("#[allow(non_snake_case)]"); + fmtln!(fmt, "fn {} {{", proto); + fmt.indent(|fmt| { + // Convert all of the `Into<>` arguments. 
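// (Illustration of a finished per-instruction method, assuming a hypothetical
// polymorphic `iadd` with two value operands and one result:
//
//   fn iadd(self, x: Value, y: Value) -> Value {
//       let ctrl_typevar = self.data_flow_graph().value_type(x);
//       let (inst, dfg) = self.Binary(Opcode::Iadd, ctrl_typevar, x, y);
//       dfg.first_result(inst)
//   }
// )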
+ for arg in &into_args { + fmtln!(fmt, "let {} = {}.into();", arg, arg); + } + + // Arguments for instruction constructor. + let first_arg = format!("Opcode::{}", inst.camel_name); + let mut args = vec![first_arg.as_str()]; + if let Some(poly) = &inst.polymorphic_info { + if poly.use_typevar_operand { + // Infer the controlling type variable from the input operands. + let op_num = inst.value_opnums[format.typevar_operand.unwrap()]; + fmtln!( + fmt, + "let ctrl_typevar = self.data_flow_graph().value_type({});", + inst.operands_in[op_num].name + ); + + // The format constructor will resolve the result types from the type var. + args.push("ctrl_typevar"); + } else { + // This was an explicit method argument. + args.push(&poly.ctrl_typevar.name); + } + } else { + // No controlling type variable needed. + args.push("types::INVALID"); + } + + // Now add all of the immediate operands to the constructor arguments. + for &op_num in &inst.imm_opnums { + args.push(inst.operands_in[op_num].name); + } + + // Finally, the value operands. + if format.has_value_list { + // We need to build a value list with all the arguments. + fmt.line("let mut vlist = ir::ValueList::default();"); + args.push("vlist"); + fmt.line("{"); + fmt.indent(|fmt| { + fmt.line("let pool = &mut self.data_flow_graph_mut().value_lists;"); + for op in &inst.operands_in { + if op.is_value() { + fmtln!(fmt, "vlist.push({}, pool);", op.name); + } else if op.is_varargs() { + fmtln!(fmt, "vlist.extend({}.iter().cloned(), pool);", op.name); + } + } + }); + fmt.line("}"); + } else { + // With no value list, we're guaranteed to just have a set of fixed value operands. + for &op_num in &inst.value_opnums { + args.push(inst.operands_in[op_num].name); + } + } + + // Call to the format constructor, + let fcall = format!("self.{}({})", format.name, args.join(", ")); + + if inst.value_results.is_empty() { + fmtln!(fmt, "{}.0", fcall); + return; + } + + fmtln!(fmt, "let (inst, dfg) = {};", fcall); + if inst.value_results.len() == 1 { + fmt.line("dfg.first_result(inst)"); + } else { + fmtln!( + fmt, + "let results = &dfg.inst_results(inst)[0..{}];", + inst.value_results.len() + ); + fmtln!( + fmt, + "({})", + inst.value_results + .iter() + .enumerate() + .map(|(i, _)| format!("results[{}]", i)) + .collect::<Vec<_>>() + .join(", ") + ); + } + }); + fmtln!(fmt, "}") +} + +/// Generate a Builder trait with methods for all instructions. +fn gen_builder( + instructions: &AllInstructions, + formats: &[&InstructionFormat], + fmt: &mut Formatter, +) { + fmt.doc_comment( + r#" + Convenience methods for building instructions. + + The `InstBuilder` trait has one method per instruction opcode for + conveniently constructing the instruction with minimum arguments. + Polymorphic instructions infer their result types from the input + arguments when possible. In some cases, an explicit `ctrl_typevar` + argument is required. + + The opcode methods return the new instruction's result values, or + the `Inst` itself for instructions that don't have any results. + + There is also a method per instruction format. These methods all + return an `Inst`. 
+ "#, + ); + fmt.line("pub trait InstBuilder<'f>: InstBuilderBase<'f> {"); + fmt.indent(|fmt| { + for inst in instructions.values() { + gen_inst_builder(inst, &*inst.format, fmt); + fmt.empty_line(); + } + for (i, format) in formats.iter().enumerate() { + gen_format_constructor(format, fmt); + if i + 1 != formats.len() { + fmt.empty_line(); + } + } + }); + fmt.line("}"); +} + +pub(crate) fn generate( + formats: Vec<&InstructionFormat>, + all_inst: &AllInstructions, + opcode_filename: &str, + inst_builder_filename: &str, + out_dir: &str, +) -> Result<(), error::Error> { + // Opcodes. + let mut fmt = Formatter::new(); + gen_formats(&formats, &mut fmt); + gen_instruction_data(&formats, &mut fmt); + fmt.empty_line(); + gen_instruction_data_impl(&formats, &mut fmt); + fmt.empty_line(); + gen_opcodes(all_inst, &mut fmt); + fmt.empty_line(); + gen_type_constraints(all_inst, &mut fmt); + fmt.empty_line(); + gen_try_from(all_inst, &mut fmt); + fmt.update_file(opcode_filename, out_dir)?; + + // Instruction builder. + let mut fmt = Formatter::new(); + gen_builder(all_inst, &formats, &mut fmt); + fmt.update_file(inst_builder_filename, out_dir)?; + + Ok(()) +} diff --git a/third_party/rust/cranelift-codegen-meta/src/gen_legalizer.rs b/third_party/rust/cranelift-codegen-meta/src/gen_legalizer.rs new file mode 100644 index 0000000000..7b56b8db48 --- /dev/null +++ b/third_party/rust/cranelift-codegen-meta/src/gen_legalizer.rs @@ -0,0 +1,734 @@ +//! Generate transformations to legalize instructions without encodings. +use crate::cdsl::ast::{Def, DefPool, Expr, VarPool}; +use crate::cdsl::isa::TargetIsa; +use crate::cdsl::operands::Operand; +use crate::cdsl::type_inference::Constraint; +use crate::cdsl::typevar::{TypeSet, TypeVar}; +use crate::cdsl::xform::{Transform, TransformGroup, TransformGroups}; + +use crate::error; +use crate::gen_inst::gen_typesets_table; +use crate::srcgen::Formatter; +use crate::unique_table::UniqueTable; + +use std::collections::{HashMap, HashSet}; +use std::iter::FromIterator; + +/// Given a `Def` node, emit code that extracts all the instruction fields from +/// `pos.func.dfg[iref]`. +/// +/// Create local variables named after the `Var` instances in `node`. +/// +/// Also create a local variable named `predicate` with the value of the evaluated instruction +/// predicate, or `true` if the node has no predicate. +fn unwrap_inst(transform: &Transform, fmt: &mut Formatter) -> bool { + let var_pool = &transform.var_pool; + let def_pool = &transform.def_pool; + + let def = def_pool.get(transform.src); + let apply = &def.apply; + let inst = &apply.inst; + let iform = &inst.format; + + fmt.comment(format!( + "Unwrap fields from instruction format {}", + def.to_comment_string(&transform.var_pool) + )); + + // Extract the Var arguments. + let arg_names = apply + .args + .iter() + .enumerate() + .filter(|(arg_num, _)| { + // Variable args are specially handled after extracting args. + !inst.operands_in[*arg_num].is_varargs() + }) + .map(|(arg_num, arg)| match &arg { + Expr::Var(var_index) => var_pool.get(*var_index).name.as_ref(), + Expr::Literal(_) => { + let n = inst.imm_opnums.iter().position(|&i| i == arg_num).unwrap(); + iform.imm_fields[n].member + } + }) + .collect::<Vec<_>>() + .join(", "); + + // May we need "args" in the values consumed by predicates? + let emit_args = iform.num_value_operands >= 1 || iform.has_value_list; + + // We need a tuple: + // - if there's at least one value operand, then we emit a variable for the value, and the + // value list as args. 
+ // - otherwise, if there's the count of immediate operands added to the presence of a value list exceeds one. + let need_tuple = if iform.num_value_operands >= 1 { + true + } else { + let mut imm_and_varargs = inst + .operands_in + .iter() + .filter(|op| op.is_immediate_or_entityref()) + .count(); + if iform.has_value_list { + imm_and_varargs += 1; + } + imm_and_varargs > 1 + }; + + let maybe_args = if emit_args { ", args" } else { "" }; + let defined_values = format!("{}{}", arg_names, maybe_args); + + let tuple_or_value = if need_tuple { + format!("({})", defined_values) + } else { + defined_values + }; + + fmtln!( + fmt, + "let {} = if let ir::InstructionData::{} {{", + tuple_or_value, + iform.name + ); + + fmt.indent(|fmt| { + // Fields are encoded directly. + for field in &iform.imm_fields { + fmtln!(fmt, "{},", field.member); + } + + if iform.has_value_list || iform.num_value_operands > 1 { + fmt.line("ref args,"); + } else if iform.num_value_operands == 1 { + fmt.line("arg,"); + } + + fmt.line(".."); + fmt.outdented_line("} = pos.func.dfg[inst] {"); + + if iform.has_value_list { + fmt.line("let args = args.as_slice(&pos.func.dfg.value_lists);"); + } else if iform.num_value_operands == 1 { + fmt.line("let args = [arg];") + } + + // Generate the values for the tuple. + let emit_one_value = + |fmt: &mut Formatter, needs_comma: bool, op_num: usize, op: &Operand| { + let comma = if needs_comma { "," } else { "" }; + if op.is_immediate_or_entityref() { + let n = inst.imm_opnums.iter().position(|&i| i == op_num).unwrap(); + fmtln!(fmt, "{}{}", iform.imm_fields[n].member, comma); + } else if op.is_value() { + let n = inst.value_opnums.iter().position(|&i| i == op_num).unwrap(); + fmtln!(fmt, "pos.func.dfg.resolve_aliases(args[{}]),", n); + } else { + // This is a value list argument or a varargs. + assert!(iform.has_value_list || op.is_varargs()); + } + }; + + if need_tuple { + fmt.line("("); + fmt.indent(|fmt| { + for (op_num, op) in inst.operands_in.iter().enumerate() { + let needs_comma = emit_args || op_num + 1 < inst.operands_in.len(); + emit_one_value(fmt, needs_comma, op_num, op); + } + if emit_args { + fmt.line("args"); + } + }); + fmt.line(")"); + } else { + // Only one of these can be true at the same time, otherwise we'd need a tuple. + emit_one_value(fmt, false, 0, &inst.operands_in[0]); + if emit_args { + fmt.line("args"); + } + } + + fmt.outdented_line("} else {"); + fmt.line(r#"unreachable!("bad instruction format")"#); + }); + fmtln!(fmt, "};"); + fmt.empty_line(); + + assert_eq!(inst.operands_in.len(), apply.args.len()); + for (i, op) in inst.operands_in.iter().enumerate() { + if op.is_varargs() { + let name = &var_pool + .get(apply.args[i].maybe_var().expect("vararg without name")) + .name; + let n = inst + .imm_opnums + .iter() + .chain(inst.value_opnums.iter()) + .max() + .copied() + .unwrap_or(0); + fmtln!(fmt, "let {} = &Vec::from(&args[{}..]);", name, n); + } + } + + for &op_num in &inst.value_opnums { + let arg = &apply.args[op_num]; + if let Some(var_index) = arg.maybe_var() { + let var = var_pool.get(var_index); + if var.has_free_typevar() { + fmtln!( + fmt, + "let typeof_{} = pos.func.dfg.value_type({});", + var.name, + var.name + ); + } + } + } + + // If the definition creates results, detach the values and place them in locals. 
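// (Illustration of the prologue emitted so far, assuming a hypothetical
// one-value-operand `Unary` source pattern bound to variable `x`:
//
//   let (x, args) = if let ir::InstructionData::Unary { arg, .. } = pos.func.dfg[inst] {
//       let args = [arg];
//       (pos.func.dfg.resolve_aliases(args[0]), args)
//   } else {
//       unreachable!("bad instruction format")
//   };
//
// followed, when `x` has a free type variable, by
// `let typeof_x = pos.func.dfg.value_type(x);`.)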
+ let mut replace_inst = false; + if !def.defined_vars.is_empty() { + if def.defined_vars + == def_pool + .get(var_pool.get(def.defined_vars[0]).dst_def.unwrap()) + .defined_vars + { + // Special case: The instruction replacing node defines the exact same values. + fmt.comment(format!( + "Results handled by {}.", + def_pool + .get(var_pool.get(def.defined_vars[0]).dst_def.unwrap()) + .to_comment_string(var_pool) + )); + + fmt.line("let r = pos.func.dfg.inst_results(inst);"); + for (i, &var_index) in def.defined_vars.iter().enumerate() { + let var = var_pool.get(var_index); + fmtln!(fmt, "let {} = &r[{}];", var.name, i); + fmtln!( + fmt, + "let typeof_{} = pos.func.dfg.value_type(*{});", + var.name, + var.name + ); + } + + replace_inst = true; + } else { + // Boring case: Detach the result values, capture them in locals. + for &var_index in &def.defined_vars { + fmtln!(fmt, "let {};", var_pool.get(var_index).name); + } + + fmt.line("{"); + fmt.indent(|fmt| { + fmt.line("let r = pos.func.dfg.inst_results(inst);"); + for i in 0..def.defined_vars.len() { + let var = var_pool.get(def.defined_vars[i]); + fmtln!(fmt, "{} = r[{}];", var.name, i); + } + }); + fmt.line("}"); + + for &var_index in &def.defined_vars { + let var = var_pool.get(var_index); + if var.has_free_typevar() { + fmtln!( + fmt, + "let typeof_{} = pos.func.dfg.value_type({});", + var.name, + var.name + ); + } + } + } + } + replace_inst +} + +fn build_derived_expr(tv: &TypeVar) -> String { + let base = match &tv.base { + Some(base) => base, + None => { + assert!(tv.name.starts_with("typeof_")); + return format!("Some({})", tv.name); + } + }; + let base_expr = build_derived_expr(&base.type_var); + format!( + "{}.map(|t: crate::ir::Type| t.{}())", + base_expr, + base.derived_func.name() + ) +} + +/// Emit rust code for the given check. +/// +/// The emitted code is a statement redefining the `predicate` variable like this: +/// let predicate = predicate && ... +fn emit_runtime_typecheck<'a>( + constraint: &'a Constraint, + type_sets: &mut UniqueTable<'a, TypeSet>, + fmt: &mut Formatter, +) { + match constraint { + Constraint::InTypeset(tv, ts) => { + let ts_index = type_sets.add(&ts); + fmt.comment(format!( + "{} must belong to {:?}", + tv.name, + type_sets.get(ts_index) + )); + fmtln!( + fmt, + "let predicate = predicate && TYPE_SETS[{}].contains({});", + ts_index, + tv.name + ); + } + Constraint::Eq(tv1, tv2) => { + fmtln!( + fmt, + "let predicate = predicate && match ({}, {}) {{", + build_derived_expr(tv1), + build_derived_expr(tv2) + ); + fmt.indent(|fmt| { + fmt.line("(Some(a), Some(b)) => a == b,"); + fmt.comment("On overflow, constraint doesn\'t apply"); + fmt.line("_ => false,"); + }); + fmtln!(fmt, "};"); + } + Constraint::WiderOrEq(tv1, tv2) => { + fmtln!( + fmt, + "let predicate = predicate && match ({}, {}) {{", + build_derived_expr(tv1), + build_derived_expr(tv2) + ); + fmt.indent(|fmt| { + fmt.line("(Some(a), Some(b)) => a.wider_or_equal(b),"); + fmt.comment("On overflow, constraint doesn\'t apply"); + fmt.line("_ => false,"); + }); + fmtln!(fmt, "};"); + } + } +} + +/// Determine if `node` represents one of the value splitting instructions: `isplit` or `vsplit. +/// These instructions are lowered specially by the `legalize::split` module. 
+fn is_value_split(def: &Def) -> bool { + let name = &def.apply.inst.name; + name == "isplit" || name == "vsplit" +} + +fn emit_dst_inst(def: &Def, def_pool: &DefPool, var_pool: &VarPool, fmt: &mut Formatter) { + let defined_vars = { + let vars = def + .defined_vars + .iter() + .map(|&var_index| var_pool.get(var_index).name.as_ref()) + .collect::<Vec<&str>>(); + if vars.len() == 1 { + vars[0].to_string() + } else { + format!("({})", vars.join(", ")) + } + }; + + if is_value_split(def) { + // Split instructions are not emitted with the builder, but by calling special functions in + // the `legalizer::split` module. These functions will eliminate concat-split patterns. + fmt.line("let curpos = pos.position();"); + fmt.line("let srcloc = pos.srcloc();"); + fmtln!( + fmt, + "let {} = split::{}(pos.func, cfg, curpos, srcloc, {});", + defined_vars, + def.apply.inst.snake_name(), + def.apply.args[0].to_rust_code(var_pool) + ); + return; + } + + if def.defined_vars.is_empty() { + // This node doesn't define any values, so just insert the new instruction. + fmtln!( + fmt, + "pos.ins().{};", + def.apply.rust_builder(&def.defined_vars, var_pool) + ); + return; + } + + if let Some(src_def0) = var_pool.get(def.defined_vars[0]).src_def { + if def.defined_vars == def_pool.get(src_def0).defined_vars { + // The replacement instruction defines the exact same values as the source pattern. + // Unwrapping would have left the results intact. Replace the whole instruction. + fmtln!( + fmt, + "let {} = pos.func.dfg.replace(inst).{};", + defined_vars, + def.apply.rust_builder(&def.defined_vars, var_pool) + ); + + // We need to bump the cursor so following instructions are inserted *after* the + // replaced instruction. + fmt.line("if pos.current_inst() == Some(inst) {"); + fmt.indent(|fmt| { + fmt.line("pos.next_inst();"); + }); + fmt.line("}"); + return; + } + } + + // Insert a new instruction. + let mut builder = format!("let {} = pos.ins()", defined_vars); + + if def.defined_vars.len() == 1 && var_pool.get(def.defined_vars[0]).is_output() { + // Reuse the single source result value. + builder = format!( + "{}.with_result({})", + builder, + var_pool.get(def.defined_vars[0]).to_rust_code() + ); + } else if def + .defined_vars + .iter() + .any(|&var_index| var_pool.get(var_index).is_output()) + { + // There are more than one output values that can be reused. + let array = def + .defined_vars + .iter() + .map(|&var_index| { + let var = var_pool.get(var_index); + if var.is_output() { + format!("Some({})", var.name) + } else { + "None".into() + } + }) + .collect::<Vec<_>>() + .join(", "); + builder = format!("{}.with_results([{}])", builder, array); + } + + fmtln!( + fmt, + "{}.{};", + builder, + def.apply.rust_builder(&def.defined_vars, var_pool) + ); +} + +/// Emit code for `transform`, assuming that the opcode of transform's root instruction +/// has already been matched. +/// +/// `inst: Inst` is the variable to be replaced. It is pointed to by `pos: Cursor`. +/// `dfg: DataFlowGraph` is available and mutable. +fn gen_transform<'a>( + replace_inst: bool, + transform: &'a Transform, + type_sets: &mut UniqueTable<'a, TypeSet>, + fmt: &mut Formatter, +) { + // Evaluate the instruction predicate if any. 
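// (Aside on `emit_dst_inst` above, builder names hypothetical: the emitted
// destination code is either an in-place replacement,
//
//   let a = pos.func.dfg.replace(inst).iadd(x, y);
//
// when the destination defines exactly the source pattern's values, or a
// plain insertion such as
//
//   let a = pos.ins().iadd_imm(x, 1);
//
// optionally chained with `.with_result(..)` or `.with_results([..])` to
// reuse result values detached from the source instruction.)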
+ let apply = &transform.def_pool.get(transform.src).apply; + + let inst_predicate = apply + .inst_predicate_with_ctrl_typevar(&transform.var_pool) + .rust_predicate("pos.func"); + + let has_extra_constraints = !transform.type_env.constraints.is_empty(); + if has_extra_constraints { + // Extra constraints rely on the predicate being a variable that we can rebind as we add + // more constraint predicates. + if let Some(pred) = &inst_predicate { + fmt.multi_line(&format!("let predicate = {};", pred)); + } else { + fmt.line("let predicate = true;"); + } + } + + // Emit any runtime checks; these will rebind `predicate` emitted right above. + for constraint in &transform.type_env.constraints { + emit_runtime_typecheck(constraint, type_sets, fmt); + } + + let do_expand = |fmt: &mut Formatter| { + // Emit any constants that must be created before use. + for (name, value) in transform.const_pool.iter() { + fmtln!( + fmt, + "let {} = pos.func.dfg.constants.insert(vec!{:?}.into());", + name, + value + ); + } + + // If we are adding some blocks, we need to recall the original block, such that we can + // recompute it. + if !transform.block_pool.is_empty() { + fmt.line("let orig_block = pos.current_block().unwrap();"); + } + + // If we're going to delete `inst`, we need to detach its results first so they can be + // reattached during pattern expansion. + if !replace_inst { + fmt.line("pos.func.dfg.clear_results(inst);"); + } + + // Emit new block creation. + for block in &transform.block_pool { + let var = transform.var_pool.get(block.name); + fmtln!(fmt, "let {} = pos.func.dfg.make_block();", var.name); + } + + // Emit the destination pattern. + for &def_index in &transform.dst { + if let Some(block) = transform.block_pool.get(def_index) { + let var = transform.var_pool.get(block.name); + fmtln!(fmt, "pos.insert_block({});", var.name); + } + emit_dst_inst( + transform.def_pool.get(def_index), + &transform.def_pool, + &transform.var_pool, + fmt, + ); + } + + // Insert a new block after the last instruction, if needed. + let def_next_index = transform.def_pool.next_index(); + if let Some(block) = transform.block_pool.get(def_next_index) { + let var = transform.var_pool.get(block.name); + fmtln!(fmt, "pos.insert_block({});", var.name); + } + + // Delete the original instruction if we didn't have an opportunity to replace it. + if !replace_inst { + fmt.line("let removed = pos.remove_inst();"); + fmt.line("debug_assert_eq!(removed, inst);"); + } + + if transform.block_pool.is_empty() { + if transform.def_pool.get(transform.src).apply.inst.is_branch { + // A branch might have been legalized into multiple branches, so we need to recompute + // the cfg. + fmt.line("cfg.recompute_block(pos.func, pos.current_block().unwrap());"); + } + } else { + // Update CFG for the new blocks. + fmt.line("cfg.recompute_block(pos.func, orig_block);"); + for block in &transform.block_pool { + let var = transform.var_pool.get(block.name); + fmtln!(fmt, "cfg.recompute_block(pos.func, {});", var.name); + } + } + + fmt.line("return true;"); + }; + + // Guard the actual expansion by `predicate`. + if has_extra_constraints { + fmt.line("if predicate {"); + fmt.indent(|fmt| { + do_expand(fmt); + }); + fmt.line("}"); + } else if let Some(pred) = &inst_predicate { + fmt.multi_line(&format!("if {} {{", pred)); + fmt.indent(|fmt| { + do_expand(fmt); + }); + fmt.line("}"); + } else { + // Unconditional transform (there was no predicate), just emit it. 
+ do_expand(fmt); + } +} + +fn gen_transform_group<'a>( + group: &'a TransformGroup, + transform_groups: &TransformGroups, + type_sets: &mut UniqueTable<'a, TypeSet>, + fmt: &mut Formatter, +) { + fmt.doc_comment(group.doc); + fmt.line("#[allow(unused_variables,unused_assignments,unused_imports,non_snake_case)]"); + + // Function arguments. + fmtln!(fmt, "pub fn {}(", group.name); + fmt.indent(|fmt| { + fmt.line("inst: crate::ir::Inst,"); + fmt.line("func: &mut crate::ir::Function,"); + fmt.line("cfg: &mut crate::flowgraph::ControlFlowGraph,"); + fmt.line("isa: &dyn crate::isa::TargetIsa,"); + }); + fmtln!(fmt, ") -> bool {"); + + // Function body. + fmt.indent(|fmt| { + fmt.line("use crate::ir::InstBuilder;"); + fmt.line("use crate::cursor::{Cursor, FuncCursor};"); + fmt.line("let mut pos = FuncCursor::new(func).at_inst(inst);"); + fmt.line("pos.use_srcloc(inst);"); + + // Group the transforms by opcode so we can generate a big switch. + // Preserve ordering. + let mut inst_to_transforms = HashMap::new(); + for transform in &group.transforms { + let def_index = transform.src; + let inst = &transform.def_pool.get(def_index).apply.inst; + inst_to_transforms + .entry(inst.camel_name.clone()) + .or_insert_with(Vec::new) + .push(transform); + } + + let mut sorted_inst_names = Vec::from_iter(inst_to_transforms.keys()); + sorted_inst_names.sort(); + + fmt.line("{"); + fmt.indent(|fmt| { + fmt.line("match pos.func.dfg[inst].opcode() {"); + fmt.indent(|fmt| { + for camel_name in sorted_inst_names { + fmtln!(fmt, "ir::Opcode::{} => {{", camel_name); + fmt.indent(|fmt| { + let transforms = inst_to_transforms.get(camel_name).unwrap(); + + // Unwrap the source instruction, create local variables for the input variables. + let replace_inst = unwrap_inst(&transforms[0], fmt); + fmt.empty_line(); + + for (i, transform) in transforms.iter().enumerate() { + if i > 0 { + fmt.empty_line(); + } + gen_transform(replace_inst, transform, type_sets, fmt); + } + }); + fmtln!(fmt, "}"); + fmt.empty_line(); + } + + // Emit the custom transforms. The Rust compiler will complain about any overlap with + // the normal transforms. + let mut sorted_custom_legalizes = Vec::from_iter(&group.custom_legalizes); + sorted_custom_legalizes.sort(); + for (inst_camel_name, func_name) in sorted_custom_legalizes { + fmtln!(fmt, "ir::Opcode::{} => {{", inst_camel_name); + fmt.indent(|fmt| { + fmtln!(fmt, "{}(inst, func, cfg, isa);", func_name); + fmt.line("return true;"); + }); + fmtln!(fmt, "}"); + fmt.empty_line(); + } + + // We'll assume there are uncovered opcodes. + fmt.line("_ => {},"); + }); + fmt.line("}"); + }); + fmt.line("}"); + + // If we fall through, nothing was expanded; call the chain if any. + match &group.chain_with { + Some(group_id) => fmtln!( + fmt, + "{}(inst, func, cfg, isa)", + transform_groups.get(*group_id).rust_name() + ), + None => fmt.line("false"), + }; + }); + fmtln!(fmt, "}"); + fmt.empty_line(); +} + +/// Generate legalization functions for `isa` and add any shared `TransformGroup`s +/// encountered to `shared_groups`. +/// +/// Generate `TYPE_SETS` and `LEGALIZE_ACTIONS` tables. 
+fn gen_isa( + isa: &TargetIsa, + transform_groups: &TransformGroups, + shared_group_names: &mut HashSet<&'static str>, + fmt: &mut Formatter, +) { + let mut type_sets = UniqueTable::new(); + for group_index in isa.transitive_transform_groups(transform_groups) { + let group = transform_groups.get(group_index); + match group.isa_name { + Some(isa_name) => { + assert!( + isa_name == isa.name, + "ISA-specific legalizations must be used by the same ISA" + ); + gen_transform_group(group, transform_groups, &mut type_sets, fmt); + } + None => { + shared_group_names.insert(group.name); + } + } + } + + gen_typesets_table(&type_sets, fmt); + + let direct_groups = isa.direct_transform_groups(); + fmtln!( + fmt, + "pub static LEGALIZE_ACTIONS: [isa::Legalize; {}] = [", + direct_groups.len() + ); + fmt.indent(|fmt| { + for &group_index in direct_groups { + fmtln!(fmt, "{},", transform_groups.get(group_index).rust_name()); + } + }); + fmtln!(fmt, "];"); +} + +/// Generate the legalizer files. +pub(crate) fn generate( + isas: &[TargetIsa], + transform_groups: &TransformGroups, + extra_legalization_groups: &[&'static str], + filename_prefix: &str, + out_dir: &str, +) -> Result<(), error::Error> { + let mut shared_group_names = HashSet::new(); + + for isa in isas { + let mut fmt = Formatter::new(); + gen_isa(isa, transform_groups, &mut shared_group_names, &mut fmt); + fmt.update_file(format!("{}-{}.rs", filename_prefix, isa.name), out_dir)?; + } + + // Add extra legalization groups that were explicitly requested. + for group in extra_legalization_groups { + shared_group_names.insert(group); + } + + // Generate shared legalize groups. + let mut fmt = Formatter::new(); + // Generate shared legalize groups. + let mut type_sets = UniqueTable::new(); + let mut sorted_shared_group_names = Vec::from_iter(shared_group_names); + sorted_shared_group_names.sort(); + for group_name in &sorted_shared_group_names { + let group = transform_groups.by_name(group_name); + gen_transform_group(group, transform_groups, &mut type_sets, &mut fmt); + } + gen_typesets_table(&type_sets, &mut fmt); + fmt.update_file(format!("{}r.rs", filename_prefix), out_dir)?; + + Ok(()) +} diff --git a/third_party/rust/cranelift-codegen-meta/src/gen_registers.rs b/third_party/rust/cranelift-codegen-meta/src/gen_registers.rs new file mode 100644 index 0000000000..bd5ac95ae0 --- /dev/null +++ b/third_party/rust/cranelift-codegen-meta/src/gen_registers.rs @@ -0,0 +1,148 @@ +//! Generate the ISA-specific registers. 
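// (Illustration, bank and register names hypothetical: the emitted file
// contains one `INFO: RegInfo` static describing all banks and classes, a
// `<CLASS>_DATA: RegClassData` static plus a `<CLASS>: RegClass` reference
// per register class, and an `RU` enum with one variant per register unit,
// e.g.
//
//   pub enum RU { rax = 0, rcx = 1, /* ... */ }
// )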
+use crate::cdsl::isa::TargetIsa; +use crate::cdsl::regs::{RegBank, RegClass}; +use crate::error; +use crate::srcgen::Formatter; +use cranelift_entity::EntityRef; + +fn gen_regbank(fmt: &mut Formatter, reg_bank: &RegBank) { + let names = if !reg_bank.names.is_empty() { + format!(r#""{}""#, reg_bank.names.join(r#"", ""#)) + } else { + "".to_string() + }; + fmtln!(fmt, "RegBank {"); + fmt.indent(|fmt| { + fmtln!(fmt, r#"name: "{}","#, reg_bank.name); + fmtln!(fmt, "first_unit: {},", reg_bank.first_unit); + fmtln!(fmt, "units: {},", reg_bank.units); + fmtln!(fmt, "names: &[{}],", names); + fmtln!(fmt, r#"prefix: "{}","#, reg_bank.prefix); + fmtln!(fmt, "first_toprc: {},", reg_bank.toprcs[0].index()); + fmtln!(fmt, "num_toprcs: {},", reg_bank.toprcs.len()); + fmtln!( + fmt, + "pressure_tracking: {},", + if reg_bank.pressure_tracking { + "true" + } else { + "false" + } + ); + }); + fmtln!(fmt, "},"); +} + +fn gen_regclass(isa: &TargetIsa, reg_class: &RegClass, fmt: &mut Formatter) { + let reg_bank = isa.regs.banks.get(reg_class.bank).unwrap(); + + let mask: Vec<String> = reg_class + .mask(reg_bank.first_unit) + .iter() + .map(|x| format!("0x{:08x}", x)) + .collect(); + let mask = mask.join(", "); + + fmtln!( + fmt, + "pub static {}_DATA: RegClassData = RegClassData {{", + reg_class.name + ); + fmt.indent(|fmt| { + fmtln!(fmt, r#"name: "{}","#, reg_class.name); + fmtln!(fmt, "index: {},", reg_class.index.index()); + fmtln!(fmt, "width: {},", reg_class.width); + fmtln!(fmt, "bank: {},", reg_class.bank.index()); + fmtln!(fmt, "toprc: {},", reg_class.toprc.index()); + fmtln!(fmt, "first: {},", reg_bank.first_unit + reg_class.start); + fmtln!(fmt, "subclasses: {:#x},", reg_class.subclass_mask()); + fmtln!(fmt, "mask: [{}],", mask); + fmtln!( + fmt, + "pinned_reg: {:?},", + reg_bank + .pinned_reg + .map(|index| index + reg_bank.first_unit as u16 + reg_class.start as u16) + ); + fmtln!(fmt, "info: &INFO,"); + }); + fmtln!(fmt, "};"); + + fmtln!(fmt, "#[allow(dead_code)]"); + fmtln!( + fmt, + "pub static {}: RegClass = &{}_DATA;", + reg_class.name, + reg_class.name + ); +} + +fn gen_regbank_units(reg_bank: &RegBank, fmt: &mut Formatter) { + for unit in 0..reg_bank.units { + let v = unit + reg_bank.first_unit; + if (unit as usize) < reg_bank.names.len() { + fmtln!(fmt, "{} = {},", reg_bank.names[unit as usize], v); + continue; + } + fmtln!(fmt, "{}{} = {},", reg_bank.prefix, unit, v); + } +} + +fn gen_isa(isa: &TargetIsa, fmt: &mut Formatter) { + // Emit RegInfo. + fmtln!(fmt, "pub static INFO: RegInfo = RegInfo {"); + + fmt.indent(|fmt| { + fmtln!(fmt, "banks: &["); + // Bank descriptors. + fmt.indent(|fmt| { + for reg_bank in isa.regs.banks.values() { + gen_regbank(fmt, ®_bank); + } + }); + fmtln!(fmt, "],"); + // References to register classes. + fmtln!(fmt, "classes: &["); + fmt.indent(|fmt| { + for reg_class in isa.regs.classes.values() { + fmtln!(fmt, "&{}_DATA,", reg_class.name); + } + }); + fmtln!(fmt, "],"); + }); + fmtln!(fmt, "};"); + + // Register class descriptors. + for rc in isa.regs.classes.values() { + gen_regclass(&isa, rc, fmt); + } + + // Emit constants for all the register units. + fmtln!(fmt, "#[allow(dead_code, non_camel_case_types)]"); + fmtln!(fmt, "#[derive(Clone, Copy)]"); + fmtln!(fmt, "pub enum RU {"); + fmt.indent(|fmt| { + for reg_bank in isa.regs.banks.values() { + gen_regbank_units(reg_bank, fmt); + } + }); + fmtln!(fmt, "}"); + + // Emit Into conversion for the RU class. 
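As an illustration of the output shape (reviewer's addition, not part of the patch): for a hypothetical bank with prefix "r", four units, and a single named unit "zero", the unit constants emitted by gen_regbank_units above, together with the Into impl emitted just below, would look roughly like this:

    #[allow(dead_code, non_camel_case_types)]
    #[derive(Clone, Copy)]
    pub enum RU {
        zero = 0,
        r1 = 1,
        r2 = 2,
        r3 = 3,
    }

    impl Into<RegUnit> for RU {
        fn into(self) -> RegUnit {
            self as RegUnit
        }
    }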
+ fmtln!(fmt, "impl Into<RegUnit> for RU {"); + fmt.indent(|fmt| { + fmtln!(fmt, "fn into(self) -> RegUnit {"); + fmt.indent(|fmt| { + fmtln!(fmt, "self as RegUnit"); + }); + fmtln!(fmt, "}"); + }); + fmtln!(fmt, "}"); +} + +pub(crate) fn generate(isa: &TargetIsa, filename: &str, out_dir: &str) -> Result<(), error::Error> { + let mut fmt = Formatter::new(); + gen_isa(&isa, &mut fmt); + fmt.update_file(filename, out_dir)?; + Ok(()) +} diff --git a/third_party/rust/cranelift-codegen-meta/src/gen_settings.rs b/third_party/rust/cranelift-codegen-meta/src/gen_settings.rs new file mode 100644 index 0000000000..2ed5941b80 --- /dev/null +++ b/third_party/rust/cranelift-codegen-meta/src/gen_settings.rs @@ -0,0 +1,447 @@ +//! Generate the ISA-specific settings. +use std::collections::HashMap; + +use cranelift_codegen_shared::constant_hash::{generate_table, simple_hash}; + +use crate::cdsl::camel_case; +use crate::cdsl::settings::{ + BoolSetting, Predicate, Preset, Setting, SettingGroup, SpecificSetting, +}; +use crate::error; +use crate::srcgen::{Formatter, Match}; +use crate::unique_table::UniqueSeqTable; + +pub(crate) enum ParentGroup { + None, + Shared, +} + +/// Emits the constructor of the Flags structure. +fn gen_constructor(group: &SettingGroup, parent: ParentGroup, fmt: &mut Formatter) { + let args = match parent { + ParentGroup::None => "builder: Builder", + ParentGroup::Shared => "shared: &settings::Flags, builder: Builder", + }; + fmtln!(fmt, "impl Flags {"); + fmt.indent(|fmt| { + fmt.doc_comment(format!("Create flags {} settings group.", group.name)); + fmtln!(fmt, "#[allow(unused_variables)]"); + fmtln!(fmt, "pub fn new({}) -> Self {{", args); + fmt.indent(|fmt| { + fmtln!(fmt, "let bvec = builder.state_for(\"{}\");", group.name); + fmtln!( + fmt, + "let mut {} = Self {{ bytes: [0; {}] }};", + group.name, + group.byte_size() + ); + fmtln!( + fmt, + "debug_assert_eq!(bvec.len(), {});", + group.settings_size + ); + fmtln!( + fmt, + "{}.bytes[0..{}].copy_from_slice(&bvec);", + group.name, + group.settings_size + ); + + // Now compute the predicates. + for p in &group.predicates { + fmt.comment(format!("Precompute #{}.", p.number)); + fmtln!(fmt, "if {} {{", p.render(group)); + fmt.indent(|fmt| { + fmtln!( + fmt, + "{}.bytes[{}] |= 1 << {};", + group.name, + group.bool_start_byte_offset + p.number / 8, + p.number % 8 + ); + }); + fmtln!(fmt, "}"); + } + + fmtln!(fmt, group.name); + }); + fmtln!(fmt, "}"); + }); + fmtln!(fmt, "}"); +} + +/// Emit Display and FromStr implementations for enum settings. +fn gen_to_and_from_str(name: &str, values: &[&'static str], fmt: &mut Formatter) { + fmtln!(fmt, "impl fmt::Display for {} {{", name); + fmt.indent(|fmt| { + fmtln!( + fmt, + "fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {" + ); + fmt.indent(|fmt| { + fmtln!(fmt, "f.write_str(match *self {"); + fmt.indent(|fmt| { + for v in values.iter() { + fmtln!(fmt, "Self::{} => \"{}\",", camel_case(v), v); + } + }); + fmtln!(fmt, "})"); + }); + fmtln!(fmt, "}"); + }); + fmtln!(fmt, "}"); + + fmtln!(fmt, "impl str::FromStr for {} {{", name); + fmt.indent(|fmt| { + fmtln!(fmt, "type Err = ();"); + fmtln!(fmt, "fn from_str(s: &str) -> Result<Self, Self::Err> {"); + fmt.indent(|fmt| { + fmtln!(fmt, "match s {"); + fmt.indent(|fmt| { + for v in values.iter() { + fmtln!(fmt, "\"{}\" => Ok(Self::{}),", v, camel_case(v)); + } + fmtln!(fmt, "_ => Err(()),"); + }); + fmtln!(fmt, "}"); + }); + fmtln!(fmt, "}"); + }); + fmtln!(fmt, "}"); +} + +/// Emit real enum for the Enum settings. 
+fn gen_enum_types(group: &SettingGroup, fmt: &mut Formatter) { + for setting in group.settings.iter() { + let values = match setting.specific { + SpecificSetting::Bool(_) | SpecificSetting::Num(_) => continue, + SpecificSetting::Enum(ref values) => values, + }; + let name = camel_case(setting.name); + + fmt.doc_comment(format!("Values for `{}.{}`.", group.name, setting.name)); + fmtln!(fmt, "#[derive(Debug, Copy, Clone, PartialEq, Eq, Hash)]"); + fmtln!(fmt, "pub enum {} {{", name); + fmt.indent(|fmt| { + for v in values.iter() { + fmt.doc_comment(format!("`{}`.", v)); + fmtln!(fmt, "{},", camel_case(v)); + } + }); + fmtln!(fmt, "}"); + + gen_to_and_from_str(&name, values, fmt); + } +} + +/// Emit a getter function for `setting`. +fn gen_getter(setting: &Setting, fmt: &mut Formatter) { + fmt.doc_comment(setting.comment); + match setting.specific { + SpecificSetting::Bool(BoolSetting { + predicate_number, .. + }) => { + fmtln!(fmt, "pub fn {}(&self) -> bool {{", setting.name); + fmt.indent(|fmt| { + fmtln!(fmt, "self.numbered_predicate({})", predicate_number); + }); + fmtln!(fmt, "}"); + } + SpecificSetting::Enum(ref values) => { + let ty = camel_case(setting.name); + fmtln!(fmt, "pub fn {}(&self) -> {} {{", setting.name, ty); + fmt.indent(|fmt| { + let mut m = Match::new(format!("self.bytes[{}]", setting.byte_offset)); + for (i, v) in values.iter().enumerate() { + m.arm_no_fields(format!("{}", i), format!("{}::{}", ty, camel_case(v))); + } + m.arm_no_fields("_", "panic!(\"Invalid enum value\")"); + fmt.add_match(m); + }); + fmtln!(fmt, "}"); + } + SpecificSetting::Num(_) => { + fmtln!(fmt, "pub fn {}(&self) -> u8 {{", setting.name); + fmt.indent(|fmt| { + fmtln!(fmt, "self.bytes[{}]", setting.byte_offset); + }); + fmtln!(fmt, "}"); + } + } +} + +fn gen_pred_getter(predicate: &Predicate, group: &SettingGroup, fmt: &mut Formatter) { + fmt.doc_comment(format!("Computed predicate `{}`.", predicate.render(group))); + fmtln!(fmt, "pub fn {}(&self) -> bool {{", predicate.name); + fmt.indent(|fmt| { + fmtln!(fmt, "self.numbered_predicate({})", predicate.number); + }); + fmtln!(fmt, "}"); +} + +/// Emits getters for each setting value. +fn gen_getters(group: &SettingGroup, fmt: &mut Formatter) { + fmt.doc_comment("User-defined settings."); + fmtln!(fmt, "#[allow(dead_code)]"); + fmtln!(fmt, "impl Flags {"); + fmt.indent(|fmt| { + fmt.doc_comment("Get a view of the boolean predicates."); + fmtln!( + fmt, + "pub fn predicate_view(&self) -> crate::settings::PredicateView {" + ); + fmt.indent(|fmt| { + fmtln!( + fmt, + "crate::settings::PredicateView::new(&self.bytes[{}..])", + group.bool_start_byte_offset + ); + }); + fmtln!(fmt, "}"); + + if !group.settings.is_empty() { + fmt.doc_comment("Dynamic numbered predicate getter."); + fmtln!(fmt, "fn numbered_predicate(&self, p: usize) -> bool {"); + fmt.indent(|fmt| { + fmtln!( + fmt, + "self.bytes[{} + p / 8] & (1 << (p % 8)) != 0", + group.bool_start_byte_offset + ); + }); + fmtln!(fmt, "}"); + } + + for setting in &group.settings { + gen_getter(&setting, fmt); + } + for predicate in &group.predicates { + gen_pred_getter(&predicate, &group, fmt); + } + }); + fmtln!(fmt, "}"); +} + +#[derive(Hash, PartialEq, Eq)] +enum SettingOrPreset<'a> { + Setting(&'a Setting), + Preset(&'a Preset), +} + +impl<'a> SettingOrPreset<'a> { + fn name(&self) -> &str { + match *self { + SettingOrPreset::Setting(s) => s.name, + SettingOrPreset::Preset(p) => p.name, + } + } +} + +/// Emits DESCRIPTORS, ENUMERATORS, HASH_TABLE and PRESETS. 
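Before the descriptor tables below, it may help to see what the enum machinery above expands to. This is a reviewer's reconstruction (not part of the patch) of the output of gen_enum_types and gen_to_and_from_str for a hypothetical enum setting `opt_level` in a group named `shared`, with values "none", "speed", and "speed_and_size"; the actual setting definitions live elsewhere in this crate:

    /// Values for `shared.opt_level`.
    #[derive(Debug, Copy, Clone, PartialEq, Eq, Hash)]
    pub enum OptLevel {
        /// `none`.
        None,
        /// `speed`.
        Speed,
        /// `speed_and_size`.
        SpeedAndSize,
    }

    impl fmt::Display for OptLevel {
        fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
            f.write_str(match *self {
                Self::None => "none",
                Self::Speed => "speed",
                Self::SpeedAndSize => "speed_and_size",
            })
        }
    }

    impl str::FromStr for OptLevel {
        type Err = ();
        fn from_str(s: &str) -> Result<Self, Self::Err> {
            match s {
                "none" => Ok(Self::None),
                "speed" => Ok(Self::Speed),
                "speed_and_size" => Ok(Self::SpeedAndSize),
                _ => Err(()),
            }
        }
    }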
+fn gen_descriptors(group: &SettingGroup, fmt: &mut Formatter) { + let mut enum_table = UniqueSeqTable::new(); + + let mut descriptor_index_map: HashMap<SettingOrPreset, usize> = HashMap::new(); + + // Generate descriptors. + fmtln!( + fmt, + "static DESCRIPTORS: [detail::Descriptor; {}] = [", + group.settings.len() + group.presets.len() + ); + fmt.indent(|fmt| { + for (idx, setting) in group.settings.iter().enumerate() { + fmtln!(fmt, "detail::Descriptor {"); + fmt.indent(|fmt| { + fmtln!(fmt, "name: \"{}\",", setting.name); + fmtln!(fmt, "offset: {},", setting.byte_offset); + match setting.specific { + SpecificSetting::Bool(BoolSetting { bit_offset, .. }) => { + fmtln!( + fmt, + "detail: detail::Detail::Bool {{ bit: {} }},", + bit_offset + ); + } + SpecificSetting::Enum(ref values) => { + let offset = enum_table.add(values); + fmtln!( + fmt, + "detail: detail::Detail::Enum {{ last: {}, enumerators: {} }},", + values.len() - 1, + offset + ); + } + SpecificSetting::Num(_) => { + fmtln!(fmt, "detail: detail::Detail::Num,"); + } + } + + descriptor_index_map.insert(SettingOrPreset::Setting(setting), idx); + }); + fmtln!(fmt, "},"); + } + + for (idx, preset) in group.presets.iter().enumerate() { + fmtln!(fmt, "detail::Descriptor {"); + fmt.indent(|fmt| { + fmtln!(fmt, "name: \"{}\",", preset.name); + fmtln!(fmt, "offset: {},", (idx as u8) * group.settings_size); + fmtln!(fmt, "detail: detail::Detail::Preset,"); + }); + fmtln!(fmt, "},"); + + let whole_idx = idx + group.settings.len(); + descriptor_index_map.insert(SettingOrPreset::Preset(preset), whole_idx); + } + }); + fmtln!(fmt, "];"); + + // Generate enumerators. + fmtln!(fmt, "static ENUMERATORS: [&str; {}] = [", enum_table.len()); + fmt.indent(|fmt| { + for enum_val in enum_table.iter() { + fmtln!(fmt, "\"{}\",", enum_val); + } + }); + fmtln!(fmt, "];"); + + // Generate hash table. + let mut hash_entries: Vec<SettingOrPreset> = Vec::new(); + hash_entries.extend(group.settings.iter().map(|x| SettingOrPreset::Setting(x))); + hash_entries.extend(group.presets.iter().map(|x| SettingOrPreset::Preset(x))); + + let hash_table = generate_table(hash_entries.iter(), hash_entries.len(), |entry| { + simple_hash(entry.name()) + }); + fmtln!(fmt, "static HASH_TABLE: [u16; {}] = [", hash_table.len()); + fmt.indent(|fmt| { + for h in &hash_table { + match *h { + Some(setting_or_preset) => fmtln!( + fmt, + "{},", + &descriptor_index_map + .get(setting_or_preset) + .unwrap() + .to_string() + ), + None => fmtln!(fmt, "0xffff,"), + } + } + }); + fmtln!(fmt, "];"); + + // Generate presets. 
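A note on the PRESETS table emitted just below: each preset contributes `settings_size` (mask, value) byte pairs, and its descriptor's `offset` field (set to `idx * settings_size` above) is the index of its first pair. The runtime side that consumes these rows lives in cranelift-codegen's settings module; the intent is roughly the following sketch (reviewer's addition; the helper name apply_preset is invented):

    fn apply_preset(bytes: &mut [u8], rows: &[(u8, u8)]) {
        // For every settings byte, clear the bits covered by the mask and
        // substitute the preset's value bits.
        for (byte, &(mask, value)) in bytes.iter_mut().zip(rows) {
            *byte = (*byte & !mask) | (value & mask);
        }
    }

    fn main() {
        let mut bytes = [0b0000_0101u8];
        apply_preset(&mut bytes, &[(0b0000_1111, 0b0000_1010)]);
        assert_eq!(bytes, [0b0000_1010]);
    }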
+ fmtln!( + fmt, + "static PRESETS: [(u8, u8); {}] = [", + group.presets.len() * (group.settings_size as usize) + ); + fmt.indent(|fmt| { + for preset in &group.presets { + fmt.comment(preset.name); + for (mask, value) in preset.layout(&group) { + fmtln!(fmt, "(0b{:08b}, 0b{:08b}),", mask, value); + } + } + }); + fmtln!(fmt, "];"); +} + +fn gen_template(group: &SettingGroup, fmt: &mut Formatter) { + let mut default_bytes: Vec<u8> = vec![0; group.settings_size as usize]; + for setting in &group.settings { + *default_bytes.get_mut(setting.byte_offset as usize).unwrap() |= setting.default_byte(); + } + + let default_bytes: Vec<String> = default_bytes + .iter() + .map(|x| format!("{:#04x}", x)) + .collect(); + let default_bytes_str = default_bytes.join(", "); + + fmtln!( + fmt, + "static TEMPLATE: detail::Template = detail::Template {" + ); + fmt.indent(|fmt| { + fmtln!(fmt, "name: \"{}\",", group.name); + fmtln!(fmt, "descriptors: &DESCRIPTORS,"); + fmtln!(fmt, "enumerators: &ENUMERATORS,"); + fmtln!(fmt, "hash_table: &HASH_TABLE,"); + fmtln!(fmt, "defaults: &[{}],", default_bytes_str); + fmtln!(fmt, "presets: &PRESETS,"); + }); + fmtln!(fmt, "};"); + + fmt.doc_comment(format!( + "Create a `settings::Builder` for the {} settings group.", + group.name + )); + fmtln!(fmt, "pub fn builder() -> Builder {"); + fmt.indent(|fmt| { + fmtln!(fmt, "Builder::new(&TEMPLATE)"); + }); + fmtln!(fmt, "}"); +} + +fn gen_display(group: &SettingGroup, fmt: &mut Formatter) { + fmtln!(fmt, "impl fmt::Display for Flags {"); + fmt.indent(|fmt| { + fmtln!( + fmt, + "fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {" + ); + fmt.indent(|fmt| { + fmtln!(fmt, "writeln!(f, \"[{}]\")?;", group.name); + fmtln!(fmt, "for d in &DESCRIPTORS {"); + fmt.indent(|fmt| { + fmtln!(fmt, "if !d.detail.is_preset() {"); + fmt.indent(|fmt| { + fmtln!(fmt, "write!(f, \"{} = \", d.name)?;"); + fmtln!( + fmt, + "TEMPLATE.format_toml_value(d.detail, self.bytes[d.offset as usize], f)?;", + ); + fmtln!(fmt, "writeln!(f)?;"); + }); + fmtln!(fmt, "}"); + }); + fmtln!(fmt, "}"); + fmtln!(fmt, "Ok(())"); + }); + fmtln!(fmt, "}") + }); + fmtln!(fmt, "}"); +} + +fn gen_group(group: &SettingGroup, parent: ParentGroup, fmt: &mut Formatter) { + // Generate struct. + fmtln!(fmt, "#[derive(Clone)]"); + fmt.doc_comment(format!("Flags group `{}`.", group.name)); + fmtln!(fmt, "pub struct Flags {"); + fmt.indent(|fmt| { + fmtln!(fmt, "bytes: [u8; {}],", group.byte_size()); + }); + fmtln!(fmt, "}"); + + gen_constructor(group, parent, fmt); + gen_enum_types(group, fmt); + gen_getters(group, fmt); + gen_descriptors(group, fmt); + gen_template(group, fmt); + gen_display(group, fmt); +} + +pub(crate) fn generate( + settings: &SettingGroup, + parent_group: ParentGroup, + filename: &str, + out_dir: &str, +) -> Result<(), error::Error> { + let mut fmt = Formatter::new(); + gen_group(&settings, parent_group, &mut fmt); + fmt.update_file(filename, out_dir)?; + Ok(()) +} diff --git a/third_party/rust/cranelift-codegen-meta/src/gen_types.rs b/third_party/rust/cranelift-codegen-meta/src/gen_types.rs new file mode 100644 index 0000000000..6ced212b8d --- /dev/null +++ b/third_party/rust/cranelift-codegen-meta/src/gen_types.rs @@ -0,0 +1,76 @@ +//! Generate sources with type info. +//! +//! This generates a `types.rs` file which is included in +//! `cranelift-codegen/ir/types.rs`. The file provides constant definitions for the +//! most commonly used types, including all of the scalar types. +//! +//! 
This ensures that the metaprogram and the generated program see the same +//! type numbering. + +use crate::cdsl::types as cdsl_types; +use crate::error; +use crate::srcgen; + +/// Emit a constant definition of a single value type. +fn emit_type(ty: &cdsl_types::ValueType, fmt: &mut srcgen::Formatter) -> Result<(), error::Error> { + let name = ty.to_string().to_uppercase(); + let number = ty.number().ok_or_else(|| { + error::Error::with_msg(format!( + "Could not emit type `{}` which has no number.", + name + )) + })?; + + fmt.doc_comment(&ty.doc()); + fmtln!(fmt, "pub const {}: Type = Type({:#x});\n", name, number); + + Ok(()) +} + +/// Emit definition for all vector types with `bits` total size. +fn emit_vectors(bits: u64, fmt: &mut srcgen::Formatter) -> Result<(), error::Error> { + let vec_size: u64 = bits / 8; + for vec in cdsl_types::ValueType::all_lane_types() + .map(|ty| (ty, cdsl_types::ValueType::from(ty).membytes())) + .filter(|&(_, lane_size)| lane_size != 0 && lane_size < vec_size) + .map(|(ty, lane_size)| (ty, vec_size / lane_size)) + .map(|(ty, lanes)| cdsl_types::VectorType::new(ty, lanes)) + { + emit_type(&cdsl_types::ValueType::from(vec), fmt)?; + } + + Ok(()) +} + +/// Emit types using the given formatter object. +fn emit_types(fmt: &mut srcgen::Formatter) -> Result<(), error::Error> { + // Emit all of the special types, such as types for CPU flags. + for spec in cdsl_types::ValueType::all_special_types().map(cdsl_types::ValueType::from) { + emit_type(&spec, fmt)?; + } + + // Emit all of the lane types, such integers, floats, and booleans. + for ty in cdsl_types::ValueType::all_lane_types().map(cdsl_types::ValueType::from) { + emit_type(&ty, fmt)?; + } + + // Emit all reference types. + for ty in cdsl_types::ValueType::all_reference_types().map(cdsl_types::ValueType::from) { + emit_type(&ty, fmt)?; + } + + // Emit vector definitions for common SIMD sizes. + for vec_size in &[64_u64, 128, 256, 512] { + emit_vectors(*vec_size, fmt)?; + } + + Ok(()) +} + +/// Generate the types file. 
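The arithmetic in emit_vectors above is worth spelling out: for a given total width in bits, every lane type strictly smaller than the vector yields one SIMD type with `vector_bytes / lane_bytes` lanes. A tiny standalone check (reviewer's addition, not part of the patch):

    fn lanes(vector_bits: u64, lane_bytes: u64) -> u64 {
        (vector_bits / 8) / lane_bytes
    }

    fn main() {
        // 128-bit vectors: 4-byte lanes give 4 lanes (I32X4, F32X4); 1-byte lanes give 16 (I8X16).
        assert_eq!(lanes(128, 4), 4);
        assert_eq!(lanes(128, 1), 16);
        // Lanes as wide as the whole vector are excluded by the `lane_size < vec_size` filter above.
    }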
+pub(crate) fn generate(filename: &str, out_dir: &str) -> Result<(), error::Error> { + let mut fmt = srcgen::Formatter::new(); + emit_types(&mut fmt)?; + fmt.update_file(filename, out_dir)?; + Ok(()) +} diff --git a/third_party/rust/cranelift-codegen-meta/src/isa/arm32/mod.rs b/third_party/rust/cranelift-codegen-meta/src/isa/arm32/mod.rs new file mode 100644 index 0000000000..f699ece8eb --- /dev/null +++ b/third_party/rust/cranelift-codegen-meta/src/isa/arm32/mod.rs @@ -0,0 +1,88 @@ +use crate::cdsl::cpu_modes::CpuMode; +use crate::cdsl::instructions::{InstructionGroupBuilder, InstructionPredicateMap}; +use crate::cdsl::isa::TargetIsa; +use crate::cdsl::recipes::Recipes; +use crate::cdsl::regs::{IsaRegs, IsaRegsBuilder, RegBankBuilder, RegClassBuilder}; +use crate::cdsl::settings::{SettingGroup, SettingGroupBuilder}; + +use crate::shared::Definitions as SharedDefinitions; + +fn define_settings(_shared: &SettingGroup) -> SettingGroup { + let setting = SettingGroupBuilder::new("arm32"); + setting.build() +} + +fn define_regs() -> IsaRegs { + let mut regs = IsaRegsBuilder::new(); + + let builder = RegBankBuilder::new("FloatRegs", "s") + .units(64) + .track_pressure(true); + let float_regs = regs.add_bank(builder); + + let builder = RegBankBuilder::new("IntRegs", "r") + .units(16) + .track_pressure(true); + let int_regs = regs.add_bank(builder); + + let builder = RegBankBuilder::new("FlagRegs", "") + .units(1) + .names(vec!["nzcv"]) + .track_pressure(false); + let flag_reg = regs.add_bank(builder); + + let builder = RegClassBuilder::new_toplevel("S", float_regs).count(32); + regs.add_class(builder); + + let builder = RegClassBuilder::new_toplevel("D", float_regs).width(2); + regs.add_class(builder); + + let builder = RegClassBuilder::new_toplevel("Q", float_regs).width(4); + regs.add_class(builder); + + let builder = RegClassBuilder::new_toplevel("GPR", int_regs); + regs.add_class(builder); + + let builder = RegClassBuilder::new_toplevel("FLAG", flag_reg); + regs.add_class(builder); + + regs.build() +} + +pub(crate) fn define(shared_defs: &mut SharedDefinitions) -> TargetIsa { + let settings = define_settings(&shared_defs.settings); + let regs = define_regs(); + + let inst_group = InstructionGroupBuilder::new(&mut shared_defs.all_instructions).build(); + + // CPU modes for 32-bit ARM and Thumb2. + let mut a32 = CpuMode::new("A32"); + let mut t32 = CpuMode::new("T32"); + + // TODO refine these. + let narrow_flags = shared_defs.transform_groups.by_name("narrow_flags"); + a32.legalize_default(narrow_flags); + t32.legalize_default(narrow_flags); + + // Make sure that the expand code is used, thus generated. + let expand = shared_defs.transform_groups.by_name("expand"); + a32.legalize_monomorphic(expand); + + let cpu_modes = vec![a32, t32]; + + // TODO implement arm32 recipes. + let recipes = Recipes::new(); + + // TODO implement arm32 encodings and predicates. 
+ let encodings_predicates = InstructionPredicateMap::new(); + + TargetIsa::new( + "arm32", + inst_group, + settings, + regs, + recipes, + cpu_modes, + encodings_predicates, + ) +} diff --git a/third_party/rust/cranelift-codegen-meta/src/isa/arm64/mod.rs b/third_party/rust/cranelift-codegen-meta/src/isa/arm64/mod.rs new file mode 100644 index 0000000000..5d8bc76fc4 --- /dev/null +++ b/third_party/rust/cranelift-codegen-meta/src/isa/arm64/mod.rs @@ -0,0 +1,79 @@ +use crate::cdsl::cpu_modes::CpuMode; +use crate::cdsl::instructions::{InstructionGroupBuilder, InstructionPredicateMap}; +use crate::cdsl::isa::TargetIsa; +use crate::cdsl::recipes::Recipes; +use crate::cdsl::regs::{IsaRegs, IsaRegsBuilder, RegBankBuilder, RegClassBuilder}; +use crate::cdsl::settings::{SettingGroup, SettingGroupBuilder}; + +use crate::shared::Definitions as SharedDefinitions; + +fn define_settings(_shared: &SettingGroup) -> SettingGroup { + let setting = SettingGroupBuilder::new("arm64"); + setting.build() +} + +fn define_registers() -> IsaRegs { + let mut regs = IsaRegsBuilder::new(); + + // The `x31` regunit serves as the stack pointer / zero register depending on context. We + // reserve it and don't model the difference. + let builder = RegBankBuilder::new("IntRegs", "x") + .units(32) + .track_pressure(true); + let int_regs = regs.add_bank(builder); + + let builder = RegBankBuilder::new("FloatRegs", "v") + .units(32) + .track_pressure(true); + let float_regs = regs.add_bank(builder); + + let builder = RegBankBuilder::new("FlagRegs", "") + .units(1) + .names(vec!["nzcv"]) + .track_pressure(false); + let flag_reg = regs.add_bank(builder); + + let builder = RegClassBuilder::new_toplevel("GPR", int_regs); + regs.add_class(builder); + + let builder = RegClassBuilder::new_toplevel("FPR", float_regs); + regs.add_class(builder); + + let builder = RegClassBuilder::new_toplevel("FLAG", flag_reg); + regs.add_class(builder); + + regs.build() +} + +pub(crate) fn define(shared_defs: &mut SharedDefinitions) -> TargetIsa { + let settings = define_settings(&shared_defs.settings); + let regs = define_registers(); + + let inst_group = InstructionGroupBuilder::new(&mut shared_defs.all_instructions).build(); + + let mut a64 = CpuMode::new("A64"); + + // TODO refine these. + let expand_flags = shared_defs.transform_groups.by_name("expand_flags"); + let narrow_flags = shared_defs.transform_groups.by_name("narrow_flags"); + a64.legalize_monomorphic(expand_flags); + a64.legalize_default(narrow_flags); + + let cpu_modes = vec![a64]; + + // TODO implement arm64 recipes. + let recipes = Recipes::new(); + + // TODO implement arm64 encodings and predicates. + let encodings_predicates = InstructionPredicateMap::new(); + + TargetIsa::new( + "arm64", + inst_group, + settings, + regs, + recipes, + cpu_modes, + encodings_predicates, + ) +} diff --git a/third_party/rust/cranelift-codegen-meta/src/isa/mod.rs b/third_party/rust/cranelift-codegen-meta/src/isa/mod.rs new file mode 100644 index 0000000000..ed8db85f0d --- /dev/null +++ b/third_party/rust/cranelift-codegen-meta/src/isa/mod.rs @@ -0,0 +1,67 @@ +//! Define supported ISAs; includes ISA-specific instructions, encodings, registers, settings, etc. +use crate::cdsl::isa::TargetIsa; +use crate::shared::Definitions as SharedDefinitions; +use std::fmt; + +mod arm32; +mod arm64; +mod riscv; +pub(crate) mod x86; + +/// Represents known ISA target. +#[derive(PartialEq, Copy, Clone)] +pub enum Isa { + Riscv, + X86, + Arm32, + Arm64, +} + +impl Isa { + /// Creates isa target using name. 
+ pub fn from_name(name: &str) -> Option<Self> { + Isa::all() + .iter() + .cloned() + .find(|isa| isa.to_string() == name) + } + + /// Creates isa target from arch. + pub fn from_arch(arch: &str) -> Option<Self> { + match arch { + "riscv" => Some(Isa::Riscv), + "aarch64" => Some(Isa::Arm64), + x if ["x86_64", "i386", "i586", "i686"].contains(&x) => Some(Isa::X86), + x if x.starts_with("arm") || arch.starts_with("thumb") => Some(Isa::Arm32), + _ => None, + } + } + + /// Returns all supported isa targets. + pub fn all() -> &'static [Isa] { + &[Isa::Riscv, Isa::X86, Isa::Arm32, Isa::Arm64] + } +} + +impl fmt::Display for Isa { + // These names should be kept in sync with the crate features. + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + match *self { + Isa::Riscv => write!(f, "riscv"), + Isa::X86 => write!(f, "x86"), + Isa::Arm32 => write!(f, "arm32"), + Isa::Arm64 => write!(f, "arm64"), + } + } +} + +pub(crate) fn define(isas: &[Isa], shared_defs: &mut SharedDefinitions) -> Vec<TargetIsa> { + isas.iter() + .map(|isa| match isa { + Isa::Riscv => riscv::define(shared_defs), + Isa::X86 => x86::define(shared_defs), + Isa::Arm32 => arm32::define(shared_defs), + Isa::Arm64 => arm64::define(shared_defs), + }) + .collect() +} diff --git a/third_party/rust/cranelift-codegen-meta/src/isa/riscv/encodings.rs b/third_party/rust/cranelift-codegen-meta/src/isa/riscv/encodings.rs new file mode 100644 index 0000000000..c255ddb483 --- /dev/null +++ b/third_party/rust/cranelift-codegen-meta/src/isa/riscv/encodings.rs @@ -0,0 +1,431 @@ +use crate::cdsl::ast::{Apply, Expr, Literal, VarPool}; +use crate::cdsl::encodings::{Encoding, EncodingBuilder}; +use crate::cdsl::instructions::{ + Bindable, BoundInstruction, InstSpec, InstructionPredicateNode, InstructionPredicateRegistry, +}; +use crate::cdsl::recipes::{EncodingRecipeNumber, Recipes}; +use crate::cdsl::settings::SettingGroup; + +use crate::shared::types::Bool::B1; +use crate::shared::types::Float::{F32, F64}; +use crate::shared::types::Int::{I16, I32, I64, I8}; +use crate::shared::types::Reference::{R32, R64}; +use crate::shared::Definitions as SharedDefinitions; + +use super::recipes::RecipeGroup; + +pub(crate) struct PerCpuModeEncodings<'defs> { + pub inst_pred_reg: InstructionPredicateRegistry, + pub enc32: Vec<Encoding>, + pub enc64: Vec<Encoding>, + recipes: &'defs Recipes, +} + +impl<'defs> PerCpuModeEncodings<'defs> { + fn new(recipes: &'defs Recipes) -> Self { + Self { + inst_pred_reg: InstructionPredicateRegistry::new(), + enc32: Vec::new(), + enc64: Vec::new(), + recipes, + } + } + fn enc( + &self, + inst: impl Into<InstSpec>, + recipe: EncodingRecipeNumber, + bits: u16, + ) -> EncodingBuilder { + EncodingBuilder::new(inst.into(), recipe, bits) + } + fn add32(&mut self, encoding: EncodingBuilder) { + self.enc32 + .push(encoding.build(self.recipes, &mut self.inst_pred_reg)); + } + fn add64(&mut self, encoding: EncodingBuilder) { + self.enc64 + .push(encoding.build(self.recipes, &mut self.inst_pred_reg)); + } +} + +// The low 7 bits of a RISC-V instruction is the base opcode. All 32-bit instructions have 11 as +// the two low bits, with bits 6:2 determining the base opcode. +// +// Encbits for the 32-bit recipes are opcode[6:2] | (funct3 << 5) | ... +// The functions below encode the encbits. 
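To ground the bit layout described above, here is a reviewer's sketch (not part of the patch) of how the encbits produced by op_bits and friends relate to a full 32-bit R-type instruction word. The helper name r_type_word is invented; the real assembly is done by the put_* helpers in the codegen crate:

    // encbits = opcode[6:2] | (funct3 << 5) | (funct7 << 8); the fixed low bits are 0b11.
    fn r_type_word(encbits: u32, rs1: u32, rs2: u32, rd: u32) -> u32 {
        let opcode_6_2 = encbits & 0x1f;
        let funct3 = (encbits >> 5) & 0x7;
        let funct7 = encbits >> 8;
        0b11 | (opcode_6_2 << 2) | (rd << 7) | (funct3 << 12)
            | (rs1 << 15) | (rs2 << 20) | (funct7 << 25)
    }

    fn main() {
        // `add x3, x1, x2`: base opcode 0b0110011 (opcode[6:2] = 0b01100), funct3 = 0, funct7 = 0.
        assert_eq!(r_type_word(0b01100, 1, 2, 3), 0x0020_81b3);
    }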
+ +fn load_bits(funct3: u16) -> u16 { + assert!(funct3 <= 0b111); + funct3 << 5 +} + +fn store_bits(funct3: u16) -> u16 { + assert!(funct3 <= 0b111); + 0b01000 | (funct3 << 5) +} + +fn branch_bits(funct3: u16) -> u16 { + assert!(funct3 <= 0b111); + 0b11000 | (funct3 << 5) +} + +fn jalr_bits() -> u16 { + // This was previously accepting an argument funct3 of 3 bits and used the following formula: + //0b11001 | (funct3 << 5) + 0b11001 +} + +fn jal_bits() -> u16 { + 0b11011 +} + +fn opimm_bits(funct3: u16, funct7: u16) -> u16 { + assert!(funct3 <= 0b111); + 0b00100 | (funct3 << 5) | (funct7 << 8) +} + +fn opimm32_bits(funct3: u16, funct7: u16) -> u16 { + assert!(funct3 <= 0b111); + 0b00110 | (funct3 << 5) | (funct7 << 8) +} + +fn op_bits(funct3: u16, funct7: u16) -> u16 { + assert!(funct3 <= 0b111); + assert!(funct7 <= 0b111_1111); + 0b01100 | (funct3 << 5) | (funct7 << 8) +} + +fn op32_bits(funct3: u16, funct7: u16) -> u16 { + assert!(funct3 <= 0b111); + assert!(funct7 <= 0b111_1111); + 0b01110 | (funct3 << 5) | (funct7 << 8) +} + +fn lui_bits() -> u16 { + 0b01101 +} + +pub(crate) fn define<'defs>( + shared_defs: &'defs SharedDefinitions, + isa_settings: &SettingGroup, + recipes: &'defs RecipeGroup, +) -> PerCpuModeEncodings<'defs> { + // Instructions shorthands. + let shared = &shared_defs.instructions; + + let band = shared.by_name("band"); + let band_imm = shared.by_name("band_imm"); + let bor = shared.by_name("bor"); + let bor_imm = shared.by_name("bor_imm"); + let br_icmp = shared.by_name("br_icmp"); + let brz = shared.by_name("brz"); + let brnz = shared.by_name("brnz"); + let bxor = shared.by_name("bxor"); + let bxor_imm = shared.by_name("bxor_imm"); + let call = shared.by_name("call"); + let call_indirect = shared.by_name("call_indirect"); + let copy = shared.by_name("copy"); + let copy_nop = shared.by_name("copy_nop"); + let copy_to_ssa = shared.by_name("copy_to_ssa"); + let fill = shared.by_name("fill"); + let fill_nop = shared.by_name("fill_nop"); + let iadd = shared.by_name("iadd"); + let iadd_imm = shared.by_name("iadd_imm"); + let iconst = shared.by_name("iconst"); + let icmp = shared.by_name("icmp"); + let icmp_imm = shared.by_name("icmp_imm"); + let imul = shared.by_name("imul"); + let ishl = shared.by_name("ishl"); + let ishl_imm = shared.by_name("ishl_imm"); + let isub = shared.by_name("isub"); + let jump = shared.by_name("jump"); + let regmove = shared.by_name("regmove"); + let spill = shared.by_name("spill"); + let sshr = shared.by_name("sshr"); + let sshr_imm = shared.by_name("sshr_imm"); + let ushr = shared.by_name("ushr"); + let ushr_imm = shared.by_name("ushr_imm"); + let return_ = shared.by_name("return"); + + // Recipes shorthands, prefixed with r_. 
+ let r_copytossa = recipes.by_name("copytossa"); + let r_fillnull = recipes.by_name("fillnull"); + let r_icall = recipes.by_name("Icall"); + let r_icopy = recipes.by_name("Icopy"); + let r_ii = recipes.by_name("Ii"); + let r_iicmp = recipes.by_name("Iicmp"); + let r_iret = recipes.by_name("Iret"); + let r_irmov = recipes.by_name("Irmov"); + let r_iz = recipes.by_name("Iz"); + let r_gp_sp = recipes.by_name("GPsp"); + let r_gp_fi = recipes.by_name("GPfi"); + let r_r = recipes.by_name("R"); + let r_ricmp = recipes.by_name("Ricmp"); + let r_rshamt = recipes.by_name("Rshamt"); + let r_sb = recipes.by_name("SB"); + let r_sb_zero = recipes.by_name("SBzero"); + let r_stacknull = recipes.by_name("stacknull"); + let r_u = recipes.by_name("U"); + let r_uj = recipes.by_name("UJ"); + let r_uj_call = recipes.by_name("UJcall"); + + // Predicates shorthands. + let use_m = isa_settings.predicate_by_name("use_m"); + + // Definitions. + let mut e = PerCpuModeEncodings::new(&recipes.recipes); + + // Basic arithmetic binary instructions are encoded in an R-type instruction. + for &(inst, inst_imm, f3, f7) in &[ + (iadd, Some(iadd_imm), 0b000, 0b000_0000), + (isub, None, 0b000, 0b010_0000), + (bxor, Some(bxor_imm), 0b100, 0b000_0000), + (bor, Some(bor_imm), 0b110, 0b000_0000), + (band, Some(band_imm), 0b111, 0b000_0000), + ] { + e.add32(e.enc(inst.bind(I32), r_r, op_bits(f3, f7))); + e.add64(e.enc(inst.bind(I64), r_r, op_bits(f3, f7))); + + // Immediate versions for add/xor/or/and. + if let Some(inst_imm) = inst_imm { + e.add32(e.enc(inst_imm.bind(I32), r_ii, opimm_bits(f3, 0))); + e.add64(e.enc(inst_imm.bind(I64), r_ii, opimm_bits(f3, 0))); + } + } + + // 32-bit ops in RV64. + e.add64(e.enc(iadd.bind(I32), r_r, op32_bits(0b000, 0b000_0000))); + e.add64(e.enc(isub.bind(I32), r_r, op32_bits(0b000, 0b010_0000))); + // There are no andiw/oriw/xoriw variations. + e.add64(e.enc(iadd_imm.bind(I32), r_ii, opimm32_bits(0b000, 0))); + + // Use iadd_imm with %x0 to materialize constants. + e.add32(e.enc(iconst.bind(I32), r_iz, opimm_bits(0b0, 0))); + e.add64(e.enc(iconst.bind(I32), r_iz, opimm_bits(0b0, 0))); + e.add64(e.enc(iconst.bind(I64), r_iz, opimm_bits(0b0, 0))); + + // Dynamic shifts have the same masking semantics as the clif base instructions. + for &(inst, inst_imm, f3, f7) in &[ + (ishl, ishl_imm, 0b1, 0b0), + (ushr, ushr_imm, 0b101, 0b0), + (sshr, sshr_imm, 0b101, 0b10_0000), + ] { + e.add32(e.enc(inst.bind(I32).bind(I32), r_r, op_bits(f3, f7))); + e.add64(e.enc(inst.bind(I64).bind(I64), r_r, op_bits(f3, f7))); + e.add64(e.enc(inst.bind(I32).bind(I32), r_r, op32_bits(f3, f7))); + // Allow i32 shift amounts in 64-bit shifts. + e.add64(e.enc(inst.bind(I64).bind(I32), r_r, op_bits(f3, f7))); + e.add64(e.enc(inst.bind(I32).bind(I64), r_r, op32_bits(f3, f7))); + + // Immediate shifts. + e.add32(e.enc(inst_imm.bind(I32), r_rshamt, opimm_bits(f3, f7))); + e.add64(e.enc(inst_imm.bind(I64), r_rshamt, opimm_bits(f3, f7))); + e.add64(e.enc(inst_imm.bind(I32), r_rshamt, opimm32_bits(f3, f7))); + } + + // Signed and unsigned integer 'less than'. There are no 'w' variants for comparing 32-bit + // numbers in RV64. + { + let mut var_pool = VarPool::new(); + + // Helper that creates an instruction predicate for an instruction in the icmp family. 
+ let mut icmp_instp = |bound_inst: &BoundInstruction, + intcc_field: &'static str| + -> InstructionPredicateNode { + let x = var_pool.create("x"); + let y = var_pool.create("y"); + let cc = Literal::enumerator_for(&shared_defs.imm.intcc, intcc_field); + Apply::new( + bound_inst.clone().into(), + vec![Expr::Literal(cc), Expr::Var(x), Expr::Var(y)], + ) + .inst_predicate(&var_pool) + .unwrap() + }; + + let icmp_i32 = icmp.bind(I32); + let icmp_i64 = icmp.bind(I64); + e.add32( + e.enc(icmp_i32.clone(), r_ricmp, op_bits(0b010, 0b000_0000)) + .inst_predicate(icmp_instp(&icmp_i32, "slt")), + ); + e.add64( + e.enc(icmp_i64.clone(), r_ricmp, op_bits(0b010, 0b000_0000)) + .inst_predicate(icmp_instp(&icmp_i64, "slt")), + ); + + e.add32( + e.enc(icmp_i32.clone(), r_ricmp, op_bits(0b011, 0b000_0000)) + .inst_predicate(icmp_instp(&icmp_i32, "ult")), + ); + e.add64( + e.enc(icmp_i64.clone(), r_ricmp, op_bits(0b011, 0b000_0000)) + .inst_predicate(icmp_instp(&icmp_i64, "ult")), + ); + + // Immediate variants. + let icmp_i32 = icmp_imm.bind(I32); + let icmp_i64 = icmp_imm.bind(I64); + e.add32( + e.enc(icmp_i32.clone(), r_iicmp, opimm_bits(0b010, 0)) + .inst_predicate(icmp_instp(&icmp_i32, "slt")), + ); + e.add64( + e.enc(icmp_i64.clone(), r_iicmp, opimm_bits(0b010, 0)) + .inst_predicate(icmp_instp(&icmp_i64, "slt")), + ); + + e.add32( + e.enc(icmp_i32.clone(), r_iicmp, opimm_bits(0b011, 0)) + .inst_predicate(icmp_instp(&icmp_i32, "ult")), + ); + e.add64( + e.enc(icmp_i64.clone(), r_iicmp, opimm_bits(0b011, 0)) + .inst_predicate(icmp_instp(&icmp_i64, "ult")), + ); + } + + // Integer constants with the low 12 bits clear are materialized by lui. + e.add32(e.enc(iconst.bind(I32), r_u, lui_bits())); + e.add64(e.enc(iconst.bind(I32), r_u, lui_bits())); + e.add64(e.enc(iconst.bind(I64), r_u, lui_bits())); + + // "M" Standard Extension for Integer Multiplication and Division. + // Gated by the `use_m` flag. + e.add32( + e.enc(imul.bind(I32), r_r, op_bits(0b000, 0b0000_0001)) + .isa_predicate(use_m), + ); + e.add64( + e.enc(imul.bind(I64), r_r, op_bits(0b000, 0b0000_0001)) + .isa_predicate(use_m), + ); + e.add64( + e.enc(imul.bind(I32), r_r, op32_bits(0b000, 0b0000_0001)) + .isa_predicate(use_m), + ); + + // Control flow. + + // Unconditional branches. + e.add32(e.enc(jump, r_uj, jal_bits())); + e.add64(e.enc(jump, r_uj, jal_bits())); + e.add32(e.enc(call, r_uj_call, jal_bits())); + e.add64(e.enc(call, r_uj_call, jal_bits())); + + // Conditional branches. + { + let mut var_pool = VarPool::new(); + + // Helper that creates an instruction predicate for an instruction in the icmp family. 
+ let mut br_icmp_instp = |bound_inst: &BoundInstruction, + intcc_field: &'static str| + -> InstructionPredicateNode { + let x = var_pool.create("x"); + let y = var_pool.create("y"); + let dest = var_pool.create("dest"); + let args = var_pool.create("args"); + let cc = Literal::enumerator_for(&shared_defs.imm.intcc, intcc_field); + Apply::new( + bound_inst.clone().into(), + vec![ + Expr::Literal(cc), + Expr::Var(x), + Expr::Var(y), + Expr::Var(dest), + Expr::Var(args), + ], + ) + .inst_predicate(&var_pool) + .unwrap() + }; + + let br_icmp_i32 = br_icmp.bind(I32); + let br_icmp_i64 = br_icmp.bind(I64); + for &(cond, f3) in &[ + ("eq", 0b000), + ("ne", 0b001), + ("slt", 0b100), + ("sge", 0b101), + ("ult", 0b110), + ("uge", 0b111), + ] { + e.add32( + e.enc(br_icmp_i32.clone(), r_sb, branch_bits(f3)) + .inst_predicate(br_icmp_instp(&br_icmp_i32, cond)), + ); + e.add64( + e.enc(br_icmp_i64.clone(), r_sb, branch_bits(f3)) + .inst_predicate(br_icmp_instp(&br_icmp_i64, cond)), + ); + } + } + + for &(inst, f3) in &[(brz, 0b000), (brnz, 0b001)] { + e.add32(e.enc(inst.bind(I32), r_sb_zero, branch_bits(f3))); + e.add64(e.enc(inst.bind(I64), r_sb_zero, branch_bits(f3))); + e.add32(e.enc(inst.bind(B1), r_sb_zero, branch_bits(f3))); + e.add64(e.enc(inst.bind(B1), r_sb_zero, branch_bits(f3))); + } + + // Returns are a special case of jalr_bits using %x1 to hold the return address. + // The return address is provided by a special-purpose `link` return value that + // is added by legalize_signature(). + e.add32(e.enc(return_, r_iret, jalr_bits())); + e.add64(e.enc(return_, r_iret, jalr_bits())); + e.add32(e.enc(call_indirect.bind(I32), r_icall, jalr_bits())); + e.add64(e.enc(call_indirect.bind(I64), r_icall, jalr_bits())); + + // Spill and fill. + e.add32(e.enc(spill.bind(I32), r_gp_sp, store_bits(0b010))); + e.add64(e.enc(spill.bind(I32), r_gp_sp, store_bits(0b010))); + e.add64(e.enc(spill.bind(I64), r_gp_sp, store_bits(0b011))); + e.add32(e.enc(fill.bind(I32), r_gp_fi, load_bits(0b010))); + e.add64(e.enc(fill.bind(I32), r_gp_fi, load_bits(0b010))); + e.add64(e.enc(fill.bind(I64), r_gp_fi, load_bits(0b011))); + + // No-op fills, created by late-stage redundant-fill removal. + for &ty in &[I64, I32] { + e.add64(e.enc(fill_nop.bind(ty), r_fillnull, 0)); + e.add32(e.enc(fill_nop.bind(ty), r_fillnull, 0)); + } + e.add64(e.enc(fill_nop.bind(B1), r_fillnull, 0)); + e.add32(e.enc(fill_nop.bind(B1), r_fillnull, 0)); + + // Register copies. + e.add32(e.enc(copy.bind(I32), r_icopy, opimm_bits(0b000, 0))); + e.add64(e.enc(copy.bind(I64), r_icopy, opimm_bits(0b000, 0))); + e.add64(e.enc(copy.bind(I32), r_icopy, opimm32_bits(0b000, 0))); + + e.add32(e.enc(regmove.bind(I32), r_irmov, opimm_bits(0b000, 0))); + e.add64(e.enc(regmove.bind(I64), r_irmov, opimm_bits(0b000, 0))); + e.add64(e.enc(regmove.bind(I32), r_irmov, opimm32_bits(0b000, 0))); + + e.add32(e.enc(copy.bind(B1), r_icopy, opimm_bits(0b000, 0))); + e.add64(e.enc(copy.bind(B1), r_icopy, opimm_bits(0b000, 0))); + e.add32(e.enc(regmove.bind(B1), r_irmov, opimm_bits(0b000, 0))); + e.add64(e.enc(regmove.bind(B1), r_irmov, opimm_bits(0b000, 0))); + + // Stack-slot-to-the-same-stack-slot copy, which is guaranteed to turn + // into a no-op. + // The same encoding is generated for both the 64- and 32-bit architectures. 
+ for &ty in &[I64, I32, I16, I8] { + e.add32(e.enc(copy_nop.bind(ty), r_stacknull, 0)); + e.add64(e.enc(copy_nop.bind(ty), r_stacknull, 0)); + } + for &ty in &[F64, F32] { + e.add32(e.enc(copy_nop.bind(ty), r_stacknull, 0)); + e.add64(e.enc(copy_nop.bind(ty), r_stacknull, 0)); + } + + // Copy-to-SSA + e.add32(e.enc(copy_to_ssa.bind(I32), r_copytossa, opimm_bits(0b000, 0))); + e.add64(e.enc(copy_to_ssa.bind(I64), r_copytossa, opimm_bits(0b000, 0))); + e.add64(e.enc(copy_to_ssa.bind(I32), r_copytossa, opimm32_bits(0b000, 0))); + e.add32(e.enc(copy_to_ssa.bind(B1), r_copytossa, opimm_bits(0b000, 0))); + e.add64(e.enc(copy_to_ssa.bind(B1), r_copytossa, opimm_bits(0b000, 0))); + e.add32(e.enc(copy_to_ssa.bind(R32), r_copytossa, opimm_bits(0b000, 0))); + e.add64(e.enc(copy_to_ssa.bind(R64), r_copytossa, opimm_bits(0b000, 0))); + + e +} diff --git a/third_party/rust/cranelift-codegen-meta/src/isa/riscv/mod.rs b/third_party/rust/cranelift-codegen-meta/src/isa/riscv/mod.rs new file mode 100644 index 0000000000..801e61a3d2 --- /dev/null +++ b/third_party/rust/cranelift-codegen-meta/src/isa/riscv/mod.rs @@ -0,0 +1,134 @@ +use crate::cdsl::cpu_modes::CpuMode; +use crate::cdsl::instructions::InstructionGroupBuilder; +use crate::cdsl::isa::TargetIsa; +use crate::cdsl::regs::{IsaRegs, IsaRegsBuilder, RegBankBuilder, RegClassBuilder}; +use crate::cdsl::settings::{PredicateNode, SettingGroup, SettingGroupBuilder}; + +use crate::shared::types::Float::{F32, F64}; +use crate::shared::types::Int::{I32, I64}; +use crate::shared::Definitions as SharedDefinitions; + +mod encodings; +mod recipes; + +fn define_settings(shared: &SettingGroup) -> SettingGroup { + let mut setting = SettingGroupBuilder::new("riscv"); + + let supports_m = setting.add_bool( + "supports_m", + "CPU supports the 'M' extension (mul/div)", + false, + ); + let supports_a = setting.add_bool( + "supports_a", + "CPU supports the 'A' extension (atomics)", + false, + ); + let supports_f = setting.add_bool( + "supports_f", + "CPU supports the 'F' extension (float)", + false, + ); + let supports_d = setting.add_bool( + "supports_d", + "CPU supports the 'D' extension (double)", + false, + ); + + let enable_m = setting.add_bool( + "enable_m", + "Enable the use of 'M' instructions if available", + true, + ); + + setting.add_bool( + "enable_e", + "Enable the 'RV32E' instruction set with only 16 registers", + false, + ); + + let shared_enable_atomics = shared.get_bool("enable_atomics"); + let shared_enable_float = shared.get_bool("enable_float"); + let shared_enable_simd = shared.get_bool("enable_simd"); + + setting.add_predicate("use_m", predicate!(supports_m && enable_m)); + setting.add_predicate("use_a", predicate!(supports_a && shared_enable_atomics)); + setting.add_predicate("use_f", predicate!(supports_f && shared_enable_float)); + setting.add_predicate("use_d", predicate!(supports_d && shared_enable_float)); + setting.add_predicate( + "full_float", + predicate!(shared_enable_simd && supports_f && supports_d), + ); + + setting.build() +} + +fn define_registers() -> IsaRegs { + let mut regs = IsaRegsBuilder::new(); + + let builder = RegBankBuilder::new("IntRegs", "x") + .units(32) + .track_pressure(true); + let int_regs = regs.add_bank(builder); + + let builder = RegBankBuilder::new("FloatRegs", "f") + .units(32) + .track_pressure(true); + let float_regs = regs.add_bank(builder); + + let builder = RegClassBuilder::new_toplevel("GPR", int_regs); + regs.add_class(builder); + + let builder = RegClassBuilder::new_toplevel("FPR", float_regs); + 
regs.add_class(builder); + + regs.build() +} + +pub(crate) fn define(shared_defs: &mut SharedDefinitions) -> TargetIsa { + let settings = define_settings(&shared_defs.settings); + let regs = define_registers(); + + let inst_group = InstructionGroupBuilder::new(&mut shared_defs.all_instructions).build(); + + // CPU modes for 32-bit and 64-bit operation. + let mut rv_32 = CpuMode::new("RV32"); + let mut rv_64 = CpuMode::new("RV64"); + + let expand = shared_defs.transform_groups.by_name("expand"); + let narrow_no_flags = shared_defs.transform_groups.by_name("narrow_no_flags"); + + rv_32.legalize_monomorphic(expand); + rv_32.legalize_default(narrow_no_flags); + rv_32.legalize_type(I32, expand); + rv_32.legalize_type(F32, expand); + rv_32.legalize_type(F64, expand); + + rv_64.legalize_monomorphic(expand); + rv_64.legalize_default(narrow_no_flags); + rv_64.legalize_type(I32, expand); + rv_64.legalize_type(I64, expand); + rv_64.legalize_type(F32, expand); + rv_64.legalize_type(F64, expand); + + let recipes = recipes::define(shared_defs, ®s); + + let encodings = encodings::define(shared_defs, &settings, &recipes); + rv_32.set_encodings(encodings.enc32); + rv_64.set_encodings(encodings.enc64); + let encodings_predicates = encodings.inst_pred_reg.extract(); + + let recipes = recipes.collect(); + + let cpu_modes = vec![rv_32, rv_64]; + + TargetIsa::new( + "riscv", + inst_group, + settings, + regs, + recipes, + cpu_modes, + encodings_predicates, + ) +} diff --git a/third_party/rust/cranelift-codegen-meta/src/isa/riscv/recipes.rs b/third_party/rust/cranelift-codegen-meta/src/isa/riscv/recipes.rs new file mode 100644 index 0000000000..47acdbb042 --- /dev/null +++ b/third_party/rust/cranelift-codegen-meta/src/isa/riscv/recipes.rs @@ -0,0 +1,279 @@ +use std::collections::HashMap; + +use crate::cdsl::instructions::InstructionPredicate; +use crate::cdsl::recipes::{EncodingRecipeBuilder, EncodingRecipeNumber, Recipes, Stack}; +use crate::cdsl::regs::IsaRegs; +use crate::shared::Definitions as SharedDefinitions; + +/// An helper to create recipes and use them when defining the RISCV encodings. +pub(crate) struct RecipeGroup { + /// The actualy list of recipes explicitly created in this file. + pub recipes: Recipes, + + /// Provides fast lookup from a name to an encoding recipe. + name_to_recipe: HashMap<String, EncodingRecipeNumber>, +} + +impl RecipeGroup { + fn new() -> Self { + Self { + recipes: Recipes::new(), + name_to_recipe: HashMap::new(), + } + } + + fn push(&mut self, builder: EncodingRecipeBuilder) { + assert!( + self.name_to_recipe.get(&builder.name).is_none(), + format!("riscv recipe '{}' created twice", builder.name) + ); + let name = builder.name.clone(); + let number = self.recipes.push(builder.build()); + self.name_to_recipe.insert(name, number); + } + + pub fn by_name(&self, name: &str) -> EncodingRecipeNumber { + *self + .name_to_recipe + .get(name) + .unwrap_or_else(|| panic!("unknown riscv recipe name {}", name)) + } + + pub fn collect(self) -> Recipes { + self.recipes + } +} + +pub(crate) fn define(shared_defs: &SharedDefinitions, regs: &IsaRegs) -> RecipeGroup { + let formats = &shared_defs.formats; + + // Register classes shorthands. + let gpr = regs.class_by_name("GPR"); + + // Definitions. + let mut recipes = RecipeGroup::new(); + + // R-type 32-bit instructions: These are mostly binary arithmetic instructions. 
+ // The encbits are `opcode[6:2] | (funct3 << 5) | (funct7 << 8) + recipes.push( + EncodingRecipeBuilder::new("R", &formats.binary, 4) + .operands_in(vec![gpr, gpr]) + .operands_out(vec![gpr]) + .emit("put_r(bits, in_reg0, in_reg1, out_reg0, sink);"), + ); + + // R-type with an immediate shift amount instead of rs2. + recipes.push( + EncodingRecipeBuilder::new("Rshamt", &formats.binary_imm64, 4) + .operands_in(vec![gpr]) + .operands_out(vec![gpr]) + .emit("put_rshamt(bits, in_reg0, imm.into(), out_reg0, sink);"), + ); + + // R-type encoding of an integer comparison. + recipes.push( + EncodingRecipeBuilder::new("Ricmp", &formats.int_compare, 4) + .operands_in(vec![gpr, gpr]) + .operands_out(vec![gpr]) + .emit("put_r(bits, in_reg0, in_reg1, out_reg0, sink);"), + ); + + recipes.push( + EncodingRecipeBuilder::new("Ii", &formats.binary_imm64, 4) + .operands_in(vec![gpr]) + .operands_out(vec![gpr]) + .inst_predicate(InstructionPredicate::new_is_signed_int( + &*formats.binary_imm64, + "imm", + 12, + 0, + )) + .emit("put_i(bits, in_reg0, imm.into(), out_reg0, sink);"), + ); + + // I-type instruction with a hardcoded %x0 rs1. + recipes.push( + EncodingRecipeBuilder::new("Iz", &formats.unary_imm, 4) + .operands_out(vec![gpr]) + .inst_predicate(InstructionPredicate::new_is_signed_int( + &formats.unary_imm, + "imm", + 12, + 0, + )) + .emit("put_i(bits, 0, imm.into(), out_reg0, sink);"), + ); + + // I-type encoding of an integer comparison. + recipes.push( + EncodingRecipeBuilder::new("Iicmp", &formats.int_compare_imm, 4) + .operands_in(vec![gpr]) + .operands_out(vec![gpr]) + .inst_predicate(InstructionPredicate::new_is_signed_int( + &formats.int_compare_imm, + "imm", + 12, + 0, + )) + .emit("put_i(bits, in_reg0, imm.into(), out_reg0, sink);"), + ); + + // I-type encoding for `jalr` as a return instruction. We won't use the immediate offset. The + // variable return values are not encoded. + recipes.push( + EncodingRecipeBuilder::new("Iret", &formats.multiary, 4).emit( + r#" + // Return instructions are always a jalr to %x1. + // The return address is provided as a special-purpose link argument. + put_i( + bits, + 1, // rs1 = %x1 + 0, // no offset. + 0, // rd = %x0: no address written. + sink, + ); + "#, + ), + ); + + // I-type encoding for `jalr` as a call_indirect. + recipes.push( + EncodingRecipeBuilder::new("Icall", &formats.call_indirect, 4) + .operands_in(vec![gpr]) + .emit( + r#" + // call_indirect instructions are jalr with rd=%x1. + put_i( + bits, + in_reg0, + 0, // no offset. + 1, // rd = %x1: link register. + sink, + ); + "#, + ), + ); + + // Copy of a GPR is implemented as addi x, 0. + recipes.push( + EncodingRecipeBuilder::new("Icopy", &formats.unary, 4) + .operands_in(vec![gpr]) + .operands_out(vec![gpr]) + .emit("put_i(bits, in_reg0, 0, out_reg0, sink);"), + ); + + // Same for a GPR regmove. + recipes.push( + EncodingRecipeBuilder::new("Irmov", &formats.reg_move, 4) + .operands_in(vec![gpr]) + .emit("put_i(bits, src, 0, dst, sink);"), + ); + + // Same for copy-to-SSA -- GPR regmove. + recipes.push( + EncodingRecipeBuilder::new("copytossa", &formats.copy_to_ssa, 4) + // No operands_in to mention, because a source register is specified directly. + .operands_out(vec![gpr]) + .emit("put_i(bits, src, 0, out_reg0, sink);"), + ); + + // U-type instructions have a 20-bit immediate that targets bits 12-31. 
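The two immediate predicates in play here are easy to mix up: the Ii/Iz/Iicmp recipes above require a plain signed 12-bit immediate, while the U recipe pushed just below accepts a signed 32-bit value whose low 12 bits are clear. A reviewer's sketch of the two ranges (function names invented; reading new_is_signed_int(.., bits, scale) as "signed `bits`-bit value that is a multiple of 2^scale" is an assumption based on its use here):

    fn fits_i_type(imm: i64) -> bool {
        // new_is_signed_int(.., 12, 0): addi-style signed 12-bit immediate.
        imm >= -(1i64 << 11) && imm < (1i64 << 11)
    }

    fn fits_u_type(imm: i64) -> bool {
        // new_is_signed_int(.., 32, 12): lui-style, signed 32 bits with the low 12 clear.
        (imm & 0xfff) == 0 && imm >= -(1i64 << 31) && imm < (1i64 << 31)
    }

    fn main() {
        assert!(fits_i_type(-7) && !fits_i_type(4096));
        assert!(fits_u_type(0x1234_5000) && !fits_u_type(0x1234_5678));
    }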
+ recipes.push( + EncodingRecipeBuilder::new("U", &formats.unary_imm, 4) + .operands_out(vec![gpr]) + .inst_predicate(InstructionPredicate::new_is_signed_int( + &formats.unary_imm, + "imm", + 32, + 12, + )) + .emit("put_u(bits, imm.into(), out_reg0, sink);"), + ); + + // UJ-type unconditional branch instructions. + recipes.push( + EncodingRecipeBuilder::new("UJ", &formats.jump, 4) + .branch_range((0, 21)) + .emit( + r#" + let dest = i64::from(func.offsets[destination]); + let disp = dest - i64::from(sink.offset()); + put_uj(bits, disp, 0, sink); + "#, + ), + ); + + recipes.push(EncodingRecipeBuilder::new("UJcall", &formats.call, 4).emit( + r#" + sink.reloc_external(func.srclocs[inst], + Reloc::RiscvCall, + &func.dfg.ext_funcs[func_ref].name, + 0); + // rd=%x1 is the standard link register. + put_uj(bits, 0, 1, sink); + "#, + )); + + // SB-type branch instructions. + recipes.push( + EncodingRecipeBuilder::new("SB", &formats.branch_icmp, 4) + .operands_in(vec![gpr, gpr]) + .branch_range((0, 13)) + .emit( + r#" + let dest = i64::from(func.offsets[destination]); + let disp = dest - i64::from(sink.offset()); + put_sb(bits, disp, in_reg0, in_reg1, sink); + "#, + ), + ); + + // SB-type branch instruction with rs2 fixed to zero. + recipes.push( + EncodingRecipeBuilder::new("SBzero", &formats.branch, 4) + .operands_in(vec![gpr]) + .branch_range((0, 13)) + .emit( + r#" + let dest = i64::from(func.offsets[destination]); + let disp = dest - i64::from(sink.offset()); + put_sb(bits, disp, in_reg0, 0, sink); + "#, + ), + ); + + // Spill of a GPR. + recipes.push( + EncodingRecipeBuilder::new("GPsp", &formats.unary, 4) + .operands_in(vec![gpr]) + .operands_out(vec![Stack::new(gpr)]) + .emit("unimplemented!();"), + ); + + // Fill of a GPR. + recipes.push( + EncodingRecipeBuilder::new("GPfi", &formats.unary, 4) + .operands_in(vec![Stack::new(gpr)]) + .operands_out(vec![gpr]) + .emit("unimplemented!();"), + ); + + // Stack-slot to same stack-slot copy, which is guaranteed to turn into a no-op. + recipes.push( + EncodingRecipeBuilder::new("stacknull", &formats.unary, 0) + .operands_in(vec![Stack::new(gpr)]) + .operands_out(vec![Stack::new(gpr)]) + .emit(""), + ); + + // No-op fills, created by late-stage redundant-fill removal. 
+ recipes.push( + EncodingRecipeBuilder::new("fillnull", &formats.unary, 0) + .operands_in(vec![Stack::new(gpr)]) + .operands_out(vec![gpr]) + .clobbers_flags(false) + .emit(""), + ); + + recipes +} diff --git a/third_party/rust/cranelift-codegen-meta/src/isa/x86/encodings.rs b/third_party/rust/cranelift-codegen-meta/src/isa/x86/encodings.rs new file mode 100644 index 0000000000..9ee12656c0 --- /dev/null +++ b/third_party/rust/cranelift-codegen-meta/src/isa/x86/encodings.rs @@ -0,0 +1,2726 @@ +#![allow(non_snake_case)] + +use cranelift_codegen_shared::condcodes::IntCC; +use std::collections::HashMap; + +use crate::cdsl::encodings::{Encoding, EncodingBuilder}; +use crate::cdsl::instructions::{ + vector, Bindable, Immediate, InstSpec, Instruction, InstructionGroup, InstructionPredicate, + InstructionPredicateNode, InstructionPredicateRegistry, +}; +use crate::cdsl::recipes::{EncodingRecipe, EncodingRecipeNumber, Recipes}; +use crate::cdsl::settings::{SettingGroup, SettingPredicateNumber}; +use crate::cdsl::types::{LaneType, ValueType}; +use crate::shared::types::Bool::{B1, B16, B32, B64, B8}; +use crate::shared::types::Float::{F32, F64}; +use crate::shared::types::Int::{I16, I32, I64, I8}; +use crate::shared::types::Reference::{R32, R64}; +use crate::shared::Definitions as SharedDefinitions; + +use crate::isa::x86::opcodes::*; + +use super::recipes::{RecipeGroup, Template}; +use crate::cdsl::instructions::BindParameter::Any; + +pub(crate) struct PerCpuModeEncodings { + pub enc32: Vec<Encoding>, + pub enc64: Vec<Encoding>, + pub recipes: Recipes, + recipes_by_name: HashMap<String, EncodingRecipeNumber>, + pub inst_pred_reg: InstructionPredicateRegistry, +} + +impl PerCpuModeEncodings { + fn new() -> Self { + Self { + enc32: Vec::new(), + enc64: Vec::new(), + recipes: Recipes::new(), + recipes_by_name: HashMap::new(), + inst_pred_reg: InstructionPredicateRegistry::new(), + } + } + + fn add_recipe(&mut self, recipe: EncodingRecipe) -> EncodingRecipeNumber { + if let Some(found_index) = self.recipes_by_name.get(&recipe.name) { + assert!( + self.recipes[*found_index] == recipe, + format!( + "trying to insert different recipes with a same name ({})", + recipe.name + ) + ); + *found_index + } else { + let recipe_name = recipe.name.clone(); + let index = self.recipes.push(recipe); + self.recipes_by_name.insert(recipe_name, index); + index + } + } + + fn make_encoding<T>( + &mut self, + inst: InstSpec, + template: Template, + builder_closure: T, + ) -> Encoding + where + T: FnOnce(EncodingBuilder) -> EncodingBuilder, + { + let (recipe, bits) = template.build(); + let recipe_number = self.add_recipe(recipe); + let builder = EncodingBuilder::new(inst, recipe_number, bits); + builder_closure(builder).build(&self.recipes, &mut self.inst_pred_reg) + } + + fn enc32_func<T>(&mut self, inst: impl Into<InstSpec>, template: Template, builder_closure: T) + where + T: FnOnce(EncodingBuilder) -> EncodingBuilder, + { + let encoding = self.make_encoding(inst.into(), template, builder_closure); + self.enc32.push(encoding); + } + fn enc32(&mut self, inst: impl Into<InstSpec>, template: Template) { + self.enc32_func(inst, template, |x| x); + } + fn enc32_isap( + &mut self, + inst: impl Into<InstSpec>, + template: Template, + isap: SettingPredicateNumber, + ) { + self.enc32_func(inst, template, |encoding| encoding.isa_predicate(isap)); + } + fn enc32_instp( + &mut self, + inst: impl Into<InstSpec>, + template: Template, + instp: InstructionPredicateNode, + ) { + self.enc32_func(inst, template, |encoding| 
encoding.inst_predicate(instp)); + } + fn enc32_rec(&mut self, inst: impl Into<InstSpec>, recipe: &EncodingRecipe, bits: u16) { + let recipe_number = self.add_recipe(recipe.clone()); + let builder = EncodingBuilder::new(inst.into(), recipe_number, bits); + let encoding = builder.build(&self.recipes, &mut self.inst_pred_reg); + self.enc32.push(encoding); + } + + fn enc64_func<T>(&mut self, inst: impl Into<InstSpec>, template: Template, builder_closure: T) + where + T: FnOnce(EncodingBuilder) -> EncodingBuilder, + { + let encoding = self.make_encoding(inst.into(), template, builder_closure); + self.enc64.push(encoding); + } + fn enc64(&mut self, inst: impl Into<InstSpec>, template: Template) { + self.enc64_func(inst, template, |x| x); + } + fn enc64_isap( + &mut self, + inst: impl Into<InstSpec>, + template: Template, + isap: SettingPredicateNumber, + ) { + self.enc64_func(inst, template, |encoding| encoding.isa_predicate(isap)); + } + fn enc64_instp( + &mut self, + inst: impl Into<InstSpec>, + template: Template, + instp: InstructionPredicateNode, + ) { + self.enc64_func(inst, template, |encoding| encoding.inst_predicate(instp)); + } + fn enc64_rec(&mut self, inst: impl Into<InstSpec>, recipe: &EncodingRecipe, bits: u16) { + let recipe_number = self.add_recipe(recipe.clone()); + let builder = EncodingBuilder::new(inst.into(), recipe_number, bits); + let encoding = builder.build(&self.recipes, &mut self.inst_pred_reg); + self.enc64.push(encoding); + } + + /// Adds I32/I64 encodings as appropriate for a typed instruction. + /// The REX prefix is always inferred at runtime. + /// + /// Add encodings for `inst.i32` to X86_32. + /// Add encodings for `inst.i32` to X86_64 with optional, inferred REX. + /// Add encodings for `inst.i64` to X86_64 with a REX.W prefix. + fn enc_i32_i64(&mut self, inst: impl Into<InstSpec>, template: Template) { + let inst: InstSpec = inst.into(); + + // I32 on x86: no REX prefix. + self.enc32(inst.bind(I32), template.infer_rex()); + + // I32 on x86_64: REX.W unset; REX.RXB determined at runtime from registers. + self.enc64(inst.bind(I32), template.infer_rex()); + + // I64 on x86_64: REX.W set; REX.RXB determined at runtime from registers. + self.enc64(inst.bind(I64), template.rex().w()); + } + + /// Adds I32/I64 encodings as appropriate for a typed instruction. + /// All variants of REX prefix are explicitly emitted, not inferred. + /// + /// Add encodings for `inst.i32` to X86_32. + /// Add encodings for `inst.i32` to X86_64 with and without REX. + /// Add encodings for `inst.i64` to X86_64 with and without REX. + fn enc_i32_i64_explicit_rex(&mut self, inst: impl Into<InstSpec>, template: Template) { + let inst: InstSpec = inst.into(); + self.enc32(inst.bind(I32), template.nonrex()); + + // REX-less encoding must come after REX encoding so we don't use it by default. + // Otherwise reg-alloc would never use r8 and up. + self.enc64(inst.bind(I32), template.rex()); + self.enc64(inst.bind(I32), template.nonrex()); + self.enc64(inst.bind(I64), template.rex().w()); + } + + /// Adds B32/B64 encodings as appropriate for a typed instruction. + /// The REX prefix is always inferred at runtime. + /// + /// Adds encoding for `inst.b32` to X86_32. + /// Adds encoding for `inst.b32` to X86_64 with optional, inferred REX. + /// Adds encoding for `inst.b64` to X86_64 with a REX.W prefix. + fn enc_b32_b64(&mut self, inst: impl Into<InstSpec>, template: Template) { + let inst: InstSpec = inst.into(); + + // B32 on x86: no REX prefix. 
+ self.enc32(inst.bind(B32), template.infer_rex()); + + // B32 on x86_64: REX.W unset; REX.RXB determined at runtime from registers. + self.enc64(inst.bind(B32), template.infer_rex()); + + // B64 on x86_64: REX.W set; REX.RXB determined at runtime from registers. + self.enc64(inst.bind(B64), template.rex().w()); + } + + /// Add encodings for `inst.i32` to X86_32. + /// Add encodings for `inst.i32` to X86_64 with a REX prefix. + /// Add encodings for `inst.i64` to X86_64 with a REX.W prefix. + fn enc_i32_i64_rex_only(&mut self, inst: impl Into<InstSpec>, template: Template) { + let inst: InstSpec = inst.into(); + self.enc32(inst.bind(I32), template.nonrex()); + self.enc64(inst.bind(I32), template.rex()); + self.enc64(inst.bind(I64), template.rex().w()); + } + + /// Add encodings for `inst.i32` to X86_32. + /// Add encodings for `inst.i32` to X86_64 with and without REX. + /// Add encodings for `inst.i64` to X86_64 with a REX.W prefix. + fn enc_i32_i64_instp( + &mut self, + inst: &Instruction, + template: Template, + instp: InstructionPredicateNode, + ) { + self.enc32_func(inst.bind(I32), template.nonrex(), |builder| { + builder.inst_predicate(instp.clone()) + }); + + // REX-less encoding must come after REX encoding so we don't use it by default. Otherwise + // reg-alloc would never use r8 and up. + self.enc64_func(inst.bind(I32), template.rex(), |builder| { + builder.inst_predicate(instp.clone()) + }); + self.enc64_func(inst.bind(I32), template.nonrex(), |builder| { + builder.inst_predicate(instp.clone()) + }); + self.enc64_func(inst.bind(I64), template.rex().w(), |builder| { + builder.inst_predicate(instp) + }); + } + + /// Add encodings for `inst.r32` to X86_32. + /// Add encodings for `inst.r32` to X86_64 with and without REX. + /// Add encodings for `inst.r64` to X86_64 with a REX.W prefix. + fn enc_r32_r64_instp( + &mut self, + inst: &Instruction, + template: Template, + instp: InstructionPredicateNode, + ) { + self.enc32_func(inst.bind(R32), template.nonrex(), |builder| { + builder.inst_predicate(instp.clone()) + }); + + // REX-less encoding must come after REX encoding so we don't use it by default. Otherwise + // reg-alloc would never use r8 and up. + self.enc64_func(inst.bind(R32), template.rex(), |builder| { + builder.inst_predicate(instp.clone()) + }); + self.enc64_func(inst.bind(R32), template.nonrex(), |builder| { + builder.inst_predicate(instp.clone()) + }); + self.enc64_func(inst.bind(R64), template.rex().w(), |builder| { + builder.inst_predicate(instp) + }); + } + + /// Add encodings for `inst.r32` to X86_32. + /// Add encodings for `inst.r64` to X86_64 with a REX.W prefix. + fn enc_r32_r64_rex_only(&mut self, inst: impl Into<InstSpec>, template: Template) { + let inst: InstSpec = inst.into(); + self.enc32(inst.bind(R32), template.nonrex()); + self.enc64(inst.bind(R64), template.rex().w()); + } + + fn enc_r32_r64_ld_st(&mut self, inst: &Instruction, w_bit: bool, template: Template) { + self.enc32(inst.clone().bind(R32).bind(Any), template.clone()); + + // REX-less encoding must come after REX encoding so we don't use it by + // default. Otherwise reg-alloc would never use r8 and up. 
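+ // (The encoding listed first is the one used by default, and only the REX
+ // form can address r8..r15.)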
+ self.enc64(inst.clone().bind(R32).bind(Any), template.clone().rex()); + self.enc64(inst.clone().bind(R32).bind(Any), template.clone()); + + if w_bit { + self.enc64(inst.clone().bind(R64).bind(Any), template.rex().w()); + } else { + self.enc64(inst.clone().bind(R64).bind(Any), template.clone().rex()); + self.enc64(inst.clone().bind(R64).bind(Any), template); + } + } + + /// Add encodings for `inst` to X86_64 with and without a REX prefix. + fn enc_x86_64(&mut self, inst: impl Into<InstSpec> + Clone, template: Template) { + // See above comment about the ordering of rex vs non-rex encodings. + self.enc64(inst.clone(), template.rex()); + self.enc64(inst, template); + } + + /// Add encodings for `inst` to X86_64 with and without a REX prefix. + fn enc_x86_64_instp( + &mut self, + inst: impl Clone + Into<InstSpec>, + template: Template, + instp: InstructionPredicateNode, + ) { + // See above comment about the ordering of rex vs non-rex encodings. + self.enc64_func(inst.clone(), template.rex(), |builder| { + builder.inst_predicate(instp.clone()) + }); + self.enc64_func(inst, template, |builder| builder.inst_predicate(instp)); + } + fn enc_x86_64_isap( + &mut self, + inst: impl Clone + Into<InstSpec>, + template: Template, + isap: SettingPredicateNumber, + ) { + // See above comment about the ordering of rex vs non-rex encodings. + self.enc64_isap(inst.clone(), template.rex(), isap); + self.enc64_isap(inst, template, isap); + } + + /// Add all three encodings for `inst`: + /// - X86_32 + /// - X86_64 with and without the REX prefix. + fn enc_both(&mut self, inst: impl Clone + Into<InstSpec>, template: Template) { + self.enc32(inst.clone(), template.clone()); + self.enc_x86_64(inst, template); + } + fn enc_both_isap( + &mut self, + inst: impl Clone + Into<InstSpec>, + template: Template, + isap: SettingPredicateNumber, + ) { + self.enc32_isap(inst.clone(), template.clone(), isap); + self.enc_x86_64_isap(inst, template, isap); + } + fn enc_both_instp( + &mut self, + inst: impl Clone + Into<InstSpec>, + template: Template, + instp: InstructionPredicateNode, + ) { + self.enc32_instp(inst.clone(), template.clone(), instp.clone()); + self.enc_x86_64_instp(inst, template, instp); + } + + /// Add two encodings for `inst`: + /// - X86_32, no REX prefix, since this is not valid in 32-bit mode. + /// - X86_64, dynamically infer the REX prefix. + fn enc_both_inferred(&mut self, inst: impl Clone + Into<InstSpec>, template: Template) { + self.enc32(inst.clone(), template.clone()); + self.enc64(inst, template.infer_rex()); + } + fn enc_both_inferred_maybe_isap( + &mut self, + inst: impl Clone + Into<InstSpec>, + template: Template, + isap: Option<SettingPredicateNumber>, + ) { + self.enc32_maybe_isap(inst.clone(), template.clone(), isap); + self.enc64_maybe_isap(inst, template.infer_rex(), isap); + } + + /// Add two encodings for `inst`: + /// - X86_32 + /// - X86_64 with the REX prefix. + fn enc_both_rex_only(&mut self, inst: impl Clone + Into<InstSpec>, template: Template) { + self.enc32(inst.clone(), template.clone()); + self.enc64(inst, template.rex()); + } + + /// Add encodings for `inst.i32` to X86_32. + /// Add encodings for `inst.i32` to X86_64 with and without REX. + /// Add encodings for `inst.i64` to X86_64 with a REX prefix, using the `w_bit` + /// argument to determine whether or not to set the REX.W bit. 
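+ /// When `w_bit` is false, the `inst.i64` binding is emitted both with a plain
+ /// REX prefix and without one, which suits narrow loads/stores whose access
+ /// width is fixed by the opcode rather than by REX.W.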
+ fn enc_i32_i64_ld_st(&mut self, inst: &Instruction, w_bit: bool, template: Template) { + self.enc32(inst.clone().bind(I32).bind(Any), template.clone()); + + // REX-less encoding must come after REX encoding so we don't use it by + // default. Otherwise reg-alloc would never use r8 and up. + self.enc64(inst.clone().bind(I32).bind(Any), template.clone().rex()); + self.enc64(inst.clone().bind(I32).bind(Any), template.clone()); + + if w_bit { + self.enc64(inst.clone().bind(I64).bind(Any), template.rex().w()); + } else { + self.enc64(inst.clone().bind(I64).bind(Any), template.clone().rex()); + self.enc64(inst.clone().bind(I64).bind(Any), template); + } + } + + /// Add the same encoding/recipe pairing to both X86_32 and X86_64 + fn enc_32_64_rec( + &mut self, + inst: impl Clone + Into<InstSpec>, + recipe: &EncodingRecipe, + bits: u16, + ) { + self.enc32_rec(inst.clone(), recipe, bits); + self.enc64_rec(inst, recipe, bits); + } + + /// Add the same encoding to both X86_32 and X86_64; assumes configuration (e.g. REX, operand binding) has already happened + fn enc_32_64_func<T>( + &mut self, + inst: impl Clone + Into<InstSpec>, + template: Template, + builder_closure: T, + ) where + T: FnOnce(EncodingBuilder) -> EncodingBuilder, + { + let encoding = self.make_encoding(inst.into(), template, builder_closure); + self.enc32.push(encoding.clone()); + self.enc64.push(encoding); + } + + /// Add the same encoding to both X86_32 and X86_64; assumes configuration (e.g. REX, operand + /// binding) has already happened. + fn enc_32_64_maybe_isap( + &mut self, + inst: impl Clone + Into<InstSpec>, + template: Template, + isap: Option<SettingPredicateNumber>, + ) { + self.enc32_maybe_isap(inst.clone(), template.clone(), isap); + self.enc64_maybe_isap(inst, template, isap); + } + + fn enc32_maybe_isap( + &mut self, + inst: impl Into<InstSpec>, + template: Template, + isap: Option<SettingPredicateNumber>, + ) { + match isap { + None => self.enc32(inst, template), + Some(isap) => self.enc32_isap(inst, template, isap), + } + } + + fn enc64_maybe_isap( + &mut self, + inst: impl Into<InstSpec>, + template: Template, + isap: Option<SettingPredicateNumber>, + ) { + match isap { + None => self.enc64(inst, template), + Some(isap) => self.enc64_isap(inst, template, isap), + } + } +} + +// Definitions. + +#[inline(never)] +fn define_moves(e: &mut PerCpuModeEncodings, shared_defs: &SharedDefinitions, r: &RecipeGroup) { + let shared = &shared_defs.instructions; + let formats = &shared_defs.formats; + + // Shorthands for instructions. + let bconst = shared.by_name("bconst"); + let bint = shared.by_name("bint"); + let copy = shared.by_name("copy"); + let copy_special = shared.by_name("copy_special"); + let copy_to_ssa = shared.by_name("copy_to_ssa"); + let get_pinned_reg = shared.by_name("get_pinned_reg"); + let iconst = shared.by_name("iconst"); + let ireduce = shared.by_name("ireduce"); + let regmove = shared.by_name("regmove"); + let sextend = shared.by_name("sextend"); + let set_pinned_reg = shared.by_name("set_pinned_reg"); + let uextend = shared.by_name("uextend"); + let dummy_sarg_t = shared.by_name("dummy_sarg_t"); + + // Shorthands for recipes. 
+ let rec_copysp = r.template("copysp"); + let rec_furm_reg_to_ssa = r.template("furm_reg_to_ssa"); + let rec_get_pinned_reg = r.recipe("get_pinned_reg"); + let rec_null = r.recipe("null"); + let rec_pu_id = r.template("pu_id"); + let rec_pu_id_bool = r.template("pu_id_bool"); + let rec_pu_iq = r.template("pu_iq"); + let rec_rmov = r.template("rmov"); + let rec_set_pinned_reg = r.template("set_pinned_reg"); + let rec_u_id = r.template("u_id"); + let rec_u_id_z = r.template("u_id_z"); + let rec_umr = r.template("umr"); + let rec_umr_reg_to_ssa = r.template("umr_reg_to_ssa"); + let rec_urm_noflags = r.template("urm_noflags"); + let rec_urm_noflags_abcd = r.template("urm_noflags_abcd"); + let rec_dummy_sarg_t = r.recipe("dummy_sarg_t"); + + // The pinned reg is fixed to a certain value entirely user-controlled, so it generates nothing! + e.enc64_rec(get_pinned_reg.bind(I64), rec_get_pinned_reg, 0); + e.enc_x86_64( + set_pinned_reg.bind(I64), + rec_set_pinned_reg.opcodes(&MOV_STORE).rex().w(), + ); + + e.enc_i32_i64(copy, rec_umr.opcodes(&MOV_STORE)); + e.enc_r32_r64_rex_only(copy, rec_umr.opcodes(&MOV_STORE)); + e.enc_both(copy.bind(B1), rec_umr.opcodes(&MOV_STORE)); + e.enc_both(copy.bind(I8), rec_umr.opcodes(&MOV_STORE)); + e.enc_both(copy.bind(I16), rec_umr.opcodes(&MOV_STORE)); + + // TODO For x86-64, only define REX forms for now, since we can't describe the + // special regunit immediate operands with the current constraint language. + for &ty in &[I8, I16, I32] { + e.enc32(regmove.bind(ty), rec_rmov.opcodes(&MOV_STORE)); + e.enc64(regmove.bind(ty), rec_rmov.opcodes(&MOV_STORE).rex()); + } + for &ty in &[B8, B16, B32] { + e.enc32(regmove.bind(ty), rec_rmov.opcodes(&MOV_STORE)); + e.enc64(regmove.bind(ty), rec_rmov.opcodes(&MOV_STORE).rex()); + } + e.enc64(regmove.bind(I64), rec_rmov.opcodes(&MOV_STORE).rex().w()); + e.enc_both(regmove.bind(B1), rec_rmov.opcodes(&MOV_STORE)); + e.enc_both(regmove.bind(I8), rec_rmov.opcodes(&MOV_STORE)); + e.enc32(regmove.bind(R32), rec_rmov.opcodes(&MOV_STORE)); + e.enc64(regmove.bind(R32), rec_rmov.opcodes(&MOV_STORE).rex()); + e.enc64(regmove.bind(R64), rec_rmov.opcodes(&MOV_STORE).rex().w()); + + // Immediate constants. + e.enc32(iconst.bind(I32), rec_pu_id.opcodes(&MOV_IMM)); + + e.enc64(iconst.bind(I32), rec_pu_id.rex().opcodes(&MOV_IMM)); + e.enc64(iconst.bind(I32), rec_pu_id.opcodes(&MOV_IMM)); + + // The 32-bit immediate movl also zero-extends to 64 bits. + let is_unsigned_int32 = + InstructionPredicate::new_is_unsigned_int(&*formats.unary_imm, "imm", 32, 0); + + e.enc64_func( + iconst.bind(I64), + rec_pu_id.opcodes(&MOV_IMM).rex(), + |encoding| encoding.inst_predicate(is_unsigned_int32.clone()), + ); + e.enc64_func(iconst.bind(I64), rec_pu_id.opcodes(&MOV_IMM), |encoding| { + encoding.inst_predicate(is_unsigned_int32) + }); + + // Sign-extended 32-bit immediate. + e.enc64( + iconst.bind(I64), + rec_u_id.rex().opcodes(&MOV_IMM_SIGNEXTEND).rrr(0).w(), + ); + + // Finally, the MOV_IMM opcode takes an 8-byte immediate with a REX.W prefix. 
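+ // Together, these give `iconst.i64` three encodings: the zero-extending
+ // 32-bit move, the sign-extended 32-bit immediate, and the full 8-byte
+ // immediate below as the general fallback.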
+ e.enc64(iconst.bind(I64), rec_pu_iq.opcodes(&MOV_IMM).rex().w()); + + // Bool constants (uses MOV) + for &ty in &[B1, B8, B16, B32] { + e.enc_both(bconst.bind(ty), rec_pu_id_bool.opcodes(&MOV_IMM)); + } + e.enc64(bconst.bind(B64), rec_pu_id_bool.opcodes(&MOV_IMM).rex()); + + let is_zero_int = InstructionPredicate::new_is_zero_int(&formats.unary_imm, "imm"); + e.enc_both_instp( + iconst.bind(I8), + rec_u_id_z.opcodes(&XORB), + is_zero_int.clone(), + ); + + // You may expect that i16 encodings would have an 0x66 prefix on the opcode to indicate that + // encodings should be on 16-bit operands (f.ex, "xor %ax, %ax"). Cranelift currently does not + // know that it can drop the 0x66 prefix and clear the upper half of a 32-bit register in these + // scenarios, so we explicitly select a wider but permissible opcode. + // + // This effectively formalizes the i16->i32 widening that Cranelift performs when there isn't + // an appropriate i16 encoding available. + e.enc_both_instp( + iconst.bind(I16), + rec_u_id_z.opcodes(&XOR), + is_zero_int.clone(), + ); + e.enc_both_instp( + iconst.bind(I32), + rec_u_id_z.opcodes(&XOR), + is_zero_int.clone(), + ); + e.enc_x86_64_instp(iconst.bind(I64), rec_u_id_z.opcodes(&XOR), is_zero_int); + + // Numerical conversions. + + // Reducing an integer is a no-op. + e.enc32_rec(ireduce.bind(I8).bind(I16), rec_null, 0); + e.enc32_rec(ireduce.bind(I8).bind(I32), rec_null, 0); + e.enc32_rec(ireduce.bind(I16).bind(I32), rec_null, 0); + + e.enc64_rec(ireduce.bind(I8).bind(I16), rec_null, 0); + e.enc64_rec(ireduce.bind(I8).bind(I32), rec_null, 0); + e.enc64_rec(ireduce.bind(I16).bind(I32), rec_null, 0); + e.enc64_rec(ireduce.bind(I8).bind(I64), rec_null, 0); + e.enc64_rec(ireduce.bind(I16).bind(I64), rec_null, 0); + e.enc64_rec(ireduce.bind(I32).bind(I64), rec_null, 0); + + // TODO: Add encodings for cbw, cwde, cdqe, which are sign-extending + // instructions for %al/%ax/%eax to %ax/%eax/%rax. + + // movsbl + e.enc32( + sextend.bind(I32).bind(I8), + rec_urm_noflags_abcd.opcodes(&MOVSX_BYTE), + ); + e.enc64( + sextend.bind(I32).bind(I8), + rec_urm_noflags.opcodes(&MOVSX_BYTE).rex(), + ); + e.enc64( + sextend.bind(I32).bind(I8), + rec_urm_noflags_abcd.opcodes(&MOVSX_BYTE), + ); + + // movswl + e.enc32( + sextend.bind(I32).bind(I16), + rec_urm_noflags.opcodes(&MOVSX_WORD), + ); + e.enc64( + sextend.bind(I32).bind(I16), + rec_urm_noflags.opcodes(&MOVSX_WORD).rex(), + ); + e.enc64( + sextend.bind(I32).bind(I16), + rec_urm_noflags.opcodes(&MOVSX_WORD), + ); + + // movsbq + e.enc64( + sextend.bind(I64).bind(I8), + rec_urm_noflags.opcodes(&MOVSX_BYTE).rex().w(), + ); + + // movswq + e.enc64( + sextend.bind(I64).bind(I16), + rec_urm_noflags.opcodes(&MOVSX_WORD).rex().w(), + ); + + // movslq + e.enc64( + sextend.bind(I64).bind(I32), + rec_urm_noflags.opcodes(&MOVSXD).rex().w(), + ); + + // movzbl + e.enc32( + uextend.bind(I32).bind(I8), + rec_urm_noflags_abcd.opcodes(&MOVZX_BYTE), + ); + e.enc64( + uextend.bind(I32).bind(I8), + rec_urm_noflags.opcodes(&MOVZX_BYTE).rex(), + ); + e.enc64( + uextend.bind(I32).bind(I8), + rec_urm_noflags_abcd.opcodes(&MOVZX_BYTE), + ); + + // movzwl + e.enc32( + uextend.bind(I32).bind(I16), + rec_urm_noflags.opcodes(&MOVZX_WORD), + ); + e.enc64( + uextend.bind(I32).bind(I16), + rec_urm_noflags.opcodes(&MOVZX_WORD).rex(), + ); + e.enc64( + uextend.bind(I32).bind(I16), + rec_urm_noflags.opcodes(&MOVZX_WORD), + ); + + // movzbq, encoded as movzbl because it's equivalent and shorter. 
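+ // (Writing a 32-bit destination implicitly zeroes the upper 32 bits on
+ // x86-64, so the 32-bit form already yields the full 64-bit result.)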
+ e.enc64( + uextend.bind(I64).bind(I8), + rec_urm_noflags.opcodes(&MOVZX_BYTE).rex(), + ); + e.enc64( + uextend.bind(I64).bind(I8), + rec_urm_noflags_abcd.opcodes(&MOVZX_BYTE), + ); + + // movzwq, encoded as movzwl because it's equivalent and shorter + e.enc64( + uextend.bind(I64).bind(I16), + rec_urm_noflags.opcodes(&MOVZX_WORD).rex(), + ); + e.enc64( + uextend.bind(I64).bind(I16), + rec_urm_noflags.opcodes(&MOVZX_WORD), + ); + + // A 32-bit register copy clears the high 32 bits. + e.enc64( + uextend.bind(I64).bind(I32), + rec_umr.opcodes(&MOV_STORE).rex(), + ); + e.enc64(uextend.bind(I64).bind(I32), rec_umr.opcodes(&MOV_STORE)); + + // Convert bool to int. + // + // This assumes that b1 is represented as an 8-bit low register with the value 0 + // or 1. + // + // Encode movzbq as movzbl, because it's equivalent and shorter. + for &to in &[I8, I16, I32, I64] { + for &from in &[B1, B8] { + e.enc64( + bint.bind(to).bind(from), + rec_urm_noflags.opcodes(&MOVZX_BYTE).rex(), + ); + e.enc64( + bint.bind(to).bind(from), + rec_urm_noflags_abcd.opcodes(&MOVZX_BYTE), + ); + if to != I64 { + e.enc32( + bint.bind(to).bind(from), + rec_urm_noflags_abcd.opcodes(&MOVZX_BYTE), + ); + } + } + } + for (to, from) in &[(I16, B16), (I32, B32), (I64, B64)] { + e.enc_both( + bint.bind(*to).bind(*from), + rec_urm_noflags_abcd.opcodes(&MOVZX_BYTE), + ); + } + + // Copy Special + // For x86-64, only define REX forms for now, since we can't describe the + // special regunit immediate operands with the current constraint language. + e.enc64(copy_special, rec_copysp.opcodes(&MOV_STORE).rex().w()); + e.enc32(copy_special, rec_copysp.opcodes(&MOV_STORE)); + + // Copy to SSA. These have to be done with special _rex_only encoders, because the standard + // machinery for deciding whether a REX.{RXB} prefix is needed doesn't take into account + // the source register, which is specified directly in the instruction. + e.enc_i32_i64_rex_only(copy_to_ssa, rec_umr_reg_to_ssa.opcodes(&MOV_STORE)); + e.enc_r32_r64_rex_only(copy_to_ssa, rec_umr_reg_to_ssa.opcodes(&MOV_STORE)); + e.enc_both_rex_only(copy_to_ssa.bind(B1), rec_umr_reg_to_ssa.opcodes(&MOV_STORE)); + e.enc_both_rex_only(copy_to_ssa.bind(I8), rec_umr_reg_to_ssa.opcodes(&MOV_STORE)); + e.enc_both_rex_only( + copy_to_ssa.bind(I16), + rec_umr_reg_to_ssa.opcodes(&MOV_STORE), + ); + e.enc_both_rex_only( + copy_to_ssa.bind(F64), + rec_furm_reg_to_ssa.opcodes(&MOVSD_LOAD), + ); + e.enc_both_rex_only( + copy_to_ssa.bind(F32), + rec_furm_reg_to_ssa.opcodes(&MOVSS_LOAD), + ); + + e.enc_32_64_rec(dummy_sarg_t, rec_dummy_sarg_t, 0); +} + +#[inline(never)] +fn define_memory( + e: &mut PerCpuModeEncodings, + shared_defs: &SharedDefinitions, + x86: &InstructionGroup, + r: &RecipeGroup, +) { + let shared = &shared_defs.instructions; + let formats = &shared_defs.formats; + + // Shorthands for instructions. 
+ let adjust_sp_down = shared.by_name("adjust_sp_down"); + let adjust_sp_down_imm = shared.by_name("adjust_sp_down_imm"); + let adjust_sp_up_imm = shared.by_name("adjust_sp_up_imm"); + let copy_nop = shared.by_name("copy_nop"); + let fill = shared.by_name("fill"); + let fill_nop = shared.by_name("fill_nop"); + let istore16 = shared.by_name("istore16"); + let istore16_complex = shared.by_name("istore16_complex"); + let istore32 = shared.by_name("istore32"); + let istore32_complex = shared.by_name("istore32_complex"); + let istore8 = shared.by_name("istore8"); + let istore8_complex = shared.by_name("istore8_complex"); + let load = shared.by_name("load"); + let load_complex = shared.by_name("load_complex"); + let regfill = shared.by_name("regfill"); + let regspill = shared.by_name("regspill"); + let sload16 = shared.by_name("sload16"); + let sload16_complex = shared.by_name("sload16_complex"); + let sload32 = shared.by_name("sload32"); + let sload32_complex = shared.by_name("sload32_complex"); + let sload8 = shared.by_name("sload8"); + let sload8_complex = shared.by_name("sload8_complex"); + let spill = shared.by_name("spill"); + let store = shared.by_name("store"); + let store_complex = shared.by_name("store_complex"); + let uload16 = shared.by_name("uload16"); + let uload16_complex = shared.by_name("uload16_complex"); + let uload32 = shared.by_name("uload32"); + let uload32_complex = shared.by_name("uload32_complex"); + let uload8 = shared.by_name("uload8"); + let uload8_complex = shared.by_name("uload8_complex"); + let x86_pop = x86.by_name("x86_pop"); + let x86_push = x86.by_name("x86_push"); + + // Shorthands for recipes. + let rec_adjustsp = r.template("adjustsp"); + let rec_adjustsp_ib = r.template("adjustsp_ib"); + let rec_adjustsp_id = r.template("adjustsp_id"); + let rec_ffillnull = r.recipe("ffillnull"); + let rec_fillnull = r.recipe("fillnull"); + let rec_fillSib32 = r.template("fillSib32"); + let rec_ld = r.template("ld"); + let rec_ldDisp32 = r.template("ldDisp32"); + let rec_ldDisp8 = r.template("ldDisp8"); + let rec_ldWithIndex = r.template("ldWithIndex"); + let rec_ldWithIndexDisp32 = r.template("ldWithIndexDisp32"); + let rec_ldWithIndexDisp8 = r.template("ldWithIndexDisp8"); + let rec_popq = r.template("popq"); + let rec_pushq = r.template("pushq"); + let rec_regfill32 = r.template("regfill32"); + let rec_regspill32 = r.template("regspill32"); + let rec_spillSib32 = r.template("spillSib32"); + let rec_st = r.template("st"); + let rec_stacknull = r.recipe("stacknull"); + let rec_stDisp32 = r.template("stDisp32"); + let rec_stDisp32_abcd = r.template("stDisp32_abcd"); + let rec_stDisp8 = r.template("stDisp8"); + let rec_stDisp8_abcd = r.template("stDisp8_abcd"); + let rec_stWithIndex = r.template("stWithIndex"); + let rec_stWithIndexDisp32 = r.template("stWithIndexDisp32"); + let rec_stWithIndexDisp32_abcd = r.template("stWithIndexDisp32_abcd"); + let rec_stWithIndexDisp8 = r.template("stWithIndexDisp8"); + let rec_stWithIndexDisp8_abcd = r.template("stWithIndexDisp8_abcd"); + let rec_stWithIndex_abcd = r.template("stWithIndex_abcd"); + let rec_st_abcd = r.template("st_abcd"); + + // Loads and stores. 
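+ // The *_complex loads and stores take their address as a list of value
+ // operands; the length predicates below limit these encodings to the
+ // base-plus-index shape that the *WithIndex recipes encode.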
+ let is_load_complex_length_two = + InstructionPredicate::new_length_equals(&*formats.load_complex, 2); + + for recipe in &[rec_ldWithIndex, rec_ldWithIndexDisp8, rec_ldWithIndexDisp32] { + e.enc_i32_i64_instp( + load_complex, + recipe.opcodes(&MOV_LOAD), + is_load_complex_length_two.clone(), + ); + e.enc_r32_r64_instp( + load_complex, + recipe.opcodes(&MOV_LOAD), + is_load_complex_length_two.clone(), + ); + e.enc_x86_64_instp( + uload32_complex, + recipe.opcodes(&MOV_LOAD), + is_load_complex_length_two.clone(), + ); + + e.enc64_instp( + sload32_complex, + recipe.opcodes(&MOVSXD).rex().w(), + is_load_complex_length_two.clone(), + ); + + e.enc_i32_i64_instp( + uload16_complex, + recipe.opcodes(&MOVZX_WORD), + is_load_complex_length_two.clone(), + ); + e.enc_i32_i64_instp( + sload16_complex, + recipe.opcodes(&MOVSX_WORD), + is_load_complex_length_two.clone(), + ); + + e.enc_i32_i64_instp( + uload8_complex, + recipe.opcodes(&MOVZX_BYTE), + is_load_complex_length_two.clone(), + ); + + e.enc_i32_i64_instp( + sload8_complex, + recipe.opcodes(&MOVSX_BYTE), + is_load_complex_length_two.clone(), + ); + } + + let is_store_complex_length_three = + InstructionPredicate::new_length_equals(&*formats.store_complex, 3); + + for recipe in &[rec_stWithIndex, rec_stWithIndexDisp8, rec_stWithIndexDisp32] { + e.enc_i32_i64_instp( + store_complex, + recipe.opcodes(&MOV_STORE), + is_store_complex_length_three.clone(), + ); + e.enc_r32_r64_instp( + store_complex, + recipe.opcodes(&MOV_STORE), + is_store_complex_length_three.clone(), + ); + e.enc_x86_64_instp( + istore32_complex, + recipe.opcodes(&MOV_STORE), + is_store_complex_length_three.clone(), + ); + e.enc_both_instp( + istore16_complex.bind(I32), + recipe.opcodes(&MOV_STORE_16), + is_store_complex_length_three.clone(), + ); + e.enc_x86_64_instp( + istore16_complex.bind(I64), + recipe.opcodes(&MOV_STORE_16), + is_store_complex_length_three.clone(), + ); + } + + for recipe in &[ + rec_stWithIndex_abcd, + rec_stWithIndexDisp8_abcd, + rec_stWithIndexDisp32_abcd, + ] { + e.enc_both_instp( + istore8_complex.bind(I32), + recipe.opcodes(&MOV_BYTE_STORE), + is_store_complex_length_three.clone(), + ); + e.enc_x86_64_instp( + istore8_complex.bind(I64), + recipe.opcodes(&MOV_BYTE_STORE), + is_store_complex_length_three.clone(), + ); + } + + for recipe in &[rec_st, rec_stDisp8, rec_stDisp32] { + e.enc_i32_i64_ld_st(store, true, recipe.opcodes(&MOV_STORE)); + e.enc_r32_r64_ld_st(store, true, recipe.opcodes(&MOV_STORE)); + e.enc_x86_64(istore32.bind(I64).bind(Any), recipe.opcodes(&MOV_STORE)); + e.enc_i32_i64_ld_st(istore16, false, recipe.opcodes(&MOV_STORE_16)); + } + + // Byte stores are more complicated because the registers they can address + // depends of the presence of a REX prefix. The st*_abcd recipes fall back to + // the corresponding st* recipes when a REX prefix is applied. + + for recipe in &[rec_st_abcd, rec_stDisp8_abcd, rec_stDisp32_abcd] { + e.enc_both(istore8.bind(I32).bind(Any), recipe.opcodes(&MOV_BYTE_STORE)); + e.enc_x86_64(istore8.bind(I64).bind(Any), recipe.opcodes(&MOV_BYTE_STORE)); + } + + e.enc_i32_i64_explicit_rex(spill, rec_spillSib32.opcodes(&MOV_STORE)); + e.enc_i32_i64_explicit_rex(regspill, rec_regspill32.opcodes(&MOV_STORE)); + e.enc_r32_r64_rex_only(spill, rec_spillSib32.opcodes(&MOV_STORE)); + e.enc_r32_r64_rex_only(regspill, rec_regspill32.opcodes(&MOV_STORE)); + + // Use a 32-bit write for spilling `b1`, `i8` and `i16` to avoid + // constraining the permitted registers. + // See MIN_SPILL_SLOT_SIZE which makes this safe. 
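+ // (Without a REX prefix an 8-bit store can only name the ABCD registers, as
+ // the byte-store recipes above show; a 32-bit store has no such restriction,
+ // and the fills further down read the slot back with a matching 32-bit load.)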
+ + e.enc_both(spill.bind(B1), rec_spillSib32.opcodes(&MOV_STORE)); + e.enc_both(regspill.bind(B1), rec_regspill32.opcodes(&MOV_STORE)); + for &ty in &[I8, I16] { + e.enc_both(spill.bind(ty), rec_spillSib32.opcodes(&MOV_STORE)); + e.enc_both(regspill.bind(ty), rec_regspill32.opcodes(&MOV_STORE)); + } + + for recipe in &[rec_ld, rec_ldDisp8, rec_ldDisp32] { + e.enc_i32_i64_ld_st(load, true, recipe.opcodes(&MOV_LOAD)); + e.enc_r32_r64_ld_st(load, true, recipe.opcodes(&MOV_LOAD)); + e.enc_x86_64(uload32.bind(I64), recipe.opcodes(&MOV_LOAD)); + e.enc64(sload32.bind(I64), recipe.opcodes(&MOVSXD).rex().w()); + e.enc_i32_i64_ld_st(uload16, true, recipe.opcodes(&MOVZX_WORD)); + e.enc_i32_i64_ld_st(sload16, true, recipe.opcodes(&MOVSX_WORD)); + e.enc_i32_i64_ld_st(uload8, true, recipe.opcodes(&MOVZX_BYTE)); + e.enc_i32_i64_ld_st(sload8, true, recipe.opcodes(&MOVSX_BYTE)); + } + + e.enc_i32_i64_explicit_rex(fill, rec_fillSib32.opcodes(&MOV_LOAD)); + e.enc_i32_i64_explicit_rex(regfill, rec_regfill32.opcodes(&MOV_LOAD)); + e.enc_r32_r64_rex_only(fill, rec_fillSib32.opcodes(&MOV_LOAD)); + e.enc_r32_r64_rex_only(regfill, rec_regfill32.opcodes(&MOV_LOAD)); + + // No-op fills, created by late-stage redundant-fill removal. + for &ty in &[I64, I32, I16, I8] { + e.enc64_rec(fill_nop.bind(ty), rec_fillnull, 0); + e.enc32_rec(fill_nop.bind(ty), rec_fillnull, 0); + } + e.enc64_rec(fill_nop.bind(B1), rec_fillnull, 0); + e.enc32_rec(fill_nop.bind(B1), rec_fillnull, 0); + for &ty in &[F64, F32] { + e.enc64_rec(fill_nop.bind(ty), rec_ffillnull, 0); + e.enc32_rec(fill_nop.bind(ty), rec_ffillnull, 0); + } + for &ty in &[R64, R32] { + e.enc64_rec(fill_nop.bind(ty), rec_fillnull, 0); + e.enc32_rec(fill_nop.bind(ty), rec_fillnull, 0); + } + + // Load 32 bits from `b1`, `i8` and `i16` spill slots. See `spill.b1` above. + + e.enc_both(fill.bind(B1), rec_fillSib32.opcodes(&MOV_LOAD)); + e.enc_both(regfill.bind(B1), rec_regfill32.opcodes(&MOV_LOAD)); + for &ty in &[I8, I16] { + e.enc_both(fill.bind(ty), rec_fillSib32.opcodes(&MOV_LOAD)); + e.enc_both(regfill.bind(ty), rec_regfill32.opcodes(&MOV_LOAD)); + } + + // Push and Pop. + e.enc32(x86_push.bind(I32), rec_pushq.opcodes(&PUSH_REG)); + e.enc_x86_64(x86_push.bind(I64), rec_pushq.opcodes(&PUSH_REG)); + + e.enc32(x86_pop.bind(I32), rec_popq.opcodes(&POP_REG)); + e.enc_x86_64(x86_pop.bind(I64), rec_popq.opcodes(&POP_REG)); + + // Stack-slot-to-the-same-stack-slot copy, which is guaranteed to turn + // into a no-op. + // The same encoding is generated for both the 64- and 32-bit architectures. + for &ty in &[I64, I32, I16, I8] { + e.enc64_rec(copy_nop.bind(ty), rec_stacknull, 0); + e.enc32_rec(copy_nop.bind(ty), rec_stacknull, 0); + } + for &ty in &[F64, F32] { + e.enc64_rec(copy_nop.bind(ty), rec_stacknull, 0); + e.enc32_rec(copy_nop.bind(ty), rec_stacknull, 0); + } + + // Adjust SP down by a dynamic value (or up, with a negative operand). + e.enc32(adjust_sp_down.bind(I32), rec_adjustsp.opcodes(&SUB)); + e.enc64( + adjust_sp_down.bind(I64), + rec_adjustsp.opcodes(&SUB).rex().w(), + ); + + // Adjust SP up by an immediate (or down, with a negative immediate). + e.enc32(adjust_sp_up_imm, rec_adjustsp_ib.opcodes(&CMP_IMM8)); + e.enc32(adjust_sp_up_imm, rec_adjustsp_id.opcodes(&CMP_IMM)); + e.enc64( + adjust_sp_up_imm, + rec_adjustsp_ib.opcodes(&CMP_IMM8).rex().w(), + ); + e.enc64( + adjust_sp_up_imm, + rec_adjustsp_id.opcodes(&CMP_IMM).rex().w(), + ); + + // Adjust SP down by an immediate (or up, with a negative immediate). 
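+ // These reuse the same immediate-group opcodes as `adjust_sp_up_imm` above;
+ // the `.rrr(5)` below selects the SUB member of the group, so the immediate
+ // is subtracted from SP rather than added.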
+ e.enc32( + adjust_sp_down_imm, + rec_adjustsp_ib.opcodes(&CMP_IMM8).rrr(5), + ); + e.enc32(adjust_sp_down_imm, rec_adjustsp_id.opcodes(&CMP_IMM).rrr(5)); + e.enc64( + adjust_sp_down_imm, + rec_adjustsp_ib.opcodes(&CMP_IMM8).rrr(5).rex().w(), + ); + e.enc64( + adjust_sp_down_imm, + rec_adjustsp_id.opcodes(&CMP_IMM).rrr(5).rex().w(), + ); +} + +#[inline(never)] +fn define_fpu_moves(e: &mut PerCpuModeEncodings, shared_defs: &SharedDefinitions, r: &RecipeGroup) { + let shared = &shared_defs.instructions; + + // Shorthands for instructions. + let bitcast = shared.by_name("bitcast"); + let copy = shared.by_name("copy"); + let regmove = shared.by_name("regmove"); + + // Shorthands for recipes. + let rec_frmov = r.template("frmov"); + let rec_frurm = r.template("frurm"); + let rec_furm = r.template("furm"); + let rec_rfumr = r.template("rfumr"); + + // Floating-point moves. + // movd + e.enc_both( + bitcast.bind(F32).bind(I32), + rec_frurm.opcodes(&MOVD_LOAD_XMM), + ); + e.enc_both( + bitcast.bind(I32).bind(F32), + rec_rfumr.opcodes(&MOVD_STORE_XMM), + ); + + // movq + e.enc64( + bitcast.bind(F64).bind(I64), + rec_frurm.opcodes(&MOVD_LOAD_XMM).rex().w(), + ); + e.enc64( + bitcast.bind(I64).bind(F64), + rec_rfumr.opcodes(&MOVD_STORE_XMM).rex().w(), + ); + + // movaps + e.enc_both(copy.bind(F32), rec_furm.opcodes(&MOVAPS_LOAD)); + e.enc_both(copy.bind(F64), rec_furm.opcodes(&MOVAPS_LOAD)); + + // TODO For x86-64, only define REX forms for now, since we can't describe the special regunit + // immediate operands with the current constraint language. + e.enc32(regmove.bind(F32), rec_frmov.opcodes(&MOVAPS_LOAD)); + e.enc64(regmove.bind(F32), rec_frmov.opcodes(&MOVAPS_LOAD).rex()); + + // TODO For x86-64, only define REX forms for now, since we can't describe the special regunit + // immediate operands with the current constraint language. + e.enc32(regmove.bind(F64), rec_frmov.opcodes(&MOVAPS_LOAD)); + e.enc64(regmove.bind(F64), rec_frmov.opcodes(&MOVAPS_LOAD).rex()); +} + +#[inline(never)] +fn define_fpu_memory( + e: &mut PerCpuModeEncodings, + shared_defs: &SharedDefinitions, + r: &RecipeGroup, +) { + let shared = &shared_defs.instructions; + + // Shorthands for instructions. + let fill = shared.by_name("fill"); + let load = shared.by_name("load"); + let load_complex = shared.by_name("load_complex"); + let regfill = shared.by_name("regfill"); + let regspill = shared.by_name("regspill"); + let spill = shared.by_name("spill"); + let store = shared.by_name("store"); + let store_complex = shared.by_name("store_complex"); + + // Shorthands for recipes. + let rec_ffillSib32 = r.template("ffillSib32"); + let rec_fld = r.template("fld"); + let rec_fldDisp32 = r.template("fldDisp32"); + let rec_fldDisp8 = r.template("fldDisp8"); + let rec_fldWithIndex = r.template("fldWithIndex"); + let rec_fldWithIndexDisp32 = r.template("fldWithIndexDisp32"); + let rec_fldWithIndexDisp8 = r.template("fldWithIndexDisp8"); + let rec_fregfill32 = r.template("fregfill32"); + let rec_fregspill32 = r.template("fregspill32"); + let rec_fspillSib32 = r.template("fspillSib32"); + let rec_fst = r.template("fst"); + let rec_fstDisp32 = r.template("fstDisp32"); + let rec_fstDisp8 = r.template("fstDisp8"); + let rec_fstWithIndex = r.template("fstWithIndex"); + let rec_fstWithIndexDisp32 = r.template("fstWithIndexDisp32"); + let rec_fstWithIndexDisp8 = r.template("fstWithIndexDisp8"); + + // Float loads and stores. 
+ e.enc_both(load.bind(F32).bind(Any), rec_fld.opcodes(&MOVSS_LOAD)); + e.enc_both(load.bind(F32).bind(Any), rec_fldDisp8.opcodes(&MOVSS_LOAD)); + e.enc_both(load.bind(F32).bind(Any), rec_fldDisp32.opcodes(&MOVSS_LOAD)); + + e.enc_both( + load_complex.bind(F32), + rec_fldWithIndex.opcodes(&MOVSS_LOAD), + ); + e.enc_both( + load_complex.bind(F32), + rec_fldWithIndexDisp8.opcodes(&MOVSS_LOAD), + ); + e.enc_both( + load_complex.bind(F32), + rec_fldWithIndexDisp32.opcodes(&MOVSS_LOAD), + ); + + e.enc_both(load.bind(F64).bind(Any), rec_fld.opcodes(&MOVSD_LOAD)); + e.enc_both(load.bind(F64).bind(Any), rec_fldDisp8.opcodes(&MOVSD_LOAD)); + e.enc_both(load.bind(F64).bind(Any), rec_fldDisp32.opcodes(&MOVSD_LOAD)); + + e.enc_both( + load_complex.bind(F64), + rec_fldWithIndex.opcodes(&MOVSD_LOAD), + ); + e.enc_both( + load_complex.bind(F64), + rec_fldWithIndexDisp8.opcodes(&MOVSD_LOAD), + ); + e.enc_both( + load_complex.bind(F64), + rec_fldWithIndexDisp32.opcodes(&MOVSD_LOAD), + ); + + e.enc_both(store.bind(F32).bind(Any), rec_fst.opcodes(&MOVSS_STORE)); + e.enc_both( + store.bind(F32).bind(Any), + rec_fstDisp8.opcodes(&MOVSS_STORE), + ); + e.enc_both( + store.bind(F32).bind(Any), + rec_fstDisp32.opcodes(&MOVSS_STORE), + ); + + e.enc_both( + store_complex.bind(F32), + rec_fstWithIndex.opcodes(&MOVSS_STORE), + ); + e.enc_both( + store_complex.bind(F32), + rec_fstWithIndexDisp8.opcodes(&MOVSS_STORE), + ); + e.enc_both( + store_complex.bind(F32), + rec_fstWithIndexDisp32.opcodes(&MOVSS_STORE), + ); + + e.enc_both(store.bind(F64).bind(Any), rec_fst.opcodes(&MOVSD_STORE)); + e.enc_both( + store.bind(F64).bind(Any), + rec_fstDisp8.opcodes(&MOVSD_STORE), + ); + e.enc_both( + store.bind(F64).bind(Any), + rec_fstDisp32.opcodes(&MOVSD_STORE), + ); + + e.enc_both( + store_complex.bind(F64), + rec_fstWithIndex.opcodes(&MOVSD_STORE), + ); + e.enc_both( + store_complex.bind(F64), + rec_fstWithIndexDisp8.opcodes(&MOVSD_STORE), + ); + e.enc_both( + store_complex.bind(F64), + rec_fstWithIndexDisp32.opcodes(&MOVSD_STORE), + ); + + e.enc_both(fill.bind(F32), rec_ffillSib32.opcodes(&MOVSS_LOAD)); + e.enc_both(regfill.bind(F32), rec_fregfill32.opcodes(&MOVSS_LOAD)); + e.enc_both(fill.bind(F64), rec_ffillSib32.opcodes(&MOVSD_LOAD)); + e.enc_both(regfill.bind(F64), rec_fregfill32.opcodes(&MOVSD_LOAD)); + + e.enc_both(spill.bind(F32), rec_fspillSib32.opcodes(&MOVSS_STORE)); + e.enc_both(regspill.bind(F32), rec_fregspill32.opcodes(&MOVSS_STORE)); + e.enc_both(spill.bind(F64), rec_fspillSib32.opcodes(&MOVSD_STORE)); + e.enc_both(regspill.bind(F64), rec_fregspill32.opcodes(&MOVSD_STORE)); +} + +#[inline(never)] +fn define_fpu_ops( + e: &mut PerCpuModeEncodings, + shared_defs: &SharedDefinitions, + settings: &SettingGroup, + x86: &InstructionGroup, + r: &RecipeGroup, +) { + let shared = &shared_defs.instructions; + let formats = &shared_defs.formats; + + // Shorthands for instructions. 
+ let ceil = shared.by_name("ceil"); + let f32const = shared.by_name("f32const"); + let f64const = shared.by_name("f64const"); + let fadd = shared.by_name("fadd"); + let fcmp = shared.by_name("fcmp"); + let fcvt_from_sint = shared.by_name("fcvt_from_sint"); + let fdemote = shared.by_name("fdemote"); + let fdiv = shared.by_name("fdiv"); + let ffcmp = shared.by_name("ffcmp"); + let floor = shared.by_name("floor"); + let fmul = shared.by_name("fmul"); + let fpromote = shared.by_name("fpromote"); + let fsub = shared.by_name("fsub"); + let nearest = shared.by_name("nearest"); + let sqrt = shared.by_name("sqrt"); + let trunc = shared.by_name("trunc"); + let x86_cvtt2si = x86.by_name("x86_cvtt2si"); + let x86_fmax = x86.by_name("x86_fmax"); + let x86_fmin = x86.by_name("x86_fmin"); + + // Shorthands for recipes. + let rec_f32imm_z = r.template("f32imm_z"); + let rec_f64imm_z = r.template("f64imm_z"); + let rec_fa = r.template("fa"); + let rec_fcmp = r.template("fcmp"); + let rec_fcscc = r.template("fcscc"); + let rec_frurm = r.template("frurm"); + let rec_furm = r.template("furm"); + let rec_furmi_rnd = r.template("furmi_rnd"); + let rec_rfurm = r.template("rfurm"); + + // Predicates shorthands. + let use_sse41 = settings.predicate_by_name("use_sse41"); + + // Floating-point constants equal to 0.0 can be encoded using either `xorps` or `xorpd`, for + // 32-bit and 64-bit floats respectively. + let is_zero_32_bit_float = + InstructionPredicate::new_is_zero_32bit_float(&*formats.unary_ieee32, "imm"); + e.enc32_instp( + f32const, + rec_f32imm_z.opcodes(&XORPS), + is_zero_32_bit_float.clone(), + ); + + let is_zero_64_bit_float = + InstructionPredicate::new_is_zero_64bit_float(&*formats.unary_ieee64, "imm"); + e.enc32_instp( + f64const, + rec_f64imm_z.opcodes(&XORPD), + is_zero_64_bit_float.clone(), + ); + + e.enc_x86_64_instp(f32const, rec_f32imm_z.opcodes(&XORPS), is_zero_32_bit_float); + e.enc_x86_64_instp(f64const, rec_f64imm_z.opcodes(&XORPD), is_zero_64_bit_float); + + // cvtsi2ss + e.enc_i32_i64(fcvt_from_sint.bind(F32), rec_frurm.opcodes(&CVTSI2SS)); + + // cvtsi2sd + e.enc_i32_i64(fcvt_from_sint.bind(F64), rec_frurm.opcodes(&CVTSI2SD)); + + // cvtss2sd + e.enc_both(fpromote.bind(F64).bind(F32), rec_furm.opcodes(&CVTSS2SD)); + + // cvtsd2ss + e.enc_both(fdemote.bind(F32).bind(F64), rec_furm.opcodes(&CVTSD2SS)); + + // cvttss2si + e.enc_both( + x86_cvtt2si.bind(I32).bind(F32), + rec_rfurm.opcodes(&CVTTSS2SI), + ); + e.enc64( + x86_cvtt2si.bind(I64).bind(F32), + rec_rfurm.opcodes(&CVTTSS2SI).rex().w(), + ); + + // cvttsd2si + e.enc_both( + x86_cvtt2si.bind(I32).bind(F64), + rec_rfurm.opcodes(&CVTTSD2SI), + ); + e.enc64( + x86_cvtt2si.bind(I64).bind(F64), + rec_rfurm.opcodes(&CVTTSD2SI).rex().w(), + ); + + // Exact square roots. + e.enc_both(sqrt.bind(F32), rec_furm.opcodes(&SQRTSS)); + e.enc_both(sqrt.bind(F64), rec_furm.opcodes(&SQRTSD)); + + // Rounding. The recipe looks at the opcode to pick an immediate. + for inst in &[nearest, floor, ceil, trunc] { + e.enc_both_isap(inst.bind(F32), rec_furmi_rnd.opcodes(&ROUNDSS), use_sse41); + e.enc_both_isap(inst.bind(F64), rec_furmi_rnd.opcodes(&ROUNDSD), use_sse41); + } + + // Binary arithmetic ops. 
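+ // Scalar SSE arithmetic: each operation is added twice, with the *SS opcode
+ // for the F32 binding and the *SD opcode for the F64 binding, all through the
+ // same two-operand `fa` recipe.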
+ e.enc_both(fadd.bind(F32), rec_fa.opcodes(&ADDSS)); + e.enc_both(fadd.bind(F64), rec_fa.opcodes(&ADDSD)); + + e.enc_both(fsub.bind(F32), rec_fa.opcodes(&SUBSS)); + e.enc_both(fsub.bind(F64), rec_fa.opcodes(&SUBSD)); + + e.enc_both(fmul.bind(F32), rec_fa.opcodes(&MULSS)); + e.enc_both(fmul.bind(F64), rec_fa.opcodes(&MULSD)); + + e.enc_both(fdiv.bind(F32), rec_fa.opcodes(&DIVSS)); + e.enc_both(fdiv.bind(F64), rec_fa.opcodes(&DIVSD)); + + e.enc_both(x86_fmin.bind(F32), rec_fa.opcodes(&MINSS)); + e.enc_both(x86_fmin.bind(F64), rec_fa.opcodes(&MINSD)); + + e.enc_both(x86_fmax.bind(F32), rec_fa.opcodes(&MAXSS)); + e.enc_both(x86_fmax.bind(F64), rec_fa.opcodes(&MAXSD)); + + // Comparisons. + // + // This only covers the condition codes in `supported_floatccs`, the rest are + // handled by legalization patterns. + e.enc_both(fcmp.bind(F32), rec_fcscc.opcodes(&UCOMISS)); + e.enc_both(fcmp.bind(F64), rec_fcscc.opcodes(&UCOMISD)); + e.enc_both(ffcmp.bind(F32), rec_fcmp.opcodes(&UCOMISS)); + e.enc_both(ffcmp.bind(F64), rec_fcmp.opcodes(&UCOMISD)); +} + +#[inline(never)] +fn define_alu( + e: &mut PerCpuModeEncodings, + shared_defs: &SharedDefinitions, + settings: &SettingGroup, + x86: &InstructionGroup, + r: &RecipeGroup, +) { + let shared = &shared_defs.instructions; + + // Shorthands for instructions. + let clz = shared.by_name("clz"); + let ctz = shared.by_name("ctz"); + let icmp = shared.by_name("icmp"); + let icmp_imm = shared.by_name("icmp_imm"); + let ifcmp = shared.by_name("ifcmp"); + let ifcmp_imm = shared.by_name("ifcmp_imm"); + let ifcmp_sp = shared.by_name("ifcmp_sp"); + let ishl = shared.by_name("ishl"); + let ishl_imm = shared.by_name("ishl_imm"); + let popcnt = shared.by_name("popcnt"); + let rotl = shared.by_name("rotl"); + let rotl_imm = shared.by_name("rotl_imm"); + let rotr = shared.by_name("rotr"); + let rotr_imm = shared.by_name("rotr_imm"); + let selectif = shared.by_name("selectif"); + let selectif_spectre_guard = shared.by_name("selectif_spectre_guard"); + let sshr = shared.by_name("sshr"); + let sshr_imm = shared.by_name("sshr_imm"); + let trueff = shared.by_name("trueff"); + let trueif = shared.by_name("trueif"); + let ushr = shared.by_name("ushr"); + let ushr_imm = shared.by_name("ushr_imm"); + let x86_bsf = x86.by_name("x86_bsf"); + let x86_bsr = x86.by_name("x86_bsr"); + + // Shorthands for recipes. + let rec_bsf_and_bsr = r.template("bsf_and_bsr"); + let rec_cmov = r.template("cmov"); + let rec_icscc = r.template("icscc"); + let rec_icscc_ib = r.template("icscc_ib"); + let rec_icscc_id = r.template("icscc_id"); + let rec_rcmp = r.template("rcmp"); + let rec_rcmp_ib = r.template("rcmp_ib"); + let rec_rcmp_id = r.template("rcmp_id"); + let rec_rcmp_sp = r.template("rcmp_sp"); + let rec_rc = r.template("rc"); + let rec_setf_abcd = r.template("setf_abcd"); + let rec_seti_abcd = r.template("seti_abcd"); + let rec_urm = r.template("urm"); + + // Predicates shorthands. 
+ let use_popcnt = settings.predicate_by_name("use_popcnt"); + let use_lzcnt = settings.predicate_by_name("use_lzcnt"); + let use_bmi1 = settings.predicate_by_name("use_bmi1"); + + let band = shared.by_name("band"); + let band_imm = shared.by_name("band_imm"); + let band_not = shared.by_name("band_not"); + let bnot = shared.by_name("bnot"); + let bor = shared.by_name("bor"); + let bor_imm = shared.by_name("bor_imm"); + let bxor = shared.by_name("bxor"); + let bxor_imm = shared.by_name("bxor_imm"); + let iadd = shared.by_name("iadd"); + let iadd_ifcarry = shared.by_name("iadd_ifcarry"); + let iadd_ifcin = shared.by_name("iadd_ifcin"); + let iadd_ifcout = shared.by_name("iadd_ifcout"); + let iadd_imm = shared.by_name("iadd_imm"); + let imul = shared.by_name("imul"); + let isub = shared.by_name("isub"); + let isub_ifbin = shared.by_name("isub_ifbin"); + let isub_ifborrow = shared.by_name("isub_ifborrow"); + let isub_ifbout = shared.by_name("isub_ifbout"); + let x86_sdivmodx = x86.by_name("x86_sdivmodx"); + let x86_smulx = x86.by_name("x86_smulx"); + let x86_udivmodx = x86.by_name("x86_udivmodx"); + let x86_umulx = x86.by_name("x86_umulx"); + + let rec_div = r.template("div"); + let rec_fa = r.template("fa"); + let rec_fax = r.template("fax"); + let rec_mulx = r.template("mulx"); + let rec_r_ib = r.template("r_ib"); + let rec_r_id = r.template("r_id"); + let rec_rin = r.template("rin"); + let rec_rio = r.template("rio"); + let rec_rout = r.template("rout"); + let rec_rr = r.template("rr"); + let rec_rrx = r.template("rrx"); + let rec_ur = r.template("ur"); + + e.enc_i32_i64(iadd, rec_rr.opcodes(&ADD)); + e.enc_i32_i64(iadd_ifcout, rec_rout.opcodes(&ADD)); + e.enc_i32_i64(iadd_ifcin, rec_rin.opcodes(&ADC)); + e.enc_i32_i64(iadd_ifcarry, rec_rio.opcodes(&ADC)); + e.enc_i32_i64(iadd_imm, rec_r_ib.opcodes(&ADD_IMM8_SIGN_EXTEND).rrr(0)); + e.enc_i32_i64(iadd_imm, rec_r_id.opcodes(&ADD_IMM).rrr(0)); + + e.enc_i32_i64(isub, rec_rr.opcodes(&SUB)); + e.enc_i32_i64(isub_ifbout, rec_rout.opcodes(&SUB)); + e.enc_i32_i64(isub_ifbin, rec_rin.opcodes(&SBB)); + e.enc_i32_i64(isub_ifborrow, rec_rio.opcodes(&SBB)); + + e.enc_i32_i64(band, rec_rr.opcodes(&AND)); + e.enc_b32_b64(band, rec_rr.opcodes(&AND)); + + // TODO: band_imm.i64 with an unsigned 32-bit immediate can be encoded as band_imm.i32. Can + // even use the single-byte immediate for 0xffff_ffXX masks. + + e.enc_i32_i64(band_imm, rec_r_ib.opcodes(&AND_IMM8_SIGN_EXTEND).rrr(4)); + e.enc_i32_i64(band_imm, rec_r_id.opcodes(&AND_IMM).rrr(4)); + + e.enc_i32_i64(bor, rec_rr.opcodes(&OR)); + e.enc_b32_b64(bor, rec_rr.opcodes(&OR)); + e.enc_i32_i64(bor_imm, rec_r_ib.opcodes(&OR_IMM8_SIGN_EXTEND).rrr(1)); + e.enc_i32_i64(bor_imm, rec_r_id.opcodes(&OR_IMM).rrr(1)); + + e.enc_i32_i64(bxor, rec_rr.opcodes(&XOR)); + e.enc_b32_b64(bxor, rec_rr.opcodes(&XOR)); + e.enc_i32_i64(bxor_imm, rec_r_ib.opcodes(&XOR_IMM8_SIGN_EXTEND).rrr(6)); + e.enc_i32_i64(bxor_imm, rec_r_id.opcodes(&XOR_IMM).rrr(6)); + + // x86 has a bitwise not instruction NOT. + e.enc_i32_i64(bnot, rec_ur.opcodes(&NOT).rrr(2)); + e.enc_b32_b64(bnot, rec_ur.opcodes(&NOT).rrr(2)); + e.enc_both(bnot.bind(B1), rec_ur.opcodes(&NOT).rrr(2)); + + // Also add a `b1` encodings for the logic instructions. + // TODO: Should this be done with 8-bit instructions? It would improve partial register + // dependencies. 
+ e.enc_both(band.bind(B1), rec_rr.opcodes(&AND)); + e.enc_both(bor.bind(B1), rec_rr.opcodes(&OR)); + e.enc_both(bxor.bind(B1), rec_rr.opcodes(&XOR)); + + e.enc_i32_i64(imul, rec_rrx.opcodes(&IMUL)); + e.enc_i32_i64(x86_sdivmodx, rec_div.opcodes(&IDIV).rrr(7)); + e.enc_i32_i64(x86_udivmodx, rec_div.opcodes(&DIV).rrr(6)); + + e.enc_i32_i64(x86_smulx, rec_mulx.opcodes(&IMUL_RDX_RAX).rrr(5)); + e.enc_i32_i64(x86_umulx, rec_mulx.opcodes(&MUL).rrr(4)); + + // Binary bitwise ops. + // + // The F64 version is intentionally encoded using the single-precision opcode: + // the operation is identical and the encoding is one byte shorter. + e.enc_both(band.bind(F32), rec_fa.opcodes(&ANDPS)); + e.enc_both(band.bind(F64), rec_fa.opcodes(&ANDPS)); + + e.enc_both(bor.bind(F32), rec_fa.opcodes(&ORPS)); + e.enc_both(bor.bind(F64), rec_fa.opcodes(&ORPS)); + + e.enc_both(bxor.bind(F32), rec_fa.opcodes(&XORPS)); + e.enc_both(bxor.bind(F64), rec_fa.opcodes(&XORPS)); + + // The `andnps(x,y)` instruction computes `~x&y`, while band_not(x,y)` is `x&~y. + e.enc_both(band_not.bind(F32), rec_fax.opcodes(&ANDNPS)); + e.enc_both(band_not.bind(F64), rec_fax.opcodes(&ANDNPS)); + + // Shifts and rotates. + // Note that the dynamic shift amount is only masked by 5 or 6 bits; the 8-bit + // and 16-bit shifts would need explicit masking. + + for &(inst, rrr) in &[(rotl, 0), (rotr, 1), (ishl, 4), (ushr, 5), (sshr, 7)] { + // Cannot use enc_i32_i64 for this pattern because instructions require + // to bind any. + e.enc32(inst.bind(I32).bind(I8), rec_rc.opcodes(&ROTATE_CL).rrr(rrr)); + e.enc32( + inst.bind(I32).bind(I16), + rec_rc.opcodes(&ROTATE_CL).rrr(rrr), + ); + e.enc32( + inst.bind(I32).bind(I32), + rec_rc.opcodes(&ROTATE_CL).rrr(rrr), + ); + e.enc64( + inst.bind(I64).bind(Any), + rec_rc.opcodes(&ROTATE_CL).rrr(rrr).rex().w(), + ); + e.enc64( + inst.bind(I32).bind(Any), + rec_rc.opcodes(&ROTATE_CL).rrr(rrr).rex(), + ); + e.enc64( + inst.bind(I32).bind(Any), + rec_rc.opcodes(&ROTATE_CL).rrr(rrr), + ); + } + + e.enc_i32_i64(rotl_imm, rec_r_ib.opcodes(&ROTATE_IMM8).rrr(0)); + e.enc_i32_i64(rotr_imm, rec_r_ib.opcodes(&ROTATE_IMM8).rrr(1)); + e.enc_i32_i64(ishl_imm, rec_r_ib.opcodes(&ROTATE_IMM8).rrr(4)); + e.enc_i32_i64(ushr_imm, rec_r_ib.opcodes(&ROTATE_IMM8).rrr(5)); + e.enc_i32_i64(sshr_imm, rec_r_ib.opcodes(&ROTATE_IMM8).rrr(7)); + + // Population count. + e.enc32_isap(popcnt.bind(I32), rec_urm.opcodes(&POPCNT), use_popcnt); + e.enc64_isap( + popcnt.bind(I64), + rec_urm.opcodes(&POPCNT).rex().w(), + use_popcnt, + ); + e.enc64_isap(popcnt.bind(I32), rec_urm.opcodes(&POPCNT).rex(), use_popcnt); + e.enc64_isap(popcnt.bind(I32), rec_urm.opcodes(&POPCNT), use_popcnt); + + // Count leading zero bits. + e.enc32_isap(clz.bind(I32), rec_urm.opcodes(&LZCNT), use_lzcnt); + e.enc64_isap(clz.bind(I64), rec_urm.opcodes(&LZCNT).rex().w(), use_lzcnt); + e.enc64_isap(clz.bind(I32), rec_urm.opcodes(&LZCNT).rex(), use_lzcnt); + e.enc64_isap(clz.bind(I32), rec_urm.opcodes(&LZCNT), use_lzcnt); + + // Count trailing zero bits. 
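+ // TZCNT is a BMI1 instruction, hence the `use_bmi1` predicate on each of the
+ // encodings below.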
+ e.enc32_isap(ctz.bind(I32), rec_urm.opcodes(&TZCNT), use_bmi1); + e.enc64_isap(ctz.bind(I64), rec_urm.opcodes(&TZCNT).rex().w(), use_bmi1); + e.enc64_isap(ctz.bind(I32), rec_urm.opcodes(&TZCNT).rex(), use_bmi1); + e.enc64_isap(ctz.bind(I32), rec_urm.opcodes(&TZCNT), use_bmi1); + + // Bit scan forwards and reverse + e.enc_i32_i64(x86_bsf, rec_bsf_and_bsr.opcodes(&BIT_SCAN_FORWARD)); + e.enc_i32_i64(x86_bsr, rec_bsf_and_bsr.opcodes(&BIT_SCAN_REVERSE)); + + // Comparisons + e.enc_i32_i64(icmp, rec_icscc.opcodes(&CMP_REG)); + e.enc_i32_i64(icmp_imm, rec_icscc_ib.opcodes(&CMP_IMM8).rrr(7)); + e.enc_i32_i64(icmp_imm, rec_icscc_id.opcodes(&CMP_IMM).rrr(7)); + e.enc_i32_i64(ifcmp, rec_rcmp.opcodes(&CMP_REG)); + e.enc_i32_i64(ifcmp_imm, rec_rcmp_ib.opcodes(&CMP_IMM8).rrr(7)); + e.enc_i32_i64(ifcmp_imm, rec_rcmp_id.opcodes(&CMP_IMM).rrr(7)); + // TODO: We could special-case ifcmp_imm(x, 0) to TEST(x, x). + + e.enc32(ifcmp_sp.bind(I32), rec_rcmp_sp.opcodes(&CMP_REG)); + e.enc64(ifcmp_sp.bind(I64), rec_rcmp_sp.opcodes(&CMP_REG).rex().w()); + + // Convert flags to bool. + // This encodes `b1` as an 8-bit low register with the value 0 or 1. + e.enc_both(trueif, rec_seti_abcd.opcodes(&SET_BYTE_IF_OVERFLOW)); + e.enc_both(trueff, rec_setf_abcd.opcodes(&SET_BYTE_IF_OVERFLOW)); + + // Conditional move (a.k.a integer select). + e.enc_i32_i64(selectif, rec_cmov.opcodes(&CMOV_OVERFLOW)); + // A Spectre-guard integer select is exactly the same as a selectif, but + // is not associated with any other legalization rules and is not + // recognized by any optimizations, so it must arrive here unmodified + // and in its original place. + e.enc_i32_i64(selectif_spectre_guard, rec_cmov.opcodes(&CMOV_OVERFLOW)); +} + +#[inline(never)] +#[allow(clippy::cognitive_complexity)] +fn define_simd( + e: &mut PerCpuModeEncodings, + shared_defs: &SharedDefinitions, + settings: &SettingGroup, + x86: &InstructionGroup, + r: &RecipeGroup, +) { + let shared = &shared_defs.instructions; + let formats = &shared_defs.formats; + + // Shorthands for instructions. 
+ let avg_round = shared.by_name("avg_round"); + let bitcast = shared.by_name("bitcast"); + let bor = shared.by_name("bor"); + let bxor = shared.by_name("bxor"); + let copy = shared.by_name("copy"); + let copy_nop = shared.by_name("copy_nop"); + let copy_to_ssa = shared.by_name("copy_to_ssa"); + let fadd = shared.by_name("fadd"); + let fcmp = shared.by_name("fcmp"); + let fcvt_from_sint = shared.by_name("fcvt_from_sint"); + let fdiv = shared.by_name("fdiv"); + let fill = shared.by_name("fill"); + let fill_nop = shared.by_name("fill_nop"); + let fmul = shared.by_name("fmul"); + let fsub = shared.by_name("fsub"); + let iabs = shared.by_name("iabs"); + let iadd = shared.by_name("iadd"); + let icmp = shared.by_name("icmp"); + let imul = shared.by_name("imul"); + let ishl_imm = shared.by_name("ishl_imm"); + let load = shared.by_name("load"); + let load_complex = shared.by_name("load_complex"); + let raw_bitcast = shared.by_name("raw_bitcast"); + let regfill = shared.by_name("regfill"); + let regmove = shared.by_name("regmove"); + let regspill = shared.by_name("regspill"); + let sadd_sat = shared.by_name("sadd_sat"); + let scalar_to_vector = shared.by_name("scalar_to_vector"); + let sload8x8 = shared.by_name("sload8x8"); + let sload8x8_complex = shared.by_name("sload8x8_complex"); + let sload16x4 = shared.by_name("sload16x4"); + let sload16x4_complex = shared.by_name("sload16x4_complex"); + let sload32x2 = shared.by_name("sload32x2"); + let sload32x2_complex = shared.by_name("sload32x2_complex"); + let spill = shared.by_name("spill"); + let sqrt = shared.by_name("sqrt"); + let sshr_imm = shared.by_name("sshr_imm"); + let ssub_sat = shared.by_name("ssub_sat"); + let store = shared.by_name("store"); + let store_complex = shared.by_name("store_complex"); + let swiden_low = shared.by_name("swiden_low"); + let uadd_sat = shared.by_name("uadd_sat"); + let uload8x8 = shared.by_name("uload8x8"); + let uload8x8_complex = shared.by_name("uload8x8_complex"); + let uload16x4 = shared.by_name("uload16x4"); + let uload16x4_complex = shared.by_name("uload16x4_complex"); + let uload32x2 = shared.by_name("uload32x2"); + let uload32x2_complex = shared.by_name("uload32x2_complex"); + let snarrow = shared.by_name("snarrow"); + let unarrow = shared.by_name("unarrow"); + let uwiden_low = shared.by_name("uwiden_low"); + let ushr_imm = shared.by_name("ushr_imm"); + let usub_sat = shared.by_name("usub_sat"); + let vconst = shared.by_name("vconst"); + let vselect = shared.by_name("vselect"); + let x86_cvtt2si = x86.by_name("x86_cvtt2si"); + let x86_insertps = x86.by_name("x86_insertps"); + let x86_fmax = x86.by_name("x86_fmax"); + let x86_fmin = x86.by_name("x86_fmin"); + let x86_movlhps = x86.by_name("x86_movlhps"); + let x86_movsd = x86.by_name("x86_movsd"); + let x86_pblendw = x86.by_name("x86_pblendw"); + let x86_pextr = x86.by_name("x86_pextr"); + let x86_pinsr = x86.by_name("x86_pinsr"); + let x86_pmaxs = x86.by_name("x86_pmaxs"); + let x86_pmaxu = x86.by_name("x86_pmaxu"); + let x86_pmins = x86.by_name("x86_pmins"); + let x86_pminu = x86.by_name("x86_pminu"); + let x86_pmullq = x86.by_name("x86_pmullq"); + let x86_pmuludq = x86.by_name("x86_pmuludq"); + let x86_palignr = x86.by_name("x86_palignr"); + let x86_pshufb = x86.by_name("x86_pshufb"); + let x86_pshufd = x86.by_name("x86_pshufd"); + let x86_psll = x86.by_name("x86_psll"); + let x86_psra = x86.by_name("x86_psra"); + let x86_psrl = x86.by_name("x86_psrl"); + let x86_ptest = x86.by_name("x86_ptest"); + let x86_punpckh = x86.by_name("x86_punpckh"); + let 
x86_punpckl = x86.by_name("x86_punpckl"); + let x86_vcvtudq2ps = x86.by_name("x86_vcvtudq2ps"); + + // Shorthands for recipes. + let rec_blend = r.template("blend"); + let rec_evex_reg_vvvv_rm_128 = r.template("evex_reg_vvvv_rm_128"); + let rec_evex_reg_rm_128 = r.template("evex_reg_rm_128"); + let rec_f_ib = r.template("f_ib"); + let rec_fa = r.template("fa"); + let rec_fa_ib = r.template("fa_ib"); + let rec_fax = r.template("fax"); + let rec_fcmp = r.template("fcmp"); + let rec_ffillSib32 = r.template("ffillSib32"); + let rec_ffillnull = r.recipe("ffillnull"); + let rec_fld = r.template("fld"); + let rec_fldDisp32 = r.template("fldDisp32"); + let rec_fldDisp8 = r.template("fldDisp8"); + let rec_fldWithIndex = r.template("fldWithIndex"); + let rec_fldWithIndexDisp32 = r.template("fldWithIndexDisp32"); + let rec_fldWithIndexDisp8 = r.template("fldWithIndexDisp8"); + let rec_fregfill32 = r.template("fregfill32"); + let rec_fregspill32 = r.template("fregspill32"); + let rec_frmov = r.template("frmov"); + let rec_frurm = r.template("frurm"); + let rec_fspillSib32 = r.template("fspillSib32"); + let rec_fst = r.template("fst"); + let rec_fstDisp32 = r.template("fstDisp32"); + let rec_fstDisp8 = r.template("fstDisp8"); + let rec_fstWithIndex = r.template("fstWithIndex"); + let rec_fstWithIndexDisp32 = r.template("fstWithIndexDisp32"); + let rec_fstWithIndexDisp8 = r.template("fstWithIndexDisp8"); + let rec_furm = r.template("furm"); + let rec_furm_reg_to_ssa = r.template("furm_reg_to_ssa"); + let rec_icscc_fpr = r.template("icscc_fpr"); + let rec_null_fpr = r.recipe("null_fpr"); + let rec_pfcmp = r.template("pfcmp"); + let rec_r_ib_unsigned_fpr = r.template("r_ib_unsigned_fpr"); + let rec_r_ib_unsigned_gpr = r.template("r_ib_unsigned_gpr"); + let rec_r_ib_unsigned_r = r.template("r_ib_unsigned_r"); + let rec_stacknull = r.recipe("stacknull"); + let rec_vconst = r.template("vconst"); + let rec_vconst_optimized = r.template("vconst_optimized"); + + // Predicates shorthands. + settings.predicate_by_name("all_ones_funcaddrs_and_not_is_pic"); + settings.predicate_by_name("not_all_ones_funcaddrs_and_not_is_pic"); + let use_ssse3_simd = settings.predicate_by_name("use_ssse3_simd"); + let use_sse41_simd = settings.predicate_by_name("use_sse41_simd"); + let use_sse42_simd = settings.predicate_by_name("use_sse42_simd"); + let use_avx512dq_simd = settings.predicate_by_name("use_avx512dq_simd"); + let use_avx512vl_simd = settings.predicate_by_name("use_avx512vl_simd"); + + // SIMD vector size: eventually multiple vector sizes may be supported but for now only + // SSE-sized vectors are available. + let sse_vector_size: u64 = 128; + + // SIMD splat: before x86 can use vector data, it must be moved to XMM registers; see + // legalize.rs for how this is done; once there, x86_pshuf* (below) is used for broadcasting the + // value across the register. + + let allowed_simd_type = |t: &LaneType| t.lane_bits() >= 8 && t.lane_bits() < 128; + + // PSHUFB, 8-bit shuffle using two XMM registers. + for ty in ValueType::all_lane_types().filter(allowed_simd_type) { + let instruction = x86_pshufb.bind(vector(ty, sse_vector_size)); + let template = rec_fa.opcodes(&PSHUFB); + e.enc_both_inferred_maybe_isap(instruction.clone(), template.clone(), Some(use_ssse3_simd)); + } + + // PSHUFD, 32-bit shuffle using one XMM register and a u8 immediate. 
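+ // (The 32-bit-lane counterpart of PSHUFB above; as noted earlier, the
+ // x86_pshuf* instructions are what splat legalization broadcasts with.)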
+ for ty in ValueType::all_lane_types().filter(|t| t.lane_bits() == 32) { + let instruction = x86_pshufd.bind(vector(ty, sse_vector_size)); + let template = rec_r_ib_unsigned_fpr.opcodes(&PSHUFD); + e.enc_both_inferred(instruction, template); + } + + // SIMD vselect; controlling value of vselect is a boolean vector, so each lane should be + // either all ones or all zeroes - it makes it possible to always use 8-bit PBLENDVB; + // for 32/64-bit lanes we can also use BLENDVPS and BLENDVPD + for ty in ValueType::all_lane_types().filter(allowed_simd_type) { + let opcode = match ty.lane_bits() { + 32 => &BLENDVPS, + 64 => &BLENDVPD, + _ => &PBLENDVB, + }; + let instruction = vselect.bind(vector(ty, sse_vector_size)); + let template = rec_blend.opcodes(opcode); + e.enc_both_inferred_maybe_isap(instruction, template, Some(use_sse41_simd)); + } + + // PBLENDW, select lanes using a u8 immediate. + for ty in ValueType::all_lane_types().filter(|t| t.lane_bits() == 16) { + let instruction = x86_pblendw.bind(vector(ty, sse_vector_size)); + let template = rec_fa_ib.opcodes(&PBLENDW); + e.enc_both_inferred_maybe_isap(instruction, template, Some(use_sse41_simd)); + } + + // SIMD scalar_to_vector; this uses MOV to copy the scalar value to an XMM register; according + // to the Intel manual: "When the destination operand is an XMM register, the source operand is + // written to the low doubleword of the register and the register is zero-extended to 128 bits." + for ty in ValueType::all_lane_types().filter(allowed_simd_type) { + let instruction = scalar_to_vector.bind(vector(ty, sse_vector_size)); + if ty.is_float() { + // No need to move floats--they already live in XMM registers. + e.enc_32_64_rec(instruction, rec_null_fpr, 0); + } else { + let template = rec_frurm.opcodes(&MOVD_LOAD_XMM); + if ty.lane_bits() < 64 { + e.enc_both_inferred(instruction, template); + } else { + // No 32-bit encodings for 64-bit widths. + assert_eq!(ty.lane_bits(), 64); + e.enc64(instruction, template.rex().w()); + } + } + } + + // SIMD insertlane + for ty in ValueType::all_lane_types().filter(allowed_simd_type) { + let (opcode, isap): (&[_], _) = match ty.lane_bits() { + 8 => (&PINSRB, Some(use_sse41_simd)), + 16 => (&PINSRW, None), + 32 | 64 => (&PINSR, Some(use_sse41_simd)), + _ => panic!("invalid size for SIMD insertlane"), + }; + + let instruction = x86_pinsr.bind(vector(ty, sse_vector_size)); + let template = rec_r_ib_unsigned_r.opcodes(opcode); + if ty.lane_bits() < 64 { + e.enc_both_inferred_maybe_isap(instruction, template, isap); + } else { + // It turns out the 64-bit widths have REX/W encodings and only are available on + // x86_64. + e.enc64_maybe_isap(instruction, template.rex().w(), isap); + } + } + + // For legalizing insertlane with floats, INSERTPS from SSE4.1. + { + let instruction = x86_insertps.bind(vector(F32, sse_vector_size)); + let template = rec_fa_ib.opcodes(&INSERTPS); + e.enc_both_inferred_maybe_isap(instruction, template, Some(use_sse41_simd)); + } + + // For legalizing insertlane with floats, MOVSD from SSE2. + { + let instruction = x86_movsd.bind(vector(F64, sse_vector_size)); + let template = rec_fa.opcodes(&MOVSD_LOAD); + e.enc_both_inferred(instruction, template); // from SSE2 + } + + // For legalizing insertlane with floats, MOVLHPS from SSE. 
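+ // MOVLHPS copies the low 64 bits of the source XMM register into the high 64 bits of the destination and leaves the destination's low 64 bits unchanged, which is the data movement needed when an f64 is inserted into the upper lane of an f64x2 vector.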
+ { + let instruction = x86_movlhps.bind(vector(F64, sse_vector_size)); + let template = rec_fa.opcodes(&MOVLHPS); + e.enc_both_inferred(instruction, template); // from SSE + } + + // SIMD extractlane + for ty in ValueType::all_lane_types().filter(allowed_simd_type) { + let opcode = match ty.lane_bits() { + 8 => &PEXTRB, + 16 => &PEXTRW, + 32 | 64 => &PEXTR, + _ => panic!("invalid size for SIMD extractlane"), + }; + + let instruction = x86_pextr.bind(vector(ty, sse_vector_size)); + let template = rec_r_ib_unsigned_gpr.opcodes(opcode); + if ty.lane_bits() < 64 { + e.enc_both_inferred_maybe_isap(instruction, template, Some(use_sse41_simd)); + } else { + // It turns out the 64-bit widths have REX/W encodings and only are available on + // x86_64. + e.enc64_maybe_isap(instruction, template.rex().w(), Some(use_sse41_simd)); + } + } + + // SIMD packing/unpacking + for ty in ValueType::all_lane_types().filter(allowed_simd_type) { + let (high, low) = match ty.lane_bits() { + 8 => (&PUNPCKHBW, &PUNPCKLBW), + 16 => (&PUNPCKHWD, &PUNPCKLWD), + 32 => (&PUNPCKHDQ, &PUNPCKLDQ), + 64 => (&PUNPCKHQDQ, &PUNPCKLQDQ), + _ => panic!("invalid size for SIMD packing/unpacking"), + }; + + e.enc_both_inferred( + x86_punpckh.bind(vector(ty, sse_vector_size)), + rec_fa.opcodes(high), + ); + e.enc_both_inferred( + x86_punpckl.bind(vector(ty, sse_vector_size)), + rec_fa.opcodes(low), + ); + } + + // SIMD narrow/widen + for (ty, opcodes) in &[(I16, &PACKSSWB), (I32, &PACKSSDW)] { + let snarrow = snarrow.bind(vector(*ty, sse_vector_size)); + e.enc_both_inferred(snarrow, rec_fa.opcodes(*opcodes)); + } + for (ty, opcodes, isap) in &[ + (I16, &PACKUSWB[..], None), + (I32, &PACKUSDW[..], Some(use_sse41_simd)), + ] { + let unarrow = unarrow.bind(vector(*ty, sse_vector_size)); + e.enc_both_inferred_maybe_isap(unarrow, rec_fa.opcodes(*opcodes), *isap); + } + for (ty, swiden_opcode, uwiden_opcode) in &[ + (I8, &PMOVSXBW[..], &PMOVZXBW[..]), + (I16, &PMOVSXWD[..], &PMOVZXWD[..]), + ] { + let isap = Some(use_sse41_simd); + let swiden_low = swiden_low.bind(vector(*ty, sse_vector_size)); + e.enc_both_inferred_maybe_isap(swiden_low, rec_furm.opcodes(*swiden_opcode), isap); + let uwiden_low = uwiden_low.bind(vector(*ty, sse_vector_size)); + e.enc_both_inferred_maybe_isap(uwiden_low, rec_furm.opcodes(*uwiden_opcode), isap); + } + for ty in &[I8, I16, I32, I64] { + e.enc_both_inferred_maybe_isap( + x86_palignr.bind(vector(*ty, sse_vector_size)), + rec_fa_ib.opcodes(&PALIGNR[..]), + Some(use_ssse3_simd), + ); + } + + // SIMD bitcast all 128-bit vectors to each other (for legalizing splat.x16x8). + for from_type in ValueType::all_lane_types().filter(allowed_simd_type) { + for to_type in + ValueType::all_lane_types().filter(|t| allowed_simd_type(t) && *t != from_type) + { + let instruction = raw_bitcast + .bind(vector(to_type, sse_vector_size)) + .bind(vector(from_type, sse_vector_size)); + e.enc_32_64_rec(instruction, rec_null_fpr, 0); + } + } + + // SIMD raw bitcast floats to vector (and back); assumes that floats are already stored in an + // XMM register. 
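+ // Since the bits already sit in an XMM register, these raw_bitcast encodings use the null_fpr recipe below and emit no machine code; the instruction only changes the type the value is viewed as.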
+ for float_type in &[F32, F64] { + for lane_type in ValueType::all_lane_types().filter(allowed_simd_type) { + e.enc_32_64_rec( + raw_bitcast + .bind(vector(lane_type, sse_vector_size)) + .bind(*float_type), + rec_null_fpr, + 0, + ); + e.enc_32_64_rec( + raw_bitcast + .bind(*float_type) + .bind(vector(lane_type, sse_vector_size)), + rec_null_fpr, + 0, + ); + } + } + + // SIMD conversions + { + let fcvt_from_sint_32 = fcvt_from_sint + .bind(vector(F32, sse_vector_size)) + .bind(vector(I32, sse_vector_size)); + e.enc_both(fcvt_from_sint_32, rec_furm.opcodes(&CVTDQ2PS)); + + e.enc_32_64_maybe_isap( + x86_vcvtudq2ps, + rec_evex_reg_rm_128.opcodes(&VCVTUDQ2PS), + Some(use_avx512vl_simd), // TODO need an OR predicate to join with AVX512F + ); + + e.enc_both_inferred( + x86_cvtt2si + .bind(vector(I32, sse_vector_size)) + .bind(vector(F32, sse_vector_size)), + rec_furm.opcodes(&CVTTPS2DQ), + ); + } + + // SIMD vconst for special cases (all zeroes, all ones) + // this must be encoded prior to the MOVUPS implementation (below) so the compiler sees this + // encoding first + for ty in ValueType::all_lane_types().filter(allowed_simd_type) { + let instruction = vconst.bind(vector(ty, sse_vector_size)); + + let is_zero_128bit = + InstructionPredicate::new_is_all_zeroes(&*formats.unary_const, "constant_handle"); + let template = rec_vconst_optimized.opcodes(&PXOR).infer_rex(); + e.enc_32_64_func(instruction.clone(), template, |builder| { + builder.inst_predicate(is_zero_128bit) + }); + + let is_ones_128bit = + InstructionPredicate::new_is_all_ones(&*formats.unary_const, "constant_handle"); + let template = rec_vconst_optimized.opcodes(&PCMPEQB).infer_rex(); + e.enc_32_64_func(instruction, template, |builder| { + builder.inst_predicate(is_ones_128bit) + }); + } + + // SIMD vconst using MOVUPS + // TODO it would be ideal if eventually this became the more efficient MOVAPS but we would have + // to guarantee that the constants are aligned when emitted and there is currently no mechanism + // for that; alternately, constants could be loaded into XMM registers using a sequence like: + // MOVQ + MOVHPD + MOVQ + MOVLPD (this allows the constants to be immediates instead of stored + // in memory) but some performance measurements are needed. + for ty in ValueType::all_lane_types().filter(allowed_simd_type) { + let instruction = vconst.bind(vector(ty, sse_vector_size)); + let template = rec_vconst.opcodes(&MOVUPS_LOAD); + e.enc_both_inferred(instruction, template); // from SSE + } + + // SIMD register movement: store, load, spill, fill, regmove, etc. All of these use encodings of + // MOVUPS and MOVAPS from SSE (TODO ideally all of these would either use MOVAPS when we have + // alignment or type-specific encodings, see https://github.com/bytecodealliance/wasmtime/issues/1124). + // Also, it would be ideal to infer REX prefixes for all of these instructions but for the + // time being only instructions with common recipes have `infer_rex()` support. 
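+ // Concretely, the loop below gives every SSE-sized vector type: store/load encodings (plus the Disp8/Disp32 and indexed variants) based on MOVUPS, spill/fill and regspill/regfill based on MOVUPS, regmove and copy/copy_to_ssa based on MOVAPS, and no-op recipes for fill_nop and copy_nop.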
+ for ty in ValueType::all_lane_types().filter(allowed_simd_type) { + // Store + let bound_store = store.bind(vector(ty, sse_vector_size)).bind(Any); + e.enc_both_inferred(bound_store.clone(), rec_fst.opcodes(&MOVUPS_STORE)); + e.enc_both_inferred(bound_store.clone(), rec_fstDisp8.opcodes(&MOVUPS_STORE)); + e.enc_both_inferred(bound_store, rec_fstDisp32.opcodes(&MOVUPS_STORE)); + + // Store complex + let bound_store_complex = store_complex.bind(vector(ty, sse_vector_size)); + e.enc_both( + bound_store_complex.clone(), + rec_fstWithIndex.opcodes(&MOVUPS_STORE), + ); + e.enc_both( + bound_store_complex.clone(), + rec_fstWithIndexDisp8.opcodes(&MOVUPS_STORE), + ); + e.enc_both( + bound_store_complex, + rec_fstWithIndexDisp32.opcodes(&MOVUPS_STORE), + ); + + // Load + let bound_load = load.bind(vector(ty, sse_vector_size)).bind(Any); + e.enc_both_inferred(bound_load.clone(), rec_fld.opcodes(&MOVUPS_LOAD)); + e.enc_both_inferred(bound_load.clone(), rec_fldDisp8.opcodes(&MOVUPS_LOAD)); + e.enc_both_inferred(bound_load, rec_fldDisp32.opcodes(&MOVUPS_LOAD)); + + // Load complex + let bound_load_complex = load_complex.bind(vector(ty, sse_vector_size)); + e.enc_both( + bound_load_complex.clone(), + rec_fldWithIndex.opcodes(&MOVUPS_LOAD), + ); + e.enc_both( + bound_load_complex.clone(), + rec_fldWithIndexDisp8.opcodes(&MOVUPS_LOAD), + ); + e.enc_both( + bound_load_complex, + rec_fldWithIndexDisp32.opcodes(&MOVUPS_LOAD), + ); + + // Spill + let bound_spill = spill.bind(vector(ty, sse_vector_size)); + e.enc_both(bound_spill, rec_fspillSib32.opcodes(&MOVUPS_STORE)); + let bound_regspill = regspill.bind(vector(ty, sse_vector_size)); + e.enc_both(bound_regspill, rec_fregspill32.opcodes(&MOVUPS_STORE)); + + // Fill + let bound_fill = fill.bind(vector(ty, sse_vector_size)); + e.enc_both(bound_fill, rec_ffillSib32.opcodes(&MOVUPS_LOAD)); + let bound_regfill = regfill.bind(vector(ty, sse_vector_size)); + e.enc_both(bound_regfill, rec_fregfill32.opcodes(&MOVUPS_LOAD)); + let bound_fill_nop = fill_nop.bind(vector(ty, sse_vector_size)); + e.enc_32_64_rec(bound_fill_nop, rec_ffillnull, 0); + + // Regmove + let bound_regmove = regmove.bind(vector(ty, sse_vector_size)); + e.enc_both(bound_regmove, rec_frmov.opcodes(&MOVAPS_LOAD)); + + // Copy + let bound_copy = copy.bind(vector(ty, sse_vector_size)); + e.enc_both(bound_copy, rec_furm.opcodes(&MOVAPS_LOAD)); + let bound_copy_to_ssa = copy_to_ssa.bind(vector(ty, sse_vector_size)); + e.enc_both(bound_copy_to_ssa, rec_furm_reg_to_ssa.opcodes(&MOVAPS_LOAD)); + let bound_copy_nop = copy_nop.bind(vector(ty, sse_vector_size)); + e.enc_32_64_rec(bound_copy_nop, rec_stacknull, 0); + } + + // SIMD load extend + for (inst, opcodes) in &[ + (uload8x8, &PMOVZXBW), + (uload16x4, &PMOVZXWD), + (uload32x2, &PMOVZXDQ), + (sload8x8, &PMOVSXBW), + (sload16x4, &PMOVSXWD), + (sload32x2, &PMOVSXDQ), + ] { + let isap = Some(use_sse41_simd); + for recipe in &[rec_fld, rec_fldDisp8, rec_fldDisp32] { + let inst = *inst; + let template = recipe.opcodes(*opcodes); + e.enc_both_inferred_maybe_isap(inst.clone().bind(I32), template.clone(), isap); + e.enc64_maybe_isap(inst.bind(I64), template.infer_rex(), isap); + } + } + + // SIMD load extend (complex addressing) + let is_load_complex_length_two = + InstructionPredicate::new_length_equals(&*formats.load_complex, 2); + for (inst, opcodes) in &[ + (uload8x8_complex, &PMOVZXBW), + (uload16x4_complex, &PMOVZXWD), + (uload32x2_complex, &PMOVZXDQ), + (sload8x8_complex, &PMOVSXBW), + (sload16x4_complex, &PMOVSXWD), + (sload32x2_complex, &PMOVSXDQ), + ] 
{ + for recipe in &[ + rec_fldWithIndex, + rec_fldWithIndexDisp8, + rec_fldWithIndexDisp32, + ] { + let template = recipe.opcodes(*opcodes); + let predicate = |encoding: EncodingBuilder| { + encoding + .isa_predicate(use_sse41_simd) + .inst_predicate(is_load_complex_length_two.clone()) + }; + e.enc32_func(inst.clone(), template.clone(), predicate); + // No infer_rex calculator for these recipes; place REX version first as in enc_x86_64. + e.enc64_func(inst.clone(), template.rex(), predicate); + e.enc64_func(inst.clone(), template, predicate); + } + } + + // SIMD integer addition + for (ty, opcodes) in &[(I8, &PADDB), (I16, &PADDW), (I32, &PADDD), (I64, &PADDQ)] { + let iadd = iadd.bind(vector(*ty, sse_vector_size)); + e.enc_both_inferred(iadd, rec_fa.opcodes(*opcodes)); + } + + // SIMD integer saturating addition + e.enc_both_inferred( + sadd_sat.bind(vector(I8, sse_vector_size)), + rec_fa.opcodes(&PADDSB), + ); + e.enc_both_inferred( + sadd_sat.bind(vector(I16, sse_vector_size)), + rec_fa.opcodes(&PADDSW), + ); + e.enc_both_inferred( + uadd_sat.bind(vector(I8, sse_vector_size)), + rec_fa.opcodes(&PADDUSB), + ); + e.enc_both_inferred( + uadd_sat.bind(vector(I16, sse_vector_size)), + rec_fa.opcodes(&PADDUSW), + ); + + // SIMD integer subtraction + let isub = shared.by_name("isub"); + for (ty, opcodes) in &[(I8, &PSUBB), (I16, &PSUBW), (I32, &PSUBD), (I64, &PSUBQ)] { + let isub = isub.bind(vector(*ty, sse_vector_size)); + e.enc_both_inferred(isub, rec_fa.opcodes(*opcodes)); + } + + // SIMD integer saturating subtraction + e.enc_both_inferred( + ssub_sat.bind(vector(I8, sse_vector_size)), + rec_fa.opcodes(&PSUBSB), + ); + e.enc_both_inferred( + ssub_sat.bind(vector(I16, sse_vector_size)), + rec_fa.opcodes(&PSUBSW), + ); + e.enc_both_inferred( + usub_sat.bind(vector(I8, sse_vector_size)), + rec_fa.opcodes(&PSUBUSB), + ); + e.enc_both_inferred( + usub_sat.bind(vector(I16, sse_vector_size)), + rec_fa.opcodes(&PSUBUSW), + ); + + // SIMD integer multiplication: the x86 ISA does not have instructions for multiplying I8x16 + // and I64x2 and these are (at the time of writing) not necessary for WASM SIMD. + for (ty, opcodes, isap) in &[ + (I16, &PMULLW[..], None), + (I32, &PMULLD[..], Some(use_sse41_simd)), + ] { + let imul = imul.bind(vector(*ty, sse_vector_size)); + e.enc_both_inferred_maybe_isap(imul, rec_fa.opcodes(opcodes), *isap); + } + + // SIMD multiplication with lane expansion. + e.enc_both_inferred(x86_pmuludq, rec_fa.opcodes(&PMULUDQ)); + + // SIMD integer multiplication for I64x2 using a AVX512. + { + e.enc_32_64_maybe_isap( + x86_pmullq, + rec_evex_reg_vvvv_rm_128.opcodes(&VPMULLQ).w(), + Some(use_avx512dq_simd), // TODO need an OR predicate to join with AVX512VL + ); + } + + // SIMD integer average with rounding. + for (ty, opcodes) in &[(I8, &PAVGB[..]), (I16, &PAVGW[..])] { + let avgr = avg_round.bind(vector(*ty, sse_vector_size)); + e.enc_both_inferred(avgr, rec_fa.opcodes(opcodes)); + } + + // SIMD integer absolute value. 
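+ // PABSB/PABSW/PABSD are SSSE3 instructions, hence the use_ssse3_simd predicate below; a packed 64-bit absolute value (VPABSQ) only appears with AVX-512, so no iabs encoding is added for i64x2 lanes here.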
+ for (ty, opcodes) in &[(I8, &PABSB[..]), (I16, &PABSW[..]), (I32, &PABSD)] { + let iabs = iabs.bind(vector(*ty, sse_vector_size)); + e.enc_both_inferred_maybe_isap(iabs, rec_furm.opcodes(opcodes), Some(use_ssse3_simd)); + } + + // SIMD logical operations + let band = shared.by_name("band"); + let band_not = shared.by_name("band_not"); + for ty in ValueType::all_lane_types().filter(allowed_simd_type) { + // and + let band = band.bind(vector(ty, sse_vector_size)); + e.enc_both_inferred(band, rec_fa.opcodes(&PAND)); + + // and not (note flipped recipe operands to match band_not order) + let band_not = band_not.bind(vector(ty, sse_vector_size)); + e.enc_both_inferred(band_not, rec_fax.opcodes(&PANDN)); + + // or + let bor = bor.bind(vector(ty, sse_vector_size)); + e.enc_both_inferred(bor, rec_fa.opcodes(&POR)); + + // xor + let bxor = bxor.bind(vector(ty, sse_vector_size)); + e.enc_both_inferred(bxor, rec_fa.opcodes(&PXOR)); + + // ptest + let x86_ptest = x86_ptest.bind(vector(ty, sse_vector_size)); + e.enc_both_inferred_maybe_isap(x86_ptest, rec_fcmp.opcodes(&PTEST), Some(use_sse41_simd)); + } + + // SIMD bitcast from I32/I64 to the low bits of a vector (e.g. I64x2); this register movement + // allows SIMD shifts to be legalized more easily. TODO ideally this would be typed as an + // I128x1 but restrictions on the type builder prevent this; the general idea here is that + // the upper bits are all zeroed and do not form parts of any separate lane. See + // https://github.com/bytecodealliance/wasmtime/issues/1140. + e.enc_both_inferred( + bitcast.bind(vector(I64, sse_vector_size)).bind(I32), + rec_frurm.opcodes(&MOVD_LOAD_XMM), + ); + e.enc64( + bitcast.bind(vector(I64, sse_vector_size)).bind(I64), + rec_frurm.opcodes(&MOVD_LOAD_XMM).rex().w(), + ); + + // SIMD shift left + for (ty, opcodes) in &[(I16, &PSLLW), (I32, &PSLLD), (I64, &PSLLQ)] { + let x86_psll = x86_psll.bind(vector(*ty, sse_vector_size)); + e.enc_both_inferred(x86_psll, rec_fa.opcodes(*opcodes)); + } + + // SIMD shift right (logical) + for (ty, opcodes) in &[(I16, &PSRLW), (I32, &PSRLD), (I64, &PSRLQ)] { + let x86_psrl = x86_psrl.bind(vector(*ty, sse_vector_size)); + e.enc_both_inferred(x86_psrl, rec_fa.opcodes(*opcodes)); + } + + // SIMD shift right (arithmetic) + for (ty, opcodes) in &[(I16, &PSRAW), (I32, &PSRAD)] { + let x86_psra = x86_psra.bind(vector(*ty, sse_vector_size)); + e.enc_both_inferred(x86_psra, rec_fa.opcodes(*opcodes)); + } + + // SIMD immediate shift + for (ty, opcodes) in &[(I16, &PS_W_IMM), (I32, &PS_D_IMM), (I64, &PS_Q_IMM)] { + let ishl_imm = ishl_imm.bind(vector(*ty, sse_vector_size)); + e.enc_both_inferred(ishl_imm, rec_f_ib.opcodes(*opcodes).rrr(6)); + + let ushr_imm = ushr_imm.bind(vector(*ty, sse_vector_size)); + e.enc_both_inferred(ushr_imm, rec_f_ib.opcodes(*opcodes).rrr(2)); + + // One exception: PSRAQ does not exist in for 64x2 in SSE2, it requires a higher CPU feature set. 
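+ // (A packed 64-bit arithmetic right shift only arrives with AVX-512's VPSRAQ, so the i64x2 case simply gets no sshr_imm encoding in this table.)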
+ if *ty != I64 { + let sshr_imm = sshr_imm.bind(vector(*ty, sse_vector_size)); + e.enc_both_inferred(sshr_imm, rec_f_ib.opcodes(*opcodes).rrr(4)); + } + } + + // SIMD integer comparisons + { + use IntCC::*; + for (ty, cc, opcodes, isa_predicate) in &[ + (I8, Equal, &PCMPEQB[..], None), + (I16, Equal, &PCMPEQW[..], None), + (I32, Equal, &PCMPEQD[..], None), + (I64, Equal, &PCMPEQQ[..], Some(use_sse41_simd)), + (I8, SignedGreaterThan, &PCMPGTB[..], None), + (I16, SignedGreaterThan, &PCMPGTW[..], None), + (I32, SignedGreaterThan, &PCMPGTD[..], None), + (I64, SignedGreaterThan, &PCMPGTQ, Some(use_sse42_simd)), + ] { + let instruction = icmp + .bind(Immediate::IntCC(*cc)) + .bind(vector(*ty, sse_vector_size)); + let template = rec_icscc_fpr.opcodes(opcodes); + e.enc_both_inferred_maybe_isap(instruction, template, *isa_predicate); + } + } + + // SIMD min/max + for (ty, inst, opcodes, isa_predicate) in &[ + (I8, x86_pmaxs, &PMAXSB[..], Some(use_sse41_simd)), + (I16, x86_pmaxs, &PMAXSW[..], None), + (I32, x86_pmaxs, &PMAXSD[..], Some(use_sse41_simd)), + (I8, x86_pmaxu, &PMAXUB[..], None), + (I16, x86_pmaxu, &PMAXUW[..], Some(use_sse41_simd)), + (I32, x86_pmaxu, &PMAXUD[..], Some(use_sse41_simd)), + (I8, x86_pmins, &PMINSB[..], Some(use_sse41_simd)), + (I16, x86_pmins, &PMINSW[..], None), + (I32, x86_pmins, &PMINSD[..], Some(use_sse41_simd)), + (I8, x86_pminu, &PMINUB[..], None), + (I16, x86_pminu, &PMINUW[..], Some(use_sse41_simd)), + (I32, x86_pminu, &PMINUD[..], Some(use_sse41_simd)), + ] { + let inst = inst.bind(vector(*ty, sse_vector_size)); + e.enc_both_inferred_maybe_isap(inst, rec_fa.opcodes(opcodes), *isa_predicate); + } + + // SIMD float comparisons + e.enc_both_inferred( + fcmp.bind(vector(F32, sse_vector_size)), + rec_pfcmp.opcodes(&CMPPS), + ); + e.enc_both_inferred( + fcmp.bind(vector(F64, sse_vector_size)), + rec_pfcmp.opcodes(&CMPPD), + ); + + // SIMD float arithmetic + for (ty, inst, opcodes) in &[ + (F32, fadd, &ADDPS[..]), + (F64, fadd, &ADDPD[..]), + (F32, fsub, &SUBPS[..]), + (F64, fsub, &SUBPD[..]), + (F32, fmul, &MULPS[..]), + (F64, fmul, &MULPD[..]), + (F32, fdiv, &DIVPS[..]), + (F64, fdiv, &DIVPD[..]), + (F32, x86_fmin, &MINPS[..]), + (F64, x86_fmin, &MINPD[..]), + (F32, x86_fmax, &MAXPS[..]), + (F64, x86_fmax, &MAXPD[..]), + ] { + let inst = inst.bind(vector(*ty, sse_vector_size)); + e.enc_both_inferred(inst, rec_fa.opcodes(opcodes)); + } + for (ty, inst, opcodes) in &[(F32, sqrt, &SQRTPS[..]), (F64, sqrt, &SQRTPD[..])] { + let inst = inst.bind(vector(*ty, sse_vector_size)); + e.enc_both_inferred(inst, rec_furm.opcodes(opcodes)); + } +} + +#[inline(never)] +fn define_entity_ref( + e: &mut PerCpuModeEncodings, + shared_defs: &SharedDefinitions, + settings: &SettingGroup, + r: &RecipeGroup, +) { + let shared = &shared_defs.instructions; + let formats = &shared_defs.formats; + + // Shorthands for instructions. + let const_addr = shared.by_name("const_addr"); + let func_addr = shared.by_name("func_addr"); + let stack_addr = shared.by_name("stack_addr"); + let symbol_value = shared.by_name("symbol_value"); + + // Shorthands for recipes. 
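+ // (All of the shorthands below are templates, i.e. recipes that still need their opcode bytes supplied through .opcodes(..) before they can form an encoding.)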
+ let rec_allones_fnaddr4 = r.template("allones_fnaddr4"); + let rec_allones_fnaddr8 = r.template("allones_fnaddr8"); + let rec_fnaddr4 = r.template("fnaddr4"); + let rec_fnaddr8 = r.template("fnaddr8"); + let rec_const_addr = r.template("const_addr"); + let rec_got_fnaddr8 = r.template("got_fnaddr8"); + let rec_got_gvaddr8 = r.template("got_gvaddr8"); + let rec_gvaddr4 = r.template("gvaddr4"); + let rec_gvaddr8 = r.template("gvaddr8"); + let rec_pcrel_fnaddr8 = r.template("pcrel_fnaddr8"); + let rec_pcrel_gvaddr8 = r.template("pcrel_gvaddr8"); + let rec_spaddr_id = r.template("spaddr_id"); + + // Predicates shorthands. + let all_ones_funcaddrs_and_not_is_pic = + settings.predicate_by_name("all_ones_funcaddrs_and_not_is_pic"); + let is_pic = settings.predicate_by_name("is_pic"); + let not_all_ones_funcaddrs_and_not_is_pic = + settings.predicate_by_name("not_all_ones_funcaddrs_and_not_is_pic"); + let not_is_pic = settings.predicate_by_name("not_is_pic"); + + // Function addresses. + + // Non-PIC, not-all-ones funcaddresses. + e.enc32_isap( + func_addr.bind(I32), + rec_fnaddr4.opcodes(&MOV_IMM), + not_all_ones_funcaddrs_and_not_is_pic, + ); + e.enc64_isap( + func_addr.bind(I64), + rec_fnaddr8.opcodes(&MOV_IMM).rex().w(), + not_all_ones_funcaddrs_and_not_is_pic, + ); + + // Non-PIC, all-ones funcaddresses. + e.enc32_isap( + func_addr.bind(I32), + rec_allones_fnaddr4.opcodes(&MOV_IMM), + all_ones_funcaddrs_and_not_is_pic, + ); + e.enc64_isap( + func_addr.bind(I64), + rec_allones_fnaddr8.opcodes(&MOV_IMM).rex().w(), + all_ones_funcaddrs_and_not_is_pic, + ); + + // 64-bit, colocated, both PIC and non-PIC. Use the lea instruction's pc-relative field. + let is_colocated_func = + InstructionPredicate::new_is_colocated_func(&*formats.func_addr, "func_ref"); + e.enc64_instp( + func_addr.bind(I64), + rec_pcrel_fnaddr8.opcodes(&LEA).rex().w(), + is_colocated_func, + ); + + // 64-bit, non-colocated, PIC. + e.enc64_isap( + func_addr.bind(I64), + rec_got_fnaddr8.opcodes(&MOV_LOAD).rex().w(), + is_pic, + ); + + // Global addresses. + + // Non-PIC. + e.enc32_isap( + symbol_value.bind(I32), + rec_gvaddr4.opcodes(&MOV_IMM), + not_is_pic, + ); + e.enc64_isap( + symbol_value.bind(I64), + rec_gvaddr8.opcodes(&MOV_IMM).rex().w(), + not_is_pic, + ); + + // PIC, colocated. + e.enc64_func( + symbol_value.bind(I64), + rec_pcrel_gvaddr8.opcodes(&LEA).rex().w(), + |encoding| { + encoding + .isa_predicate(is_pic) + .inst_predicate(InstructionPredicate::new_is_colocated_data(formats)) + }, + ); + + // PIC, non-colocated. + e.enc64_isap( + symbol_value.bind(I64), + rec_got_gvaddr8.opcodes(&MOV_LOAD).rex().w(), + is_pic, + ); + + // Stack addresses. + // + // TODO: Add encoding rules for stack_load and stack_store, so that they + // don't get legalized to stack_addr + load/store. + e.enc64(stack_addr.bind(I64), rec_spaddr_id.opcodes(&LEA).rex().w()); + e.enc32(stack_addr.bind(I32), rec_spaddr_id.opcodes(&LEA)); + + // Constant addresses (PIC). + e.enc64(const_addr.bind(I64), rec_const_addr.opcodes(&LEA).rex().w()); + e.enc32(const_addr.bind(I32), rec_const_addr.opcodes(&LEA)); +} + +/// Control flow opcodes. +#[inline(never)] +fn define_control_flow( + e: &mut PerCpuModeEncodings, + shared_defs: &SharedDefinitions, + settings: &SettingGroup, + r: &RecipeGroup, +) { + let shared = &shared_defs.instructions; + let formats = &shared_defs.formats; + + // Shorthands for instructions.
+ let brff = shared.by_name("brff"); + let brif = shared.by_name("brif"); + let brnz = shared.by_name("brnz"); + let brz = shared.by_name("brz"); + let call = shared.by_name("call"); + let call_indirect = shared.by_name("call_indirect"); + let debugtrap = shared.by_name("debugtrap"); + let indirect_jump_table_br = shared.by_name("indirect_jump_table_br"); + let jump = shared.by_name("jump"); + let jump_table_base = shared.by_name("jump_table_base"); + let jump_table_entry = shared.by_name("jump_table_entry"); + let return_ = shared.by_name("return"); + let trap = shared.by_name("trap"); + let trapff = shared.by_name("trapff"); + let trapif = shared.by_name("trapif"); + let resumable_trap = shared.by_name("resumable_trap"); + + // Shorthands for recipes. + let rec_brfb = r.template("brfb"); + let rec_brfd = r.template("brfd"); + let rec_brib = r.template("brib"); + let rec_brid = r.template("brid"); + let rec_call_id = r.template("call_id"); + let rec_call_plt_id = r.template("call_plt_id"); + let rec_call_r = r.template("call_r"); + let rec_debugtrap = r.recipe("debugtrap"); + let rec_indirect_jmp = r.template("indirect_jmp"); + let rec_jmpb = r.template("jmpb"); + let rec_jmpd = r.template("jmpd"); + let rec_jt_base = r.template("jt_base"); + let rec_jt_entry = r.template("jt_entry"); + let rec_ret = r.template("ret"); + let rec_t8jccb_abcd = r.template("t8jccb_abcd"); + let rec_t8jccd_abcd = r.template("t8jccd_abcd"); + let rec_t8jccd_long = r.template("t8jccd_long"); + let rec_tjccb = r.template("tjccb"); + let rec_tjccd = r.template("tjccd"); + let rec_trap = r.template("trap"); + let rec_trapif = r.recipe("trapif"); + let rec_trapff = r.recipe("trapff"); + + // Predicates shorthands. + let is_pic = settings.predicate_by_name("is_pic"); + + // Call/return + + // 32-bit, both PIC and non-PIC. + e.enc32(call, rec_call_id.opcodes(&CALL_RELATIVE)); + + // 64-bit, colocated, both PIC and non-PIC. Use the call instruction's pc-relative field. + let is_colocated_func = InstructionPredicate::new_is_colocated_func(&*formats.call, "func_ref"); + e.enc64_instp(call, rec_call_id.opcodes(&CALL_RELATIVE), is_colocated_func); + + // 64-bit, non-colocated, PIC. There is no 64-bit non-colocated non-PIC version, since non-PIC + // is currently using the large model, which requires calls be lowered to + // func_addr+call_indirect. + e.enc64_isap(call, rec_call_plt_id.opcodes(&CALL_RELATIVE), is_pic); + + e.enc32( + call_indirect.bind(I32), + rec_call_r.opcodes(&JUMP_ABSOLUTE).rrr(2), + ); + e.enc64( + call_indirect.bind(I64), + rec_call_r.opcodes(&JUMP_ABSOLUTE).rrr(2).rex(), + ); + e.enc64( + call_indirect.bind(I64), + rec_call_r.opcodes(&JUMP_ABSOLUTE).rrr(2), + ); + + e.enc32(return_, rec_ret.opcodes(&RET_NEAR)); + e.enc64(return_, rec_ret.opcodes(&RET_NEAR)); + + // Branches. + e.enc32(jump, rec_jmpb.opcodes(&JUMP_SHORT)); + e.enc64(jump, rec_jmpb.opcodes(&JUMP_SHORT)); + e.enc32(jump, rec_jmpd.opcodes(&JUMP_NEAR_RELATIVE)); + e.enc64(jump, rec_jmpd.opcodes(&JUMP_NEAR_RELATIVE)); + + e.enc_both(brif, rec_brib.opcodes(&JUMP_SHORT_IF_OVERFLOW)); + e.enc_both(brif, rec_brid.opcodes(&JUMP_NEAR_IF_OVERFLOW)); + + // Not all float condition codes are legal, see `supported_floatccs`. + e.enc_both(brff, rec_brfb.opcodes(&JUMP_SHORT_IF_OVERFLOW)); + e.enc_both(brff, rec_brfd.opcodes(&JUMP_NEAR_IF_OVERFLOW)); + + // Note that the tjccd opcode will be prefixed with 0x0f. 
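+ // That prefix is why TEST_BYTE_REG and TEST_REG appear as branch opcodes below: their byte values (presumably 0x84 and 0x85, matching the TEST opcodes) become the near JE/JNE encodings once the recipe emits the 0x0f escape byte in front of them.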
+ e.enc_i32_i64_explicit_rex(brz, rec_tjccb.opcodes(&JUMP_SHORT_IF_EQUAL)); + e.enc_i32_i64_explicit_rex(brz, rec_tjccd.opcodes(&TEST_BYTE_REG)); + e.enc_i32_i64_explicit_rex(brnz, rec_tjccb.opcodes(&JUMP_SHORT_IF_NOT_EQUAL)); + e.enc_i32_i64_explicit_rex(brnz, rec_tjccd.opcodes(&TEST_REG)); + + // Branch on a b1 value in a register only looks at the low 8 bits. See also + // bint encodings below. + // + // Start with the worst-case encoding for X86_32 only. The register allocator + // can't handle a branch with an ABCD-constrained operand. + e.enc32(brz.bind(B1), rec_t8jccd_long.opcodes(&TEST_BYTE_REG)); + e.enc32(brnz.bind(B1), rec_t8jccd_long.opcodes(&TEST_REG)); + + e.enc_both(brz.bind(B1), rec_t8jccb_abcd.opcodes(&JUMP_SHORT_IF_EQUAL)); + e.enc_both(brz.bind(B1), rec_t8jccd_abcd.opcodes(&TEST_BYTE_REG)); + e.enc_both( + brnz.bind(B1), + rec_t8jccb_abcd.opcodes(&JUMP_SHORT_IF_NOT_EQUAL), + ); + e.enc_both(brnz.bind(B1), rec_t8jccd_abcd.opcodes(&TEST_REG)); + + // Jump tables. + e.enc64( + jump_table_entry.bind(I64), + rec_jt_entry.opcodes(&MOVSXD).rex().w(), + ); + e.enc32(jump_table_entry.bind(I32), rec_jt_entry.opcodes(&MOV_LOAD)); + + e.enc64( + jump_table_base.bind(I64), + rec_jt_base.opcodes(&LEA).rex().w(), + ); + e.enc32(jump_table_base.bind(I32), rec_jt_base.opcodes(&LEA)); + + e.enc_x86_64( + indirect_jump_table_br.bind(I64), + rec_indirect_jmp.opcodes(&JUMP_ABSOLUTE).rrr(4), + ); + e.enc32( + indirect_jump_table_br.bind(I32), + rec_indirect_jmp.opcodes(&JUMP_ABSOLUTE).rrr(4), + ); + + // Trap as ud2 + e.enc32(trap, rec_trap.opcodes(&UNDEFINED2)); + e.enc64(trap, rec_trap.opcodes(&UNDEFINED2)); + e.enc32(resumable_trap, rec_trap.opcodes(&UNDEFINED2)); + e.enc64(resumable_trap, rec_trap.opcodes(&UNDEFINED2)); + + // Debug trap as int3 + e.enc32_rec(debugtrap, rec_debugtrap, 0); + e.enc64_rec(debugtrap, rec_debugtrap, 0); + + e.enc32_rec(trapif, rec_trapif, 0); + e.enc64_rec(trapif, rec_trapif, 0); + e.enc32_rec(trapff, rec_trapff, 0); + e.enc64_rec(trapff, rec_trapff, 0); +} + +/// Reference type instructions. +#[inline(never)] +fn define_reftypes(e: &mut PerCpuModeEncodings, shared_defs: &SharedDefinitions, r: &RecipeGroup) { + let shared = &shared_defs.instructions; + + let is_null = shared.by_name("is_null"); + let is_invalid = shared.by_name("is_invalid"); + let null = shared.by_name("null"); + let safepoint = shared.by_name("safepoint"); + + let rec_is_zero = r.template("is_zero"); + let rec_is_invalid = r.template("is_invalid"); + let rec_pu_id_ref = r.template("pu_id_ref"); + let rec_safepoint = r.recipe("safepoint"); + + // Null references implemented as iconst 0. + e.enc32(null.bind(R32), rec_pu_id_ref.opcodes(&MOV_IMM)); + + e.enc64(null.bind(R64), rec_pu_id_ref.rex().opcodes(&MOV_IMM)); + e.enc64(null.bind(R64), rec_pu_id_ref.opcodes(&MOV_IMM)); + + // is_null, implemented by testing whether the value is 0. + e.enc_r32_r64_rex_only(is_null, rec_is_zero.opcodes(&TEST_REG)); + + // is_invalid, implemented by testing whether the value is -1. + e.enc_r32_r64_rex_only(is_invalid, rec_is_invalid.opcodes(&CMP_IMM8).rrr(7)); + + // safepoint instruction calls sink, no actual encoding. + e.enc32_rec(safepoint, rec_safepoint, 0); + e.enc64_rec(safepoint, rec_safepoint, 0); +} + +#[allow(clippy::cognitive_complexity)] +pub(crate) fn define( + shared_defs: &SharedDefinitions, + settings: &SettingGroup, + x86: &InstructionGroup, + r: &RecipeGroup, +) -> PerCpuModeEncodings { + // Definitions. 
+ let mut e = PerCpuModeEncodings::new(); + + define_moves(&mut e, shared_defs, r); + define_memory(&mut e, shared_defs, x86, r); + define_fpu_moves(&mut e, shared_defs, r); + define_fpu_memory(&mut e, shared_defs, r); + define_fpu_ops(&mut e, shared_defs, settings, x86, r); + define_alu(&mut e, shared_defs, settings, x86, r); + define_simd(&mut e, shared_defs, settings, x86, r); + define_entity_ref(&mut e, shared_defs, settings, r); + define_control_flow(&mut e, shared_defs, settings, r); + define_reftypes(&mut e, shared_defs, r); + + let x86_elf_tls_get_addr = x86.by_name("x86_elf_tls_get_addr"); + let x86_macho_tls_get_addr = x86.by_name("x86_macho_tls_get_addr"); + + let rec_elf_tls_get_addr = r.recipe("elf_tls_get_addr"); + let rec_macho_tls_get_addr = r.recipe("macho_tls_get_addr"); + + e.enc64_rec(x86_elf_tls_get_addr, rec_elf_tls_get_addr, 0); + e.enc64_rec(x86_macho_tls_get_addr, rec_macho_tls_get_addr, 0); + + e +} diff --git a/third_party/rust/cranelift-codegen-meta/src/isa/x86/instructions.rs b/third_party/rust/cranelift-codegen-meta/src/isa/x86/instructions.rs new file mode 100644 index 0000000000..7acd2e2c50 --- /dev/null +++ b/third_party/rust/cranelift-codegen-meta/src/isa/x86/instructions.rs @@ -0,0 +1,723 @@ +#![allow(non_snake_case)] + +use crate::cdsl::instructions::{ + AllInstructions, InstructionBuilder as Inst, InstructionGroup, InstructionGroupBuilder, +}; +use crate::cdsl::operands::Operand; +use crate::cdsl::types::ValueType; +use crate::cdsl::typevar::{Interval, TypeSetBuilder, TypeVar}; +use crate::shared::entities::EntityRefs; +use crate::shared::formats::Formats; +use crate::shared::immediates::Immediates; +use crate::shared::types; + +#[allow(clippy::many_single_char_names)] +pub(crate) fn define( + mut all_instructions: &mut AllInstructions, + formats: &Formats, + immediates: &Immediates, + entities: &EntityRefs, +) -> InstructionGroup { + let mut ig = InstructionGroupBuilder::new(&mut all_instructions); + + let iflags: &TypeVar = &ValueType::Special(types::Flag::IFlags.into()).into(); + + let iWord = &TypeVar::new( + "iWord", + "A scalar integer machine word", + TypeSetBuilder::new().ints(32..64).build(), + ); + let nlo = &Operand::new("nlo", iWord).with_doc("Low part of numerator"); + let nhi = &Operand::new("nhi", iWord).with_doc("High part of numerator"); + let d = &Operand::new("d", iWord).with_doc("Denominator"); + let q = &Operand::new("q", iWord).with_doc("Quotient"); + let r = &Operand::new("r", iWord).with_doc("Remainder"); + + ig.push( + Inst::new( + "x86_udivmodx", + r#" + Extended unsigned division. + + Concatenate the bits in `nhi` and `nlo` to form the numerator. + Interpret the bits as an unsigned number and divide by the unsigned + denominator `d`. Trap when `d` is zero or if the quotient is larger + than the range of the output. + + Return both quotient and remainder. + "#, + &formats.ternary, + ) + .operands_in(vec![nlo, nhi, d]) + .operands_out(vec![q, r]) + .can_trap(true), + ); + + ig.push( + Inst::new( + "x86_sdivmodx", + r#" + Extended signed division. + + Concatenate the bits in `nhi` and `nlo` to form the numerator. + Interpret the bits as a signed number and divide by the signed + denominator `d`. Trap when `d` is zero or if the quotient is outside + the range of the output. + + Return both quotient and remainder. 
+ "#, + &formats.ternary, + ) + .operands_in(vec![nlo, nhi, d]) + .operands_out(vec![q, r]) + .can_trap(true), + ); + + let argL = &Operand::new("argL", iWord); + let argR = &Operand::new("argR", iWord); + let resLo = &Operand::new("resLo", iWord); + let resHi = &Operand::new("resHi", iWord); + + ig.push( + Inst::new( + "x86_umulx", + r#" + Unsigned integer multiplication, producing a double-length result. + + Polymorphic over all scalar integer types, but does not support vector + types. + "#, + &formats.binary, + ) + .operands_in(vec![argL, argR]) + .operands_out(vec![resLo, resHi]), + ); + + ig.push( + Inst::new( + "x86_smulx", + r#" + Signed integer multiplication, producing a double-length result. + + Polymorphic over all scalar integer types, but does not support vector + types. + "#, + &formats.binary, + ) + .operands_in(vec![argL, argR]) + .operands_out(vec![resLo, resHi]), + ); + + let Float = &TypeVar::new( + "Float", + "A scalar or vector floating point number", + TypeSetBuilder::new() + .floats(Interval::All) + .simd_lanes(Interval::All) + .build(), + ); + let IntTo = &TypeVar::new( + "IntTo", + "An integer type with the same number of lanes", + TypeSetBuilder::new() + .ints(32..64) + .simd_lanes(Interval::All) + .build(), + ); + let x = &Operand::new("x", Float); + let a = &Operand::new("a", IntTo); + + ig.push( + Inst::new( + "x86_cvtt2si", + r#" + Convert with truncation floating point to signed integer. + + The source floating point operand is converted to a signed integer by + rounding towards zero. If the result can't be represented in the output + type, returns the smallest signed value the output type can represent. + + This instruction does not trap. + "#, + &formats.unary, + ) + .operands_in(vec![x]) + .operands_out(vec![a]), + ); + + let f32x4 = &TypeVar::new( + "f32x4", + "A floating point number", + TypeSetBuilder::new() + .floats(32..32) + .simd_lanes(4..4) + .build(), + ); + let i32x4 = &TypeVar::new( + "i32x4", + "An integer type with the same number of lanes", + TypeSetBuilder::new().ints(32..32).simd_lanes(4..4).build(), + ); + let x = &Operand::new("x", i32x4); + let a = &Operand::new("a", f32x4); + + ig.push( + Inst::new( + "x86_vcvtudq2ps", + r#" + Convert unsigned integer to floating point. + + Convert packed doubleword unsigned integers to packed single-precision floating-point + values. This instruction does not trap. + "#, + &formats.unary, + ) + .operands_in(vec![x]) + .operands_out(vec![a]), + ); + + let x = &Operand::new("x", Float); + let a = &Operand::new("a", Float); + let y = &Operand::new("y", Float); + + ig.push( + Inst::new( + "x86_fmin", + r#" + Floating point minimum with x86 semantics. + + This is equivalent to the C ternary operator `x < y ? x : y` which + differs from `fmin` when either operand is NaN or when comparing + +0.0 to -0.0. + + When the two operands don't compare as LT, `y` is returned unchanged, + even if it is a signalling NaN. + "#, + &formats.binary, + ) + .operands_in(vec![x, y]) + .operands_out(vec![a]), + ); + + ig.push( + Inst::new( + "x86_fmax", + r#" + Floating point maximum with x86 semantics. + + This is equivalent to the C ternary operator `x > y ? x : y` which + differs from `fmax` when either operand is NaN or when comparing + +0.0 to -0.0. + + When the two operands don't compare as GT, `y` is returned unchanged, + even if it is a signalling NaN. 
+ "#, + &formats.binary, + ) + .operands_in(vec![x, y]) + .operands_out(vec![a]), + ); + + let x = &Operand::new("x", iWord); + + ig.push( + Inst::new( + "x86_push", + r#" + Pushes a value onto the stack. + + Decrements the stack pointer and stores the specified value on to the top. + + This is polymorphic in i32 and i64. However, it is only implemented for i64 + in 64-bit mode, and only for i32 in 32-bit mode. + "#, + &formats.unary, + ) + .operands_in(vec![x]) + .other_side_effects(true) + .can_store(true), + ); + + ig.push( + Inst::new( + "x86_pop", + r#" + Pops a value from the stack. + + Loads a value from the top of the stack and then increments the stack + pointer. + + This is polymorphic in i32 and i64. However, it is only implemented for i64 + in 64-bit mode, and only for i32 in 32-bit mode. + "#, + &formats.nullary, + ) + .operands_out(vec![x]) + .other_side_effects(true) + .can_load(true), + ); + + let y = &Operand::new("y", iWord); + let rflags = &Operand::new("rflags", iflags); + + ig.push( + Inst::new( + "x86_bsr", + r#" + Bit Scan Reverse -- returns the bit-index of the most significant 1 + in the word. Result is undefined if the argument is zero. However, it + sets the Z flag depending on the argument, so it is at least easy to + detect and handle that case. + + This is polymorphic in i32 and i64. It is implemented for both i64 and + i32 in 64-bit mode, and only for i32 in 32-bit mode. + "#, + &formats.unary, + ) + .operands_in(vec![x]) + .operands_out(vec![y, rflags]), + ); + + ig.push( + Inst::new( + "x86_bsf", + r#" + Bit Scan Forwards -- returns the bit-index of the least significant 1 + in the word. Is otherwise identical to 'bsr', just above. + "#, + &formats.unary, + ) + .operands_in(vec![x]) + .operands_out(vec![y, rflags]), + ); + + let uimm8 = &immediates.uimm8; + let TxN = &TypeVar::new( + "TxN", + "A SIMD vector type", + TypeSetBuilder::new() + .ints(Interval::All) + .floats(Interval::All) + .bools(Interval::All) + .simd_lanes(Interval::All) + .includes_scalars(false) + .build(), + ); + let a = &Operand::new("a", TxN).with_doc("A vector value (i.e. held in an XMM register)"); + let b = &Operand::new("b", TxN).with_doc("A vector value (i.e. held in an XMM register)"); + let i = &Operand::new("i", uimm8).with_doc("An ordering operand controlling the copying of data from the source to the destination; see PSHUFD in Intel manual for details"); + + ig.push( + Inst::new( + "x86_pshufd", + r#" + Packed Shuffle Doublewords -- copies data from either memory or lanes in an extended + register and re-orders the data according to the passed immediate byte. + "#, + &formats.binary_imm8, + ) + .operands_in(vec![a, i]) // TODO allow copying from memory here (need more permissive type than TxN) + .operands_out(vec![a]), + ); + + ig.push( + Inst::new( + "x86_pshufb", + r#" + Packed Shuffle Bytes -- re-orders data in an extended register using a shuffle + mask from either memory or another extended register + "#, + &formats.binary, + ) + .operands_in(vec![a, b]) // TODO allow re-ordering from memory here (need more permissive type than TxN) + .operands_out(vec![a]), + ); + + let mask = &Operand::new("mask", uimm8).with_doc("mask to select lanes from b"); + ig.push( + Inst::new( + "x86_pblendw", + r#" + Blend packed words using an immediate mask. Each bit of the 8-bit immediate corresponds to a + lane in ``b``: if the bit is set, the lane is copied into ``a``. 
+ "#, + &formats.ternary_imm8, + ) + .operands_in(vec![a, b, mask]) + .operands_out(vec![a]), + ); + + let Idx = &Operand::new("Idx", uimm8).with_doc("Lane index"); + let x = &Operand::new("x", TxN); + let a = &Operand::new("a", &TxN.lane_of()); + + ig.push( + Inst::new( + "x86_pextr", + r#" + Extract lane ``Idx`` from ``x``. + The lane index, ``Idx``, is an immediate value, not an SSA value. It + must indicate a valid lane index for the type of ``x``. + "#, + &formats.binary_imm8, + ) + .operands_in(vec![x, Idx]) + .operands_out(vec![a]), + ); + + let IBxN = &TypeVar::new( + "IBxN", + "A SIMD vector type containing only booleans and integers", + TypeSetBuilder::new() + .ints(Interval::All) + .bools(Interval::All) + .simd_lanes(Interval::All) + .includes_scalars(false) + .build(), + ); + let x = &Operand::new("x", IBxN); + let y = &Operand::new("y", &IBxN.lane_of()).with_doc("New lane value"); + let a = &Operand::new("a", IBxN); + + ig.push( + Inst::new( + "x86_pinsr", + r#" + Insert ``y`` into ``x`` at lane ``Idx``. + The lane index, ``Idx``, is an immediate value, not an SSA value. It + must indicate a valid lane index for the type of ``x``. + "#, + &formats.ternary_imm8, + ) + .operands_in(vec![x, y, Idx]) + .operands_out(vec![a]), + ); + + let FxN = &TypeVar::new( + "FxN", + "A SIMD vector type containing floats", + TypeSetBuilder::new() + .floats(Interval::All) + .simd_lanes(Interval::All) + .includes_scalars(false) + .build(), + ); + let x = &Operand::new("x", FxN); + let y = &Operand::new("y", &FxN.lane_of()).with_doc("New lane value"); + let a = &Operand::new("a", FxN); + + ig.push( + Inst::new( + "x86_insertps", + r#" + Insert a lane of ``y`` into ``x`` at using ``Idx`` to encode both which lane the value is + extracted from and which it is inserted to. This is similar to x86_pinsr but inserts + floats, which are already stored in an XMM register. + "#, + &formats.ternary_imm8, + ) + .operands_in(vec![x, y, Idx]) + .operands_out(vec![a]), + ); + + let x = &Operand::new("x", TxN); + let y = &Operand::new("y", TxN); + let a = &Operand::new("a", TxN); + + ig.push( + Inst::new( + "x86_punpckh", + r#" + Unpack the high-order lanes of ``x`` and ``y`` and interleave into ``a``. With notional + i8x4 vectors, where ``x = [x3, x2, x1, x0]`` and ``y = [y3, y2, y1, y0]``, this operation + would result in ``a = [y3, x3, y2, x2]`` (using the Intel manual's right-to-left lane + ordering). + "#, + &formats.binary, + ) + .operands_in(vec![x, y]) + .operands_out(vec![a]), + ); + + ig.push( + Inst::new( + "x86_punpckl", + r#" + Unpack the low-order lanes of ``x`` and ``y`` and interleave into ``a``. With notional + i8x4 vectors, where ``x = [x3, x2, x1, x0]`` and ``y = [y3, y2, y1, y0]``, this operation + would result in ``a = [y1, x1, y0, x0]`` (using the Intel manual's right-to-left lane + ordering). 
+ "#, + &formats.binary, + ) + .operands_in(vec![x, y]) + .operands_out(vec![a]), + ); + + let x = &Operand::new("x", FxN); + let y = &Operand::new("y", FxN); + let a = &Operand::new("a", FxN); + + ig.push( + Inst::new( + "x86_movsd", + r#" + Move the low 64 bits of the float vector ``y`` to the low 64 bits of float vector ``x`` + "#, + &formats.binary, + ) + .operands_in(vec![x, y]) + .operands_out(vec![a]), + ); + + ig.push( + Inst::new( + "x86_movlhps", + r#" + Move the low 64 bits of the float vector ``y`` to the high 64 bits of float vector ``x`` + "#, + &formats.binary, + ) + .operands_in(vec![x, y]) + .operands_out(vec![a]), + ); + + let IxN = &TypeVar::new( + "IxN", + "A SIMD vector type containing integers", + TypeSetBuilder::new() + .ints(Interval::All) + .simd_lanes(Interval::All) + .includes_scalars(false) + .build(), + ); + let I128 = &TypeVar::new( + "I128", + "A SIMD vector type containing one large integer (due to Cranelift type constraints, \ + this uses the Cranelift I64X2 type but should be understood as one large value, i.e., the \ + upper lane is concatenated with the lower lane to form the integer)", + TypeSetBuilder::new() + .ints(64..64) + .simd_lanes(2..2) + .includes_scalars(false) + .build(), + ); + + let x = &Operand::new("x", IxN).with_doc("Vector value to shift"); + let y = &Operand::new("y", I128).with_doc("Number of bits to shift"); + let a = &Operand::new("a", IxN); + + ig.push( + Inst::new( + "x86_psll", + r#" + Shift Packed Data Left Logical -- This implements the behavior of the shared instruction + ``ishl`` but alters the shift operand to live in an XMM register as expected by the PSLL* + family of instructions. + "#, + &formats.binary, + ) + .operands_in(vec![x, y]) + .operands_out(vec![a]), + ); + + ig.push( + Inst::new( + "x86_psrl", + r#" + Shift Packed Data Right Logical -- This implements the behavior of the shared instruction + ``ushr`` but alters the shift operand to live in an XMM register as expected by the PSRL* + family of instructions. + "#, + &formats.binary, + ) + .operands_in(vec![x, y]) + .operands_out(vec![a]), + ); + + ig.push( + Inst::new( + "x86_psra", + r#" + Shift Packed Data Right Arithmetic -- This implements the behavior of the shared + instruction ``sshr`` but alters the shift operand to live in an XMM register as expected by + the PSRA* family of instructions. + "#, + &formats.binary, + ) + .operands_in(vec![x, y]) + .operands_out(vec![a]), + ); + + let I64x2 = &TypeVar::new( + "I64x2", + "A SIMD vector type containing two 64-bit integers", + TypeSetBuilder::new() + .ints(64..64) + .simd_lanes(2..2) + .includes_scalars(false) + .build(), + ); + + let x = &Operand::new("x", I64x2); + let y = &Operand::new("y", I64x2); + let a = &Operand::new("a", I64x2); + ig.push( + Inst::new( + "x86_pmullq", + r#" + Multiply Packed Integers -- Multiply two 64x2 integers and receive a 64x2 result with + lane-wise wrapping if the result overflows. This instruction is necessary to add distinct + encodings for CPUs with newer vector features. + "#, + &formats.binary, + ) + .operands_in(vec![x, y]) + .operands_out(vec![a]), + ); + + ig.push( + Inst::new( + "x86_pmuludq", + r#" + Multiply Packed Integers -- Using only the bottom 32 bits in each lane, multiply two 64x2 + unsigned integers and receive a 64x2 result. This instruction avoids the need for handling + overflow as in `x86_pmullq`. 
+ "#, + &formats.binary, + ) + .operands_in(vec![x, y]) + .operands_out(vec![a]), + ); + + let x = &Operand::new("x", TxN); + let y = &Operand::new("y", TxN); + let f = &Operand::new("f", iflags); + ig.push( + Inst::new( + "x86_ptest", + r#" + Logical Compare -- PTEST will set the ZF flag if all bits in the result are 0 of the + bitwise AND of the first source operand (first operand) and the second source operand + (second operand). PTEST sets the CF flag if all bits in the result are 0 of the bitwise + AND of the second source operand (second operand) and the logical NOT of the destination + operand (first operand). + "#, + &formats.binary, + ) + .operands_in(vec![x, y]) + .operands_out(vec![f]), + ); + + let x = &Operand::new("x", IxN); + let y = &Operand::new("y", IxN); + let a = &Operand::new("a", IxN); + ig.push( + Inst::new( + "x86_pmaxs", + r#" + Maximum of Packed Signed Integers -- Compare signed integers in the first and second + operand and return the maximum values. + "#, + &formats.binary, + ) + .operands_in(vec![x, y]) + .operands_out(vec![a]), + ); + + ig.push( + Inst::new( + "x86_pmaxu", + r#" + Maximum of Packed Unsigned Integers -- Compare unsigned integers in the first and second + operand and return the maximum values. + "#, + &formats.binary, + ) + .operands_in(vec![x, y]) + .operands_out(vec![a]), + ); + + ig.push( + Inst::new( + "x86_pmins", + r#" + Minimum of Packed Signed Integers -- Compare signed integers in the first and second + operand and return the minimum values. + "#, + &formats.binary, + ) + .operands_in(vec![x, y]) + .operands_out(vec![a]), + ); + + ig.push( + Inst::new( + "x86_pminu", + r#" + Minimum of Packed Unsigned Integers -- Compare unsigned integers in the first and second + operand and return the minimum values. + "#, + &formats.binary, + ) + .operands_in(vec![x, y]) + .operands_out(vec![a]), + ); + + let c = &Operand::new("c", uimm8) + .with_doc("The number of bytes to shift right; see PALIGNR in Intel manual for details"); + ig.push( + Inst::new( + "x86_palignr", + r#" + Concatenate destination and source operands, extracting a byte-aligned result shifted to + the right by `c`. + "#, + &formats.ternary_imm8, + ) + .operands_in(vec![x, y, c]) + .operands_out(vec![a]), + ); + + let i64_t = &TypeVar::new( + "i64_t", + "A scalar 64bit integer", + TypeSetBuilder::new().ints(64..64).build(), + ); + + let GV = &Operand::new("GV", &entities.global_value); + let addr = &Operand::new("addr", i64_t); + + ig.push( + Inst::new( + "x86_elf_tls_get_addr", + r#" + Elf tls get addr -- This implements the GD TLS model for ELF. The clobber output should + not be used. + "#, + &formats.unary_global_value, + ) + // This is a bit overly broad to mark as clobbering *all* the registers, because it should + // only preserve caller-saved registers. There's no way to indicate this to register + // allocation yet, though, so mark as clobbering all registers instead. + .clobbers_all_regs(true) + .operands_in(vec![GV]) + .operands_out(vec![addr]), + ); + ig.push( + Inst::new( + "x86_macho_tls_get_addr", + r#" + Mach-O tls get addr -- This implements TLS access for Mach-O. The clobber output should + not be used. + "#, + &formats.unary_global_value, + ) + // See above comment for x86_elf_tls_get_addr. 
+ .clobbers_all_regs(true) + .operands_in(vec![GV]) + .operands_out(vec![addr]), + ); + + ig.build() +} diff --git a/third_party/rust/cranelift-codegen-meta/src/isa/x86/legalize.rs b/third_party/rust/cranelift-codegen-meta/src/isa/x86/legalize.rs new file mode 100644 index 0000000000..681b3104d5 --- /dev/null +++ b/third_party/rust/cranelift-codegen-meta/src/isa/x86/legalize.rs @@ -0,0 +1,829 @@ +use crate::cdsl::ast::{constant, var, ExprBuilder, Literal}; +use crate::cdsl::instructions::{vector, Bindable, InstructionGroup}; +use crate::cdsl::types::{LaneType, ValueType}; +use crate::cdsl::xform::TransformGroupBuilder; +use crate::shared::types::Float::{F32, F64}; +use crate::shared::types::Int::{I16, I32, I64, I8}; +use crate::shared::Definitions as SharedDefinitions; + +#[allow(clippy::many_single_char_names)] +pub(crate) fn define(shared: &mut SharedDefinitions, x86_instructions: &InstructionGroup) { + let mut expand = TransformGroupBuilder::new( + "x86_expand", + r#" + Legalize instructions by expansion. + + Use x86-specific instructions if needed."#, + ) + .isa("x86") + .chain_with(shared.transform_groups.by_name("expand_flags").id); + + let mut narrow = TransformGroupBuilder::new( + "x86_narrow", + r#" + Legalize instructions by narrowing. + + Use x86-specific instructions if needed."#, + ) + .isa("x86") + .chain_with(shared.transform_groups.by_name("narrow_flags").id); + + let mut narrow_avx = TransformGroupBuilder::new( + "x86_narrow_avx", + r#" + Legalize instructions by narrowing with CPU feature checks. + + This special case converts using x86 AVX instructions where available."#, + ) + .isa("x86"); + // We cannot chain with the x86_narrow group until this group is built, see bottom of this + // function for where this is chained. + + let mut widen = TransformGroupBuilder::new( + "x86_widen", + r#" + Legalize instructions by widening. + + Use x86-specific instructions if needed."#, + ) + .isa("x86") + .chain_with(shared.transform_groups.by_name("widen").id); + + // List of instructions. 
+ let insts = &shared.instructions; + let band = insts.by_name("band"); + let bor = insts.by_name("bor"); + let clz = insts.by_name("clz"); + let ctz = insts.by_name("ctz"); + let fcmp = insts.by_name("fcmp"); + let fcvt_from_uint = insts.by_name("fcvt_from_uint"); + let fcvt_to_sint = insts.by_name("fcvt_to_sint"); + let fcvt_to_uint = insts.by_name("fcvt_to_uint"); + let fcvt_to_sint_sat = insts.by_name("fcvt_to_sint_sat"); + let fcvt_to_uint_sat = insts.by_name("fcvt_to_uint_sat"); + let fmax = insts.by_name("fmax"); + let fmin = insts.by_name("fmin"); + let iadd = insts.by_name("iadd"); + let iconst = insts.by_name("iconst"); + let imul = insts.by_name("imul"); + let ineg = insts.by_name("ineg"); + let isub = insts.by_name("isub"); + let ishl = insts.by_name("ishl"); + let ireduce = insts.by_name("ireduce"); + let popcnt = insts.by_name("popcnt"); + let sdiv = insts.by_name("sdiv"); + let selectif = insts.by_name("selectif"); + let smulhi = insts.by_name("smulhi"); + let srem = insts.by_name("srem"); + let tls_value = insts.by_name("tls_value"); + let udiv = insts.by_name("udiv"); + let umulhi = insts.by_name("umulhi"); + let ushr = insts.by_name("ushr"); + let ushr_imm = insts.by_name("ushr_imm"); + let urem = insts.by_name("urem"); + + let x86_bsf = x86_instructions.by_name("x86_bsf"); + let x86_bsr = x86_instructions.by_name("x86_bsr"); + let x86_umulx = x86_instructions.by_name("x86_umulx"); + let x86_smulx = x86_instructions.by_name("x86_smulx"); + + let imm = &shared.imm; + + // Shift by a 64-bit amount is equivalent to a shift by that amount mod 32, so we can reduce + // the size of the shift amount. This is useful for x86_32, where an I64 shift amount is + // not encodable. + let a = var("a"); + let x = var("x"); + let y = var("y"); + let z = var("z"); + + for &ty in &[I8, I16, I32] { + let ishl_by_i64 = ishl.bind(ty).bind(I64); + let ireduce = ireduce.bind(I32); + expand.legalize( + def!(a = ishl_by_i64(x, y)), + vec![def!(z = ireduce(y)), def!(a = ishl(x, z))], + ); + } + + for &ty in &[I8, I16, I32] { + let ushr_by_i64 = ushr.bind(ty).bind(I64); + let ireduce = ireduce.bind(I32); + expand.legalize( + def!(a = ushr_by_i64(x, y)), + vec![def!(z = ireduce(y)), def!(a = ushr(x, z))], + ); + } + + // Division and remainder. + // + // The srem expansion requires custom code because srem INT_MIN, -1 is not + // allowed to trap. The other ops need to check avoid_div_traps. + expand.custom_legalize(sdiv, "expand_sdivrem"); + expand.custom_legalize(srem, "expand_sdivrem"); + expand.custom_legalize(udiv, "expand_udivrem"); + expand.custom_legalize(urem, "expand_udivrem"); + + // Double length (widening) multiplication. + let a = var("a"); + let x = var("x"); + let y = var("y"); + let a1 = var("a1"); + let a2 = var("a2"); + let res_lo = var("res_lo"); + let res_hi = var("res_hi"); + + expand.legalize( + def!(res_hi = umulhi(x, y)), + vec![def!((res_lo, res_hi) = x86_umulx(x, y))], + ); + + expand.legalize( + def!(res_hi = smulhi(x, y)), + vec![def!((res_lo, res_hi) = x86_smulx(x, y))], + ); + + // Floating point condition codes. + // + // The 8 condition codes in `supported_floatccs` are directly supported by a + // `ucomiss` or `ucomisd` instruction. The remaining codes need legalization + // patterns.
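+ // For example, ucomiss sets ZF for an unordered (NaN) comparison just as it does for equality, so a bare "equal" check is not sufficient on its own; the rules below therefore rewrite eq as ord AND ueq, ne as uno OR one, and handle the remaining inequalities by swapping the operand order.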
+ + let floatcc_eq = Literal::enumerator_for(&imm.floatcc, "eq"); + let floatcc_ord = Literal::enumerator_for(&imm.floatcc, "ord"); + let floatcc_ueq = Literal::enumerator_for(&imm.floatcc, "ueq"); + let floatcc_ne = Literal::enumerator_for(&imm.floatcc, "ne"); + let floatcc_uno = Literal::enumerator_for(&imm.floatcc, "uno"); + let floatcc_one = Literal::enumerator_for(&imm.floatcc, "one"); + + // Equality needs an explicit `ord` test which checks the parity bit. + expand.legalize( + def!(a = fcmp(floatcc_eq, x, y)), + vec![ + def!(a1 = fcmp(floatcc_ord, x, y)), + def!(a2 = fcmp(floatcc_ueq, x, y)), + def!(a = band(a1, a2)), + ], + ); + expand.legalize( + def!(a = fcmp(floatcc_ne, x, y)), + vec![ + def!(a1 = fcmp(floatcc_uno, x, y)), + def!(a2 = fcmp(floatcc_one, x, y)), + def!(a = bor(a1, a2)), + ], + ); + + let floatcc_lt = &Literal::enumerator_for(&imm.floatcc, "lt"); + let floatcc_gt = &Literal::enumerator_for(&imm.floatcc, "gt"); + let floatcc_le = &Literal::enumerator_for(&imm.floatcc, "le"); + let floatcc_ge = &Literal::enumerator_for(&imm.floatcc, "ge"); + let floatcc_ugt = &Literal::enumerator_for(&imm.floatcc, "ugt"); + let floatcc_ult = &Literal::enumerator_for(&imm.floatcc, "ult"); + let floatcc_uge = &Literal::enumerator_for(&imm.floatcc, "uge"); + let floatcc_ule = &Literal::enumerator_for(&imm.floatcc, "ule"); + + // Inequalities that need to be reversed. + for &(cc, rev_cc) in &[ + (floatcc_lt, floatcc_gt), + (floatcc_le, floatcc_ge), + (floatcc_ugt, floatcc_ult), + (floatcc_uge, floatcc_ule), + ] { + expand.legalize(def!(a = fcmp(cc, x, y)), vec![def!(a = fcmp(rev_cc, y, x))]); + } + + // We need to modify the CFG for min/max legalization. + expand.custom_legalize(fmin, "expand_minmax"); + expand.custom_legalize(fmax, "expand_minmax"); + + // Conversions from unsigned need special handling. + expand.custom_legalize(fcvt_from_uint, "expand_fcvt_from_uint"); + // Conversions from float to int can trap and modify the control flow graph. 
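+    // These use custom legalization code rather than `def!` patterns because the rewrites must
+    // insert trap checks and new basic blocks, which the straight-line patterns above are not
+    // suited to express.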
+ expand.custom_legalize(fcvt_to_sint, "expand_fcvt_to_sint"); + expand.custom_legalize(fcvt_to_uint, "expand_fcvt_to_uint"); + expand.custom_legalize(fcvt_to_sint_sat, "expand_fcvt_to_sint_sat"); + expand.custom_legalize(fcvt_to_uint_sat, "expand_fcvt_to_uint_sat"); + + // Count leading and trailing zeroes, for baseline x86_64 + let c_minus_one = var("c_minus_one"); + let c_thirty_one = var("c_thirty_one"); + let c_thirty_two = var("c_thirty_two"); + let c_sixty_three = var("c_sixty_three"); + let c_sixty_four = var("c_sixty_four"); + let index1 = var("index1"); + let r2flags = var("r2flags"); + let index2 = var("index2"); + + let intcc_eq = Literal::enumerator_for(&imm.intcc, "eq"); + let imm64_minus_one = Literal::constant(&imm.imm64, -1); + let imm64_63 = Literal::constant(&imm.imm64, 63); + expand.legalize( + def!(a = clz.I64(x)), + vec![ + def!(c_minus_one = iconst(imm64_minus_one)), + def!(c_sixty_three = iconst(imm64_63)), + def!((index1, r2flags) = x86_bsr(x)), + def!(index2 = selectif(intcc_eq, r2flags, c_minus_one, index1)), + def!(a = isub(c_sixty_three, index2)), + ], + ); + + let imm64_31 = Literal::constant(&imm.imm64, 31); + expand.legalize( + def!(a = clz.I32(x)), + vec![ + def!(c_minus_one = iconst(imm64_minus_one)), + def!(c_thirty_one = iconst(imm64_31)), + def!((index1, r2flags) = x86_bsr(x)), + def!(index2 = selectif(intcc_eq, r2flags, c_minus_one, index1)), + def!(a = isub(c_thirty_one, index2)), + ], + ); + + let imm64_64 = Literal::constant(&imm.imm64, 64); + expand.legalize( + def!(a = ctz.I64(x)), + vec![ + def!(c_sixty_four = iconst(imm64_64)), + def!((index1, r2flags) = x86_bsf(x)), + def!(a = selectif(intcc_eq, r2flags, c_sixty_four, index1)), + ], + ); + + let imm64_32 = Literal::constant(&imm.imm64, 32); + expand.legalize( + def!(a = ctz.I32(x)), + vec![ + def!(c_thirty_two = iconst(imm64_32)), + def!((index1, r2flags) = x86_bsf(x)), + def!(a = selectif(intcc_eq, r2flags, c_thirty_two, index1)), + ], + ); + + // Population count for baseline x86_64 + let x = var("x"); + let r = var("r"); + + let qv3 = var("qv3"); + let qv4 = var("qv4"); + let qv5 = var("qv5"); + let qv6 = var("qv6"); + let qv7 = var("qv7"); + let qv8 = var("qv8"); + let qv9 = var("qv9"); + let qv10 = var("qv10"); + let qv11 = var("qv11"); + let qv12 = var("qv12"); + let qv13 = var("qv13"); + let qv14 = var("qv14"); + let qv15 = var("qv15"); + let qc77 = var("qc77"); + #[allow(non_snake_case)] + let qc0F = var("qc0F"); + let qc01 = var("qc01"); + + let imm64_1 = Literal::constant(&imm.imm64, 1); + let imm64_4 = Literal::constant(&imm.imm64, 4); + expand.legalize( + def!(r = popcnt.I64(x)), + vec![ + def!(qv3 = ushr_imm(x, imm64_1)), + def!(qc77 = iconst(Literal::constant(&imm.imm64, 0x7777_7777_7777_7777))), + def!(qv4 = band(qv3, qc77)), + def!(qv5 = isub(x, qv4)), + def!(qv6 = ushr_imm(qv4, imm64_1)), + def!(qv7 = band(qv6, qc77)), + def!(qv8 = isub(qv5, qv7)), + def!(qv9 = ushr_imm(qv7, imm64_1)), + def!(qv10 = band(qv9, qc77)), + def!(qv11 = isub(qv8, qv10)), + def!(qv12 = ushr_imm(qv11, imm64_4)), + def!(qv13 = iadd(qv11, qv12)), + def!(qc0F = iconst(Literal::constant(&imm.imm64, 0x0F0F_0F0F_0F0F_0F0F))), + def!(qv14 = band(qv13, qc0F)), + def!(qc01 = iconst(Literal::constant(&imm.imm64, 0x0101_0101_0101_0101))), + def!(qv15 = imul(qv14, qc01)), + def!(r = ushr_imm(qv15, Literal::constant(&imm.imm64, 56))), + ], + ); + + let lv3 = var("lv3"); + let lv4 = var("lv4"); + let lv5 = var("lv5"); + let lv6 = var("lv6"); + let lv7 = var("lv7"); + let lv8 = var("lv8"); + let lv9 = var("lv9"); + 
let lv10 = var("lv10"); + let lv11 = var("lv11"); + let lv12 = var("lv12"); + let lv13 = var("lv13"); + let lv14 = var("lv14"); + let lv15 = var("lv15"); + let lc77 = var("lc77"); + #[allow(non_snake_case)] + let lc0F = var("lc0F"); + let lc01 = var("lc01"); + + expand.legalize( + def!(r = popcnt.I32(x)), + vec![ + def!(lv3 = ushr_imm(x, imm64_1)), + def!(lc77 = iconst(Literal::constant(&imm.imm64, 0x7777_7777))), + def!(lv4 = band(lv3, lc77)), + def!(lv5 = isub(x, lv4)), + def!(lv6 = ushr_imm(lv4, imm64_1)), + def!(lv7 = band(lv6, lc77)), + def!(lv8 = isub(lv5, lv7)), + def!(lv9 = ushr_imm(lv7, imm64_1)), + def!(lv10 = band(lv9, lc77)), + def!(lv11 = isub(lv8, lv10)), + def!(lv12 = ushr_imm(lv11, imm64_4)), + def!(lv13 = iadd(lv11, lv12)), + def!(lc0F = iconst(Literal::constant(&imm.imm64, 0x0F0F_0F0F))), + def!(lv14 = band(lv13, lc0F)), + def!(lc01 = iconst(Literal::constant(&imm.imm64, 0x0101_0101))), + def!(lv15 = imul(lv14, lc01)), + def!(r = ushr_imm(lv15, Literal::constant(&imm.imm64, 24))), + ], + ); + + expand.custom_legalize(ineg, "convert_ineg"); + expand.custom_legalize(tls_value, "expand_tls_value"); + widen.custom_legalize(ineg, "convert_ineg"); + + // To reduce compilation times, separate out large blocks of legalizations by theme. + define_simd(shared, x86_instructions, &mut narrow, &mut narrow_avx); + + expand.build_and_add_to(&mut shared.transform_groups); + let narrow_id = narrow.build_and_add_to(&mut shared.transform_groups); + narrow_avx + .chain_with(narrow_id) + .build_and_add_to(&mut shared.transform_groups); + widen.build_and_add_to(&mut shared.transform_groups); +} + +fn define_simd( + shared: &mut SharedDefinitions, + x86_instructions: &InstructionGroup, + narrow: &mut TransformGroupBuilder, + narrow_avx: &mut TransformGroupBuilder, +) { + let insts = &shared.instructions; + let band = insts.by_name("band"); + let band_not = insts.by_name("band_not"); + let bitcast = insts.by_name("bitcast"); + let bitselect = insts.by_name("bitselect"); + let bor = insts.by_name("bor"); + let bnot = insts.by_name("bnot"); + let bxor = insts.by_name("bxor"); + let extractlane = insts.by_name("extractlane"); + let fabs = insts.by_name("fabs"); + let fcmp = insts.by_name("fcmp"); + let fcvt_from_uint = insts.by_name("fcvt_from_uint"); + let fcvt_to_sint_sat = insts.by_name("fcvt_to_sint_sat"); + let fcvt_to_uint_sat = insts.by_name("fcvt_to_uint_sat"); + let fmax = insts.by_name("fmax"); + let fmin = insts.by_name("fmin"); + let fneg = insts.by_name("fneg"); + let iadd_imm = insts.by_name("iadd_imm"); + let icmp = insts.by_name("icmp"); + let imax = insts.by_name("imax"); + let imin = insts.by_name("imin"); + let imul = insts.by_name("imul"); + let ineg = insts.by_name("ineg"); + let insertlane = insts.by_name("insertlane"); + let ishl = insts.by_name("ishl"); + let ishl_imm = insts.by_name("ishl_imm"); + let load_splat = insts.by_name("load_splat"); + let raw_bitcast = insts.by_name("raw_bitcast"); + let scalar_to_vector = insts.by_name("scalar_to_vector"); + let splat = insts.by_name("splat"); + let shuffle = insts.by_name("shuffle"); + let sshr = insts.by_name("sshr"); + let swizzle = insts.by_name("swizzle"); + let trueif = insts.by_name("trueif"); + let uadd_sat = insts.by_name("uadd_sat"); + let umax = insts.by_name("umax"); + let umin = insts.by_name("umin"); + let snarrow = insts.by_name("snarrow"); + let swiden_high = insts.by_name("swiden_high"); + let swiden_low = insts.by_name("swiden_low"); + let ushr_imm = insts.by_name("ushr_imm"); + let ushr = insts.by_name("ushr"); 
+ let uwiden_high = insts.by_name("uwiden_high"); + let uwiden_low = insts.by_name("uwiden_low"); + let vconst = insts.by_name("vconst"); + let vall_true = insts.by_name("vall_true"); + let vany_true = insts.by_name("vany_true"); + let vselect = insts.by_name("vselect"); + + let x86_palignr = x86_instructions.by_name("x86_palignr"); + let x86_pmaxs = x86_instructions.by_name("x86_pmaxs"); + let x86_pmaxu = x86_instructions.by_name("x86_pmaxu"); + let x86_pmins = x86_instructions.by_name("x86_pmins"); + let x86_pminu = x86_instructions.by_name("x86_pminu"); + let x86_pshufb = x86_instructions.by_name("x86_pshufb"); + let x86_pshufd = x86_instructions.by_name("x86_pshufd"); + let x86_psra = x86_instructions.by_name("x86_psra"); + let x86_ptest = x86_instructions.by_name("x86_ptest"); + let x86_punpckh = x86_instructions.by_name("x86_punpckh"); + let x86_punpckl = x86_instructions.by_name("x86_punpckl"); + + let imm = &shared.imm; + + // Set up variables and immediates. + let uimm8_zero = Literal::constant(&imm.uimm8, 0x00); + let uimm8_one = Literal::constant(&imm.uimm8, 0x01); + let uimm8_eight = Literal::constant(&imm.uimm8, 8); + let u128_zeroes = constant(vec![0x00; 16]); + let u128_ones = constant(vec![0xff; 16]); + let u128_seventies = constant(vec![0x70; 16]); + let a = var("a"); + let b = var("b"); + let c = var("c"); + let d = var("d"); + let e = var("e"); + let f = var("f"); + let g = var("g"); + let h = var("h"); + let x = var("x"); + let y = var("y"); + let z = var("z"); + + // Limit the SIMD vector size: eventually multiple vector sizes may be supported + // but for now only SSE-sized vectors are available. + let sse_vector_size: u64 = 128; + let allowed_simd_type = |t: &LaneType| t.lane_bits() >= 8 && t.lane_bits() < 128; + + // SIMD splat: 8-bits + for ty in ValueType::all_lane_types().filter(|t| t.lane_bits() == 8) { + let splat_any8x16 = splat.bind(vector(ty, sse_vector_size)); + narrow.legalize( + def!(y = splat_any8x16(x)), + vec![ + // Move into the lowest 8 bits of an XMM register. + def!(a = scalar_to_vector(x)), + // Zero out a different XMM register; the shuffle mask for moving the lowest byte + // to all other byte lanes is 0x0. + def!(b = vconst(u128_zeroes)), + // PSHUFB takes two XMM operands, one of which is a shuffle mask (i.e. b). + def!(y = x86_pshufb(a, b)), + ], + ); + } + + // SIMD splat: 16-bits + for ty in ValueType::all_lane_types().filter(|t| t.lane_bits() == 16) { + let splat_x16x8 = splat.bind(vector(ty, sse_vector_size)); + let raw_bitcast_any16x8_to_i32x4 = raw_bitcast + .bind(vector(I32, sse_vector_size)) + .bind(vector(ty, sse_vector_size)); + let raw_bitcast_i32x4_to_any16x8 = raw_bitcast + .bind(vector(ty, sse_vector_size)) + .bind(vector(I32, sse_vector_size)); + narrow.legalize( + def!(y = splat_x16x8(x)), + vec![ + // Move into the lowest 16 bits of an XMM register. + def!(a = scalar_to_vector(x)), + // Insert the value again but in the next lowest 16 bits. + def!(b = insertlane(a, x, uimm8_one)), + // No instruction emitted; pretend this is an I32x4 so we can use PSHUFD. + def!(c = raw_bitcast_any16x8_to_i32x4(b)), + // Broadcast the bytes in the XMM register with PSHUFD. + def!(d = x86_pshufd(c, uimm8_zero)), + // No instruction emitted; pretend this is an X16x8 again. 
+ def!(y = raw_bitcast_i32x4_to_any16x8(d)), + ], + ); + } + + // SIMD splat: 32-bits + for ty in ValueType::all_lane_types().filter(|t| t.lane_bits() == 32) { + let splat_any32x4 = splat.bind(vector(ty, sse_vector_size)); + narrow.legalize( + def!(y = splat_any32x4(x)), + vec![ + // Translate to an x86 MOV to get the value in an XMM register. + def!(a = scalar_to_vector(x)), + // Broadcast the bytes in the XMM register with PSHUFD. + def!(y = x86_pshufd(a, uimm8_zero)), + ], + ); + } + + // SIMD splat: 64-bits + for ty in ValueType::all_lane_types().filter(|t| t.lane_bits() == 64) { + let splat_any64x2 = splat.bind(vector(ty, sse_vector_size)); + narrow.legalize( + def!(y = splat_any64x2(x)), + vec![ + // Move into the lowest 64 bits of an XMM register. + def!(a = scalar_to_vector(x)), + // Move into the highest 64 bits of the same XMM register. + def!(y = insertlane(a, x, uimm8_one)), + ], + ); + } + + // SIMD swizzle; the following inefficient implementation is due to the Wasm SIMD spec requiring + // mask indexes greater than 15 to have the same semantics as a 0 index. For the spec discussion, + // see https://github.com/WebAssembly/simd/issues/93. + { + let swizzle = swizzle.bind(vector(I8, sse_vector_size)); + narrow.legalize( + def!(a = swizzle(x, y)), + vec![ + def!(b = vconst(u128_seventies)), + def!(c = uadd_sat(y, b)), + def!(a = x86_pshufb(x, c)), + ], + ); + } + + // SIMD bnot + for ty in ValueType::all_lane_types().filter(allowed_simd_type) { + let bnot = bnot.bind(vector(ty, sse_vector_size)); + narrow.legalize( + def!(y = bnot(x)), + vec![def!(a = vconst(u128_ones)), def!(y = bxor(a, x))], + ); + } + + // SIMD shift right (arithmetic, i16x8 and i32x4) + for ty in &[I16, I32] { + let sshr = sshr.bind(vector(*ty, sse_vector_size)); + let bitcast_i64x2 = bitcast.bind(vector(I64, sse_vector_size)); + narrow.legalize( + def!(a = sshr(x, y)), + vec![def!(b = bitcast_i64x2(y)), def!(a = x86_psra(x, b))], + ); + } + // SIMD shift right (arithmetic, i8x16) + { + let sshr = sshr.bind(vector(I8, sse_vector_size)); + let bitcast_i64x2 = bitcast.bind(vector(I64, sse_vector_size)); + let raw_bitcast_i16x8 = raw_bitcast.bind(vector(I16, sse_vector_size)); + let raw_bitcast_i16x8_again = raw_bitcast.bind(vector(I16, sse_vector_size)); + narrow.legalize( + def!(z = sshr(x, y)), + vec![ + // Since we will use the high byte of each 16x8 lane, shift an extra 8 bits. + def!(a = iadd_imm(y, uimm8_eight)), + def!(b = bitcast_i64x2(a)), + // Take the low 8 bytes of x, duplicate them in 16x8 lanes, then shift right. + def!(c = x86_punpckl(x, x)), + def!(d = raw_bitcast_i16x8(c)), + def!(e = x86_psra(d, b)), + // Take the high 8 bytes of x, duplicate them in 16x8 lanes, then shift right. + def!(f = x86_punpckh(x, x)), + def!(g = raw_bitcast_i16x8_again(f)), + def!(h = x86_psra(g, b)), + // Re-pack the vector. + def!(z = snarrow(e, h)), + ], + ); + } + // SIMD shift right (arithmetic, i64x2) + { + let sshr_vector = sshr.bind(vector(I64, sse_vector_size)); + let sshr_scalar_lane0 = sshr.bind(I64); + let sshr_scalar_lane1 = sshr.bind(I64); + narrow.legalize( + def!(z = sshr_vector(x, y)), + vec![ + // Use scalar operations to shift the first lane. + def!(a = extractlane(x, uimm8_zero)), + def!(b = sshr_scalar_lane0(a, y)), + def!(c = insertlane(x, b, uimm8_zero)), + // Do the same for the second lane. 
+ def!(d = extractlane(x, uimm8_one)), + def!(e = sshr_scalar_lane1(d, y)), + def!(z = insertlane(c, e, uimm8_one)), + ], + ); + } + + // SIMD select + for ty in ValueType::all_lane_types().filter(allowed_simd_type) { + let bitselect = bitselect.bind(vector(ty, sse_vector_size)); // must bind both x/y and c + narrow.legalize( + def!(d = bitselect(c, x, y)), + vec![ + def!(a = band(x, c)), + def!(b = band_not(y, c)), + def!(d = bor(a, b)), + ], + ); + } + + // SIMD vselect; replace with bitselect if BLEND* instructions are not available. + // This works, because each lane of boolean vector is filled with zeroes or ones. + for ty in ValueType::all_lane_types().filter(allowed_simd_type) { + let vselect = vselect.bind(vector(ty, sse_vector_size)); + let raw_bitcast = raw_bitcast.bind(vector(ty, sse_vector_size)); + narrow.legalize( + def!(d = vselect(c, x, y)), + vec![def!(a = raw_bitcast(c)), def!(d = bitselect(a, x, y))], + ); + } + + // SIMD vany_true + let ne = Literal::enumerator_for(&imm.intcc, "ne"); + for ty in ValueType::all_lane_types().filter(allowed_simd_type) { + let vany_true = vany_true.bind(vector(ty, sse_vector_size)); + narrow.legalize( + def!(y = vany_true(x)), + vec![def!(a = x86_ptest(x, x)), def!(y = trueif(ne, a))], + ); + } + + // SIMD vall_true + let eq = Literal::enumerator_for(&imm.intcc, "eq"); + for ty in ValueType::all_lane_types().filter(allowed_simd_type) { + let vall_true = vall_true.bind(vector(ty, sse_vector_size)); + if ty.is_int() { + // In the common case (Wasm's integer-only all_true), we do not require a + // bitcast. + narrow.legalize( + def!(y = vall_true(x)), + vec![ + def!(a = vconst(u128_zeroes)), + def!(c = icmp(eq, x, a)), + def!(d = x86_ptest(c, c)), + def!(y = trueif(eq, d)), + ], + ); + } else { + // However, to support other types we must bitcast them to an integer vector to + // use icmp. 
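+            // (For example, a b32x4 input is reinterpreted as an i32x4 via `raw_bitcast` below;
+            // the bitcast emits no machine instruction, it only changes the type.)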
+ let lane_type_as_int = LaneType::int_from_bits(ty.lane_bits() as u16); + let raw_bitcast_to_int = raw_bitcast.bind(vector(lane_type_as_int, sse_vector_size)); + narrow.legalize( + def!(y = vall_true(x)), + vec![ + def!(a = vconst(u128_zeroes)), + def!(b = raw_bitcast_to_int(x)), + def!(c = icmp(eq, b, a)), + def!(d = x86_ptest(c, c)), + def!(y = trueif(eq, d)), + ], + ); + } + } + + // SIMD icmp ne + let ne = Literal::enumerator_for(&imm.intcc, "ne"); + for ty in ValueType::all_lane_types().filter(|ty| allowed_simd_type(ty) && ty.is_int()) { + let icmp_ = icmp.bind(vector(ty, sse_vector_size)); + narrow.legalize( + def!(c = icmp_(ne, a, b)), + vec![def!(x = icmp(eq, a, b)), def!(c = bnot(x))], + ); + } + + // SIMD icmp greater-/less-than + let sgt = Literal::enumerator_for(&imm.intcc, "sgt"); + let ugt = Literal::enumerator_for(&imm.intcc, "ugt"); + let sge = Literal::enumerator_for(&imm.intcc, "sge"); + let uge = Literal::enumerator_for(&imm.intcc, "uge"); + let slt = Literal::enumerator_for(&imm.intcc, "slt"); + let ult = Literal::enumerator_for(&imm.intcc, "ult"); + let sle = Literal::enumerator_for(&imm.intcc, "sle"); + let ule = Literal::enumerator_for(&imm.intcc, "ule"); + for ty in &[I8, I16, I32] { + // greater-than + let icmp_ = icmp.bind(vector(*ty, sse_vector_size)); + narrow.legalize( + def!(c = icmp_(ugt, a, b)), + vec![ + def!(x = x86_pmaxu(a, b)), + def!(y = icmp(eq, x, b)), + def!(c = bnot(y)), + ], + ); + let icmp_ = icmp.bind(vector(*ty, sse_vector_size)); + narrow.legalize( + def!(c = icmp_(sge, a, b)), + vec![def!(x = x86_pmins(a, b)), def!(c = icmp(eq, x, b))], + ); + let icmp_ = icmp.bind(vector(*ty, sse_vector_size)); + narrow.legalize( + def!(c = icmp_(uge, a, b)), + vec![def!(x = x86_pminu(a, b)), def!(c = icmp(eq, x, b))], + ); + + // less-than + let icmp_ = icmp.bind(vector(*ty, sse_vector_size)); + narrow.legalize(def!(c = icmp_(slt, a, b)), vec![def!(c = icmp(sgt, b, a))]); + let icmp_ = icmp.bind(vector(*ty, sse_vector_size)); + narrow.legalize(def!(c = icmp_(ult, a, b)), vec![def!(c = icmp(ugt, b, a))]); + let icmp_ = icmp.bind(vector(*ty, sse_vector_size)); + narrow.legalize(def!(c = icmp_(sle, a, b)), vec![def!(c = icmp(sge, b, a))]); + let icmp_ = icmp.bind(vector(*ty, sse_vector_size)); + narrow.legalize(def!(c = icmp_(ule, a, b)), vec![def!(c = icmp(uge, b, a))]); + } + + // SIMD integer min/max + for ty in &[I8, I16, I32] { + let imin = imin.bind(vector(*ty, sse_vector_size)); + narrow.legalize(def!(c = imin(a, b)), vec![def!(c = x86_pmins(a, b))]); + let umin = umin.bind(vector(*ty, sse_vector_size)); + narrow.legalize(def!(c = umin(a, b)), vec![def!(c = x86_pminu(a, b))]); + let imax = imax.bind(vector(*ty, sse_vector_size)); + narrow.legalize(def!(c = imax(a, b)), vec![def!(c = x86_pmaxs(a, b))]); + let umax = umax.bind(vector(*ty, sse_vector_size)); + narrow.legalize(def!(c = umax(a, b)), vec![def!(c = x86_pmaxu(a, b))]); + } + + // SIMD fcmp greater-/less-than + let gt = Literal::enumerator_for(&imm.floatcc, "gt"); + let lt = Literal::enumerator_for(&imm.floatcc, "lt"); + let ge = Literal::enumerator_for(&imm.floatcc, "ge"); + let le = Literal::enumerator_for(&imm.floatcc, "le"); + let ugt = Literal::enumerator_for(&imm.floatcc, "ugt"); + let ult = Literal::enumerator_for(&imm.floatcc, "ult"); + let uge = Literal::enumerator_for(&imm.floatcc, "uge"); + let ule = Literal::enumerator_for(&imm.floatcc, "ule"); + for ty in &[F32, F64] { + let fcmp_ = fcmp.bind(vector(*ty, sse_vector_size)); + narrow.legalize(def!(c = fcmp_(gt, a, b)), vec![def!(c = 
fcmp(lt, b, a))]); + let fcmp_ = fcmp.bind(vector(*ty, sse_vector_size)); + narrow.legalize(def!(c = fcmp_(ge, a, b)), vec![def!(c = fcmp(le, b, a))]); + let fcmp_ = fcmp.bind(vector(*ty, sse_vector_size)); + narrow.legalize(def!(c = fcmp_(ult, a, b)), vec![def!(c = fcmp(ugt, b, a))]); + let fcmp_ = fcmp.bind(vector(*ty, sse_vector_size)); + narrow.legalize(def!(c = fcmp_(ule, a, b)), vec![def!(c = fcmp(uge, b, a))]); + } + + for ty in &[F32, F64] { + let fneg = fneg.bind(vector(*ty, sse_vector_size)); + let lane_type_as_int = LaneType::int_from_bits(LaneType::from(*ty).lane_bits() as u16); + let uimm8_shift = Literal::constant(&imm.uimm8, lane_type_as_int.lane_bits() as i64 - 1); + let vconst = vconst.bind(vector(lane_type_as_int, sse_vector_size)); + let bitcast_to_float = raw_bitcast.bind(vector(*ty, sse_vector_size)); + narrow.legalize( + def!(b = fneg(a)), + vec![ + def!(c = vconst(u128_ones)), + def!(d = ishl_imm(c, uimm8_shift)), // Create a mask of all 0s except the MSB. + def!(e = bitcast_to_float(d)), // Cast mask to the floating-point type. + def!(b = bxor(a, e)), // Flip the MSB. + ], + ); + } + + // SIMD fabs + for ty in &[F32, F64] { + let fabs = fabs.bind(vector(*ty, sse_vector_size)); + let lane_type_as_int = LaneType::int_from_bits(LaneType::from(*ty).lane_bits() as u16); + let vconst = vconst.bind(vector(lane_type_as_int, sse_vector_size)); + let bitcast_to_float = raw_bitcast.bind(vector(*ty, sse_vector_size)); + narrow.legalize( + def!(b = fabs(a)), + vec![ + def!(c = vconst(u128_ones)), + def!(d = ushr_imm(c, uimm8_one)), // Create a mask of all 1s except the MSB. + def!(e = bitcast_to_float(d)), // Cast mask to the floating-point type. + def!(b = band(a, e)), // Unset the MSB. + ], + ); + } + + // SIMD widen + for ty in &[I8, I16] { + let swiden_high = swiden_high.bind(vector(*ty, sse_vector_size)); + narrow.legalize( + def!(b = swiden_high(a)), + vec![ + def!(c = x86_palignr(a, a, uimm8_eight)), + def!(b = swiden_low(c)), + ], + ); + let uwiden_high = uwiden_high.bind(vector(*ty, sse_vector_size)); + narrow.legalize( + def!(b = uwiden_high(a)), + vec![ + def!(c = x86_palignr(a, a, uimm8_eight)), + def!(b = uwiden_low(c)), + ], + ); + } + + narrow.custom_legalize(shuffle, "convert_shuffle"); + narrow.custom_legalize(extractlane, "convert_extractlane"); + narrow.custom_legalize(insertlane, "convert_insertlane"); + narrow.custom_legalize(ineg, "convert_ineg"); + narrow.custom_legalize(ushr, "convert_ushr"); + narrow.custom_legalize(ishl, "convert_ishl"); + narrow.custom_legalize(fcvt_to_sint_sat, "expand_fcvt_to_sint_sat_vector"); + narrow.custom_legalize(fmin, "expand_minmax_vector"); + narrow.custom_legalize(fmax, "expand_minmax_vector"); + narrow.custom_legalize(load_splat, "expand_load_splat"); + + narrow_avx.custom_legalize(imul, "convert_i64x2_imul"); + narrow_avx.custom_legalize(fcvt_from_uint, "expand_fcvt_from_uint_vector"); + narrow_avx.custom_legalize(fcvt_to_uint_sat, "expand_fcvt_to_uint_sat_vector"); +} diff --git a/third_party/rust/cranelift-codegen-meta/src/isa/x86/mod.rs b/third_party/rust/cranelift-codegen-meta/src/isa/x86/mod.rs new file mode 100644 index 0000000000..a272e83900 --- /dev/null +++ b/third_party/rust/cranelift-codegen-meta/src/isa/x86/mod.rs @@ -0,0 +1,88 @@ +use crate::cdsl::cpu_modes::CpuMode; +use crate::cdsl::isa::TargetIsa; +use crate::cdsl::types::{ReferenceType, VectorType}; + +use crate::shared::types::Bool::B1; +use crate::shared::types::Float::{F32, F64}; +use crate::shared::types::Int::{I16, I32, I64, I8}; +use 
crate::shared::types::Reference::{R32, R64}; +use crate::shared::Definitions as SharedDefinitions; + +mod encodings; +mod instructions; +mod legalize; +mod opcodes; +mod recipes; +mod registers; +pub(crate) mod settings; + +pub(crate) fn define(shared_defs: &mut SharedDefinitions) -> TargetIsa { + let settings = settings::define(&shared_defs.settings); + let regs = registers::define(); + + let inst_group = instructions::define( + &mut shared_defs.all_instructions, + &shared_defs.formats, + &shared_defs.imm, + &shared_defs.entities, + ); + legalize::define(shared_defs, &inst_group); + + // CPU modes for 32-bit and 64-bit operations. + let mut x86_64 = CpuMode::new("I64"); + let mut x86_32 = CpuMode::new("I32"); + + let expand_flags = shared_defs.transform_groups.by_name("expand_flags"); + let x86_widen = shared_defs.transform_groups.by_name("x86_widen"); + let x86_narrow = shared_defs.transform_groups.by_name("x86_narrow"); + let x86_narrow_avx = shared_defs.transform_groups.by_name("x86_narrow_avx"); + let x86_expand = shared_defs.transform_groups.by_name("x86_expand"); + + x86_32.legalize_monomorphic(expand_flags); + x86_32.legalize_default(x86_narrow); + x86_32.legalize_type(B1, expand_flags); + x86_32.legalize_type(I8, x86_widen); + x86_32.legalize_type(I16, x86_widen); + x86_32.legalize_type(I32, x86_expand); + x86_32.legalize_value_type(ReferenceType(R32), x86_expand); + x86_32.legalize_type(F32, x86_expand); + x86_32.legalize_type(F64, x86_expand); + x86_32.legalize_value_type(VectorType::new(I32.into(), 4), x86_narrow_avx); + x86_32.legalize_value_type(VectorType::new(I64.into(), 2), x86_narrow_avx); + x86_32.legalize_value_type(VectorType::new(F32.into(), 4), x86_narrow_avx); + + x86_64.legalize_monomorphic(expand_flags); + x86_64.legalize_default(x86_narrow); + x86_64.legalize_type(B1, expand_flags); + x86_64.legalize_type(I8, x86_widen); + x86_64.legalize_type(I16, x86_widen); + x86_64.legalize_type(I32, x86_expand); + x86_64.legalize_type(I64, x86_expand); + x86_64.legalize_value_type(ReferenceType(R64), x86_expand); + x86_64.legalize_type(F32, x86_expand); + x86_64.legalize_type(F64, x86_expand); + x86_64.legalize_value_type(VectorType::new(I32.into(), 4), x86_narrow_avx); + x86_64.legalize_value_type(VectorType::new(I64.into(), 2), x86_narrow_avx); + x86_64.legalize_value_type(VectorType::new(F32.into(), 4), x86_narrow_avx); + + let recipes = recipes::define(shared_defs, &settings, ®s); + + let encodings = encodings::define(shared_defs, &settings, &inst_group, &recipes); + x86_32.set_encodings(encodings.enc32); + x86_64.set_encodings(encodings.enc64); + let encodings_predicates = encodings.inst_pred_reg.extract(); + + let recipes = encodings.recipes; + + let cpu_modes = vec![x86_64, x86_32]; + + TargetIsa::new( + "x86", + inst_group, + settings, + regs, + recipes, + cpu_modes, + encodings_predicates, + ) +} diff --git a/third_party/rust/cranelift-codegen-meta/src/isa/x86/opcodes.rs b/third_party/rust/cranelift-codegen-meta/src/isa/x86/opcodes.rs new file mode 100644 index 0000000000..09c07c458f --- /dev/null +++ b/third_party/rust/cranelift-codegen-meta/src/isa/x86/opcodes.rs @@ -0,0 +1,721 @@ +//! Static, named definitions of instruction opcodes. + +/// Empty opcode for use as a default. +pub static EMPTY: [u8; 0] = []; + +/// Add with carry flag r{16,32,64} to r/m of the same size. +pub static ADC: [u8; 1] = [0x11]; + +/// Add r{16,32,64} to r/m of the same size. +pub static ADD: [u8; 1] = [0x01]; + +/// Add imm{16,32} to r/m{16,32,64}, possibly sign-extended. 
+pub static ADD_IMM: [u8; 1] = [0x81]; + +/// Add sign-extended imm8 to r/m{16,32,64}. +pub static ADD_IMM8_SIGN_EXTEND: [u8; 1] = [0x83]; + +/// Add packed double-precision floating-point values from xmm2/mem to xmm1 and store result in +/// xmm1 (SSE2). +pub static ADDPD: [u8; 3] = [0x66, 0x0f, 0x58]; + +/// Add packed single-precision floating-point values from xmm2/mem to xmm1 and store result in +/// xmm1 (SSE). +pub static ADDPS: [u8; 2] = [0x0f, 0x58]; + +/// Add the low double-precision floating-point value from xmm2/mem to xmm1 +/// and store the result in xmm1. +pub static ADDSD: [u8; 3] = [0xf2, 0x0f, 0x58]; + +/// Add the low single-precision floating-point value from xmm2/mem to xmm1 +/// and store the result in xmm1. +pub static ADDSS: [u8; 3] = [0xf3, 0x0f, 0x58]; + +/// r/m{16,32,64} AND register of the same size (Intel docs have a typo). +pub static AND: [u8; 1] = [0x21]; + +/// imm{16,32} AND r/m{16,32,64}, possibly sign-extended. +pub static AND_IMM: [u8; 1] = [0x81]; + +/// r/m{16,32,64} AND sign-extended imm8. +pub static AND_IMM8_SIGN_EXTEND: [u8; 1] = [0x83]; + +/// Return the bitwise logical AND NOT of packed single-precision floating-point +/// values in xmm1 and xmm2/mem. +pub static ANDNPS: [u8; 2] = [0x0f, 0x55]; + +/// Return the bitwise logical AND of packed single-precision floating-point values +/// in xmm1 and xmm2/mem. +pub static ANDPS: [u8; 2] = [0x0f, 0x54]; + +/// Bit scan forward (stores index of first encountered 1 from the front). +pub static BIT_SCAN_FORWARD: [u8; 2] = [0x0f, 0xbc]; + +/// Bit scan reverse (stores index of first encountered 1 from the back). +pub static BIT_SCAN_REVERSE: [u8; 2] = [0x0f, 0xbd]; + +/// Select packed single-precision floating-point values from xmm1 and xmm2/m128 +/// from mask specified in XMM0 and store the values into xmm1 (SSE4.1). +pub static BLENDVPS: [u8; 4] = [0x66, 0x0f, 0x38, 0x14]; + +/// Select packed double-precision floating-point values from xmm1 and xmm2/m128 +/// from mask specified in XMM0 and store the values into xmm1 (SSE4.1). +pub static BLENDVPD: [u8; 4] = [0x66, 0x0f, 0x38, 0x15]; + +/// Call near, relative, displacement relative to next instruction (sign-extended). +pub static CALL_RELATIVE: [u8; 1] = [0xe8]; + +/// Move r/m{16,32,64} if overflow (OF=1). +pub static CMOV_OVERFLOW: [u8; 2] = [0x0f, 0x40]; + +/// Compare imm{16,32} with r/m{16,32,64} (sign-extended if 64). +pub static CMP_IMM: [u8; 1] = [0x81]; + +/// Compare imm8 with r/m{16,32,64}. +pub static CMP_IMM8: [u8; 1] = [0x83]; + +/// Compare r{16,32,64} with r/m of the same size. +pub static CMP_REG: [u8; 1] = [0x39]; + +/// Compare packed double-precision floating-point value in xmm2/m32 and xmm1 using bits 2:0 of +/// imm8 as comparison predicate (SSE2). +pub static CMPPD: [u8; 3] = [0x66, 0x0f, 0xc2]; + +/// Compare packed single-precision floating-point value in xmm2/m32 and xmm1 using bits 2:0 of +/// imm8 as comparison predicate (SSE). +pub static CMPPS: [u8; 2] = [0x0f, 0xc2]; + +/// Convert four packed signed doubleword integers from xmm2/mem to four packed single-precision +/// floating-point values in xmm1 (SSE2). +pub static CVTDQ2PS: [u8; 2] = [0x0f, 0x5b]; + +/// Convert scalar double-precision floating-point value to scalar single-precision +/// floating-point value. +pub static CVTSD2SS: [u8; 3] = [0xf2, 0x0f, 0x5a]; + +/// Convert doubleword integer to scalar double-precision floating-point value. 
+pub static CVTSI2SD: [u8; 3] = [0xf2, 0x0f, 0x2a]; + +/// Convert doubleword integer to scalar single-precision floating-point value. +pub static CVTSI2SS: [u8; 3] = [0xf3, 0x0f, 0x2a]; + +/// Convert scalar single-precision floating-point value to scalar double-precision +/// float-point value. +pub static CVTSS2SD: [u8; 3] = [0xf3, 0x0f, 0x5a]; + +/// Convert four packed single-precision floating-point values from xmm2/mem to four packed signed +/// doubleword values in xmm1 using truncation (SSE2). +pub static CVTTPS2DQ: [u8; 3] = [0xf3, 0x0f, 0x5b]; + +/// Convert with truncation scalar double-precision floating-point value to signed +/// integer. +pub static CVTTSD2SI: [u8; 3] = [0xf2, 0x0f, 0x2c]; + +/// Convert with truncation scalar single-precision floating-point value to integer. +pub static CVTTSS2SI: [u8; 3] = [0xf3, 0x0f, 0x2c]; + +/// Unsigned divide for {16,32,64}-bit. +pub static DIV: [u8; 1] = [0xf7]; + +/// Divide packed double-precision floating-point values in xmm1 by packed double-precision +/// floating-point values in xmm2/mem (SSE2). +pub static DIVPD: [u8; 3] = [0x66, 0x0f, 0x5e]; + +/// Divide packed single-precision floating-point values in xmm1 by packed single-precision +/// floating-point values in xmm2/mem (SSE). +pub static DIVPS: [u8; 2] = [0x0f, 0x5e]; + +/// Divide low double-precision floating-point value in xmm1 by low double-precision +/// floating-point value in xmm2/m64. +pub static DIVSD: [u8; 3] = [0xf2, 0x0f, 0x5e]; + +/// Divide low single-precision floating-point value in xmm1 by low single-precision +/// floating-point value in xmm2/m32. +pub static DIVSS: [u8; 3] = [0xf3, 0x0f, 0x5e]; + +/// Signed divide for {16,32,64}-bit. +pub static IDIV: [u8; 1] = [0xf7]; + +/// Signed multiply for {16,32,64}-bit, generic registers. +pub static IMUL: [u8; 2] = [0x0f, 0xaf]; + +/// Signed multiply for {16,32,64}-bit, storing into RDX:RAX. +pub static IMUL_RDX_RAX: [u8; 1] = [0xf7]; + +/// Insert scalar single-precision floating-point value. +pub static INSERTPS: [u8; 4] = [0x66, 0x0f, 0x3a, 0x21]; + +/// Either: +/// 1. Jump near, absolute indirect, RIP = 64-bit offset from register or memory. +/// 2. Jump far, absolute indirect, address given in m16:64. +pub static JUMP_ABSOLUTE: [u8; 1] = [0xff]; + +/// Jump near, relative, RIP = RIP + 32-bit displacement sign extended to 64 bits. +pub static JUMP_NEAR_RELATIVE: [u8; 1] = [0xe9]; + +/// Jump near (rel32) if overflow (OF=1). +pub static JUMP_NEAR_IF_OVERFLOW: [u8; 2] = [0x0f, 0x80]; + +/// Jump short, relative, RIP = RIP + 8-bit displacement sign extended to 64 bits. +pub static JUMP_SHORT: [u8; 1] = [0xeb]; + +/// Jump short (rel8) if equal (ZF=1). +pub static JUMP_SHORT_IF_EQUAL: [u8; 1] = [0x74]; + +/// Jump short (rel8) if not equal (ZF=0). +pub static JUMP_SHORT_IF_NOT_EQUAL: [u8; 1] = [0x75]; + +/// Jump short (rel8) if overflow (OF=1). +pub static JUMP_SHORT_IF_OVERFLOW: [u8; 1] = [0x70]; + +/// Store effective address for m in register r{16,32,64}. +pub static LEA: [u8; 1] = [0x8d]; + +/// Count the number of leading zero bits. +pub static LZCNT: [u8; 3] = [0xf3, 0x0f, 0xbd]; + +/// Return the maximum packed double-precision floating-point values between xmm1 and xmm2/m128 +/// (SSE2). +pub static MAXPD: [u8; 3] = [0x66, 0x0f, 0x5f]; + +/// Return the maximum packed single-precision floating-point values between xmm1 and xmm2/m128 +/// (SSE). +pub static MAXPS: [u8; 2] = [0x0f, 0x5f]; + +/// Return the maximum scalar double-precision floating-point value between +/// xmm2/m64 and xmm1. 
+pub static MAXSD: [u8; 3] = [0xf2, 0x0f, 0x5f]; + +/// Return the maximum scalar single-precision floating-point value between +/// xmm2/m32 and xmm1. +pub static MAXSS: [u8; 3] = [0xf3, 0x0f, 0x5f]; + +/// Return the minimum packed double-precision floating-point values between xmm1 and xmm2/m128 +/// (SSE2). +pub static MINPD: [u8; 3] = [0x66, 0x0f, 0x5d]; + +/// Return the minimum packed single-precision floating-point values between xmm1 and xmm2/m128 +/// (SSE). +pub static MINPS: [u8; 2] = [0x0f, 0x5d]; + +/// Return the minimum scalar double-precision floating-point value between +/// xmm2/m64 and xmm1. +pub static MINSD: [u8; 3] = [0xf2, 0x0f, 0x5d]; + +/// Return the minimum scalar single-precision floating-point value between +/// xmm2/m32 and xmm1. +pub static MINSS: [u8; 3] = [0xf3, 0x0f, 0x5d]; + +/// Move r8 to r/m8. +pub static MOV_BYTE_STORE: [u8; 1] = [0x88]; + +/// Move imm{16,32,64} to same-sized register. +pub static MOV_IMM: [u8; 1] = [0xb8]; + +/// Move imm{16,32} to r{16,32,64}, sign-extended if 64-bit target. +pub static MOV_IMM_SIGNEXTEND: [u8; 1] = [0xc7]; + +/// Move {r/m16, r/m32, r/m64} to same-sized register. +pub static MOV_LOAD: [u8; 1] = [0x8b]; + +/// Move r16 to r/m16. +pub static MOV_STORE_16: [u8; 2] = [0x66, 0x89]; + +/// Move {r16, r32, r64} to same-sized register or memory. +pub static MOV_STORE: [u8; 1] = [0x89]; + +/// Move aligned packed single-precision floating-point values from x/m to xmm (SSE). +pub static MOVAPS_LOAD: [u8; 2] = [0x0f, 0x28]; + +/// Move doubleword from r/m32 to xmm (SSE2). Quadword with REX prefix. +pub static MOVD_LOAD_XMM: [u8; 3] = [0x66, 0x0f, 0x6e]; + +/// Move doubleword from xmm to r/m32 (SSE2). Quadword with REX prefix. +pub static MOVD_STORE_XMM: [u8; 3] = [0x66, 0x0f, 0x7e]; + +/// Move packed single-precision floating-point values low to high (SSE). +pub static MOVLHPS: [u8; 2] = [0x0f, 0x16]; + +/// Move scalar double-precision floating-point value (from reg/mem to reg). +pub static MOVSD_LOAD: [u8; 3] = [0xf2, 0x0f, 0x10]; + +/// Move scalar double-precision floating-point value (from reg to reg/mem). +pub static MOVSD_STORE: [u8; 3] = [0xf2, 0x0f, 0x11]; + +/// Move scalar single-precision floating-point value (from reg to reg/mem). +pub static MOVSS_STORE: [u8; 3] = [0xf3, 0x0f, 0x11]; + +/// Move scalar single-precision floating-point-value (from reg/mem to reg). +pub static MOVSS_LOAD: [u8; 3] = [0xf3, 0x0f, 0x10]; + +/// Move byte to register with sign-extension. +pub static MOVSX_BYTE: [u8; 2] = [0x0f, 0xbe]; + +/// Move word to register with sign-extension. +pub static MOVSX_WORD: [u8; 2] = [0x0f, 0xbf]; + +/// Move doubleword to register with sign-extension. +pub static MOVSXD: [u8; 1] = [0x63]; + +/// Move unaligned packed single-precision floating-point from x/m to xmm (SSE). +pub static MOVUPS_LOAD: [u8; 2] = [0x0f, 0x10]; + +/// Move unaligned packed single-precision floating-point value from xmm to x/m (SSE). +pub static MOVUPS_STORE: [u8; 2] = [0x0f, 0x11]; + +/// Move byte to register with zero-extension. +pub static MOVZX_BYTE: [u8; 2] = [0x0f, 0xb6]; + +/// Move word to register with zero-extension. +pub static MOVZX_WORD: [u8; 2] = [0x0f, 0xb7]; + +/// Unsigned multiply for {16,32,64}-bit. +pub static MUL: [u8; 1] = [0xf7]; + +/// Multiply packed double-precision floating-point values from xmm2/mem to xmm1 and store result +/// in xmm1 (SSE2). 
+pub static MULPD: [u8; 3] = [0x66, 0x0f, 0x59]; + +/// Multiply packed single-precision floating-point values from xmm2/mem to xmm1 and store result +/// in xmm1 (SSE). +pub static MULPS: [u8; 2] = [0x0f, 0x59]; + +/// Multiply the low double-precision floating-point value in xmm2/m64 by the +/// low double-precision floating-point value in xmm1. +pub static MULSD: [u8; 3] = [0xf2, 0x0f, 0x59]; + +/// Multiply the low single-precision floating-point value in xmm2/m32 by the +/// low single-precision floating-point value in xmm1. +pub static MULSS: [u8; 3] = [0xf3, 0x0f, 0x59]; + +/// Reverse each bit of r/m{16,32,64}. +pub static NOT: [u8; 1] = [0xf7]; + +/// r{16,32,64} OR register of same size. +pub static OR: [u8; 1] = [0x09]; + +/// imm{16,32} OR r/m{16,32,64}, possibly sign-extended. +pub static OR_IMM: [u8; 1] = [0x81]; + +/// r/m{16,32,64} OR sign-extended imm8. +pub static OR_IMM8_SIGN_EXTEND: [u8; 1] = [0x83]; + +/// Return the bitwise logical OR of packed single-precision values in xmm and x/m (SSE). +pub static ORPS: [u8; 2] = [0x0f, 0x56]; + +/// Compute the absolute value of bytes in xmm2/m128 and store the unsigned result in xmm1 (SSSE3). +pub static PABSB: [u8; 4] = [0x66, 0x0f, 0x38, 0x1c]; + +/// Compute the absolute value of 32-bit integers in xmm2/m128 and store the unsigned result in +/// xmm1 (SSSE3). +pub static PABSD: [u8; 4] = [0x66, 0x0f, 0x38, 0x1e]; + +/// Compute the absolute value of 16-bit integers in xmm2/m128 and store the unsigned result in +/// xmm1 (SSSE3). +pub static PABSW: [u8; 4] = [0x66, 0x0f, 0x38, 0x1d]; + +/// Converts 8 packed signed word integers from xmm1 and from xmm2/m128 into 16 packed signed byte +/// integers in xmm1 using signed saturation (SSE2). +pub static PACKSSWB: [u8; 3] = [0x66, 0x0f, 0x63]; + +/// Converts 4 packed signed doubleword integers from xmm1 and from xmm2/m128 into 8 packed signed +/// word integers in xmm1 using signed saturation (SSE2). +pub static PACKSSDW: [u8; 3] = [0x66, 0x0f, 0x6b]; + +/// Converts 8 packed signed word integers from xmm1 and from xmm2/m128 into 16 packed unsigned byte +/// integers in xmm1 using unsigned saturation (SSE2). +pub static PACKUSWB: [u8; 3] = [0x66, 0x0f, 0x67]; + +/// Converts 4 packed signed doubleword integers from xmm1 and from xmm2/m128 into 8 unpacked signed +/// word integers in xmm1 using unsigned saturation (SSE4.1). +pub static PACKUSDW: [u8; 4] = [0x66, 0x0f, 0x38, 0x2b]; + +/// Add packed byte integers from xmm2/m128 and xmm1 (SSE2). +pub static PADDB: [u8; 3] = [0x66, 0x0f, 0xfc]; + +/// Add packed doubleword integers from xmm2/m128 and xmm1 (SSE2). +pub static PADDD: [u8; 3] = [0x66, 0x0f, 0xfe]; + +/// Add packed quadword integers from xmm2/m128 and xmm1 (SSE2). +pub static PADDQ: [u8; 3] = [0x66, 0x0f, 0xd4]; + +/// Add packed word integers from xmm2/m128 and xmm1 (SSE2). +pub static PADDW: [u8; 3] = [0x66, 0x0f, 0xfd]; + +/// Add packed signed byte integers from xmm2/m128 and xmm1 saturate the results (SSE). +pub static PADDSB: [u8; 3] = [0x66, 0x0f, 0xec]; + +/// Add packed signed word integers from xmm2/m128 and xmm1 saturate the results (SSE). +pub static PADDSW: [u8; 3] = [0x66, 0x0f, 0xed]; + +/// Add packed unsigned byte integers from xmm2/m128 and xmm1 saturate the results (SSE). +pub static PADDUSB: [u8; 3] = [0x66, 0x0f, 0xdc]; + +/// Add packed unsigned word integers from xmm2/m128 and xmm1 saturate the results (SSE). 
+pub static PADDUSW: [u8; 3] = [0x66, 0x0f, 0xdd]; + +/// Concatenate destination and source operands, extract a byte-aligned result into xmm1 that is +/// shifted to the right by the constant number of bytes in imm8 (SSSE3). +pub static PALIGNR: [u8; 4] = [0x66, 0x0f, 0x3a, 0x0f]; + +/// Bitwise AND of xmm2/m128 and xmm1 (SSE2). +pub static PAND: [u8; 3] = [0x66, 0x0f, 0xdb]; + +/// Bitwise AND NOT of xmm2/m128 and xmm1 (SSE2). +pub static PANDN: [u8; 3] = [0x66, 0x0f, 0xdf]; + +/// Average packed unsigned byte integers from xmm2/m128 and xmm1 with rounding (SSE2). +pub static PAVGB: [u8; 3] = [0x66, 0x0f, 0xE0]; + +/// Average packed unsigned word integers from xmm2/m128 and xmm1 with rounding (SSE2). +pub static PAVGW: [u8; 3] = [0x66, 0x0f, 0xE3]; + +/// Select byte values from xmm1 and xmm2/m128 from mask specified in the high bit of each byte +/// in XMM0 and store the values into xmm1 (SSE4.1). +pub static PBLENDVB: [u8; 4] = [0x66, 0x0f, 0x38, 0x10]; + +/// Select words from xmm1 and xmm2/m128 from mask specified in imm8 and store the values into xmm1 +/// (SSE4.1). +pub static PBLENDW: [u8; 4] = [0x66, 0x0f, 0x3a, 0x0e]; + +/// Compare packed data for equal (SSE2). +pub static PCMPEQB: [u8; 3] = [0x66, 0x0f, 0x74]; + +/// Compare packed data for equal (SSE2). +pub static PCMPEQD: [u8; 3] = [0x66, 0x0f, 0x76]; + +/// Compare packed data for equal (SSE4.1). +pub static PCMPEQQ: [u8; 4] = [0x66, 0x0f, 0x38, 0x29]; + +/// Compare packed data for equal (SSE2). +pub static PCMPEQW: [u8; 3] = [0x66, 0x0f, 0x75]; + +/// Compare packed signed byte integers for greater than (SSE2). +pub static PCMPGTB: [u8; 3] = [0x66, 0x0f, 0x64]; + +/// Compare packed signed doubleword integers for greater than (SSE2). +pub static PCMPGTD: [u8; 3] = [0x66, 0x0f, 0x66]; + +/// Compare packed signed quadword integers for greater than (SSE4.2). +pub static PCMPGTQ: [u8; 4] = [0x66, 0x0f, 0x38, 0x37]; + +/// Compare packed signed word integers for greater than (SSE2). +pub static PCMPGTW: [u8; 3] = [0x66, 0x0f, 0x65]; + +/// Extract doubleword or quadword, depending on REX.W (SSE4.1). +pub static PEXTR: [u8; 4] = [0x66, 0x0f, 0x3a, 0x16]; + +/// Extract byte (SSE4.1). +pub static PEXTRB: [u8; 4] = [0x66, 0x0f, 0x3a, 0x14]; + +/// Extract word (SSE4.1). There is a 3-byte SSE2 variant that can also move to m/16. +pub static PEXTRW: [u8; 4] = [0x66, 0x0f, 0x3a, 0x15]; + +/// Insert doubleword or quadword, depending on REX.W (SSE4.1). +pub static PINSR: [u8; 4] = [0x66, 0x0f, 0x3a, 0x22]; + +/// Insert byte (SSE4.1). +pub static PINSRB: [u8; 4] = [0x66, 0x0f, 0x3a, 0x20]; + +/// Insert word (SSE2). +pub static PINSRW: [u8; 3] = [0x66, 0x0f, 0xc4]; + +/// Compare packed signed byte integers in xmm1 and xmm2/m128 and store packed maximum values in +/// xmm1 (SSE4.1). +pub static PMAXSB: [u8; 4] = [0x66, 0x0f, 0x38, 0x3c]; + +/// Compare packed signed doubleword integers in xmm1 and xmm2/m128 and store packed maximum +/// values in xmm1 (SSE4.1). +pub static PMAXSD: [u8; 4] = [0x66, 0x0f, 0x38, 0x3d]; + +/// Compare packed signed word integers in xmm1 and xmm2/m128 and store packed maximum values in +/// xmm1 (SSE2). +pub static PMAXSW: [u8; 3] = [0x66, 0x0f, 0xee]; + +/// Compare packed unsigned byte integers in xmm1 and xmm2/m128 and store packed maximum values in +/// xmm1 (SSE2). +pub static PMAXUB: [u8; 3] = [0x66, 0x0f, 0xde]; + +/// Compare packed unsigned doubleword integers in xmm1 and xmm2/m128 and store packed maximum +/// values in xmm1 (SSE4.1). 
+pub static PMAXUD: [u8; 4] = [0x66, 0x0f, 0x38, 0x3f]; + +/// Compare packed unsigned word integers in xmm1 and xmm2/m128 and store packed maximum values in +/// xmm1 (SSE4.1). +pub static PMAXUW: [u8; 4] = [0x66, 0x0f, 0x38, 0x3e]; + +/// Compare packed signed byte integers in xmm1 and xmm2/m128 and store packed minimum values in +/// xmm1 (SSE4.1). +pub static PMINSB: [u8; 4] = [0x66, 0x0f, 0x38, 0x38]; + +/// Compare packed signed doubleword integers in xmm1 and xmm2/m128 and store packed minimum +/// values in xmm1 (SSE4.1). +pub static PMINSD: [u8; 4] = [0x66, 0x0f, 0x38, 0x39]; + +/// Compare packed signed word integers in xmm1 and xmm2/m128 and store packed minimum values in +/// xmm1 (SSE2). +pub static PMINSW: [u8; 3] = [0x66, 0x0f, 0xea]; + +/// Compare packed unsigned byte integers in xmm1 and xmm2/m128 and store packed minimum values in +/// xmm1 (SSE2). +pub static PMINUB: [u8; 3] = [0x66, 0x0f, 0xda]; + +/// Compare packed unsigned doubleword integers in xmm1 and xmm2/m128 and store packed minimum +/// values in xmm1 (SSE4.1). +pub static PMINUD: [u8; 4] = [0x66, 0x0f, 0x38, 0x3b]; + +/// Compare packed unsigned word integers in xmm1 and xmm2/m128 and store packed minimum values in +/// xmm1 (SSE4.1). +pub static PMINUW: [u8; 4] = [0x66, 0x0f, 0x38, 0x3a]; + +/// Sign extend 8 packed 8-bit integers in the low 8 bytes of xmm2/m64 to 8 packed 16-bit +/// integers in xmm1 (SSE4.1). +pub static PMOVSXBW: [u8; 4] = [0x66, 0x0f, 0x38, 0x20]; + +/// Sign extend 4 packed 16-bit integers in the low 8 bytes of xmm2/m64 to 4 packed 32-bit +/// integers in xmm1 (SSE4.1). +pub static PMOVSXWD: [u8; 4] = [0x66, 0x0f, 0x38, 0x23]; + +/// Sign extend 2 packed 32-bit integers in the low 8 bytes of xmm2/m64 to 2 packed 64-bit +/// integers in xmm1 (SSE4.1). +pub static PMOVSXDQ: [u8; 4] = [0x66, 0x0f, 0x38, 0x25]; + +/// Zero extend 8 packed 8-bit integers in the low 8 bytes of xmm2/m64 to 8 packed 16-bit +/// integers in xmm1 (SSE4.1). +pub static PMOVZXBW: [u8; 4] = [0x66, 0x0f, 0x38, 0x30]; + +/// Zero extend 4 packed 16-bit integers in the low 8 bytes of xmm2/m64 to 4 packed 32-bit +/// integers in xmm1 (SSE4.1). +pub static PMOVZXWD: [u8; 4] = [0x66, 0x0f, 0x38, 0x33]; + +/// Zero extend 2 packed 32-bit integers in the low 8 bytes of xmm2/m64 to 2 packed 64-bit +/// integers in xmm1 (SSE4.1). +pub static PMOVZXDQ: [u8; 4] = [0x66, 0x0f, 0x38, 0x35]; + +/// Multiply the packed signed word integers in xmm1 and xmm2/m128, and store the low 16 bits of +/// the results in xmm1 (SSE2). +pub static PMULLW: [u8; 3] = [0x66, 0x0f, 0xd5]; + +/// Multiply the packed doubleword signed integers in xmm1 and xmm2/m128 and store the low 32 +/// bits of each product in xmm1 (SSE4.1). +pub static PMULLD: [u8; 4] = [0x66, 0x0f, 0x38, 0x40]; + +/// Multiply the packed quadword signed integers in xmm2 and xmm3/m128 and store the low 64 +/// bits of each product in xmm1 (AVX512VL/DQ). Requires an EVEX encoding. +pub static VPMULLQ: [u8; 4] = [0x66, 0x0f, 0x38, 0x40]; + +/// Multiply packed unsigned doubleword integers in xmm1 by packed unsigned doubleword integers +/// in xmm2/m128, and store the quadword results in xmm1 (SSE2). +pub static PMULUDQ: [u8; 3] = [0x66, 0x0f, 0xf4]; + +/// Pop top of stack into r{16,32,64}; increment stack pointer. +pub static POP_REG: [u8; 1] = [0x58]; + +/// Returns the count of number of bits set to 1. +pub static POPCNT: [u8; 3] = [0xf3, 0x0f, 0xb8]; + +/// Bitwise OR of xmm2/m128 and xmm1 (SSE2). 
+pub static POR: [u8; 3] = [0x66, 0x0f, 0xeb]; + +/// Shuffle bytes in xmm1 according to contents of xmm2/m128 (SSE3). +pub static PSHUFB: [u8; 4] = [0x66, 0x0f, 0x38, 0x00]; + +/// Shuffle the doublewords in xmm2/m128 based on the encoding in imm8 and +/// store the result in xmm1 (SSE2). +pub static PSHUFD: [u8; 3] = [0x66, 0x0f, 0x70]; + +/// Shift words in xmm1 by imm8; the direction and sign-bit behavior is controlled by the RRR +/// digit used in the ModR/M byte (SSE2). +pub static PS_W_IMM: [u8; 3] = [0x66, 0x0f, 0x71]; + +/// Shift doublewords in xmm1 by imm8; the direction and sign-bit behavior is controlled by the RRR +/// digit used in the ModR/M byte (SSE2). +pub static PS_D_IMM: [u8; 3] = [0x66, 0x0f, 0x72]; + +/// Shift quadwords in xmm1 by imm8; the direction and sign-bit behavior is controlled by the RRR +/// digit used in the ModR/M byte (SSE2). +pub static PS_Q_IMM: [u8; 3] = [0x66, 0x0f, 0x73]; + +/// Shift words in xmm1 left by xmm2/m128 while shifting in 0s (SSE2). +pub static PSLLW: [u8; 3] = [0x66, 0x0f, 0xf1]; + +/// Shift doublewords in xmm1 left by xmm2/m128 while shifting in 0s (SSE2). +pub static PSLLD: [u8; 3] = [0x66, 0x0f, 0xf2]; + +/// Shift quadwords in xmm1 left by xmm2/m128 while shifting in 0s (SSE2). +pub static PSLLQ: [u8; 3] = [0x66, 0x0f, 0xf3]; + +/// Shift words in xmm1 right by xmm2/m128 while shifting in 0s (SSE2). +pub static PSRLW: [u8; 3] = [0x66, 0x0f, 0xd1]; + +/// Shift doublewords in xmm1 right by xmm2/m128 while shifting in 0s (SSE2). +pub static PSRLD: [u8; 3] = [0x66, 0x0f, 0xd2]; + +/// Shift quadwords in xmm1 right by xmm2/m128 while shifting in 0s (SSE2). +pub static PSRLQ: [u8; 3] = [0x66, 0x0f, 0xd3]; + +/// Shift words in xmm1 right by xmm2/m128 while shifting in sign bits (SSE2). +pub static PSRAW: [u8; 3] = [0x66, 0x0f, 0xe1]; + +/// Shift doublewords in xmm1 right by xmm2/m128 while shifting in sign bits (SSE2). +pub static PSRAD: [u8; 3] = [0x66, 0x0f, 0xe2]; + +/// Subtract packed byte integers in xmm2/m128 from packed byte integers in xmm1 (SSE2). +pub static PSUBB: [u8; 3] = [0x66, 0x0f, 0xf8]; + +/// Subtract packed word integers in xmm2/m128 from packed word integers in xmm1 (SSE2). +pub static PSUBW: [u8; 3] = [0x66, 0x0f, 0xf9]; + +/// Subtract packed doubleword integers in xmm2/m128 from doubleword byte integers in xmm1 (SSE2). +pub static PSUBD: [u8; 3] = [0x66, 0x0f, 0xfa]; + +/// Subtract packed quadword integers in xmm2/m128 from xmm1 (SSE2). +pub static PSUBQ: [u8; 3] = [0x66, 0x0f, 0xfb]; + +/// Subtract packed signed byte integers in xmm2/m128 from packed signed byte integers in xmm1 +/// and saturate results (SSE2). +pub static PSUBSB: [u8; 3] = [0x66, 0x0f, 0xe8]; + +/// Subtract packed signed word integers in xmm2/m128 from packed signed word integers in xmm1 +/// and saturate results (SSE2). +pub static PSUBSW: [u8; 3] = [0x66, 0x0f, 0xe9]; + +/// Subtract packed unsigned byte integers in xmm2/m128 from packed unsigned byte integers in xmm1 +/// and saturate results (SSE2). +pub static PSUBUSB: [u8; 3] = [0x66, 0x0f, 0xd8]; + +/// Subtract packed unsigned word integers in xmm2/m128 from packed unsigned word integers in xmm1 +/// and saturate results (SSE2). +pub static PSUBUSW: [u8; 3] = [0x66, 0x0f, 0xd9]; + +/// Set ZF if xmm2/m128 AND xmm1 result is all 0s; set CF if xmm2/m128 AND NOT xmm1 result is all +/// 0s (SSE4.1). +pub static PTEST: [u8; 4] = [0x66, 0x0f, 0x38, 0x17]; + +/// Unpack and interleave high-order bytes from xmm1 and xmm2/m128 into xmm1 (SSE2). 
+pub static PUNPCKHBW: [u8; 3] = [0x66, 0x0f, 0x68]; + +/// Unpack and interleave high-order words from xmm1 and xmm2/m128 into xmm1 (SSE2). +pub static PUNPCKHWD: [u8; 3] = [0x66, 0x0f, 0x69]; + +/// Unpack and interleave high-order doublewords from xmm1 and xmm2/m128 into xmm1 (SSE2). +pub static PUNPCKHDQ: [u8; 3] = [0x66, 0x0f, 0x6A]; + +/// Unpack and interleave high-order quadwords from xmm1 and xmm2/m128 into xmm1 (SSE2). +pub static PUNPCKHQDQ: [u8; 3] = [0x66, 0x0f, 0x6D]; + +/// Unpack and interleave low-order bytes from xmm1 and xmm2/m128 into xmm1 (SSE2). +pub static PUNPCKLBW: [u8; 3] = [0x66, 0x0f, 0x60]; + +/// Unpack and interleave low-order words from xmm1 and xmm2/m128 into xmm1 (SSE2). +pub static PUNPCKLWD: [u8; 3] = [0x66, 0x0f, 0x61]; + +/// Unpack and interleave low-order doublewords from xmm1 and xmm2/m128 into xmm1 (SSE2). +pub static PUNPCKLDQ: [u8; 3] = [0x66, 0x0f, 0x62]; + +/// Unpack and interleave low-order quadwords from xmm1 and xmm2/m128 into xmm1 (SSE2). +pub static PUNPCKLQDQ: [u8; 3] = [0x66, 0x0f, 0x6C]; + +/// Push r{16,32,64}. +pub static PUSH_REG: [u8; 1] = [0x50]; + +/// Logical exclusive OR (SSE2). +pub static PXOR: [u8; 3] = [0x66, 0x0f, 0xef]; + +/// Near return to calling procedure. +pub static RET_NEAR: [u8; 1] = [0xc3]; + +/// General rotation opcode. Kind of rotation depends on encoding. +pub static ROTATE_CL: [u8; 1] = [0xd3]; + +/// General rotation opcode. Kind of rotation depends on encoding. +pub static ROTATE_IMM8: [u8; 1] = [0xc1]; + +/// Round scalar doubl-precision floating-point values. +pub static ROUNDSD: [u8; 4] = [0x66, 0x0f, 0x3a, 0x0b]; + +/// Round scalar single-precision floating-point values. +pub static ROUNDSS: [u8; 4] = [0x66, 0x0f, 0x3a, 0x0a]; + +/// Subtract with borrow r{16,32,64} from r/m of the same size. +pub static SBB: [u8; 1] = [0x19]; + +/// Set byte if overflow (OF=1). +pub static SET_BYTE_IF_OVERFLOW: [u8; 2] = [0x0f, 0x90]; + +/// Compute the square root of the packed double-precision floating-point values and store the +/// result in xmm1 (SSE2). +pub static SQRTPD: [u8; 3] = [0x66, 0x0f, 0x51]; + +/// Compute the square root of the packed double-precision floating-point values and store the +/// result in xmm1 (SSE). +pub static SQRTPS: [u8; 2] = [0x0f, 0x51]; + +/// Compute square root of scalar double-precision floating-point value. +pub static SQRTSD: [u8; 3] = [0xf2, 0x0f, 0x51]; + +/// Compute square root of scalar single-precision value. +pub static SQRTSS: [u8; 3] = [0xf3, 0x0f, 0x51]; + +/// Subtract r{16,32,64} from r/m of same size. +pub static SUB: [u8; 1] = [0x29]; + +/// Subtract packed double-precision floating-point values in xmm2/mem from xmm1 and store result +/// in xmm1 (SSE2). +pub static SUBPD: [u8; 3] = [0x66, 0x0f, 0x5c]; + +/// Subtract packed single-precision floating-point values in xmm2/mem from xmm1 and store result +/// in xmm1 (SSE). +pub static SUBPS: [u8; 2] = [0x0f, 0x5c]; + +/// Subtract the low double-precision floating-point value in xmm2/m64 from xmm1 +/// and store the result in xmm1. +pub static SUBSD: [u8; 3] = [0xf2, 0x0f, 0x5c]; + +/// Subtract the low single-precision floating-point value in xmm2/m32 from xmm1 +/// and store the result in xmm1. +pub static SUBSS: [u8; 3] = [0xf3, 0x0f, 0x5c]; + +/// AND r8 with r/m8; set SF, ZF, PF according to result. +pub static TEST_BYTE_REG: [u8; 1] = [0x84]; + +/// AND {r16, r32, r64} with r/m of the same size; set SF, ZF, PF according to result. 
+pub static TEST_REG: [u8; 1] = [0x85]; + +/// Count the number of trailing zero bits. +pub static TZCNT: [u8; 3] = [0xf3, 0x0f, 0xbc]; + +/// Compare low double-precision floating-point values in xmm1 and xmm2/mem64 +/// and set the EFLAGS flags accordingly. +pub static UCOMISD: [u8; 3] = [0x66, 0x0f, 0x2e]; + +/// Compare low single-precision floating-point values in xmm1 and xmm2/mem32 +/// and set the EFLAGS flags accordingly. +pub static UCOMISS: [u8; 2] = [0x0f, 0x2e]; + +/// Raise invalid opcode instruction. +pub static UNDEFINED2: [u8; 2] = [0x0f, 0x0b]; + +/// Convert four packed unsigned doubleword integers from xmm2/m128/m32bcst to packed +/// single-precision floating-point values in xmm1 with writemask k1. Rounding behavior +/// is controlled by MXCSR but can be overriden by EVEX.L'L in static rounding mode +/// (AVX512VL, AVX512F). +pub static VCVTUDQ2PS: [u8; 3] = [0xf2, 0x0f, 0x7a]; + +/// imm{16,32} XOR r/m{16,32,64}, possibly sign-extended. +pub static XOR_IMM: [u8; 1] = [0x81]; + +/// r/m{16,32,64} XOR sign-extended imm8. +pub static XOR_IMM8_SIGN_EXTEND: [u8; 1] = [0x83]; + +/// r/m{16,32,64} XOR register of the same size. +pub static XOR: [u8; 1] = [0x31]; + +/// r/m8 XOR r8. +pub static XORB: [u8; 1] = [0x30]; + +/// Bitwise logical XOR of packed double-precision floating-point values. +pub static XORPD: [u8; 3] = [0x66, 0x0f, 0x57]; + +/// Bitwise logical XOR of packed single-precision floating-point values. +pub static XORPS: [u8; 2] = [0x0f, 0x57]; diff --git a/third_party/rust/cranelift-codegen-meta/src/isa/x86/recipes.rs b/third_party/rust/cranelift-codegen-meta/src/isa/x86/recipes.rs new file mode 100644 index 0000000000..f45f8dc673 --- /dev/null +++ b/third_party/rust/cranelift-codegen-meta/src/isa/x86/recipes.rs @@ -0,0 +1,3445 @@ +//! Encoding recipes for x86/x86_64. +use std::rc::Rc; + +use cranelift_codegen_shared::isa::x86::EncodingBits; + +use crate::cdsl::ast::Literal; +use crate::cdsl::formats::InstructionFormat; +use crate::cdsl::instructions::InstructionPredicate; +use crate::cdsl::recipes::{ + EncodingRecipe, EncodingRecipeBuilder, OperandConstraint, Register, Stack, +}; +use crate::cdsl::regs::IsaRegs; +use crate::cdsl::settings::SettingGroup; +use crate::shared::Definitions as SharedDefinitions; + +use crate::isa::x86::opcodes; + +/// Helper data structure to create recipes and template recipes. +/// It contains all the recipes and recipe templates that might be used in the encodings crate of +/// this same directory. +pub(crate) struct RecipeGroup<'builder> { + /// Memoized registers description, to pass it to builders later. + regs: &'builder IsaRegs, + + /// All the recipes explicitly created in this file. This is different from the final set of + /// recipes, which is definitive only once encodings have generated new recipes on the fly. + recipes: Vec<EncodingRecipe>, + + /// All the recipe templates created in this file. 
+ templates: Vec<Rc<Template<'builder>>>, +} + +impl<'builder> RecipeGroup<'builder> { + fn new(regs: &'builder IsaRegs) -> Self { + Self { + regs, + recipes: Vec::new(), + templates: Vec::new(), + } + } + fn add_recipe(&mut self, recipe: EncodingRecipeBuilder) { + self.recipes.push(recipe.build()); + } + fn add_template_recipe(&mut self, recipe: EncodingRecipeBuilder) -> Rc<Template<'builder>> { + let template = Rc::new(Template::new(recipe, self.regs)); + self.templates.push(template.clone()); + template + } + fn add_template_inferred( + &mut self, + recipe: EncodingRecipeBuilder, + infer_function: &'static str, + ) -> Rc<Template<'builder>> { + let template = + Rc::new(Template::new(recipe, self.regs).inferred_rex_compute_size(infer_function)); + self.templates.push(template.clone()); + template + } + fn add_template(&mut self, template: Template<'builder>) -> Rc<Template<'builder>> { + let template = Rc::new(template); + self.templates.push(template.clone()); + template + } + pub fn recipe(&self, name: &str) -> &EncodingRecipe { + self.recipes + .iter() + .find(|recipe| recipe.name == name) + .unwrap_or_else(|| panic!("unknown recipe name: {}. Try template?", name)) + } + pub fn template(&self, name: &str) -> &Template { + self.templates + .iter() + .find(|recipe| recipe.name() == name) + .unwrap_or_else(|| panic!("unknown template name: {}. Try recipe?", name)) + } +} + +// Opcode representation. +// +// Cranelift requires each recipe to have a single encoding size in bytes, and x86 opcodes are +// variable length, so we use separate recipes for different styles of opcodes and prefixes. The +// opcode format is indicated by the recipe name prefix. +// +// The match case below does not include the REX prefix which goes after the mandatory prefix. +// VEX/XOP and EVEX prefixes are not yet supported. Encodings using any of these prefixes are +// represented by separate recipes. +// +// The encoding bits are: +// +// 0-7: The opcode byte <op>. +// 8-9: pp, mandatory prefix: +// 00 none (Op*) +// 01 66 (Mp*) +// 10 F3 (Mp*) +// 11 F2 (Mp*) +// 10-11: mm, opcode map: +// 00 <op> (Op1/Mp1) +// 01 0F <op> (Op2/Mp2) +// 10 0F 38 <op> (Op3/Mp3) +// 11 0F 3A <op> (Op3/Mp3) +// 12-14 rrr, opcode bits for the ModR/M byte for certain opcodes. +// 15: REX.W bit (or VEX.W/E) +// +// There is some redundancy between bits 8-11 and the recipe names, but we have enough bits, and +// the pp+mm format is ready for supporting VEX prefixes. +// +// TODO Cranelift doesn't actually require recipe to have different encoding sizes anymore, so this +// could be simplified. + +/// Given a sequence of opcode bytes, compute the recipe name prefix and encoding bits. +fn decode_opcodes(op_bytes: &[u8], rrr: u16, w: u16) -> (&'static str, u16) { + let enc = EncodingBits::new(op_bytes, rrr, w); + (enc.prefix().recipe_name_prefix(), enc.bits()) +} + +/// Given a snippet of Rust code (or None), replace the `PUT_OP` macro with the +/// corresponding `put_*` function from the `binemit.rs` module. +fn replace_put_op(code: Option<String>, prefix: &str) -> Option<String> { + code.map(|code| code.replace("{{PUT_OP}}", &format!("put_{}", prefix.to_lowercase()))) +} + +/// Replaces constraints to a REX-prefixed register class by the equivalent non-REX register class. 
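+// Without a REX prefix, the register fields of the ModR/M byte are only three bits wide, so only
+// the first eight registers of a bank can be named. That is why the helper below narrows GPR/FPR
+// constraints to the GPR8/FPR8 classes for the non-REX variants of a recipe.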
+fn replace_nonrex_constraints( + regs: &IsaRegs, + constraints: Vec<OperandConstraint>, +) -> Vec<OperandConstraint> { + constraints + .into_iter() + .map(|constraint| match constraint { + OperandConstraint::RegClass(rc_index) => { + let new_rc_index = if rc_index == regs.class_by_name("GPR") { + regs.class_by_name("GPR8") + } else if rc_index == regs.class_by_name("FPR") { + regs.class_by_name("FPR8") + } else { + rc_index + }; + OperandConstraint::RegClass(new_rc_index) + } + _ => constraint, + }) + .collect() +} + +fn replace_evex_constraints( + _: &IsaRegs, + constraints: Vec<OperandConstraint>, +) -> Vec<OperandConstraint> { + constraints + .into_iter() + .map(|constraint| match constraint { + OperandConstraint::RegClass(rc_index) => { + // FIXME(#1306) this should be able to upgrade the register class to FPR32 as in + // `replace_nonrex_constraints` above, e.g. When FPR32 is re-added, add back in the + // rc_index conversion to FPR32. In the meantime, this is effectively a no-op + // conversion--the register class stays the same. + OperandConstraint::RegClass(rc_index) + } + _ => constraint, + }) + .collect() +} + +/// Specifies how the prefix (e.g. REX) is emitted by a Recipe. +#[derive(Copy, Clone, PartialEq)] +pub enum RecipePrefixKind { + /// The REX emission behavior is not hardcoded for the Recipe + /// and may be overridden when using the Template. + Unspecified, + + /// The Recipe must hardcode the non-emission of the REX prefix. + NeverEmitRex, + + /// The Recipe must hardcode the emission of the REX prefix. + AlwaysEmitRex, + + /// The Recipe should infer the emission of the REX.RXB bits from registers, + /// and the REX.W bit from the EncodingBits. + /// + /// Because such a Recipe has a non-constant instruction size, it must have + /// a special `compute_size` handler for the inferrable-REX case. + InferRex, + + /// The Recipe must hardcode the emission of an EVEX prefix. + Evex, +} + +impl Default for RecipePrefixKind { + fn default() -> Self { + Self::Unspecified + } +} + +/// Previously called a TailRecipe in the Python meta language, this allows to create multiple +/// variants of a single base EncodingRecipe (rex prefix, specialized w/rrr bits, different +/// opcodes). It serves as a prototype of an EncodingRecipe, which is then used when actually creating +/// Encodings, in encodings.rs. This is an idiosyncrasy of the x86 meta-language, and could be +/// reconsidered later. +#[derive(Clone)] +pub(crate) struct Template<'builder> { + /// Description of registers, used in the build() method. + regs: &'builder IsaRegs, + + /// The recipe template, which is to be specialized (by copy). + recipe: EncodingRecipeBuilder, + + /// How is the REX prefix emitted? + rex_kind: RecipePrefixKind, + + /// Function for `compute_size()` when REX is inferrable. + inferred_rex_compute_size: Option<&'static str>, + + /// Other recipe to use when REX-prefixed. + when_prefixed: Option<Rc<Template<'builder>>>, + + // Parameters passed in the EncodingBits. + /// Value of the W bit (0 or 1), stored in the EncodingBits. + w_bit: u16, + /// Value of the RRR bits (between 0 and 0b111). + rrr_bits: u16, + /// Opcode bytes. 
+ op_bytes: &'static [u8], +} + +impl<'builder> Template<'builder> { + fn new(recipe: EncodingRecipeBuilder, regs: &'builder IsaRegs) -> Self { + Self { + regs, + recipe, + rex_kind: RecipePrefixKind::default(), + inferred_rex_compute_size: None, + when_prefixed: None, + w_bit: 0, + rrr_bits: 0, + op_bytes: &opcodes::EMPTY, + } + } + + fn name(&self) -> &str { + &self.recipe.name + } + fn rex_kind(self, kind: RecipePrefixKind) -> Self { + Self { + rex_kind: kind, + ..self + } + } + fn inferred_rex_compute_size(self, function: &'static str) -> Self { + Self { + inferred_rex_compute_size: Some(function), + ..self + } + } + fn when_prefixed(self, template: Rc<Template<'builder>>) -> Self { + assert!(self.when_prefixed.is_none()); + Self { + when_prefixed: Some(template), + ..self + } + } + + // Copy setters. + pub fn opcodes(&self, op_bytes: &'static [u8]) -> Self { + assert!(!op_bytes.is_empty()); + let mut copy = self.clone(); + copy.op_bytes = op_bytes; + copy + } + pub fn w(&self) -> Self { + let mut copy = self.clone(); + copy.w_bit = 1; + copy + } + pub fn rrr(&self, value: u16) -> Self { + assert!(value <= 0b111); + let mut copy = self.clone(); + copy.rrr_bits = value; + copy + } + pub fn nonrex(&self) -> Self { + assert!( + self.rex_kind != RecipePrefixKind::AlwaysEmitRex, + "Template requires REX prefix." + ); + let mut copy = self.clone(); + copy.rex_kind = RecipePrefixKind::NeverEmitRex; + copy + } + pub fn rex(&self) -> Self { + assert!( + self.rex_kind != RecipePrefixKind::NeverEmitRex, + "Template requires no REX prefix." + ); + if let Some(prefixed) = &self.when_prefixed { + let mut ret = prefixed.rex(); + // Forward specialized parameters. + ret.op_bytes = self.op_bytes; + ret.w_bit = self.w_bit; + ret.rrr_bits = self.rrr_bits; + return ret; + } + let mut copy = self.clone(); + copy.rex_kind = RecipePrefixKind::AlwaysEmitRex; + copy + } + pub fn infer_rex(&self) -> Self { + assert!( + self.rex_kind != RecipePrefixKind::NeverEmitRex, + "Template requires no REX prefix." + ); + assert!( + self.when_prefixed.is_none(), + "infer_rex used with when_prefixed()." + ); + let mut copy = self.clone(); + copy.rex_kind = RecipePrefixKind::InferRex; + copy + } + + pub fn build(mut self) -> (EncodingRecipe, u16) { + let (opcode, bits) = decode_opcodes(&self.op_bytes, self.rrr_bits, self.w_bit); + + let (recipe_name, size_addendum) = match self.rex_kind { + RecipePrefixKind::Unspecified | RecipePrefixKind::NeverEmitRex => { + // Ensure the operands are limited to non-REX constraints. + let operands_in = self.recipe.operands_in.unwrap_or_default(); + self.recipe.operands_in = Some(replace_nonrex_constraints(self.regs, operands_in)); + let operands_out = self.recipe.operands_out.unwrap_or_default(); + self.recipe.operands_out = + Some(replace_nonrex_constraints(self.regs, operands_out)); + + (opcode.into(), self.op_bytes.len() as u64) + } + RecipePrefixKind::AlwaysEmitRex => { + ("Rex".to_string() + opcode, self.op_bytes.len() as u64 + 1) + } + RecipePrefixKind::InferRex => { + assert_eq!(self.w_bit, 0, "A REX.W bit always requires a REX prefix; avoid using `infer_rex().w()` and use `rex().w()` instead."); + // Hook up the right function for inferred compute_size(). 
+ assert!( + self.inferred_rex_compute_size.is_some(), + "InferRex recipe '{}' needs an inferred_rex_compute_size function.", + &self.recipe.name + ); + self.recipe.compute_size = self.inferred_rex_compute_size; + + ("DynRex".to_string() + opcode, self.op_bytes.len() as u64) + } + RecipePrefixKind::Evex => { + // Allow the operands to expand limits to EVEX constraints. + let operands_in = self.recipe.operands_in.unwrap_or_default(); + self.recipe.operands_in = Some(replace_evex_constraints(self.regs, operands_in)); + let operands_out = self.recipe.operands_out.unwrap_or_default(); + self.recipe.operands_out = Some(replace_evex_constraints(self.regs, operands_out)); + + ("Evex".to_string() + opcode, 4 + 1) + } + }; + + self.recipe.base_size += size_addendum; + + // Branch ranges are relative to the end of the instruction. + // For InferRex, the range should be the minimum, assuming no REX. + if let Some(range) = self.recipe.branch_range.as_mut() { + range.inst_size += size_addendum; + } + + self.recipe.emit = replace_put_op(self.recipe.emit, &recipe_name); + self.recipe.name = recipe_name + &self.recipe.name; + + (self.recipe.build(), bits) + } +} + +/// Returns a predicate checking that the "cond" field of the instruction contains one of the +/// directly supported floating point condition codes. +fn supported_floatccs_predicate( + supported_cc: &[Literal], + format: &InstructionFormat, +) -> InstructionPredicate { + supported_cc + .iter() + .fold(InstructionPredicate::new(), |pred, literal| { + pred.or(InstructionPredicate::new_is_field_equal( + format, + "cond", + literal.to_rust_code(), + )) + }) +} + +/// Return an instruction predicate that checks if `iform.imm` is a valid `scale` for a SIB byte. +fn valid_scale(format: &InstructionFormat) -> InstructionPredicate { + ["1", "2", "4", "8"] + .iter() + .fold(InstructionPredicate::new(), |pred, &literal| { + pred.or(InstructionPredicate::new_is_field_equal( + format, + "imm", + literal.into(), + )) + }) +} + +pub(crate) fn define<'shared>( + shared_defs: &'shared SharedDefinitions, + settings: &'shared SettingGroup, + regs: &'shared IsaRegs, +) -> RecipeGroup<'shared> { + // The set of floating point condition codes that are directly supported. + // Other condition codes need to be reversed or expressed as two tests. + let floatcc = &shared_defs.imm.floatcc; + let supported_floatccs: Vec<Literal> = ["ord", "uno", "one", "ueq", "gt", "ge", "ult", "ule"] + .iter() + .map(|name| Literal::enumerator_for(floatcc, name)) + .collect(); + + // Register classes shorthands. + let abcd = regs.class_by_name("ABCD"); + let gpr = regs.class_by_name("GPR"); + let fpr = regs.class_by_name("FPR"); + let flag = regs.class_by_name("FLAG"); + + // Operand constraints shorthands. + let reg_rflags = Register::new(flag, regs.regunit_by_name(flag, "rflags")); + let reg_rax = Register::new(gpr, regs.regunit_by_name(gpr, "rax")); + let reg_rcx = Register::new(gpr, regs.regunit_by_name(gpr, "rcx")); + let reg_rdx = Register::new(gpr, regs.regunit_by_name(gpr, "rdx")); + let reg_r15 = Register::new(gpr, regs.regunit_by_name(gpr, "r15")); + let reg_xmm0 = Register::new(fpr, regs.regunit_by_name(fpr, "xmm0")); + + // Stack operand with a 32-bit signed displacement from either RBP or RSP. + let stack_gpr32 = Stack::new(gpr); + let stack_fpr32 = Stack::new(fpr); + + let formats = &shared_defs.formats; + + // Predicates shorthands. + let use_sse41 = settings.predicate_by_name("use_sse41"); + + // Definitions. 
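+    // The templates below are consumed from encodings.rs, where they get specialized before
+    // being built into final recipes. As a rough sketch (using the XOR opcode from opcodes.rs),
+    // a REX-prefixed variant of the `rr` template could be derived like this:
+    //
+    //     let (recipe, bits) = recipes.template("rr")
+    //         .opcodes(&opcodes::XOR) // 0x31, single-byte opcode map => "Op1"
+    //         .rex()                  // hardcode the REX prefix => name "RexOp1rr"
+    //         .build();
+    //
+    // Following the bit layout documented above, `bits` would be 0x0031 here (op = 0x31,
+    // pp = 00, mm = 00, rrr = 0, W = 0), and build() adds the opcode length plus one REX byte
+    // to the recipe's base size.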
+ let mut recipes = RecipeGroup::new(regs); + + // A null unary instruction that takes a GPR register. Can be used for identity copies and + // no-op conversions. + recipes.add_recipe( + EncodingRecipeBuilder::new("null", &formats.unary, 0) + .operands_in(vec![gpr]) + .operands_out(vec![0]) + .emit(""), + ); + recipes.add_recipe( + EncodingRecipeBuilder::new("null_fpr", &formats.unary, 0) + .operands_in(vec![fpr]) + .operands_out(vec![0]) + .emit(""), + ); + recipes.add_recipe( + EncodingRecipeBuilder::new("stacknull", &formats.unary, 0) + .operands_in(vec![stack_gpr32]) + .operands_out(vec![stack_gpr32]) + .emit(""), + ); + + recipes.add_recipe( + EncodingRecipeBuilder::new("get_pinned_reg", &formats.nullary, 0) + .operands_out(vec![reg_r15]) + .emit(""), + ); + // umr with a fixed register output that's r15. + recipes.add_template_recipe( + EncodingRecipeBuilder::new("set_pinned_reg", &formats.unary, 1) + .operands_in(vec![gpr]) + .clobbers_flags(false) + .emit( + r#" + let r15 = RU::r15.into(); + {{PUT_OP}}(bits, rex2(r15, in_reg0), sink); + modrm_rr(r15, in_reg0, sink); + "#, + ), + ); + + // No-op fills, created by late-stage redundant-fill removal. + recipes.add_recipe( + EncodingRecipeBuilder::new("fillnull", &formats.unary, 0) + .operands_in(vec![stack_gpr32]) + .operands_out(vec![gpr]) + .clobbers_flags(false) + .emit(""), + ); + recipes.add_recipe( + EncodingRecipeBuilder::new("ffillnull", &formats.unary, 0) + .operands_in(vec![stack_gpr32]) + .operands_out(vec![fpr]) + .clobbers_flags(false) + .emit(""), + ); + + recipes.add_recipe( + EncodingRecipeBuilder::new("debugtrap", &formats.nullary, 1).emit("sink.put1(0xcc);"), + ); + + // XX opcode, no ModR/M. + recipes.add_template_recipe(EncodingRecipeBuilder::new("trap", &formats.trap, 0).emit( + r#" + sink.trap(code, func.srclocs[inst]); + {{PUT_OP}}(bits, BASE_REX, sink); + "#, + )); + + // Macro: conditional jump over a ud2. + recipes.add_recipe( + EncodingRecipeBuilder::new("trapif", &formats.int_cond_trap, 4) + .operands_in(vec![reg_rflags]) + .clobbers_flags(false) + .emit( + r#" + // Jump over a 2-byte ud2. + sink.put1(0x70 | (icc2opc(cond.inverse()) as u8)); + sink.put1(2); + // ud2. + sink.trap(code, func.srclocs[inst]); + sink.put1(0x0f); + sink.put1(0x0b); + "#, + ), + ); + + recipes.add_recipe( + EncodingRecipeBuilder::new("trapff", &formats.float_cond_trap, 4) + .operands_in(vec![reg_rflags]) + .clobbers_flags(false) + .inst_predicate(supported_floatccs_predicate( + &supported_floatccs, + &*formats.float_cond_trap, + )) + .emit( + r#" + // Jump over a 2-byte ud2. + sink.put1(0x70 | (fcc2opc(cond.inverse()) as u8)); + sink.put1(2); + // ud2. + sink.trap(code, func.srclocs[inst]); + sink.put1(0x0f); + sink.put1(0x0b); + "#, + ), + ); + + // XX /r + recipes.add_template_inferred( + EncodingRecipeBuilder::new("rr", &formats.binary, 1) + .operands_in(vec![gpr, gpr]) + .operands_out(vec![0]) + .emit( + r#" + {{PUT_OP}}(bits, rex2(in_reg0, in_reg1), sink); + modrm_rr(in_reg0, in_reg1, sink); + "#, + ), + "size_with_inferred_rex_for_inreg0_inreg1", + ); + + // XX /r with operands swapped. (RM form). + recipes.add_template_inferred( + EncodingRecipeBuilder::new("rrx", &formats.binary, 1) + .operands_in(vec![gpr, gpr]) + .operands_out(vec![0]) + .emit( + r#" + {{PUT_OP}}(bits, rex2(in_reg1, in_reg0), sink); + modrm_rr(in_reg1, in_reg0, sink); + "#, + ), + "size_with_inferred_rex_for_inreg0_inreg1", + ); + + // XX /r with FPR ins and outs. A form. 
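+    // A note on the opcode shorthand used in the recipe comments: "XX /r" means the instruction
+    // has a ModR/M byte whose reg field names a register operand, "XX /n" means the reg field
+    // carries the opcode extension `n` (supplied through the rrr bits), "XX+rd" means the
+    // register is encoded in the low three bits of the opcode byte, and ib/id/iq denote an
+    // 8/32/64-bit immediate, following the Intel manual conventions.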
+ recipes.add_template_inferred( + EncodingRecipeBuilder::new("fa", &formats.binary, 1) + .operands_in(vec![fpr, fpr]) + .operands_out(vec![0]) + .emit( + r#" + {{PUT_OP}}(bits, rex2(in_reg1, in_reg0), sink); + modrm_rr(in_reg1, in_reg0, sink); + "#, + ), + "size_with_inferred_rex_for_inreg0_inreg1", + ); + + // XX /r with FPR ins and outs. A form with input operands swapped. + recipes.add_template_inferred( + EncodingRecipeBuilder::new("fax", &formats.binary, 1) + .operands_in(vec![fpr, fpr]) + .operands_out(vec![1]) + .emit( + r#" + {{PUT_OP}}(bits, rex2(in_reg0, in_reg1), sink); + modrm_rr(in_reg0, in_reg1, sink); + "#, + ), + // The operand order does not matter for calculating whether a REX prefix is needed. + "size_with_inferred_rex_for_inreg0_inreg1", + ); + + // XX /r with FPR ins and outs. A form with a byte immediate. + { + recipes.add_template_inferred( + EncodingRecipeBuilder::new("fa_ib", &formats.ternary_imm8, 2) + .operands_in(vec![fpr, fpr]) + .operands_out(vec![0]) + .inst_predicate(InstructionPredicate::new_is_unsigned_int( + &*formats.ternary_imm8, + "imm", + 8, + 0, + )) + .emit( + r#" + {{PUT_OP}}(bits, rex2(in_reg1, in_reg0), sink); + modrm_rr(in_reg1, in_reg0, sink); + let imm: i64 = imm.into(); + sink.put1(imm as u8); + "#, + ), + "size_with_inferred_rex_for_inreg0_inreg1", + ); + } + + // XX /n for a unary operation with extension bits. + recipes.add_template( + Template::new( + EncodingRecipeBuilder::new("ur", &formats.unary, 1) + .operands_in(vec![gpr]) + .operands_out(vec![0]) + .emit( + r#" + {{PUT_OP}}(bits, rex1(in_reg0), sink); + modrm_r_bits(in_reg0, bits, sink); + "#, + ), + regs, + ) + .inferred_rex_compute_size("size_with_inferred_rex_for_inreg0"), + ); + + // XX /r, but for a unary operator with separate input/output register, like + // copies. MR form, preserving flags. + recipes.add_template( + Template::new( + EncodingRecipeBuilder::new("umr", &formats.unary, 1) + .operands_in(vec![gpr]) + .operands_out(vec![gpr]) + .clobbers_flags(false) + .emit( + r#" + {{PUT_OP}}(bits, rex2(out_reg0, in_reg0), sink); + modrm_rr(out_reg0, in_reg0, sink); + "#, + ), + regs, + ) + .inferred_rex_compute_size("size_with_inferred_rex_for_inreg0_outreg0"), + ); + + // Same as umr, but with FPR -> GPR registers. + recipes.add_template_recipe( + EncodingRecipeBuilder::new("rfumr", &formats.unary, 1) + .operands_in(vec![fpr]) + .operands_out(vec![gpr]) + .clobbers_flags(false) + .emit( + r#" + {{PUT_OP}}(bits, rex2(out_reg0, in_reg0), sink); + modrm_rr(out_reg0, in_reg0, sink); + "#, + ), + ); + + // Same as umr, but with the source register specified directly. + recipes.add_template_recipe( + EncodingRecipeBuilder::new("umr_reg_to_ssa", &formats.copy_to_ssa, 1) + // No operands_in to mention, because a source register is specified directly. + .operands_out(vec![gpr]) + .clobbers_flags(false) + .emit( + r#" + {{PUT_OP}}(bits, rex2(out_reg0, src), sink); + modrm_rr(out_reg0, src, sink); + "#, + ), + ); + + // XX /r, but for a unary operator with separate input/output register. + // RM form. Clobbers FLAGS. + recipes.add_template_recipe( + EncodingRecipeBuilder::new("urm", &formats.unary, 1) + .operands_in(vec![gpr]) + .operands_out(vec![gpr]) + .emit( + r#" + {{PUT_OP}}(bits, rex2(in_reg0, out_reg0), sink); + modrm_rr(in_reg0, out_reg0, sink); + "#, + ), + ); + + // XX /r. Same as urm, but doesn't clobber FLAGS. 
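+    // As the MR/RM annotations above indicate, `rex2(rm, reg)` and `modrm_rr(rm, reg)` take the
+    // r/m operand first and the reg-field operand second: MR-form recipes such as `rr` and `umr`
+    // pass the destination register first, while RM-form recipes such as `rrx` and `urm` pass
+    // the source first.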
+ let urm_noflags = recipes.add_template_recipe( + EncodingRecipeBuilder::new("urm_noflags", &formats.unary, 1) + .operands_in(vec![gpr]) + .operands_out(vec![gpr]) + .clobbers_flags(false) + .emit( + r#" + {{PUT_OP}}(bits, rex2(in_reg0, out_reg0), sink); + modrm_rr(in_reg0, out_reg0, sink); + "#, + ), + ); + + // XX /r. Same as urm_noflags, but input limited to ABCD. + recipes.add_template( + Template::new( + EncodingRecipeBuilder::new("urm_noflags_abcd", &formats.unary, 1) + .operands_in(vec![abcd]) + .operands_out(vec![gpr]) + .clobbers_flags(false) + .emit( + r#" + {{PUT_OP}}(bits, rex2(in_reg0, out_reg0), sink); + modrm_rr(in_reg0, out_reg0, sink); + "#, + ), + regs, + ) + .when_prefixed(urm_noflags), + ); + + // XX /r, RM form, FPR -> FPR. + recipes.add_template_inferred( + EncodingRecipeBuilder::new("furm", &formats.unary, 1) + .operands_in(vec![fpr]) + .operands_out(vec![fpr]) + .clobbers_flags(false) + .emit( + r#" + {{PUT_OP}}(bits, rex2(in_reg0, out_reg0), sink); + modrm_rr(in_reg0, out_reg0, sink); + "#, + ), + "size_with_inferred_rex_for_inreg0_outreg0", + ); + + // Same as furm, but with the source register specified directly. + recipes.add_template_recipe( + EncodingRecipeBuilder::new("furm_reg_to_ssa", &formats.copy_to_ssa, 1) + // No operands_in to mention, because a source register is specified directly. + .operands_out(vec![fpr]) + .clobbers_flags(false) + .emit( + r#" + {{PUT_OP}}(bits, rex2(src, out_reg0), sink); + modrm_rr(src, out_reg0, sink); + "#, + ), + ); + + // XX /r, RM form, GPR -> FPR. + recipes.add_template_inferred( + EncodingRecipeBuilder::new("frurm", &formats.unary, 1) + .operands_in(vec![gpr]) + .operands_out(vec![fpr]) + .clobbers_flags(false) + .emit( + r#" + {{PUT_OP}}(bits, rex2(in_reg0, out_reg0), sink); + modrm_rr(in_reg0, out_reg0, sink); + "#, + ), + "size_with_inferred_rex_for_inreg0_outreg0", + ); + + // XX /r, RM form, FPR -> GPR. + recipes.add_template_recipe( + EncodingRecipeBuilder::new("rfurm", &formats.unary, 1) + .operands_in(vec![fpr]) + .operands_out(vec![gpr]) + .clobbers_flags(false) + .emit( + r#" + {{PUT_OP}}(bits, rex2(in_reg0, out_reg0), sink); + modrm_rr(in_reg0, out_reg0, sink); + "#, + ), + ); + + // XX /r, RMI form for one of the roundXX SSE 4.1 instructions. + recipes.add_template_recipe( + EncodingRecipeBuilder::new("furmi_rnd", &formats.unary, 2) + .operands_in(vec![fpr]) + .operands_out(vec![fpr]) + .isa_predicate(use_sse41) + .emit( + r#" + {{PUT_OP}}(bits, rex2(in_reg0, out_reg0), sink); + modrm_rr(in_reg0, out_reg0, sink); + sink.put1(match opcode { + Opcode::Nearest => 0b00, + Opcode::Floor => 0b01, + Opcode::Ceil => 0b10, + Opcode::Trunc => 0b11, + x => panic!("{} unexpected for furmi_rnd", opcode), + }); + "#, + ), + ); + + // XX /r, for regmove instructions. + recipes.add_template_recipe( + EncodingRecipeBuilder::new("rmov", &formats.reg_move, 1) + .operands_in(vec![gpr]) + .clobbers_flags(false) + .emit( + r#" + {{PUT_OP}}(bits, rex2(dst, src), sink); + modrm_rr(dst, src, sink); + "#, + ), + ); + + // XX /r, for regmove instructions (FPR version, RM encoded). + recipes.add_template_recipe( + EncodingRecipeBuilder::new("frmov", &formats.reg_move, 1) + .operands_in(vec![fpr]) + .clobbers_flags(false) + .emit( + r#" + {{PUT_OP}}(bits, rex2(src, dst), sink); + modrm_rr(src, dst, sink); + "#, + ), + ); + + // XX /n with one arg in %rcx, for shifts. 
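+    // The `when_prefixed(...)` hook used by urm_noflags_abcd above (and by some store recipes
+    // below) means that requesting `.rex()` on the ABCD-restricted template forwards to its
+    // unrestricted twin: with a REX prefix the low byte of every GPR becomes encodable, so the
+    // ABCD register class is only needed for the non-REX encoding.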
+ recipes.add_template_recipe( + EncodingRecipeBuilder::new("rc", &formats.binary, 1) + .operands_in(vec![ + OperandConstraint::RegClass(gpr), + OperandConstraint::FixedReg(reg_rcx), + ]) + .operands_out(vec![0]) + .emit( + r#" + {{PUT_OP}}(bits, rex1(in_reg0), sink); + modrm_r_bits(in_reg0, bits, sink); + "#, + ), + ); + + // XX /n for division: inputs in %rax, %rdx, r. Outputs in %rax, %rdx. + recipes.add_template( + Template::new( + EncodingRecipeBuilder::new("div", &formats.ternary, 1) + .operands_in(vec![ + OperandConstraint::FixedReg(reg_rax), + OperandConstraint::FixedReg(reg_rdx), + OperandConstraint::RegClass(gpr), + ]) + .operands_out(vec![reg_rax, reg_rdx]) + .emit( + r#" + sink.trap(TrapCode::IntegerDivisionByZero, func.srclocs[inst]); + {{PUT_OP}}(bits, rex1(in_reg2), sink); + modrm_r_bits(in_reg2, bits, sink); + "#, + ), + regs, + ) + .inferred_rex_compute_size("size_with_inferred_rex_for_inreg2"), + ); + + // XX /n for {s,u}mulx: inputs in %rax, r. Outputs in %rdx(hi):%rax(lo) + recipes.add_template( + Template::new( + EncodingRecipeBuilder::new("mulx", &formats.binary, 1) + .operands_in(vec![ + OperandConstraint::FixedReg(reg_rax), + OperandConstraint::RegClass(gpr), + ]) + .operands_out(vec![ + OperandConstraint::FixedReg(reg_rax), + OperandConstraint::FixedReg(reg_rdx), + ]) + .emit( + r#" + {{PUT_OP}}(bits, rex1(in_reg1), sink); + modrm_r_bits(in_reg1, bits, sink); + "#, + ), + regs, + ) + .inferred_rex_compute_size("size_with_inferred_rex_for_inreg1"), + ); + + // XX /r for BLEND* instructions + recipes.add_template_inferred( + EncodingRecipeBuilder::new("blend", &formats.ternary, 1) + .operands_in(vec![ + OperandConstraint::FixedReg(reg_xmm0), + OperandConstraint::RegClass(fpr), + OperandConstraint::RegClass(fpr), + ]) + .operands_out(vec![2]) + .emit( + r#" + {{PUT_OP}}(bits, rex2(in_reg1, in_reg2), sink); + modrm_rr(in_reg1, in_reg2, sink); + "#, + ), + "size_with_inferred_rex_for_inreg1_inreg2", + ); + + // XX /n ib with 8-bit immediate sign-extended. + { + recipes.add_template_inferred( + EncodingRecipeBuilder::new("r_ib", &formats.binary_imm64, 2) + .operands_in(vec![gpr]) + .operands_out(vec![0]) + .inst_predicate(InstructionPredicate::new_is_signed_int( + &*formats.binary_imm64, + "imm", + 8, + 0, + )) + .emit( + r#" + {{PUT_OP}}(bits, rex1(in_reg0), sink); + modrm_r_bits(in_reg0, bits, sink); + let imm: i64 = imm.into(); + sink.put1(imm as u8); + "#, + ), + "size_with_inferred_rex_for_inreg0", + ); + + recipes.add_template_inferred( + EncodingRecipeBuilder::new("f_ib", &formats.binary_imm64, 2) + .operands_in(vec![fpr]) + .operands_out(vec![0]) + .inst_predicate(InstructionPredicate::new_is_signed_int( + &*formats.binary_imm64, + "imm", + 8, + 0, + )) + .emit( + r#" + {{PUT_OP}}(bits, rex1(in_reg0), sink); + modrm_r_bits(in_reg0, bits, sink); + let imm: i64 = imm.into(); + sink.put1(imm as u8); + "#, + ), + "size_with_inferred_rex_for_inreg0", + ); + + // XX /n id with 32-bit immediate sign-extended. + recipes.add_template( + Template::new( + EncodingRecipeBuilder::new("r_id", &formats.binary_imm64, 5) + .operands_in(vec![gpr]) + .operands_out(vec![0]) + .inst_predicate(InstructionPredicate::new_is_signed_int( + &*formats.binary_imm64, + "imm", + 32, + 0, + )) + .emit( + r#" + {{PUT_OP}}(bits, rex1(in_reg0), sink); + modrm_r_bits(in_reg0, bits, sink); + let imm: i64 = imm.into(); + sink.put4(imm as u32); + "#, + ), + regs, + ) + .inferred_rex_compute_size("size_with_inferred_rex_for_inreg0"), + ); + } + + // XX /r ib with 8-bit unsigned immediate (e.g. 
for pshufd) + { + recipes.add_template_inferred( + EncodingRecipeBuilder::new("r_ib_unsigned_fpr", &formats.binary_imm8, 2) + .operands_in(vec![fpr]) + .operands_out(vec![fpr]) + .inst_predicate(InstructionPredicate::new_is_unsigned_int( + &*formats.binary_imm8, + "imm", + 8, + 0, + )) + .emit( + r#" + {{PUT_OP}}(bits, rex2(in_reg0, out_reg0), sink); + modrm_rr(in_reg0, out_reg0, sink); + let imm: i64 = imm.into(); + sink.put1(imm as u8); + "#, + ), + "size_with_inferred_rex_for_inreg0_outreg0", + ); + } + + // XX /r ib with 8-bit unsigned immediate (e.g. for extractlane) + { + recipes.add_template_inferred( + EncodingRecipeBuilder::new("r_ib_unsigned_gpr", &formats.binary_imm8, 2) + .operands_in(vec![fpr]) + .operands_out(vec![gpr]) + .inst_predicate(InstructionPredicate::new_is_unsigned_int( + &*formats.binary_imm8, "imm", 8, 0, + )) + .emit( + r#" + {{PUT_OP}}(bits, rex2(out_reg0, in_reg0), sink); + modrm_rr(out_reg0, in_reg0, sink); // note the flipped register in the ModR/M byte + let imm: i64 = imm.into(); + sink.put1(imm as u8); + "#, + ), "size_with_inferred_rex_for_inreg0_outreg0" + ); + } + + // XX /r ib with 8-bit unsigned immediate (e.g. for insertlane) + { + recipes.add_template_inferred( + EncodingRecipeBuilder::new("r_ib_unsigned_r", &formats.ternary_imm8, 2) + .operands_in(vec![fpr, gpr]) + .operands_out(vec![0]) + .inst_predicate(InstructionPredicate::new_is_unsigned_int( + &*formats.ternary_imm8, + "imm", + 8, + 0, + )) + .emit( + r#" + {{PUT_OP}}(bits, rex2(in_reg1, in_reg0), sink); + modrm_rr(in_reg1, in_reg0, sink); + let imm: i64 = imm.into(); + sink.put1(imm as u8); + "#, + ), + "size_with_inferred_rex_for_inreg0_inreg1", + ); + } + + { + // XX /n id with 32-bit immediate sign-extended. UnaryImm version. + recipes.add_template_recipe( + EncodingRecipeBuilder::new("u_id", &formats.unary_imm, 5) + .operands_out(vec![gpr]) + .inst_predicate(InstructionPredicate::new_is_signed_int( + &*formats.unary_imm, + "imm", + 32, + 0, + )) + .emit( + r#" + {{PUT_OP}}(bits, rex1(out_reg0), sink); + modrm_r_bits(out_reg0, bits, sink); + let imm: i64 = imm.into(); + sink.put4(imm as u32); + "#, + ), + ); + } + + // XX+rd id unary with 32-bit immediate. Note no recipe predicate. + recipes.add_template_recipe( + EncodingRecipeBuilder::new("pu_id", &formats.unary_imm, 4) + .operands_out(vec![gpr]) + .emit( + r#" + // The destination register is encoded in the low bits of the opcode. + // No ModR/M. + {{PUT_OP}}(bits | (out_reg0 & 7), rex1(out_reg0), sink); + let imm: i64 = imm.into(); + sink.put4(imm as u32); + "#, + ), + ); + + // XX+rd id unary with bool immediate. Note no recipe predicate. + recipes.add_template_recipe( + EncodingRecipeBuilder::new("pu_id_bool", &formats.unary_bool, 4) + .operands_out(vec![gpr]) + .emit( + r#" + // The destination register is encoded in the low bits of the opcode. + // No ModR/M. + {{PUT_OP}}(bits | (out_reg0 & 7), rex1(out_reg0), sink); + let imm: u32 = if imm { 1 } else { 0 }; + sink.put4(imm); + "#, + ), + ); + + // XX+rd id nullary with 0 as 32-bit immediate. Note no recipe predicate. + recipes.add_template_recipe( + EncodingRecipeBuilder::new("pu_id_ref", &formats.nullary, 4) + .operands_out(vec![gpr]) + .emit( + r#" + // The destination register is encoded in the low bits of the opcode. + // No ModR/M. + {{PUT_OP}}(bits | (out_reg0 & 7), rex1(out_reg0), sink); + sink.put4(0); + "#, + ), + ); + + // XX+rd iq unary with 64-bit immediate. 
+ recipes.add_template_recipe( + EncodingRecipeBuilder::new("pu_iq", &formats.unary_imm, 8) + .operands_out(vec![gpr]) + .emit( + r#" + {{PUT_OP}}(bits | (out_reg0 & 7), rex1(out_reg0), sink); + let imm: i64 = imm.into(); + sink.put8(imm as u64); + "#, + ), + ); + + // XX+rd id unary with zero immediate. + recipes.add_template_recipe( + EncodingRecipeBuilder::new("u_id_z", &formats.unary_imm, 1) + .operands_out(vec![gpr]) + .emit( + r#" + {{PUT_OP}}(bits, rex2(out_reg0, out_reg0), sink); + modrm_rr(out_reg0, out_reg0, sink); + "#, + ), + ); + + // XX /n Unary with floating point 32-bit immediate equal to zero. + { + recipes.add_template_recipe( + EncodingRecipeBuilder::new("f32imm_z", &formats.unary_ieee32, 1) + .operands_out(vec![fpr]) + .inst_predicate(InstructionPredicate::new_is_zero_32bit_float( + &*formats.unary_ieee32, + "imm", + )) + .emit( + r#" + {{PUT_OP}}(bits, rex2(out_reg0, out_reg0), sink); + modrm_rr(out_reg0, out_reg0, sink); + "#, + ), + ); + } + + // XX /n Unary with floating point 64-bit immediate equal to zero. + { + recipes.add_template_recipe( + EncodingRecipeBuilder::new("f64imm_z", &formats.unary_ieee64, 1) + .operands_out(vec![fpr]) + .inst_predicate(InstructionPredicate::new_is_zero_64bit_float( + &*formats.unary_ieee64, + "imm", + )) + .emit( + r#" + {{PUT_OP}}(bits, rex2(out_reg0, out_reg0), sink); + modrm_rr(out_reg0, out_reg0, sink); + "#, + ), + ); + } + + recipes.add_template_recipe( + EncodingRecipeBuilder::new("pushq", &formats.unary, 0) + .operands_in(vec![gpr]) + .emit( + r#" + sink.trap(TrapCode::StackOverflow, func.srclocs[inst]); + {{PUT_OP}}(bits | (in_reg0 & 7), rex1(in_reg0), sink); + "#, + ), + ); + + recipes.add_template_recipe( + EncodingRecipeBuilder::new("popq", &formats.nullary, 0) + .operands_out(vec![gpr]) + .emit( + r#" + {{PUT_OP}}(bits | (out_reg0 & 7), rex1(out_reg0), sink); + "#, + ), + ); + + // XX /r, for regmove instructions. + recipes.add_template_recipe( + EncodingRecipeBuilder::new("copysp", &formats.copy_special, 1) + .clobbers_flags(false) + .emit( + r#" + {{PUT_OP}}(bits, rex2(dst, src), sink); + modrm_rr(dst, src, sink); + "#, + ), + ); + + recipes.add_template_recipe( + EncodingRecipeBuilder::new("adjustsp", &formats.unary, 1) + .operands_in(vec![gpr]) + .emit( + r#" + {{PUT_OP}}(bits, rex2(RU::rsp.into(), in_reg0), sink); + modrm_rr(RU::rsp.into(), in_reg0, sink); + "#, + ), + ); + + { + recipes.add_template_recipe( + EncodingRecipeBuilder::new("adjustsp_ib", &formats.unary_imm, 2) + .inst_predicate(InstructionPredicate::new_is_signed_int( + &*formats.unary_imm, + "imm", + 8, + 0, + )) + .emit( + r#" + {{PUT_OP}}(bits, rex1(RU::rsp.into()), sink); + modrm_r_bits(RU::rsp.into(), bits, sink); + let imm: i64 = imm.into(); + sink.put1(imm as u8); + "#, + ), + ); + + recipes.add_template_recipe( + EncodingRecipeBuilder::new("adjustsp_id", &formats.unary_imm, 5) + .inst_predicate(InstructionPredicate::new_is_signed_int( + &*formats.unary_imm, + "imm", + 32, + 0, + )) + .emit( + r#" + {{PUT_OP}}(bits, rex1(RU::rsp.into()), sink); + modrm_r_bits(RU::rsp.into(), bits, sink); + let imm: i64 = imm.into(); + sink.put4(imm as u32); + "#, + ), + ); + } + + recipes.add_recipe( + EncodingRecipeBuilder::new("dummy_sarg_t", &formats.nullary, 0) + .operands_out(vec![Stack::new(gpr)]) + .emit(""), + ); + + // XX+rd id with Abs4 function relocation. 
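+    // In the XX+rd recipes above and below, `bits | (out_reg0 & 7)` folds the low three bits of
+    // the destination register into the opcode byte itself (the classic B8+rd shape of
+    // `mov r32, imm32`), while `rex1(out_reg0)` supplies the fourth register bit through REX.B
+    // when one of r8-r15 is chosen.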
+ recipes.add_template_recipe( + EncodingRecipeBuilder::new("fnaddr4", &formats.func_addr, 4) + .operands_out(vec![gpr]) + .emit( + r#" + {{PUT_OP}}(bits | (out_reg0 & 7), rex1(out_reg0), sink); + sink.reloc_external(func.srclocs[inst], + Reloc::Abs4, + &func.dfg.ext_funcs[func_ref].name, + 0); + sink.put4(0); + "#, + ), + ); + + // XX+rd iq with Abs8 function relocation. + recipes.add_template_recipe( + EncodingRecipeBuilder::new("fnaddr8", &formats.func_addr, 8) + .operands_out(vec![gpr]) + .emit( + r#" + {{PUT_OP}}(bits | (out_reg0 & 7), rex1(out_reg0), sink); + sink.reloc_external(func.srclocs[inst], + Reloc::Abs8, + &func.dfg.ext_funcs[func_ref].name, + 0); + sink.put8(0); + "#, + ), + ); + + // Similar to fnaddr4, but writes !0 (this is used by BaldrMonkey). + recipes.add_template_recipe( + EncodingRecipeBuilder::new("allones_fnaddr4", &formats.func_addr, 4) + .operands_out(vec![gpr]) + .emit( + r#" + {{PUT_OP}}(bits | (out_reg0 & 7), rex1(out_reg0), sink); + sink.reloc_external(func.srclocs[inst], + Reloc::Abs4, + &func.dfg.ext_funcs[func_ref].name, + 0); + // Write the immediate as `!0` for the benefit of BaldrMonkey. + sink.put4(!0); + "#, + ), + ); + + // Similar to fnaddr8, but writes !0 (this is used by BaldrMonkey). + recipes.add_template_recipe( + EncodingRecipeBuilder::new("allones_fnaddr8", &formats.func_addr, 8) + .operands_out(vec![gpr]) + .emit( + r#" + {{PUT_OP}}(bits | (out_reg0 & 7), rex1(out_reg0), sink); + sink.reloc_external(func.srclocs[inst], + Reloc::Abs8, + &func.dfg.ext_funcs[func_ref].name, + 0); + // Write the immediate as `!0` for the benefit of BaldrMonkey. + sink.put8(!0); + "#, + ), + ); + + recipes.add_template_recipe( + EncodingRecipeBuilder::new("pcrel_fnaddr8", &formats.func_addr, 5) + .operands_out(vec![gpr]) + // rex2 gets passed 0 for r/m register because the upper bit of + // r/m doesn't get decoded when in rip-relative addressing mode. + .emit( + r#" + {{PUT_OP}}(bits, rex2(0, out_reg0), sink); + modrm_riprel(out_reg0, sink); + // The addend adjusts for the difference between the end of the + // instruction and the beginning of the immediate field. + sink.reloc_external(func.srclocs[inst], + Reloc::X86PCRel4, + &func.dfg.ext_funcs[func_ref].name, + -4); + sink.put4(0); + "#, + ), + ); + + recipes.add_template_recipe( + EncodingRecipeBuilder::new("got_fnaddr8", &formats.func_addr, 5) + .operands_out(vec![gpr]) + // rex2 gets passed 0 for r/m register because the upper bit of + // r/m doesn't get decoded when in rip-relative addressing mode. + .emit( + r#" + {{PUT_OP}}(bits, rex2(0, out_reg0), sink); + modrm_riprel(out_reg0, sink); + // The addend adjusts for the difference between the end of the + // instruction and the beginning of the immediate field. + sink.reloc_external(func.srclocs[inst], + Reloc::X86GOTPCRel4, + &func.dfg.ext_funcs[func_ref].name, + -4); + sink.put4(0); + "#, + ), + ); + + // XX+rd id with Abs4 globalsym relocation. + recipes.add_template_recipe( + EncodingRecipeBuilder::new("gvaddr4", &formats.unary_global_value, 4) + .operands_out(vec![gpr]) + .emit( + r#" + {{PUT_OP}}(bits | (out_reg0 & 7), rex1(out_reg0), sink); + sink.reloc_external(func.srclocs[inst], + Reloc::Abs4, + &func.global_values[global_value].symbol_name(), + 0); + sink.put4(0); + "#, + ), + ); + + // XX+rd iq with Abs8 globalsym relocation. 
+ recipes.add_template_recipe( + EncodingRecipeBuilder::new("gvaddr8", &formats.unary_global_value, 8) + .operands_out(vec![gpr]) + .emit( + r#" + {{PUT_OP}}(bits | (out_reg0 & 7), rex1(out_reg0), sink); + sink.reloc_external(func.srclocs[inst], + Reloc::Abs8, + &func.global_values[global_value].symbol_name(), + 0); + sink.put8(0); + "#, + ), + ); + + // XX+rd iq with PCRel4 globalsym relocation. + recipes.add_template_recipe( + EncodingRecipeBuilder::new("pcrel_gvaddr8", &formats.unary_global_value, 5) + .operands_out(vec![gpr]) + .emit( + r#" + {{PUT_OP}}(bits, rex2(0, out_reg0), sink); + modrm_rm(5, out_reg0, sink); + // The addend adjusts for the difference between the end of the + // instruction and the beginning of the immediate field. + sink.reloc_external(func.srclocs[inst], + Reloc::X86PCRel4, + &func.global_values[global_value].symbol_name(), + -4); + sink.put4(0); + "#, + ), + ); + + // XX+rd iq with Abs8 globalsym relocation. + recipes.add_template_recipe( + EncodingRecipeBuilder::new("got_gvaddr8", &formats.unary_global_value, 5) + .operands_out(vec![gpr]) + .emit( + r#" + {{PUT_OP}}(bits, rex2(0, out_reg0), sink); + modrm_rm(5, out_reg0, sink); + // The addend adjusts for the difference between the end of the + // instruction and the beginning of the immediate field. + sink.reloc_external(func.srclocs[inst], + Reloc::X86GOTPCRel4, + &func.global_values[global_value].symbol_name(), + -4); + sink.put4(0); + "#, + ), + ); + + // Stack addresses. + // + // TODO Alternative forms for 8-bit immediates, when applicable. + + recipes.add_template_recipe( + EncodingRecipeBuilder::new("spaddr_id", &formats.stack_load, 6) + .operands_out(vec![gpr]) + .emit( + r#" + let sp = StackRef::sp(stack_slot, &func.stack_slots); + let base = stk_base(sp.base); + {{PUT_OP}}(bits, rex2(base, out_reg0), sink); + modrm_sib_disp32(out_reg0, sink); + sib_noindex(base, sink); + let imm : i32 = offset.into(); + sink.put4(sp.offset.checked_add(imm).unwrap() as u32); + "#, + ), + ); + + // Constant addresses. + + recipes.add_template_recipe( + EncodingRecipeBuilder::new("const_addr", &formats.unary_const, 5) + .operands_out(vec![gpr]) + .clobbers_flags(false) + .emit( + r#" + {{PUT_OP}}(bits, rex2(0, out_reg0), sink); + modrm_riprel(out_reg0, sink); + const_disp4(constant_handle, func, sink); + "#, + ), + ); + + // Store recipes. + + { + // Simple stores. + + // A predicate asking if the offset is zero. + let has_no_offset = + InstructionPredicate::new_is_field_equal(&*formats.store, "offset", "0".into()); + + // XX /r register-indirect store with no offset. + let st = recipes.add_template_recipe( + EncodingRecipeBuilder::new("st", &formats.store, 1) + .operands_in(vec![gpr, gpr]) + .inst_predicate(has_no_offset.clone()) + .clobbers_flags(false) + .compute_size("size_plus_maybe_sib_or_offset_for_inreg_1") + .emit( + r#" + if !flags.notrap() { + sink.trap(TrapCode::HeapOutOfBounds, func.srclocs[inst]); + } + {{PUT_OP}}(bits, rex2(in_reg1, in_reg0), sink); + if needs_sib_byte(in_reg1) { + modrm_sib(in_reg0, sink); + sib_noindex(in_reg1, sink); + } else if needs_offset(in_reg1) { + modrm_disp8(in_reg1, in_reg0, sink); + sink.put1(0); + } else { + modrm_rm(in_reg1, in_reg0, sink); + } + "#, + ), + ); + + // XX /r register-indirect store with no offset. + // Only ABCD allowed for stored value. This is for byte stores with no REX. 
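+        // The needs_sib_byte()/needs_offset() checks in these store (and later load) recipes
+        // handle two ModR/M special cases: a base of RSP or R12 (r/m = 0b100) always needs an
+        // SIB byte, and a base of RBP or R13 with mod = 00 would be decoded as
+        // disp32/RIP-relative, so those bases get an explicit zero 8-bit displacement instead.
+        // The size_plus_maybe_* compute_size handlers account for the extra byte.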
+ recipes.add_template( + Template::new( + EncodingRecipeBuilder::new("st_abcd", &formats.store, 1) + .operands_in(vec![abcd, gpr]) + .inst_predicate(has_no_offset.clone()) + .clobbers_flags(false) + .compute_size("size_plus_maybe_sib_or_offset_for_inreg_1") + .emit( + r#" + if !flags.notrap() { + sink.trap(TrapCode::HeapOutOfBounds, func.srclocs[inst]); + } + {{PUT_OP}}(bits, rex2(in_reg1, in_reg0), sink); + if needs_sib_byte(in_reg1) { + modrm_sib(in_reg0, sink); + sib_noindex(in_reg1, sink); + } else if needs_offset(in_reg1) { + modrm_disp8(in_reg1, in_reg0, sink); + sink.put1(0); + } else { + modrm_rm(in_reg1, in_reg0, sink); + } + "#, + ), + regs, + ) + .when_prefixed(st), + ); + + // XX /r register-indirect store of FPR with no offset. + recipes.add_template_inferred( + EncodingRecipeBuilder::new("fst", &formats.store, 1) + .operands_in(vec![fpr, gpr]) + .inst_predicate(has_no_offset) + .clobbers_flags(false) + .compute_size("size_plus_maybe_sib_or_offset_for_inreg_1") + .emit( + r#" + if !flags.notrap() { + sink.trap(TrapCode::HeapOutOfBounds, func.srclocs[inst]); + } + {{PUT_OP}}(bits, rex2(in_reg1, in_reg0), sink); + if needs_sib_byte(in_reg1) { + modrm_sib(in_reg0, sink); + sib_noindex(in_reg1, sink); + } else if needs_offset(in_reg1) { + modrm_disp8(in_reg1, in_reg0, sink); + sink.put1(0); + } else { + modrm_rm(in_reg1, in_reg0, sink); + } + "#, + ), + "size_plus_maybe_sib_or_offset_inreg1_plus_rex_prefix_for_inreg0_inreg1", + ); + + let has_small_offset = + InstructionPredicate::new_is_signed_int(&*formats.store, "offset", 8, 0); + + // XX /r register-indirect store with 8-bit offset. + let st_disp8 = recipes.add_template_recipe( + EncodingRecipeBuilder::new("stDisp8", &formats.store, 2) + .operands_in(vec![gpr, gpr]) + .inst_predicate(has_small_offset.clone()) + .clobbers_flags(false) + .compute_size("size_plus_maybe_sib_for_inreg_1") + .emit( + r#" + if !flags.notrap() { + sink.trap(TrapCode::HeapOutOfBounds, func.srclocs[inst]); + } + {{PUT_OP}}(bits, rex2(in_reg1, in_reg0), sink); + if needs_sib_byte(in_reg1) { + modrm_sib_disp8(in_reg0, sink); + sib_noindex(in_reg1, sink); + } else { + modrm_disp8(in_reg1, in_reg0, sink); + } + let offset: i32 = offset.into(); + sink.put1(offset as u8); + "#, + ), + ); + + // XX /r register-indirect store with 8-bit offset. + // Only ABCD allowed for stored value. This is for byte stores with no REX. + recipes.add_template( + Template::new( + EncodingRecipeBuilder::new("stDisp8_abcd", &formats.store, 2) + .operands_in(vec![abcd, gpr]) + .inst_predicate(has_small_offset.clone()) + .clobbers_flags(false) + .compute_size("size_plus_maybe_sib_for_inreg_1") + .emit( + r#" + if !flags.notrap() { + sink.trap(TrapCode::HeapOutOfBounds, func.srclocs[inst]); + } + {{PUT_OP}}(bits, rex2(in_reg1, in_reg0), sink); + if needs_sib_byte(in_reg1) { + modrm_sib_disp8(in_reg0, sink); + sib_noindex(in_reg1, sink); + } else { + modrm_disp8(in_reg1, in_reg0, sink); + } + let offset: i32 = offset.into(); + sink.put1(offset as u8); + "#, + ), + regs, + ) + .when_prefixed(st_disp8), + ); + + // XX /r register-indirect store with 8-bit offset of FPR. 
+ recipes.add_template_inferred( + EncodingRecipeBuilder::new("fstDisp8", &formats.store, 2) + .operands_in(vec![fpr, gpr]) + .inst_predicate(has_small_offset) + .clobbers_flags(false) + .compute_size("size_plus_maybe_sib_for_inreg_1") + .emit( + r#" + if !flags.notrap() { + sink.trap(TrapCode::HeapOutOfBounds, func.srclocs[inst]); + } + {{PUT_OP}}(bits, rex2(in_reg1, in_reg0), sink); + if needs_sib_byte(in_reg1) { + modrm_sib_disp8(in_reg0, sink); + sib_noindex(in_reg1, sink); + } else { + modrm_disp8(in_reg1, in_reg0, sink); + } + let offset: i32 = offset.into(); + sink.put1(offset as u8); + "#, + ), + "size_plus_maybe_sib_inreg1_plus_rex_prefix_for_inreg0_inreg1", + ); + + // XX /r register-indirect store with 32-bit offset. + let st_disp32 = recipes.add_template_recipe( + EncodingRecipeBuilder::new("stDisp32", &formats.store, 5) + .operands_in(vec![gpr, gpr]) + .clobbers_flags(false) + .compute_size("size_plus_maybe_sib_for_inreg_1") + .emit( + r#" + if !flags.notrap() { + sink.trap(TrapCode::HeapOutOfBounds, func.srclocs[inst]); + } + {{PUT_OP}}(bits, rex2(in_reg1, in_reg0), sink); + if needs_sib_byte(in_reg1) { + modrm_sib_disp32(in_reg0, sink); + sib_noindex(in_reg1, sink); + } else { + modrm_disp32(in_reg1, in_reg0, sink); + } + let offset: i32 = offset.into(); + sink.put4(offset as u32); + "#, + ), + ); + + // XX /r register-indirect store with 32-bit offset. + // Only ABCD allowed for stored value. This is for byte stores with no REX. + recipes.add_template( + Template::new( + EncodingRecipeBuilder::new("stDisp32_abcd", &formats.store, 5) + .operands_in(vec![abcd, gpr]) + .clobbers_flags(false) + .compute_size("size_plus_maybe_sib_for_inreg_1") + .emit( + r#" + if !flags.notrap() { + sink.trap(TrapCode::HeapOutOfBounds, func.srclocs[inst]); + } + {{PUT_OP}}(bits, rex2(in_reg1, in_reg0), sink); + if needs_sib_byte(in_reg1) { + modrm_sib_disp32(in_reg0, sink); + sib_noindex(in_reg1, sink); + } else { + modrm_disp32(in_reg1, in_reg0, sink); + } + let offset: i32 = offset.into(); + sink.put4(offset as u32); + "#, + ), + regs, + ) + .when_prefixed(st_disp32), + ); + + // XX /r register-indirect store with 32-bit offset of FPR. + recipes.add_template_inferred( + EncodingRecipeBuilder::new("fstDisp32", &formats.store, 5) + .operands_in(vec![fpr, gpr]) + .clobbers_flags(false) + .compute_size("size_plus_maybe_sib_for_inreg_1") + .emit( + r#" + if !flags.notrap() { + sink.trap(TrapCode::HeapOutOfBounds, func.srclocs[inst]); + } + {{PUT_OP}}(bits, rex2(in_reg1, in_reg0), sink); + if needs_sib_byte(in_reg1) { + modrm_sib_disp32(in_reg0, sink); + sib_noindex(in_reg1, sink); + } else { + modrm_disp32(in_reg1, in_reg0, sink); + } + let offset: i32 = offset.into(); + sink.put4(offset as u32); + "#, + ), + "size_plus_maybe_sib_inreg1_plus_rex_prefix_for_inreg0_inreg1", + ); + } + + { + // Complex stores. + + // A predicate asking if the offset is zero. + let has_no_offset = + InstructionPredicate::new_is_field_equal(&*formats.store_complex, "offset", "0".into()); + + // XX /r register-indirect store with index and no offset. + recipes.add_template_recipe( + EncodingRecipeBuilder::new("stWithIndex", &formats.store_complex, 2) + .operands_in(vec![gpr, gpr, gpr]) + .inst_predicate(has_no_offset.clone()) + .clobbers_flags(false) + .compute_size("size_plus_maybe_offset_for_inreg_1") + .emit( + r#" + if !flags.notrap() { + sink.trap(TrapCode::HeapOutOfBounds, func.srclocs[inst]); + } + {{PUT_OP}}(bits, rex3(in_reg1, in_reg0, in_reg2), sink); + // The else branch always inserts an SIB byte. 
+ if needs_offset(in_reg1) { + modrm_sib_disp8(in_reg0, sink); + sib(0, in_reg2, in_reg1, sink); + sink.put1(0); + } else { + modrm_sib(in_reg0, sink); + sib(0, in_reg2, in_reg1, sink); + } + "#, + ), + ); + + // XX /r register-indirect store with index and no offset. + // Only ABCD allowed for stored value. This is for byte stores with no REX. + recipes.add_template_recipe( + EncodingRecipeBuilder::new("stWithIndex_abcd", &formats.store_complex, 2) + .operands_in(vec![abcd, gpr, gpr]) + .inst_predicate(has_no_offset.clone()) + .clobbers_flags(false) + .compute_size("size_plus_maybe_offset_for_inreg_1") + .emit( + r#" + if !flags.notrap() { + sink.trap(TrapCode::HeapOutOfBounds, func.srclocs[inst]); + } + {{PUT_OP}}(bits, rex3(in_reg1, in_reg0, in_reg2), sink); + // The else branch always inserts an SIB byte. + if needs_offset(in_reg1) { + modrm_sib_disp8(in_reg0, sink); + sib(0, in_reg2, in_reg1, sink); + sink.put1(0); + } else { + modrm_sib(in_reg0, sink); + sib(0, in_reg2, in_reg1, sink); + } + "#, + ), + ); + + // XX /r register-indirect store with index and no offset of FPR. + recipes.add_template_recipe( + EncodingRecipeBuilder::new("fstWithIndex", &formats.store_complex, 2) + .operands_in(vec![fpr, gpr, gpr]) + .inst_predicate(has_no_offset) + .clobbers_flags(false) + .compute_size("size_plus_maybe_offset_for_inreg_1") + .emit( + r#" + if !flags.notrap() { + sink.trap(TrapCode::HeapOutOfBounds, func.srclocs[inst]); + } + {{PUT_OP}}(bits, rex3(in_reg1, in_reg0, in_reg2), sink); + // The else branch always inserts an SIB byte. + if needs_offset(in_reg1) { + modrm_sib_disp8(in_reg0, sink); + sib(0, in_reg2, in_reg1, sink); + sink.put1(0); + } else { + modrm_sib(in_reg0, sink); + sib(0, in_reg2, in_reg1, sink); + } + "#, + ), + ); + + let has_small_offset = + InstructionPredicate::new_is_signed_int(&*formats.store_complex, "offset", 8, 0); + + // XX /r register-indirect store with index and 8-bit offset. + recipes.add_template_recipe( + EncodingRecipeBuilder::new("stWithIndexDisp8", &formats.store_complex, 3) + .operands_in(vec![gpr, gpr, gpr]) + .inst_predicate(has_small_offset.clone()) + .clobbers_flags(false) + .emit( + r#" + if !flags.notrap() { + sink.trap(TrapCode::HeapOutOfBounds, func.srclocs[inst]); + } + {{PUT_OP}}(bits, rex3(in_reg1, in_reg0, in_reg2), sink); + modrm_sib_disp8(in_reg0, sink); + sib(0, in_reg2, in_reg1, sink); + let offset: i32 = offset.into(); + sink.put1(offset as u8); + "#, + ), + ); + + // XX /r register-indirect store with index and 8-bit offset. + // Only ABCD allowed for stored value. This is for byte stores with no REX. + recipes.add_template_recipe( + EncodingRecipeBuilder::new("stWithIndexDisp8_abcd", &formats.store_complex, 3) + .operands_in(vec![abcd, gpr, gpr]) + .inst_predicate(has_small_offset.clone()) + .clobbers_flags(false) + .emit( + r#" + if !flags.notrap() { + sink.trap(TrapCode::HeapOutOfBounds, func.srclocs[inst]); + } + {{PUT_OP}}(bits, rex3(in_reg1, in_reg0, in_reg2), sink); + modrm_sib_disp8(in_reg0, sink); + sib(0, in_reg2, in_reg1, sink); + let offset: i32 = offset.into(); + sink.put1(offset as u8); + "#, + ), + ); + + // XX /r register-indirect store with index and 8-bit offset of FPR. 
+ recipes.add_template_recipe( + EncodingRecipeBuilder::new("fstWithIndexDisp8", &formats.store_complex, 3) + .operands_in(vec![fpr, gpr, gpr]) + .inst_predicate(has_small_offset) + .clobbers_flags(false) + .emit( + r#" + if !flags.notrap() { + sink.trap(TrapCode::HeapOutOfBounds, func.srclocs[inst]); + } + {{PUT_OP}}(bits, rex3(in_reg1, in_reg0, in_reg2), sink); + modrm_sib_disp8(in_reg0, sink); + sib(0, in_reg2, in_reg1, sink); + let offset: i32 = offset.into(); + sink.put1(offset as u8); + "#, + ), + ); + + let has_big_offset = + InstructionPredicate::new_is_signed_int(&*formats.store_complex, "offset", 32, 0); + + // XX /r register-indirect store with index and 32-bit offset. + recipes.add_template_recipe( + EncodingRecipeBuilder::new("stWithIndexDisp32", &formats.store_complex, 6) + .operands_in(vec![gpr, gpr, gpr]) + .inst_predicate(has_big_offset.clone()) + .clobbers_flags(false) + .emit( + r#" + if !flags.notrap() { + sink.trap(TrapCode::HeapOutOfBounds, func.srclocs[inst]); + } + {{PUT_OP}}(bits, rex3(in_reg1, in_reg0, in_reg2), sink); + modrm_sib_disp32(in_reg0, sink); + sib(0, in_reg2, in_reg1, sink); + let offset: i32 = offset.into(); + sink.put4(offset as u32); + "#, + ), + ); + + // XX /r register-indirect store with index and 32-bit offset. + // Only ABCD allowed for stored value. This is for byte stores with no REX. + recipes.add_template_recipe( + EncodingRecipeBuilder::new("stWithIndexDisp32_abcd", &formats.store_complex, 6) + .operands_in(vec![abcd, gpr, gpr]) + .inst_predicate(has_big_offset.clone()) + .clobbers_flags(false) + .emit( + r#" + if !flags.notrap() { + sink.trap(TrapCode::HeapOutOfBounds, func.srclocs[inst]); + } + {{PUT_OP}}(bits, rex3(in_reg1, in_reg0, in_reg2), sink); + modrm_sib_disp32(in_reg0, sink); + sib(0, in_reg2, in_reg1, sink); + let offset: i32 = offset.into(); + sink.put4(offset as u32); + "#, + ), + ); + + // XX /r register-indirect store with index and 32-bit offset of FPR. + recipes.add_template_recipe( + EncodingRecipeBuilder::new("fstWithIndexDisp32", &formats.store_complex, 6) + .operands_in(vec![fpr, gpr, gpr]) + .inst_predicate(has_big_offset) + .clobbers_flags(false) + .emit( + r#" + if !flags.notrap() { + sink.trap(TrapCode::HeapOutOfBounds, func.srclocs[inst]); + } + {{PUT_OP}}(bits, rex3(in_reg1, in_reg0, in_reg2), sink); + modrm_sib_disp32(in_reg0, sink); + sib(0, in_reg2, in_reg1, sink); + let offset: i32 = offset.into(); + sink.put4(offset as u32); + "#, + ), + ); + } + + // Unary spill with SIB and 32-bit displacement. + recipes.add_template_recipe( + EncodingRecipeBuilder::new("spillSib32", &formats.unary, 6) + .operands_in(vec![gpr]) + .operands_out(vec![stack_gpr32]) + .clobbers_flags(false) + .emit( + r#" + sink.trap(TrapCode::StackOverflow, func.srclocs[inst]); + let base = stk_base(out_stk0.base); + {{PUT_OP}}(bits, rex2(base, in_reg0), sink); + modrm_sib_disp32(in_reg0, sink); + sib_noindex(base, sink); + sink.put4(out_stk0.offset as u32); + "#, + ), + ); + + // Like spillSib32, but targeting an FPR rather than a GPR. + recipes.add_template_recipe( + EncodingRecipeBuilder::new("fspillSib32", &formats.unary, 6) + .operands_in(vec![fpr]) + .operands_out(vec![stack_fpr32]) + .clobbers_flags(false) + .emit( + r#" + sink.trap(TrapCode::StackOverflow, func.srclocs[inst]); + let base = stk_base(out_stk0.base); + {{PUT_OP}}(bits, rex2(base, in_reg0), sink); + modrm_sib_disp32(in_reg0, sink); + sib_noindex(base, sink); + sink.put4(out_stk0.offset as u32); + "#, + ), + ); + + // Regspill using RSP-relative addressing. 
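+    // Like the spill recipes above, the regspill recipes below always emit an SIB byte and a
+    // 32-bit displacement: the slot is addressed relative to RSP, which cannot serve as a
+    // ModR/M base without an SIB byte, and the fixed disp32 keeps the recipe's encoding size
+    // constant regardless of the slot offset.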
+ recipes.add_template_recipe( + EncodingRecipeBuilder::new("regspill32", &formats.reg_spill, 6) + .operands_in(vec![gpr]) + .clobbers_flags(false) + .emit( + r#" + sink.trap(TrapCode::StackOverflow, func.srclocs[inst]); + let dst = StackRef::sp(dst, &func.stack_slots); + let base = stk_base(dst.base); + {{PUT_OP}}(bits, rex2(base, src), sink); + modrm_sib_disp32(src, sink); + sib_noindex(base, sink); + sink.put4(dst.offset as u32); + "#, + ), + ); + + // Like regspill32, but targeting an FPR rather than a GPR. + recipes.add_template_recipe( + EncodingRecipeBuilder::new("fregspill32", &formats.reg_spill, 6) + .operands_in(vec![fpr]) + .clobbers_flags(false) + .emit( + r#" + sink.trap(TrapCode::StackOverflow, func.srclocs[inst]); + let dst = StackRef::sp(dst, &func.stack_slots); + let base = stk_base(dst.base); + {{PUT_OP}}(bits, rex2(base, src), sink); + modrm_sib_disp32(src, sink); + sib_noindex(base, sink); + sink.put4(dst.offset as u32); + "#, + ), + ); + + // Load recipes. + + { + // Simple loads. + + // A predicate asking if the offset is zero. + let has_no_offset = + InstructionPredicate::new_is_field_equal(&*formats.load, "offset", "0".into()); + + // XX /r load with no offset. + recipes.add_template_recipe( + EncodingRecipeBuilder::new("ld", &formats.load, 1) + .operands_in(vec![gpr]) + .operands_out(vec![gpr]) + .inst_predicate(has_no_offset.clone()) + .clobbers_flags(false) + .compute_size("size_plus_maybe_sib_or_offset_for_inreg_0") + .emit( + r#" + if !flags.notrap() { + sink.trap(TrapCode::HeapOutOfBounds, func.srclocs[inst]); + } + {{PUT_OP}}(bits, rex2(in_reg0, out_reg0), sink); + if needs_sib_byte(in_reg0) { + modrm_sib(out_reg0, sink); + sib_noindex(in_reg0, sink); + } else if needs_offset(in_reg0) { + modrm_disp8(in_reg0, out_reg0, sink); + sink.put1(0); + } else { + modrm_rm(in_reg0, out_reg0, sink); + } + "#, + ), + ); + + // XX /r float load with no offset. + recipes.add_template_inferred( + EncodingRecipeBuilder::new("fld", &formats.load, 1) + .operands_in(vec![gpr]) + .operands_out(vec![fpr]) + .inst_predicate(has_no_offset) + .clobbers_flags(false) + .compute_size("size_plus_maybe_sib_or_offset_for_inreg_0") + .emit( + r#" + if !flags.notrap() { + sink.trap(TrapCode::HeapOutOfBounds, func.srclocs[inst]); + } + {{PUT_OP}}(bits, rex2(in_reg0, out_reg0), sink); + if needs_sib_byte(in_reg0) { + modrm_sib(out_reg0, sink); + sib_noindex(in_reg0, sink); + } else if needs_offset(in_reg0) { + modrm_disp8(in_reg0, out_reg0, sink); + sink.put1(0); + } else { + modrm_rm(in_reg0, out_reg0, sink); + } + "#, + ), + "size_plus_maybe_sib_or_offset_for_inreg_0_plus_rex_prefix_for_inreg0_outreg0", + ); + + let has_small_offset = + InstructionPredicate::new_is_signed_int(&*formats.load, "offset", 8, 0); + + // XX /r load with 8-bit offset. + recipes.add_template_recipe( + EncodingRecipeBuilder::new("ldDisp8", &formats.load, 2) + .operands_in(vec![gpr]) + .operands_out(vec![gpr]) + .inst_predicate(has_small_offset.clone()) + .clobbers_flags(false) + .compute_size("size_plus_maybe_sib_for_inreg_0") + .emit( + r#" + if !flags.notrap() { + sink.trap(TrapCode::HeapOutOfBounds, func.srclocs[inst]); + } + {{PUT_OP}}(bits, rex2(in_reg0, out_reg0), sink); + if needs_sib_byte(in_reg0) { + modrm_sib_disp8(out_reg0, sink); + sib_noindex(in_reg0, sink); + } else { + modrm_disp8(in_reg0, out_reg0, sink); + } + let offset: i32 = offset.into(); + sink.put1(offset as u8); + "#, + ), + ); + + // XX /r float load with 8-bit offset. 
+ recipes.add_template_inferred( + EncodingRecipeBuilder::new("fldDisp8", &formats.load, 2) + .operands_in(vec![gpr]) + .operands_out(vec![fpr]) + .inst_predicate(has_small_offset) + .clobbers_flags(false) + .compute_size("size_plus_maybe_sib_for_inreg_0") + .emit( + r#" + if !flags.notrap() { + sink.trap(TrapCode::HeapOutOfBounds, func.srclocs[inst]); + } + {{PUT_OP}}(bits, rex2(in_reg0, out_reg0), sink); + if needs_sib_byte(in_reg0) { + modrm_sib_disp8(out_reg0, sink); + sib_noindex(in_reg0, sink); + } else { + modrm_disp8(in_reg0, out_reg0, sink); + } + let offset: i32 = offset.into(); + sink.put1(offset as u8); + "#, + ), + "size_plus_maybe_sib_for_inreg_0_plus_rex_prefix_for_inreg0_outreg0", + ); + + let has_big_offset = + InstructionPredicate::new_is_signed_int(&*formats.load, "offset", 32, 0); + + // XX /r load with 32-bit offset. + recipes.add_template_recipe( + EncodingRecipeBuilder::new("ldDisp32", &formats.load, 5) + .operands_in(vec![gpr]) + .operands_out(vec![gpr]) + .inst_predicate(has_big_offset.clone()) + .clobbers_flags(false) + .compute_size("size_plus_maybe_sib_for_inreg_0") + .emit( + r#" + if !flags.notrap() { + sink.trap(TrapCode::HeapOutOfBounds, func.srclocs[inst]); + } + {{PUT_OP}}(bits, rex2(in_reg0, out_reg0), sink); + if needs_sib_byte(in_reg0) { + modrm_sib_disp32(out_reg0, sink); + sib_noindex(in_reg0, sink); + } else { + modrm_disp32(in_reg0, out_reg0, sink); + } + let offset: i32 = offset.into(); + sink.put4(offset as u32); + "#, + ), + ); + + // XX /r float load with 32-bit offset. + recipes.add_template_inferred( + EncodingRecipeBuilder::new("fldDisp32", &formats.load, 5) + .operands_in(vec![gpr]) + .operands_out(vec![fpr]) + .inst_predicate(has_big_offset) + .clobbers_flags(false) + .compute_size("size_plus_maybe_sib_for_inreg_0") + .emit( + r#" + if !flags.notrap() { + sink.trap(TrapCode::HeapOutOfBounds, func.srclocs[inst]); + } + {{PUT_OP}}(bits, rex2(in_reg0, out_reg0), sink); + if needs_sib_byte(in_reg0) { + modrm_sib_disp32(out_reg0, sink); + sib_noindex(in_reg0, sink); + } else { + modrm_disp32(in_reg0, out_reg0, sink); + } + let offset: i32 = offset.into(); + sink.put4(offset as u32); + "#, + ), + "size_plus_maybe_sib_for_inreg_0_plus_rex_prefix_for_inreg0_outreg0", + ); + } + + { + // Complex loads. + + // A predicate asking if the offset is zero. + let has_no_offset = + InstructionPredicate::new_is_field_equal(&*formats.load_complex, "offset", "0".into()); + + // XX /r load with index and no offset. + recipes.add_template_recipe( + EncodingRecipeBuilder::new("ldWithIndex", &formats.load_complex, 2) + .operands_in(vec![gpr, gpr]) + .operands_out(vec![gpr]) + .inst_predicate(has_no_offset.clone()) + .clobbers_flags(false) + .compute_size("size_plus_maybe_offset_for_inreg_0") + .emit( + r#" + if !flags.notrap() { + sink.trap(TrapCode::HeapOutOfBounds, func.srclocs[inst]); + } + {{PUT_OP}}(bits, rex3(in_reg0, out_reg0, in_reg1), sink); + // The else branch always inserts an SIB byte. + if needs_offset(in_reg0) { + modrm_sib_disp8(out_reg0, sink); + sib(0, in_reg1, in_reg0, sink); + sink.put1(0); + } else { + modrm_sib(out_reg0, sink); + sib(0, in_reg1, in_reg0, sink); + } + "#, + ), + ); + + // XX /r float load with index and no offset. 
+ recipes.add_template_recipe( + EncodingRecipeBuilder::new("fldWithIndex", &formats.load_complex, 2) + .operands_in(vec![gpr, gpr]) + .operands_out(vec![fpr]) + .inst_predicate(has_no_offset) + .clobbers_flags(false) + .compute_size("size_plus_maybe_offset_for_inreg_0") + .emit( + r#" + if !flags.notrap() { + sink.trap(TrapCode::HeapOutOfBounds, func.srclocs[inst]); + } + {{PUT_OP}}(bits, rex3(in_reg0, out_reg0, in_reg1), sink); + // The else branch always inserts an SIB byte. + if needs_offset(in_reg0) { + modrm_sib_disp8(out_reg0, sink); + sib(0, in_reg1, in_reg0, sink); + sink.put1(0); + } else { + modrm_sib(out_reg0, sink); + sib(0, in_reg1, in_reg0, sink); + } + "#, + ), + ); + + let has_small_offset = + InstructionPredicate::new_is_signed_int(&*formats.load_complex, "offset", 8, 0); + + // XX /r load with index and 8-bit offset. + recipes.add_template_recipe( + EncodingRecipeBuilder::new("ldWithIndexDisp8", &formats.load_complex, 3) + .operands_in(vec![gpr, gpr]) + .operands_out(vec![gpr]) + .inst_predicate(has_small_offset.clone()) + .clobbers_flags(false) + .emit( + r#" + if !flags.notrap() { + sink.trap(TrapCode::HeapOutOfBounds, func.srclocs[inst]); + } + {{PUT_OP}}(bits, rex3(in_reg0, out_reg0, in_reg1), sink); + modrm_sib_disp8(out_reg0, sink); + sib(0, in_reg1, in_reg0, sink); + let offset: i32 = offset.into(); + sink.put1(offset as u8); + "#, + ), + ); + + // XX /r float load with 8-bit offset. + recipes.add_template_recipe( + EncodingRecipeBuilder::new("fldWithIndexDisp8", &formats.load_complex, 3) + .operands_in(vec![gpr, gpr]) + .operands_out(vec![fpr]) + .inst_predicate(has_small_offset) + .clobbers_flags(false) + .emit( + r#" + if !flags.notrap() { + sink.trap(TrapCode::HeapOutOfBounds, func.srclocs[inst]); + } + {{PUT_OP}}(bits, rex3(in_reg0, out_reg0, in_reg1), sink); + modrm_sib_disp8(out_reg0, sink); + sib(0, in_reg1, in_reg0, sink); + let offset: i32 = offset.into(); + sink.put1(offset as u8); + "#, + ), + ); + + let has_big_offset = + InstructionPredicate::new_is_signed_int(&*formats.load_complex, "offset", 32, 0); + + // XX /r load with index and 32-bit offset. + recipes.add_template_recipe( + EncodingRecipeBuilder::new("ldWithIndexDisp32", &formats.load_complex, 6) + .operands_in(vec![gpr, gpr]) + .operands_out(vec![gpr]) + .inst_predicate(has_big_offset.clone()) + .clobbers_flags(false) + .emit( + r#" + if !flags.notrap() { + sink.trap(TrapCode::HeapOutOfBounds, func.srclocs[inst]); + } + {{PUT_OP}}(bits, rex3(in_reg0, out_reg0, in_reg1), sink); + modrm_sib_disp32(out_reg0, sink); + sib(0, in_reg1, in_reg0, sink); + let offset: i32 = offset.into(); + sink.put4(offset as u32); + "#, + ), + ); + + // XX /r float load with index and 32-bit offset. + recipes.add_template_recipe( + EncodingRecipeBuilder::new("fldWithIndexDisp32", &formats.load_complex, 6) + .operands_in(vec![gpr, gpr]) + .operands_out(vec![fpr]) + .inst_predicate(has_big_offset) + .clobbers_flags(false) + .emit( + r#" + if !flags.notrap() { + sink.trap(TrapCode::HeapOutOfBounds, func.srclocs[inst]); + } + {{PUT_OP}}(bits, rex3(in_reg0, out_reg0, in_reg1), sink); + modrm_sib_disp32(out_reg0, sink); + sib(0, in_reg1, in_reg0, sink); + let offset: i32 = offset.into(); + sink.put4(offset as u32); + "#, + ), + ); + } + + // Unary fill with SIB and 32-bit displacement. 
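+    // A fill is the inverse of a spill: it reloads a value from its stack slot back into a
+    // register, using the same SIB-addressed form as `spillSib32` with the operands reversed.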
+ recipes.add_template_recipe( + EncodingRecipeBuilder::new("fillSib32", &formats.unary, 6) + .operands_in(vec![stack_gpr32]) + .operands_out(vec![gpr]) + .clobbers_flags(false) + .emit( + r#" + let base = stk_base(in_stk0.base); + {{PUT_OP}}(bits, rex2(base, out_reg0), sink); + modrm_sib_disp32(out_reg0, sink); + sib_noindex(base, sink); + sink.put4(in_stk0.offset as u32); + "#, + ), + ); + + // Like fillSib32, but targeting an FPR rather than a GPR. + recipes.add_template_recipe( + EncodingRecipeBuilder::new("ffillSib32", &formats.unary, 6) + .operands_in(vec![stack_fpr32]) + .operands_out(vec![fpr]) + .clobbers_flags(false) + .emit( + r#" + let base = stk_base(in_stk0.base); + {{PUT_OP}}(bits, rex2(base, out_reg0), sink); + modrm_sib_disp32(out_reg0, sink); + sib_noindex(base, sink); + sink.put4(in_stk0.offset as u32); + "#, + ), + ); + + // Regfill with RSP-relative 32-bit displacement. + recipes.add_template_recipe( + EncodingRecipeBuilder::new("regfill32", &formats.reg_fill, 6) + .operands_in(vec![stack_gpr32]) + .clobbers_flags(false) + .emit( + r#" + let src = StackRef::sp(src, &func.stack_slots); + let base = stk_base(src.base); + {{PUT_OP}}(bits, rex2(base, dst), sink); + modrm_sib_disp32(dst, sink); + sib_noindex(base, sink); + sink.put4(src.offset as u32); + "#, + ), + ); + + // Like regfill32, but targeting an FPR rather than a GPR. + recipes.add_template_recipe( + EncodingRecipeBuilder::new("fregfill32", &formats.reg_fill, 6) + .operands_in(vec![stack_fpr32]) + .clobbers_flags(false) + .emit( + r#" + let src = StackRef::sp(src, &func.stack_slots); + let base = stk_base(src.base); + {{PUT_OP}}(bits, rex2(base, dst), sink); + modrm_sib_disp32(dst, sink); + sib_noindex(base, sink); + sink.put4(src.offset as u32); + "#, + ), + ); + + // Call/return. + + recipes.add_template_recipe( + EncodingRecipeBuilder::new("call_id", &formats.call, 4).emit( + r#" + sink.trap(TrapCode::StackOverflow, func.srclocs[inst]); + {{PUT_OP}}(bits, BASE_REX, sink); + // The addend adjusts for the difference between the end of the + // instruction and the beginning of the immediate field. + sink.reloc_external(func.srclocs[inst], + Reloc::X86CallPCRel4, + &func.dfg.ext_funcs[func_ref].name, + -4); + sink.put4(0); + sink.add_call_site(opcode, func.srclocs[inst]); + "#, + ), + ); + + recipes.add_template_recipe( + EncodingRecipeBuilder::new("call_plt_id", &formats.call, 4).emit( + r#" + sink.trap(TrapCode::StackOverflow, func.srclocs[inst]); + {{PUT_OP}}(bits, BASE_REX, sink); + sink.reloc_external(func.srclocs[inst], + Reloc::X86CallPLTRel4, + &func.dfg.ext_funcs[func_ref].name, + -4); + sink.put4(0); + sink.add_call_site(opcode, func.srclocs[inst]); + "#, + ), + ); + + recipes.add_template_recipe( + EncodingRecipeBuilder::new("call_r", &formats.call_indirect, 1) + .operands_in(vec![gpr]) + .emit( + r#" + sink.trap(TrapCode::StackOverflow, func.srclocs[inst]); + {{PUT_OP}}(bits, rex1(in_reg0), sink); + modrm_r_bits(in_reg0, bits, sink); + sink.add_call_site(opcode, func.srclocs[inst]); + "#, + ), + ); + + recipes.add_template_recipe( + EncodingRecipeBuilder::new("ret", &formats.multiary, 0) + .emit("{{PUT_OP}}(bits, BASE_REX, sink);"), + ); + + // Branches. 
+ + recipes.add_template_recipe( + EncodingRecipeBuilder::new("jmpb", &formats.jump, 1) + .branch_range((1, 8)) + .clobbers_flags(false) + .emit( + r#" + {{PUT_OP}}(bits, BASE_REX, sink); + disp1(destination, func, sink); + "#, + ), + ); + + recipes.add_template_recipe( + EncodingRecipeBuilder::new("jmpd", &formats.jump, 4) + .branch_range((4, 32)) + .clobbers_flags(false) + .emit( + r#" + {{PUT_OP}}(bits, BASE_REX, sink); + disp4(destination, func, sink); + "#, + ), + ); + + recipes.add_template_recipe( + EncodingRecipeBuilder::new("brib", &formats.branch_int, 1) + .operands_in(vec![reg_rflags]) + .branch_range((1, 8)) + .clobbers_flags(false) + .emit( + r#" + {{PUT_OP}}(bits | icc2opc(cond), BASE_REX, sink); + disp1(destination, func, sink); + "#, + ), + ); + + recipes.add_template_recipe( + EncodingRecipeBuilder::new("brid", &formats.branch_int, 4) + .operands_in(vec![reg_rflags]) + .branch_range((4, 32)) + .clobbers_flags(false) + .emit( + r#" + {{PUT_OP}}(bits | icc2opc(cond), BASE_REX, sink); + disp4(destination, func, sink); + "#, + ), + ); + + recipes.add_template_recipe( + EncodingRecipeBuilder::new("brfb", &formats.branch_float, 1) + .operands_in(vec![reg_rflags]) + .branch_range((1, 8)) + .clobbers_flags(false) + .inst_predicate(supported_floatccs_predicate( + &supported_floatccs, + &*formats.branch_float, + )) + .emit( + r#" + {{PUT_OP}}(bits | fcc2opc(cond), BASE_REX, sink); + disp1(destination, func, sink); + "#, + ), + ); + + recipes.add_template_recipe( + EncodingRecipeBuilder::new("brfd", &formats.branch_float, 4) + .operands_in(vec![reg_rflags]) + .branch_range((4, 32)) + .clobbers_flags(false) + .inst_predicate(supported_floatccs_predicate( + &supported_floatccs, + &*formats.branch_float, + )) + .emit( + r#" + {{PUT_OP}}(bits | fcc2opc(cond), BASE_REX, sink); + disp4(destination, func, sink); + "#, + ), + ); + + recipes.add_template_recipe( + EncodingRecipeBuilder::new("indirect_jmp", &formats.indirect_jump, 1) + .operands_in(vec![gpr]) + .clobbers_flags(false) + .emit( + r#" + {{PUT_OP}}(bits, rex1(in_reg0), sink); + modrm_r_bits(in_reg0, bits, sink); + "#, + ), + ); + + recipes.add_template_recipe( + EncodingRecipeBuilder::new("jt_entry", &formats.branch_table_entry, 2) + .operands_in(vec![gpr, gpr]) + .operands_out(vec![gpr]) + .clobbers_flags(false) + .inst_predicate(valid_scale(&*formats.branch_table_entry)) + .compute_size("size_plus_maybe_offset_for_inreg_1") + .emit( + r#" + {{PUT_OP}}(bits, rex3(in_reg1, out_reg0, in_reg0), sink); + if needs_offset(in_reg1) { + modrm_sib_disp8(out_reg0, sink); + sib(imm.trailing_zeros() as u8, in_reg0, in_reg1, sink); + sink.put1(0); + } else { + modrm_sib(out_reg0, sink); + sib(imm.trailing_zeros() as u8, in_reg0, in_reg1, sink); + } + "#, + ), + ); + + recipes.add_template_inferred( + EncodingRecipeBuilder::new("vconst", &formats.unary_const, 5) + .operands_out(vec![fpr]) + .clobbers_flags(false) + .emit( + r#" + {{PUT_OP}}(bits, rex2(0, out_reg0), sink); + modrm_riprel(out_reg0, sink); + const_disp4(constant_handle, func, sink); + "#, + ), + "size_with_inferred_rex_for_outreg0", + ); + + recipes.add_template_inferred( + EncodingRecipeBuilder::new("vconst_optimized", &formats.unary_const, 1) + .operands_out(vec![fpr]) + .clobbers_flags(false) + .emit( + r#" + {{PUT_OP}}(bits, rex2(out_reg0, out_reg0), sink); + modrm_rr(out_reg0, out_reg0, sink); + "#, + ), + "size_with_inferred_rex_for_outreg0", + ); + + recipes.add_template_recipe( + EncodingRecipeBuilder::new("jt_base", &formats.branch_table_base, 5) + 
            .operands_out(vec![gpr])
+            .clobbers_flags(false)
+            .emit(
+                r#"
+                    {{PUT_OP}}(bits, rex2(0, out_reg0), sink);
+                    modrm_riprel(out_reg0, sink);
+
+                    // No reloc is needed here as the jump table is emitted directly after
+                    // the function body.
+                    jt_disp4(table, func, sink);
+                "#,
+            ),
+    );
+
+    // Test flags and set a register.
+    //
+    // These setCC instructions only set the low 8 bits, and they can only write ABCD registers
+    // without a REX prefix.
+    //
+    // Other instruction encodings accepting `b1` inputs have the same constraints and only look at
+    // the low 8 bits of the input register.
+
+    let seti = recipes.add_template(
+        Template::new(
+            EncodingRecipeBuilder::new("seti", &formats.int_cond, 1)
+                .operands_in(vec![reg_rflags])
+                .operands_out(vec![gpr])
+                .clobbers_flags(false)
+                .emit(
+                    r#"
+                        {{PUT_OP}}(bits | icc2opc(cond), rex1(out_reg0), sink);
+                        modrm_r_bits(out_reg0, bits, sink);
+                    "#,
+                ),
+            regs,
+        )
+        .rex_kind(RecipePrefixKind::AlwaysEmitRex),
+    );
+
+    recipes.add_template(
+        Template::new(
+            EncodingRecipeBuilder::new("seti_abcd", &formats.int_cond, 1)
+                .operands_in(vec![reg_rflags])
+                .operands_out(vec![abcd])
+                .clobbers_flags(false)
+                .emit(
+                    r#"
+                        {{PUT_OP}}(bits | icc2opc(cond), rex1(out_reg0), sink);
+                        modrm_r_bits(out_reg0, bits, sink);
+                    "#,
+                ),
+            regs,
+        )
+        .when_prefixed(seti),
+    );
+
+    let setf = recipes.add_template(
+        Template::new(
+            EncodingRecipeBuilder::new("setf", &formats.float_cond, 1)
+                .operands_in(vec![reg_rflags])
+                .operands_out(vec![gpr])
+                .clobbers_flags(false)
+                .emit(
+                    r#"
+                        {{PUT_OP}}(bits | fcc2opc(cond), rex1(out_reg0), sink);
+                        modrm_r_bits(out_reg0, bits, sink);
+                    "#,
+                ),
+            regs,
+        )
+        .rex_kind(RecipePrefixKind::AlwaysEmitRex),
+    );
+
+    recipes.add_template(
+        Template::new(
+            EncodingRecipeBuilder::new("setf_abcd", &formats.float_cond, 1)
+                .operands_in(vec![reg_rflags])
+                .operands_out(vec![abcd])
+                .clobbers_flags(false)
+                .emit(
+                    r#"
+                        {{PUT_OP}}(bits | fcc2opc(cond), rex1(out_reg0), sink);
+                        modrm_r_bits(out_reg0, bits, sink);
+                    "#,
+                ),
+            regs,
+        )
+        .when_prefixed(setf),
+    );
+
+    // Conditional move (a.k.a integer select)
+    // (maybe-REX.W) 0F 4x modrm(r,r)
+    // 1 byte, modrm(r,r), is after the opcode
+    recipes.add_template(
+        Template::new(
+            EncodingRecipeBuilder::new("cmov", &formats.int_select, 1)
+                .operands_in(vec![
+                    OperandConstraint::FixedReg(reg_rflags),
+                    OperandConstraint::RegClass(gpr),
+                    OperandConstraint::RegClass(gpr),
+                ])
+                .operands_out(vec![2])
+                .clobbers_flags(false)
+                .emit(
+                    r#"
+                        {{PUT_OP}}(bits | icc2opc(cond), rex2(in_reg1, in_reg2), sink);
+                        modrm_rr(in_reg1, in_reg2, sink);
+                    "#,
+                ),
+            regs,
+        )
+        .inferred_rex_compute_size("size_with_inferred_rex_for_cmov"),
+    );
+
+    // Bit scan forwards and reverse
+    recipes.add_template(
+        Template::new(
+            EncodingRecipeBuilder::new("bsf_and_bsr", &formats.unary, 1)
+                .operands_in(vec![gpr])
+                .operands_out(vec![
+                    OperandConstraint::RegClass(gpr),
+                    OperandConstraint::FixedReg(reg_rflags),
+                ])
+                .emit(
+                    r#"
+                        {{PUT_OP}}(bits, rex2(in_reg0, out_reg0), sink);
+                        modrm_rr(in_reg0, out_reg0, sink);
+                    "#,
+                ),
+            regs,
+        )
+        .inferred_rex_compute_size("size_with_inferred_rex_for_inreg0_outreg0"),
+    );
+
+    // Arithmetic with flag I/O.
+
+    // XX /r, MR form. Add two GPR registers and set carry flag.
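+    // In Intel operand-order notation, MR means ModRM.r/m holds the destination operand and
+    // ModRM.reg the source; `rout` below ties its result to input 0 and pins the produced
+    // flags to %rflags.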
+ recipes.add_template( + Template::new( + EncodingRecipeBuilder::new("rout", &formats.binary, 1) + .operands_in(vec![gpr, gpr]) + .operands_out(vec![ + OperandConstraint::TiedInput(0), + OperandConstraint::FixedReg(reg_rflags), + ]) + .clobbers_flags(true) + .emit( + r#" + {{PUT_OP}}(bits, rex2(in_reg0, in_reg1), sink); + modrm_rr(in_reg0, in_reg1, sink); + "#, + ), + regs, + ) + .inferred_rex_compute_size("size_with_inferred_rex_for_inreg0_inreg1"), + ); + + // XX /r, MR form. Add two GPR registers and get carry flag. + recipes.add_template( + Template::new( + EncodingRecipeBuilder::new("rin", &formats.ternary, 1) + .operands_in(vec![ + OperandConstraint::RegClass(gpr), + OperandConstraint::RegClass(gpr), + OperandConstraint::FixedReg(reg_rflags), + ]) + .operands_out(vec![0]) + .clobbers_flags(true) + .emit( + r#" + {{PUT_OP}}(bits, rex2(in_reg0, in_reg1), sink); + modrm_rr(in_reg0, in_reg1, sink); + "#, + ), + regs, + ) + .inferred_rex_compute_size("size_with_inferred_rex_for_inreg0_inreg1"), + ); + + // XX /r, MR form. Add two GPR registers with carry flag. + recipes.add_template( + Template::new( + EncodingRecipeBuilder::new("rio", &formats.ternary, 1) + .operands_in(vec![ + OperandConstraint::RegClass(gpr), + OperandConstraint::RegClass(gpr), + OperandConstraint::FixedReg(reg_rflags), + ]) + .operands_out(vec![ + OperandConstraint::TiedInput(0), + OperandConstraint::FixedReg(reg_rflags), + ]) + .clobbers_flags(true) + .emit( + r#" + {{PUT_OP}}(bits, rex2(in_reg0, in_reg1), sink); + modrm_rr(in_reg0, in_reg1, sink); + "#, + ), + regs, + ) + .inferred_rex_compute_size("size_with_inferred_rex_for_inreg0_inreg1"), + ); + + // Compare and set flags. + + // XX /r, MR form. Compare two GPR registers and set flags. + recipes.add_template( + Template::new( + EncodingRecipeBuilder::new("rcmp", &formats.binary, 1) + .operands_in(vec![gpr, gpr]) + .operands_out(vec![reg_rflags]) + .emit( + r#" + {{PUT_OP}}(bits, rex2(in_reg0, in_reg1), sink); + modrm_rr(in_reg0, in_reg1, sink); + "#, + ), + regs, + ) + .inferred_rex_compute_size("size_with_inferred_rex_for_inreg0_inreg1"), + ); + + // Same as rcmp, but second operand is the stack pointer. + recipes.add_template_recipe( + EncodingRecipeBuilder::new("rcmp_sp", &formats.unary, 1) + .operands_in(vec![gpr]) + .operands_out(vec![reg_rflags]) + .emit( + r#" + {{PUT_OP}}(bits, rex2(in_reg0, RU::rsp.into()), sink); + modrm_rr(in_reg0, RU::rsp.into(), sink); + "#, + ), + ); + + // XX /r, RM form. Compare two FPR registers and set flags. + recipes.add_template_inferred( + EncodingRecipeBuilder::new("fcmp", &formats.binary, 1) + .operands_in(vec![fpr, fpr]) + .operands_out(vec![reg_rflags]) + .emit( + r#" + {{PUT_OP}}(bits, rex2(in_reg1, in_reg0), sink); + modrm_rr(in_reg1, in_reg0, sink); + "#, + ), + "size_with_inferred_rex_for_inreg0_inreg1", + ); + + { + let has_small_offset = + InstructionPredicate::new_is_signed_int(&*formats.binary_imm64, "imm", 8, 0); + + // XX /n, MI form with imm8. 
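+        // `/n` means the ModRM.reg field carries an opcode extension instead of a register
+        // (hence `modrm_r_bits` taking the extension from `bits`), and MI pairs the ModRM.r/m
+        // register with an immediate operand.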
+ recipes.add_template( + Template::new( + EncodingRecipeBuilder::new("rcmp_ib", &formats.binary_imm64, 2) + .operands_in(vec![gpr]) + .operands_out(vec![reg_rflags]) + .inst_predicate(has_small_offset) + .emit( + r#" + {{PUT_OP}}(bits, rex1(in_reg0), sink); + modrm_r_bits(in_reg0, bits, sink); + let imm: i64 = imm.into(); + sink.put1(imm as u8); + "#, + ), + regs, + ) + .inferred_rex_compute_size("size_with_inferred_rex_for_inreg0"), + ); + + let has_big_offset = + InstructionPredicate::new_is_signed_int(&*formats.binary_imm64, "imm", 32, 0); + + // XX /n, MI form with imm32. + recipes.add_template( + Template::new( + EncodingRecipeBuilder::new("rcmp_id", &formats.binary_imm64, 5) + .operands_in(vec![gpr]) + .operands_out(vec![reg_rflags]) + .inst_predicate(has_big_offset) + .emit( + r#" + {{PUT_OP}}(bits, rex1(in_reg0), sink); + modrm_r_bits(in_reg0, bits, sink); + let imm: i64 = imm.into(); + sink.put4(imm as u32); + "#, + ), + regs, + ) + .inferred_rex_compute_size("size_with_inferred_rex_for_inreg0"), + ); + } + + // Test-and-branch. + // + // This recipe represents the macro fusion of a test and a conditional branch. + // This serves two purposes: + // + // 1. Guarantee that the test and branch get scheduled next to each other so + // macro fusion is guaranteed to be possible. + // 2. Hide the status flags from Cranelift which doesn't currently model flags. + // + // The encoding bits affect both the test and the branch instruction: + // + // Bits 0-7 are the Jcc opcode. + // Bits 8-15 control the test instruction which always has opcode byte 0x85. + + recipes.add_template( + Template::new( + EncodingRecipeBuilder::new("tjccb", &formats.branch, 1 + 2) + .operands_in(vec![gpr]) + .branch_range((3, 8)) + .emit( + r#" + // test r, r. + {{PUT_OP}}((bits & 0xff00) | 0x85, rex2(in_reg0, in_reg0), sink); + modrm_rr(in_reg0, in_reg0, sink); + // Jcc instruction. + sink.put1(bits as u8); + disp1(destination, func, sink); + "#, + ), + regs, + ) + .inferred_rex_compute_size("size_with_inferred_rex_for_inreg0"), + ); + + recipes.add_template( + Template::new( + EncodingRecipeBuilder::new("tjccd", &formats.branch, 1 + 6) + .operands_in(vec![gpr]) + .branch_range((7, 32)) + .emit( + r#" + // test r, r. + {{PUT_OP}}((bits & 0xff00) | 0x85, rex2(in_reg0, in_reg0), sink); + modrm_rr(in_reg0, in_reg0, sink); + // Jcc instruction. + sink.put1(0x0f); + sink.put1(bits as u8); + disp4(destination, func, sink); + "#, + ), + regs, + ) + .inferred_rex_compute_size("size_with_inferred_rex_for_inreg0"), + ); + + // 8-bit test-and-branch. + + let t8jccb = recipes.add_template( + Template::new( + EncodingRecipeBuilder::new("t8jccb", &formats.branch, 1 + 2) + .operands_in(vec![gpr]) + .branch_range((3, 8)) + .emit( + r#" + // test8 r, r. + {{PUT_OP}}((bits & 0xff00) | 0x84, rex2(in_reg0, in_reg0), sink); + modrm_rr(in_reg0, in_reg0, sink); + // Jcc instruction. + sink.put1(bits as u8); + disp1(destination, func, sink); + "#, + ), + regs, + ) + .rex_kind(RecipePrefixKind::AlwaysEmitRex), + ); + + recipes.add_template( + Template::new( + EncodingRecipeBuilder::new("t8jccb_abcd", &formats.branch, 1 + 2) + .operands_in(vec![abcd]) + .branch_range((3, 8)) + .emit( + r#" + // test8 r, r. + {{PUT_OP}}((bits & 0xff00) | 0x84, rex2(in_reg0, in_reg0), sink); + modrm_rr(in_reg0, in_reg0, sink); + // Jcc instruction. 
+                        sink.put1(bits as u8);
+                        disp1(destination, func, sink);
+                    "#,
+                ),
+            regs,
+        )
+        .when_prefixed(t8jccb),
+    );
+
+    let t8jccd = recipes.add_template(
+        Template::new(
+            EncodingRecipeBuilder::new("t8jccd", &formats.branch, 1 + 6)
+                .operands_in(vec![gpr])
+                .branch_range((7, 32))
+                .emit(
+                    r#"
+                        // test8 r, r.
+                        {{PUT_OP}}((bits & 0xff00) | 0x84, rex2(in_reg0, in_reg0), sink);
+                        modrm_rr(in_reg0, in_reg0, sink);
+                        // Jcc instruction.
+                        sink.put1(0x0f);
+                        sink.put1(bits as u8);
+                        disp4(destination, func, sink);
+                    "#,
+                ),
+            regs,
+        )
+        .rex_kind(RecipePrefixKind::AlwaysEmitRex),
+    );
+
+    recipes.add_template(
+        Template::new(
+            EncodingRecipeBuilder::new("t8jccd_abcd", &formats.branch, 1 + 6)
+                .operands_in(vec![abcd])
+                .branch_range((7, 32))
+                .emit(
+                    r#"
+                        // test8 r, r.
+                        {{PUT_OP}}((bits & 0xff00) | 0x84, rex2(in_reg0, in_reg0), sink);
+                        modrm_rr(in_reg0, in_reg0, sink);
+                        // Jcc instruction.
+                        sink.put1(0x0f);
+                        sink.put1(bits as u8);
+                        disp4(destination, func, sink);
+                    "#,
+                ),
+            regs,
+        )
+        .when_prefixed(t8jccd),
+    );
+
+    // Worst case test-and-branch recipe for brz.b1 and brnz.b1 in 32-bit mode.
+    // The register allocator can't handle a branch instruction with constrained
+    // operands like the t8jccd_abcd above. This variant can accept the b1 operand in
+    // any register, but it is larger because it uses a 32-bit test instruction with
+    // a 0xff immediate.
+
+    recipes.add_template_recipe(
+        EncodingRecipeBuilder::new("t8jccd_long", &formats.branch, 5 + 6)
+            .operands_in(vec![gpr])
+            .branch_range((11, 32))
+            .emit(
+                r#"
+                    // test32 r, 0xff.
+                    {{PUT_OP}}((bits & 0xff00) | 0xf7, rex1(in_reg0), sink);
+                    modrm_r_bits(in_reg0, bits, sink);
+                    sink.put4(0xff);
+                    // Jcc instruction.
+                    sink.put1(0x0f);
+                    sink.put1(bits as u8);
+                    disp4(destination, func, sink);
+                "#,
+            ),
+    );
+
+    // Comparison that produces a `b1` result in a GPR.
+    //
+    // This is a macro of a `cmp` instruction followed by a `setCC` instruction.
+    //
+    // TODO This is not a great solution because:
+    //
+    // - The cmp+setcc combination is not recognized by CPU's macro fusion.
+    // - The 64-bit encoding has issues with REX prefixes. The `cmp` and `setCC`
+    //   instructions may need a REX independently.
+    // - Modeling CPU flags in the type system would be better.
+    //
+    // Since the `setCC` instructions only write an 8-bit register, we use that as
+    // our `b1` representation: A `b1` value is represented as a GPR where the low 8
+    // bits are known to be 0 or 1. The high bits are undefined.
+    //
+    // This bandaid macro doesn't support a REX prefix for the final `setCC`
+    // instruction, so it is limited to the `ABCD` register class for booleans.
+    // The omission of a `when_prefixed` alternative is deliberate here.
+
+    recipes.add_template(
+        Template::new(
+            EncodingRecipeBuilder::new("icscc", &formats.int_compare, 1 + 3)
+                .operands_in(vec![gpr, gpr])
+                .operands_out(vec![abcd])
+                .emit(
+                    r#"
+                        // Comparison instruction.
+                        {{PUT_OP}}(bits, rex2(in_reg0, in_reg1), sink);
+                        modrm_rr(in_reg0, in_reg1, sink);
+                        // `setCC` instruction, no REX.
+                        let setcc = 0x90 | icc2opc(cond);
+                        sink.put1(0x0f);
+                        sink.put1(setcc as u8);
+                        modrm_rr(out_reg0, 0, sink);
+                    "#,
+                ),
+            regs,
+        )
+        .inferred_rex_compute_size("size_with_inferred_rex_for_inreg0_inreg1"),
+    );
+
+    recipes.add_template_inferred(
+        EncodingRecipeBuilder::new("icscc_fpr", &formats.int_compare, 1)
+            .operands_in(vec![fpr, fpr])
+            .operands_out(vec![0])
+            .emit(
+                r#"
+                    // Comparison instruction.
+                    {{PUT_OP}}(bits, rex2(in_reg1, in_reg0), sink);
+                    modrm_rr(in_reg1, in_reg0, sink);
+                "#,
+            ),
+        "size_with_inferred_rex_for_inreg0_inreg1",
+    );
+
+    {
+        let is_small_imm =
+            InstructionPredicate::new_is_signed_int(&*formats.int_compare_imm, "imm", 8, 0);
+
+        recipes.add_template(
+            Template::new(
+                EncodingRecipeBuilder::new("icscc_ib", &formats.int_compare_imm, 2 + 3)
+                    .operands_in(vec![gpr])
+                    .operands_out(vec![abcd])
+                    .inst_predicate(is_small_imm)
+                    .emit(
+                        r#"
+                            // Comparison instruction.
+                            {{PUT_OP}}(bits, rex1(in_reg0), sink);
+                            modrm_r_bits(in_reg0, bits, sink);
+                            let imm: i64 = imm.into();
+                            sink.put1(imm as u8);
+                            // `setCC` instruction, no REX.
+                            let setcc = 0x90 | icc2opc(cond);
+                            sink.put1(0x0f);
+                            sink.put1(setcc as u8);
+                            modrm_rr(out_reg0, 0, sink);
+                        "#,
+                    ),
+                regs,
+            )
+            .inferred_rex_compute_size("size_with_inferred_rex_for_inreg0"),
+        );
+
+        let is_big_imm =
+            InstructionPredicate::new_is_signed_int(&*formats.int_compare_imm, "imm", 32, 0);
+
+        recipes.add_template(
+            Template::new(
+                EncodingRecipeBuilder::new("icscc_id", &formats.int_compare_imm, 5 + 3)
+                    .operands_in(vec![gpr])
+                    .operands_out(vec![abcd])
+                    .inst_predicate(is_big_imm)
+                    .emit(
+                        r#"
+                            // Comparison instruction.
+                            {{PUT_OP}}(bits, rex1(in_reg0), sink);
+                            modrm_r_bits(in_reg0, bits, sink);
+                            let imm: i64 = imm.into();
+                            sink.put4(imm as u32);
+                            // `setCC` instruction, no REX.
+                            let setcc = 0x90 | icc2opc(cond);
+                            sink.put1(0x0f);
+                            sink.put1(setcc as u8);
+                            modrm_rr(out_reg0, 0, sink);
+                        "#,
+                    ),
+                regs,
+            )
+            .inferred_rex_compute_size("size_with_inferred_rex_for_inreg0"),
+        );
+    }
+
+    // Same thing for floating point: make a FloatCompare instruction predicate with the
+    // supported condition codes.
+    //
+    // The ucomiss/ucomisd instructions set the FLAGS bits ZF/PF/CF like this:
+    //
+    //    ZPC OSA
+    // UN 111 000
+    // GT 000 000
+    // LT 001 000
+    // EQ 100 000
+    //
+    // Not all floating point condition codes are supported.
+    // The omission of a `when_prefixed` alternative is deliberate here.
+
+    recipes.add_template_recipe(
+        EncodingRecipeBuilder::new("fcscc", &formats.float_compare, 1 + 3)
+            .operands_in(vec![fpr, fpr])
+            .operands_out(vec![abcd])
+            .inst_predicate(supported_floatccs_predicate(
+                &supported_floatccs,
+                &*formats.float_compare,
+            ))
+            .emit(
+                r#"
+                    // Comparison instruction.
+                    {{PUT_OP}}(bits, rex2(in_reg1, in_reg0), sink);
+                    modrm_rr(in_reg1, in_reg0, sink);
+                    // `setCC` instruction, no REX.
+ use crate::ir::condcodes::FloatCC::*; + let setcc = match cond { + Ordered => 0x9b, // EQ|LT|GT => setnp (P=0) + Unordered => 0x9a, // UN => setp (P=1) + OrderedNotEqual => 0x95, // LT|GT => setne (Z=0), + UnorderedOrEqual => 0x94, // UN|EQ => sete (Z=1) + GreaterThan => 0x97, // GT => seta (C=0&Z=0) + GreaterThanOrEqual => 0x93, // GT|EQ => setae (C=0) + UnorderedOrLessThan => 0x92, // UN|LT => setb (C=1) + UnorderedOrLessThanOrEqual => 0x96, // UN|LT|EQ => setbe (Z=1|C=1) + Equal | // EQ + NotEqual | // UN|LT|GT + LessThan | // LT + LessThanOrEqual | // LT|EQ + UnorderedOrGreaterThan | // UN|GT + UnorderedOrGreaterThanOrEqual // UN|GT|EQ + => panic!("{} not supported by fcscc", cond), + }; + sink.put1(0x0f); + sink.put1(setcc); + modrm_rr(out_reg0, 0, sink); + "#, + ), + ); + + { + let supported_floatccs: Vec<Literal> = ["eq", "lt", "le", "uno", "ne", "uge", "ugt", "ord"] + .iter() + .map(|name| Literal::enumerator_for(floatcc, name)) + .collect(); + recipes.add_template_inferred( + EncodingRecipeBuilder::new("pfcmp", &formats.float_compare, 2) + .operands_in(vec![fpr, fpr]) + .operands_out(vec![0]) + .inst_predicate(supported_floatccs_predicate( + &supported_floatccs[..], + &*formats.float_compare, + )) + .emit( + r#" + // Comparison instruction. + {{PUT_OP}}(bits, rex2(in_reg1, in_reg0), sink); + modrm_rr(in_reg1, in_reg0, sink); + // Add immediate byte indicating what type of comparison. + use crate::ir::condcodes::FloatCC::*; + let imm = match cond { + Equal => 0x00, + LessThan => 0x01, + LessThanOrEqual => 0x02, + Unordered => 0x03, + NotEqual => 0x04, + UnorderedOrGreaterThanOrEqual => 0x05, + UnorderedOrGreaterThan => 0x06, + Ordered => 0x07, + _ => panic!("{} not supported by pfcmp", cond), + }; + sink.put1(imm); + "#, + ), + "size_with_inferred_rex_for_inreg0_inreg1", + ); + } + + recipes.add_template_recipe( + EncodingRecipeBuilder::new("is_zero", &formats.unary, 2 + 2) + .operands_in(vec![gpr]) + .operands_out(vec![abcd]) + .emit( + r#" + // Test instruction. + {{PUT_OP}}(bits, rex2(in_reg0, in_reg0), sink); + modrm_rr(in_reg0, in_reg0, sink); + // Check ZF = 1 flag to see if register holds 0. + sink.put1(0x0f); + sink.put1(0x94); + modrm_rr(out_reg0, 0, sink); + "#, + ), + ); + + recipes.add_template_recipe( + EncodingRecipeBuilder::new("is_invalid", &formats.unary, 2 + 3) + .operands_in(vec![gpr]) + .operands_out(vec![abcd]) + .emit( + r#" + // Comparison instruction. + {{PUT_OP}}(bits, rex1(in_reg0), sink); + modrm_r_bits(in_reg0, bits, sink); + sink.put1(0xff); + // `setCC` instruction, no REX. + use crate::ir::condcodes::IntCC::*; + let setcc = 0x90 | icc2opc(Equal); + sink.put1(0x0f); + sink.put1(setcc as u8); + modrm_rr(out_reg0, 0, sink); + "#, + ), + ); + + recipes.add_recipe( + EncodingRecipeBuilder::new("safepoint", &formats.multiary, 0).emit( + r#" + sink.add_stack_map(args, func, isa); + "#, + ), + ); + + // Both `elf_tls_get_addr` and `macho_tls_get_addr` require all caller-saved registers to be spilled. + // This is currently special cased in `regalloc/spilling.rs` in the `visit_inst` function. + + recipes.add_recipe( + EncodingRecipeBuilder::new("elf_tls_get_addr", &formats.unary_global_value, 16) + // FIXME Correct encoding for non rax registers + .operands_out(vec![reg_rax]) + .emit( + r#" + // output %rax + // clobbers %rdi + + // Those data16 prefixes are necessary to pad to 16 bytes. 
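+                    // Each half of the sequence is 8 bytes (66 48 8d 3d <disp32>, then
+                    // 66 66 48 e8 <rel32>), matching this recipe's declared size of 16; linkers
+                    // rely on this exact padded layout when relaxing the general-dynamic TLS model.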
+ + // data16 lea gv@tlsgd(%rip),%rdi + sink.put1(0x66); // data16 + sink.put1(0b01001000); // rex.w + const LEA: u8 = 0x8d; + sink.put1(LEA); // lea + modrm_riprel(0b111/*out_reg0*/, sink); // 0x3d + sink.reloc_external(func.srclocs[inst], + Reloc::ElfX86_64TlsGd, + &func.global_values[global_value].symbol_name(), + -4); + sink.put4(0); + + // data16 data16 callq __tls_get_addr-4 + sink.put1(0x66); // data16 + sink.put1(0x66); // data16 + sink.put1(0b01001000); // rex.w + sink.put1(0xe8); // call + sink.reloc_external(func.srclocs[inst], + Reloc::X86CallPLTRel4, + &ExternalName::LibCall(LibCall::ElfTlsGetAddr), + -4); + sink.put4(0); + "#, + ), + ); + + recipes.add_recipe( + EncodingRecipeBuilder::new("macho_tls_get_addr", &formats.unary_global_value, 9) + // FIXME Correct encoding for non rax registers + .operands_out(vec![reg_rax]) + .emit( + r#" + // output %rax + // clobbers %rdi + + // movq gv@tlv(%rip), %rdi + sink.put1(0x48); // rex + sink.put1(0x8b); // mov + modrm_riprel(0b111/*out_reg0*/, sink); // 0x3d + sink.reloc_external(func.srclocs[inst], + Reloc::MachOX86_64Tlv, + &func.global_values[global_value].symbol_name(), + -4); + sink.put4(0); + + // callq *(%rdi) + sink.put1(0xff); + sink.put1(0x17); + "#, + ), + ); + + recipes.add_template( + Template::new( + EncodingRecipeBuilder::new("evex_reg_vvvv_rm_128", &formats.binary, 1) + .operands_in(vec![fpr, fpr]) + .operands_out(vec![fpr]) + .emit( + r#" + // instruction encoding operands: reg (op1, w), vvvv (op2, r), rm (op3, r) + // this maps to: out_reg0, in_reg0, in_reg1 + let context = EvexContext::Other { length: EvexVectorLength::V128 }; + let masking = EvexMasking::None; + put_evex(bits, out_reg0, in_reg0, in_reg1, context, masking, sink); // params: reg, vvvv, rm + modrm_rr(in_reg1, out_reg0, sink); // params: rm, reg + "#, + ), + regs).rex_kind(RecipePrefixKind::Evex) + ); + + recipes.add_template( + Template::new( + EncodingRecipeBuilder::new("evex_reg_rm_128", &formats.unary, 1) + .operands_in(vec![fpr]) + .operands_out(vec![fpr]) + .emit( + r#" + // instruction encoding operands: reg (op1, w), rm (op2, r) + // this maps to: out_reg0, in_reg0 + let context = EvexContext::Other { length: EvexVectorLength::V128 }; + let masking = EvexMasking::None; + put_evex(bits, out_reg0, 0, in_reg0, context, masking, sink); // params: reg, vvvv, rm + modrm_rr(in_reg0, out_reg0, sink); // params: rm, reg + "#, + ), + regs).rex_kind(RecipePrefixKind::Evex) + ); + + recipes +} diff --git a/third_party/rust/cranelift-codegen-meta/src/isa/x86/registers.rs b/third_party/rust/cranelift-codegen-meta/src/isa/x86/registers.rs new file mode 100644 index 0000000000..85a8965f89 --- /dev/null +++ b/third_party/rust/cranelift-codegen-meta/src/isa/x86/registers.rs @@ -0,0 +1,43 @@ +use crate::cdsl::regs::{IsaRegs, IsaRegsBuilder, RegBankBuilder, RegClassBuilder}; + +pub(crate) fn define() -> IsaRegs { + let mut regs = IsaRegsBuilder::new(); + + let builder = RegBankBuilder::new("FloatRegs", "xmm") + .units(16) + .track_pressure(true); + let float_regs = regs.add_bank(builder); + + let builder = RegBankBuilder::new("IntRegs", "r") + .units(16) + .names(vec!["rax", "rcx", "rdx", "rbx", "rsp", "rbp", "rsi", "rdi"]) + .track_pressure(true) + .pinned_reg(15); + let int_regs = regs.add_bank(builder); + + let builder = RegBankBuilder::new("FlagRegs", "") + .units(1) + .names(vec!["rflags"]) + .track_pressure(false); + let flag_reg = regs.add_bank(builder); + + let builder = RegClassBuilder::new_toplevel("GPR", int_regs); + let gpr = regs.add_class(builder); 
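+    // The remaining top-level classes (FPR, FLAG) and the subclasses follow: GPR8 is the subset
+    // of GPRs encodable without a REX prefix, and ABCD is the subset (rax, rcx, rdx, rbx) whose
+    // low byte can be addressed without REX.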
+ + let builder = RegClassBuilder::new_toplevel("FPR", float_regs); + let fpr = regs.add_class(builder); + + let builder = RegClassBuilder::new_toplevel("FLAG", flag_reg); + regs.add_class(builder); + + let builder = RegClassBuilder::subclass_of("GPR8", gpr, 0, 8); + let gpr8 = regs.add_class(builder); + + let builder = RegClassBuilder::subclass_of("ABCD", gpr8, 0, 4); + regs.add_class(builder); + + let builder = RegClassBuilder::subclass_of("FPR8", fpr, 0, 8); + regs.add_class(builder); + + regs.build() +} diff --git a/third_party/rust/cranelift-codegen-meta/src/isa/x86/settings.rs b/third_party/rust/cranelift-codegen-meta/src/isa/x86/settings.rs new file mode 100644 index 0000000000..dddd69abb3 --- /dev/null +++ b/third_party/rust/cranelift-codegen-meta/src/isa/x86/settings.rs @@ -0,0 +1,135 @@ +use crate::cdsl::settings::{PredicateNode, SettingGroup, SettingGroupBuilder}; + +pub(crate) fn define(shared: &SettingGroup) -> SettingGroup { + let mut settings = SettingGroupBuilder::new("x86"); + + // CPUID.01H:ECX + let has_sse3 = settings.add_bool("has_sse3", "SSE3: CPUID.01H:ECX.SSE3[bit 0]", false); + let has_ssse3 = settings.add_bool("has_ssse3", "SSSE3: CPUID.01H:ECX.SSSE3[bit 9]", false); + let has_sse41 = settings.add_bool("has_sse41", "SSE4.1: CPUID.01H:ECX.SSE4_1[bit 19]", false); + let has_sse42 = settings.add_bool("has_sse42", "SSE4.2: CPUID.01H:ECX.SSE4_2[bit 20]", false); + let has_avx = settings.add_bool("has_avx", "AVX: CPUID.01H:ECX.AVX[bit 28]", false); + let has_avx2 = settings.add_bool("has_avx2", "AVX2: CPUID.07H:EBX.AVX2[bit 5]", false); + let has_avx512dq = settings.add_bool( + "has_avx512dq", + "AVX512DQ: CPUID.07H:EBX.AVX512DQ[bit 17]", + false, + ); + let has_avx512vl = settings.add_bool( + "has_avx512vl", + "AVX512VL: CPUID.07H:EBX.AVX512VL[bit 31]", + false, + ); + let has_avx512f = settings.add_bool( + "has_avx512f", + "AVX512F: CPUID.07H:EBX.AVX512F[bit 16]", + false, + ); + let has_popcnt = settings.add_bool("has_popcnt", "POPCNT: CPUID.01H:ECX.POPCNT[bit 23]", false); + + // CPUID.(EAX=07H, ECX=0H):EBX + let has_bmi1 = settings.add_bool( + "has_bmi1", + "BMI1: CPUID.(EAX=07H, ECX=0H):EBX.BMI1[bit 3]", + false, + ); + let has_bmi2 = settings.add_bool( + "has_bmi2", + "BMI2: CPUID.(EAX=07H, ECX=0H):EBX.BMI2[bit 8]", + false, + ); + + // CPUID.EAX=80000001H:ECX + let has_lzcnt = settings.add_bool( + "has_lzcnt", + "LZCNT: CPUID.EAX=80000001H:ECX.LZCNT[bit 5]", + false, + ); + + let shared_enable_simd = shared.get_bool("enable_simd"); + + settings.add_predicate("use_ssse3", predicate!(has_ssse3)); + settings.add_predicate("use_sse41", predicate!(has_sse41)); + settings.add_predicate("use_sse42", predicate!(has_sse41 && has_sse42)); + + settings.add_predicate( + "use_ssse3_simd", + predicate!(shared_enable_simd && has_ssse3), + ); + settings.add_predicate( + "use_sse41_simd", + predicate!(shared_enable_simd && has_sse41), + ); + settings.add_predicate( + "use_sse42_simd", + predicate!(shared_enable_simd && has_sse41 && has_sse42), + ); + + settings.add_predicate("use_avx_simd", predicate!(shared_enable_simd && has_avx)); + settings.add_predicate("use_avx2_simd", predicate!(shared_enable_simd && has_avx2)); + settings.add_predicate( + "use_avx512dq_simd", + predicate!(shared_enable_simd && has_avx512dq), + ); + settings.add_predicate( + "use_avx512vl_simd", + predicate!(shared_enable_simd && has_avx512vl), + ); + settings.add_predicate( + "use_avx512f_simd", + predicate!(shared_enable_simd && has_avx512f), + ); + + settings.add_predicate("use_popcnt", 
predicate!(has_popcnt && has_sse42));
+    settings.add_predicate("use_bmi1", predicate!(has_bmi1));
+    settings.add_predicate("use_lzcnt", predicate!(has_lzcnt));
+
+    // Some shared boolean values are used in x86 instruction predicates, so we need to group them
+    // in the same TargetIsa, for compatibility with code generated by meta-python.
+    // TODO Once all the meta generation code has been migrated from Python to Rust, we can put it
+    // back in the shared SettingGroup, and use it in x86 instruction predicates.
+
+    let is_pic = shared.get_bool("is_pic");
+    let emit_all_ones_funcaddrs = shared.get_bool("emit_all_ones_funcaddrs");
+    settings.add_predicate("is_pic", predicate!(is_pic));
+    settings.add_predicate("not_is_pic", predicate!(!is_pic));
+    settings.add_predicate(
+        "all_ones_funcaddrs_and_not_is_pic",
+        predicate!(emit_all_ones_funcaddrs && !is_pic),
+    );
+    settings.add_predicate(
+        "not_all_ones_funcaddrs_and_not_is_pic",
+        predicate!(!emit_all_ones_funcaddrs && !is_pic),
+    );
+
+    // Presets corresponding to x86 CPUs.
+
+    settings.add_preset("baseline", preset!());
+    let nehalem = settings.add_preset(
+        "nehalem",
+        preset!(has_sse3 && has_ssse3 && has_sse41 && has_sse42 && has_popcnt),
+    );
+    let haswell = settings.add_preset(
+        "haswell",
+        preset!(nehalem && has_bmi1 && has_bmi2 && has_lzcnt),
+    );
+    let broadwell = settings.add_preset("broadwell", preset!(haswell));
+    let skylake = settings.add_preset("skylake", preset!(broadwell));
+    let cannonlake = settings.add_preset("cannonlake", preset!(skylake));
+    settings.add_preset("icelake", preset!(cannonlake));
+    settings.add_preset(
+        "znver1",
+        preset!(
+            has_sse3
+                && has_ssse3
+                && has_sse41
+                && has_sse42
+                && has_popcnt
+                && has_bmi1
+                && has_bmi2
+                && has_lzcnt
+        ),
+    );
+
+    settings.build()
+}
diff --git a/third_party/rust/cranelift-codegen-meta/src/lib.rs b/third_party/rust/cranelift-codegen-meta/src/lib.rs
new file mode 100644
index 0000000000..ead2c4442f
--- /dev/null
+++ b/third_party/rust/cranelift-codegen-meta/src/lib.rs
@@ -0,0 +1,124 @@
+//! This crate generates Rust sources for use by
+//! [`cranelift_codegen`](../cranelift_codegen/index.html).
+#[macro_use]
+mod cdsl;
+mod srcgen;
+
+pub mod error;
+pub mod isa;
+
+mod gen_binemit;
+mod gen_encodings;
+mod gen_inst;
+mod gen_legalizer;
+mod gen_registers;
+mod gen_settings;
+mod gen_types;
+
+mod default_map;
+mod shared;
+mod unique_table;
+
+/// Generate an ISA from an architecture string (e.g. "x86_64").
+pub fn isa_from_arch(arch: &str) -> Result<isa::Isa, String> {
+    isa::Isa::from_arch(arch).ok_or_else(|| format!("no supported isa found for arch `{}`", arch))
+}
+
+/// Generates all the Rust source files used in Cranelift from the meta-language.
+pub fn generate(
+    old_backend_isas: &[isa::Isa],
+    new_backend_isas: &[isa::Isa],
+    out_dir: &str,
+) -> Result<(), error::Error> {
+    // Create all the definitions:
+    // - common definitions.
+    let mut shared_defs = shared::define();
+
+    gen_settings::generate(
+        &shared_defs.settings,
+        gen_settings::ParentGroup::None,
+        "settings.rs",
+        &out_dir,
+    )?;
+    gen_types::generate("types.rs", &out_dir)?;
+
+    // - per ISA definitions.
+    let target_isas = isa::define(old_backend_isas, &mut shared_defs);
+
+    // At this point, all definitions are done.
+    let all_formats = shared_defs.verify_instruction_formats();
+
+    // Generate all the code.
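+    // Each gen_* call below writes one generated .rs file into `out_dir`; typically the caller is
+    // cranelift-codegen's build script, which passes its OUT_DIR and includes the results.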
+    gen_inst::generate(
+        all_formats,
+        &shared_defs.all_instructions,
+        "opcodes.rs",
+        "inst_builder.rs",
+        &out_dir,
+    )?;
+
+    let extra_legalization_groups: &[&'static str] = if !new_backend_isas.is_empty() {
+        // The new backend only requires the "expand" legalization group.
+        &["expand"]
+    } else {
+        &[]
+    };
+
+    gen_legalizer::generate(
+        &target_isas,
+        &shared_defs.transform_groups,
+        extra_legalization_groups,
+        "legalize",
+        &out_dir,
+    )?;
+
+    for isa in target_isas {
+        gen_registers::generate(&isa, &format!("registers-{}.rs", isa.name), &out_dir)?;
+
+        gen_settings::generate(
+            &isa.settings,
+            gen_settings::ParentGroup::Shared,
+            &format!("settings-{}.rs", isa.name),
+            &out_dir,
+        )?;
+
+        gen_encodings::generate(
+            &shared_defs,
+            &isa,
+            &format!("encoding-{}.rs", isa.name),
+            &out_dir,
+        )?;
+
+        gen_binemit::generate(
+            &isa.name,
+            &isa.recipes,
+            &format!("binemit-{}.rs", isa.name),
+            &out_dir,
+        )?;
+    }
+
+    for isa in new_backend_isas {
+        match isa {
+            isa::Isa::X86 => {
+                // If the old backend ISAs contained x86, this file has already been generated.
+                if old_backend_isas.iter().any(|isa| *isa == isa::Isa::X86) {
+                    continue;
+                }
+
+                let settings = crate::isa::x86::settings::define(&shared_defs.settings);
+                gen_settings::generate(
+                    &settings,
+                    gen_settings::ParentGroup::Shared,
+                    "settings-x86.rs",
+                    &out_dir,
+                )?;
+            }
+            isa::Isa::Arm64 => {
+                // aarch64 doesn't have platform-specific settings.
+            }
+            isa::Isa::Arm32 | isa::Isa::Riscv => todo!(),
+        }
+    }
+
+    Ok(())
+}
diff --git a/third_party/rust/cranelift-codegen-meta/src/shared/entities.rs b/third_party/rust/cranelift-codegen-meta/src/shared/entities.rs
new file mode 100644
index 0000000000..c3f2bc0387
--- /dev/null
+++ b/third_party/rust/cranelift-codegen-meta/src/shared/entities.rs
@@ -0,0 +1,73 @@
+use crate::cdsl::operands::{OperandKind, OperandKindFields};
+
+/// Small helper to initialize an OperandBuilder with the right kind, for a given name and doc.
+fn new(format_field_name: &'static str, rust_type: &'static str, doc: &'static str) -> OperandKind {
+    OperandKind::new(format_field_name, rust_type, OperandKindFields::EntityRef).with_doc(doc)
+}
+
+pub(crate) struct EntityRefs {
+    /// A reference to a basic block in the same function.
+    /// This is primarily used in control flow instructions.
+    pub(crate) block: OperandKind,
+
+    /// A reference to a stack slot declared in the function preamble.
+    pub(crate) stack_slot: OperandKind,
+
+    /// A reference to a global value.
+    pub(crate) global_value: OperandKind,
+
+    /// A reference to a function signature declared in the function preamble.
+    /// This is used to provide the call signature in a call_indirect instruction.
+    pub(crate) sig_ref: OperandKind,
+
+    /// A reference to an external function declared in the function preamble.
+    /// This is used to provide the callee and signature in a call instruction.
+    pub(crate) func_ref: OperandKind,
+
+    /// A reference to a jump table declared in the function preamble.
+    pub(crate) jump_table: OperandKind,
+
+    /// A reference to a heap declared in the function preamble.
+    pub(crate) heap: OperandKind,
+
+    /// A reference to a table declared in the function preamble.
+    pub(crate) table: OperandKind,
+
+    /// A variable-sized list of value operands. Use for Block and function call arguments.
+ pub(crate) varargs: OperandKind, +} + +impl EntityRefs { + pub fn new() -> Self { + Self { + block: new( + "destination", + "ir::Block", + "a basic block in the same function.", + ), + stack_slot: new("stack_slot", "ir::StackSlot", "A stack slot"), + + global_value: new("global_value", "ir::GlobalValue", "A global value."), + + sig_ref: new("sig_ref", "ir::SigRef", "A function signature."), + + func_ref: new("func_ref", "ir::FuncRef", "An external function."), + + jump_table: new("table", "ir::JumpTable", "A jump table."), + + heap: new("heap", "ir::Heap", "A heap."), + + table: new("table", "ir::Table", "A table."), + + varargs: OperandKind::new("", "&[Value]", OperandKindFields::VariableArgs).with_doc( + r#" + A variable size list of `value` operands. + + Use this to represent arguments passed to a function call, arguments + passed to a basic block, or a variable number of results + returned from an instruction. + "#, + ), + } + } +} diff --git a/third_party/rust/cranelift-codegen-meta/src/shared/formats.rs b/third_party/rust/cranelift-codegen-meta/src/shared/formats.rs new file mode 100644 index 0000000000..3d081951a5 --- /dev/null +++ b/third_party/rust/cranelift-codegen-meta/src/shared/formats.rs @@ -0,0 +1,330 @@ +use crate::cdsl::formats::{InstructionFormat, InstructionFormatBuilder as Builder}; +use crate::shared::{entities::EntityRefs, immediates::Immediates}; +use std::rc::Rc; + +pub(crate) struct Formats { + pub(crate) atomic_cas: Rc<InstructionFormat>, + pub(crate) atomic_rmw: Rc<InstructionFormat>, + pub(crate) binary: Rc<InstructionFormat>, + pub(crate) binary_imm8: Rc<InstructionFormat>, + pub(crate) binary_imm64: Rc<InstructionFormat>, + pub(crate) branch: Rc<InstructionFormat>, + pub(crate) branch_float: Rc<InstructionFormat>, + pub(crate) branch_icmp: Rc<InstructionFormat>, + pub(crate) branch_int: Rc<InstructionFormat>, + pub(crate) branch_table: Rc<InstructionFormat>, + pub(crate) branch_table_base: Rc<InstructionFormat>, + pub(crate) branch_table_entry: Rc<InstructionFormat>, + pub(crate) call: Rc<InstructionFormat>, + pub(crate) call_indirect: Rc<InstructionFormat>, + pub(crate) cond_trap: Rc<InstructionFormat>, + pub(crate) copy_special: Rc<InstructionFormat>, + pub(crate) copy_to_ssa: Rc<InstructionFormat>, + pub(crate) float_compare: Rc<InstructionFormat>, + pub(crate) float_cond: Rc<InstructionFormat>, + pub(crate) float_cond_trap: Rc<InstructionFormat>, + pub(crate) func_addr: Rc<InstructionFormat>, + pub(crate) heap_addr: Rc<InstructionFormat>, + pub(crate) indirect_jump: Rc<InstructionFormat>, + pub(crate) int_compare: Rc<InstructionFormat>, + pub(crate) int_compare_imm: Rc<InstructionFormat>, + pub(crate) int_cond: Rc<InstructionFormat>, + pub(crate) int_cond_trap: Rc<InstructionFormat>, + pub(crate) int_select: Rc<InstructionFormat>, + pub(crate) jump: Rc<InstructionFormat>, + pub(crate) load: Rc<InstructionFormat>, + pub(crate) load_complex: Rc<InstructionFormat>, + pub(crate) load_no_offset: Rc<InstructionFormat>, + pub(crate) multiary: Rc<InstructionFormat>, + pub(crate) nullary: Rc<InstructionFormat>, + pub(crate) reg_fill: Rc<InstructionFormat>, + pub(crate) reg_move: Rc<InstructionFormat>, + pub(crate) reg_spill: Rc<InstructionFormat>, + pub(crate) shuffle: Rc<InstructionFormat>, + pub(crate) stack_load: Rc<InstructionFormat>, + pub(crate) stack_store: Rc<InstructionFormat>, + pub(crate) store: Rc<InstructionFormat>, + pub(crate) store_complex: Rc<InstructionFormat>, + pub(crate) store_no_offset: Rc<InstructionFormat>, + pub(crate) table_addr: 
Rc<InstructionFormat>, + pub(crate) ternary: Rc<InstructionFormat>, + pub(crate) ternary_imm8: Rc<InstructionFormat>, + pub(crate) trap: Rc<InstructionFormat>, + pub(crate) unary: Rc<InstructionFormat>, + pub(crate) unary_bool: Rc<InstructionFormat>, + pub(crate) unary_const: Rc<InstructionFormat>, + pub(crate) unary_global_value: Rc<InstructionFormat>, + pub(crate) unary_ieee32: Rc<InstructionFormat>, + pub(crate) unary_ieee64: Rc<InstructionFormat>, + pub(crate) unary_imm: Rc<InstructionFormat>, +} + +impl Formats { + pub fn new(imm: &Immediates, entities: &EntityRefs) -> Self { + Self { + unary: Builder::new("Unary").value().build(), + + unary_imm: Builder::new("UnaryImm").imm(&imm.imm64).build(), + + unary_ieee32: Builder::new("UnaryIeee32").imm(&imm.ieee32).build(), + + unary_ieee64: Builder::new("UnaryIeee64").imm(&imm.ieee64).build(), + + unary_bool: Builder::new("UnaryBool").imm(&imm.boolean).build(), + + unary_const: Builder::new("UnaryConst").imm(&imm.pool_constant).build(), + + unary_global_value: Builder::new("UnaryGlobalValue") + .imm(&entities.global_value) + .build(), + + binary: Builder::new("Binary").value().value().build(), + + binary_imm8: Builder::new("BinaryImm8").value().imm(&imm.uimm8).build(), + + binary_imm64: Builder::new("BinaryImm64").value().imm(&imm.imm64).build(), + + // The select instructions are controlled by the second VALUE operand. + // The first VALUE operand is the controlling flag which has a derived type. + // The fma instruction has the same constraint on all inputs. + ternary: Builder::new("Ternary") + .value() + .value() + .value() + .typevar_operand(1) + .build(), + + ternary_imm8: Builder::new("TernaryImm8") + .value() + .imm(&imm.uimm8) + .value() + .build(), + + // Catch-all for instructions with many outputs and inputs and no immediate + // operands. 
+ multiary: Builder::new("MultiAry").varargs().build(), + + nullary: Builder::new("NullAry").build(), + + shuffle: Builder::new("Shuffle") + .value() + .value() + .imm_with_name("mask", &imm.uimm128) + .build(), + + int_compare: Builder::new("IntCompare") + .imm(&imm.intcc) + .value() + .value() + .build(), + + int_compare_imm: Builder::new("IntCompareImm") + .imm(&imm.intcc) + .value() + .imm(&imm.imm64) + .build(), + + int_cond: Builder::new("IntCond").imm(&imm.intcc).value().build(), + + float_compare: Builder::new("FloatCompare") + .imm(&imm.floatcc) + .value() + .value() + .build(), + + float_cond: Builder::new("FloatCond").imm(&imm.floatcc).value().build(), + + int_select: Builder::new("IntSelect") + .imm(&imm.intcc) + .value() + .value() + .value() + .build(), + + jump: Builder::new("Jump").imm(&entities.block).varargs().build(), + + branch: Builder::new("Branch") + .value() + .imm(&entities.block) + .varargs() + .build(), + + branch_int: Builder::new("BranchInt") + .imm(&imm.intcc) + .value() + .imm(&entities.block) + .varargs() + .build(), + + branch_float: Builder::new("BranchFloat") + .imm(&imm.floatcc) + .value() + .imm(&entities.block) + .varargs() + .build(), + + branch_icmp: Builder::new("BranchIcmp") + .imm(&imm.intcc) + .value() + .value() + .imm(&entities.block) + .varargs() + .build(), + + branch_table: Builder::new("BranchTable") + .value() + .imm(&entities.block) + .imm(&entities.jump_table) + .build(), + + branch_table_entry: Builder::new("BranchTableEntry") + .value() + .value() + .imm(&imm.uimm8) + .imm(&entities.jump_table) + .build(), + + branch_table_base: Builder::new("BranchTableBase") + .imm(&entities.jump_table) + .build(), + + indirect_jump: Builder::new("IndirectJump") + .value() + .imm(&entities.jump_table) + .build(), + + call: Builder::new("Call") + .imm(&entities.func_ref) + .varargs() + .build(), + + call_indirect: Builder::new("CallIndirect") + .imm(&entities.sig_ref) + .value() + .varargs() + .build(), + + func_addr: Builder::new("FuncAddr").imm(&entities.func_ref).build(), + + atomic_rmw: Builder::new("AtomicRmw") + .imm(&imm.memflags) + .imm(&imm.atomic_rmw_op) + .value() + .value() + .build(), + + atomic_cas: Builder::new("AtomicCas") + .imm(&imm.memflags) + .value() + .value() + .value() + .typevar_operand(2) + .build(), + + load: Builder::new("Load") + .imm(&imm.memflags) + .value() + .imm(&imm.offset32) + .build(), + + load_complex: Builder::new("LoadComplex") + .imm(&imm.memflags) + .varargs() + .imm(&imm.offset32) + .build(), + + load_no_offset: Builder::new("LoadNoOffset") + .imm(&imm.memflags) + .value() + .build(), + + store: Builder::new("Store") + .imm(&imm.memflags) + .value() + .value() + .imm(&imm.offset32) + .build(), + + store_complex: Builder::new("StoreComplex") + .imm(&imm.memflags) + .value() + .varargs() + .imm(&imm.offset32) + .build(), + + store_no_offset: Builder::new("StoreNoOffset") + .imm(&imm.memflags) + .value() + .value() + .build(), + + stack_load: Builder::new("StackLoad") + .imm(&entities.stack_slot) + .imm(&imm.offset32) + .build(), + + stack_store: Builder::new("StackStore") + .value() + .imm(&entities.stack_slot) + .imm(&imm.offset32) + .build(), + + // Accessing a WebAssembly heap. + heap_addr: Builder::new("HeapAddr") + .imm(&entities.heap) + .value() + .imm(&imm.uimm32) + .build(), + + // Accessing a WebAssembly table. 
+ table_addr: Builder::new("TableAddr") + .imm(&entities.table) + .value() + .imm(&imm.offset32) + .build(), + + reg_move: Builder::new("RegMove") + .value() + .imm_with_name("src", &imm.regunit) + .imm_with_name("dst", &imm.regunit) + .build(), + + copy_special: Builder::new("CopySpecial") + .imm_with_name("src", &imm.regunit) + .imm_with_name("dst", &imm.regunit) + .build(), + + copy_to_ssa: Builder::new("CopyToSsa") + .imm_with_name("src", &imm.regunit) + .build(), + + reg_spill: Builder::new("RegSpill") + .value() + .imm_with_name("src", &imm.regunit) + .imm_with_name("dst", &entities.stack_slot) + .build(), + + reg_fill: Builder::new("RegFill") + .value() + .imm_with_name("src", &entities.stack_slot) + .imm_with_name("dst", &imm.regunit) + .build(), + + trap: Builder::new("Trap").imm(&imm.trapcode).build(), + + cond_trap: Builder::new("CondTrap").value().imm(&imm.trapcode).build(), + + int_cond_trap: Builder::new("IntCondTrap") + .imm(&imm.intcc) + .value() + .imm(&imm.trapcode) + .build(), + + float_cond_trap: Builder::new("FloatCondTrap") + .imm(&imm.floatcc) + .value() + .imm(&imm.trapcode) + .build(), + } + } +} diff --git a/third_party/rust/cranelift-codegen-meta/src/shared/immediates.rs b/third_party/rust/cranelift-codegen-meta/src/shared/immediates.rs new file mode 100644 index 0000000000..0aa4129daf --- /dev/null +++ b/third_party/rust/cranelift-codegen-meta/src/shared/immediates.rs @@ -0,0 +1,175 @@ +use crate::cdsl::operands::{EnumValues, OperandKind, OperandKindFields}; + +use std::collections::HashMap; + +pub(crate) struct Immediates { + /// A 64-bit immediate integer operand. + /// + /// This type of immediate integer can interact with SSA values with any IntType type. + pub imm64: OperandKind, + + /// An unsigned 8-bit immediate integer operand. + /// + /// This small operand is used to indicate lane indexes in SIMD vectors and immediate bit + /// counts on shift instructions. + pub uimm8: OperandKind, + + /// An unsigned 32-bit immediate integer operand. + pub uimm32: OperandKind, + + /// An unsigned 128-bit immediate integer operand. + /// + /// This operand is used to pass entire 128-bit vectors as immediates to instructions like + /// const. + pub uimm128: OperandKind, + + /// A constant stored in the constant pool. + /// + /// This operand is used to pass constants to instructions like vconst while storing the + /// actual bytes in the constant pool. + pub pool_constant: OperandKind, + + /// A 32-bit immediate signed offset. + /// + /// This is used to represent an immediate address offset in load/store instructions. + pub offset32: OperandKind, + + /// A 32-bit immediate floating point operand. + /// + /// IEEE 754-2008 binary32 interchange format. + pub ieee32: OperandKind, + + /// A 64-bit immediate floating point operand. + /// + /// IEEE 754-2008 binary64 interchange format. + pub ieee64: OperandKind, + + /// An immediate boolean operand. + /// + /// This type of immediate boolean can interact with SSA values with any BoolType type. + pub boolean: OperandKind, + + /// A condition code for comparing integer values. + /// + /// This enumerated operand kind is used for the `icmp` instruction and corresponds to the + /// condcodes::IntCC` Rust type. + pub intcc: OperandKind, + + /// A condition code for comparing floating point values. + /// + /// This enumerated operand kind is used for the `fcmp` instruction and corresponds to the + /// `condcodes::FloatCC` Rust type. + pub floatcc: OperandKind, + + /// Flags for memory operations like `load` and `store`. 
+ pub memflags: OperandKind, + + /// A register unit in the current target ISA. + pub regunit: OperandKind, + + /// A trap code indicating the reason for trapping. + /// + /// The Rust enum type also has a `User(u16)` variant for user-provided trap codes. + pub trapcode: OperandKind, + + /// A code indicating the arithmetic operation to perform in an atomic_rmw memory access. + pub atomic_rmw_op: OperandKind, +} + +fn new_imm(format_field_name: &'static str, rust_type: &'static str) -> OperandKind { + OperandKind::new(format_field_name, rust_type, OperandKindFields::ImmValue) +} +fn new_enum( + format_field_name: &'static str, + rust_type: &'static str, + values: EnumValues, +) -> OperandKind { + OperandKind::new( + format_field_name, + rust_type, + OperandKindFields::ImmEnum(values), + ) +} + +impl Immediates { + pub fn new() -> Self { + Self { + imm64: new_imm("imm", "ir::immediates::Imm64").with_doc("A 64-bit immediate integer."), + uimm8: new_imm("imm", "ir::immediates::Uimm8") + .with_doc("An 8-bit immediate unsigned integer."), + uimm32: new_imm("imm", "ir::immediates::Uimm32") + .with_doc("A 32-bit immediate unsigned integer."), + uimm128: new_imm("imm", "ir::Immediate") + .with_doc("A 128-bit immediate unsigned integer."), + pool_constant: new_imm("constant_handle", "ir::Constant") + .with_doc("A constant stored in the constant pool."), + offset32: new_imm("offset", "ir::immediates::Offset32") + .with_doc("A 32-bit immediate signed offset."), + ieee32: new_imm("imm", "ir::immediates::Ieee32") + .with_doc("A 32-bit immediate floating point number."), + ieee64: new_imm("imm", "ir::immediates::Ieee64") + .with_doc("A 64-bit immediate floating point number."), + boolean: new_imm("imm", "bool").with_doc("An immediate boolean."), + intcc: { + let mut intcc_values = HashMap::new(); + intcc_values.insert("eq", "Equal"); + intcc_values.insert("ne", "NotEqual"); + intcc_values.insert("sge", "SignedGreaterThanOrEqual"); + intcc_values.insert("sgt", "SignedGreaterThan"); + intcc_values.insert("sle", "SignedLessThanOrEqual"); + intcc_values.insert("slt", "SignedLessThan"); + intcc_values.insert("uge", "UnsignedGreaterThanOrEqual"); + intcc_values.insert("ugt", "UnsignedGreaterThan"); + intcc_values.insert("ule", "UnsignedLessThanOrEqual"); + intcc_values.insert("ult", "UnsignedLessThan"); + intcc_values.insert("of", "Overflow"); + intcc_values.insert("nof", "NotOverflow"); + new_enum("cond", "ir::condcodes::IntCC", intcc_values) + .with_doc("An integer comparison condition code.") + }, + + floatcc: { + let mut floatcc_values = HashMap::new(); + floatcc_values.insert("ord", "Ordered"); + floatcc_values.insert("uno", "Unordered"); + floatcc_values.insert("eq", "Equal"); + floatcc_values.insert("ne", "NotEqual"); + floatcc_values.insert("one", "OrderedNotEqual"); + floatcc_values.insert("ueq", "UnorderedOrEqual"); + floatcc_values.insert("lt", "LessThan"); + floatcc_values.insert("le", "LessThanOrEqual"); + floatcc_values.insert("gt", "GreaterThan"); + floatcc_values.insert("ge", "GreaterThanOrEqual"); + floatcc_values.insert("ult", "UnorderedOrLessThan"); + floatcc_values.insert("ule", "UnorderedOrLessThanOrEqual"); + floatcc_values.insert("ugt", "UnorderedOrGreaterThan"); + floatcc_values.insert("uge", "UnorderedOrGreaterThanOrEqual"); + new_enum("cond", "ir::condcodes::FloatCC", floatcc_values) + .with_doc("A floating point comparison condition code") + }, + + memflags: new_imm("flags", "ir::MemFlags").with_doc("Memory operation flags"), + regunit: new_imm("regunit", "isa::RegUnit") + 
.with_doc("A register unit in the target ISA"), + trapcode: { + let mut trapcode_values = HashMap::new(); + trapcode_values.insert("stk_ovf", "StackOverflow"); + trapcode_values.insert("heap_oob", "HeapOutOfBounds"); + trapcode_values.insert("int_ovf", "IntegerOverflow"); + trapcode_values.insert("int_divz", "IntegerDivisionByZero"); + new_enum("code", "ir::TrapCode", trapcode_values).with_doc("A trap reason code.") + }, + atomic_rmw_op: { + let mut atomic_rmw_op_values = HashMap::new(); + atomic_rmw_op_values.insert("add", "Add"); + atomic_rmw_op_values.insert("sub", "Sub"); + atomic_rmw_op_values.insert("and", "And"); + atomic_rmw_op_values.insert("or", "Or"); + atomic_rmw_op_values.insert("xor", "Xor"); + atomic_rmw_op_values.insert("xchg", "Xchg"); + new_enum("op", "ir::AtomicRmwOp", atomic_rmw_op_values) + .with_doc("Atomic Read-Modify-Write Ops") + }, + } + } +} diff --git a/third_party/rust/cranelift-codegen-meta/src/shared/instructions.rs b/third_party/rust/cranelift-codegen-meta/src/shared/instructions.rs new file mode 100644 index 0000000000..bd1444d79c --- /dev/null +++ b/third_party/rust/cranelift-codegen-meta/src/shared/instructions.rs @@ -0,0 +1,4514 @@ +#![allow(non_snake_case)] + +use crate::cdsl::instructions::{ + AllInstructions, InstructionBuilder as Inst, InstructionGroup, InstructionGroupBuilder, +}; +use crate::cdsl::operands::Operand; +use crate::cdsl::type_inference::Constraint::WiderOrEq; +use crate::cdsl::types::{LaneType, ValueType}; +use crate::cdsl::typevar::{Interval, TypeSetBuilder, TypeVar}; +use crate::shared::formats::Formats; +use crate::shared::types; +use crate::shared::{entities::EntityRefs, immediates::Immediates}; + +#[inline(never)] +fn define_control_flow( + ig: &mut InstructionGroupBuilder, + formats: &Formats, + imm: &Immediates, + entities: &EntityRefs, +) { + let block = &Operand::new("block", &entities.block).with_doc("Destination basic block"); + let args = &Operand::new("args", &entities.varargs).with_doc("block arguments"); + + ig.push( + Inst::new( + "jump", + r#" + Jump. + + Unconditionally jump to a basic block, passing the specified + block arguments. The number and types of arguments must match the + destination block. + "#, + &formats.jump, + ) + .operands_in(vec![block, args]) + .is_terminator(true) + .is_branch(true), + ); + + ig.push( + Inst::new( + "fallthrough", + r#" + Fall through to the next block. + + This is the same as `jump`, except the destination block must be + the next one in the layout. + + Jumps are turned into fall-through instructions by the branch + relaxation pass. There is no reason to use this instruction outside + that pass. + "#, + &formats.jump, + ) + .operands_in(vec![block, args]) + .is_terminator(true) + .is_branch(true), + ); + + let Testable = &TypeVar::new( + "Testable", + "A scalar boolean or integer type", + TypeSetBuilder::new() + .ints(Interval::All) + .bools(Interval::All) + .build(), + ); + + { + let c = &Operand::new("c", Testable).with_doc("Controlling value to test"); + + ig.push( + Inst::new( + "brz", + r#" + Branch when zero. + + If ``c`` is a `b1` value, take the branch when ``c`` is false. If + ``c`` is an integer value, take the branch when ``c = 0``. + "#, + &formats.branch, + ) + .operands_in(vec![c, block, args]) + .is_branch(true), + ); + + ig.push( + Inst::new( + "brnz", + r#" + Branch when non-zero. + + If ``c`` is a `b1` value, take the branch when ``c`` is true. If + ``c`` is an integer value, take the branch when ``c != 0``. 
+ "#, + &formats.branch, + ) + .operands_in(vec![c, block, args]) + .is_branch(true), + ); + } + + let iB = &TypeVar::new( + "iB", + "A scalar integer type", + TypeSetBuilder::new().ints(Interval::All).build(), + ); + let iflags: &TypeVar = &ValueType::Special(types::Flag::IFlags.into()).into(); + let fflags: &TypeVar = &ValueType::Special(types::Flag::FFlags.into()).into(); + + { + let Cond = &Operand::new("Cond", &imm.intcc); + let x = &Operand::new("x", iB); + let y = &Operand::new("y", iB); + + ig.push( + Inst::new( + "br_icmp", + r#" + Compare scalar integers and branch. + + Compare ``x`` and ``y`` in the same way as the `icmp` instruction + and take the branch if the condition is true: + + ```text + br_icmp ugt v1, v2, block4(v5, v6) + ``` + + is semantically equivalent to: + + ```text + v10 = icmp ugt, v1, v2 + brnz v10, block4(v5, v6) + ``` + + Some RISC architectures like MIPS and RISC-V provide instructions that + implement all or some of the condition codes. The instruction can also + be used to represent *macro-op fusion* on architectures like Intel's. + "#, + &formats.branch_icmp, + ) + .operands_in(vec![Cond, x, y, block, args]) + .is_branch(true), + ); + + let f = &Operand::new("f", iflags); + + ig.push( + Inst::new( + "brif", + r#" + Branch when condition is true in integer CPU flags. + "#, + &formats.branch_int, + ) + .operands_in(vec![Cond, f, block, args]) + .is_branch(true), + ); + } + + { + let Cond = &Operand::new("Cond", &imm.floatcc); + + let f = &Operand::new("f", fflags); + + ig.push( + Inst::new( + "brff", + r#" + Branch when condition is true in floating point CPU flags. + "#, + &formats.branch_float, + ) + .operands_in(vec![Cond, f, block, args]) + .is_branch(true), + ); + } + + { + let x = &Operand::new("x", iB).with_doc("index into jump table"); + let JT = &Operand::new("JT", &entities.jump_table); + + ig.push( + Inst::new( + "br_table", + r#" + Indirect branch via jump table. + + Use ``x`` as an unsigned index into the jump table ``JT``. If a jump + table entry is found, branch to the corresponding block. If no entry was + found or the index is out-of-bounds, branch to the given default block. + + Note that this branch instruction can't pass arguments to the targeted + blocks. Split critical edges as needed to work around this. + + Do not confuse this with "tables" in WebAssembly. ``br_table`` is for + jump tables with destinations within the current function only -- think + of a ``match`` in Rust or a ``switch`` in C. If you want to call a + function in a dynamic library, that will typically use + ``call_indirect``. + "#, + &formats.branch_table, + ) + .operands_in(vec![x, block, JT]) + .is_terminator(true) + .is_branch(true), + ); + } + + let iAddr = &TypeVar::new( + "iAddr", + "An integer address type", + TypeSetBuilder::new().ints(32..64).refs(32..64).build(), + ); + + { + let x = &Operand::new("x", iAddr).with_doc("index into jump table"); + let addr = &Operand::new("addr", iAddr); + let Size = &Operand::new("Size", &imm.uimm8).with_doc("Size in bytes"); + let JT = &Operand::new("JT", &entities.jump_table); + let entry = &Operand::new("entry", iAddr).with_doc("entry of jump table"); + + ig.push( + Inst::new( + "jump_table_entry", + r#" + Get an entry from a jump table. + + Load a serialized ``entry`` from a jump table ``JT`` at a given index + ``addr`` with a specific ``Size``. The retrieved entry may need to be + decoded after loading, depending upon the jump table type used. 
+ + Currently, the only type supported is entries which are relative to the + base of the jump table. + "#, + &formats.branch_table_entry, + ) + .operands_in(vec![x, addr, Size, JT]) + .operands_out(vec![entry]) + .can_load(true), + ); + + ig.push( + Inst::new( + "jump_table_base", + r#" + Get the absolute base address of a jump table. + + This is used for jump tables wherein the entries are stored relative to + the base of jump table. In order to use these, generated code should first + load an entry using ``jump_table_entry``, then use this instruction to add + the relative base back to it. + "#, + &formats.branch_table_base, + ) + .operands_in(vec![JT]) + .operands_out(vec![addr]), + ); + + ig.push( + Inst::new( + "indirect_jump_table_br", + r#" + Branch indirectly via a jump table entry. + + Unconditionally jump via a jump table entry that was previously loaded + with the ``jump_table_entry`` instruction. + "#, + &formats.indirect_jump, + ) + .operands_in(vec![addr, JT]) + .is_indirect_branch(true) + .is_terminator(true) + .is_branch(true), + ); + } + + ig.push( + Inst::new( + "debugtrap", + r#" + Encodes an assembly debug trap. + "#, + &formats.nullary, + ) + .other_side_effects(true) + .can_load(true) + .can_store(true), + ); + + { + let code = &Operand::new("code", &imm.trapcode); + ig.push( + Inst::new( + "trap", + r#" + Terminate execution unconditionally. + "#, + &formats.trap, + ) + .operands_in(vec![code]) + .can_trap(true) + .is_terminator(true), + ); + + let c = &Operand::new("c", Testable).with_doc("Controlling value to test"); + ig.push( + Inst::new( + "trapz", + r#" + Trap when zero. + + if ``c`` is non-zero, execution continues at the following instruction. + "#, + &formats.cond_trap, + ) + .operands_in(vec![c, code]) + .can_trap(true), + ); + + ig.push( + Inst::new( + "resumable_trap", + r#" + A resumable trap. + + This instruction allows non-conditional traps to be used as non-terminal instructions. + "#, + &formats.trap, + ) + .operands_in(vec![code]) + .can_trap(true), + ); + + let c = &Operand::new("c", Testable).with_doc("Controlling value to test"); + ig.push( + Inst::new( + "trapnz", + r#" + Trap when non-zero. + + If ``c`` is zero, execution continues at the following instruction. + "#, + &formats.cond_trap, + ) + .operands_in(vec![c, code]) + .can_trap(true), + ); + + ig.push( + Inst::new( + "resumable_trapnz", + r#" + A resumable trap to be called when the passed condition is non-zero. + + If ``c`` is zero, execution continues at the following instruction. + "#, + &formats.cond_trap, + ) + .operands_in(vec![c, code]) + .can_trap(true), + ); + + let Cond = &Operand::new("Cond", &imm.intcc); + let f = &Operand::new("f", iflags); + ig.push( + Inst::new( + "trapif", + r#" + Trap when condition is true in integer CPU flags. + "#, + &formats.int_cond_trap, + ) + .operands_in(vec![Cond, f, code]) + .can_trap(true), + ); + + let Cond = &Operand::new("Cond", &imm.floatcc); + let f = &Operand::new("f", fflags); + let code = &Operand::new("code", &imm.trapcode); + ig.push( + Inst::new( + "trapff", + r#" + Trap when condition is true in floating point CPU flags. + "#, + &formats.float_cond_trap, + ) + .operands_in(vec![Cond, f, code]) + .can_trap(true), + ); + } + + let rvals = &Operand::new("rvals", &entities.varargs).with_doc("return values"); + ig.push( + Inst::new( + "return", + r#" + Return from the function. + + Unconditionally transfer control to the calling function, passing the + provided return values. 
The list of return values must match the + function signature's return types. + "#, + &formats.multiary, + ) + .operands_in(vec![rvals]) + .is_return(true) + .is_terminator(true), + ); + + let rvals = &Operand::new("rvals", &entities.varargs).with_doc("return values"); + ig.push( + Inst::new( + "fallthrough_return", + r#" + Return from the function by fallthrough. + + This is a specialized instruction for use where one wants to append + a custom epilogue, which will then perform the real return. This + instruction has no encoding. + "#, + &formats.multiary, + ) + .operands_in(vec![rvals]) + .is_return(true) + .is_terminator(true), + ); + + let FN = &Operand::new("FN", &entities.func_ref) + .with_doc("function to call, declared by `function`"); + let args = &Operand::new("args", &entities.varargs).with_doc("call arguments"); + let rvals = &Operand::new("rvals", &entities.varargs).with_doc("return values"); + ig.push( + Inst::new( + "call", + r#" + Direct function call. + + Call a function which has been declared in the preamble. The argument + types must match the function's signature. + "#, + &formats.call, + ) + .operands_in(vec![FN, args]) + .operands_out(vec![rvals]) + .is_call(true), + ); + + let SIG = &Operand::new("SIG", &entities.sig_ref).with_doc("function signature"); + let callee = &Operand::new("callee", iAddr).with_doc("address of function to call"); + let args = &Operand::new("args", &entities.varargs).with_doc("call arguments"); + let rvals = &Operand::new("rvals", &entities.varargs).with_doc("return values"); + ig.push( + Inst::new( + "call_indirect", + r#" + Indirect function call. + + Call the function pointed to by `callee` with the given arguments. The + called function must match the specified signature. + + Note that this is different from WebAssembly's ``call_indirect``; the + callee is a native address, rather than a table index. For WebAssembly, + `table_addr` and `load` are used to obtain a native address + from a table. + "#, + &formats.call_indirect, + ) + .operands_in(vec![SIG, callee, args]) + .operands_out(vec![rvals]) + .is_call(true), + ); + + let FN = &Operand::new("FN", &entities.func_ref) + .with_doc("function to call, declared by `function`"); + let addr = &Operand::new("addr", iAddr); + ig.push( + Inst::new( + "func_addr", + r#" + Get the address of a function. + + Compute the absolute address of a function declared in the preamble. + The returned address can be used as a ``callee`` argument to + `call_indirect`. This is also a method for calling functions that + are too far away to be addressable by a direct `call` + instruction. + "#, + &formats.func_addr, + ) + .operands_in(vec![FN]) + .operands_out(vec![addr]), + ); +} + +#[inline(never)] +fn define_simd_lane_access( + ig: &mut InstructionGroupBuilder, + formats: &Formats, + imm: &Immediates, + _: &EntityRefs, +) { + let TxN = &TypeVar::new( + "TxN", + "A SIMD vector type", + TypeSetBuilder::new() + .ints(Interval::All) + .floats(Interval::All) + .bools(Interval::All) + .simd_lanes(Interval::All) + .includes_scalars(false) + .build(), + ); + + let x = &Operand::new("x", &TxN.lane_of()).with_doc("Value to splat to all lanes"); + let a = &Operand::new("a", TxN); + + ig.push( + Inst::new( + "splat", + r#" + Vector splat. + + Return a vector whose lanes are all ``x``. 
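+ + For example, a ``splat.i32x4`` of the scalar value 7 produces a vector whose four ``i32`` lanes are all 7.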
+ "#, + &formats.unary, + ) + .operands_in(vec![x]) + .operands_out(vec![a]), + ); + + let I8x16 = &TypeVar::new( + "I8x16", + "A SIMD vector type consisting of 16 lanes of 8-bit integers", + TypeSetBuilder::new() + .ints(8..8) + .simd_lanes(16..16) + .includes_scalars(false) + .build(), + ); + let x = &Operand::new("x", I8x16).with_doc("Vector to modify by re-arranging lanes"); + let y = &Operand::new("y", I8x16).with_doc("Mask for re-arranging lanes"); + + ig.push( + Inst::new( + "swizzle", + r#" + Vector swizzle. + + Returns a new vector with byte-width lanes selected from the lanes of the first input + vector ``x`` specified in the second input vector ``s``. The indices ``i`` in range + ``[0, 15]`` select the ``i``-th element of ``x``. For indices outside of the range the + resulting lane is 0. Note that this operates on byte-width lanes. + "#, + &formats.binary, + ) + .operands_in(vec![x, y]) + .operands_out(vec![a]), + ); + + let x = &Operand::new("x", TxN).with_doc("The vector to modify"); + let y = &Operand::new("y", &TxN.lane_of()).with_doc("New lane value"); + let Idx = &Operand::new("Idx", &imm.uimm8).with_doc("Lane index"); + + ig.push( + Inst::new( + "insertlane", + r#" + Insert ``y`` as lane ``Idx`` in x. + + The lane index, ``Idx``, is an immediate value, not an SSA value. It + must indicate a valid lane index for the type of ``x``. + "#, + &formats.ternary_imm8, + ) + .operands_in(vec![x, y, Idx]) + .operands_out(vec![a]), + ); + + let x = &Operand::new("x", TxN); + let a = &Operand::new("a", &TxN.lane_of()); + + ig.push( + Inst::new( + "extractlane", + r#" + Extract lane ``Idx`` from ``x``. + + The lane index, ``Idx``, is an immediate value, not an SSA value. It + must indicate a valid lane index for the type of ``x``. Note that the upper bits of ``a`` + may or may not be zeroed depending on the ISA but the type system should prevent using + ``a`` as anything other than the extracted value. + "#, + &formats.binary_imm8, + ) + .operands_in(vec![x, Idx]) + .operands_out(vec![a]), + ); +} + +#[inline(never)] +fn define_simd_arithmetic( + ig: &mut InstructionGroupBuilder, + formats: &Formats, + _: &Immediates, + _: &EntityRefs, +) { + let Int = &TypeVar::new( + "Int", + "A scalar or vector integer type", + TypeSetBuilder::new() + .ints(Interval::All) + .simd_lanes(Interval::All) + .build(), + ); + + let a = &Operand::new("a", Int); + let x = &Operand::new("x", Int); + let y = &Operand::new("y", Int); + + ig.push( + Inst::new( + "imin", + r#" + Signed integer minimum. + "#, + &formats.binary, + ) + .operands_in(vec![x, y]) + .operands_out(vec![a]), + ); + + ig.push( + Inst::new( + "umin", + r#" + Unsigned integer minimum. + "#, + &formats.binary, + ) + .operands_in(vec![x, y]) + .operands_out(vec![a]), + ); + + ig.push( + Inst::new( + "imax", + r#" + Signed integer maximum. + "#, + &formats.binary, + ) + .operands_in(vec![x, y]) + .operands_out(vec![a]), + ); + + ig.push( + Inst::new( + "umax", + r#" + Unsigned integer maximum. 
+ "#, + &formats.binary, + ) + .operands_in(vec![x, y]) + .operands_out(vec![a]), + ); + + let IxN = &TypeVar::new( + "IxN", + "A SIMD vector type containing integers", + TypeSetBuilder::new() + .ints(Interval::All) + .simd_lanes(Interval::All) + .includes_scalars(false) + .build(), + ); + + let a = &Operand::new("a", IxN); + let x = &Operand::new("x", IxN); + let y = &Operand::new("y", IxN); + + ig.push( + Inst::new( + "avg_round", + r#" + Unsigned average with rounding: `a := (x + y + 1) // 2` + "#, + &formats.binary, + ) + .operands_in(vec![x, y]) + .operands_out(vec![a]), + ); +} + +#[allow(clippy::many_single_char_names)] +pub(crate) fn define( + all_instructions: &mut AllInstructions, + formats: &Formats, + imm: &Immediates, + entities: &EntityRefs, +) -> InstructionGroup { + let mut ig = InstructionGroupBuilder::new(all_instructions); + + define_control_flow(&mut ig, formats, imm, entities); + define_simd_lane_access(&mut ig, formats, imm, entities); + define_simd_arithmetic(&mut ig, formats, imm, entities); + + // Operand kind shorthands. + let iflags: &TypeVar = &ValueType::Special(types::Flag::IFlags.into()).into(); + let fflags: &TypeVar = &ValueType::Special(types::Flag::FFlags.into()).into(); + + let b1: &TypeVar = &ValueType::from(LaneType::from(types::Bool::B1)).into(); + let f32_: &TypeVar = &ValueType::from(LaneType::from(types::Float::F32)).into(); + let f64_: &TypeVar = &ValueType::from(LaneType::from(types::Float::F64)).into(); + + // Starting definitions. + let Int = &TypeVar::new( + "Int", + "A scalar or vector integer type", + TypeSetBuilder::new() + .ints(Interval::All) + .simd_lanes(Interval::All) + .build(), + ); + + let Bool = &TypeVar::new( + "Bool", + "A scalar or vector boolean type", + TypeSetBuilder::new() + .bools(Interval::All) + .simd_lanes(Interval::All) + .build(), + ); + + let iB = &TypeVar::new( + "iB", + "A scalar integer type", + TypeSetBuilder::new().ints(Interval::All).build(), + ); + + let iAddr = &TypeVar::new( + "iAddr", + "An integer address type", + TypeSetBuilder::new().ints(32..64).refs(32..64).build(), + ); + + let Ref = &TypeVar::new( + "Ref", + "A scalar reference type", + TypeSetBuilder::new().refs(Interval::All).build(), + ); + + let Testable = &TypeVar::new( + "Testable", + "A scalar boolean or integer type", + TypeSetBuilder::new() + .ints(Interval::All) + .bools(Interval::All) + .build(), + ); + + let TxN = &TypeVar::new( + "TxN", + "A SIMD vector type", + TypeSetBuilder::new() + .ints(Interval::All) + .floats(Interval::All) + .bools(Interval::All) + .simd_lanes(Interval::All) + .includes_scalars(false) + .build(), + ); + let Any = &TypeVar::new( + "Any", + "Any integer, float, boolean, or reference scalar or vector type", + TypeSetBuilder::new() + .ints(Interval::All) + .floats(Interval::All) + .bools(Interval::All) + .refs(Interval::All) + .simd_lanes(Interval::All) + .includes_scalars(true) + .build(), + ); + + let AnyTo = &TypeVar::copy_from(Any, "AnyTo".to_string()); + + let Mem = &TypeVar::new( + "Mem", + "Any type that can be stored in memory", + TypeSetBuilder::new() + .ints(Interval::All) + .floats(Interval::All) + .simd_lanes(Interval::All) + .refs(Interval::All) + .build(), + ); + + let MemTo = &TypeVar::copy_from(Mem, "MemTo".to_string()); + + let addr = &Operand::new("addr", iAddr); + + let SS = &Operand::new("SS", &entities.stack_slot); + let Offset = &Operand::new("Offset", &imm.offset32).with_doc("Byte offset from base address"); + let x = &Operand::new("x", Mem).with_doc("Value to be stored"); + let a = 
&Operand::new("a", Mem).with_doc("Value loaded"); + let p = &Operand::new("p", iAddr); + let MemFlags = &Operand::new("MemFlags", &imm.memflags); + let args = &Operand::new("args", &entities.varargs).with_doc("Address arguments"); + + ig.push( + Inst::new( + "load", + r#" + Load from memory at ``p + Offset``. + + This is a polymorphic instruction that can load any value type which + has a memory representation. + "#, + &formats.load, + ) + .operands_in(vec![MemFlags, p, Offset]) + .operands_out(vec![a]) + .can_load(true), + ); + + ig.push( + Inst::new( + "load_complex", + r#" + Load from memory at ``sum(args) + Offset``. + + This is a polymorphic instruction that can load any value type which + has a memory representation. + "#, + &formats.load_complex, + ) + .operands_in(vec![MemFlags, args, Offset]) + .operands_out(vec![a]) + .can_load(true), + ); + + ig.push( + Inst::new( + "store", + r#" + Store ``x`` to memory at ``p + Offset``. + + This is a polymorphic instruction that can store any value type with a + memory representation. + "#, + &formats.store, + ) + .operands_in(vec![MemFlags, x, p, Offset]) + .can_store(true), + ); + + ig.push( + Inst::new( + "store_complex", + r#" + Store ``x`` to memory at ``sum(args) + Offset``. + + This is a polymorphic instruction that can store any value type with a + memory representation. + "#, + &formats.store_complex, + ) + .operands_in(vec![MemFlags, x, args, Offset]) + .can_store(true), + ); + + let iExt8 = &TypeVar::new( + "iExt8", + "An integer type with more than 8 bits", + TypeSetBuilder::new().ints(16..64).build(), + ); + let x = &Operand::new("x", iExt8); + let a = &Operand::new("a", iExt8); + + ig.push( + Inst::new( + "uload8", + r#" + Load 8 bits from memory at ``p + Offset`` and zero-extend. + + This is equivalent to ``load.i8`` followed by ``uextend``. + "#, + &formats.load, + ) + .operands_in(vec![MemFlags, p, Offset]) + .operands_out(vec![a]) + .can_load(true), + ); + + ig.push( + Inst::new( + "uload8_complex", + r#" + Load 8 bits from memory at ``sum(args) + Offset`` and zero-extend. + + This is equivalent to ``load.i8`` followed by ``uextend``. + "#, + &formats.load_complex, + ) + .operands_in(vec![MemFlags, args, Offset]) + .operands_out(vec![a]) + .can_load(true), + ); + + ig.push( + Inst::new( + "sload8", + r#" + Load 8 bits from memory at ``p + Offset`` and sign-extend. + + This is equivalent to ``load.i8`` followed by ``sextend``. + "#, + &formats.load, + ) + .operands_in(vec![MemFlags, p, Offset]) + .operands_out(vec![a]) + .can_load(true), + ); + + ig.push( + Inst::new( + "sload8_complex", + r#" + Load 8 bits from memory at ``sum(args) + Offset`` and sign-extend. + + This is equivalent to ``load.i8`` followed by ``sextend``. + "#, + &formats.load_complex, + ) + .operands_in(vec![MemFlags, args, Offset]) + .operands_out(vec![a]) + .can_load(true), + ); + + ig.push( + Inst::new( + "istore8", + r#" + Store the low 8 bits of ``x`` to memory at ``p + Offset``. + + This is equivalent to ``ireduce.i8`` followed by ``store.i8``. + "#, + &formats.store, + ) + .operands_in(vec![MemFlags, x, p, Offset]) + .can_store(true), + ); + + ig.push( + Inst::new( + "istore8_complex", + r#" + Store the low 8 bits of ``x`` to memory at ``sum(args) + Offset``. + + This is equivalent to ``ireduce.i8`` followed by ``store.i8``. 
+ "#, + &formats.store_complex, + ) + .operands_in(vec![MemFlags, x, args, Offset]) + .can_store(true), + ); + + let iExt16 = &TypeVar::new( + "iExt16", + "An integer type with more than 16 bits", + TypeSetBuilder::new().ints(32..64).build(), + ); + let x = &Operand::new("x", iExt16); + let a = &Operand::new("a", iExt16); + + ig.push( + Inst::new( + "uload16", + r#" + Load 16 bits from memory at ``p + Offset`` and zero-extend. + + This is equivalent to ``load.i16`` followed by ``uextend``. + "#, + &formats.load, + ) + .operands_in(vec![MemFlags, p, Offset]) + .operands_out(vec![a]) + .can_load(true), + ); + + ig.push( + Inst::new( + "uload16_complex", + r#" + Load 16 bits from memory at ``sum(args) + Offset`` and zero-extend. + + This is equivalent to ``load.i16`` followed by ``uextend``. + "#, + &formats.load_complex, + ) + .operands_in(vec![MemFlags, args, Offset]) + .operands_out(vec![a]) + .can_load(true), + ); + + ig.push( + Inst::new( + "sload16", + r#" + Load 16 bits from memory at ``p + Offset`` and sign-extend. + + This is equivalent to ``load.i16`` followed by ``sextend``. + "#, + &formats.load, + ) + .operands_in(vec![MemFlags, p, Offset]) + .operands_out(vec![a]) + .can_load(true), + ); + + ig.push( + Inst::new( + "sload16_complex", + r#" + Load 16 bits from memory at ``sum(args) + Offset`` and sign-extend. + + This is equivalent to ``load.i16`` followed by ``sextend``. + "#, + &formats.load_complex, + ) + .operands_in(vec![MemFlags, args, Offset]) + .operands_out(vec![a]) + .can_load(true), + ); + + ig.push( + Inst::new( + "istore16", + r#" + Store the low 16 bits of ``x`` to memory at ``p + Offset``. + + This is equivalent to ``ireduce.i16`` followed by ``store.i16``. + "#, + &formats.store, + ) + .operands_in(vec![MemFlags, x, p, Offset]) + .can_store(true), + ); + + ig.push( + Inst::new( + "istore16_complex", + r#" + Store the low 16 bits of ``x`` to memory at ``sum(args) + Offset``. + + This is equivalent to ``ireduce.i16`` followed by ``store.i16``. + "#, + &formats.store_complex, + ) + .operands_in(vec![MemFlags, x, args, Offset]) + .can_store(true), + ); + + let iExt32 = &TypeVar::new( + "iExt32", + "An integer type with more than 32 bits", + TypeSetBuilder::new().ints(64..64).build(), + ); + let x = &Operand::new("x", iExt32); + let a = &Operand::new("a", iExt32); + + ig.push( + Inst::new( + "uload32", + r#" + Load 32 bits from memory at ``p + Offset`` and zero-extend. + + This is equivalent to ``load.i32`` followed by ``uextend``. + "#, + &formats.load, + ) + .operands_in(vec![MemFlags, p, Offset]) + .operands_out(vec![a]) + .can_load(true), + ); + + ig.push( + Inst::new( + "uload32_complex", + r#" + Load 32 bits from memory at ``sum(args) + Offset`` and zero-extend. + + This is equivalent to ``load.i32`` followed by ``uextend``. + "#, + &formats.load_complex, + ) + .operands_in(vec![MemFlags, args, Offset]) + .operands_out(vec![a]) + .can_load(true), + ); + + ig.push( + Inst::new( + "sload32", + r#" + Load 32 bits from memory at ``p + Offset`` and sign-extend. + + This is equivalent to ``load.i32`` followed by ``sextend``. + "#, + &formats.load, + ) + .operands_in(vec![MemFlags, p, Offset]) + .operands_out(vec![a]) + .can_load(true), + ); + + ig.push( + Inst::new( + "sload32_complex", + r#" + Load 32 bits from memory at ``sum(args) + Offset`` and sign-extend. + + This is equivalent to ``load.i32`` followed by ``sextend``. 
+ "#, + &formats.load_complex, + ) + .operands_in(vec![MemFlags, args, Offset]) + .operands_out(vec![a]) + .can_load(true), + ); + + ig.push( + Inst::new( + "istore32", + r#" + Store the low 32 bits of ``x`` to memory at ``p + Offset``. + + This is equivalent to ``ireduce.i32`` followed by ``store.i32``. + "#, + &formats.store, + ) + .operands_in(vec![MemFlags, x, p, Offset]) + .can_store(true), + ); + + ig.push( + Inst::new( + "istore32_complex", + r#" + Store the low 32 bits of ``x`` to memory at ``sum(args) + Offset``. + + This is equivalent to ``ireduce.i32`` followed by ``store.i32``. + "#, + &formats.store_complex, + ) + .operands_in(vec![MemFlags, x, args, Offset]) + .can_store(true), + ); + + let I16x8 = &TypeVar::new( + "I16x8", + "A SIMD vector with exactly 8 lanes of 16-bit values", + TypeSetBuilder::new() + .ints(16..16) + .simd_lanes(8..8) + .includes_scalars(false) + .build(), + ); + let a = &Operand::new("a", I16x8).with_doc("Value loaded"); + + ig.push( + Inst::new( + "uload8x8", + r#" + Load an 8x8 vector (64 bits) from memory at ``p + Offset`` and zero-extend into an i16x8 + vector. + "#, + &formats.load, + ) + .operands_in(vec![MemFlags, p, Offset]) + .operands_out(vec![a]) + .can_load(true), + ); + + ig.push( + Inst::new( + "uload8x8_complex", + r#" + Load an 8x8 vector (64 bits) from memory at ``sum(args) + Offset`` and zero-extend into an + i16x8 vector. + "#, + &formats.load_complex, + ) + .operands_in(vec![MemFlags, args, Offset]) + .operands_out(vec![a]) + .can_load(true), + ); + + ig.push( + Inst::new( + "sload8x8", + r#" + Load an 8x8 vector (64 bits) from memory at ``p + Offset`` and sign-extend into an i16x8 + vector. + "#, + &formats.load, + ) + .operands_in(vec![MemFlags, p, Offset]) + .operands_out(vec![a]) + .can_load(true), + ); + + ig.push( + Inst::new( + "sload8x8_complex", + r#" + Load an 8x8 vector (64 bits) from memory at ``sum(args) + Offset`` and sign-extend into an + i16x8 vector. + "#, + &formats.load_complex, + ) + .operands_in(vec![MemFlags, args, Offset]) + .operands_out(vec![a]) + .can_load(true), + ); + + let I32x4 = &TypeVar::new( + "I32x4", + "A SIMD vector with exactly 4 lanes of 32-bit values", + TypeSetBuilder::new() + .ints(32..32) + .simd_lanes(4..4) + .includes_scalars(false) + .build(), + ); + let a = &Operand::new("a", I32x4).with_doc("Value loaded"); + + ig.push( + Inst::new( + "uload16x4", + r#" + Load a 16x4 vector (64 bits) from memory at ``p + Offset`` and zero-extend into an i32x4 + vector. + "#, + &formats.load, + ) + .operands_in(vec![MemFlags, p, Offset]) + .operands_out(vec![a]) + .can_load(true), + ); + + ig.push( + Inst::new( + "uload16x4_complex", + r#" + Load a 16x4 vector (64 bits) from memory at ``sum(args) + Offset`` and zero-extend into an + i32x4 vector. + "#, + &formats.load_complex, + ) + .operands_in(vec![MemFlags, args, Offset]) + .operands_out(vec![a]) + .can_load(true), + ); + + ig.push( + Inst::new( + "sload16x4", + r#" + Load a 16x4 vector (64 bits) from memory at ``p + Offset`` and sign-extend into an i32x4 + vector. + "#, + &formats.load, + ) + .operands_in(vec![MemFlags, p, Offset]) + .operands_out(vec![a]) + .can_load(true), + ); + + ig.push( + Inst::new( + "sload16x4_complex", + r#" + Load a 16x4 vector (64 bits) from memory at ``sum(args) + Offset`` and sign-extend into an + i32x4 vector. 
+ "#, + &formats.load_complex, + ) + .operands_in(vec![MemFlags, args, Offset]) + .operands_out(vec![a]) + .can_load(true), + ); + + let I64x2 = &TypeVar::new( + "I64x2", + "A SIMD vector with exactly 2 lanes of 64-bit values", + TypeSetBuilder::new() + .ints(64..64) + .simd_lanes(2..2) + .includes_scalars(false) + .build(), + ); + let a = &Operand::new("a", I64x2).with_doc("Value loaded"); + + ig.push( + Inst::new( + "uload32x2", + r#" + Load an 32x2 vector (64 bits) from memory at ``p + Offset`` and zero-extend into an i64x2 + vector. + "#, + &formats.load, + ) + .operands_in(vec![MemFlags, p, Offset]) + .operands_out(vec![a]) + .can_load(true), + ); + + ig.push( + Inst::new( + "uload32x2_complex", + r#" + Load a 32x2 vector (64 bits) from memory at ``sum(args) + Offset`` and zero-extend into an + i64x2 vector. + "#, + &formats.load_complex, + ) + .operands_in(vec![MemFlags, args, Offset]) + .operands_out(vec![a]) + .can_load(true), + ); + + ig.push( + Inst::new( + "sload32x2", + r#" + Load a 32x2 vector (64 bits) from memory at ``p + Offset`` and sign-extend into an i64x2 + vector. + "#, + &formats.load, + ) + .operands_in(vec![MemFlags, p, Offset]) + .operands_out(vec![a]) + .can_load(true), + ); + + ig.push( + Inst::new( + "sload32x2_complex", + r#" + Load a 32x2 vector (64 bits) from memory at ``sum(args) + Offset`` and sign-extend into an + i64x2 vector. + "#, + &formats.load_complex, + ) + .operands_in(vec![MemFlags, args, Offset]) + .operands_out(vec![a]) + .can_load(true), + ); + + let x = &Operand::new("x", Mem).with_doc("Value to be stored"); + let a = &Operand::new("a", Mem).with_doc("Value loaded"); + let Offset = + &Operand::new("Offset", &imm.offset32).with_doc("In-bounds offset into stack slot"); + + ig.push( + Inst::new( + "stack_load", + r#" + Load a value from a stack slot at the constant offset. + + This is a polymorphic instruction that can load any value type which + has a memory representation. + + The offset is an immediate constant, not an SSA value. The memory + access cannot go out of bounds, i.e. + `sizeof(a) + Offset <= sizeof(SS)`. + "#, + &formats.stack_load, + ) + .operands_in(vec![SS, Offset]) + .operands_out(vec![a]) + .can_load(true), + ); + + ig.push( + Inst::new( + "stack_store", + r#" + Store a value to a stack slot at a constant offset. + + This is a polymorphic instruction that can store any value type with a + memory representation. + + The offset is an immediate constant, not an SSA value. The memory + access cannot go out of bounds, i.e. + `sizeof(a) + Offset <= sizeof(SS)`. + "#, + &formats.stack_store, + ) + .operands_in(vec![x, SS, Offset]) + .can_store(true), + ); + + ig.push( + Inst::new( + "stack_addr", + r#" + Get the address of a stack slot. + + Compute the absolute address of a byte in a stack slot. The offset must + refer to a byte inside the stack slot: + `0 <= Offset < sizeof(SS)`. + "#, + &formats.stack_load, + ) + .operands_in(vec![SS, Offset]) + .operands_out(vec![addr]), + ); + + let GV = &Operand::new("GV", &entities.global_value); + + ig.push( + Inst::new( + "global_value", + r#" + Compute the value of global GV. + "#, + &formats.unary_global_value, + ) + .operands_in(vec![GV]) + .operands_out(vec![a]), + ); + + ig.push( + Inst::new( + "symbol_value", + r#" + Compute the value of global GV, which is a symbolic value. 
+ "#, + &formats.unary_global_value, + ) + .operands_in(vec![GV]) + .operands_out(vec![a]), + ); + + ig.push( + Inst::new( + "tls_value", + r#" + Compute the value of global GV, which is a TLS (thread local storage) value. + "#, + &formats.unary_global_value, + ) + .operands_in(vec![GV]) + .operands_out(vec![a]), + ); + + let HeapOffset = &TypeVar::new( + "HeapOffset", + "An unsigned heap offset", + TypeSetBuilder::new().ints(32..64).build(), + ); + + let H = &Operand::new("H", &entities.heap); + let p = &Operand::new("p", HeapOffset); + let Size = &Operand::new("Size", &imm.uimm32).with_doc("Size in bytes"); + + ig.push( + Inst::new( + "heap_addr", + r#" + Bounds check and compute absolute address of heap memory. + + Verify that the offset range ``p .. p + Size - 1`` is in bounds for the + heap H, and generate an absolute address that is safe to dereference. + + 1. If ``p + Size`` is not greater than the heap bound, return an + absolute address corresponding to a byte offset of ``p`` from the + heap's base address. + 2. If ``p + Size`` is greater than the heap bound, generate a trap. + "#, + &formats.heap_addr, + ) + .operands_in(vec![H, p, Size]) + .operands_out(vec![addr]), + ); + + // Note this instruction is marked as having other side-effects, so GVN won't try to hoist it, + // which would result in it being subject to spilling. While not hoisting would generally hurt + // performance, since a computed value used many times may need to be regenerated before each + // use, it is not the case here: this instruction doesn't generate any code. That's because, + // by definition the pinned register is never used by the register allocator, but is written to + // and read explicitly and exclusively by set_pinned_reg and get_pinned_reg. + ig.push( + Inst::new( + "get_pinned_reg", + r#" + Gets the content of the pinned register, when it's enabled. + "#, + &formats.nullary, + ) + .operands_out(vec![addr]) + .other_side_effects(true), + ); + + ig.push( + Inst::new( + "set_pinned_reg", + r#" + Sets the content of the pinned register, when it's enabled. + "#, + &formats.unary, + ) + .operands_in(vec![addr]) + .other_side_effects(true), + ); + + let TableOffset = &TypeVar::new( + "TableOffset", + "An unsigned table offset", + TypeSetBuilder::new().ints(32..64).build(), + ); + let T = &Operand::new("T", &entities.table); + let p = &Operand::new("p", TableOffset); + let Offset = + &Operand::new("Offset", &imm.offset32).with_doc("Byte offset from element address"); + + ig.push( + Inst::new( + "table_addr", + r#" + Bounds check and compute absolute address of a table entry. + + Verify that the offset ``p`` is in bounds for the table T, and generate + an absolute address that is safe to dereference. + + ``Offset`` must be less than the size of a table element. + + 1. If ``p`` is not greater than the table bound, return an absolute + address corresponding to a byte offset of ``p`` from the table's + base address. + 2. If ``p`` is greater than the table bound, generate a trap. + "#, + &formats.table_addr, + ) + .operands_in(vec![T, p, Offset]) + .operands_out(vec![addr]), + ); + + let N = &Operand::new("N", &imm.imm64); + let a = &Operand::new("a", Int).with_doc("A constant integer scalar or vector value"); + + ig.push( + Inst::new( + "iconst", + r#" + Integer constant. + + Create a scalar integer SSA value with an immediate constant value, or + an integer vector where all the lanes have the same value. 
+ "#, + &formats.unary_imm, + ) + .operands_in(vec![N]) + .operands_out(vec![a]), + ); + + let N = &Operand::new("N", &imm.ieee32); + let a = &Operand::new("a", f32_).with_doc("A constant f32 scalar value"); + + ig.push( + Inst::new( + "f32const", + r#" + Floating point constant. + + Create a `f32` SSA value with an immediate constant value. + "#, + &formats.unary_ieee32, + ) + .operands_in(vec![N]) + .operands_out(vec![a]), + ); + + let N = &Operand::new("N", &imm.ieee64); + let a = &Operand::new("a", f64_).with_doc("A constant f64 scalar value"); + + ig.push( + Inst::new( + "f64const", + r#" + Floating point constant. + + Create a `f64` SSA value with an immediate constant value. + "#, + &formats.unary_ieee64, + ) + .operands_in(vec![N]) + .operands_out(vec![a]), + ); + + let N = &Operand::new("N", &imm.boolean); + let a = &Operand::new("a", Bool).with_doc("A constant boolean scalar or vector value"); + + ig.push( + Inst::new( + "bconst", + r#" + Boolean constant. + + Create a scalar boolean SSA value with an immediate constant value, or + a boolean vector where all the lanes have the same value. + "#, + &formats.unary_bool, + ) + .operands_in(vec![N]) + .operands_out(vec![a]), + ); + + let N = &Operand::new("N", &imm.pool_constant) + .with_doc("The 16 immediate bytes of a 128-bit vector"); + let a = &Operand::new("a", TxN).with_doc("A constant vector value"); + + ig.push( + Inst::new( + "vconst", + r#" + SIMD vector constant. + + Construct a vector with the given immediate bytes. + "#, + &formats.unary_const, + ) + .operands_in(vec![N]) + .operands_out(vec![a]), + ); + + let constant = + &Operand::new("constant", &imm.pool_constant).with_doc("A constant in the constant pool"); + let address = &Operand::new("address", iAddr); + ig.push( + Inst::new( + "const_addr", + r#" + Calculate the base address of a value in the constant pool. + "#, + &formats.unary_const, + ) + .operands_in(vec![constant]) + .operands_out(vec![address]), + ); + + let mask = &Operand::new("mask", &imm.uimm128) + .with_doc("The 16 immediate bytes used for selecting the elements to shuffle"); + let Tx16 = &TypeVar::new( + "Tx16", + "A SIMD vector with exactly 16 lanes of 8-bit values; eventually this may support other \ + lane counts and widths", + TypeSetBuilder::new() + .ints(8..8) + .bools(8..8) + .simd_lanes(16..16) + .includes_scalars(false) + .build(), + ); + let a = &Operand::new("a", Tx16).with_doc("A vector value"); + let b = &Operand::new("b", Tx16).with_doc("A vector value"); + + ig.push( + Inst::new( + "shuffle", + r#" + SIMD vector shuffle. + + Shuffle two vectors using the given immediate bytes. For each of the 16 bytes of the + immediate, a value i of 0-15 selects the i-th element of the first vector and a value i of + 16-31 selects the (i-16)th element of the second vector. Immediate values outside of the + 0-31 range place a 0 in the resulting vector lane. + "#, + &formats.shuffle, + ) + .operands_in(vec![a, b, mask]) + .operands_out(vec![a]), + ); + + let a = &Operand::new("a", Ref).with_doc("A constant reference null value"); + + ig.push( + Inst::new( + "null", + r#" + Null constant value for reference types. + + Create a scalar reference SSA value with a constant null value. + "#, + &formats.nullary, + ) + .operands_out(vec![a]), + ); + + ig.push(Inst::new( + "nop", + r#" + Just a dummy instruction. + + Note: this doesn't compile to a machine code nop. 
+ "#, + &formats.nullary, + )); + + let c = &Operand::new("c", Testable).with_doc("Controlling value to test"); + let x = &Operand::new("x", Any).with_doc("Value to use when `c` is true"); + let y = &Operand::new("y", Any).with_doc("Value to use when `c` is false"); + let a = &Operand::new("a", Any); + + ig.push( + Inst::new( + "select", + r#" + Conditional select. + + This instruction selects whole values. Use `vselect` for + lane-wise selection. + "#, + &formats.ternary, + ) + .operands_in(vec![c, x, y]) + .operands_out(vec![a]), + ); + + let cc = &Operand::new("cc", &imm.intcc).with_doc("Controlling condition code"); + let flags = &Operand::new("flags", iflags).with_doc("The machine's flag register"); + + ig.push( + Inst::new( + "selectif", + r#" + Conditional select, dependent on integer condition codes. + "#, + &formats.int_select, + ) + .operands_in(vec![cc, flags, x, y]) + .operands_out(vec![a]), + ); + + ig.push( + Inst::new( + "selectif_spectre_guard", + r#" + Conditional select intended for Spectre guards. + + This operation is semantically equivalent to a selectif instruction. + However, it is guaranteed to not be removed or otherwise altered by any + optimization pass, and is guaranteed to result in a conditional-move + instruction, not a branch-based lowering. As such, it is suitable + for use when producing Spectre guards. For example, a bounds-check + may guard against unsafe speculation past a bounds-check conditional + branch by passing the address or index to be accessed through a + conditional move, also gated on the same condition. Because no + Spectre-vulnerable processors are known to perform speculation on + conditional move instructions, this is guaranteed to pick the + correct input. If the selected input in case of overflow is a "safe" + value, for example a null pointer that causes an exception in the + speculative path, this ensures that no Spectre vulnerability will + exist. + "#, + &formats.int_select, + ) + .operands_in(vec![cc, flags, x, y]) + .operands_out(vec![a]) + .other_side_effects(true), + ); + + let c = &Operand::new("c", Any).with_doc("Controlling value to test"); + ig.push( + Inst::new( + "bitselect", + r#" + Conditional select of bits. + + For each bit in `c`, this instruction selects the corresponding bit from `x` if the bit + in `c` is 1 and the corresponding bit from `y` if the bit in `c` is 0. See also: + `select`, `vselect`. + "#, + &formats.ternary, + ) + .operands_in(vec![c, x, y]) + .operands_out(vec![a]), + ); + + let x = &Operand::new("x", Any); + + ig.push( + Inst::new( + "copy", + r#" + Register-register copy. + + This instruction copies its input, preserving the value type. + + A pure SSA-form program does not need to copy values, but this + instruction is useful for representing intermediate stages during + instruction transformations, and the register allocator needs a way of + representing register copies. + "#, + &formats.unary, + ) + .operands_in(vec![x]) + .operands_out(vec![a]), + ); + + ig.push( + Inst::new( + "spill", + r#" + Spill a register value to a stack slot. + + This instruction behaves exactly like `copy`, but the result + value is assigned to a spill slot. + "#, + &formats.unary, + ) + .operands_in(vec![x]) + .operands_out(vec![a]) + .can_store(true), + ); + + ig.push( + Inst::new( + "fill", + r#" + Load a register value from a stack slot. + + This instruction behaves exactly like `copy`, but creates a new + SSA value for the spilled input value. 
+ "#, + &formats.unary, + ) + .operands_in(vec![x]) + .operands_out(vec![a]) + .can_load(true), + ); + + ig.push( + Inst::new( + "fill_nop", + r#" + This is identical to `fill`, except it has no encoding, since it is a no-op. + + This instruction is created only during late-stage redundant-reload removal, after all + registers and stack slots have been assigned. It is used to replace `fill`s that have + been identified as redundant. + "#, + &formats.unary, + ) + .operands_in(vec![x]) + .operands_out(vec![a]) + .can_load(true), + ); + + let Sarg = &TypeVar::new( + "Sarg", + "Any scalar or vector type with at most 128 lanes", + TypeSetBuilder::new() + .specials(vec![crate::cdsl::types::SpecialType::StructArgument]) + .build(), + ); + let sarg_t = &Operand::new("sarg_t", Sarg); + + // FIXME remove once the old style codegen backends are removed. + ig.push( + Inst::new( + "dummy_sarg_t", + r#" + This creates a sarg_t + + This instruction is internal and should not be created by + Cranelift users. + "#, + &formats.nullary, + ) + .operands_in(vec![]) + .operands_out(vec![sarg_t]), + ); + + let src = &Operand::new("src", &imm.regunit); + let dst = &Operand::new("dst", &imm.regunit); + + ig.push( + Inst::new( + "regmove", + r#" + Temporarily divert ``x`` from ``src`` to ``dst``. + + This instruction moves the location of a value from one register to + another without creating a new SSA value. It is used by the register + allocator to temporarily rearrange register assignments in order to + satisfy instruction constraints. + + The register diversions created by this instruction must be undone + before the value leaves the block. At the entry to a new block, all live + values must be in their originally assigned registers. + "#, + &formats.reg_move, + ) + .operands_in(vec![x, src, dst]) + .other_side_effects(true), + ); + + ig.push( + Inst::new( + "copy_special", + r#" + Copies the contents of ''src'' register to ''dst'' register. + + This instructions copies the contents of one register to another + register without involving any SSA values. This is used for copying + special registers, e.g. copying the stack register to the frame + register in a function prologue. + "#, + &formats.copy_special, + ) + .operands_in(vec![src, dst]) + .other_side_effects(true), + ); + + ig.push( + Inst::new( + "copy_to_ssa", + r#" + Copies the contents of ''src'' register to ''a'' SSA name. + + This instruction copies the contents of one register, regardless of its SSA name, to + another register, creating a new SSA name. In that sense it is a one-sided version + of ''copy_special''. This instruction is internal and should not be created by + Cranelift users. + "#, + &formats.copy_to_ssa, + ) + .operands_in(vec![src]) + .operands_out(vec![a]) + .other_side_effects(true), + ); + + ig.push( + Inst::new( + "copy_nop", + r#" + Stack-slot-to-the-same-stack-slot copy, which is guaranteed to turn + into a no-op. This instruction is for use only within Cranelift itself. + + This instruction copies its input, preserving the value type. + "#, + &formats.unary, + ) + .operands_in(vec![x]) + .operands_out(vec![a]), + ); + + let delta = &Operand::new("delta", Int); + + ig.push( + Inst::new( + "adjust_sp_down", + r#" + Subtracts ``delta`` offset value from the stack pointer register. + + This instruction is used to adjust the stack pointer by a dynamic amount. 
+ "#, + &formats.unary, + ) + .operands_in(vec![delta]) + .other_side_effects(true), + ); + + let Offset = &Operand::new("Offset", &imm.imm64).with_doc("Offset from current stack pointer"); + + ig.push( + Inst::new( + "adjust_sp_up_imm", + r#" + Adds ``Offset`` immediate offset value to the stack pointer register. + + This instruction is used to adjust the stack pointer, primarily in function + prologues and epilogues. ``Offset`` is constrained to the size of a signed + 32-bit integer. + "#, + &formats.unary_imm, + ) + .operands_in(vec![Offset]) + .other_side_effects(true), + ); + + let Offset = &Operand::new("Offset", &imm.imm64).with_doc("Offset from current stack pointer"); + + ig.push( + Inst::new( + "adjust_sp_down_imm", + r#" + Subtracts ``Offset`` immediate offset value from the stack pointer + register. + + This instruction is used to adjust the stack pointer, primarily in function + prologues and epilogues. ``Offset`` is constrained to the size of a signed + 32-bit integer. + "#, + &formats.unary_imm, + ) + .operands_in(vec![Offset]) + .other_side_effects(true), + ); + + let f = &Operand::new("f", iflags); + + ig.push( + Inst::new( + "ifcmp_sp", + r#" + Compare ``addr`` with the stack pointer and set the CPU flags. + + This is like `ifcmp` where ``addr`` is the LHS operand and the stack + pointer is the RHS. + "#, + &formats.unary, + ) + .operands_in(vec![addr]) + .operands_out(vec![f]), + ); + + ig.push( + Inst::new( + "regspill", + r#" + Temporarily divert ``x`` from ``src`` to ``SS``. + + This instruction moves the location of a value from a register to a + stack slot without creating a new SSA value. It is used by the register + allocator to temporarily rearrange register assignments in order to + satisfy instruction constraints. + + See also `regmove`. + "#, + &formats.reg_spill, + ) + .operands_in(vec![x, src, SS]) + .other_side_effects(true), + ); + + ig.push( + Inst::new( + "regfill", + r#" + Temporarily divert ``x`` from ``SS`` to ``dst``. + + This instruction moves the location of a value from a stack slot to a + register without creating a new SSA value. It is used by the register + allocator to temporarily rearrange register assignments in order to + satisfy instruction constraints. + + See also `regmove`. + "#, + &formats.reg_fill, + ) + .operands_in(vec![x, SS, dst]) + .other_side_effects(true), + ); + + let N = + &Operand::new("args", &entities.varargs).with_doc("Variable number of args for StackMap"); + + ig.push( + Inst::new( + "safepoint", + r#" + This instruction will provide live reference values at a point in + the function. It can only be used by the compiler. + "#, + &formats.multiary, + ) + .operands_in(vec![N]) + .other_side_effects(true), + ); + + let x = &Operand::new("x", TxN).with_doc("Vector to split"); + let lo = &Operand::new("lo", &TxN.half_vector()).with_doc("Low-numbered lanes of `x`"); + let hi = &Operand::new("hi", &TxN.half_vector()).with_doc("High-numbered lanes of `x`"); + + ig.push( + Inst::new( + "vsplit", + r#" + Split a vector into two halves. + + Split the vector `x` into two separate values, each containing half of + the lanes from ``x``. The result may be two scalars if ``x`` only had + two lanes. 
+ "#, + &formats.unary, + ) + .operands_in(vec![x]) + .operands_out(vec![lo, hi]) + .is_ghost(true), + ); + + let Any128 = &TypeVar::new( + "Any128", + "Any scalar or vector type with as most 128 lanes", + TypeSetBuilder::new() + .ints(Interval::All) + .floats(Interval::All) + .bools(Interval::All) + .simd_lanes(1..128) + .includes_scalars(true) + .build(), + ); + + let x = &Operand::new("x", Any128).with_doc("Low-numbered lanes"); + let y = &Operand::new("y", Any128).with_doc("High-numbered lanes"); + let a = &Operand::new("a", &Any128.double_vector()).with_doc("Concatenation of `x` and `y`"); + + ig.push( + Inst::new( + "vconcat", + r#" + Vector concatenation. + + Return a vector formed by concatenating ``x`` and ``y``. The resulting + vector type has twice as many lanes as each of the inputs. The lanes of + ``x`` appear as the low-numbered lanes, and the lanes of ``y`` become + the high-numbered lanes of ``a``. + + It is possible to form a vector by concatenating two scalars. + "#, + &formats.binary, + ) + .operands_in(vec![x, y]) + .operands_out(vec![a]) + .is_ghost(true), + ); + + let c = &Operand::new("c", &TxN.as_bool()).with_doc("Controlling vector"); + let x = &Operand::new("x", TxN).with_doc("Value to use where `c` is true"); + let y = &Operand::new("y", TxN).with_doc("Value to use where `c` is false"); + let a = &Operand::new("a", TxN); + + ig.push( + Inst::new( + "vselect", + r#" + Vector lane select. + + Select lanes from ``x`` or ``y`` controlled by the lanes of the boolean + vector ``c``. + "#, + &formats.ternary, + ) + .operands_in(vec![c, x, y]) + .operands_out(vec![a]), + ); + + let s = &Operand::new("s", b1); + + ig.push( + Inst::new( + "vany_true", + r#" + Reduce a vector to a scalar boolean. + + Return a scalar boolean true if any lane in ``a`` is non-zero, false otherwise. + "#, + &formats.unary, + ) + .operands_in(vec![a]) + .operands_out(vec![s]), + ); + + ig.push( + Inst::new( + "vall_true", + r#" + Reduce a vector to a scalar boolean. + + Return a scalar boolean true if all lanes in ``i`` are non-zero, false otherwise. + "#, + &formats.unary, + ) + .operands_in(vec![a]) + .operands_out(vec![s]), + ); + + let a = &Operand::new("a", TxN); + let x = &Operand::new("x", Int); + + ig.push( + Inst::new( + "vhigh_bits", + r#" + Reduce a vector to a scalar integer. + + Return a scalar integer, consisting of the concatenation of the most significant bit + of each lane of ``a``. + "#, + &formats.unary, + ) + .operands_in(vec![a]) + .operands_out(vec![x]), + ); + + let a = &Operand::new("a", &Int.as_bool()); + let Cond = &Operand::new("Cond", &imm.intcc); + let x = &Operand::new("x", Int); + let y = &Operand::new("y", Int); + + ig.push( + Inst::new( + "icmp", + r#" + Integer comparison. + + The condition code determines if the operands are interpreted as signed + or unsigned integers. + + | Signed | Unsigned | Condition | + |--------|----------|-----------------------| + | eq | eq | Equal | + | ne | ne | Not equal | + | slt | ult | Less than | + | sge | uge | Greater than or equal | + | sgt | ugt | Greater than | + | sle | ule | Less than or equal | + | of | * | Overflow | + | nof | * | No Overflow | + + \* The unsigned version of overflow conditions have ISA-specific + semantics and thus have been kept as methods on the TargetIsa trait as + [unsigned_add_overflow_condition][isa::TargetIsa::unsigned_add_overflow_condition] and + [unsigned_sub_overflow_condition][isa::TargetIsa::unsigned_sub_overflow_condition]. 
+ + When this instruction compares integer vectors, it returns a boolean + vector of lane-wise comparisons. + "#, + &formats.int_compare, + ) + .operands_in(vec![Cond, x, y]) + .operands_out(vec![a]), + ); + + let a = &Operand::new("a", b1); + let x = &Operand::new("x", iB); + let Y = &Operand::new("Y", &imm.imm64); + + ig.push( + Inst::new( + "icmp_imm", + r#" + Compare scalar integer to a constant. + + This is the same as the `icmp` instruction, except one operand is + an immediate constant. + + This instruction can only compare scalars. Use `icmp` for + lane-wise vector comparisons. + "#, + &formats.int_compare_imm, + ) + .operands_in(vec![Cond, x, Y]) + .operands_out(vec![a]), + ); + + let f = &Operand::new("f", iflags); + let x = &Operand::new("x", iB); + let y = &Operand::new("y", iB); + + ig.push( + Inst::new( + "ifcmp", + r#" + Compare scalar integers and return flags. + + Compare two scalar integer values and return integer CPU flags + representing the result. + "#, + &formats.binary, + ) + .operands_in(vec![x, y]) + .operands_out(vec![f]), + ); + + ig.push( + Inst::new( + "ifcmp_imm", + r#" + Compare scalar integer to a constant and return flags. + + Like `icmp_imm`, but returns integer CPU flags instead of testing + a specific condition code. + "#, + &formats.binary_imm64, + ) + .operands_in(vec![x, Y]) + .operands_out(vec![f]), + ); + + let a = &Operand::new("a", Int); + let x = &Operand::new("x", Int); + let y = &Operand::new("y", Int); + + ig.push( + Inst::new( + "iadd", + r#" + Wrapping integer addition: `a := x + y \pmod{2^B}`. + + This instruction does not depend on the signed/unsigned interpretation + of the operands. + "#, + &formats.binary, + ) + .operands_in(vec![x, y]) + .operands_out(vec![a]), + ); + + ig.push( + Inst::new( + "uadd_sat", + r#" + Add with unsigned saturation. + + This is similar to `iadd` but the operands are interpreted as unsigned integers and their + summed result, instead of wrapping, will be saturated to the highest unsigned integer for + the controlling type (e.g. `0xFF` for i8). + "#, + &formats.binary, + ) + .operands_in(vec![x, y]) + .operands_out(vec![a]), + ); + + ig.push( + Inst::new( + "sadd_sat", + r#" + Add with signed saturation. + + This is similar to `iadd` but the operands are interpreted as signed integers and their + summed result, instead of wrapping, will be saturated to the lowest or highest + signed integer for the controlling type (e.g. `0x80` or `0x7F` for i8). For example, + since an `sadd_sat.i8` of `0x70` and `0x70` is greater than `0x7F`, the result will be + clamped to `0x7F`. + "#, + &formats.binary, + ) + .operands_in(vec![x, y]) + .operands_out(vec![a]), + ); + + ig.push( + Inst::new( + "isub", + r#" + Wrapping integer subtraction: `a := x - y \pmod{2^B}`. + + This instruction does not depend on the signed/unsigned interpretation + of the operands. + "#, + &formats.binary, + ) + .operands_in(vec![x, y]) + .operands_out(vec![a]), + ); + + ig.push( + Inst::new( + "usub_sat", + r#" + Subtract with unsigned saturation. + + This is similar to `isub` but the operands are interpreted as unsigned integers and their + difference, instead of wrapping, will be saturated to the lowest unsigned integer for + the controlling type (e.g. `0x00` for i8). + "#, + &formats.binary, + ) + .operands_in(vec![x, y]) + .operands_out(vec![a]), + ); + + ig.push( + Inst::new( + "ssub_sat", + r#" + Subtract with signed saturation. 
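Editor's note: the saturating variants in this group (`uadd_sat`, `sadd_sat`, `usub_sat`, and this `ssub_sat`) clamp instead of wrapping. A minimal model using Rust's built-in saturating arithmetic, with `i8`/`u8` values standing in for single lanes:

```rust
fn main() {
    // uadd_sat on an i8 lane: 0xF0 + 0x20 clamps to the unsigned maximum 0xFF.
    assert_eq!(0xF0u8.saturating_add(0x20), 0xFF);
    // sadd_sat: 0x70 + 0x70 exceeds i8::MAX, so the result clamps to 0x7F.
    assert_eq!(0x70i8.saturating_add(0x70), 0x7F);
    // ssub_sat: i8::MIN - 1 clamps to i8::MIN instead of wrapping around.
    assert_eq!((-128i8).saturating_sub(1), -128);
    // Plain wrapping isub would produce 127 here instead.
    assert_eq!((-128i8).wrapping_sub(1), 127);
}
```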
+ + This is similar to `isub` but the operands are interpreted as signed integers and their + difference, instead of wrapping, will be saturated to the lowest or highest + signed integer for the controlling type (e.g. `0x80` or `0x7F` for i8). + "#, + &formats.binary, + ) + .operands_in(vec![x, y]) + .operands_out(vec![a]), + ); + + ig.push( + Inst::new( + "ineg", + r#" + Integer negation: `a := -x \pmod{2^B}`. + "#, + &formats.unary, + ) + .operands_in(vec![x]) + .operands_out(vec![a]), + ); + + ig.push( + Inst::new( + "iabs", + r#" + Integer absolute value with wrapping: `a := |x|`. + "#, + &formats.unary, + ) + .operands_in(vec![x]) + .operands_out(vec![a]), + ); + + ig.push( + Inst::new( + "imul", + r#" + Wrapping integer multiplication: `a := x y \pmod{2^B}`. + + This instruction does not depend on the signed/unsigned interpretation + of the operands. + + Polymorphic over all integer types (vector and scalar). + "#, + &formats.binary, + ) + .operands_in(vec![x, y]) + .operands_out(vec![a]), + ); + + ig.push( + Inst::new( + "umulhi", + r#" + Unsigned integer multiplication, producing the high half of a + double-length result. + + Polymorphic over all scalar integer types, but does not support vector + types. + "#, + &formats.binary, + ) + .operands_in(vec![x, y]) + .operands_out(vec![a]), + ); + + ig.push( + Inst::new( + "smulhi", + r#" + Signed integer multiplication, producing the high half of a + double-length result. + + Polymorphic over all scalar integer types, but does not support vector + types. + "#, + &formats.binary, + ) + .operands_in(vec![x, y]) + .operands_out(vec![a]), + ); + + ig.push( + Inst::new( + "udiv", + r#" + Unsigned integer division: `a := \lfloor {x \over y} \rfloor`. + + This operation traps if the divisor is zero. + "#, + &formats.binary, + ) + .operands_in(vec![x, y]) + .operands_out(vec![a]) + .can_trap(true), + ); + + ig.push( + Inst::new( + "sdiv", + r#" + Signed integer division rounded toward zero: `a := sign(xy) + \lfloor {|x| \over |y|}\rfloor`. + + This operation traps if the divisor is zero, or if the result is not + representable in `B` bits two's complement. This only happens + when `x = -2^{B-1}, y = -1`. + "#, + &formats.binary, + ) + .operands_in(vec![x, y]) + .operands_out(vec![a]) + .can_trap(true), + ); + + ig.push( + Inst::new( + "urem", + r#" + Unsigned integer remainder. + + This operation traps if the divisor is zero. + "#, + &formats.binary, + ) + .operands_in(vec![x, y]) + .operands_out(vec![a]) + .can_trap(true), + ); + + ig.push( + Inst::new( + "srem", + r#" + Signed integer remainder. The result has the sign of the dividend. + + This operation traps if the divisor is zero. + "#, + &formats.binary, + ) + .operands_in(vec![x, y]) + .operands_out(vec![a]) + .can_trap(true), + ); + + let a = &Operand::new("a", iB); + let x = &Operand::new("x", iB); + let Y = &Operand::new("Y", &imm.imm64); + + ig.push( + Inst::new( + "iadd_imm", + r#" + Add immediate integer. + + Same as `iadd`, but one operand is an immediate constant. + + Polymorphic over all scalar integer types, but does not support vector + types. + "#, + &formats.binary_imm64, + ) + .operands_in(vec![x, Y]) + .operands_out(vec![a]), + ); + + ig.push( + Inst::new( + "imul_imm", + r#" + Integer multiplication by immediate constant. + + Polymorphic over all scalar integer types, but does not support vector + types. 
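Editor's note: for `umulhi` and `smulhi` above, the high half of the double-length product can be modelled by widening to 128 bits in plain Rust. This only illustrates the semantics, not how any backend lowers the instructions:

```rust
fn main() {
    let x: u64 = 0xDEAD_BEEF_CAFE_F00D;
    let y: u64 = 0x1234_5678_9ABC_DEF0;

    // umulhi: the high 64 bits of the full 128-bit unsigned product.
    let umulhi = ((x as u128 * y as u128) >> 64) as u64;
    // smulhi: the same, but with both operands sign-extended first.
    let smulhi = (((x as i64 as i128) * (y as i64 as i128)) >> 64) as u64;

    // The low half is what plain wrapping `imul` produces.
    assert_eq!(x.wrapping_mul(y), (x as u128 * y as u128) as u64);
    println!("umulhi = {umulhi:#x}, smulhi = {smulhi:#x}");
}
```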
+ "#, + &formats.binary_imm64, + ) + .operands_in(vec![x, Y]) + .operands_out(vec![a]), + ); + + ig.push( + Inst::new( + "udiv_imm", + r#" + Unsigned integer division by an immediate constant. + + This operation traps if the divisor is zero. + "#, + &formats.binary_imm64, + ) + .operands_in(vec![x, Y]) + .operands_out(vec![a]), + ); + + ig.push( + Inst::new( + "sdiv_imm", + r#" + Signed integer division by an immediate constant. + + This operation traps if the divisor is zero, or if the result is not + representable in `B` bits two's complement. This only happens + when `x = -2^{B-1}, Y = -1`. + "#, + &formats.binary_imm64, + ) + .operands_in(vec![x, Y]) + .operands_out(vec![a]), + ); + + ig.push( + Inst::new( + "urem_imm", + r#" + Unsigned integer remainder with immediate divisor. + + This operation traps if the divisor is zero. + "#, + &formats.binary_imm64, + ) + .operands_in(vec![x, Y]) + .operands_out(vec![a]), + ); + + ig.push( + Inst::new( + "srem_imm", + r#" + Signed integer remainder with immediate divisor. + + This operation traps if the divisor is zero. + "#, + &formats.binary_imm64, + ) + .operands_in(vec![x, Y]) + .operands_out(vec![a]), + ); + + ig.push( + Inst::new( + "irsub_imm", + r#" + Immediate reverse wrapping subtraction: `a := Y - x \pmod{2^B}`. + + Also works as integer negation when `Y = 0`. Use `iadd_imm` + with a negative immediate operand for the reverse immediate + subtraction. + + Polymorphic over all scalar integer types, but does not support vector + types. + "#, + &formats.binary_imm64, + ) + .operands_in(vec![x, Y]) + .operands_out(vec![a]), + ); + + let a = &Operand::new("a", iB); + let x = &Operand::new("x", iB); + let y = &Operand::new("y", iB); + + let c_in = &Operand::new("c_in", b1).with_doc("Input carry flag"); + let c_out = &Operand::new("c_out", b1).with_doc("Output carry flag"); + let b_in = &Operand::new("b_in", b1).with_doc("Input borrow flag"); + let b_out = &Operand::new("b_out", b1).with_doc("Output borrow flag"); + + let c_if_in = &Operand::new("c_in", iflags); + let c_if_out = &Operand::new("c_out", iflags); + let b_if_in = &Operand::new("b_in", iflags); + let b_if_out = &Operand::new("b_out", iflags); + + ig.push( + Inst::new( + "iadd_cin", + r#" + Add integers with carry in. + + Same as `iadd` with an additional carry input. Computes: + + ```text + a = x + y + c_{in} \pmod 2^B + ``` + + Polymorphic over all scalar integer types, but does not support vector + types. + "#, + &formats.ternary, + ) + .operands_in(vec![x, y, c_in]) + .operands_out(vec![a]), + ); + + ig.push( + Inst::new( + "iadd_ifcin", + r#" + Add integers with carry in. + + Same as `iadd` with an additional carry flag input. Computes: + + ```text + a = x + y + c_{in} \pmod 2^B + ``` + + Polymorphic over all scalar integer types, but does not support vector + types. + "#, + &formats.ternary, + ) + .operands_in(vec![x, y, c_if_in]) + .operands_out(vec![a]), + ); + + ig.push( + Inst::new( + "iadd_cout", + r#" + Add integers with carry out. + + Same as `iadd` with an additional carry output. + + ```text + a &= x + y \pmod 2^B \\ + c_{out} &= x+y >= 2^B + ``` + + Polymorphic over all scalar integer types, but does not support vector + types. + "#, + &formats.binary, + ) + .operands_in(vec![x, y]) + .operands_out(vec![a, c_out]), + ); + + ig.push( + Inst::new( + "iadd_ifcout", + r#" + Add integers with carry out. + + Same as `iadd` with an additional carry flag output. 
+ + ```text + a &= x + y \pmod 2^B \\ + c_{out} &= x+y >= 2^B + ``` + + Polymorphic over all scalar integer types, but does not support vector + types. + "#, + &formats.binary, + ) + .operands_in(vec![x, y]) + .operands_out(vec![a, c_if_out]), + ); + + ig.push( + Inst::new( + "iadd_carry", + r#" + Add integers with carry in and out. + + Same as `iadd` with an additional carry input and output. + + ```text + a &= x + y + c_{in} \pmod 2^B \\ + c_{out} &= x + y + c_{in} >= 2^B + ``` + + Polymorphic over all scalar integer types, but does not support vector + types. + "#, + &formats.ternary, + ) + .operands_in(vec![x, y, c_in]) + .operands_out(vec![a, c_out]), + ); + + ig.push( + Inst::new( + "iadd_ifcarry", + r#" + Add integers with carry in and out. + + Same as `iadd` with an additional carry flag input and output. + + ```text + a &= x + y + c_{in} \pmod 2^B \\ + c_{out} &= x + y + c_{in} >= 2^B + ``` + + Polymorphic over all scalar integer types, but does not support vector + types. + "#, + &formats.ternary, + ) + .operands_in(vec![x, y, c_if_in]) + .operands_out(vec![a, c_if_out]), + ); + + ig.push( + Inst::new( + "isub_bin", + r#" + Subtract integers with borrow in. + + Same as `isub` with an additional borrow flag input. Computes: + + ```text + a = x - (y + b_{in}) \pmod 2^B + ``` + + Polymorphic over all scalar integer types, but does not support vector + types. + "#, + &formats.ternary, + ) + .operands_in(vec![x, y, b_in]) + .operands_out(vec![a]), + ); + + ig.push( + Inst::new( + "isub_ifbin", + r#" + Subtract integers with borrow in. + + Same as `isub` with an additional borrow flag input. Computes: + + ```text + a = x - (y + b_{in}) \pmod 2^B + ``` + + Polymorphic over all scalar integer types, but does not support vector + types. + "#, + &formats.ternary, + ) + .operands_in(vec![x, y, b_if_in]) + .operands_out(vec![a]), + ); + + ig.push( + Inst::new( + "isub_bout", + r#" + Subtract integers with borrow out. + + Same as `isub` with an additional borrow flag output. + + ```text + a &= x - y \pmod 2^B \\ + b_{out} &= x < y + ``` + + Polymorphic over all scalar integer types, but does not support vector + types. + "#, + &formats.binary, + ) + .operands_in(vec![x, y]) + .operands_out(vec![a, b_out]), + ); + + ig.push( + Inst::new( + "isub_ifbout", + r#" + Subtract integers with borrow out. + + Same as `isub` with an additional borrow flag output. + + ```text + a &= x - y \pmod 2^B \\ + b_{out} &= x < y + ``` + + Polymorphic over all scalar integer types, but does not support vector + types. + "#, + &formats.binary, + ) + .operands_in(vec![x, y]) + .operands_out(vec![a, b_if_out]), + ); + + ig.push( + Inst::new( + "isub_borrow", + r#" + Subtract integers with borrow in and out. + + Same as `isub` with an additional borrow flag input and output. + + ```text + a &= x - (y + b_{in}) \pmod 2^B \\ + b_{out} &= x < y + b_{in} + ``` + + Polymorphic over all scalar integer types, but does not support vector + types. + "#, + &formats.ternary, + ) + .operands_in(vec![x, y, b_in]) + .operands_out(vec![a, b_out]), + ); + + ig.push( + Inst::new( + "isub_ifborrow", + r#" + Subtract integers with borrow in and out. + + Same as `isub` with an additional borrow flag input and output. + + ```text + a &= x - (y + b_{in}) \pmod 2^B \\ + b_{out} &= x < y + b_{in} + ``` + + Polymorphic over all scalar integer types, but does not support vector + types. 
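Editor's note: these borrow-propagating forms exist so a wide subtraction can be split into native-width pieces. A sketch of a 128-bit subtraction assembled from 64-bit halves and checked against Rust's native `u128` arithmetic (an illustrative model, not generated code):

```rust
fn main() {
    let x: u128 = 0x0000_0000_0000_0001_0000_0000_0000_0000; // exactly 2^64
    let y: u128 = 1;

    let (xl, xh) = (x as u64, (x >> 64) as u64);
    let (yl, yh) = (y as u64, (y >> 64) as u64);

    // isub_bout on the low halves: b_out = xl < yl.
    let (al, b_out) = xl.overflowing_sub(yl);
    // isub_bin on the high halves: ah = xh - (yh + b_in).
    let ah = xh.wrapping_sub(yh).wrapping_sub(b_out as u64);

    let a = ((ah as u128) << 64) | al as u128;
    assert_eq!(a, x.wrapping_sub(y));
}
```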
+ "#, + &formats.ternary, + ) + .operands_in(vec![x, y, b_if_in]) + .operands_out(vec![a, b_if_out]), + ); + + let bits = &TypeVar::new( + "bits", + "Any integer, float, or boolean scalar or vector type", + TypeSetBuilder::new() + .ints(Interval::All) + .floats(Interval::All) + .bools(Interval::All) + .simd_lanes(Interval::All) + .includes_scalars(true) + .build(), + ); + let x = &Operand::new("x", bits); + let y = &Operand::new("y", bits); + let a = &Operand::new("a", bits); + + ig.push( + Inst::new( + "band", + r#" + Bitwise and. + "#, + &formats.binary, + ) + .operands_in(vec![x, y]) + .operands_out(vec![a]), + ); + + ig.push( + Inst::new( + "bor", + r#" + Bitwise or. + "#, + &formats.binary, + ) + .operands_in(vec![x, y]) + .operands_out(vec![a]), + ); + + ig.push( + Inst::new( + "bxor", + r#" + Bitwise xor. + "#, + &formats.binary, + ) + .operands_in(vec![x, y]) + .operands_out(vec![a]), + ); + + ig.push( + Inst::new( + "bnot", + r#" + Bitwise not. + "#, + &formats.unary, + ) + .operands_in(vec![x]) + .operands_out(vec![a]), + ); + + ig.push( + Inst::new( + "band_not", + r#" + Bitwise and not. + + Computes `x & ~y`. + "#, + &formats.binary, + ) + .operands_in(vec![x, y]) + .operands_out(vec![a]), + ); + + ig.push( + Inst::new( + "bor_not", + r#" + Bitwise or not. + + Computes `x | ~y`. + "#, + &formats.binary, + ) + .operands_in(vec![x, y]) + .operands_out(vec![a]), + ); + + ig.push( + Inst::new( + "bxor_not", + r#" + Bitwise xor not. + + Computes `x ^ ~y`. + "#, + &formats.binary, + ) + .operands_in(vec![x, y]) + .operands_out(vec![a]), + ); + + let x = &Operand::new("x", iB); + let Y = &Operand::new("Y", &imm.imm64); + let a = &Operand::new("a", iB); + + ig.push( + Inst::new( + "band_imm", + r#" + Bitwise and with immediate. + + Same as `band`, but one operand is an immediate constant. + + Polymorphic over all scalar integer types, but does not support vector + types. + "#, + &formats.binary_imm64, + ) + .operands_in(vec![x, Y]) + .operands_out(vec![a]), + ); + + ig.push( + Inst::new( + "bor_imm", + r#" + Bitwise or with immediate. + + Same as `bor`, but one operand is an immediate constant. + + Polymorphic over all scalar integer types, but does not support vector + types. + "#, + &formats.binary_imm64, + ) + .operands_in(vec![x, Y]) + .operands_out(vec![a]), + ); + + ig.push( + Inst::new( + "bxor_imm", + r#" + Bitwise xor with immediate. + + Same as `bxor`, but one operand is an immediate constant. + + Polymorphic over all scalar integer types, but does not support vector + types. + "#, + &formats.binary_imm64, + ) + .operands_in(vec![x, Y]) + .operands_out(vec![a]), + ); + + let x = &Operand::new("x", Int).with_doc("Scalar or vector value to shift"); + let y = &Operand::new("y", iB).with_doc("Number of bits to shift"); + let Y = &Operand::new("Y", &imm.imm64); + let a = &Operand::new("a", Int); + + ig.push( + Inst::new( + "rotl", + r#" + Rotate left. + + Rotate the bits in ``x`` by ``y`` places. + "#, + &formats.binary, + ) + .operands_in(vec![x, y]) + .operands_out(vec![a]), + ); + + ig.push( + Inst::new( + "rotr", + r#" + Rotate right. + + Rotate the bits in ``x`` by ``y`` places. + "#, + &formats.binary, + ) + .operands_in(vec![x, y]) + .operands_out(vec![a]), + ); + + ig.push( + Inst::new( + "rotl_imm", + r#" + Rotate left by immediate. + "#, + &formats.binary_imm64, + ) + .operands_in(vec![x, Y]) + .operands_out(vec![a]), + ); + + ig.push( + Inst::new( + "rotr_imm", + r#" + Rotate right by immediate. 
+ "#, + &formats.binary_imm64, + ) + .operands_in(vec![x, Y]) + .operands_out(vec![a]), + ); + + ig.push( + Inst::new( + "ishl", + r#" + Integer shift left. Shift the bits in ``x`` towards the MSB by ``y`` + places. Shift in zero bits to the LSB. + + The shift amount is masked to the size of ``x``. + + When shifting a B-bits integer type, this instruction computes: + + ```text + s &:= y \pmod B, + a &:= x \cdot 2^s \pmod{2^B}. + ``` + "#, + &formats.binary, + ) + .operands_in(vec![x, y]) + .operands_out(vec![a]), + ); + + ig.push( + Inst::new( + "ushr", + r#" + Unsigned shift right. Shift bits in ``x`` towards the LSB by ``y`` + places, shifting in zero bits to the MSB. Also called a *logical + shift*. + + The shift amount is masked to the size of the register. + + When shifting a B-bits integer type, this instruction computes: + + ```text + s &:= y \pmod B, + a &:= \lfloor x \cdot 2^{-s} \rfloor. + ``` + "#, + &formats.binary, + ) + .operands_in(vec![x, y]) + .operands_out(vec![a]), + ); + + ig.push( + Inst::new( + "sshr", + r#" + Signed shift right. Shift bits in ``x`` towards the LSB by ``y`` + places, shifting in sign bits to the MSB. Also called an *arithmetic + shift*. + + The shift amount is masked to the size of the register. + "#, + &formats.binary, + ) + .operands_in(vec![x, y]) + .operands_out(vec![a]), + ); + + ig.push( + Inst::new( + "ishl_imm", + r#" + Integer shift left by immediate. + + The shift amount is masked to the size of ``x``. + "#, + &formats.binary_imm64, + ) + .operands_in(vec![x, Y]) + .operands_out(vec![a]), + ); + + ig.push( + Inst::new( + "ushr_imm", + r#" + Unsigned shift right by immediate. + + The shift amount is masked to the size of the register. + "#, + &formats.binary_imm64, + ) + .operands_in(vec![x, Y]) + .operands_out(vec![a]), + ); + + ig.push( + Inst::new( + "sshr_imm", + r#" + Signed shift right by immediate. + + The shift amount is masked to the size of the register. + "#, + &formats.binary_imm64, + ) + .operands_in(vec![x, Y]) + .operands_out(vec![a]), + ); + + let x = &Operand::new("x", iB); + let a = &Operand::new("a", iB); + + ig.push( + Inst::new( + "bitrev", + r#" + Reverse the bits of a integer. + + Reverses the bits in ``x``. + "#, + &formats.unary, + ) + .operands_in(vec![x]) + .operands_out(vec![a]), + ); + + ig.push( + Inst::new( + "clz", + r#" + Count leading zero bits. + + Starting from the MSB in ``x``, count the number of zero bits before + reaching the first one bit. When ``x`` is zero, returns the size of x + in bits. + "#, + &formats.unary, + ) + .operands_in(vec![x]) + .operands_out(vec![a]), + ); + + ig.push( + Inst::new( + "cls", + r#" + Count leading sign bits. + + Starting from the MSB after the sign bit in ``x``, count the number of + consecutive bits identical to the sign bit. When ``x`` is 0 or -1, + returns one less than the size of x in bits. + "#, + &formats.unary, + ) + .operands_in(vec![x]) + .operands_out(vec![a]), + ); + + ig.push( + Inst::new( + "ctz", + r#" + Count trailing zeros. + + Starting from the LSB in ``x``, count the number of zero bits before + reaching the first one bit. When ``x`` is zero, returns the size of x + in bits. + "#, + &formats.unary, + ) + .operands_in(vec![x]) + .operands_out(vec![a]), + ); + + ig.push( + Inst::new( + "popcnt", + r#" + Population count + + Count the number of one bits in ``x``. 
+ "#, + &formats.unary, + ) + .operands_in(vec![x]) + .operands_out(vec![a]), + ); + + let Float = &TypeVar::new( + "Float", + "A scalar or vector floating point number", + TypeSetBuilder::new() + .floats(Interval::All) + .simd_lanes(Interval::All) + .build(), + ); + let Cond = &Operand::new("Cond", &imm.floatcc); + let x = &Operand::new("x", Float); + let y = &Operand::new("y", Float); + let a = &Operand::new("a", &Float.as_bool()); + + ig.push( + Inst::new( + "fcmp", + r#" + Floating point comparison. + + Two IEEE 754-2008 floating point numbers, `x` and `y`, relate to each + other in exactly one of four ways: + + == ========================================== + UN Unordered when one or both numbers is NaN. + EQ When `x = y`. (And `0.0 = -0.0`). + LT When `x < y`. + GT When `x > y`. + == ========================================== + + The 14 `floatcc` condition codes each correspond to a subset of + the four relations, except for the empty set which would always be + false, and the full set which would always be true. + + The condition codes are divided into 7 'ordered' conditions which don't + include UN, and 7 unordered conditions which all include UN. + + +-------+------------+---------+------------+-------------------------+ + |Ordered |Unordered |Condition | + +=======+============+=========+============+=========================+ + |ord |EQ | LT | GT|uno |UN |NaNs absent / present. | + +-------+------------+---------+------------+-------------------------+ + |eq |EQ |ueq |UN | EQ |Equal | + +-------+------------+---------+------------+-------------------------+ + |one |LT | GT |ne |UN | LT | GT|Not equal | + +-------+------------+---------+------------+-------------------------+ + |lt |LT |ult |UN | LT |Less than | + +-------+------------+---------+------------+-------------------------+ + |le |LT | EQ |ule |UN | LT | EQ|Less than or equal | + +-------+------------+---------+------------+-------------------------+ + |gt |GT |ugt |UN | GT |Greater than | + +-------+------------+---------+------------+-------------------------+ + |ge |GT | EQ |uge |UN | GT | EQ|Greater than or equal | + +-------+------------+---------+------------+-------------------------+ + + The standard C comparison operators, `<, <=, >, >=`, are all ordered, + so they are false if either operand is NaN. The C equality operator, + `==`, is ordered, and since inequality is defined as the logical + inverse it is *unordered*. They map to the `floatcc` condition + codes as follows: + + ==== ====== ============ + C `Cond` Subset + ==== ====== ============ + `==` eq EQ + `!=` ne UN | LT | GT + `<` lt LT + `<=` le LT | EQ + `>` gt GT + `>=` ge GT | EQ + ==== ====== ============ + + This subset of condition codes also corresponds to the WebAssembly + floating point comparisons of the same name. + + When this instruction compares floating point vectors, it returns a + boolean vector with the results of lane-wise comparisons. + "#, + &formats.float_compare, + ) + .operands_in(vec![Cond, x, y]) + .operands_out(vec![a]), + ); + + let f = &Operand::new("f", fflags); + + ig.push( + Inst::new( + "ffcmp", + r#" + Floating point comparison returning flags. + + Compares two numbers like `fcmp`, but returns floating point CPU + flags instead of testing a specific condition. 
+ "#, + &formats.binary, + ) + .operands_in(vec![x, y]) + .operands_out(vec![f]), + ); + + let x = &Operand::new("x", Float); + let y = &Operand::new("y", Float); + let z = &Operand::new("z", Float); + let a = &Operand::new("a", Float).with_doc("Result of applying operator to each lane"); + + ig.push( + Inst::new( + "fadd", + r#" + Floating point addition. + "#, + &formats.binary, + ) + .operands_in(vec![x, y]) + .operands_out(vec![a]), + ); + + ig.push( + Inst::new( + "fsub", + r#" + Floating point subtraction. + "#, + &formats.binary, + ) + .operands_in(vec![x, y]) + .operands_out(vec![a]), + ); + + ig.push( + Inst::new( + "fmul", + r#" + Floating point multiplication. + "#, + &formats.binary, + ) + .operands_in(vec![x, y]) + .operands_out(vec![a]), + ); + + ig.push( + Inst::new( + "fdiv", + r#" + Floating point division. + + Unlike the integer division instructions ` and + `udiv`, this can't trap. Division by zero is infinity or + NaN, depending on the dividend. + "#, + &formats.binary, + ) + .operands_in(vec![x, y]) + .operands_out(vec![a]), + ); + + ig.push( + Inst::new( + "sqrt", + r#" + Floating point square root. + "#, + &formats.unary, + ) + .operands_in(vec![x]) + .operands_out(vec![a]), + ); + + ig.push( + Inst::new( + "fma", + r#" + Floating point fused multiply-and-add. + + Computes `a := xy+z` without any intermediate rounding of the + product. + "#, + &formats.ternary, + ) + .operands_in(vec![x, y, z]) + .operands_out(vec![a]), + ); + + let a = &Operand::new("a", Float).with_doc("``x`` with its sign bit inverted"); + + ig.push( + Inst::new( + "fneg", + r#" + Floating point negation. + + Note that this is a pure bitwise operation. + "#, + &formats.unary, + ) + .operands_in(vec![x]) + .operands_out(vec![a]), + ); + + let a = &Operand::new("a", Float).with_doc("``x`` with its sign bit cleared"); + + ig.push( + Inst::new( + "fabs", + r#" + Floating point absolute value. + + Note that this is a pure bitwise operation. + "#, + &formats.unary, + ) + .operands_in(vec![x]) + .operands_out(vec![a]), + ); + + let a = &Operand::new("a", Float).with_doc("``x`` with its sign bit changed to that of ``y``"); + + ig.push( + Inst::new( + "fcopysign", + r#" + Floating point copy sign. + + Note that this is a pure bitwise operation. The sign bit from ``y`` is + copied to the sign bit of ``x``. + "#, + &formats.binary, + ) + .operands_in(vec![x, y]) + .operands_out(vec![a]), + ); + + let a = &Operand::new("a", Float).with_doc("The smaller of ``x`` and ``y``"); + + ig.push( + Inst::new( + "fmin", + r#" + Floating point minimum, propagating NaNs. + + If either operand is NaN, this returns a NaN. + "#, + &formats.binary, + ) + .operands_in(vec![x, y]) + .operands_out(vec![a]), + ); + + ig.push( + Inst::new( + "fmin_pseudo", + r#" + Floating point pseudo-minimum, propagating NaNs. This behaves differently from ``fmin``. + See https://github.com/WebAssembly/simd/pull/122 for background. + + The behaviour is defined as ``fmin_pseudo(a, b) = (b < a) ? b : a``, and the behaviour + for zero or NaN inputs follows from the behaviour of ``<`` with such inputs. + "#, + &formats.binary, + ) + .operands_in(vec![x, y]) + .operands_out(vec![a]), + ); + + let a = &Operand::new("a", Float).with_doc("The larger of ``x`` and ``y``"); + + ig.push( + Inst::new( + "fmax", + r#" + Floating point maximum, propagating NaNs. + + If either operand is NaN, this returns a NaN. 
+ "#, + &formats.binary, + ) + .operands_in(vec![x, y]) + .operands_out(vec![a]), + ); + + ig.push( + Inst::new( + "fmax_pseudo", + r#" + Floating point pseudo-maximum, propagating NaNs. This behaves differently from ``fmax``. + See https://github.com/WebAssembly/simd/pull/122 for background. + + The behaviour is defined as ``fmax_pseudo(a, b) = (a < b) ? b : a``, and the behaviour + for zero or NaN inputs follows from the behaviour of ``<`` with such inputs. + "#, + &formats.binary, + ) + .operands_in(vec![x, y]) + .operands_out(vec![a]), + ); + + let a = &Operand::new("a", Float).with_doc("``x`` rounded to integral value"); + + ig.push( + Inst::new( + "ceil", + r#" + Round floating point round to integral, towards positive infinity. + "#, + &formats.unary, + ) + .operands_in(vec![x]) + .operands_out(vec![a]), + ); + + ig.push( + Inst::new( + "floor", + r#" + Round floating point round to integral, towards negative infinity. + "#, + &formats.unary, + ) + .operands_in(vec![x]) + .operands_out(vec![a]), + ); + + ig.push( + Inst::new( + "trunc", + r#" + Round floating point round to integral, towards zero. + "#, + &formats.unary, + ) + .operands_in(vec![x]) + .operands_out(vec![a]), + ); + + ig.push( + Inst::new( + "nearest", + r#" + Round floating point round to integral, towards nearest with ties to + even. + "#, + &formats.unary, + ) + .operands_in(vec![x]) + .operands_out(vec![a]), + ); + + let a = &Operand::new("a", b1); + let x = &Operand::new("x", Ref); + + ig.push( + Inst::new( + "is_null", + r#" + Reference verification. + + The condition code determines if the reference type in question is + null or not. + "#, + &formats.unary, + ) + .operands_in(vec![x]) + .operands_out(vec![a]), + ); + + let a = &Operand::new("a", b1); + let x = &Operand::new("x", Ref); + + ig.push( + Inst::new( + "is_invalid", + r#" + Reference verification. + + The condition code determines if the reference type in question is + invalid or not. + "#, + &formats.unary, + ) + .operands_in(vec![x]) + .operands_out(vec![a]), + ); + + let Cond = &Operand::new("Cond", &imm.intcc); + let f = &Operand::new("f", iflags); + let a = &Operand::new("a", b1); + + ig.push( + Inst::new( + "trueif", + r#" + Test integer CPU flags for a specific condition. + + Check the CPU flags in ``f`` against the ``Cond`` condition code and + return true when the condition code is satisfied. + "#, + &formats.int_cond, + ) + .operands_in(vec![Cond, f]) + .operands_out(vec![a]), + ); + + let Cond = &Operand::new("Cond", &imm.floatcc); + let f = &Operand::new("f", fflags); + + ig.push( + Inst::new( + "trueff", + r#" + Test floating point CPU flags for a specific condition. + + Check the CPU flags in ``f`` against the ``Cond`` condition code and + return true when the condition code is satisfied. + "#, + &formats.float_cond, + ) + .operands_in(vec![Cond, f]) + .operands_out(vec![a]), + ); + + let x = &Operand::new("x", Mem); + let a = &Operand::new("a", MemTo).with_doc("Bits of `x` reinterpreted"); + + ig.push( + Inst::new( + "bitcast", + r#" + Reinterpret the bits in `x` as a different type. + + The input and output types must be storable to memory and of the same + size. A bitcast is equivalent to storing one type and loading the other + type from the same address. 
+ "#, + &formats.unary, + ) + .operands_in(vec![x]) + .operands_out(vec![a]), + ); + + let x = &Operand::new("x", Any); + let a = &Operand::new("a", AnyTo).with_doc("Bits of `x` reinterpreted"); + + ig.push( + Inst::new( + "raw_bitcast", + r#" + Cast the bits in `x` as a different type of the same bit width. + + This instruction does not change the data's representation but allows + data in registers to be used as different types, e.g. an i32x4 as a + b8x16. The only constraint on the result `a` is that it can be + `raw_bitcast` back to the original type. Also, in a raw_bitcast between + vector types with the same number of lanes, the value of each result + lane is a raw_bitcast of the corresponding operand lane. TODO there is + currently no mechanism for enforcing the bit width constraint. + "#, + &formats.unary, + ) + .operands_in(vec![x]) + .operands_out(vec![a]), + ); + + let a = &Operand::new("a", TxN).with_doc("A vector value"); + let s = &Operand::new("s", &TxN.lane_of()).with_doc("A scalar value"); + + ig.push( + Inst::new( + "scalar_to_vector", + r#" + Copies a scalar value to a vector value. The scalar is copied into the + least significant lane of the vector, and all other lanes will be zero. + "#, + &formats.unary, + ) + .operands_in(vec![s]) + .operands_out(vec![a]), + ); + + let Bool = &TypeVar::new( + "Bool", + "A scalar or vector boolean type", + TypeSetBuilder::new() + .bools(Interval::All) + .simd_lanes(Interval::All) + .build(), + ); + + let BoolTo = &TypeVar::new( + "BoolTo", + "A smaller boolean type with the same number of lanes", + TypeSetBuilder::new() + .bools(Interval::All) + .simd_lanes(Interval::All) + .build(), + ); + + let x = &Operand::new("x", Bool); + let a = &Operand::new("a", BoolTo); + + ig.push( + Inst::new( + "breduce", + r#" + Convert `x` to a smaller boolean type in the platform-defined way. + + The result type must have the same number of vector lanes as the input, + and each lane must not have more bits that the input lanes. If the + input and output types are the same, this is a no-op. + "#, + &formats.unary, + ) + .operands_in(vec![x]) + .operands_out(vec![a]) + .constraints(vec![WiderOrEq(Bool.clone(), BoolTo.clone())]), + ); + + let BoolTo = &TypeVar::new( + "BoolTo", + "A larger boolean type with the same number of lanes", + TypeSetBuilder::new() + .bools(Interval::All) + .simd_lanes(Interval::All) + .build(), + ); + let x = &Operand::new("x", Bool); + let a = &Operand::new("a", BoolTo); + + ig.push( + Inst::new( + "bextend", + r#" + Convert `x` to a larger boolean type in the platform-defined way. + + The result type must have the same number of vector lanes as the input, + and each lane must not have fewer bits that the input lanes. If the + input and output types are the same, this is a no-op. + "#, + &formats.unary, + ) + .operands_in(vec![x]) + .operands_out(vec![a]) + .constraints(vec![WiderOrEq(BoolTo.clone(), Bool.clone())]), + ); + + let IntTo = &TypeVar::new( + "IntTo", + "An integer type with the same number of lanes", + TypeSetBuilder::new() + .ints(Interval::All) + .simd_lanes(Interval::All) + .build(), + ); + let x = &Operand::new("x", Bool); + let a = &Operand::new("a", IntTo); + + ig.push( + Inst::new( + "bint", + r#" + Convert `x` to an integer. + + True maps to 1 and false maps to 0. The result type must have the same + number of vector lanes as the input. + "#, + &formats.unary, + ) + .operands_in(vec![x]) + .operands_out(vec![a]), + ); + + ig.push( + Inst::new( + "bmask", + r#" + Convert `x` to an integer mask. 
+ + True maps to all 1s and false maps to all 0s. The result type must have + the same number of vector lanes as the input. + "#, + &formats.unary, + ) + .operands_in(vec![x]) + .operands_out(vec![a]), + ); + + let Int = &TypeVar::new( + "Int", + "A scalar or vector integer type", + TypeSetBuilder::new() + .ints(Interval::All) + .simd_lanes(Interval::All) + .build(), + ); + + let IntTo = &TypeVar::new( + "IntTo", + "A smaller integer type with the same number of lanes", + TypeSetBuilder::new() + .ints(Interval::All) + .simd_lanes(Interval::All) + .build(), + ); + let x = &Operand::new("x", Int); + let a = &Operand::new("a", IntTo); + + ig.push( + Inst::new( + "ireduce", + r#" + Convert `x` to a smaller integer type by dropping high bits. + + Each lane in `x` is converted to a smaller integer type by discarding + the most significant bits. This is the same as reducing modulo + `2^n`. + + The result type must have the same number of vector lanes as the input, + and each lane must not have more bits that the input lanes. If the + input and output types are the same, this is a no-op. + "#, + &formats.unary, + ) + .operands_in(vec![x]) + .operands_out(vec![a]) + .constraints(vec![WiderOrEq(Int.clone(), IntTo.clone())]), + ); + + let I16or32xN = &TypeVar::new( + "I16or32xN", + "A SIMD vector type containing integer lanes 16 or 32 bits wide", + TypeSetBuilder::new() + .ints(16..32) + .simd_lanes(4..8) + .includes_scalars(false) + .build(), + ); + + let x = &Operand::new("x", I16or32xN); + let y = &Operand::new("y", I16or32xN); + let a = &Operand::new("a", &I16or32xN.split_lanes()); + + ig.push( + Inst::new( + "snarrow", + r#" + Combine `x` and `y` into a vector with twice the lanes but half the integer width while + saturating overflowing values to the signed maximum and minimum. + + The lanes will be concatenated after narrowing. For example, when `x` and `y` are `i32x4` + and `x = [x3, x2, x1, x0]` and `y = [y3, y2, y1, y0]`, then after narrowing the value + returned is an `i16x8`: `a = [y3', y2', y1', y0', x3', x2', x1', x0']`. + "#, + &formats.binary, + ) + .operands_in(vec![x, y]) + .operands_out(vec![a]), + ); + + ig.push( + Inst::new( + "unarrow", + r#" + Combine `x` and `y` into a vector with twice the lanes but half the integer width while + saturating overflowing values to the unsigned maximum and minimum. + + Note that all input lanes are considered signed: any negative lanes will overflow and be + replaced with the unsigned minimum, `0x00`. + + The lanes will be concatenated after narrowing. For example, when `x` and `y` are `i32x4` + and `x = [x3, x2, x1, x0]` and `y = [y3, y2, y1, y0]`, then after narrowing the value + returned is an `i16x8`: `a = [y3', y2', y1', y0', x3', x2', x1', x0']`. + "#, + &formats.binary, + ) + .operands_in(vec![x, y]) + .operands_out(vec![a]), + ); + + let I8or16xN = &TypeVar::new( + "I8or16xN", + "A SIMD vector type containing integer lanes 8 or 16 bits wide.", + TypeSetBuilder::new() + .ints(8..16) + .simd_lanes(8..16) + .includes_scalars(false) + .build(), + ); + + let x = &Operand::new("x", I8or16xN); + let a = &Operand::new("a", &I8or16xN.merge_lanes()); + + ig.push( + Inst::new( + "swiden_low", + r#" + Widen the low lanes of `x` using signed extension. + + This will double the lane width and halve the number of lanes. + "#, + &formats.unary, + ) + .operands_in(vec![x]) + .operands_out(vec![a]), + ); + + ig.push( + Inst::new( + "swiden_high", + r#" + Widen the high lanes of `x` using signed extension. 
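Editor's note: per lane, the saturation performed by `snarrow` and `unarrow` when narrowing `i32` to 16-bit lanes looks like the sketch below. Only the clamping is modelled here; the lane concatenation order is as documented above, and the two closures are illustrative assumptions:

```rust
fn main() {
    let snarrow_lane = |v: i32| v.clamp(i16::MIN as i32, i16::MAX as i32) as i16;
    let unarrow_lane = |v: i32| v.clamp(0, u16::MAX as i32) as u16;

    assert_eq!(snarrow_lane(100_000), i16::MAX);  // overflow clamps to 0x7FFF
    assert_eq!(snarrow_lane(-100_000), i16::MIN); // underflow clamps to 0x8000
    assert_eq!(unarrow_lane(-5), 0);              // negative input clamps to 0x0000
    assert_eq!(unarrow_lane(100_000), u16::MAX);  // overflow clamps to 0xFFFF
}
```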
+ + This will double the lane width and halve the number of lanes. + "#, + &formats.unary, + ) + .operands_in(vec![x]) + .operands_out(vec![a]), + ); + + ig.push( + Inst::new( + "uwiden_low", + r#" + Widen the low lanes of `x` using unsigned extension. + + This will double the lane width and halve the number of lanes. + "#, + &formats.unary, + ) + .operands_in(vec![x]) + .operands_out(vec![a]), + ); + + ig.push( + Inst::new( + "uwiden_high", + r#" + Widen the high lanes of `x` using unsigned extension. + + This will double the lane width and halve the number of lanes. + "#, + &formats.unary, + ) + .operands_in(vec![x]) + .operands_out(vec![a]), + ); + + let I16x8 = &TypeVar::new( + "I16x8", + "A SIMD vector type containing 8 integer lanes each 16 bits wide.", + TypeSetBuilder::new() + .ints(16..16) + .simd_lanes(8..8) + .includes_scalars(false) + .build(), + ); + + let x = &Operand::new("x", I16x8); + let y = &Operand::new("y", I16x8); + let a = &Operand::new("a", &I16x8.merge_lanes()); + + ig.push( + Inst::new( + "widening_pairwise_dot_product_s", + r#" + Takes corresponding elements in `x` and `y`, performs a sign-extending length-doubling + multiplication on them, then adds adjacent pairs of elements to form the result. For + example, if the input vectors are `[x3, x2, x1, x0]` and `[y3, y2, y1, y0]`, it produces + the vector `[r1, r0]`, where `r1 = sx(x3) * sx(y3) + sx(x2) * sx(y2)` and + `r0 = sx(x1) * sx(y1) + sx(x0) * sx(y0)`, and `sx(n)` sign-extends `n` to twice its width. + + This will double the lane width and halve the number of lanes. So the resulting + vector has the same number of bits as `x` and `y` do (individually). + + See https://github.com/WebAssembly/simd/pull/127 for background info. + "#, + &formats.binary, + ) + .operands_in(vec![x, y]) + .operands_out(vec![a]), + ); + + let IntTo = &TypeVar::new( + "IntTo", + "A larger integer type with the same number of lanes", + TypeSetBuilder::new() + .ints(Interval::All) + .simd_lanes(Interval::All) + .build(), + ); + let x = &Operand::new("x", Int); + let a = &Operand::new("a", IntTo); + + ig.push( + Inst::new( + "uextend", + r#" + Convert `x` to a larger integer type by zero-extending. + + Each lane in `x` is converted to a larger integer type by adding + zeroes. The result has the same numerical value as `x` when both are + interpreted as unsigned integers. + + The result type must have the same number of vector lanes as the input, + and each lane must not have fewer bits that the input lanes. If the + input and output types are the same, this is a no-op. + "#, + &formats.unary, + ) + .operands_in(vec![x]) + .operands_out(vec![a]) + .constraints(vec![WiderOrEq(IntTo.clone(), Int.clone())]), + ); + + ig.push( + Inst::new( + "sextend", + r#" + Convert `x` to a larger integer type by sign-extending. + + Each lane in `x` is converted to a larger integer type by replicating + the sign bit. The result has the same numerical value as `x` when both + are interpreted as signed integers. + + The result type must have the same number of vector lanes as the input, + and each lane must not have fewer bits that the input lanes. If the + input and output types are the same, this is a no-op. 
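Editor's note, looking back at `widening_pairwise_dot_product_s`: one output lane is formed from two adjacent input lanes per operand, sign-extended before multiplying so the products cannot overflow. A scalar model of a single result lane; the `pairwise_dot` closure is an illustrative assumption, not a Cranelift helper:

```rust
fn main() {
    let pairwise_dot = |x: [i16; 2], y: [i16; 2]| -> i32 {
        (x[0] as i32) * (y[0] as i32) + (x[1] as i32) * (y[1] as i32)
    };

    assert_eq!(pairwise_dot([3, -4], [5, 6]), 3 * 5 + (-4) * 6); // = -9
    // The widening matters: i16 * i16 can exceed i16::MAX without overflowing i32.
    assert_eq!(pairwise_dot([i16::MAX, i16::MAX], [2, 2]), 4 * i16::MAX as i32);
}
```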
+ "#, + &formats.unary, + ) + .operands_in(vec![x]) + .operands_out(vec![a]) + .constraints(vec![WiderOrEq(IntTo.clone(), Int.clone())]), + ); + + let FloatTo = &TypeVar::new( + "FloatTo", + "A scalar or vector floating point number", + TypeSetBuilder::new() + .floats(Interval::All) + .simd_lanes(Interval::All) + .build(), + ); + let x = &Operand::new("x", Float); + let a = &Operand::new("a", FloatTo); + + ig.push( + Inst::new( + "fpromote", + r#" + Convert `x` to a larger floating point format. + + Each lane in `x` is converted to the destination floating point format. + This is an exact operation. + + Cranelift currently only supports two floating point formats + - `f32` and `f64`. This may change in the future. + + The result type must have the same number of vector lanes as the input, + and the result lanes must not have fewer bits than the input lanes. If + the input and output types are the same, this is a no-op. + "#, + &formats.unary, + ) + .operands_in(vec![x]) + .operands_out(vec![a]) + .constraints(vec![WiderOrEq(FloatTo.clone(), Float.clone())]), + ); + + ig.push( + Inst::new( + "fdemote", + r#" + Convert `x` to a smaller floating point format. + + Each lane in `x` is converted to the destination floating point format + by rounding to nearest, ties to even. + + Cranelift currently only supports two floating point formats + - `f32` and `f64`. This may change in the future. + + The result type must have the same number of vector lanes as the input, + and the result lanes must not have more bits than the input lanes. If + the input and output types are the same, this is a no-op. + "#, + &formats.unary, + ) + .operands_in(vec![x]) + .operands_out(vec![a]) + .constraints(vec![WiderOrEq(Float.clone(), FloatTo.clone())]), + ); + + let x = &Operand::new("x", Float); + let a = &Operand::new("a", IntTo); + + ig.push( + Inst::new( + "fcvt_to_uint", + r#" + Convert floating point to unsigned integer. + + Each lane in `x` is converted to an unsigned integer by rounding + towards zero. If `x` is NaN or if the unsigned integral value cannot be + represented in the result type, this instruction traps. + + The result type must have the same number of vector lanes as the input. + "#, + &formats.unary, + ) + .operands_in(vec![x]) + .operands_out(vec![a]) + .can_trap(true), + ); + + ig.push( + Inst::new( + "fcvt_to_uint_sat", + r#" + Convert floating point to unsigned integer as fcvt_to_uint does, but + saturates the input instead of trapping. NaN and negative values are + converted to 0. + "#, + &formats.unary, + ) + .operands_in(vec![x]) + .operands_out(vec![a]), + ); + + ig.push( + Inst::new( + "fcvt_to_sint", + r#" + Convert floating point to signed integer. + + Each lane in `x` is converted to a signed integer by rounding towards + zero. If `x` is NaN or if the signed integral value cannot be + represented in the result type, this instruction traps. + + The result type must have the same number of vector lanes as the input. + "#, + &formats.unary, + ) + .operands_in(vec![x]) + .operands_out(vec![a]) + .can_trap(true), + ); + + ig.push( + Inst::new( + "fcvt_to_sint_sat", + r#" + Convert floating point to signed integer as fcvt_to_sint does, but + saturates the input instead of trapping. NaN values are converted to 0. + "#, + &formats.unary, + ) + .operands_in(vec![x]) + .operands_out(vec![a]), + ); + + let x = &Operand::new("x", Int); + let a = &Operand::new("a", FloatTo); + + ig.push( + Inst::new( + "fcvt_from_uint", + r#" + Convert unsigned integer to floating point. 
+ + Each lane in `x` is interpreted as an unsigned integer and converted to + floating point using round to nearest, ties to even. + + The result type must have the same number of vector lanes as the input. + "#, + &formats.unary, + ) + .operands_in(vec![x]) + .operands_out(vec![a]), + ); + + ig.push( + Inst::new( + "fcvt_from_sint", + r#" + Convert signed integer to floating point. + + Each lane in `x` is interpreted as a signed integer and converted to + floating point using round to nearest, ties to even. + + The result type must have the same number of vector lanes as the input. + "#, + &formats.unary, + ) + .operands_in(vec![x]) + .operands_out(vec![a]), + ); + + let WideInt = &TypeVar::new( + "WideInt", + "An integer type with lanes from `i16` upwards", + TypeSetBuilder::new() + .ints(16..128) + .simd_lanes(Interval::All) + .build(), + ); + let x = &Operand::new("x", WideInt); + let lo = &Operand::new("lo", &WideInt.half_width()).with_doc("The low bits of `x`"); + let hi = &Operand::new("hi", &WideInt.half_width()).with_doc("The high bits of `x`"); + + ig.push( + Inst::new( + "isplit", + r#" + Split an integer into low and high parts. + + Vectors of integers are split lane-wise, so the results have the same + number of lanes as the input, but the lanes are half the size. + + Returns the low half of `x` and the high half of `x` as two independent + values. + "#, + &formats.unary, + ) + .operands_in(vec![x]) + .operands_out(vec![lo, hi]) + .is_ghost(true), + ); + + let NarrowInt = &TypeVar::new( + "NarrowInt", + "An integer type with lanes type to `i64`", + TypeSetBuilder::new() + .ints(8..64) + .simd_lanes(Interval::All) + .build(), + ); + + let lo = &Operand::new("lo", NarrowInt); + let hi = &Operand::new("hi", NarrowInt); + let a = &Operand::new("a", &NarrowInt.double_width()) + .with_doc("The concatenation of `lo` and `hi`"); + + ig.push( + Inst::new( + "iconcat", + r#" + Concatenate low and high bits to form a larger integer type. + + Vectors of integers are concatenated lane-wise such that the result has + the same number of lanes as the inputs, but the lanes are twice the + size. + "#, + &formats.binary, + ) + .operands_in(vec![lo, hi]) + .operands_out(vec![a]) + .is_ghost(true), + ); + + // Instructions relating to atomic memory accesses and fences + let AtomicMem = &TypeVar::new( + "AtomicMem", + "Any type that can be stored in memory, which can be used in an atomic operation", + TypeSetBuilder::new().ints(8..64).build(), + ); + let x = &Operand::new("x", AtomicMem).with_doc("Value to be atomically stored"); + let a = &Operand::new("a", AtomicMem).with_doc("Value atomically loaded"); + let e = &Operand::new("e", AtomicMem).with_doc("Expected value in CAS"); + let p = &Operand::new("p", iAddr); + let MemFlags = &Operand::new("MemFlags", &imm.memflags); + let AtomicRmwOp = &Operand::new("AtomicRmwOp", &imm.atomic_rmw_op); + + ig.push( + Inst::new( + "atomic_rmw", + r#" + Atomically read-modify-write memory at `p`, with second operand `x`. The old value is + returned. `p` has the type of the target word size, and `x` may be an integer type of + 8, 16, 32 or 64 bits, even on a 32-bit target. The type of the returned value is the + same as the type of `x`. This operation is sequentially consistent and creates + happens-before edges that order normal (non-atomic) loads and stores. 
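Editor's note: in terms of observable behaviour, an `atomic_rmw` add is close to `AtomicU64::fetch_add` with sequentially consistent ordering: memory is updated and the old value is returned. This is a sketch of the semantics, not a statement about how backends implement it:

```rust
use std::sync::atomic::{AtomicU64, Ordering};

fn main() {
    let p = AtomicU64::new(40);
    // The memory at `p` is updated and the *old* value is returned.
    let old = p.fetch_add(2, Ordering::SeqCst);
    assert_eq!(old, 40);
    assert_eq!(p.load(Ordering::SeqCst), 42);
}
```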
+ "#, + &formats.atomic_rmw, + ) + .operands_in(vec![MemFlags, AtomicRmwOp, p, x]) + .operands_out(vec![a]) + .can_load(true) + .can_store(true) + .other_side_effects(true), + ); + + ig.push( + Inst::new( + "atomic_cas", + r#" + Perform an atomic compare-and-swap operation on memory at `p`, with expected value `e`, + storing `x` if the value at `p` equals `e`. The old value at `p` is returned, + regardless of whether the operation succeeds or fails. `p` has the type of the target + word size, and `x` and `e` must have the same type and the same size, which may be an + integer type of 8, 16, 32 or 64 bits, even on a 32-bit target. The type of the returned + value is the same as the type of `x` and `e`. This operation is sequentially + consistent and creates happens-before edges that order normal (non-atomic) loads and + stores. + "#, + &formats.atomic_cas, + ) + .operands_in(vec![MemFlags, p, e, x]) + .operands_out(vec![a]) + .can_load(true) + .can_store(true) + .other_side_effects(true), + ); + + ig.push( + Inst::new( + "atomic_load", + r#" + Atomically load from memory at `p`. + + This is a polymorphic instruction that can load any value type which has a memory + representation. It should only be used for integer types with 8, 16, 32 or 64 bits. + This operation is sequentially consistent and creates happens-before edges that order + normal (non-atomic) loads and stores. + "#, + &formats.load_no_offset, + ) + .operands_in(vec![MemFlags, p]) + .operands_out(vec![a]) + .can_load(true) + .other_side_effects(true), + ); + + ig.push( + Inst::new( + "atomic_store", + r#" + Atomically store `x` to memory at `p`. + + This is a polymorphic instruction that can store any value type with a memory + representation. It should only be used for integer types with 8, 16, 32 or 64 bits. + This operation is sequentially consistent and creates happens-before edges that order + normal (non-atomic) loads and stores. + "#, + &formats.store_no_offset, + ) + .operands_in(vec![MemFlags, x, p]) + .can_store(true) + .other_side_effects(true), + ); + + ig.push( + Inst::new( + "fence", + r#" + A memory fence. This must provide ordering to ensure that, at a minimum, neither loads + nor stores of any kind may move forwards or backwards across the fence. This operation + is sequentially consistent. + "#, + &formats.nullary, + ) + .other_side_effects(true), + ); + + let Offset = &Operand::new("Offset", &imm.offset32).with_doc("Byte offset from base address"); + let a = &Operand::new("a", TxN); + + ig.push( + Inst::new( + "load_splat", + r#" + Load an element from memory at ``p + Offset`` and return a vector + whose lanes are all set to that element. + + This is equivalent to ``load`` followed by ``splat``. 
+ "#, + &formats.load, + ) + .operands_in(vec![MemFlags, p, Offset]) + .operands_out(vec![a]) + .can_load(true), + ); + + ig.build() +} diff --git a/third_party/rust/cranelift-codegen-meta/src/shared/legalize.rs b/third_party/rust/cranelift-codegen-meta/src/shared/legalize.rs new file mode 100644 index 0000000000..9a0d6cffde --- /dev/null +++ b/third_party/rust/cranelift-codegen-meta/src/shared/legalize.rs @@ -0,0 +1,1087 @@ +use crate::cdsl::ast::{var, ExprBuilder, Literal}; +use crate::cdsl::instructions::{Bindable, Instruction, InstructionGroup}; +use crate::cdsl::xform::{TransformGroupBuilder, TransformGroups}; + +use crate::shared::immediates::Immediates; +use crate::shared::types::Float::{F32, F64}; +use crate::shared::types::Int::{I128, I16, I32, I64, I8}; +use cranelift_codegen_shared::condcodes::{CondCode, IntCC}; + +#[allow(clippy::many_single_char_names, clippy::cognitive_complexity)] +pub(crate) fn define(insts: &InstructionGroup, imm: &Immediates) -> TransformGroups { + let mut narrow = TransformGroupBuilder::new( + "narrow", + r#" + Legalize instructions by narrowing. + + The transformations in the 'narrow' group work by expressing + instructions in terms of smaller types. Operations on vector types are + expressed in terms of vector types with fewer lanes, and integer + operations are expressed in terms of smaller integer types. + "#, + ); + + let mut widen = TransformGroupBuilder::new( + "widen", + r#" + Legalize instructions by widening. + + The transformations in the 'widen' group work by expressing + instructions in terms of larger types. + "#, + ); + + let mut expand = TransformGroupBuilder::new( + "expand", + r#" + Legalize instructions by expansion. + + Rewrite instructions in terms of other instructions, generally + operating on the same types as the original instructions. + "#, + ); + + // List of instructions. 
+ let band = insts.by_name("band"); + let band_imm = insts.by_name("band_imm"); + let band_not = insts.by_name("band_not"); + let bint = insts.by_name("bint"); + let bitrev = insts.by_name("bitrev"); + let bnot = insts.by_name("bnot"); + let bor = insts.by_name("bor"); + let bor_imm = insts.by_name("bor_imm"); + let bor_not = insts.by_name("bor_not"); + let brnz = insts.by_name("brnz"); + let brz = insts.by_name("brz"); + let br_icmp = insts.by_name("br_icmp"); + let br_table = insts.by_name("br_table"); + let bxor = insts.by_name("bxor"); + let bxor_imm = insts.by_name("bxor_imm"); + let bxor_not = insts.by_name("bxor_not"); + let cls = insts.by_name("cls"); + let clz = insts.by_name("clz"); + let ctz = insts.by_name("ctz"); + let copy = insts.by_name("copy"); + let fabs = insts.by_name("fabs"); + let f32const = insts.by_name("f32const"); + let f64const = insts.by_name("f64const"); + let fcopysign = insts.by_name("fcopysign"); + let fcvt_from_sint = insts.by_name("fcvt_from_sint"); + let fneg = insts.by_name("fneg"); + let iadd = insts.by_name("iadd"); + let iadd_cin = insts.by_name("iadd_cin"); + let iadd_cout = insts.by_name("iadd_cout"); + let iadd_carry = insts.by_name("iadd_carry"); + let iadd_ifcin = insts.by_name("iadd_ifcin"); + let iadd_ifcout = insts.by_name("iadd_ifcout"); + let iadd_imm = insts.by_name("iadd_imm"); + let icmp = insts.by_name("icmp"); + let icmp_imm = insts.by_name("icmp_imm"); + let iconcat = insts.by_name("iconcat"); + let iconst = insts.by_name("iconst"); + let ifcmp = insts.by_name("ifcmp"); + let ifcmp_imm = insts.by_name("ifcmp_imm"); + let imul = insts.by_name("imul"); + let imul_imm = insts.by_name("imul_imm"); + let ireduce = insts.by_name("ireduce"); + let irsub_imm = insts.by_name("irsub_imm"); + let ishl = insts.by_name("ishl"); + let ishl_imm = insts.by_name("ishl_imm"); + let isplit = insts.by_name("isplit"); + let istore8 = insts.by_name("istore8"); + let istore16 = insts.by_name("istore16"); + let isub = insts.by_name("isub"); + let isub_bin = insts.by_name("isub_bin"); + let isub_bout = insts.by_name("isub_bout"); + let isub_borrow = insts.by_name("isub_borrow"); + let isub_ifbin = insts.by_name("isub_ifbin"); + let isub_ifbout = insts.by_name("isub_ifbout"); + let jump = insts.by_name("jump"); + let load = insts.by_name("load"); + let popcnt = insts.by_name("popcnt"); + let resumable_trapnz = insts.by_name("resumable_trapnz"); + let rotl = insts.by_name("rotl"); + let rotl_imm = insts.by_name("rotl_imm"); + let rotr = insts.by_name("rotr"); + let rotr_imm = insts.by_name("rotr_imm"); + let sdiv = insts.by_name("sdiv"); + let sdiv_imm = insts.by_name("sdiv_imm"); + let select = insts.by_name("select"); + let sextend = insts.by_name("sextend"); + let sshr = insts.by_name("sshr"); + let sshr_imm = insts.by_name("sshr_imm"); + let srem = insts.by_name("srem"); + let srem_imm = insts.by_name("srem_imm"); + let store = insts.by_name("store"); + let udiv = insts.by_name("udiv"); + let udiv_imm = insts.by_name("udiv_imm"); + let uextend = insts.by_name("uextend"); + let uload8 = insts.by_name("uload8"); + let uload16 = insts.by_name("uload16"); + let umulhi = insts.by_name("umulhi"); + let ushr = insts.by_name("ushr"); + let ushr_imm = insts.by_name("ushr_imm"); + let urem = insts.by_name("urem"); + let urem_imm = insts.by_name("urem_imm"); + let trapif = insts.by_name("trapif"); + let trapnz = insts.by_name("trapnz"); + let trapz = insts.by_name("trapz"); + + // Custom expansions for memory objects. 
+ expand.custom_legalize(insts.by_name("global_value"), "expand_global_value"); + expand.custom_legalize(insts.by_name("heap_addr"), "expand_heap_addr"); + expand.custom_legalize(insts.by_name("table_addr"), "expand_table_addr"); + + // Custom expansions for calls. + expand.custom_legalize(insts.by_name("call"), "expand_call"); + + // Custom expansions that need to change the CFG. + // TODO: Add sufficient XForm syntax that we don't need to hand-code these. + expand.custom_legalize(trapz, "expand_cond_trap"); + expand.custom_legalize(trapnz, "expand_cond_trap"); + expand.custom_legalize(resumable_trapnz, "expand_cond_trap"); + expand.custom_legalize(br_table, "expand_br_table"); + expand.custom_legalize(select, "expand_select"); + widen.custom_legalize(select, "expand_select"); // small ints + + // Custom expansions for floating point constants. + // These expansions require bit-casting or creating constant pool entries. + expand.custom_legalize(f32const, "expand_fconst"); + expand.custom_legalize(f64const, "expand_fconst"); + + // Custom expansions for stack memory accesses. + expand.custom_legalize(insts.by_name("stack_load"), "expand_stack_load"); + expand.custom_legalize(insts.by_name("stack_store"), "expand_stack_store"); + + // Custom expansions for small stack memory acccess. + widen.custom_legalize(insts.by_name("stack_load"), "expand_stack_load"); + widen.custom_legalize(insts.by_name("stack_store"), "expand_stack_store"); + + // List of variables to reuse in patterns. + let x = var("x"); + let y = var("y"); + let z = var("z"); + let a = var("a"); + let a1 = var("a1"); + let a2 = var("a2"); + let a3 = var("a3"); + let a4 = var("a4"); + let b = var("b"); + let b1 = var("b1"); + let b2 = var("b2"); + let b3 = var("b3"); + let b4 = var("b4"); + let b_in = var("b_in"); + let b_int = var("b_int"); + let c = var("c"); + let c1 = var("c1"); + let c2 = var("c2"); + let c3 = var("c3"); + let c4 = var("c4"); + let c_in = var("c_in"); + let c_int = var("c_int"); + let d = var("d"); + let d1 = var("d1"); + let d2 = var("d2"); + let d3 = var("d3"); + let d4 = var("d4"); + let e = var("e"); + let e1 = var("e1"); + let e2 = var("e2"); + let e3 = var("e3"); + let e4 = var("e4"); + let f = var("f"); + let f1 = var("f1"); + let f2 = var("f2"); + let xl = var("xl"); + let xh = var("xh"); + let yl = var("yl"); + let yh = var("yh"); + let al = var("al"); + let ah = var("ah"); + let cc = var("cc"); + let block = var("block"); + let ptr = var("ptr"); + let flags = var("flags"); + let offset = var("off"); + let vararg = var("vararg"); + + narrow.custom_legalize(load, "narrow_load"); + narrow.custom_legalize(store, "narrow_store"); + + // iconst.i64 can't be legalized in the meta langage (because integer literals can't be + // embedded as part of arguments), so use a custom legalization for now. 
+ narrow.custom_legalize(iconst, "narrow_iconst"); + + for &(ty, ty_half) in &[(I128, I64), (I64, I32)] { + let inst = uextend.bind(ty).bind(ty_half); + narrow.legalize( + def!(a = inst(x)), + vec![ + def!(ah = iconst(Literal::constant(&imm.imm64, 0))), + def!(a = iconcat(x, ah)), + ], + ); + } + + for &(ty, ty_half, shift) in &[(I128, I64, 63), (I64, I32, 31)] { + let inst = sextend.bind(ty).bind(ty_half); + narrow.legalize( + def!(a = inst(x)), + vec![ + def!(ah = sshr_imm(x, Literal::constant(&imm.imm64, shift))), // splat sign bit to whole number + def!(a = iconcat(x, ah)), + ], + ); + } + + for &bin_op in &[band, bor, bxor, band_not, bor_not, bxor_not] { + narrow.legalize( + def!(a = bin_op(x, y)), + vec![ + def!((xl, xh) = isplit(x)), + def!((yl, yh) = isplit(y)), + def!(al = bin_op(xl, yl)), + def!(ah = bin_op(xh, yh)), + def!(a = iconcat(al, ah)), + ], + ); + } + + narrow.legalize( + def!(a = bnot(x)), + vec![ + def!((xl, xh) = isplit(x)), + def!(al = bnot(xl)), + def!(ah = bnot(xh)), + def!(a = iconcat(al, ah)), + ], + ); + + narrow.legalize( + def!(a = select(c, x, y)), + vec![ + def!((xl, xh) = isplit(x)), + def!((yl, yh) = isplit(y)), + def!(al = select(c, xl, yl)), + def!(ah = select(c, xh, yh)), + def!(a = iconcat(al, ah)), + ], + ); + + for &ty in &[I128, I64] { + let block = var("block"); + let block1 = var("block1"); + let block2 = var("block2"); + + narrow.legalize( + def!(brz.ty(x, block, vararg)), + vec![ + def!((xl, xh) = isplit(x)), + def!( + a = icmp_imm( + Literal::enumerator_for(&imm.intcc, "eq"), + xl, + Literal::constant(&imm.imm64, 0) + ) + ), + def!( + b = icmp_imm( + Literal::enumerator_for(&imm.intcc, "eq"), + xh, + Literal::constant(&imm.imm64, 0) + ) + ), + def!(c = band(a, b)), + def!(brnz(c, block, vararg)), + ], + ); + + narrow.legalize( + def!(brnz.ty(x, block1, vararg)), + vec![ + def!((xl, xh) = isplit(x)), + def!(brnz(xl, block1, vararg)), + def!(jump(block2, Literal::empty_vararg())), + block!(block2), + def!(brnz(xh, block1, vararg)), + ], + ); + } + + narrow.legalize( + def!(a = popcnt.I128(x)), + vec![ + def!((xl, xh) = isplit(x)), + def!(e1 = popcnt(xl)), + def!(e2 = popcnt(xh)), + def!(e3 = iadd(e1, e2)), + def!(a = uextend(e3)), + ], + ); + + // TODO(ryzokuken): benchmark this and decide if branching is a faster + // approach than evaluating boolean expressions. 
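As a plain-Rust sanity check of the split-halves scheme used by these narrowings (a sketch of the arithmetic, not the meta-language or generated code): sign extension splats the sign bit into the new high half, and bitwise ops simply apply per half.

    // Sketch only: mirrors the `sextend` narrowing (sshr_imm by 63 splats the
    // sign bit) and the per-half bitwise narrowing above, on plain integers.
    fn sextend_i64_to_i128(x: i64) -> i128 {
        let lo = x as u64;
        let hi = (x >> 63) as u64; // 0 for non-negative x, all ones for negative x
        (((hi as u128) << 64) | lo as u128) as i128 // iconcat(lo, hi)
    }

    fn band_i128_via_halves(x: u128, y: u128) -> u128 {
        let (xl, xh) = (x as u64, (x >> 64) as u64); // isplit(x)
        let (yl, yh) = (y as u64, (y >> 64) as u64); // isplit(y)
        ((xh & yh) as u128) << 64 | (xl & yl) as u128 // band per half, then iconcat
    }

    #[test]
    fn narrowing_sketches_agree_with_native_ops() {
        assert_eq!(sextend_i64_to_i128(-5), -5_i128);
        assert_eq!(sextend_i64_to_i128(7), 7_i128);
        assert_eq!(band_i128_via_halves(u128::MAX, 0xffee), 0xffee);
    }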
+ + narrow.custom_legalize(icmp_imm, "narrow_icmp_imm"); + + let intcc_eq = Literal::enumerator_for(&imm.intcc, "eq"); + let intcc_ne = Literal::enumerator_for(&imm.intcc, "ne"); + for &(int_ty, int_ty_half) in &[(I64, I32), (I128, I64)] { + narrow.legalize( + def!(b = icmp.int_ty(intcc_eq, x, y)), + vec![ + def!((xl, xh) = isplit(x)), + def!((yl, yh) = isplit(y)), + def!(b1 = icmp.int_ty_half(intcc_eq, xl, yl)), + def!(b2 = icmp.int_ty_half(intcc_eq, xh, yh)), + def!(b = band(b1, b2)), + ], + ); + + narrow.legalize( + def!(b = icmp.int_ty(intcc_ne, x, y)), + vec![ + def!((xl, xh) = isplit(x)), + def!((yl, yh) = isplit(y)), + def!(b1 = icmp.int_ty_half(intcc_ne, xl, yl)), + def!(b2 = icmp.int_ty_half(intcc_ne, xh, yh)), + def!(b = bor(b1, b2)), + ], + ); + + use IntCC::*; + for cc in &[ + SignedGreaterThan, + SignedGreaterThanOrEqual, + SignedLessThan, + SignedLessThanOrEqual, + UnsignedGreaterThan, + UnsignedGreaterThanOrEqual, + UnsignedLessThan, + UnsignedLessThanOrEqual, + ] { + let intcc_cc = Literal::enumerator_for(&imm.intcc, cc.to_static_str()); + let cc1 = Literal::enumerator_for(&imm.intcc, cc.without_equal().to_static_str()); + let cc2 = + Literal::enumerator_for(&imm.intcc, cc.inverse().without_equal().to_static_str()); + let cc3 = Literal::enumerator_for(&imm.intcc, cc.unsigned().to_static_str()); + narrow.legalize( + def!(b = icmp.int_ty(intcc_cc, x, y)), + vec![ + def!((xl, xh) = isplit(x)), + def!((yl, yh) = isplit(y)), + // X = cc1 || (!cc2 && cc3) + def!(b1 = icmp.int_ty_half(cc1, xh, yh)), + def!(b2 = icmp.int_ty_half(cc2, xh, yh)), + def!(b3 = icmp.int_ty_half(cc3, xl, yl)), + def!(c1 = bnot(b2)), + def!(c2 = band(c1, b3)), + def!(b = bor(b1, c2)), + ], + ); + } + } + + // TODO(ryzokuken): explore the perf diff w/ x86_umulx and consider have a + // separate legalization for x86. + for &ty in &[I64, I128] { + narrow.legalize( + def!(a = imul.ty(x, y)), + vec![ + def!((xl, xh) = isplit(x)), + def!((yl, yh) = isplit(y)), + def!(a1 = imul(xh, yl)), + def!(a2 = imul(xl, yh)), + def!(a3 = iadd(a1, a2)), + def!(a4 = umulhi(xl, yl)), + def!(ah = iadd(a3, a4)), + def!(al = imul(xl, yl)), + def!(a = iconcat(al, ah)), + ], + ); + } + + let zero = Literal::constant(&imm.imm64, 0); + narrow.legalize( + def!(a = iadd_imm.I128(x, c)), + vec![ + def!(yh = iconst.I64(zero)), + def!(yl = iconst.I64(c)), + def!(y = iconcat.I64(yh, yl)), + def!(a = iadd(x, y)), + ], + ); + + // Widen instructions with one input operand. + for &op in &[bnot, popcnt] { + for &int_ty in &[I8, I16] { + widen.legalize( + def!(a = op.int_ty(b)), + vec![ + def!(x = uextend.I32(b)), + def!(z = op.I32(x)), + def!(a = ireduce.int_ty(z)), + ], + ); + } + } + + // Widen instructions with two input operands. + let mut widen_two_arg = |signed: bool, op: &Instruction| { + for &int_ty in &[I8, I16] { + let sign_ext_op = if signed { sextend } else { uextend }; + widen.legalize( + def!(a = op.int_ty(b, c)), + vec![ + def!(x = sign_ext_op.I32(b)), + def!(y = sign_ext_op.I32(c)), + def!(z = op.I32(x, y)), + def!(a = ireduce.int_ty(z)), + ], + ); + } + }; + + for bin_op in &[ + iadd, isub, imul, udiv, urem, band, bor, bxor, band_not, bor_not, bxor_not, + ] { + widen_two_arg(false, bin_op); + } + for bin_op in &[sdiv, srem] { + widen_two_arg(true, bin_op); + } + + // Widen instructions using immediate operands. 
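(The immediate-operand widenings that follow reuse the same extend, operate, reduce shape as the two-operand helper above.) For the 128-bit multiply narrowing earlier in this group, here is a plain-Rust sketch of the same half-product arithmetic; it keeps only the low 128 bits of the product, matching the wrapping behaviour of `imul`:

    // Sketch only: the split-halves product mirrors the `imul`/`umulhi` pattern above.
    fn imul_i128_via_halves(x: u128, y: u128) -> u128 {
        let (xl, xh) = (x as u64, (x >> 64) as u64); // isplit(x)
        let (yl, yh) = (y as u64, (y >> 64) as u64); // isplit(y)
        let a1 = xh.wrapping_mul(yl);                // imul(xh, yl)
        let a2 = xl.wrapping_mul(yh);                // imul(xl, yh)
        let a3 = a1.wrapping_add(a2);                // iadd(a1, a2)
        let a4 = ((xl as u128 * yl as u128) >> 64) as u64; // umulhi(xl, yl)
        let ah = a3.wrapping_add(a4);                // iadd(a3, a4)
        let al = xl.wrapping_mul(yl);                // imul(xl, yl)
        ((ah as u128) << 64) | al as u128            // iconcat(al, ah)
    }

    #[test]
    fn imul_sketch_agrees_with_wrapping_mul() {
        let x = 0xfeed_face_dead_beef_u128 * 3 + 11;
        let y = 0x0123_4567_89ab_cdef_u128 << 40 | 0x42;
        assert_eq!(imul_i128_via_halves(x, y), x.wrapping_mul(y));
    }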
+ let mut widen_imm = |signed: bool, op: &Instruction| { + for &int_ty in &[I8, I16] { + let sign_ext_op = if signed { sextend } else { uextend }; + widen.legalize( + def!(a = op.int_ty(b, c)), + vec![ + def!(x = sign_ext_op.I32(b)), + def!(z = op.I32(x, c)), + def!(a = ireduce.int_ty(z)), + ], + ); + } + }; + + for bin_op in &[ + iadd_imm, imul_imm, udiv_imm, urem_imm, band_imm, bor_imm, bxor_imm, irsub_imm, + ] { + widen_imm(false, bin_op); + } + for bin_op in &[sdiv_imm, srem_imm] { + widen_imm(true, bin_op); + } + + for &(int_ty, num) in &[(I8, 24), (I16, 16)] { + let imm = Literal::constant(&imm.imm64, -num); + + widen.legalize( + def!(a = clz.int_ty(b)), + vec![ + def!(c = uextend.I32(b)), + def!(d = clz.I32(c)), + def!(e = iadd_imm(d, imm)), + def!(a = ireduce.int_ty(e)), + ], + ); + + widen.legalize( + def!(a = cls.int_ty(b)), + vec![ + def!(c = sextend.I32(b)), + def!(d = cls.I32(c)), + def!(e = iadd_imm(d, imm)), + def!(a = ireduce.int_ty(e)), + ], + ); + } + + for &(int_ty, num) in &[(I8, 1 << 8), (I16, 1 << 16)] { + let num = Literal::constant(&imm.imm64, num); + widen.legalize( + def!(a = ctz.int_ty(b)), + vec![ + def!(c = uextend.I32(b)), + // When `b` is zero, returns the size of x in bits. + def!(d = bor_imm(c, num)), + def!(e = ctz.I32(d)), + def!(a = ireduce.int_ty(e)), + ], + ); + } + + // iconst + for &int_ty in &[I8, I16] { + widen.legalize( + def!(a = iconst.int_ty(b)), + vec![def!(c = iconst.I32(b)), def!(a = ireduce.int_ty(c))], + ); + } + + for &extend_op in &[uextend, sextend] { + // The sign extension operators have two typevars: the result has one and controls the + // instruction, then the input has one. + let bound = extend_op.bind(I16).bind(I8); + widen.legalize( + def!(a = bound(b)), + vec![def!(c = extend_op.I32(b)), def!(a = ireduce(c))], + ); + } + + widen.legalize( + def!(store.I8(flags, a, ptr, offset)), + vec![ + def!(b = uextend.I32(a)), + def!(istore8(flags, b, ptr, offset)), + ], + ); + + widen.legalize( + def!(store.I16(flags, a, ptr, offset)), + vec![ + def!(b = uextend.I32(a)), + def!(istore16(flags, b, ptr, offset)), + ], + ); + + widen.legalize( + def!(a = load.I8(flags, ptr, offset)), + vec![ + def!(b = uload8.I32(flags, ptr, offset)), + def!(a = ireduce(b)), + ], + ); + + widen.legalize( + def!(a = load.I16(flags, ptr, offset)), + vec![ + def!(b = uload16.I32(flags, ptr, offset)), + def!(a = ireduce(b)), + ], + ); + + for &int_ty in &[I8, I16] { + widen.legalize( + def!(br_table.int_ty(x, y, z)), + vec![def!(b = uextend.I32(x)), def!(br_table(b, y, z))], + ); + } + + for &int_ty in &[I8, I16] { + widen.legalize( + def!(a = bint.int_ty(b)), + vec![def!(x = bint.I32(b)), def!(a = ireduce.int_ty(x))], + ); + } + + for &int_ty in &[I8, I16] { + for &op in &[ishl, ishl_imm, ushr, ushr_imm] { + widen.legalize( + def!(a = op.int_ty(b, c)), + vec![ + def!(x = uextend.I32(b)), + def!(z = op.I32(x, c)), + def!(a = ireduce.int_ty(z)), + ], + ); + } + + for &op in &[sshr, sshr_imm] { + widen.legalize( + def!(a = op.int_ty(b, c)), + vec![ + def!(x = sextend.I32(b)), + def!(z = op.I32(x, c)), + def!(a = ireduce.int_ty(z)), + ], + ); + } + + for cc in &["eq", "ne", "ugt", "ult", "uge", "ule"] { + let w_cc = Literal::enumerator_for(&imm.intcc, cc); + widen.legalize( + def!(a = icmp_imm.int_ty(w_cc, b, c)), + vec![def!(x = uextend.I32(b)), def!(a = icmp_imm(w_cc, x, c))], + ); + widen.legalize( + def!(a = icmp.int_ty(w_cc, b, c)), + vec![ + def!(x = uextend.I32(b)), + def!(y = uextend.I32(c)), + def!(a = icmp.I32(w_cc, x, y)), + ], + ); + } + + for cc in 
&["sgt", "slt", "sge", "sle"] { + let w_cc = Literal::enumerator_for(&imm.intcc, cc); + widen.legalize( + def!(a = icmp_imm.int_ty(w_cc, b, c)), + vec![def!(x = sextend.I32(b)), def!(a = icmp_imm(w_cc, x, c))], + ); + + widen.legalize( + def!(a = icmp.int_ty(w_cc, b, c)), + vec![ + def!(x = sextend.I32(b)), + def!(y = sextend.I32(c)), + def!(a = icmp(w_cc, x, y)), + ], + ); + } + } + + for &ty in &[I8, I16] { + widen.legalize( + def!(brz.ty(x, block, vararg)), + vec![def!(a = uextend.I32(x)), def!(brz(a, block, vararg))], + ); + + widen.legalize( + def!(brnz.ty(x, block, vararg)), + vec![def!(a = uextend.I32(x)), def!(brnz(a, block, vararg))], + ); + } + + for &(ty_half, ty) in &[(I64, I128), (I32, I64)] { + let inst = ireduce.bind(ty_half).bind(ty); + expand.legalize( + def!(a = inst(x)), + vec![def!((b, c) = isplit(x)), def!(a = copy(b))], + ); + } + + // Expand integer operations with carry for RISC architectures that don't have + // the flags. + let intcc_ult = Literal::enumerator_for(&imm.intcc, "ult"); + expand.legalize( + def!((a, c) = iadd_cout(x, y)), + vec![def!(a = iadd(x, y)), def!(c = icmp(intcc_ult, a, x))], + ); + + let intcc_ugt = Literal::enumerator_for(&imm.intcc, "ugt"); + expand.legalize( + def!((a, b) = isub_bout(x, y)), + vec![def!(a = isub(x, y)), def!(b = icmp(intcc_ugt, a, x))], + ); + + expand.legalize( + def!(a = iadd_cin(x, y, c)), + vec![ + def!(a1 = iadd(x, y)), + def!(c_int = bint(c)), + def!(a = iadd(a1, c_int)), + ], + ); + + expand.legalize( + def!(a = isub_bin(x, y, b)), + vec![ + def!(a1 = isub(x, y)), + def!(b_int = bint(b)), + def!(a = isub(a1, b_int)), + ], + ); + + expand.legalize( + def!((a, c) = iadd_carry(x, y, c_in)), + vec![ + def!((a1, c1) = iadd_cout(x, y)), + def!(c_int = bint(c_in)), + def!((a, c2) = iadd_cout(a1, c_int)), + def!(c = bor(c1, c2)), + ], + ); + + expand.legalize( + def!((a, b) = isub_borrow(x, y, b_in)), + vec![ + def!((a1, b1) = isub_bout(x, y)), + def!(b_int = bint(b_in)), + def!((a, b2) = isub_bout(a1, b_int)), + def!(b = bor(b1, b2)), + ], + ); + + // Expansion for fcvt_from_sint for smaller integer types. + // This uses expand and not widen because the controlling type variable for + // this instruction is f32/f64, which is legalized as part of the expand + // group. + for &dest_ty in &[F32, F64] { + for &src_ty in &[I8, I16] { + let bound_inst = fcvt_from_sint.bind(dest_ty).bind(src_ty); + expand.legalize( + def!(a = bound_inst(b)), + vec![ + def!(x = sextend.I32(b)), + def!(a = fcvt_from_sint.dest_ty(x)), + ], + ); + } + } + + // Expansions for immediate operands that are out of range. + for &(inst_imm, inst) in &[ + (iadd_imm, iadd), + (imul_imm, imul), + (sdiv_imm, sdiv), + (udiv_imm, udiv), + (srem_imm, srem), + (urem_imm, urem), + (band_imm, band), + (bor_imm, bor), + (bxor_imm, bxor), + (ifcmp_imm, ifcmp), + ] { + expand.legalize( + def!(a = inst_imm(x, y)), + vec![def!(a1 = iconst(y)), def!(a = inst(x, a1))], + ); + } + + expand.legalize( + def!(a = irsub_imm(y, x)), + vec![def!(a1 = iconst(x)), def!(a = isub(a1, y))], + ); + + // Rotates and shifts. + for &(inst_imm, inst) in &[ + (rotl_imm, rotl), + (rotr_imm, rotr), + (ishl_imm, ishl), + (sshr_imm, sshr), + (ushr_imm, ushr), + ] { + expand.legalize( + def!(a = inst_imm(x, y)), + vec![def!(a1 = iconst.I32(y)), def!(a = inst(x, a1))], + ); + } + + expand.legalize( + def!(a = icmp_imm(cc, x, y)), + vec![def!(a1 = iconst(y)), def!(a = icmp(cc, x, a1))], + ); + + //# Expansions for *_not variants of bitwise ops. 
+ for &(inst_not, inst) in &[(band_not, band), (bor_not, bor), (bxor_not, bxor)] { + expand.legalize( + def!(a = inst_not(x, y)), + vec![def!(a1 = bnot(y)), def!(a = inst(x, a1))], + ); + } + + //# Expand bnot using xor. + let minus_one = Literal::constant(&imm.imm64, -1); + expand.legalize( + def!(a = bnot(x)), + vec![def!(y = iconst(minus_one)), def!(a = bxor(x, y))], + ); + + //# Expand bitrev + //# Adapted from Stack Overflow. + //# https://stackoverflow.com/questions/746171/most-efficient-algorithm-for-bit-reversal-from-msb-lsb-to-lsb-msb-in-c + let imm64_1 = Literal::constant(&imm.imm64, 1); + let imm64_2 = Literal::constant(&imm.imm64, 2); + let imm64_4 = Literal::constant(&imm.imm64, 4); + + widen.legalize( + def!(a = bitrev.I8(x)), + vec![ + def!(a1 = band_imm(x, Literal::constant(&imm.imm64, 0xaa))), + def!(a2 = ushr_imm(a1, imm64_1)), + def!(a3 = band_imm(x, Literal::constant(&imm.imm64, 0x55))), + def!(a4 = ishl_imm(a3, imm64_1)), + def!(b = bor(a2, a4)), + def!(b1 = band_imm(b, Literal::constant(&imm.imm64, 0xcc))), + def!(b2 = ushr_imm(b1, imm64_2)), + def!(b3 = band_imm(b, Literal::constant(&imm.imm64, 0x33))), + def!(b4 = ishl_imm(b3, imm64_2)), + def!(c = bor(b2, b4)), + def!(c1 = band_imm(c, Literal::constant(&imm.imm64, 0xf0))), + def!(c2 = ushr_imm(c1, imm64_4)), + def!(c3 = band_imm(c, Literal::constant(&imm.imm64, 0x0f))), + def!(c4 = ishl_imm(c3, imm64_4)), + def!(a = bor(c2, c4)), + ], + ); + + let imm64_8 = Literal::constant(&imm.imm64, 8); + + widen.legalize( + def!(a = bitrev.I16(x)), + vec![ + def!(a1 = band_imm(x, Literal::constant(&imm.imm64, 0xaaaa))), + def!(a2 = ushr_imm(a1, imm64_1)), + def!(a3 = band_imm(x, Literal::constant(&imm.imm64, 0x5555))), + def!(a4 = ishl_imm(a3, imm64_1)), + def!(b = bor(a2, a4)), + def!(b1 = band_imm(b, Literal::constant(&imm.imm64, 0xcccc))), + def!(b2 = ushr_imm(b1, imm64_2)), + def!(b3 = band_imm(b, Literal::constant(&imm.imm64, 0x3333))), + def!(b4 = ishl_imm(b3, imm64_2)), + def!(c = bor(b2, b4)), + def!(c1 = band_imm(c, Literal::constant(&imm.imm64, 0xf0f0))), + def!(c2 = ushr_imm(c1, imm64_4)), + def!(c3 = band_imm(c, Literal::constant(&imm.imm64, 0x0f0f))), + def!(c4 = ishl_imm(c3, imm64_4)), + def!(d = bor(c2, c4)), + def!(d1 = band_imm(d, Literal::constant(&imm.imm64, 0xff00))), + def!(d2 = ushr_imm(d1, imm64_8)), + def!(d3 = band_imm(d, Literal::constant(&imm.imm64, 0x00ff))), + def!(d4 = ishl_imm(d3, imm64_8)), + def!(a = bor(d2, d4)), + ], + ); + + let imm64_16 = Literal::constant(&imm.imm64, 16); + + expand.legalize( + def!(a = bitrev.I32(x)), + vec![ + def!(a1 = band_imm(x, Literal::constant(&imm.imm64, 0xaaaa_aaaa))), + def!(a2 = ushr_imm(a1, imm64_1)), + def!(a3 = band_imm(x, Literal::constant(&imm.imm64, 0x5555_5555))), + def!(a4 = ishl_imm(a3, imm64_1)), + def!(b = bor(a2, a4)), + def!(b1 = band_imm(b, Literal::constant(&imm.imm64, 0xcccc_cccc))), + def!(b2 = ushr_imm(b1, imm64_2)), + def!(b3 = band_imm(b, Literal::constant(&imm.imm64, 0x3333_3333))), + def!(b4 = ishl_imm(b3, imm64_2)), + def!(c = bor(b2, b4)), + def!(c1 = band_imm(c, Literal::constant(&imm.imm64, 0xf0f0_f0f0))), + def!(c2 = ushr_imm(c1, imm64_4)), + def!(c3 = band_imm(c, Literal::constant(&imm.imm64, 0x0f0f_0f0f))), + def!(c4 = ishl_imm(c3, imm64_4)), + def!(d = bor(c2, c4)), + def!(d1 = band_imm(d, Literal::constant(&imm.imm64, 0xff00_ff00))), + def!(d2 = ushr_imm(d1, imm64_8)), + def!(d3 = band_imm(d, Literal::constant(&imm.imm64, 0x00ff_00ff))), + def!(d4 = ishl_imm(d3, imm64_8)), + def!(e = bor(d2, d4)), + def!(e1 = ushr_imm(e, 
imm64_16)), + def!(e2 = ishl_imm(e, imm64_16)), + def!(a = bor(e1, e2)), + ], + ); + + #[allow(overflowing_literals)] + let imm64_0xaaaaaaaaaaaaaaaa = Literal::constant(&imm.imm64, 0xaaaa_aaaa_aaaa_aaaa); + let imm64_0x5555555555555555 = Literal::constant(&imm.imm64, 0x5555_5555_5555_5555); + #[allow(overflowing_literals)] + let imm64_0xcccccccccccccccc = Literal::constant(&imm.imm64, 0xcccc_cccc_cccc_cccc); + let imm64_0x3333333333333333 = Literal::constant(&imm.imm64, 0x3333_3333_3333_3333); + #[allow(overflowing_literals)] + let imm64_0xf0f0f0f0f0f0f0f0 = Literal::constant(&imm.imm64, 0xf0f0_f0f0_f0f0_f0f0); + let imm64_0x0f0f0f0f0f0f0f0f = Literal::constant(&imm.imm64, 0x0f0f_0f0f_0f0f_0f0f); + #[allow(overflowing_literals)] + let imm64_0xff00ff00ff00ff00 = Literal::constant(&imm.imm64, 0xff00_ff00_ff00_ff00); + let imm64_0x00ff00ff00ff00ff = Literal::constant(&imm.imm64, 0x00ff_00ff_00ff_00ff); + #[allow(overflowing_literals)] + let imm64_0xffff0000ffff0000 = Literal::constant(&imm.imm64, 0xffff_0000_ffff_0000); + let imm64_0x0000ffff0000ffff = Literal::constant(&imm.imm64, 0x0000_ffff_0000_ffff); + let imm64_32 = Literal::constant(&imm.imm64, 32); + + expand.legalize( + def!(a = bitrev.I64(x)), + vec![ + def!(a1 = band_imm(x, imm64_0xaaaaaaaaaaaaaaaa)), + def!(a2 = ushr_imm(a1, imm64_1)), + def!(a3 = band_imm(x, imm64_0x5555555555555555)), + def!(a4 = ishl_imm(a3, imm64_1)), + def!(b = bor(a2, a4)), + def!(b1 = band_imm(b, imm64_0xcccccccccccccccc)), + def!(b2 = ushr_imm(b1, imm64_2)), + def!(b3 = band_imm(b, imm64_0x3333333333333333)), + def!(b4 = ishl_imm(b3, imm64_2)), + def!(c = bor(b2, b4)), + def!(c1 = band_imm(c, imm64_0xf0f0f0f0f0f0f0f0)), + def!(c2 = ushr_imm(c1, imm64_4)), + def!(c3 = band_imm(c, imm64_0x0f0f0f0f0f0f0f0f)), + def!(c4 = ishl_imm(c3, imm64_4)), + def!(d = bor(c2, c4)), + def!(d1 = band_imm(d, imm64_0xff00ff00ff00ff00)), + def!(d2 = ushr_imm(d1, imm64_8)), + def!(d3 = band_imm(d, imm64_0x00ff00ff00ff00ff)), + def!(d4 = ishl_imm(d3, imm64_8)), + def!(e = bor(d2, d4)), + def!(e1 = band_imm(e, imm64_0xffff0000ffff0000)), + def!(e2 = ushr_imm(e1, imm64_16)), + def!(e3 = band_imm(e, imm64_0x0000ffff0000ffff)), + def!(e4 = ishl_imm(e3, imm64_16)), + def!(f = bor(e2, e4)), + def!(f1 = ushr_imm(f, imm64_32)), + def!(f2 = ishl_imm(f, imm64_32)), + def!(a = bor(f1, f2)), + ], + ); + + narrow.legalize( + def!(a = bitrev.I128(x)), + vec![ + def!((xl, xh) = isplit(x)), + def!(yh = bitrev(xl)), + def!(yl = bitrev(xh)), + def!(a = iconcat(yl, yh)), + ], + ); + + // Floating-point sign manipulations. + for &(ty, const_inst, minus_zero) in &[ + (F32, f32const, &Literal::bits(&imm.ieee32, 0x8000_0000)), + ( + F64, + f64const, + &Literal::bits(&imm.ieee64, 0x8000_0000_0000_0000), + ), + ] { + expand.legalize( + def!(a = fabs.ty(x)), + vec![def!(b = const_inst(minus_zero)), def!(a = band_not(x, b))], + ); + + expand.legalize( + def!(a = fneg.ty(x)), + vec![def!(b = const_inst(minus_zero)), def!(a = bxor(x, b))], + ); + + expand.legalize( + def!(a = fcopysign.ty(x, y)), + vec![ + def!(b = const_inst(minus_zero)), + def!(a1 = band_not(x, b)), + def!(a2 = band(y, b)), + def!(a = bor(a1, a2)), + ], + ); + } + + expand.custom_legalize(br_icmp, "expand_br_icmp"); + + let mut groups = TransformGroups::new(); + + let narrow_id = narrow.build_and_add_to(&mut groups); + let expand_id = expand.build_and_add_to(&mut groups); + + // Expansions using CPU flags. + let mut expand_flags = TransformGroupBuilder::new( + "expand_flags", + r#" + Instruction expansions for architectures with flags. 
+ + Expand some instructions using CPU flags, then fall back to the normal + expansions. Not all architectures support CPU flags, so these patterns + are kept separate. + "#, + ) + .chain_with(expand_id); + + let imm64_0 = Literal::constant(&imm.imm64, 0); + let intcc_ne = Literal::enumerator_for(&imm.intcc, "ne"); + let intcc_eq = Literal::enumerator_for(&imm.intcc, "eq"); + + expand_flags.legalize( + def!(trapnz(x, c)), + vec![ + def!(a = ifcmp_imm(x, imm64_0)), + def!(trapif(intcc_ne, a, c)), + ], + ); + + expand_flags.legalize( + def!(trapz(x, c)), + vec![ + def!(a = ifcmp_imm(x, imm64_0)), + def!(trapif(intcc_eq, a, c)), + ], + ); + + expand_flags.build_and_add_to(&mut groups); + + // Narrow legalizations using CPU flags. + let mut narrow_flags = TransformGroupBuilder::new( + "narrow_flags", + r#" + Narrow instructions for architectures with flags. + + Narrow some instructions using CPU flags, then fall back to the normal + legalizations. Not all architectures support CPU flags, so these + patterns are kept separate. + "#, + ) + .chain_with(narrow_id); + + narrow_flags.legalize( + def!(a = iadd(x, y)), + vec![ + def!((xl, xh) = isplit(x)), + def!((yl, yh) = isplit(y)), + def!((al, c) = iadd_ifcout(xl, yl)), + def!(ah = iadd_ifcin(xh, yh, c)), + def!(a = iconcat(al, ah)), + ], + ); + + narrow_flags.legalize( + def!(a = isub(x, y)), + vec![ + def!((xl, xh) = isplit(x)), + def!((yl, yh) = isplit(y)), + def!((al, b) = isub_ifbout(xl, yl)), + def!(ah = isub_ifbin(xh, yh, b)), + def!(a = iconcat(al, ah)), + ], + ); + + narrow_flags.build_and_add_to(&mut groups); + + // TODO(ryzokuken): figure out a way to legalize iadd_c* to iadd_ifc* (and + // similarly isub_b* to isub_ifb*) on expand_flags so that this isn't required. + // Narrow legalizations for ISAs that don't have CPU flags. + let mut narrow_no_flags = TransformGroupBuilder::new( + "narrow_no_flags", + r#" + Narrow instructions for architectures without flags. + + Narrow some instructions avoiding the use of CPU flags, then fall back + to the normal legalizations. Not all architectures support CPU flags, + so these patterns are kept separate. + "#, + ) + .chain_with(narrow_id); + + narrow_no_flags.legalize( + def!(a = iadd(x, y)), + vec![ + def!((xl, xh) = isplit(x)), + def!((yl, yh) = isplit(y)), + def!((al, c) = iadd_cout(xl, yl)), + def!(ah = iadd_cin(xh, yh, c)), + def!(a = iconcat(al, ah)), + ], + ); + + narrow_no_flags.legalize( + def!(a = isub(x, y)), + vec![ + def!((xl, xh) = isplit(x)), + def!((yl, yh) = isplit(y)), + def!((al, b) = isub_bout(xl, yl)), + def!(ah = isub_bin(xh, yh, b)), + def!(a = iconcat(al, ah)), + ], + ); + + narrow_no_flags.build_and_add_to(&mut groups); + + // TODO The order of declarations unfortunately matters to be compatible with the Python code. + // When it's all migrated, we can put this next to the narrow/expand build_and_add_to calls + // above. + widen.build_and_add_to(&mut groups); + + groups +} diff --git a/third_party/rust/cranelift-codegen-meta/src/shared/mod.rs b/third_party/rust/cranelift-codegen-meta/src/shared/mod.rs new file mode 100644 index 0000000000..b185262ccd --- /dev/null +++ b/third_party/rust/cranelift-codegen-meta/src/shared/mod.rs @@ -0,0 +1,101 @@ +//! Shared definitions for the Cranelift intermediate language. 
+ +pub mod entities; +pub mod formats; +pub mod immediates; +pub mod instructions; +pub mod legalize; +pub mod settings; +pub mod types; + +use crate::cdsl::formats::{FormatStructure, InstructionFormat}; +use crate::cdsl::instructions::{AllInstructions, InstructionGroup}; +use crate::cdsl::settings::SettingGroup; +use crate::cdsl::xform::TransformGroups; + +use crate::shared::entities::EntityRefs; +use crate::shared::formats::Formats; +use crate::shared::immediates::Immediates; + +use std::collections::HashMap; +use std::iter::FromIterator; +use std::rc::Rc; + +pub(crate) struct Definitions { + pub settings: SettingGroup, + pub all_instructions: AllInstructions, + pub instructions: InstructionGroup, + pub imm: Immediates, + pub formats: Formats, + pub transform_groups: TransformGroups, + pub entities: EntityRefs, +} + +pub(crate) fn define() -> Definitions { + let mut all_instructions = AllInstructions::new(); + + let immediates = Immediates::new(); + let entities = EntityRefs::new(); + let formats = Formats::new(&immediates, &entities); + let instructions = + instructions::define(&mut all_instructions, &formats, &immediates, &entities); + let transform_groups = legalize::define(&instructions, &immediates); + + Definitions { + settings: settings::define(), + all_instructions, + instructions, + imm: immediates, + formats, + transform_groups, + entities, + } +} + +impl Definitions { + /// Verifies certain properties of formats. + /// + /// - Formats must be uniquely named: if two formats have the same name, they must refer to the + /// same data. Otherwise, two format variants in the codegen crate would have the same name. + /// - Formats must be structurally different from each other. Otherwise, this would lead to + /// code duplicate in the codegen crate. + /// + /// Returns a list of all the instruction formats effectively used. + pub fn verify_instruction_formats(&self) -> Vec<&InstructionFormat> { + let mut format_names: HashMap<&'static str, &Rc<InstructionFormat>> = HashMap::new(); + + // A structure is: number of input value operands / whether there's varargs or not / names + // of immediate fields. + let mut format_structures: HashMap<FormatStructure, &InstructionFormat> = HashMap::new(); + + for inst in self.all_instructions.values() { + // Check name. + if let Some(existing_format) = format_names.get(&inst.format.name) { + assert!( + Rc::ptr_eq(&existing_format, &inst.format), + "formats must uniquely named; there's a\ + conflict on the name '{}', please make sure it is used only once.", + existing_format.name + ); + } else { + format_names.insert(inst.format.name, &inst.format); + } + + // Check structure. 
+ let key = inst.format.structure(); + if let Some(existing_format) = format_structures.get(&key) { + assert_eq!( + existing_format.name, inst.format.name, + "duplicate instruction formats {} and {}; please remove one.", + existing_format.name, inst.format.name + ); + } else { + format_structures.insert(key, &inst.format); + } + } + + let mut result = Vec::from_iter(format_structures.into_iter().map(|(_, v)| v)); + result.sort_by_key(|format| format.name); + result + } +} diff --git a/third_party/rust/cranelift-codegen-meta/src/shared/settings.rs b/third_party/rust/cranelift-codegen-meta/src/shared/settings.rs new file mode 100644 index 0000000000..1ddc445927 --- /dev/null +++ b/third_party/rust/cranelift-codegen-meta/src/shared/settings.rs @@ -0,0 +1,287 @@ +use crate::cdsl::settings::{SettingGroup, SettingGroupBuilder}; + +pub(crate) fn define() -> SettingGroup { + let mut settings = SettingGroupBuilder::new("shared"); + + settings.add_enum( + "regalloc", + r#"Register allocator to use with the MachInst backend. + + This selects the register allocator as an option among those offered by the `regalloc.rs` + crate. Please report register allocation bugs to the maintainers of this crate whenever + possible. + + Note: this only applies to target that use the MachInst backend. As of 2020-04-17, this + means the x86_64 backend doesn't use this yet. + + Possible values: + + - `backtracking` is a greedy, backtracking register allocator as implemented in + Spidermonkey's optimizing tier IonMonkey. It may take more time to allocate registers, but + it should generate better code in general, resulting in better throughput of generated + code. + - `backtracking_checked` is the backtracking allocator with additional self checks that may + take some time to run, and thus these checks are disabled by default. + - `experimental_linear_scan` is an experimental linear scan allocator. It may take less + time to allocate registers, but generated code's quality may be inferior. As of + 2020-04-17, it is still experimental and it should not be used in production settings. + - `experimental_linear_scan_checked` is the linear scan allocator with additional self + checks that may take some time to run, and thus these checks are disabled by default. + "#, + vec![ + "backtracking", + "backtracking_checked", + "experimental_linear_scan", + "experimental_linear_scan_checked", + ], + ); + + settings.add_enum( + "opt_level", + r#" + Optimization level: + + - none: Minimise compile time by disabling most optimizations. + - speed: Generate the fastest possible code + - speed_and_size: like "speed", but also perform transformations + aimed at reducing code size. + "#, + vec!["none", "speed", "speed_and_size"], + ); + + settings.add_bool( + "enable_verifier", + r#" + Run the Cranelift IR verifier at strategic times during compilation. + + This makes compilation slower but catches many bugs. The verifier is always enabled by + default, which is useful during development. + "#, + true, + ); + + // Note that Cranelift doesn't currently need an is_pie flag, because PIE is + // just PIC where symbols can't be pre-empted, which can be expressed with the + // `colocated` flag on external functions and global values. + settings.add_bool( + "is_pic", + "Enable Position-Independent Code generation", + false, + ); + + settings.add_bool( + "use_colocated_libcalls", + r#" + Use colocated libcalls. 
+ + Generate code that assumes that libcalls can be declared "colocated", + meaning they will be defined along with the current function, such that + they can use more efficient addressing. + "#, + false, + ); + + settings.add_bool( + "avoid_div_traps", + r#" + Generate explicit checks around native division instructions to avoid + their trapping. + + This is primarily used by SpiderMonkey which doesn't install a signal + handler for SIGFPE, but expects a SIGILL trap for division by zero. + + On ISAs like ARM where the native division instructions don't trap, + this setting has no effect - explicit checks are always inserted. + "#, + false, + ); + + settings.add_bool( + "enable_float", + r#" + Enable the use of floating-point instructions + + Disabling use of floating-point instructions is not yet implemented. + "#, + true, + ); + + settings.add_bool( + "enable_nan_canonicalization", + r#" + Enable NaN canonicalization + + This replaces NaNs with a single canonical value, for users requiring + entirely deterministic WebAssembly computation. This is not required + by the WebAssembly spec, so it is not enabled by default. + "#, + false, + ); + + settings.add_bool( + "enable_pinned_reg", + r#"Enable the use of the pinned register. + + This register is excluded from register allocation, and is completely under the control of + the end-user. It is possible to read it via the get_pinned_reg instruction, and to set it + with the set_pinned_reg instruction. + "#, + false, + ); + + settings.add_bool( + "use_pinned_reg_as_heap_base", + r#"Use the pinned register as the heap base. + + Enabling this requires the enable_pinned_reg setting to be set to true. It enables a custom + legalization of the `heap_addr` instruction so it will use the pinned register as the heap + base, instead of fetching it from a global value. + + Warning! Enabling this means that the pinned register *must* be maintained to contain the + heap base address at all times, during the lifetime of a function. Using the pinned + register for other purposes when this is set is very likely to cause crashes. + "#, + false, + ); + + settings.add_bool("enable_simd", "Enable the use of SIMD instructions.", false); + + settings.add_bool( + "enable_atomics", + "Enable the use of atomic instructions", + true, + ); + + settings.add_bool( + "enable_safepoints", + r#" + Enable safepoint instruction insertions. + + This will allow the emit_stack_maps() function to insert the safepoint + instruction on top of calls and interrupt traps in order to display the + live reference values at that point in the program. + "#, + false, + ); + + settings.add_enum( + "tls_model", + r#" + Defines the model used to perform TLS accesses. + "#, + vec!["none", "elf_gd", "macho", "coff"], + ); + + // Settings specific to the `baldrdash` calling convention. + + settings.add_enum( + "libcall_call_conv", + r#" + Defines the calling convention to use for LibCalls call expansion, + since it may be different from the ISA default calling convention. + + The default value is to use the same calling convention as the ISA + default calling convention. + + This list should be kept in sync with the list of calling + conventions available in isa/call_conv.rs. + "#, + vec![ + "isa_default", + "fast", + "cold", + "system_v", + "windows_fastcall", + "baldrdash_system_v", + "baldrdash_windows", + "baldrdash_2020", + "probestack", + ], + ); + + settings.add_num( + "baldrdash_prologue_words", + r#" + Number of pointer-sized words pushed by the baldrdash prologue. 
+ + Functions with the `baldrdash` calling convention don't generate their + own prologue and epilogue. They depend on externally generated code + that pushes a fixed number of words in the prologue and restores them + in the epilogue. + + This setting configures the number of pointer-sized words pushed on the + stack when the Cranelift-generated code is entered. This includes the + pushed return address on x86. + "#, + 0, + ); + + // BaldrMonkey requires that not-yet-relocated function addresses be encoded + // as all-ones bitpatterns. + settings.add_bool( + "emit_all_ones_funcaddrs", + "Emit not-yet-relocated function addresses as all-ones bit patterns.", + false, + ); + + // Stack probing options. + + settings.add_bool( + "enable_probestack", + r#" + Enable the use of stack probes, for calling conventions which support this + functionality. + "#, + true, + ); + + settings.add_bool( + "probestack_func_adjusts_sp", + r#" + Set this to true of the stack probe function modifies the stack pointer + itself. + "#, + false, + ); + + settings.add_num( + "probestack_size_log2", + r#" + The log2 of the size of the stack guard region. + + Stack frames larger than this size will have stack overflow checked + by calling the probestack function. + + The default is 12, which translates to a size of 4096. + "#, + 12, + ); + + // Jump table options. + + settings.add_bool( + "enable_jump_tables", + "Enable the use of jump tables in generated machine code.", + true, + ); + + // Spectre options. + + settings.add_bool( + "enable_heap_access_spectre_mitigation", + r#" + Enable Spectre mitigation on heap bounds checks. + + This is a no-op for any heap that needs no bounds checks; e.g., + if the limit is static and the guard region is large enough that + the index cannot reach past it. + + This option is enabled by default because it is highly + recommended for secure sandboxing. The embedder should consider + the security implications carefully before disabling this option. + "#, + true, + ); + + settings.build() +} diff --git a/third_party/rust/cranelift-codegen-meta/src/shared/types.rs b/third_party/rust/cranelift-codegen-meta/src/shared/types.rs new file mode 100644 index 0000000000..631e5433e9 --- /dev/null +++ b/third_party/rust/cranelift-codegen-meta/src/shared/types.rs @@ -0,0 +1,236 @@ +//! This module predefines all the Cranelift scalar types. + +#[derive(Debug, Clone, Copy, Eq, PartialEq, Hash)] +pub(crate) enum Bool { + /// 1-bit bool. + B1 = 1, + /// 8-bit bool. + B8 = 8, + /// 16-bit bool. + B16 = 16, + /// 32-bit bool. + B32 = 32, + /// 64-bit bool. + B64 = 64, + /// 128-bit bool. + B128 = 128, +} + +/// This provides an iterator through all of the supported bool variants. +pub(crate) struct BoolIterator { + index: u8, +} + +impl BoolIterator { + pub fn new() -> Self { + Self { index: 0 } + } +} + +impl Iterator for BoolIterator { + type Item = Bool; + fn next(&mut self) -> Option<Self::Item> { + let res = match self.index { + 0 => Some(Bool::B1), + 1 => Some(Bool::B8), + 2 => Some(Bool::B16), + 3 => Some(Bool::B32), + 4 => Some(Bool::B64), + 5 => Some(Bool::B128), + _ => return None, + }; + self.index += 1; + res + } +} + +#[derive(Debug, Clone, Copy, Eq, PartialEq, Hash)] +pub(crate) enum Int { + /// 8-bit int. + I8 = 8, + /// 16-bit int. + I16 = 16, + /// 32-bit int. + I32 = 32, + /// 64-bit int. + I64 = 64, + /// 128-bit int. + I128 = 128, +} + +/// This provides an iterator through all of the supported int variants. 
+pub(crate) struct IntIterator { + index: u8, +} + +impl IntIterator { + pub fn new() -> Self { + Self { index: 0 } + } +} + +impl Iterator for IntIterator { + type Item = Int; + fn next(&mut self) -> Option<Self::Item> { + let res = match self.index { + 0 => Some(Int::I8), + 1 => Some(Int::I16), + 2 => Some(Int::I32), + 3 => Some(Int::I64), + 4 => Some(Int::I128), + _ => return None, + }; + self.index += 1; + res + } +} + +#[derive(Debug, Clone, Copy, Eq, PartialEq, Hash)] +pub(crate) enum Float { + F32 = 32, + F64 = 64, +} + +/// Iterator through the variants of the Float enum. +pub(crate) struct FloatIterator { + index: u8, +} + +impl FloatIterator { + pub fn new() -> Self { + Self { index: 0 } + } +} + +/// This provides an iterator through all of the supported float variants. +impl Iterator for FloatIterator { + type Item = Float; + fn next(&mut self) -> Option<Self::Item> { + let res = match self.index { + 0 => Some(Float::F32), + 1 => Some(Float::F64), + _ => return None, + }; + self.index += 1; + res + } +} + +/// A type representing CPU flags. +/// +/// Flags can't be stored in memory. +#[derive(Debug, Clone, Copy, Eq, PartialEq, Hash)] +pub(crate) enum Flag { + /// CPU flags from an integer comparison. + IFlags, + /// CPU flags from a floating point comparison. + FFlags, +} + +/// Iterator through the variants of the Flag enum. +pub(crate) struct FlagIterator { + index: u8, +} + +impl FlagIterator { + pub fn new() -> Self { + Self { index: 0 } + } +} + +impl Iterator for FlagIterator { + type Item = Flag; + fn next(&mut self) -> Option<Self::Item> { + let res = match self.index { + 0 => Some(Flag::IFlags), + 1 => Some(Flag::FFlags), + _ => return None, + }; + self.index += 1; + res + } +} + +#[derive(Debug, Clone, Copy, Eq, PartialEq, Hash)] +pub(crate) enum Reference { + /// 32-bit reference. + R32 = 32, + /// 64-bit reference. + R64 = 64, +} + +/// This provides an iterator through all of the supported reference variants. 
+pub(crate) struct ReferenceIterator { + index: u8, +} + +impl ReferenceIterator { + pub fn new() -> Self { + Self { index: 0 } + } +} + +impl Iterator for ReferenceIterator { + type Item = Reference; + fn next(&mut self) -> Option<Self::Item> { + let res = match self.index { + 0 => Some(Reference::R32), + 1 => Some(Reference::R64), + _ => return None, + }; + self.index += 1; + res + } +} + +#[cfg(test)] +mod iter_tests { + use super::*; + + #[test] + fn bool_iter_works() { + let mut bool_iter = BoolIterator::new(); + assert_eq!(bool_iter.next(), Some(Bool::B1)); + assert_eq!(bool_iter.next(), Some(Bool::B8)); + assert_eq!(bool_iter.next(), Some(Bool::B16)); + assert_eq!(bool_iter.next(), Some(Bool::B32)); + assert_eq!(bool_iter.next(), Some(Bool::B64)); + assert_eq!(bool_iter.next(), Some(Bool::B128)); + assert_eq!(bool_iter.next(), None); + } + + #[test] + fn int_iter_works() { + let mut int_iter = IntIterator::new(); + assert_eq!(int_iter.next(), Some(Int::I8)); + assert_eq!(int_iter.next(), Some(Int::I16)); + assert_eq!(int_iter.next(), Some(Int::I32)); + assert_eq!(int_iter.next(), Some(Int::I64)); + assert_eq!(int_iter.next(), Some(Int::I128)); + assert_eq!(int_iter.next(), None); + } + + #[test] + fn float_iter_works() { + let mut float_iter = FloatIterator::new(); + assert_eq!(float_iter.next(), Some(Float::F32)); + assert_eq!(float_iter.next(), Some(Float::F64)); + assert_eq!(float_iter.next(), None); + } + + #[test] + fn flag_iter_works() { + let mut flag_iter = FlagIterator::new(); + assert_eq!(flag_iter.next(), Some(Flag::IFlags)); + assert_eq!(flag_iter.next(), Some(Flag::FFlags)); + assert_eq!(flag_iter.next(), None); + } + + #[test] + fn reference_iter_works() { + let mut reference_iter = ReferenceIterator::new(); + assert_eq!(reference_iter.next(), Some(Reference::R32)); + assert_eq!(reference_iter.next(), Some(Reference::R64)); + assert_eq!(reference_iter.next(), None); + } +} diff --git a/third_party/rust/cranelift-codegen-meta/src/srcgen.rs b/third_party/rust/cranelift-codegen-meta/src/srcgen.rs new file mode 100644 index 0000000000..ad8db175d7 --- /dev/null +++ b/third_party/rust/cranelift-codegen-meta/src/srcgen.rs @@ -0,0 +1,484 @@ +//! Source code generator. +//! +//! The `srcgen` module contains generic helper routines and classes for +//! generating source code. + +#![macro_use] + +use std::cmp; +use std::collections::{BTreeMap, BTreeSet}; +use std::fs; +use std::io::Write; +use std::path; + +use crate::error; + +static SHIFTWIDTH: usize = 4; + +/// A macro that simplifies the usage of the Formatter by allowing format +/// strings. +macro_rules! fmtln { + ($fmt:ident, $fmtstring:expr, $($fmtargs:expr),*) => { + $fmt.line(format!($fmtstring, $($fmtargs),*)); + }; + + ($fmt:ident, $arg:expr) => { + $fmt.line($arg); + }; + + ($_:tt, $($args:expr),+) => { + compile_error!("This macro requires at least two arguments: the Formatter instance and a format string."); + }; + + ($_:tt) => { + compile_error!("This macro requires at least two arguments: the Formatter instance and a format string."); + }; +} + +pub(crate) struct Formatter { + indent: usize, + lines: Vec<String>, +} + +impl Formatter { + /// Source code formatter class. Used to collect source code to be written + /// to a file, and keep track of indentation. + pub fn new() -> Self { + Self { + indent: 0, + lines: Vec::new(), + } + } + + /// Increase current indentation level by one. + pub fn indent_push(&mut self) { + self.indent += 1; + } + + /// Decrease indentation by one level. 
+ pub fn indent_pop(&mut self) { + assert!(self.indent > 0, "Already at top level indentation"); + self.indent -= 1; + } + + pub fn indent<T, F: FnOnce(&mut Formatter) -> T>(&mut self, f: F) -> T { + self.indent_push(); + let ret = f(self); + self.indent_pop(); + ret + } + + /// Get the current whitespace indentation in the form of a String. + fn get_indent(&self) -> String { + if self.indent == 0 { + String::new() + } else { + format!("{:-1$}", " ", self.indent * SHIFTWIDTH) + } + } + + /// Get a string containing whitespace outdented one level. Used for + /// lines of code that are inside a single indented block. + fn get_outdent(&mut self) -> String { + self.indent_pop(); + let s = self.get_indent(); + self.indent_push(); + s + } + + /// Add an indented line. + pub fn line(&mut self, contents: impl AsRef<str>) { + let indented_line = format!("{}{}\n", self.get_indent(), contents.as_ref()); + self.lines.push(indented_line); + } + + /// Pushes an empty line. + pub fn empty_line(&mut self) { + self.lines.push("\n".to_string()); + } + + /// Emit a line outdented one level. + pub fn outdented_line(&mut self, s: &str) { + let new_line = format!("{}{}\n", self.get_outdent(), s); + self.lines.push(new_line); + } + + /// Write `self.lines` to a file. + pub fn update_file( + &self, + filename: impl AsRef<str>, + directory: &str, + ) -> Result<(), error::Error> { + #[cfg(target_family = "windows")] + let path_str = format!("{}\\{}", directory, filename.as_ref()); + #[cfg(not(target_family = "windows"))] + let path_str = format!("{}/{}", directory, filename.as_ref()); + + let path = path::Path::new(&path_str); + let mut f = fs::File::create(path)?; + + for l in self.lines.iter().map(|l| l.as_bytes()) { + f.write_all(l)?; + } + + Ok(()) + } + + /// Add one or more lines after stripping common indentation. + pub fn multi_line(&mut self, s: &str) { + parse_multiline(s).into_iter().for_each(|l| self.line(&l)); + } + + /// Add a comment line. + pub fn comment(&mut self, s: impl AsRef<str>) { + fmtln!(self, "// {}", s.as_ref()); + } + + /// Add a (multi-line) documentation comment. + pub fn doc_comment(&mut self, contents: impl AsRef<str>) { + parse_multiline(contents.as_ref()) + .iter() + .map(|l| { + if l.is_empty() { + "///".into() + } else { + format!("/// {}", l) + } + }) + .for_each(|s| self.line(s.as_str())); + } + + /// Add a match expression. + pub fn add_match(&mut self, m: Match) { + fmtln!(self, "match {} {{", m.expr); + self.indent(|fmt| { + for (&(ref fields, ref body), ref names) in m.arms.iter() { + // name { fields } | name { fields } => { body } + let conditions = names + .iter() + .map(|name| { + if !fields.is_empty() { + format!("{} {{ {} }}", name, fields.join(", ")) + } else { + name.clone() + } + }) + .collect::<Vec<_>>() + .join(" |\n") + + " => {"; + + fmt.multi_line(&conditions); + fmt.indent(|fmt| { + fmt.line(body); + }); + fmt.line("}"); + } + + // Make sure to include the catch all clause last. + if let Some(body) = m.catch_all { + fmt.line("_ => {"); + fmt.indent(|fmt| { + fmt.line(body); + }); + fmt.line("}"); + } + }); + self.line("}"); + } +} + +/// Compute the indentation of s, or None of an empty line. +fn _indent(s: &str) -> Option<usize> { + if s.is_empty() { + None + } else { + let t = s.trim_start(); + Some(s.len() - t.len()) + } +} + +/// Given a multi-line string, split it into a sequence of lines after +/// stripping a common indentation. This is useful for strings defined with +/// doc strings. 
+fn parse_multiline(s: &str) -> Vec<String> { + // Convert tabs into spaces. + let expanded_tab = format!("{:-1$}", " ", SHIFTWIDTH); + let lines: Vec<String> = s.lines().map(|l| l.replace("\t", &expanded_tab)).collect(); + + // Determine minimum indentation, ignoring the first line and empty lines. + let indent = lines + .iter() + .skip(1) + .filter(|l| !l.trim().is_empty()) + .map(|l| l.len() - l.trim_start().len()) + .min(); + + // Strip off leading blank lines. + let mut lines_iter = lines.iter().skip_while(|l| l.is_empty()); + let mut trimmed = Vec::with_capacity(lines.len()); + + // Remove indentation (first line is special) + if let Some(s) = lines_iter.next().map(|l| l.trim()).map(|l| l.to_string()) { + trimmed.push(s); + } + + // Remove trailing whitespace from other lines. + let mut other_lines = if let Some(indent) = indent { + // Note that empty lines may have fewer than `indent` chars. + lines_iter + .map(|l| &l[cmp::min(indent, l.len())..]) + .map(|l| l.trim_end()) + .map(|l| l.to_string()) + .collect::<Vec<_>>() + } else { + lines_iter + .map(|l| l.trim_end()) + .map(|l| l.to_string()) + .collect::<Vec<_>>() + }; + + trimmed.append(&mut other_lines); + + // Strip off trailing blank lines. + while let Some(s) = trimmed.pop() { + if s.is_empty() { + continue; + } else { + trimmed.push(s); + break; + } + } + + trimmed +} + +/// Match formatting class. +/// +/// Match objects collect all the information needed to emit a Rust `match` +/// expression, automatically deduplicating overlapping identical arms. +/// +/// Note that this class is ignorant of Rust types, and considers two fields +/// with the same name to be equivalent. BTreeMap/BTreeSet are used to +/// represent the arms in order to make the order deterministic. +pub(crate) struct Match { + expr: String, + arms: BTreeMap<(Vec<String>, String), BTreeSet<String>>, + /// The clause for the placeholder pattern _. + catch_all: Option<String>, +} + +impl Match { + /// Create a new match statement on `expr`. + pub fn new(expr: impl Into<String>) -> Self { + Self { + expr: expr.into(), + arms: BTreeMap::new(), + catch_all: None, + } + } + + fn set_catch_all(&mut self, clause: String) { + assert!(self.catch_all.is_none()); + self.catch_all = Some(clause); + } + + /// Add an arm that reads fields to the Match statement. + pub fn arm<T: Into<String>, S: Into<String>>(&mut self, name: T, fields: Vec<S>, body: T) { + let name = name.into(); + assert!( + name != "_", + "catch all clause can't extract fields, use arm_no_fields instead." + ); + + let body = body.into(); + let fields = fields.into_iter().map(|x| x.into()).collect(); + let match_arm = self + .arms + .entry((fields, body)) + .or_insert_with(BTreeSet::new); + match_arm.insert(name); + } + + /// Adds an arm that doesn't read anythings from the fields to the Match statement. 
+ pub fn arm_no_fields(&mut self, name: impl Into<String>, body: impl Into<String>) { + let body = body.into(); + + let name = name.into(); + if name == "_" { + self.set_catch_all(body); + return; + } + + let match_arm = self + .arms + .entry((Vec::new(), body)) + .or_insert_with(BTreeSet::new); + match_arm.insert(name); + } +} + +#[cfg(test)] +mod srcgen_tests { + use super::parse_multiline; + use super::Formatter; + use super::Match; + + fn from_raw_string<S: Into<String>>(s: S) -> Vec<String> { + s.into() + .trim() + .split("\n") + .into_iter() + .map(|x| format!("{}\n", x)) + .collect() + } + + #[test] + fn adding_arms_works() { + let mut m = Match::new("x"); + m.arm("Orange", vec!["a", "b"], "some body"); + m.arm("Yellow", vec!["a", "b"], "some body"); + m.arm("Green", vec!["a", "b"], "different body"); + m.arm("Blue", vec!["x", "y"], "some body"); + assert_eq!(m.arms.len(), 3); + + let mut fmt = Formatter::new(); + fmt.add_match(m); + + let expected_lines = from_raw_string( + r#" +match x { + Green { a, b } => { + different body + } + Orange { a, b } | + Yellow { a, b } => { + some body + } + Blue { x, y } => { + some body + } +} + "#, + ); + assert_eq!(fmt.lines, expected_lines); + } + + #[test] + fn match_with_catchall_order() { + // The catchall placeholder must be placed after other clauses. + let mut m = Match::new("x"); + m.arm("Orange", vec!["a", "b"], "some body"); + m.arm("Green", vec!["a", "b"], "different body"); + m.arm_no_fields("_", "unreachable!()"); + assert_eq!(m.arms.len(), 2); // catchall is not counted + + let mut fmt = Formatter::new(); + fmt.add_match(m); + + let expected_lines = from_raw_string( + r#" +match x { + Green { a, b } => { + different body + } + Orange { a, b } => { + some body + } + _ => { + unreachable!() + } +} + "#, + ); + assert_eq!(fmt.lines, expected_lines); + } + + #[test] + fn parse_multiline_works() { + let input = "\n hello\n world\n"; + let expected = vec!["hello", "world"]; + let output = parse_multiline(input); + assert_eq!(output, expected); + } + + #[test] + fn formatter_basic_example_works() { + let mut fmt = Formatter::new(); + fmt.line("Hello line 1"); + fmt.indent_push(); + fmt.comment("Nested comment"); + fmt.indent_pop(); + fmt.line("Back home again"); + let expected_lines = vec![ + "Hello line 1\n", + " // Nested comment\n", + "Back home again\n", + ]; + assert_eq!(fmt.lines, expected_lines); + } + + #[test] + fn get_indent_works() { + let mut fmt = Formatter::new(); + let expected_results = vec!["", " ", " ", ""]; + + let actual_results = Vec::with_capacity(4); + (0..3).for_each(|_| { + fmt.get_indent(); + fmt.indent_push(); + }); + (0..3).for_each(|_| fmt.indent_pop()); + fmt.get_indent(); + + actual_results + .into_iter() + .zip(expected_results.into_iter()) + .for_each(|(actual, expected): (String, &str)| assert_eq!(&actual, expected)); + } + + #[test] + fn fmt_can_add_type_to_lines() { + let mut fmt = Formatter::new(); + fmt.line(format!("pub const {}: Type = Type({:#x});", "example", 0,)); + let expected_lines = vec!["pub const example: Type = Type(0x0);\n"]; + assert_eq!(fmt.lines, expected_lines); + } + + #[test] + fn fmt_can_add_indented_line() { + let mut fmt = Formatter::new(); + fmt.line("hello"); + fmt.indent_push(); + fmt.line("world"); + let expected_lines = vec!["hello\n", " world\n"]; + assert_eq!(fmt.lines, expected_lines); + } + + #[test] + fn fmt_can_add_doc_comments() { + let mut fmt = Formatter::new(); + fmt.doc_comment("documentation\nis\ngood"); + let expected_lines = vec!["/// documentation\n", "/// 
is\n", "/// good\n"]; + assert_eq!(fmt.lines, expected_lines); + } + + #[test] + fn fmt_can_add_doc_comments_with_empty_lines() { + let mut fmt = Formatter::new(); + fmt.doc_comment( + r#"documentation + can be really good. + + If you stick to writing it. +"#, + ); + let expected_lines = from_raw_string( + r#" +/// documentation +/// can be really good. +/// +/// If you stick to writing it."#, + ); + assert_eq!(fmt.lines, expected_lines); + } +} diff --git a/third_party/rust/cranelift-codegen-meta/src/unique_table.rs b/third_party/rust/cranelift-codegen-meta/src/unique_table.rs new file mode 100644 index 0000000000..65ef7e8b4a --- /dev/null +++ b/third_party/rust/cranelift-codegen-meta/src/unique_table.rs @@ -0,0 +1,141 @@ +//! An index-accessed table implementation that avoids duplicate entries. +use std::collections::HashMap; +use std::hash::Hash; +use std::slice; + +/// Collect items into the `table` list, removing duplicates. +pub(crate) struct UniqueTable<'entries, T: Eq + Hash> { + table: Vec<&'entries T>, + map: HashMap<&'entries T, usize>, +} + +impl<'entries, T: Eq + Hash> UniqueTable<'entries, T> { + pub fn new() -> Self { + Self { + table: Vec::new(), + map: HashMap::new(), + } + } + + pub fn add(&mut self, entry: &'entries T) -> usize { + match self.map.get(&entry) { + None => { + let i = self.table.len(); + self.table.push(entry); + self.map.insert(entry, i); + i + } + Some(&i) => i, + } + } + + pub fn len(&self) -> usize { + self.table.len() + } + pub fn get(&self, index: usize) -> &T { + self.table[index] + } + pub fn iter(&self) -> slice::Iter<&'entries T> { + self.table.iter() + } +} + +/// A table of sequences which tries to avoid common subsequences. +pub(crate) struct UniqueSeqTable<T: PartialEq + Clone> { + table: Vec<T>, +} + +impl<T: PartialEq + Clone> UniqueSeqTable<T> { + pub fn new() -> Self { + Self { table: Vec::new() } + } + pub fn add(&mut self, values: &[T]) -> usize { + if values.is_empty() { + return 0; + } + if let Some(offset) = find_subsequence(values, &self.table) { + offset + } else { + let table_len = self.table.len(); + + // Try to put in common the last elements of the table if they're a prefix of the new + // sequence. + // + // We know there wasn't a full match, so the best prefix we can hope to find contains + // all the values but the last one. + let mut start_from = usize::min(table_len, values.len() - 1); + while start_from != 0 { + // Loop invariant: start_from <= table_len, so table_len - start_from >= 0. + if values[0..start_from] == self.table[table_len - start_from..table_len] { + break; + } + start_from -= 1; + } + + self.table + .extend(values[start_from..values.len()].iter().cloned()); + table_len - start_from + } + } + pub fn len(&self) -> usize { + self.table.len() + } + pub fn iter(&self) -> slice::Iter<T> { + self.table.iter() + } +} + +/// Try to find the subsequence `sub` in the `whole` sequence. Returns None if +/// it's not been found, or Some(index) if it has been. Naive implementation +/// until proven we need something better. +fn find_subsequence<T: PartialEq>(sub: &[T], whole: &[T]) -> Option<usize> { + assert!(!sub.is_empty()); + // We want i + sub.len() <= whole.len(), i.e. i < whole.len() + 1 - sub.len(). + if whole.len() < sub.len() { + return None; + } + let max = whole.len() - sub.len(); + for i in 0..=max { + if whole[i..i + sub.len()] == sub[..] 
{ + return Some(i); + } + } + None +} + +#[test] +fn test_find_subsequence() { + assert_eq!(find_subsequence(&vec![1], &vec![4]), None); + assert_eq!(find_subsequence(&vec![1], &vec![1]), Some(0)); + assert_eq!(find_subsequence(&vec![1, 2], &vec![1]), None); + assert_eq!(find_subsequence(&vec![1, 2], &vec![1, 2]), Some(0)); + assert_eq!(find_subsequence(&vec![1, 2], &vec![1, 3]), None); + assert_eq!(find_subsequence(&vec![1, 2], &vec![0, 1, 2]), Some(1)); + assert_eq!(find_subsequence(&vec![1, 2], &vec![0, 1, 3, 1]), None); + assert_eq!(find_subsequence(&vec![1, 2], &vec![0, 1, 3, 1, 2]), Some(3)); + assert_eq!( + find_subsequence(&vec![1, 1, 3], &vec![1, 1, 1, 3, 3]), + Some(1) + ); +} + +#[test] +fn test_optimal_add() { + let mut seq_table = UniqueSeqTable::new(); + // [0, 1, 2, 3] + assert_eq!(seq_table.add(&vec![0, 1, 2, 3]), 0); + assert_eq!(seq_table.add(&vec![0, 1, 2, 3]), 0); + assert_eq!(seq_table.add(&vec![1, 2, 3]), 1); + assert_eq!(seq_table.add(&vec![2, 3]), 2); + assert_eq!(seq_table.len(), 4); + // [0, 1, 2, 3, 4] + assert_eq!(seq_table.add(&vec![2, 3, 4]), 2); + assert_eq!(seq_table.len(), 5); + // [0, 1, 2, 3, 4, 6, 5, 7] + assert_eq!(seq_table.add(&vec![4, 6, 5, 7]), 4); + assert_eq!(seq_table.len(), 8); + // [0, 1, 2, 3, 4, 6, 5, 7, 8, 2, 3, 4] + assert_eq!(seq_table.add(&vec![8, 2, 3, 4]), 8); + assert_eq!(seq_table.add(&vec![8]), 8); + assert_eq!(seq_table.len(), 12); +} |
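A hypothetical usage sketch of `UniqueTable` (the type is `pub(crate)`, so this is illustrative rather than an external API): adding a duplicate entry returns the index of the first occurrence instead of growing the table.

    #[test]
    fn unique_table_usage_sketch() {
        let band = "band".to_string();
        let bor = "bor".to_string();

        let mut table = UniqueTable::new();
        let i_band = table.add(&band);
        let i_bor = table.add(&bor);

        // Duplicate entries are not stored again; the original index comes back.
        assert_eq!(table.add(&band), i_band);
        assert_eq!(table.get(i_bor), &bor);
        assert_eq!(table.len(), 2);
    }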