Diffstat (limited to 'third_party/rust/cranelift-codegen-meta/src')
-rw-r--r--  third_party/rust/cranelift-codegen-meta/src/cdsl/ast.rs  753
-rw-r--r--  third_party/rust/cranelift-codegen-meta/src/cdsl/cpu_modes.rs  88
-rw-r--r--  third_party/rust/cranelift-codegen-meta/src/cdsl/encodings.rs  179
-rw-r--r--  third_party/rust/cranelift-codegen-meta/src/cdsl/formats.rs  171
-rw-r--r--  third_party/rust/cranelift-codegen-meta/src/cdsl/instructions.rs  1395
-rw-r--r--  third_party/rust/cranelift-codegen-meta/src/cdsl/isa.rs  99
-rw-r--r--  third_party/rust/cranelift-codegen-meta/src/cdsl/mod.rs  89
-rw-r--r--  third_party/rust/cranelift-codegen-meta/src/cdsl/operands.rs  173
-rw-r--r--  third_party/rust/cranelift-codegen-meta/src/cdsl/recipes.rs  298
-rw-r--r--  third_party/rust/cranelift-codegen-meta/src/cdsl/regs.rs  412
-rw-r--r--  third_party/rust/cranelift-codegen-meta/src/cdsl/settings.rs  407
-rw-r--r--  third_party/rust/cranelift-codegen-meta/src/cdsl/type_inference.rs  660
-rw-r--r--  third_party/rust/cranelift-codegen-meta/src/cdsl/types.rs  587
-rw-r--r--  third_party/rust/cranelift-codegen-meta/src/cdsl/typevar.rs  1274
-rw-r--r--  third_party/rust/cranelift-codegen-meta/src/cdsl/xform.rs  484
-rw-r--r--  third_party/rust/cranelift-codegen-meta/src/default_map.rs  20
-rw-r--r--  third_party/rust/cranelift-codegen-meta/src/error.rs  48
-rw-r--r--  third_party/rust/cranelift-codegen-meta/src/gen_binemit.rs  224
-rw-r--r--  third_party/rust/cranelift-codegen-meta/src/gen_encodings.rs  1139
-rw-r--r--  third_party/rust/cranelift-codegen-meta/src/gen_inst.rs  1184
-rw-r--r--  third_party/rust/cranelift-codegen-meta/src/gen_legalizer.rs  734
-rw-r--r--  third_party/rust/cranelift-codegen-meta/src/gen_registers.rs  148
-rw-r--r--  third_party/rust/cranelift-codegen-meta/src/gen_settings.rs  447
-rw-r--r--  third_party/rust/cranelift-codegen-meta/src/gen_types.rs  76
-rw-r--r--  third_party/rust/cranelift-codegen-meta/src/isa/arm32/mod.rs  88
-rw-r--r--  third_party/rust/cranelift-codegen-meta/src/isa/arm64/mod.rs  79
-rw-r--r--  third_party/rust/cranelift-codegen-meta/src/isa/mod.rs  67
-rw-r--r--  third_party/rust/cranelift-codegen-meta/src/isa/riscv/encodings.rs  431
-rw-r--r--  third_party/rust/cranelift-codegen-meta/src/isa/riscv/mod.rs  134
-rw-r--r--  third_party/rust/cranelift-codegen-meta/src/isa/riscv/recipes.rs  279
-rw-r--r--  third_party/rust/cranelift-codegen-meta/src/isa/x86/encodings.rs  2726
-rw-r--r--  third_party/rust/cranelift-codegen-meta/src/isa/x86/instructions.rs  723
-rw-r--r--  third_party/rust/cranelift-codegen-meta/src/isa/x86/legalize.rs  829
-rw-r--r--  third_party/rust/cranelift-codegen-meta/src/isa/x86/mod.rs  88
-rw-r--r--  third_party/rust/cranelift-codegen-meta/src/isa/x86/opcodes.rs  721
-rw-r--r--  third_party/rust/cranelift-codegen-meta/src/isa/x86/recipes.rs  3445
-rw-r--r--  third_party/rust/cranelift-codegen-meta/src/isa/x86/registers.rs  43
-rw-r--r--  third_party/rust/cranelift-codegen-meta/src/isa/x86/settings.rs  135
-rw-r--r--  third_party/rust/cranelift-codegen-meta/src/lib.rs  124
-rw-r--r--  third_party/rust/cranelift-codegen-meta/src/shared/entities.rs  73
-rw-r--r--  third_party/rust/cranelift-codegen-meta/src/shared/formats.rs  330
-rw-r--r--  third_party/rust/cranelift-codegen-meta/src/shared/immediates.rs  175
-rw-r--r--  third_party/rust/cranelift-codegen-meta/src/shared/instructions.rs  4514
-rw-r--r--  third_party/rust/cranelift-codegen-meta/src/shared/legalize.rs  1087
-rw-r--r--  third_party/rust/cranelift-codegen-meta/src/shared/mod.rs  101
-rw-r--r--  third_party/rust/cranelift-codegen-meta/src/shared/settings.rs  287
-rw-r--r--  third_party/rust/cranelift-codegen-meta/src/shared/types.rs  236
-rw-r--r--  third_party/rust/cranelift-codegen-meta/src/srcgen.rs  484
-rw-r--r--  third_party/rust/cranelift-codegen-meta/src/unique_table.rs  141
49 files changed, 28429 insertions, 0 deletions
diff --git a/third_party/rust/cranelift-codegen-meta/src/cdsl/ast.rs b/third_party/rust/cranelift-codegen-meta/src/cdsl/ast.rs
new file mode 100644
index 0000000000..82cdbad762
--- /dev/null
+++ b/third_party/rust/cranelift-codegen-meta/src/cdsl/ast.rs
@@ -0,0 +1,753 @@
+use crate::cdsl::instructions::{InstSpec, Instruction, InstructionPredicate};
+use crate::cdsl::operands::{OperandKind, OperandKindFields};
+use crate::cdsl::types::ValueType;
+use crate::cdsl::typevar::{TypeSetBuilder, TypeVar};
+
+use cranelift_entity::{entity_impl, PrimaryMap, SparseMap, SparseMapValue};
+
+use std::fmt;
+use std::iter::IntoIterator;
+
+pub(crate) enum Expr {
+ Var(VarIndex),
+ Literal(Literal),
+}
+
+impl Expr {
+ pub fn maybe_literal(&self) -> Option<&Literal> {
+ match &self {
+ Expr::Literal(lit) => Some(lit),
+ _ => None,
+ }
+ }
+
+ pub fn maybe_var(&self) -> Option<VarIndex> {
+ if let Expr::Var(var) = &self {
+ Some(*var)
+ } else {
+ None
+ }
+ }
+
+ pub fn unwrap_var(&self) -> VarIndex {
+ self.maybe_var()
+ .expect("tried to unwrap a non-Var content in Expr::unwrap_var")
+ }
+
+ pub fn to_rust_code(&self, var_pool: &VarPool) -> String {
+ match self {
+ Expr::Var(var_index) => var_pool.get(*var_index).to_rust_code(),
+ Expr::Literal(literal) => literal.to_rust_code(),
+ }
+ }
+}
+
+/// An AST definition associates a set of variables with the values produced by an expression.
+pub(crate) struct Def {
+ pub apply: Apply,
+ pub defined_vars: Vec<VarIndex>,
+}
+
+impl Def {
+ pub fn to_comment_string(&self, var_pool: &VarPool) -> String {
+ let results = self
+ .defined_vars
+ .iter()
+ .map(|&x| var_pool.get(x).name.as_str())
+ .collect::<Vec<_>>();
+
+ let results = if results.len() == 1 {
+ results[0].to_string()
+ } else {
+ format!("({})", results.join(", "))
+ };
+
+ format!("{} := {}", results, self.apply.to_comment_string(var_pool))
+ }
+}
+
+pub(crate) struct DefPool {
+ pool: PrimaryMap<DefIndex, Def>,
+}
+
+impl DefPool {
+ pub fn new() -> Self {
+ Self {
+ pool: PrimaryMap::new(),
+ }
+ }
+ pub fn get(&self, index: DefIndex) -> &Def {
+ self.pool.get(index).unwrap()
+ }
+ pub fn next_index(&self) -> DefIndex {
+ self.pool.next_key()
+ }
+ pub fn create_inst(&mut self, apply: Apply, defined_vars: Vec<VarIndex>) -> DefIndex {
+ self.pool.push(Def {
+ apply,
+ defined_vars,
+ })
+ }
+}
+
+#[derive(Copy, Clone, PartialEq, Eq, Hash, PartialOrd, Ord)]
+pub(crate) struct DefIndex(u32);
+entity_impl!(DefIndex);
+
+/// A definition which leads to the creation of a block.
+#[derive(Clone)]
+pub(crate) struct Block {
+ /// Instruction index after which the block entry is set.
+ pub location: DefIndex,
+ /// Variable holding the newly created block.
+ pub name: VarIndex,
+}
+
+pub(crate) struct BlockPool {
+ pool: SparseMap<DefIndex, Block>,
+}
+
+impl SparseMapValue<DefIndex> for Block {
+ fn key(&self) -> DefIndex {
+ self.location
+ }
+}
+
+impl BlockPool {
+ pub fn new() -> Self {
+ Self {
+ pool: SparseMap::new(),
+ }
+ }
+ pub fn get(&self, index: DefIndex) -> Option<&Block> {
+ self.pool.get(index)
+ }
+ pub fn create_block(&mut self, name: VarIndex, location: DefIndex) {
+ if self.pool.contains_key(location) {
+ panic!("Attempt to insert 2 blocks after the same instruction")
+ }
+ self.pool.insert(Block { location, name });
+ }
+ pub fn is_empty(&self) -> bool {
+ self.pool.is_empty()
+ }
+}
+
+// Implement IntoIterator such that we can iterate over blocks which are in the block pool.
+impl<'a> IntoIterator for &'a BlockPool {
+ type Item = <&'a SparseMap<DefIndex, Block> as IntoIterator>::Item;
+ type IntoIter = <&'a SparseMap<DefIndex, Block> as IntoIterator>::IntoIter;
+
+ fn into_iter(self) -> Self::IntoIter {
+ self.pool.into_iter()
+ }
+}
+
+#[derive(Clone, Debug)]
+pub(crate) enum Literal {
+ /// A value of an enumerated immediate operand.
+ ///
+ /// Some immediate operand kinds like `intcc` and `floatcc` have an enumerated range of values
+ /// corresponding to a Rust enum type. An `Enumerator` object is an AST leaf node representing one
+ /// of the values.
+ Enumerator {
+ rust_type: &'static str,
+ value: &'static str,
+ },
+
+ /// A bitwise value of an immediate operand, used for bitwise exact floating point constants.
+ Bits { rust_type: &'static str, value: u64 },
+
+ /// A value of an integer immediate operand.
+ Int(i64),
+
+ /// An empty list of variable arguments.
+ EmptyVarArgs,
+}
+
+impl Literal {
+ pub fn enumerator_for(kind: &OperandKind, value: &'static str) -> Self {
+ let value = match &kind.fields {
+ OperandKindFields::ImmEnum(values) => values.get(value).unwrap_or_else(|| {
+ panic!(
+ "nonexistent value '{}' in enumeration '{}'",
+ value, kind.rust_type
+ )
+ }),
+ _ => panic!("enumerator is for enum values"),
+ };
+ Literal::Enumerator {
+ rust_type: kind.rust_type,
+ value,
+ }
+ }
+
+ pub fn bits(kind: &OperandKind, bits: u64) -> Self {
+ match kind.fields {
+ OperandKindFields::ImmValue => {}
+ _ => panic!("bits_of is for immediate scalar types"),
+ }
+ Literal::Bits {
+ rust_type: kind.rust_type,
+ value: bits,
+ }
+ }
+
+ pub fn constant(kind: &OperandKind, value: i64) -> Self {
+ match kind.fields {
+ OperandKindFields::ImmValue => {}
+ _ => panic!("constant is for immediate scalar types"),
+ }
+ Literal::Int(value)
+ }
+
+ pub fn empty_vararg() -> Self {
+ Literal::EmptyVarArgs
+ }
+
+ pub fn to_rust_code(&self) -> String {
+ match self {
+ Literal::Enumerator { rust_type, value } => format!("{}::{}", rust_type, value),
+ Literal::Bits { rust_type, value } => format!("{}::with_bits({:#x})", rust_type, value),
+ Literal::Int(val) => val.to_string(),
+ Literal::EmptyVarArgs => "&[]".into(),
+ }
+ }
+}
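Editor's note: the `Literal` constructors above are what ISA definitions use when building encoding predicates and legalization patterns. A minimal, crate-internal sketch; the `intcc` operand kind and its "eq" mapping are assumptions about shared/immediates.rs, not taken from this file:

    use crate::cdsl::ast::Literal;
    use crate::cdsl::operands::OperandKind;

    // Build an enum literal and render it as Rust source; this panics if "eq" is
    // not a key of the ImmEnum backing `intcc` (see enumerator_for above).
    fn intcc_eq(intcc: &OperandKind) -> String {
        Literal::enumerator_for(intcc, "eq").to_rust_code() // e.g. "IntCC::Equal"
    }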
+
+#[derive(Clone, Copy, Debug)]
+pub(crate) enum PatternPosition {
+ Source,
+ Destination,
+}
+
+/// A free variable.
+///
+/// When variables are used in `XForms` with source and destination patterns, they are classified
+/// as follows:
+///
+/// Input values: Uses in the source pattern with no preceding def. These may appear as inputs in
+/// the destination pattern too, but no new inputs can be introduced.
+///
+/// Output values: Variables that are defined in both the source and destination pattern. These
+/// values may have uses outside the source pattern, and the destination pattern must compute the
+/// same value.
+///
+/// Intermediate values: Values that are defined in the source pattern, but not in the destination
+/// pattern. These may have uses outside the source pattern, so the defining instruction can't be
+/// deleted immediately.
+///
+/// Temporary values are defined only in the destination pattern.
+pub(crate) struct Var {
+ pub name: String,
+
+ /// The `Def` defining this variable in a source pattern.
+ pub src_def: Option<DefIndex>,
+
+ /// The `Def` defining this variable in a destination pattern.
+ pub dst_def: Option<DefIndex>,
+
+ /// TypeVar representing the type of this variable.
+ type_var: Option<TypeVar>,
+
+ /// Is this the original type variable, or has it been redefined with set_typevar?
+ is_original_type_var: bool,
+}
+
+impl Var {
+ fn new(name: String) -> Self {
+ Self {
+ name,
+ src_def: None,
+ dst_def: None,
+ type_var: None,
+ is_original_type_var: false,
+ }
+ }
+
+ /// Is this an input value to the src pattern?
+ pub fn is_input(&self) -> bool {
+ self.src_def.is_none() && self.dst_def.is_none()
+ }
+
+ /// Is this an output value, defined in both src and dst patterns?
+ pub fn is_output(&self) -> bool {
+ self.src_def.is_some() && self.dst_def.is_some()
+ }
+
+ /// Is this an intermediate value, defined only in the src pattern?
+ pub fn is_intermediate(&self) -> bool {
+ self.src_def.is_some() && self.dst_def.is_none()
+ }
+
+ /// Is this a temp value, defined only in the dst pattern?
+ pub fn is_temp(&self) -> bool {
+ self.src_def.is_none() && self.dst_def.is_some()
+ }
+
+ /// Get the def of this variable according to the position.
+ pub fn get_def(&self, position: PatternPosition) -> Option<DefIndex> {
+ match position {
+ PatternPosition::Source => self.src_def,
+ PatternPosition::Destination => self.dst_def,
+ }
+ }
+
+ pub fn set_def(&mut self, position: PatternPosition, def: DefIndex) {
+ assert!(
+ self.get_def(position).is_none(),
+ format!("redefinition of variable {}", self.name)
+ );
+ match position {
+ PatternPosition::Source => {
+ self.src_def = Some(def);
+ }
+ PatternPosition::Destination => {
+ self.dst_def = Some(def);
+ }
+ }
+ }
+
+ /// Get the type variable representing the type of this variable.
+ pub fn get_or_create_typevar(&mut self) -> TypeVar {
+ match &self.type_var {
+ Some(tv) => tv.clone(),
+ None => {
+ // Create a new type var in which we allow all types.
+ let tv = TypeVar::new(
+ format!("typeof_{}", self.name),
+ format!("Type of the pattern variable {:?}", self),
+ TypeSetBuilder::all(),
+ );
+ self.type_var = Some(tv.clone());
+ self.is_original_type_var = true;
+ tv
+ }
+ }
+ }
+ pub fn get_typevar(&self) -> Option<TypeVar> {
+ self.type_var.clone()
+ }
+ pub fn set_typevar(&mut self, tv: TypeVar) {
+ self.is_original_type_var = if let Some(previous_tv) = &self.type_var {
+ *previous_tv == tv
+ } else {
+ false
+ };
+ self.type_var = Some(tv);
+ }
+
+ /// Check if this variable has a free type variable. If not, the type of this variable is
+ /// computed from the type of another variable.
+ pub fn has_free_typevar(&self) -> bool {
+ match &self.type_var {
+ Some(tv) => tv.base.is_none() && self.is_original_type_var,
+ None => false,
+ }
+ }
+
+ pub fn to_rust_code(&self) -> String {
+ self.name.clone()
+ }
+ fn rust_type(&self) -> String {
+ self.type_var.as_ref().unwrap().to_rust_code()
+ }
+}
+
+impl fmt::Debug for Var {
+ fn fmt(&self, fmt: &mut fmt::Formatter) -> Result<(), fmt::Error> {
+ fmt.write_fmt(format_args!(
+ "Var({}{}{})",
+ self.name,
+ if self.src_def.is_some() { ", src" } else { "" },
+ if self.dst_def.is_some() { ", dst" } else { "" }
+ ))
+ }
+}
+
+#[derive(Copy, Clone, Debug, PartialEq, Eq, Hash, PartialOrd, Ord)]
+pub(crate) struct VarIndex(u32);
+entity_impl!(VarIndex);
+
+pub(crate) struct VarPool {
+ pool: PrimaryMap<VarIndex, Var>,
+}
+
+impl VarPool {
+ pub fn new() -> Self {
+ Self {
+ pool: PrimaryMap::new(),
+ }
+ }
+ pub fn get(&self, index: VarIndex) -> &Var {
+ self.pool.get(index).unwrap()
+ }
+ pub fn get_mut(&mut self, index: VarIndex) -> &mut Var {
+ self.pool.get_mut(index).unwrap()
+ }
+ pub fn create(&mut self, name: impl Into<String>) -> VarIndex {
+ self.pool.push(Var::new(name.into()))
+ }
+}
+
+/// Contains constants created in the AST that must be inserted into the true [ConstantPool] when
+/// the legalizer code is generated. The constant data is named in the order it is inserted;
+/// inserting data using [insert] will avoid duplicates.
+///
+/// [ConstantPool]: ../../../cranelift_codegen/ir/constant/struct.ConstantPool.html
+/// [insert]: ConstPool::insert
+pub(crate) struct ConstPool {
+ pool: Vec<Vec<u8>>,
+}
+
+impl ConstPool {
+ /// Create an empty constant pool.
+ pub fn new() -> Self {
+ Self { pool: vec![] }
+ }
+
+ /// Create a name for a constant from its position in the pool.
+ fn create_name(position: usize) -> String {
+ format!("const{}", position)
+ }
+
+ /// Insert constant data into the pool, returning the name of the variable used to reference it.
+ /// This method will search for data that matches the new data and return the existing constant
+ /// name to avoid duplicates.
+ pub fn insert(&mut self, data: Vec<u8>) -> String {
+ let possible_position = self.pool.iter().position(|d| d == &data);
+ let position = if let Some(found_position) = possible_position {
+ found_position
+ } else {
+ let new_position = self.pool.len();
+ self.pool.push(data);
+ new_position
+ };
+ ConstPool::create_name(position)
+ }
+
+ /// Iterate over the name/value pairs in the pool.
+ pub fn iter(&self) -> impl Iterator<Item = (String, &Vec<u8>)> {
+ self.pool
+ .iter()
+ .enumerate()
+ .map(|(i, v)| (ConstPool::create_name(i), v))
+ }
+}
+
+/// Apply an instruction to arguments.
+///
+/// An `Apply` AST expression is created by using function call syntax on instructions. This
+/// applies to both bound and unbound polymorphic instructions.
+pub(crate) struct Apply {
+ pub inst: Instruction,
+ pub args: Vec<Expr>,
+ pub value_types: Vec<ValueType>,
+}
+
+impl Apply {
+ pub fn new(target: InstSpec, args: Vec<Expr>) -> Self {
+ let (inst, value_types) = match target {
+ InstSpec::Inst(inst) => (inst, Vec::new()),
+ InstSpec::Bound(bound_inst) => (bound_inst.inst, bound_inst.value_types),
+ };
+
+ // Apply should only operate on concrete value types, not "any".
+ let value_types = value_types
+ .into_iter()
+ .map(|vt| vt.expect("shouldn't be Any"))
+ .collect();
+
+ // Basic check on number of arguments.
+ assert!(
+ inst.operands_in.len() == args.len(),
+ format!("incorrect number of arguments in instruction {}", inst.name)
+ );
+
+ // Check that the kinds of Literals arguments match the expected operand.
+ for &imm_index in &inst.imm_opnums {
+ let arg = &args[imm_index];
+ if let Some(literal) = arg.maybe_literal() {
+ let op = &inst.operands_in[imm_index];
+ match &op.kind.fields {
+ OperandKindFields::ImmEnum(values) => {
+ if let Literal::Enumerator { value, .. } = literal {
+ assert!(
+ values.iter().any(|(_key, v)| v == value),
+ "Nonexistent enum value '{}' passed to field of kind '{}' -- \
+ did you use the right enum?",
+ value,
+ op.kind.rust_type
+ );
+ } else {
+ panic!(
+ "Passed non-enum field value {:?} to field of kind {}",
+ literal, op.kind.rust_type
+ );
+ }
+ }
+ OperandKindFields::ImmValue => match &literal {
+ Literal::Enumerator { value, .. } => panic!(
+ "Expected immediate value in immediate field of kind '{}', \
+ obtained enum value '{}'",
+ op.kind.rust_type, value
+ ),
+ Literal::Bits { .. } | Literal::Int(_) | Literal::EmptyVarArgs => {}
+ },
+ _ => {
+ panic!(
+ "Literal passed to non-literal field of kind {}",
+ op.kind.rust_type
+ );
+ }
+ }
+ }
+ }
+
+ Self {
+ inst,
+ args,
+ value_types,
+ }
+ }
+
+ fn to_comment_string(&self, var_pool: &VarPool) -> String {
+ let args = self
+ .args
+ .iter()
+ .map(|arg| arg.to_rust_code(var_pool))
+ .collect::<Vec<_>>()
+ .join(", ");
+
+ let mut inst_and_bound_types = vec![self.inst.name.to_string()];
+ inst_and_bound_types.extend(self.value_types.iter().map(|vt| vt.to_string()));
+ let inst_name = inst_and_bound_types.join(".");
+
+ format!("{}({})", inst_name, args)
+ }
+
+ pub fn inst_predicate(&self, var_pool: &VarPool) -> InstructionPredicate {
+ let mut pred = InstructionPredicate::new();
+ for (format_field, &op_num) in self
+ .inst
+ .format
+ .imm_fields
+ .iter()
+ .zip(self.inst.imm_opnums.iter())
+ {
+ let arg = &self.args[op_num];
+ if arg.maybe_var().is_some() {
+ // Ignore free variables for now.
+ continue;
+ }
+ pred = pred.and(InstructionPredicate::new_is_field_equal_ast(
+ &*self.inst.format,
+ format_field,
+ arg.to_rust_code(var_pool),
+ ));
+ }
+
+ // Add checks for any bound secondary type variables. We can't check the controlling type
+ // variable this way since it may not appear as the type of an operand.
+ if self.value_types.len() > 1 {
+ let poly = self
+ .inst
+ .polymorphic_info
+ .as_ref()
+ .expect("must have polymorphic info if it has bounded types");
+ for (bound_type, type_var) in
+ self.value_types[1..].iter().zip(poly.other_typevars.iter())
+ {
+ pred = pred.and(InstructionPredicate::new_typevar_check(
+ &self.inst, type_var, bound_type,
+ ));
+ }
+ }
+
+ pred
+ }
+
+ /// Same as `inst_predicate()`, but also check the controlling type variable.
+ pub fn inst_predicate_with_ctrl_typevar(&self, var_pool: &VarPool) -> InstructionPredicate {
+ let mut pred = self.inst_predicate(var_pool);
+
+ if !self.value_types.is_empty() {
+ let bound_type = &self.value_types[0];
+ let poly = self.inst.polymorphic_info.as_ref().unwrap();
+ let type_check = if poly.use_typevar_operand {
+ InstructionPredicate::new_typevar_check(&self.inst, &poly.ctrl_typevar, bound_type)
+ } else {
+ InstructionPredicate::new_ctrl_typevar_check(&bound_type)
+ };
+ pred = pred.and(type_check);
+ }
+
+ pred
+ }
+
+ pub fn rust_builder(&self, defined_vars: &[VarIndex], var_pool: &VarPool) -> String {
+ let mut args = self
+ .args
+ .iter()
+ .map(|expr| expr.to_rust_code(var_pool))
+ .collect::<Vec<_>>()
+ .join(", ");
+
+ // Do we need to pass an explicit type argument?
+ if let Some(poly) = &self.inst.polymorphic_info {
+ if !poly.use_typevar_operand {
+ args = format!("{}, {}", var_pool.get(defined_vars[0]).rust_type(), args);
+ }
+ }
+
+ format!("{}({})", self.inst.snake_name(), args)
+ }
+}
+
+// Simple helpers for legalize actions construction.
+
+pub(crate) enum DummyExpr {
+ Var(DummyVar),
+ Literal(Literal),
+ Constant(DummyConstant),
+ Apply(InstSpec, Vec<DummyExpr>),
+ Block(DummyVar),
+}
+
+#[derive(Clone)]
+pub(crate) struct DummyVar {
+ pub name: String,
+}
+
+impl Into<DummyExpr> for DummyVar {
+ fn into(self) -> DummyExpr {
+ DummyExpr::Var(self)
+ }
+}
+impl Into<DummyExpr> for Literal {
+ fn into(self) -> DummyExpr {
+ DummyExpr::Literal(self)
+ }
+}
+
+#[derive(Clone)]
+pub(crate) struct DummyConstant(pub(crate) Vec<u8>);
+
+pub(crate) fn constant(data: Vec<u8>) -> DummyConstant {
+ DummyConstant(data)
+}
+
+impl Into<DummyExpr> for DummyConstant {
+ fn into(self) -> DummyExpr {
+ DummyExpr::Constant(self)
+ }
+}
+
+pub(crate) fn var(name: &str) -> DummyVar {
+ DummyVar {
+ name: name.to_owned(),
+ }
+}
+
+pub(crate) struct DummyDef {
+ pub expr: DummyExpr,
+ pub defined_vars: Vec<DummyVar>,
+}
+
+pub(crate) struct ExprBuilder {
+ expr: DummyExpr,
+}
+
+impl ExprBuilder {
+ pub fn apply(inst: InstSpec, args: Vec<DummyExpr>) -> Self {
+ let expr = DummyExpr::Apply(inst, args);
+ Self { expr }
+ }
+
+ pub fn assign_to(self, defined_vars: Vec<DummyVar>) -> DummyDef {
+ DummyDef {
+ expr: self.expr,
+ defined_vars,
+ }
+ }
+
+ pub fn block(name: DummyVar) -> Self {
+ let expr = DummyExpr::Block(name);
+ Self { expr }
+ }
+}
+
+macro_rules! def_rhs {
+ // inst(a, b, c)
+ ($inst:ident($($src:expr),*)) => {
+ ExprBuilder::apply($inst.into(), vec![$($src.clone().into()),*])
+ };
+
+ // inst.type(a, b, c)
+ ($inst:ident.$type:ident($($src:expr),*)) => {
+ ExprBuilder::apply($inst.bind($type).into(), vec![$($src.clone().into()),*])
+ };
+}
+
+// Helper macro to define legalization recipes.
+macro_rules! def {
+ // x = ...
+ ($dest:ident = $($tt:tt)*) => {
+ def_rhs!($($tt)*).assign_to(vec![$dest.clone()])
+ };
+
+ // (x, y, ...) = ...
+ (($($dest:ident),*) = $($tt:tt)*) => {
+ def_rhs!($($tt)*).assign_to(vec![$($dest.clone()),*])
+ };
+
+ // An instruction with no results.
+ ($($tt:tt)*) => {
+ def_rhs!($($tt)*).assign_to(Vec::new())
+ }
+}
+
+// Helper macro to define legalization recipes.
+macro_rules! block {
+ // a basic block definition, splitting the current block in 2.
+ ($block: ident) => {
+ ExprBuilder::block($block).assign_to(Vec::new())
+ };
+}
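Editor's note: for reference, this is roughly what the `def!` shorthand expands to, written out against the builders defined above. A sketch only; `inst_spec` stands in for the `$inst.into()` conversion, since the instruction handles themselves live in the shared instruction definitions rather than in this file:

    use crate::cdsl::ast::{var, DummyDef, ExprBuilder};
    use crate::cdsl::instructions::InstSpec;

    // Hand-expanded form of `def!(a = iadd(x, y))`.
    fn expanded_def(inst_spec: InstSpec) -> DummyDef {
        let x = var("x");
        let y = var("y");
        let a = var("a");
        // Apply the instruction to the two variables, then record `a` as the
        // variable defined by this pattern step.
        ExprBuilder::apply(inst_spec, vec![x.into(), y.into()]).assign_to(vec![a])
    }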
+
+#[cfg(test)]
+mod tests {
+ use crate::cdsl::ast::ConstPool;
+
+ #[test]
+ fn const_pool_returns_var_names() {
+ let mut c = ConstPool::new();
+ assert_eq!(c.insert([0, 1, 2].to_vec()), "const0");
+ assert_eq!(c.insert([1, 2, 3].to_vec()), "const1");
+ }
+
+ #[test]
+ fn const_pool_avoids_duplicates() {
+ let data = [0, 1, 2].to_vec();
+ let mut c = ConstPool::new();
+ assert_eq!(c.pool.len(), 0);
+
+ assert_eq!(c.insert(data.clone()), "const0");
+ assert_eq!(c.pool.len(), 1);
+
+ assert_eq!(c.insert(data), "const0");
+ assert_eq!(c.pool.len(), 1);
+ }
+
+ #[test]
+ fn const_pool_iterates() {
+ let mut c = ConstPool::new();
+ c.insert([0, 1, 2].to_vec());
+ c.insert([3, 4, 5].to_vec());
+
+ let mut iter = c.iter();
+ assert_eq!(iter.next(), Some(("const0".to_owned(), &vec![0, 1, 2])));
+ assert_eq!(iter.next(), Some(("const1".to_owned(), &vec![3, 4, 5])));
+ assert_eq!(iter.next(), None);
+ }
+}
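Editor's note: taken together, `VarPool` and `Var` classify pattern variables by where they are defined. A minimal crate-internal sketch using only the APIs defined in this file:

    use crate::cdsl::ast::VarPool;

    fn classify_var() {
        let mut vars = VarPool::new();
        let x = vars.create("x");

        // No src/dst definition has been recorded, so `x` counts as an input to
        // the source pattern rather than an output or temporary.
        assert!(vars.get(x).is_input());
        assert!(!vars.get(x).is_output());

        // Asking for a type variable lazily creates one ranging over all types.
        let _tv = vars.get_mut(x).get_or_create_typevar();
    }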
diff --git a/third_party/rust/cranelift-codegen-meta/src/cdsl/cpu_modes.rs b/third_party/rust/cranelift-codegen-meta/src/cdsl/cpu_modes.rs
new file mode 100644
index 0000000000..7d119b00ce
--- /dev/null
+++ b/third_party/rust/cranelift-codegen-meta/src/cdsl/cpu_modes.rs
@@ -0,0 +1,88 @@
+use std::collections::{hash_map, HashMap, HashSet};
+use std::iter::FromIterator;
+
+use crate::cdsl::encodings::Encoding;
+use crate::cdsl::types::{LaneType, ValueType};
+use crate::cdsl::xform::{TransformGroup, TransformGroupIndex};
+
+pub(crate) struct CpuMode {
+ pub name: &'static str,
+ default_legalize: Option<TransformGroupIndex>,
+ monomorphic_legalize: Option<TransformGroupIndex>,
+ typed_legalize: HashMap<ValueType, TransformGroupIndex>,
+ pub encodings: Vec<Encoding>,
+}
+
+impl CpuMode {
+ pub fn new(name: &'static str) -> Self {
+ Self {
+ name,
+ default_legalize: None,
+ monomorphic_legalize: None,
+ typed_legalize: HashMap::new(),
+ encodings: Vec::new(),
+ }
+ }
+
+ pub fn set_encodings(&mut self, encodings: Vec<Encoding>) {
+ assert!(self.encodings.is_empty(), "clobbering encodings");
+ self.encodings = encodings;
+ }
+
+ pub fn legalize_monomorphic(&mut self, group: &TransformGroup) {
+ assert!(self.monomorphic_legalize.is_none());
+ self.monomorphic_legalize = Some(group.id);
+ }
+ pub fn legalize_default(&mut self, group: &TransformGroup) {
+ assert!(self.default_legalize.is_none());
+ self.default_legalize = Some(group.id);
+ }
+ pub fn legalize_value_type(&mut self, lane_type: impl Into<ValueType>, group: &TransformGroup) {
+ assert!(self
+ .typed_legalize
+ .insert(lane_type.into(), group.id)
+ .is_none());
+ }
+ pub fn legalize_type(&mut self, lane_type: impl Into<LaneType>, group: &TransformGroup) {
+ assert!(self
+ .typed_legalize
+ .insert(lane_type.into().into(), group.id)
+ .is_none());
+ }
+
+ pub fn get_default_legalize_code(&self) -> TransformGroupIndex {
+ self.default_legalize
+ .expect("a finished CpuMode must have a default legalize code")
+ }
+ pub fn get_legalize_code_for(&self, typ: &Option<ValueType>) -> TransformGroupIndex {
+ match typ {
+ Some(typ) => self
+ .typed_legalize
+ .get(typ)
+ .copied()
+ .unwrap_or_else(|| self.get_default_legalize_code()),
+ None => self
+ .monomorphic_legalize
+ .unwrap_or_else(|| self.get_default_legalize_code()),
+ }
+ }
+ pub fn get_legalized_types(&self) -> hash_map::Keys<ValueType, TransformGroupIndex> {
+ self.typed_legalize.keys()
+ }
+
+ /// Returns a deterministically ordered, deduplicated list of TransformGroupIndex for the
+ /// directly reachable set of TransformGroups this CpuMode uses.
+ pub fn direct_transform_groups(&self) -> Vec<TransformGroupIndex> {
+ let mut set = HashSet::new();
+ if let Some(i) = &self.default_legalize {
+ set.insert(*i);
+ }
+ if let Some(i) = &self.monomorphic_legalize {
+ set.insert(*i);
+ }
+ set.extend(self.typed_legalize.values().cloned());
+ let mut ret = Vec::from_iter(set);
+ ret.sort();
+ ret
+ }
+}
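Editor's note: a short sketch of how an ISA definition would wire a CpuMode to its legalization groups; `expand` and `narrow` are assumed TransformGroups built by the shared legalization code (cdsl/xform.rs), and only the CpuMode calls are taken from this file:

    use crate::cdsl::cpu_modes::CpuMode;
    use crate::cdsl::xform::TransformGroup;

    fn configure(expand: &TransformGroup, narrow: &TransformGroup) -> CpuMode {
        let mut mode = CpuMode::new("I64");
        mode.legalize_default(expand);
        mode.legalize_monomorphic(narrow);
        // A lookup for a monomorphic instruction (typ == None) now yields `narrow`'s
        // id; typed lookups without a dedicated entry fall back to `expand`'s id.
        mode
    }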
diff --git a/third_party/rust/cranelift-codegen-meta/src/cdsl/encodings.rs b/third_party/rust/cranelift-codegen-meta/src/cdsl/encodings.rs
new file mode 100644
index 0000000000..f66746f92f
--- /dev/null
+++ b/third_party/rust/cranelift-codegen-meta/src/cdsl/encodings.rs
@@ -0,0 +1,179 @@
+use crate::cdsl::instructions::{
+ InstSpec, Instruction, InstructionPredicate, InstructionPredicateNode,
+ InstructionPredicateNumber, InstructionPredicateRegistry, ValueTypeOrAny,
+};
+use crate::cdsl::recipes::{EncodingRecipeNumber, Recipes};
+use crate::cdsl::settings::SettingPredicateNumber;
+use crate::cdsl::types::ValueType;
+use std::rc::Rc;
+use std::string::ToString;
+
+/// Encoding for a concrete instruction.
+///
+/// An `Encoding` object ties an instruction opcode and concrete type variables together with an
+/// encoding recipe and its encoding bits (`encbits`).
+///
+/// The concrete instruction can be in three different forms:
+///
+/// 1. A naked opcode: `trap` for non-polymorphic instructions.
+/// 2. With bound type variables: `iadd.i32` for polymorphic instructions.
+/// 3. With operands providing constraints: `icmp.i32(intcc.eq, x, y)`.
+///
+/// If the instruction is polymorphic, all type variables must be provided.
+pub(crate) struct EncodingContent {
+ /// The `Instruction` or `BoundInstruction` being encoded.
+ inst: InstSpec,
+
+ /// The `EncodingRecipe` to use.
+ pub recipe: EncodingRecipeNumber,
+
+ /// Additional encoding bits to be interpreted by `recipe`.
+ pub encbits: u16,
+
+ /// An instruction predicate that must be true to allow selecting this encoding.
+ pub inst_predicate: Option<InstructionPredicateNumber>,
+
+ /// An ISA predicate that must be true to allow selecting this encoding.
+ pub isa_predicate: Option<SettingPredicateNumber>,
+
+ /// The value type this encoding has been bound to, for encodings of polymorphic instructions.
+ pub bound_type: Option<ValueType>,
+}
+
+impl EncodingContent {
+ pub fn inst(&self) -> &Instruction {
+ self.inst.inst()
+ }
+ pub fn to_rust_comment(&self, recipes: &Recipes) -> String {
+ format!("[{}#{:02x}]", recipes[self.recipe].name, self.encbits)
+ }
+}
+
+pub(crate) type Encoding = Rc<EncodingContent>;
+
+pub(crate) struct EncodingBuilder {
+ inst: InstSpec,
+ recipe: EncodingRecipeNumber,
+ encbits: u16,
+ inst_predicate: Option<InstructionPredicate>,
+ isa_predicate: Option<SettingPredicateNumber>,
+ bound_type: Option<ValueType>,
+}
+
+impl EncodingBuilder {
+ pub fn new(inst: InstSpec, recipe: EncodingRecipeNumber, encbits: u16) -> Self {
+ let (inst_predicate, bound_type) = match &inst {
+ InstSpec::Bound(inst) => {
+ let other_typevars = &inst.inst.polymorphic_info.as_ref().unwrap().other_typevars;
+
+ assert_eq!(
+ inst.value_types.len(),
+ other_typevars.len() + 1,
+ "partially bound polymorphic instruction"
+ );
+
+ // Add secondary type variables to the instruction predicate.
+ let value_types = &inst.value_types;
+ let mut inst_predicate: Option<InstructionPredicate> = None;
+ for (typevar, value_type) in other_typevars.iter().zip(value_types.iter().skip(1)) {
+ let value_type = match value_type {
+ ValueTypeOrAny::Any => continue,
+ ValueTypeOrAny::ValueType(vt) => vt,
+ };
+ let type_predicate =
+ InstructionPredicate::new_typevar_check(&inst.inst, typevar, value_type);
+ inst_predicate = Some(type_predicate.into());
+ }
+
+ // Add immediate value predicates
+ for (immediate_value, immediate_operand) in inst
+ .immediate_values
+ .iter()
+ .zip(inst.inst.operands_in.iter().filter(|o| o.is_immediate()))
+ {
+ let immediate_predicate = InstructionPredicate::new_is_field_equal(
+ &inst.inst.format,
+ immediate_operand.kind.rust_field_name,
+ immediate_value.to_string(),
+ );
+ inst_predicate = if let Some(type_predicate) = inst_predicate {
+ Some(type_predicate.and(immediate_predicate))
+ } else {
+ Some(immediate_predicate.into())
+ }
+ }
+
+ let ctrl_type = value_types[0]
+ .clone()
+ .expect("Controlling type shouldn't be Any");
+ (inst_predicate, Some(ctrl_type))
+ }
+
+ InstSpec::Inst(inst) => {
+ assert!(
+ inst.polymorphic_info.is_none(),
+ "unbound polymorphic instruction"
+ );
+ (None, None)
+ }
+ };
+
+ Self {
+ inst,
+ recipe,
+ encbits,
+ inst_predicate,
+ isa_predicate: None,
+ bound_type,
+ }
+ }
+
+ pub fn inst_predicate(mut self, inst_predicate: InstructionPredicateNode) -> Self {
+ let inst_predicate = Some(match self.inst_predicate {
+ Some(node) => node.and(inst_predicate),
+ None => inst_predicate.into(),
+ });
+ self.inst_predicate = inst_predicate;
+ self
+ }
+
+ pub fn isa_predicate(mut self, isa_predicate: SettingPredicateNumber) -> Self {
+ assert!(self.isa_predicate.is_none());
+ self.isa_predicate = Some(isa_predicate);
+ self
+ }
+
+ pub fn build(
+ self,
+ recipes: &Recipes,
+ inst_pred_reg: &mut InstructionPredicateRegistry,
+ ) -> Encoding {
+ let inst_predicate = self.inst_predicate.map(|pred| inst_pred_reg.insert(pred));
+
+ let inst = self.inst.inst();
+ assert!(
+ Rc::ptr_eq(&inst.format, &recipes[self.recipe].format),
+ format!(
+ "Inst {} and recipe {} must have the same format!",
+ inst.name, recipes[self.recipe].name
+ )
+ );
+
+ assert_eq!(
+ inst.is_branch && !inst.is_indirect_branch,
+ recipes[self.recipe].branch_range.is_some(),
+ "Inst {}'s is_branch contradicts recipe {} branch_range!",
+ inst.name,
+ recipes[self.recipe].name
+ );
+
+ Rc::new(EncodingContent {
+ inst: self.inst,
+ recipe: self.recipe,
+ encbits: self.encbits,
+ inst_predicate,
+ isa_predicate: self.isa_predicate,
+ bound_type: self.bound_type,
+ })
+ }
+}
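Editor's note: a hedged sketch of how an ISA backend would drive this builder. The instruction handle, recipe, and registries are stand-ins for the real x86/riscv definition code, the `.into()` from BoundInstruction to InstSpec is the same conversion the def_rhs! macro in cdsl/ast.rs relies on, and the bind() call assumes the controlling type variable is the instruction's only one:

    use crate::cdsl::encodings::{Encoding, EncodingBuilder};
    use crate::cdsl::instructions::{Bindable, Instruction, InstructionPredicateRegistry};
    use crate::cdsl::recipes::{EncodingRecipeNumber, Recipes};
    use crate::cdsl::types::LaneType;

    fn encode_binary(
        iadd: &Instruction,
        ty: LaneType,
        recipe: EncodingRecipeNumber,
        recipes: &Recipes,
        pred_reg: &mut InstructionPredicateRegistry,
    ) -> Encoding {
        // Bind the controlling type variable, attach the recipe and encoding
        // bits, then intern any instruction predicate in the registry.
        EncodingBuilder::new(iadd.bind(ty).into(), recipe, 0)
            .build(recipes, pred_reg)
    }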
diff --git a/third_party/rust/cranelift-codegen-meta/src/cdsl/formats.rs b/third_party/rust/cranelift-codegen-meta/src/cdsl/formats.rs
new file mode 100644
index 0000000000..e713a8bccb
--- /dev/null
+++ b/third_party/rust/cranelift-codegen-meta/src/cdsl/formats.rs
@@ -0,0 +1,171 @@
+use crate::cdsl::operands::OperandKind;
+use std::fmt;
+use std::rc::Rc;
+
+/// An immediate field in an instruction format.
+///
+/// This corresponds to a single member of a variant of the `InstructionData`
+/// data type.
+#[derive(Debug)]
+pub(crate) struct FormatField {
+ /// Immediate operand kind.
+ pub kind: OperandKind,
+
+ /// Member name in InstructionData variant.
+ pub member: &'static str,
+}
+
+/// Every instruction opcode has a corresponding instruction format which determines the number of
+/// operands and their kinds. Instruction formats are identified structurally, i.e., the format of
+/// an instruction is derived from the kinds of operands used in its declaration.
+///
+/// The instruction format stores two separate lists of operands: Immediates and values. Immediate
+/// operands (including entity references) are represented as explicit members in the
+/// `InstructionData` variants. The value operands are stored differently, depending on how many
+/// there are. Beyond a certain point, instruction formats switch to an external value list for
+/// storing value arguments. Value lists can hold an arbitrary number of values.
+///
+/// All instruction formats must be predefined in the meta shared/formats.rs module.
+#[derive(Debug)]
+pub(crate) struct InstructionFormat {
+ /// Instruction format name in CamelCase. This is used as a Rust variant name in both the
+ /// `InstructionData` and `InstructionFormat` enums.
+ pub name: &'static str,
+
+ pub num_value_operands: usize,
+
+ pub has_value_list: bool,
+
+ pub imm_fields: Vec<FormatField>,
+
+ /// Index of the value input operand that is used to infer the controlling type variable. By
+ /// default, this is `0`, the first `value` operand. The index is relative to the values only,
+ /// ignoring immediate operands.
+ pub typevar_operand: Option<usize>,
+}
+
+/// A tuple serving as a key to deduplicate InstructionFormat.
+#[derive(Hash, PartialEq, Eq)]
+pub(crate) struct FormatStructure {
+ pub num_value_operands: usize,
+ pub has_value_list: bool,
+ /// Tuples of (Rust field name / Rust type) for each immediate field.
+ pub imm_field_names: Vec<(&'static str, &'static str)>,
+}
+
+impl fmt::Display for InstructionFormat {
+ fn fmt(&self, fmt: &mut fmt::Formatter) -> Result<(), fmt::Error> {
+ let imm_args = self
+ .imm_fields
+ .iter()
+ .map(|field| format!("{}: {}", field.member, field.kind.rust_type))
+ .collect::<Vec<_>>()
+ .join(", ");
+ fmt.write_fmt(format_args!(
+ "{}(imms=({}), vals={})",
+ self.name, imm_args, self.num_value_operands
+ ))?;
+ Ok(())
+ }
+}
+
+impl InstructionFormat {
+ pub fn imm_by_name(&self, name: &'static str) -> &FormatField {
+ self.imm_fields
+ .iter()
+ .find(|&field| field.member == name)
+ .unwrap_or_else(|| {
+ panic!(
+ "unexpected immediate field named {} in instruction format {}",
+ name, self.name
+ )
+ })
+ }
+
+ /// Returns a tuple that uniquely identifies the structure.
+ pub fn structure(&self) -> FormatStructure {
+ FormatStructure {
+ num_value_operands: self.num_value_operands,
+ has_value_list: self.has_value_list,
+ imm_field_names: self
+ .imm_fields
+ .iter()
+ .map(|field| (field.kind.rust_field_name, field.kind.rust_type))
+ .collect::<Vec<_>>(),
+ }
+ }
+}
+
+pub(crate) struct InstructionFormatBuilder {
+ name: &'static str,
+ num_value_operands: usize,
+ has_value_list: bool,
+ imm_fields: Vec<FormatField>,
+ typevar_operand: Option<usize>,
+}
+
+impl InstructionFormatBuilder {
+ pub fn new(name: &'static str) -> Self {
+ Self {
+ name,
+ num_value_operands: 0,
+ has_value_list: false,
+ imm_fields: Vec::new(),
+ typevar_operand: None,
+ }
+ }
+
+ pub fn value(mut self) -> Self {
+ self.num_value_operands += 1;
+ self
+ }
+
+ pub fn varargs(mut self) -> Self {
+ self.has_value_list = true;
+ self
+ }
+
+ pub fn imm(mut self, operand_kind: &OperandKind) -> Self {
+ let field = FormatField {
+ kind: operand_kind.clone(),
+ member: operand_kind.rust_field_name,
+ };
+ self.imm_fields.push(field);
+ self
+ }
+
+ pub fn imm_with_name(mut self, member: &'static str, operand_kind: &OperandKind) -> Self {
+ let field = FormatField {
+ kind: operand_kind.clone(),
+ member,
+ };
+ self.imm_fields.push(field);
+ self
+ }
+
+ pub fn typevar_operand(mut self, operand_index: usize) -> Self {
+ assert!(self.typevar_operand.is_none());
+ assert!(self.has_value_list || operand_index < self.num_value_operands);
+ self.typevar_operand = Some(operand_index);
+ self
+ }
+
+ pub fn build(self) -> Rc<InstructionFormat> {
+ let typevar_operand = if self.typevar_operand.is_some() {
+ self.typevar_operand
+ } else if self.has_value_list || self.num_value_operands > 0 {
+ // Default to the first value operand, if there's one.
+ Some(0)
+ } else {
+ None
+ };
+
+ Rc::new(InstructionFormat {
+ name: self.name,
+ num_value_operands: self.num_value_operands,
+ has_value_list: self.has_value_list,
+ imm_fields: self.imm_fields,
+ typevar_operand,
+ })
+ }
+}
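Editor's note: to make the builder above concrete, here is a hedged sketch of declaring a format with one value operand and one immediate; the `imm64` operand kind is an assumption about shared/immediates.rs, everything else is the builder API from this file:

    use std::rc::Rc;

    use crate::cdsl::formats::{InstructionFormat, InstructionFormatBuilder};
    use crate::cdsl::operands::OperandKind;

    fn binary_imm_format(imm64: &OperandKind) -> Rc<InstructionFormat> {
        InstructionFormatBuilder::new("BinaryImm")
            .value()    // one value operand...
            .imm(imm64) // ...plus one immediate member named after the operand kind
            .build()    // typevar_operand defaults to the first value operand
    }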
diff --git a/third_party/rust/cranelift-codegen-meta/src/cdsl/instructions.rs b/third_party/rust/cranelift-codegen-meta/src/cdsl/instructions.rs
new file mode 100644
index 0000000000..88a15c6038
--- /dev/null
+++ b/third_party/rust/cranelift-codegen-meta/src/cdsl/instructions.rs
@@ -0,0 +1,1395 @@
+use cranelift_codegen_shared::condcodes::IntCC;
+use cranelift_entity::{entity_impl, PrimaryMap};
+
+use std::collections::HashMap;
+use std::fmt;
+use std::fmt::{Display, Error, Formatter};
+use std::rc::Rc;
+
+use crate::cdsl::camel_case;
+use crate::cdsl::formats::{FormatField, InstructionFormat};
+use crate::cdsl::operands::Operand;
+use crate::cdsl::type_inference::Constraint;
+use crate::cdsl::types::{LaneType, ReferenceType, ValueType, VectorType};
+use crate::cdsl::typevar::TypeVar;
+
+use crate::shared::formats::Formats;
+use crate::shared::types::{Bool, Float, Int, Reference};
+
+#[derive(Copy, Clone, Debug, PartialEq, Eq, Hash, PartialOrd, Ord)]
+pub(crate) struct OpcodeNumber(u32);
+entity_impl!(OpcodeNumber);
+
+pub(crate) type AllInstructions = PrimaryMap<OpcodeNumber, Instruction>;
+
+pub(crate) struct InstructionGroupBuilder<'all_inst> {
+ all_instructions: &'all_inst mut AllInstructions,
+ own_instructions: Vec<Instruction>,
+}
+
+impl<'all_inst> InstructionGroupBuilder<'all_inst> {
+ pub fn new(all_instructions: &'all_inst mut AllInstructions) -> Self {
+ Self {
+ all_instructions,
+ own_instructions: Vec::new(),
+ }
+ }
+
+ pub fn push(&mut self, builder: InstructionBuilder) {
+ let opcode_number = OpcodeNumber(self.all_instructions.next_key().as_u32());
+ let inst = builder.build(opcode_number);
+ // Note this clone is cheap, since Instruction is a Rc<> wrapper for InstructionContent.
+ self.own_instructions.push(inst.clone());
+ self.all_instructions.push(inst);
+ }
+
+ pub fn build(self) -> InstructionGroup {
+ InstructionGroup {
+ instructions: self.own_instructions,
+ }
+ }
+}
+
+/// Every instruction must belong to exactly one instruction group. A given
+/// target architecture can support instructions from multiple groups, and it
+/// does not necessarily support all instructions in a group.
+pub(crate) struct InstructionGroup {
+ instructions: Vec<Instruction>,
+}
+
+impl InstructionGroup {
+ pub fn by_name(&self, name: &'static str) -> &Instruction {
+ self.instructions
+ .iter()
+ .find(|inst| inst.name == name)
+ .unwrap_or_else(|| panic!("instruction with name '{}' does not exist", name))
+ }
+}
+
+/// Instructions can have parameters bound to them to specialize them for more specific encodings
+/// (e.g. the encoding for adding two float types may be different from that of adding two
+/// integer types).
+pub(crate) trait Bindable {
+ /// Bind a parameter to an instruction
+ fn bind(&self, parameter: impl Into<BindParameter>) -> BoundInstruction;
+}
+
+#[derive(Debug)]
+pub(crate) struct PolymorphicInfo {
+ pub use_typevar_operand: bool,
+ pub ctrl_typevar: TypeVar,
+ pub other_typevars: Vec<TypeVar>,
+}
+
+#[derive(Debug)]
+pub(crate) struct InstructionContent {
+ /// Instruction mnemonic, also becomes opcode name.
+ pub name: String,
+ pub camel_name: String,
+ pub opcode_number: OpcodeNumber,
+
+ /// Documentation string.
+ pub doc: String,
+
+ /// Input operands. This can be a mix of SSA value operands and other operand kinds.
+ pub operands_in: Vec<Operand>,
+ /// Output operands. The output operands must be SSA values or `variable_args`.
+ pub operands_out: Vec<Operand>,
+ /// Instruction-specific TypeConstraints.
+ pub constraints: Vec<Constraint>,
+
+ /// Instruction format, automatically derived from the input operands.
+ pub format: Rc<InstructionFormat>,
+
+ /// Set when one of the input or output operands is a free type variable; `None` if the
+ /// instruction is not polymorphic.
+ pub polymorphic_info: Option<PolymorphicInfo>,
+
+ /// Indices in operands_in of input operands that are values.
+ pub value_opnums: Vec<usize>,
+ /// Indices in operands_in of input operands that are immediates or entities.
+ pub imm_opnums: Vec<usize>,
+ /// Indices in operands_out of output operands that are values.
+ pub value_results: Vec<usize>,
+
+ /// True for instructions that terminate the block.
+ pub is_terminator: bool,
+ /// True for all branch or jump instructions.
+ pub is_branch: bool,
+ /// True for all indirect branch or jump instructions.
+ pub is_indirect_branch: bool,
+ /// Is this a call instruction?
+ pub is_call: bool,
+ /// Is this a return instruction?
+ pub is_return: bool,
+ /// Is this a ghost instruction?
+ pub is_ghost: bool,
+ /// Can this instruction read from memory?
+ pub can_load: bool,
+ /// Can this instruction write to memory?
+ pub can_store: bool,
+ /// Can this instruction cause a trap?
+ pub can_trap: bool,
+ /// Does this instruction have other side effects besides can_* flags?
+ pub other_side_effects: bool,
+ /// Does this instruction write to CPU flags?
+ pub writes_cpu_flags: bool,
+ /// Should this opcode be considered to clobber all live registers, during regalloc?
+ pub clobbers_all_regs: bool,
+}
+
+impl InstructionContent {
+ pub fn snake_name(&self) -> &str {
+ if &self.name == "return" {
+ "return_"
+ } else {
+ &self.name
+ }
+ }
+
+ pub fn all_typevars(&self) -> Vec<&TypeVar> {
+ match &self.polymorphic_info {
+ Some(poly) => {
+ let mut result = vec![&poly.ctrl_typevar];
+ result.extend(&poly.other_typevars);
+ result
+ }
+ None => Vec::new(),
+ }
+ }
+}
+
+pub(crate) type Instruction = Rc<InstructionContent>;
+
+impl Bindable for Instruction {
+ fn bind(&self, parameter: impl Into<BindParameter>) -> BoundInstruction {
+ BoundInstruction::new(self).bind(parameter)
+ }
+}
+
+impl fmt::Display for InstructionContent {
+ fn fmt(&self, fmt: &mut fmt::Formatter) -> Result<(), fmt::Error> {
+ if !self.operands_out.is_empty() {
+ let operands_out = self
+ .operands_out
+ .iter()
+ .map(|op| op.name)
+ .collect::<Vec<_>>()
+ .join(", ");
+ fmt.write_str(&operands_out)?;
+ fmt.write_str(" = ")?;
+ }
+
+ fmt.write_str(&self.name)?;
+
+ if !self.operands_in.is_empty() {
+ let operands_in = self
+ .operands_in
+ .iter()
+ .map(|op| op.name)
+ .collect::<Vec<_>>()
+ .join(", ");
+ fmt.write_str(" ")?;
+ fmt.write_str(&operands_in)?;
+ }
+
+ Ok(())
+ }
+}
+
+pub(crate) struct InstructionBuilder {
+ name: String,
+ doc: String,
+ format: Rc<InstructionFormat>,
+ operands_in: Option<Vec<Operand>>,
+ operands_out: Option<Vec<Operand>>,
+ constraints: Option<Vec<Constraint>>,
+
+ // See Instruction comments for the meaning of these fields.
+ is_terminator: bool,
+ is_branch: bool,
+ is_indirect_branch: bool,
+ is_call: bool,
+ is_return: bool,
+ is_ghost: bool,
+ can_load: bool,
+ can_store: bool,
+ can_trap: bool,
+ other_side_effects: bool,
+ clobbers_all_regs: bool,
+}
+
+impl InstructionBuilder {
+ pub fn new<S: Into<String>>(name: S, doc: S, format: &Rc<InstructionFormat>) -> Self {
+ Self {
+ name: name.into(),
+ doc: doc.into(),
+ format: format.clone(),
+ operands_in: None,
+ operands_out: None,
+ constraints: None,
+
+ is_terminator: false,
+ is_branch: false,
+ is_indirect_branch: false,
+ is_call: false,
+ is_return: false,
+ is_ghost: false,
+ can_load: false,
+ can_store: false,
+ can_trap: false,
+ other_side_effects: false,
+ clobbers_all_regs: false,
+ }
+ }
+
+ pub fn operands_in(mut self, operands: Vec<&Operand>) -> Self {
+ assert!(self.operands_in.is_none());
+ self.operands_in = Some(operands.iter().map(|x| (*x).clone()).collect());
+ self
+ }
+
+ pub fn operands_out(mut self, operands: Vec<&Operand>) -> Self {
+ assert!(self.operands_out.is_none());
+ self.operands_out = Some(operands.iter().map(|x| (*x).clone()).collect());
+ self
+ }
+
+ pub fn constraints(mut self, constraints: Vec<Constraint>) -> Self {
+ assert!(self.constraints.is_none());
+ self.constraints = Some(constraints);
+ self
+ }
+
+ #[allow(clippy::wrong_self_convention)]
+ pub fn is_terminator(mut self, val: bool) -> Self {
+ self.is_terminator = val;
+ self
+ }
+
+ #[allow(clippy::wrong_self_convention)]
+ pub fn is_branch(mut self, val: bool) -> Self {
+ self.is_branch = val;
+ self
+ }
+
+ #[allow(clippy::wrong_self_convention)]
+ pub fn is_indirect_branch(mut self, val: bool) -> Self {
+ self.is_indirect_branch = val;
+ self
+ }
+
+ #[allow(clippy::wrong_self_convention)]
+ pub fn is_call(mut self, val: bool) -> Self {
+ self.is_call = val;
+ self
+ }
+
+ #[allow(clippy::wrong_self_convention)]
+ pub fn is_return(mut self, val: bool) -> Self {
+ self.is_return = val;
+ self
+ }
+
+ #[allow(clippy::wrong_self_convention)]
+ pub fn is_ghost(mut self, val: bool) -> Self {
+ self.is_ghost = val;
+ self
+ }
+
+ pub fn can_load(mut self, val: bool) -> Self {
+ self.can_load = val;
+ self
+ }
+
+ pub fn can_store(mut self, val: bool) -> Self {
+ self.can_store = val;
+ self
+ }
+
+ pub fn can_trap(mut self, val: bool) -> Self {
+ self.can_trap = val;
+ self
+ }
+
+ pub fn other_side_effects(mut self, val: bool) -> Self {
+ self.other_side_effects = val;
+ self
+ }
+
+ pub fn clobbers_all_regs(mut self, val: bool) -> Self {
+ self.clobbers_all_regs = val;
+ self
+ }
+
+ fn build(self, opcode_number: OpcodeNumber) -> Instruction {
+ let operands_in = self.operands_in.unwrap_or_else(Vec::new);
+ let operands_out = self.operands_out.unwrap_or_else(Vec::new);
+
+ let mut value_opnums = Vec::new();
+ let mut imm_opnums = Vec::new();
+ for (i, op) in operands_in.iter().enumerate() {
+ if op.is_value() {
+ value_opnums.push(i);
+ } else if op.is_immediate_or_entityref() {
+ imm_opnums.push(i);
+ } else {
+ assert!(op.is_varargs());
+ }
+ }
+
+ let value_results = operands_out
+ .iter()
+ .enumerate()
+ .filter_map(|(i, op)| if op.is_value() { Some(i) } else { None })
+ .collect();
+
+ verify_format(&self.name, &operands_in, &self.format);
+
+ let polymorphic_info =
+ verify_polymorphic(&operands_in, &operands_out, &self.format, &value_opnums);
+
+ // Infer from output operands whether an instruction clobbers CPU flags or not.
+ let writes_cpu_flags = operands_out.iter().any(|op| op.is_cpu_flags());
+
+ let camel_name = camel_case(&self.name);
+
+ Rc::new(InstructionContent {
+ name: self.name,
+ camel_name,
+ opcode_number,
+ doc: self.doc,
+ operands_in,
+ operands_out,
+ constraints: self.constraints.unwrap_or_else(Vec::new),
+ format: self.format,
+ polymorphic_info,
+ value_opnums,
+ value_results,
+ imm_opnums,
+ is_terminator: self.is_terminator,
+ is_branch: self.is_branch,
+ is_indirect_branch: self.is_indirect_branch,
+ is_call: self.is_call,
+ is_return: self.is_return,
+ is_ghost: self.is_ghost,
+ can_load: self.can_load,
+ can_store: self.can_store,
+ can_trap: self.can_trap,
+ other_side_effects: self.other_side_effects,
+ writes_cpu_flags,
+ clobbers_all_regs: self.clobbers_all_regs,
+ })
+ }
+}
+
+/// A thin wrapper like Option<ValueType>, but with more precise semantics.
+#[derive(Clone)]
+pub(crate) enum ValueTypeOrAny {
+ ValueType(ValueType),
+ Any,
+}
+
+impl ValueTypeOrAny {
+ pub fn expect(self, msg: &str) -> ValueType {
+ match self {
+ ValueTypeOrAny::ValueType(vt) => vt,
+ ValueTypeOrAny::Any => panic!(format!("Unexpected Any: {}", msg)),
+ }
+ }
+}
+
+/// The number of bits in the vector
+type VectorBitWidth = u64;
+
+/// A parameter used for binding instructions to specific types or values.
+pub(crate) enum BindParameter {
+ Any,
+ Lane(LaneType),
+ Vector(LaneType, VectorBitWidth),
+ Reference(ReferenceType),
+ Immediate(Immediate),
+}
+
+/// Constructor for more easily building vector parameters from any lane type
+pub(crate) fn vector(parameter: impl Into<LaneType>, vector_size: VectorBitWidth) -> BindParameter {
+ BindParameter::Vector(parameter.into(), vector_size)
+}
+
+impl From<Int> for BindParameter {
+ fn from(ty: Int) -> Self {
+ BindParameter::Lane(ty.into())
+ }
+}
+
+impl From<Bool> for BindParameter {
+ fn from(ty: Bool) -> Self {
+ BindParameter::Lane(ty.into())
+ }
+}
+
+impl From<Float> for BindParameter {
+ fn from(ty: Float) -> Self {
+ BindParameter::Lane(ty.into())
+ }
+}
+
+impl From<LaneType> for BindParameter {
+ fn from(ty: LaneType) -> Self {
+ BindParameter::Lane(ty)
+ }
+}
+
+impl From<Reference> for BindParameter {
+ fn from(ty: Reference) -> Self {
+ BindParameter::Reference(ty.into())
+ }
+}
+
+impl From<Immediate> for BindParameter {
+ fn from(imm: Immediate) -> Self {
+ BindParameter::Immediate(imm)
+ }
+}
+
+#[derive(Clone)]
+pub(crate) enum Immediate {
+ // When needed, this enum should be expanded to include other immediate types (e.g. u8, u128).
+ IntCC(IntCC),
+}
+
+impl Display for Immediate {
+ fn fmt(&self, f: &mut Formatter) -> Result<(), Error> {
+ match self {
+ Immediate::IntCC(x) => write!(f, "IntCC::{:?}", x),
+ }
+ }
+}
+
+#[derive(Clone)]
+pub(crate) struct BoundInstruction {
+ pub inst: Instruction,
+ pub value_types: Vec<ValueTypeOrAny>,
+ pub immediate_values: Vec<Immediate>,
+}
+
+impl BoundInstruction {
+ /// Construct a new bound instruction (with nothing bound yet) from an instruction
+ fn new(inst: &Instruction) -> Self {
+ BoundInstruction {
+ inst: inst.clone(),
+ value_types: vec![],
+ immediate_values: vec![],
+ }
+ }
+
+ /// Verify that the bindings for a BoundInstruction are correct.
+ fn verify_bindings(&self) -> Result<(), String> {
+ // Verify that binding types to the instruction does not violate the polymorphic rules.
+ if !self.value_types.is_empty() {
+ match &self.inst.polymorphic_info {
+ Some(poly) => {
+ if self.value_types.len() > 1 + poly.other_typevars.len() {
+ return Err(format!(
+ "trying to bind too many types for {}",
+ self.inst.name
+ ));
+ }
+ }
+ None => {
+ return Err(format!(
+ "trying to bind a type for {} which is not a polymorphic instruction",
+ self.inst.name
+ ));
+ }
+ }
+ }
+
+ // Verify that only the right number of immediates are bound.
+ let immediate_count = self
+ .inst
+ .operands_in
+ .iter()
+ .filter(|o| o.is_immediate_or_entityref())
+ .count();
+ if self.immediate_values.len() > immediate_count {
+ return Err(format!(
+ "trying to bind too many immediates ({}) to instruction {} which only expects {} \
+ immediates",
+ self.immediate_values.len(),
+ self.inst.name,
+ immediate_count
+ ));
+ }
+
+ Ok(())
+ }
+}
+
+impl Bindable for BoundInstruction {
+ fn bind(&self, parameter: impl Into<BindParameter>) -> BoundInstruction {
+ let mut modified = self.clone();
+ match parameter.into() {
+ BindParameter::Any => modified.value_types.push(ValueTypeOrAny::Any),
+ BindParameter::Lane(lane_type) => modified
+ .value_types
+ .push(ValueTypeOrAny::ValueType(lane_type.into())),
+ BindParameter::Vector(lane_type, vector_size_in_bits) => {
+ let num_lanes = vector_size_in_bits / lane_type.lane_bits();
+ assert!(
+ num_lanes >= 2,
+ "Minimum lane number for bind_vector is 2, found {}.",
+ num_lanes,
+ );
+ let vector_type = ValueType::Vector(VectorType::new(lane_type, num_lanes));
+ modified
+ .value_types
+ .push(ValueTypeOrAny::ValueType(vector_type));
+ }
+ BindParameter::Reference(reference_type) => {
+ modified
+ .value_types
+ .push(ValueTypeOrAny::ValueType(reference_type.into()));
+ }
+ BindParameter::Immediate(immediate) => modified.immediate_values.push(immediate),
+ }
+ modified.verify_bindings().unwrap();
+ modified
+ }
+}
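Editor's note: a small, hedged sketch of the chaining this impl enables, using an assumed polymorphic instruction handle; the vector() helper and bind() are the pieces defined above:

    use crate::cdsl::instructions::{vector, Bindable, BoundInstruction, Instruction};
    use crate::cdsl::types::LaneType;

    // Binding a 128-bit vector records a vector value type with 128 / lane_bits
    // lanes and re-runs verify_bindings() on the cloned BoundInstruction.
    fn bind_vector_form(splat: &Instruction, lane: LaneType) -> BoundInstruction {
        splat.bind(vector(lane, 128))
    }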
+
+/// Checks that the input operands actually match the given format.
+fn verify_format(inst_name: &str, operands_in: &[Operand], format: &InstructionFormat) {
+ // A format is defined by:
+ // - its number of input value operands,
+ // - its number and names of input immediate operands,
+ // - whether it has a value list or not.
+ let mut num_values = 0;
+ let mut num_immediates = 0;
+
+ for operand in operands_in.iter() {
+ if operand.is_varargs() {
+ assert!(
+ format.has_value_list,
+ "instruction {} has varargs, but its format {} doesn't have a value list; you may \
+ need to use a different format.",
+ inst_name, format.name
+ );
+ }
+ if operand.is_value() {
+ num_values += 1;
+ }
+ if operand.is_immediate_or_entityref() {
+ if let Some(format_field) = format.imm_fields.get(num_immediates) {
+ assert_eq!(
+ format_field.kind.rust_field_name,
+ operand.kind.rust_field_name,
+ "{}th operand of {} should be {} (according to format), not {} (according to \
+ inst definition). You may need to use a different format.",
+ num_immediates,
+ inst_name,
+ format_field.kind.rust_field_name,
+ operand.kind.rust_field_name
+ );
+ num_immediates += 1;
+ }
+ }
+ }
+
+ assert_eq!(
+ num_values, format.num_value_operands,
+ "inst {} doesn't have as many value input operands as its format {} declares; you may need \
+ to use a different format.",
+ inst_name, format.name
+ );
+
+ assert_eq!(
+ num_immediates,
+ format.imm_fields.len(),
+ "inst {} doesn't have as many immediate input \
+ operands as its format {} declares; you may need to use a different format.",
+ inst_name,
+ format.name
+ );
+}
+
+/// Check if this instruction is polymorphic, and verify its use of type variables.
+fn verify_polymorphic(
+ operands_in: &[Operand],
+ operands_out: &[Operand],
+ format: &InstructionFormat,
+ value_opnums: &[usize],
+) -> Option<PolymorphicInfo> {
+ // The instruction is polymorphic if it has one free input or output operand.
+ let is_polymorphic = operands_in
+ .iter()
+ .any(|op| op.is_value() && op.type_var().unwrap().free_typevar().is_some())
+ || operands_out
+ .iter()
+ .any(|op| op.is_value() && op.type_var().unwrap().free_typevar().is_some());
+
+ if !is_polymorphic {
+ return None;
+ }
+
+ // Verify the use of type variables.
+ let tv_op = format.typevar_operand;
+ let mut maybe_error_message = None;
+ if let Some(tv_op) = tv_op {
+ if tv_op < value_opnums.len() {
+ let op_num = value_opnums[tv_op];
+ let tv = operands_in[op_num].type_var().unwrap();
+ let free_typevar = tv.free_typevar();
+ if (free_typevar.is_some() && tv == &free_typevar.unwrap())
+ || tv.singleton_type().is_some()
+ {
+ match is_ctrl_typevar_candidate(tv, &operands_in, &operands_out) {
+ Ok(other_typevars) => {
+ return Some(PolymorphicInfo {
+ use_typevar_operand: true,
+ ctrl_typevar: tv.clone(),
+ other_typevars,
+ });
+ }
+ Err(error_message) => {
+ maybe_error_message = Some(error_message);
+ }
+ }
+ }
+ }
+ };
+
+ // If we reached here, it means the type variable indicated as the typevar operand couldn't
+ // control every other input and output type variable. We need to look at the result type
+ // variables.
+ if operands_out.is_empty() {
+ // No result means no other possible type variable, so it's a type inference failure.
+ match maybe_error_message {
+ Some(msg) => panic!(msg),
+ None => panic!("typevar_operand must be a free type variable"),
+ }
+ }
+
+ // Otherwise, try to infer the controlling type variable by looking at the first result.
+ let tv = operands_out[0].type_var().unwrap();
+ let free_typevar = tv.free_typevar();
+ if free_typevar.is_some() && tv != &free_typevar.unwrap() {
+ panic!("first result must be a free type variable");
+ }
+
+ // At this point, if the next unwrap() fails, it means the output type couldn't be used as a
+ // controlling type variable either; panicking is the right behavior.
+ let other_typevars = is_ctrl_typevar_candidate(tv, &operands_in, &operands_out).unwrap();
+
+ Some(PolymorphicInfo {
+ use_typevar_operand: false,
+ ctrl_typevar: tv.clone(),
+ other_typevars,
+ })
+}
+
+/// Verify that the use of TypeVars is consistent with `ctrl_typevar` as the controlling type
+/// variable.
+///
+/// All polymorphic inputs must either be derived from `ctrl_typevar` or be independent free type
+/// variables only used once.
+///
+/// All polymorphic results must be derived from `ctrl_typevar`.
+///
+/// Return a vector of other type variables used, or a string explaining what went wrong.
+fn is_ctrl_typevar_candidate(
+ ctrl_typevar: &TypeVar,
+ operands_in: &[Operand],
+ operands_out: &[Operand],
+) -> Result<Vec<TypeVar>, String> {
+ let mut other_typevars = Vec::new();
+
+ // Check value inputs.
+ for input in operands_in {
+ if !input.is_value() {
+ continue;
+ }
+
+ let typ = input.type_var().unwrap();
+ let free_typevar = typ.free_typevar();
+
+ // Non-polymorphic or derived from ctrl_typevar is OK.
+ if free_typevar.is_none() {
+ continue;
+ }
+ let free_typevar = free_typevar.unwrap();
+ if &free_typevar == ctrl_typevar {
+ continue;
+ }
+
+ // No other derived typevars allowed.
+ if typ != &free_typevar {
+ return Err(format!(
+ "{:?}: type variable {} must be derived from {:?} while it is derived from {:?}",
+ input, typ.name, ctrl_typevar, free_typevar
+ ));
+ }
+
+ // Other free type variables can only be used once each.
+ for other_tv in &other_typevars {
+ if &free_typevar == other_tv {
+ return Err(format!(
+ "non-controlling type variable {} can't be used more than once",
+ free_typevar.name
+ ));
+ }
+ }
+
+ other_typevars.push(free_typevar);
+ }
+
+ // Check outputs.
+ for result in operands_out {
+ if !result.is_value() {
+ continue;
+ }
+
+ let typ = result.type_var().unwrap();
+ let free_typevar = typ.free_typevar();
+
+ // Non-polymorphic or derived from ctrl_typevar is OK.
+ if free_typevar.is_none() || &free_typevar.unwrap() == ctrl_typevar {
+ continue;
+ }
+
+ return Err("type variable in output not derived from ctrl_typevar".into());
+ }
+
+ Ok(other_typevars)
+}
+
+#[derive(Clone, Hash, PartialEq, Eq)]
+pub(crate) enum FormatPredicateKind {
+ /// Is the field member equal to the expected value (stored here)?
+ IsEqual(String),
+
+ /// Is the immediate instruction format field representable as an n-bit two's complement
+ /// integer? (with width: first member, scale: second member).
+ /// The predicate is true if the field is in the range: `-2^(width-1) -- 2^(width-1)-1` and a
+ /// multiple of `2^scale`.
+ IsSignedInt(usize, usize),
+
+ /// Is the immediate instruction format field representable as an n-bit unsigned integer? (with
+ /// width: first member, scale: second member).
+ /// The predicate is true if the field is in the range: `0 -- 2^width - 1` and a multiple of
+ /// `2^scale`.
+ IsUnsignedInt(usize, usize),
+
+ /// Is the immediate format field member an integer equal to zero?
+ IsZeroInt,
+ /// Is the immediate format field member equal to zero? (float32 version)
+ IsZero32BitFloat,
+
+ /// Is the immediate format field member equal to zero? (float64 version)
+ IsZero64BitFloat,
+
+ /// Is the immediate format field member equal to zero in all lanes?
+ IsAllZeroes,
+
+ /// Does the immediate format field member have ones in all bits of all lanes?
+ IsAllOnes,
+
+ /// Does the value list (in `member_name`) have the size specified by the parameter?
+ LengthEquals(usize),
+
+ /// Is the referenced function colocated?
+ IsColocatedFunc,
+
+ /// Is the referenced data object colocated?
+ IsColocatedData,
+}
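+
+// Illustrative note (not part of the original source): as an example of the width/scale
+// encoding above, `IsSignedInt(8, 0)` accepts exactly the values representable as an 8-bit
+// two's complement integer (-128 through 127), while a non-zero scale additionally requires
+// the value to be a multiple of `2^scale`.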
+
+#[derive(Clone, Hash, PartialEq, Eq)]
+pub(crate) struct FormatPredicateNode {
+ format_name: &'static str,
+ member_name: &'static str,
+ kind: FormatPredicateKind,
+}
+
+impl FormatPredicateNode {
+ fn new(
+ format: &InstructionFormat,
+ field_name: &'static str,
+ kind: FormatPredicateKind,
+ ) -> Self {
+ let member_name = format.imm_by_name(field_name).member;
+ Self {
+ format_name: format.name,
+ member_name,
+ kind,
+ }
+ }
+
+ fn new_raw(
+ format: &InstructionFormat,
+ member_name: &'static str,
+ kind: FormatPredicateKind,
+ ) -> Self {
+ Self {
+ format_name: format.name,
+ member_name,
+ kind,
+ }
+ }
+
+ fn destructuring_member_name(&self) -> &'static str {
+ match &self.kind {
+ FormatPredicateKind::LengthEquals(_) => {
+ // Length operates on the argument value list.
+ assert!(self.member_name == "args");
+ "ref args"
+ }
+ _ => self.member_name,
+ }
+ }
+
+ fn rust_predicate(&self) -> String {
+ match &self.kind {
+ FormatPredicateKind::IsEqual(arg) => {
+ format!("predicates::is_equal({}, {})", self.member_name, arg)
+ }
+ FormatPredicateKind::IsSignedInt(width, scale) => format!(
+ "predicates::is_signed_int({}, {}, {})",
+ self.member_name, width, scale
+ ),
+ FormatPredicateKind::IsUnsignedInt(width, scale) => format!(
+ "predicates::is_unsigned_int({}, {}, {})",
+ self.member_name, width, scale
+ ),
+ FormatPredicateKind::IsZeroInt => {
+ format!("predicates::is_zero_int({})", self.member_name)
+ }
+ FormatPredicateKind::IsZero32BitFloat => {
+ format!("predicates::is_zero_32_bit_float({})", self.member_name)
+ }
+ FormatPredicateKind::IsZero64BitFloat => {
+ format!("predicates::is_zero_64_bit_float({})", self.member_name)
+ }
+ FormatPredicateKind::IsAllZeroes => format!(
+ "predicates::is_all_zeroes(func.dfg.constants.get({}))",
+ self.member_name
+ ),
+ FormatPredicateKind::IsAllOnes => format!(
+ "predicates::is_all_ones(func.dfg.constants.get({}))",
+ self.member_name
+ ),
+ FormatPredicateKind::LengthEquals(num) => format!(
+ "predicates::has_length_of({}, {}, func)",
+ self.member_name, num
+ ),
+ FormatPredicateKind::IsColocatedFunc => {
+ format!("predicates::is_colocated_func({}, func)", self.member_name,)
+ }
+ FormatPredicateKind::IsColocatedData => {
+ format!("predicates::is_colocated_data({}, func)", self.member_name)
+ }
+ }
+ }
+}
+
+#[derive(Clone, Hash, PartialEq, Eq)]
+pub(crate) enum TypePredicateNode {
+ /// Is the value argument (at the index designated by the first member) the same type as the
+ /// type name (second member)?
+ TypeVarCheck(usize, String),
+
+ /// Is the controlling type variable the same type as the one designated by the type name
+ /// (only member)?
+ CtrlTypeVarCheck(String),
+}
+
+impl TypePredicateNode {
+ fn rust_predicate(&self, func_str: &str) -> String {
+ match self {
+ TypePredicateNode::TypeVarCheck(index, value_type_name) => format!(
+ "{}.dfg.value_type(args[{}]) == {}",
+ func_str, index, value_type_name
+ ),
+ TypePredicateNode::CtrlTypeVarCheck(value_type_name) => {
+ format!("{}.dfg.ctrl_typevar(inst) == {}", func_str, value_type_name)
+ }
+ }
+ }
+}
+
+/// A basic node in an instruction predicate: either an atom, or an AND/OR combination of other
+/// predicates.
+#[derive(Clone, Hash, PartialEq, Eq)]
+pub(crate) enum InstructionPredicateNode {
+ FormatPredicate(FormatPredicateNode),
+
+ TypePredicate(TypePredicateNode),
+
+ /// An AND-combination of two or more other predicates.
+ And(Vec<InstructionPredicateNode>),
+
+ /// An OR-combination of two or more other predicates.
+ Or(Vec<InstructionPredicateNode>),
+}
+
+impl InstructionPredicateNode {
+ fn rust_predicate(&self, func_str: &str) -> String {
+ match self {
+ InstructionPredicateNode::FormatPredicate(node) => node.rust_predicate(),
+ InstructionPredicateNode::TypePredicate(node) => node.rust_predicate(func_str),
+ InstructionPredicateNode::And(nodes) => nodes
+ .iter()
+ .map(|x| x.rust_predicate(func_str))
+ .collect::<Vec<_>>()
+ .join(" && "),
+ InstructionPredicateNode::Or(nodes) => nodes
+ .iter()
+ .map(|x| x.rust_predicate(func_str))
+ .collect::<Vec<_>>()
+ .join(" || "),
+ }
+ }
+
+ pub fn format_destructuring_member_name(&self) -> &str {
+ match self {
+ InstructionPredicateNode::FormatPredicate(format_pred) => {
+ format_pred.destructuring_member_name()
+ }
+ _ => panic!("Only for leaf format predicates"),
+ }
+ }
+
+ pub fn format_name(&self) -> &str {
+ match self {
+ InstructionPredicateNode::FormatPredicate(format_pred) => format_pred.format_name,
+ _ => panic!("Only for leaf format predicates"),
+ }
+ }
+
+ pub fn is_type_predicate(&self) -> bool {
+ match self {
+ InstructionPredicateNode::FormatPredicate(_)
+ | InstructionPredicateNode::And(_)
+ | InstructionPredicateNode::Or(_) => false,
+ InstructionPredicateNode::TypePredicate(_) => true,
+ }
+ }
+
+ fn collect_leaves(&self) -> Vec<&InstructionPredicateNode> {
+ let mut ret = Vec::new();
+ match self {
+ InstructionPredicateNode::And(nodes) | InstructionPredicateNode::Or(nodes) => {
+ for node in nodes {
+ ret.extend(node.collect_leaves());
+ }
+ }
+ _ => ret.push(self),
+ }
+ ret
+ }
+}
+
+#[derive(Clone, Hash, PartialEq, Eq)]
+pub(crate) struct InstructionPredicate {
+ node: Option<InstructionPredicateNode>,
+}
+
+impl Into<InstructionPredicate> for InstructionPredicateNode {
+ fn into(self) -> InstructionPredicate {
+ InstructionPredicate { node: Some(self) }
+ }
+}
+
+impl InstructionPredicate {
+ pub fn new() -> Self {
+ Self { node: None }
+ }
+
+ pub fn unwrap(self) -> InstructionPredicateNode {
+ self.node.unwrap()
+ }
+
+ pub fn new_typevar_check(
+ inst: &Instruction,
+ type_var: &TypeVar,
+ value_type: &ValueType,
+ ) -> InstructionPredicateNode {
+ let index = inst
+ .value_opnums
+ .iter()
+ .enumerate()
+ .find(|(_, &op_num)| inst.operands_in[op_num].type_var().unwrap() == type_var)
+ .unwrap()
+ .0;
+ InstructionPredicateNode::TypePredicate(TypePredicateNode::TypeVarCheck(
+ index,
+ value_type.rust_name(),
+ ))
+ }
+
+ pub fn new_ctrl_typevar_check(value_type: &ValueType) -> InstructionPredicateNode {
+ InstructionPredicateNode::TypePredicate(TypePredicateNode::CtrlTypeVarCheck(
+ value_type.rust_name(),
+ ))
+ }
+
+ pub fn new_is_field_equal(
+ format: &InstructionFormat,
+ field_name: &'static str,
+ imm_value: String,
+ ) -> InstructionPredicateNode {
+ InstructionPredicateNode::FormatPredicate(FormatPredicateNode::new(
+ format,
+ field_name,
+ FormatPredicateKind::IsEqual(imm_value),
+ ))
+ }
+
+ /// Used only for the AST module, which directly passes in the format field.
+ pub fn new_is_field_equal_ast(
+ format: &InstructionFormat,
+ field: &FormatField,
+ imm_value: String,
+ ) -> InstructionPredicateNode {
+ InstructionPredicateNode::FormatPredicate(FormatPredicateNode::new_raw(
+ format,
+ field.member,
+ FormatPredicateKind::IsEqual(imm_value),
+ ))
+ }
+
+ pub fn new_is_signed_int(
+ format: &InstructionFormat,
+ field_name: &'static str,
+ width: usize,
+ scale: usize,
+ ) -> InstructionPredicateNode {
+ InstructionPredicateNode::FormatPredicate(FormatPredicateNode::new(
+ format,
+ field_name,
+ FormatPredicateKind::IsSignedInt(width, scale),
+ ))
+ }
+
+ pub fn new_is_unsigned_int(
+ format: &InstructionFormat,
+ field_name: &'static str,
+ width: usize,
+ scale: usize,
+ ) -> InstructionPredicateNode {
+ InstructionPredicateNode::FormatPredicate(FormatPredicateNode::new(
+ format,
+ field_name,
+ FormatPredicateKind::IsUnsignedInt(width, scale),
+ ))
+ }
+
+ pub fn new_is_zero_int(
+ format: &InstructionFormat,
+ field_name: &'static str,
+ ) -> InstructionPredicateNode {
+ InstructionPredicateNode::FormatPredicate(FormatPredicateNode::new(
+ format,
+ field_name,
+ FormatPredicateKind::IsZeroInt,
+ ))
+ }
+
+ pub fn new_is_zero_32bit_float(
+ format: &InstructionFormat,
+ field_name: &'static str,
+ ) -> InstructionPredicateNode {
+ InstructionPredicateNode::FormatPredicate(FormatPredicateNode::new(
+ format,
+ field_name,
+ FormatPredicateKind::IsZero32BitFloat,
+ ))
+ }
+
+ pub fn new_is_zero_64bit_float(
+ format: &InstructionFormat,
+ field_name: &'static str,
+ ) -> InstructionPredicateNode {
+ InstructionPredicateNode::FormatPredicate(FormatPredicateNode::new(
+ format,
+ field_name,
+ FormatPredicateKind::IsZero64BitFloat,
+ ))
+ }
+
+ pub fn new_is_all_zeroes(
+ format: &InstructionFormat,
+ field_name: &'static str,
+ ) -> InstructionPredicateNode {
+ InstructionPredicateNode::FormatPredicate(FormatPredicateNode::new(
+ format,
+ field_name,
+ FormatPredicateKind::IsAllZeroes,
+ ))
+ }
+
+ pub fn new_is_all_ones(
+ format: &InstructionFormat,
+ field_name: &'static str,
+ ) -> InstructionPredicateNode {
+ InstructionPredicateNode::FormatPredicate(FormatPredicateNode::new(
+ format,
+ field_name,
+ FormatPredicateKind::IsAllOnes,
+ ))
+ }
+
+ pub fn new_length_equals(format: &InstructionFormat, size: usize) -> InstructionPredicateNode {
+ assert!(
+ format.has_value_list,
+ "the format must be variadic in number of arguments"
+ );
+ InstructionPredicateNode::FormatPredicate(FormatPredicateNode::new_raw(
+ format,
+ "args",
+ FormatPredicateKind::LengthEquals(size),
+ ))
+ }
+
+ pub fn new_is_colocated_func(
+ format: &InstructionFormat,
+ field_name: &'static str,
+ ) -> InstructionPredicateNode {
+ InstructionPredicateNode::FormatPredicate(FormatPredicateNode::new(
+ format,
+ field_name,
+ FormatPredicateKind::IsColocatedFunc,
+ ))
+ }
+
+ pub fn new_is_colocated_data(formats: &Formats) -> InstructionPredicateNode {
+ let format = &formats.unary_global_value;
+ InstructionPredicateNode::FormatPredicate(FormatPredicateNode::new(
+ &*format,
+ "global_value",
+ FormatPredicateKind::IsColocatedData,
+ ))
+ }
+
+ pub fn and(mut self, new_node: InstructionPredicateNode) -> Self {
+ let node = self.node;
+ let mut and_nodes = match node {
+ Some(node) => match node {
+ InstructionPredicateNode::And(nodes) => nodes,
+ InstructionPredicateNode::Or(_) => {
+ panic!("Can't mix and/or without implementing operator precedence!")
+ }
+ _ => vec![node],
+ },
+ _ => Vec::new(),
+ };
+ and_nodes.push(new_node);
+ self.node = Some(InstructionPredicateNode::And(and_nodes));
+ self
+ }
+
+ pub fn or(mut self, new_node: InstructionPredicateNode) -> Self {
+ let node = self.node;
+ let mut or_nodes = match node {
+ Some(node) => match node {
+ InstructionPredicateNode::Or(nodes) => nodes,
+ InstructionPredicateNode::And(_) => {
+ panic!("Can't mix and/or without implementing operator precedence!")
+ }
+ _ => vec![node],
+ },
+ _ => Vec::new(),
+ };
+ or_nodes.push(new_node);
+ self.node = Some(InstructionPredicateNode::Or(or_nodes));
+ self
+ }
+
+ pub fn rust_predicate(&self, func_str: &str) -> Option<String> {
+ self.node.as_ref().map(|root| root.rust_predicate(func_str))
+ }
+
+ /// Returns the type predicate if this is one, or None otherwise.
+ pub fn type_predicate(&self, func_str: &str) -> Option<String> {
+ let node = self.node.as_ref().unwrap();
+ if node.is_type_predicate() {
+ Some(node.rust_predicate(func_str))
+ } else {
+ None
+ }
+ }
+
+ /// Returns references to all the nodes that are leaves in the condition (i.e. by flattening
+ /// AND/OR).
+ pub fn collect_leaves(&self) -> Vec<&InstructionPredicateNode> {
+ self.node.as_ref().unwrap().collect_leaves()
+ }
+}
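+
+// Illustrative sketch (not part of the original source): the constructor methods above are
+// typically chained with `and` to accumulate a conjunction of leaf predicates. The format and
+// field names below are hypothetical.
+//
+// let predicate = InstructionPredicate::new()
+//     .and(InstructionPredicate::new_is_signed_int(&format, "offset", 8, 0))
+//     .and(InstructionPredicate::new_is_colocated_func(&format, "func_ref"));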
+
+#[derive(Copy, Clone, PartialEq, Eq, Hash, PartialOrd, Ord)]
+pub(crate) struct InstructionPredicateNumber(u32);
+entity_impl!(InstructionPredicateNumber);
+
+pub(crate) type InstructionPredicateMap =
+ PrimaryMap<InstructionPredicateNumber, InstructionPredicate>;
+
+/// A registry of predicates used to deduplicate them during Encodings construction. When the
+/// construction process is over, it needs to be extracted with `extract` and associated with the
+/// TargetIsa.
+pub(crate) struct InstructionPredicateRegistry {
+ /// Maps a predicate number to its actual predicate.
+ map: InstructionPredicateMap,
+
+ /// Inverse map: maps a predicate to its predicate number. This is used before inserting a
+ /// predicate, to check whether it already exists.
+ inverted_map: HashMap<InstructionPredicate, InstructionPredicateNumber>,
+}
+
+impl InstructionPredicateRegistry {
+ pub fn new() -> Self {
+ Self {
+ map: PrimaryMap::new(),
+ inverted_map: HashMap::new(),
+ }
+ }
+ pub fn insert(&mut self, predicate: InstructionPredicate) -> InstructionPredicateNumber {
+ match self.inverted_map.get(&predicate) {
+ Some(&found) => found,
+ None => {
+ let key = self.map.push(predicate.clone());
+ self.inverted_map.insert(predicate, key);
+ key
+ }
+ }
+ }
+ pub fn extract(self) -> InstructionPredicateMap {
+ self.map
+ }
+}
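+
+// Illustrative sketch (not part of the original source): inserting the same predicate twice
+// yields the same number thanks to the inverted map, and `extract` hands the deduplicated map
+// over to the TargetIsa once encoding construction is done.
+//
+// let mut registry = InstructionPredicateRegistry::new();
+// let first = registry.insert(predicate.clone());
+// let second = registry.insert(predicate);
+// assert_eq!(first, second);
+// let predicates: InstructionPredicateMap = registry.extract();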
+
+/// An instruction specification: either a plain instruction, or an instruction with bound types.
+pub(crate) enum InstSpec {
+ Inst(Instruction),
+ Bound(BoundInstruction),
+}
+
+impl InstSpec {
+ pub fn inst(&self) -> &Instruction {
+ match &self {
+ InstSpec::Inst(inst) => inst,
+ InstSpec::Bound(bound_inst) => &bound_inst.inst,
+ }
+ }
+}
+
+impl Bindable for InstSpec {
+ fn bind(&self, parameter: impl Into<BindParameter>) -> BoundInstruction {
+ match self {
+ InstSpec::Inst(inst) => inst.bind(parameter.into()),
+ InstSpec::Bound(inst) => inst.bind(parameter.into()),
+ }
+ }
+}
+
+impl Into<InstSpec> for &Instruction {
+ fn into(self) -> InstSpec {
+ InstSpec::Inst(self.clone())
+ }
+}
+
+impl Into<InstSpec> for BoundInstruction {
+ fn into(self) -> InstSpec {
+ InstSpec::Bound(self)
+ }
+}
+
+#[cfg(test)]
+mod test {
+ use super::*;
+ use crate::cdsl::formats::InstructionFormatBuilder;
+ use crate::cdsl::operands::{OperandKind, OperandKindFields};
+ use crate::cdsl::typevar::TypeSetBuilder;
+ use crate::shared::types::Int::{I32, I64};
+
+ fn field_to_operand(index: usize, field: OperandKindFields) -> Operand {
+ // Pretend the index string is &'static.
+ let name = Box::leak(index.to_string().into_boxed_str());
+ // Format's name / rust_type don't matter here.
+ let kind = OperandKind::new(name, name, field);
+ let operand = Operand::new(name, kind);
+ operand
+ }
+
+ fn field_to_operands(types: Vec<OperandKindFields>) -> Vec<Operand> {
+ types
+ .iter()
+ .enumerate()
+ .map(|(i, f)| field_to_operand(i, f.clone()))
+ .collect()
+ }
+
+ fn build_fake_instruction(
+ inputs: Vec<OperandKindFields>,
+ outputs: Vec<OperandKindFields>,
+ ) -> Instruction {
+ // Set up a format from the input operands.
+ let mut format = InstructionFormatBuilder::new("fake");
+ for (i, f) in inputs.iter().enumerate() {
+ match f {
+ OperandKindFields::TypeVar(_) => format = format.value(),
+ OperandKindFields::ImmValue => {
+ format = format.imm(&field_to_operand(i, f.clone()).kind)
+ }
+ _ => {}
+ };
+ }
+ let format = format.build();
+
+ // Create the fake instruction.
+ InstructionBuilder::new("fake", "A fake instruction for testing.", &format)
+ .operands_in(field_to_operands(inputs).iter().collect())
+ .operands_out(field_to_operands(outputs).iter().collect())
+ .build(OpcodeNumber(42))
+ }
+
+ #[test]
+ fn ensure_bound_instructions_can_bind_lane_types() {
+ let type1 = TypeSetBuilder::new().ints(8..64).build();
+ let in1 = OperandKindFields::TypeVar(TypeVar::new("a", "...", type1));
+ let inst = build_fake_instruction(vec![in1], vec![]);
+ inst.bind(LaneType::Int(I32));
+ }
+
+ #[test]
+ fn ensure_bound_instructions_can_bind_immediates() {
+ let inst = build_fake_instruction(vec![OperandKindFields::ImmValue], vec![]);
+ let bound_inst = inst.bind(Immediate::IntCC(IntCC::Equal));
+ assert!(bound_inst.verify_bindings().is_ok());
+ }
+
+ #[test]
+ #[should_panic]
+ fn ensure_instructions_fail_to_bind() {
+ let inst = build_fake_instruction(vec![], vec![]);
+ inst.bind(BindParameter::Lane(LaneType::Int(I32)));
+ // Trying to bind to an instruction with no inputs should fail.
+ }
+
+ #[test]
+ #[should_panic]
+ fn ensure_bound_instructions_fail_to_bind_too_many_types() {
+ let type1 = TypeSetBuilder::new().ints(8..64).build();
+ let in1 = OperandKindFields::TypeVar(TypeVar::new("a", "...", type1));
+ let inst = build_fake_instruction(vec![in1], vec![]);
+ inst.bind(LaneType::Int(I32)).bind(LaneType::Int(I64));
+ }
+
+ #[test]
+ #[should_panic]
+ fn ensure_instructions_fail_to_bind_too_many_immediates() {
+ let inst = build_fake_instruction(vec![OperandKindFields::ImmValue], vec![]);
+ inst.bind(BindParameter::Immediate(Immediate::IntCC(IntCC::Equal)))
+ .bind(BindParameter::Immediate(Immediate::IntCC(IntCC::Equal)));
+ // Trying to bind too many immediates to an instruction should fail; note that the immediate
+ // values are nonsensical but irrelevant to the purpose of this test.
+ }
+}
diff --git a/third_party/rust/cranelift-codegen-meta/src/cdsl/isa.rs b/third_party/rust/cranelift-codegen-meta/src/cdsl/isa.rs
new file mode 100644
index 0000000000..512105d09a
--- /dev/null
+++ b/third_party/rust/cranelift-codegen-meta/src/cdsl/isa.rs
@@ -0,0 +1,99 @@
+use std::collections::HashSet;
+use std::iter::FromIterator;
+
+use crate::cdsl::cpu_modes::CpuMode;
+use crate::cdsl::instructions::{InstructionGroup, InstructionPredicateMap};
+use crate::cdsl::recipes::Recipes;
+use crate::cdsl::regs::IsaRegs;
+use crate::cdsl::settings::SettingGroup;
+use crate::cdsl::xform::{TransformGroupIndex, TransformGroups};
+
+pub(crate) struct TargetIsa {
+ pub name: &'static str,
+ pub instructions: InstructionGroup,
+ pub settings: SettingGroup,
+ pub regs: IsaRegs,
+ pub recipes: Recipes,
+ pub cpu_modes: Vec<CpuMode>,
+ pub encodings_predicates: InstructionPredicateMap,
+
+ /// TransformGroupIndex values are global to all the ISAs, while we want indices into the
+ /// local array of transform groups directly used by this ISA. This list is used to translate
+ /// between the two (see `translate_group_index`).
+ pub local_transform_groups: Vec<TransformGroupIndex>,
+}
+
+impl TargetIsa {
+ pub fn new(
+ name: &'static str,
+ instructions: InstructionGroup,
+ settings: SettingGroup,
+ regs: IsaRegs,
+ recipes: Recipes,
+ cpu_modes: Vec<CpuMode>,
+ encodings_predicates: InstructionPredicateMap,
+ ) -> Self {
+ // Compute the local TransformGroup index.
+ let mut local_transform_groups = Vec::new();
+ for cpu_mode in &cpu_modes {
+ let transform_groups = cpu_mode.direct_transform_groups();
+ for group_index in transform_groups {
+ // find() is fine here: the number of transform groups is < 5 as of June 2019.
+ if local_transform_groups
+ .iter()
+ .find(|&val| group_index == *val)
+ .is_none()
+ {
+ local_transform_groups.push(group_index);
+ }
+ }
+ }
+
+ Self {
+ name,
+ instructions,
+ settings,
+ regs,
+ recipes,
+ cpu_modes,
+ encodings_predicates,
+ local_transform_groups,
+ }
+ }
+
+ /// Returns a deterministically ordered, deduplicated list of TransformGroupIndex for the
+ /// transitive set of TransformGroup this TargetIsa uses.
+ pub fn transitive_transform_groups(
+ &self,
+ all_groups: &TransformGroups,
+ ) -> Vec<TransformGroupIndex> {
+ let mut set = HashSet::new();
+
+ for &root in self.local_transform_groups.iter() {
+ set.insert(root);
+ let mut base = root;
+ // Follow the chain of chain_with.
+ while let Some(chain_with) = &all_groups.get(base).chain_with {
+ set.insert(*chain_with);
+ base = *chain_with;
+ }
+ }
+
+ let mut vec = Vec::from_iter(set);
+ vec.sort();
+ vec
+ }
+
+ /// Returns a deterministically ordered, deduplicated list of TransformGroupIndex for the directly
+ /// reachable set of TransformGroup this TargetIsa uses.
+ pub fn direct_transform_groups(&self) -> &Vec<TransformGroupIndex> {
+ &self.local_transform_groups
+ }
+
+ pub fn translate_group_index(&self, group_index: TransformGroupIndex) -> usize {
+ self.local_transform_groups
+ .iter()
+ .position(|&val| val == group_index)
+ .expect("TransformGroup unused by this TargetIsa!")
+ }
+}
diff --git a/third_party/rust/cranelift-codegen-meta/src/cdsl/mod.rs b/third_party/rust/cranelift-codegen-meta/src/cdsl/mod.rs
new file mode 100644
index 0000000000..698b64dff3
--- /dev/null
+++ b/third_party/rust/cranelift-codegen-meta/src/cdsl/mod.rs
@@ -0,0 +1,89 @@
+//! Cranelift DSL classes.
+//!
+//! This module defines the classes that are used to define Cranelift
+//! instructions and other entities.
+
+#[macro_use]
+pub mod ast;
+pub mod cpu_modes;
+pub mod encodings;
+pub mod formats;
+pub mod instructions;
+pub mod isa;
+pub mod operands;
+pub mod recipes;
+pub mod regs;
+pub mod settings;
+pub mod type_inference;
+pub mod types;
+pub mod typevar;
+pub mod xform;
+
+/// A macro that combines boolean settings into predicates using a natural, expression-like syntax.
+#[macro_export]
+macro_rules! predicate {
+ ($a:ident && $($b:tt)*) => {
+ PredicateNode::And(Box::new($a.into()), Box::new(predicate!($($b)*)))
+ };
+ (!$a:ident && $($b:tt)*) => {
+ PredicateNode::And(
+ Box::new(PredicateNode::Not(Box::new($a.into()))),
+ Box::new(predicate!($($b)*))
+ )
+ };
+ (!$a:ident) => {
+ PredicateNode::Not(Box::new($a.into()))
+ };
+ ($a:ident) => {
+ $a.into()
+ };
+}
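+
+// Illustrative sketch (not part of the original source): with two hypothetical boolean settings
+// `has_simd` and `use_pic`, `predicate!(has_simd && !use_pic)` expands to
+//
+// PredicateNode::And(
+//     Box::new(has_simd.into()),
+//     Box::new(PredicateNode::Not(Box::new(use_pic.into()))),
+// )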
+
+/// A macro that joins boolean settings into a list (e.g. `preset!(feature_a && feature_b)`).
+#[macro_export]
+macro_rules! preset {
+ () => {
+ vec![]
+ };
+ ($($x:ident)&&*) => {
+ {
+ let mut v = Vec::new();
+ $(
+ v.push($x.into());
+ )*
+ v
+ }
+ };
+}
+
+/// Convert the string `s` to CamelCase.
+pub fn camel_case(s: &str) -> String {
+ let mut output_chars = String::with_capacity(s.len());
+
+ let mut capitalize = true;
+ for curr_char in s.chars() {
+ if curr_char == '_' {
+ capitalize = true;
+ } else {
+ if capitalize {
+ output_chars.extend(curr_char.to_uppercase());
+ } else {
+ output_chars.push(curr_char);
+ }
+ capitalize = false;
+ }
+ }
+
+ output_chars
+}
+
+#[cfg(test)]
+mod tests {
+ use super::camel_case;
+
+ #[test]
+ fn camel_case_works() {
+ assert_eq!(camel_case("x"), "X");
+ assert_eq!(camel_case("camel_case"), "CamelCase");
+ }
+}
diff --git a/third_party/rust/cranelift-codegen-meta/src/cdsl/operands.rs b/third_party/rust/cranelift-codegen-meta/src/cdsl/operands.rs
new file mode 100644
index 0000000000..605df24862
--- /dev/null
+++ b/third_party/rust/cranelift-codegen-meta/src/cdsl/operands.rs
@@ -0,0 +1,173 @@
+use std::collections::HashMap;
+
+use crate::cdsl::typevar::TypeVar;
+
+/// An instruction operand can be an *immediate*, an *SSA value*, or an *entity reference*. The
+/// type of the operand is one of:
+///
+/// 1. A `ValueType` instance indicates an SSA value operand with a concrete type.
+///
+/// 2. A `TypeVar` instance indicates an SSA value operand, and the instruction is polymorphic over
+/// the possible concrete types that the type variable can assume.
+///
+/// 3. An `ImmediateKind` instance indicates an immediate operand whose value is encoded in the
+/// instruction itself rather than being passed as an SSA value.
+///
+/// 4. An `EntityRefKind` instance indicates an operand that references another entity in the
+/// function, typically something declared in the function preamble.
+#[derive(Clone, Debug)]
+pub(crate) struct Operand {
+ /// Name of the operand variable, as it appears in function parameters, legalizations, etc.
+ pub name: &'static str,
+
+ /// Type of the operand.
+ pub kind: OperandKind,
+
+ doc: Option<&'static str>,
+}
+
+impl Operand {
+ pub fn new(name: &'static str, kind: impl Into<OperandKind>) -> Self {
+ Self {
+ name,
+ doc: None,
+ kind: kind.into(),
+ }
+ }
+ pub fn with_doc(mut self, doc: &'static str) -> Self {
+ self.doc = Some(doc);
+ self
+ }
+
+ pub fn doc(&self) -> Option<&str> {
+ if let Some(doc) = &self.doc {
+ return Some(doc);
+ }
+ match &self.kind.fields {
+ OperandKindFields::TypeVar(tvar) => Some(&tvar.doc),
+ _ => self.kind.doc(),
+ }
+ }
+
+ pub fn is_value(&self) -> bool {
+ match self.kind.fields {
+ OperandKindFields::TypeVar(_) => true,
+ _ => false,
+ }
+ }
+
+ pub fn type_var(&self) -> Option<&TypeVar> {
+ match &self.kind.fields {
+ OperandKindFields::TypeVar(typevar) => Some(typevar),
+ _ => None,
+ }
+ }
+
+ pub fn is_varargs(&self) -> bool {
+ match self.kind.fields {
+ OperandKindFields::VariableArgs => true,
+ _ => false,
+ }
+ }
+
+ /// Returns true if the operand has an immediate kind or is an EntityRef.
+ pub fn is_immediate_or_entityref(&self) -> bool {
+ match self.kind.fields {
+ OperandKindFields::ImmEnum(_)
+ | OperandKindFields::ImmValue
+ | OperandKindFields::EntityRef => true,
+ _ => false,
+ }
+ }
+
+ /// Returns true if the operand has an immediate kind.
+ pub fn is_immediate(&self) -> bool {
+ match self.kind.fields {
+ OperandKindFields::ImmEnum(_) | OperandKindFields::ImmValue => true,
+ _ => false,
+ }
+ }
+
+ pub fn is_cpu_flags(&self) -> bool {
+ match &self.kind.fields {
+ OperandKindFields::TypeVar(type_var)
+ if type_var.name == "iflags" || type_var.name == "fflags" =>
+ {
+ true
+ }
+ _ => false,
+ }
+ }
+}
+
+pub type EnumValues = HashMap<&'static str, &'static str>;
+
+#[derive(Clone, Debug)]
+pub(crate) enum OperandKindFields {
+ EntityRef,
+ VariableArgs,
+ ImmValue,
+ ImmEnum(EnumValues),
+ TypeVar(TypeVar),
+}
+
+#[derive(Clone, Debug)]
+pub(crate) struct OperandKind {
+ /// String representation of the Rust type mapping to this OperandKind.
+ pub rust_type: &'static str,
+
+ /// Name of this OperandKind in the format's member field.
+ pub rust_field_name: &'static str,
+
+ /// Type-specific fields for this OperandKind.
+ pub fields: OperandKindFields,
+
+ doc: Option<&'static str>,
+}
+
+impl OperandKind {
+ pub fn new(
+ rust_field_name: &'static str,
+ rust_type: &'static str,
+ fields: OperandKindFields,
+ ) -> Self {
+ Self {
+ rust_field_name,
+ rust_type,
+ fields,
+ doc: None,
+ }
+ }
+ pub fn with_doc(mut self, doc: &'static str) -> Self {
+ assert!(self.doc.is_none());
+ self.doc = Some(doc);
+ self
+ }
+ fn doc(&self) -> Option<&str> {
+ if let Some(doc) = &self.doc {
+ return Some(doc);
+ }
+ match &self.fields {
+ OperandKindFields::TypeVar(type_var) => Some(&type_var.doc),
+ OperandKindFields::ImmEnum(_)
+ | OperandKindFields::ImmValue
+ | OperandKindFields::EntityRef
+ | OperandKindFields::VariableArgs => None,
+ }
+ }
+}
+
+impl Into<OperandKind> for &TypeVar {
+ fn into(self) -> OperandKind {
+ OperandKind::new(
+ "value",
+ "ir::Value",
+ OperandKindFields::TypeVar(self.into()),
+ )
+ }
+}
+impl Into<OperandKind> for &OperandKind {
+ fn into(self) -> OperandKind {
+ self.clone()
+ }
+}
diff --git a/third_party/rust/cranelift-codegen-meta/src/cdsl/recipes.rs b/third_party/rust/cranelift-codegen-meta/src/cdsl/recipes.rs
new file mode 100644
index 0000000000..dfe4cd67a5
--- /dev/null
+++ b/third_party/rust/cranelift-codegen-meta/src/cdsl/recipes.rs
@@ -0,0 +1,298 @@
+use std::rc::Rc;
+
+use cranelift_entity::{entity_impl, PrimaryMap};
+
+use crate::cdsl::formats::InstructionFormat;
+use crate::cdsl::instructions::InstructionPredicate;
+use crate::cdsl::regs::RegClassIndex;
+use crate::cdsl::settings::SettingPredicateNumber;
+
+/// A specific register in a register class.
+///
+/// A register is identified by the top-level register class it belongs to and
+/// its first register unit.
+///
+/// Specific registers are used to describe constraints on instructions where
+/// some operands must use a fixed register.
+///
+/// Specific registers can be created with the constructor, or looked up by name
+/// through the register bank owning the class (e.g. the `rcx` unit of the `GPR` class).
+#[derive(Copy, Clone, Hash, PartialEq, Eq)]
+pub(crate) struct Register {
+ pub regclass: RegClassIndex,
+ pub unit: u8,
+}
+
+impl Register {
+ pub fn new(regclass: RegClassIndex, unit: u8) -> Self {
+ Self { regclass, unit }
+ }
+}
+
+/// An operand that must be in a stack slot.
+///
+/// A `Stack` object can be used to indicate an operand constraint for a value
+/// operand that must live in a stack slot.
+#[derive(Copy, Clone, Hash, PartialEq)]
+pub(crate) struct Stack {
+ pub regclass: RegClassIndex,
+}
+
+impl Stack {
+ pub fn new(regclass: RegClassIndex) -> Self {
+ Self { regclass }
+ }
+ pub fn stack_base_mask(self) -> &'static str {
+ // TODO: Make this configurable instead of just using the SP.
+ "StackBaseMask(1)"
+ }
+}
+
+#[derive(Clone, Hash, PartialEq)]
+pub(crate) struct BranchRange {
+ pub inst_size: u64,
+ pub range: u64,
+}
+
+#[derive(Copy, Clone, Hash, PartialEq)]
+pub(crate) enum OperandConstraint {
+ RegClass(RegClassIndex),
+ FixedReg(Register),
+ TiedInput(usize),
+ Stack(Stack),
+}
+
+impl Into<OperandConstraint> for RegClassIndex {
+ fn into(self) -> OperandConstraint {
+ OperandConstraint::RegClass(self)
+ }
+}
+
+impl Into<OperandConstraint> for Register {
+ fn into(self) -> OperandConstraint {
+ OperandConstraint::FixedReg(self)
+ }
+}
+
+impl Into<OperandConstraint> for usize {
+ fn into(self) -> OperandConstraint {
+ OperandConstraint::TiedInput(self)
+ }
+}
+
+impl Into<OperandConstraint> for Stack {
+ fn into(self) -> OperandConstraint {
+ OperandConstraint::Stack(self)
+ }
+}
+
+/// A recipe for encoding instructions with a given format.
+///
+/// Many different instructions can be encoded by the same recipe, but they
+/// must all have the same instruction format.
+///
+/// The `operands_in` and `operands_out` arguments are lists specifying the register
+/// allocation constraints for the value operands and results respectively. The
+/// possible constraints for an operand are:
+///
+/// - A `RegClass` specifying the set of allowed registers.
+/// - A `Register` specifying a fixed-register operand.
+/// - An integer indicating that this result is tied to a value operand, so
+/// they must use the same register.
+/// - A `Stack` specifying a value in a stack slot.
+///
+/// The `branch_range` argument must be provided for recipes that can encode
+/// branch instructions. It is an `(origin, bits)` tuple describing the exact
+/// range that can be encoded in a branch instruction.
+#[derive(Clone)]
+pub(crate) struct EncodingRecipe {
+ /// Short mnemonic name for this recipe.
+ pub name: String,
+
+ /// Associated instruction format.
+ pub format: Rc<InstructionFormat>,
+
+ /// Base number of bytes in the binary encoded instruction.
+ pub base_size: u64,
+
+ /// Tuple of register constraints for value operands.
+ pub operands_in: Vec<OperandConstraint>,
+
+ /// Tuple of register constraints for results.
+ pub operands_out: Vec<OperandConstraint>,
+
+ /// Function name to use when computing actual size.
+ pub compute_size: &'static str,
+
+ /// `(origin, bits)` range for branches.
+ pub branch_range: Option<BranchRange>,
+
+ /// This instruction clobbers `iflags` and `fflags`; true by default.
+ pub clobbers_flags: bool,
+
+ /// Instruction predicate.
+ pub inst_predicate: Option<InstructionPredicate>,
+
+ /// ISA predicate.
+ pub isa_predicate: Option<SettingPredicateNumber>,
+
+ /// Rust code for binary emission.
+ pub emit: Option<String>,
+}
+
+// Implement PartialEq ourselves: take all the fields into account except the name.
+impl PartialEq for EncodingRecipe {
+ fn eq(&self, other: &Self) -> bool {
+ Rc::ptr_eq(&self.format, &other.format)
+ && self.base_size == other.base_size
+ && self.operands_in == other.operands_in
+ && self.operands_out == other.operands_out
+ && self.compute_size == other.compute_size
+ && self.branch_range == other.branch_range
+ && self.clobbers_flags == other.clobbers_flags
+ && self.inst_predicate == other.inst_predicate
+ && self.isa_predicate == other.isa_predicate
+ && self.emit == other.emit
+ }
+}
+
+// To allow using it in a hashmap.
+impl Eq for EncodingRecipe {}
+
+#[derive(Copy, Clone, PartialEq, Eq, Hash, PartialOrd, Ord)]
+pub(crate) struct EncodingRecipeNumber(u32);
+entity_impl!(EncodingRecipeNumber);
+
+pub(crate) type Recipes = PrimaryMap<EncodingRecipeNumber, EncodingRecipe>;
+
+#[derive(Clone)]
+pub(crate) struct EncodingRecipeBuilder {
+ pub name: String,
+ format: Rc<InstructionFormat>,
+ pub base_size: u64,
+ pub operands_in: Option<Vec<OperandConstraint>>,
+ pub operands_out: Option<Vec<OperandConstraint>>,
+ pub compute_size: Option<&'static str>,
+ pub branch_range: Option<BranchRange>,
+ pub emit: Option<String>,
+ clobbers_flags: Option<bool>,
+ inst_predicate: Option<InstructionPredicate>,
+ isa_predicate: Option<SettingPredicateNumber>,
+}
+
+impl EncodingRecipeBuilder {
+ pub fn new(name: impl Into<String>, format: &Rc<InstructionFormat>, base_size: u64) -> Self {
+ Self {
+ name: name.into(),
+ format: format.clone(),
+ base_size,
+ operands_in: None,
+ operands_out: None,
+ compute_size: None,
+ branch_range: None,
+ emit: None,
+ clobbers_flags: None,
+ inst_predicate: None,
+ isa_predicate: None,
+ }
+ }
+
+ // Setters.
+ pub fn operands_in(mut self, constraints: Vec<impl Into<OperandConstraint>>) -> Self {
+ assert!(self.operands_in.is_none());
+ self.operands_in = Some(
+ constraints
+ .into_iter()
+ .map(|constr| constr.into())
+ .collect(),
+ );
+ self
+ }
+ pub fn operands_out(mut self, constraints: Vec<impl Into<OperandConstraint>>) -> Self {
+ assert!(self.operands_out.is_none());
+ self.operands_out = Some(
+ constraints
+ .into_iter()
+ .map(|constr| constr.into())
+ .collect(),
+ );
+ self
+ }
+ pub fn clobbers_flags(mut self, flag: bool) -> Self {
+ assert!(self.clobbers_flags.is_none());
+ self.clobbers_flags = Some(flag);
+ self
+ }
+ pub fn emit(mut self, code: impl Into<String>) -> Self {
+ assert!(self.emit.is_none());
+ self.emit = Some(code.into());
+ self
+ }
+ pub fn branch_range(mut self, range: (u64, u64)) -> Self {
+ assert!(self.branch_range.is_none());
+ self.branch_range = Some(BranchRange {
+ inst_size: range.0,
+ range: range.1,
+ });
+ self
+ }
+ pub fn isa_predicate(mut self, pred: SettingPredicateNumber) -> Self {
+ assert!(self.isa_predicate.is_none());
+ self.isa_predicate = Some(pred);
+ self
+ }
+ pub fn inst_predicate(mut self, inst_predicate: impl Into<InstructionPredicate>) -> Self {
+ assert!(self.inst_predicate.is_none());
+ self.inst_predicate = Some(inst_predicate.into());
+ self
+ }
+ pub fn compute_size(mut self, compute_size: &'static str) -> Self {
+ assert!(self.compute_size.is_none());
+ self.compute_size = Some(compute_size);
+ self
+ }
+
+ pub fn build(self) -> EncodingRecipe {
+ let operands_in = self.operands_in.unwrap_or_default();
+ let operands_out = self.operands_out.unwrap_or_default();
+
+ // The number of input constraints must match the number of format input operands.
+ if !self.format.has_value_list {
+ assert!(
+ operands_in.len() == self.format.num_value_operands,
+ "missing operand constraints for recipe {} (format {})",
+ self.name,
+ self.format.name
+ );
+ }
+
+ // Ensure tied inputs actually refer to existing inputs.
+ for constraint in operands_in.iter().chain(operands_out.iter()) {
+ if let OperandConstraint::TiedInput(n) = *constraint {
+ assert!(n < operands_in.len());
+ }
+ }
+
+ let compute_size = match self.compute_size {
+ Some(compute_size) => compute_size,
+ None => "base_size",
+ };
+
+ let clobbers_flags = self.clobbers_flags.unwrap_or(true);
+
+ EncodingRecipe {
+ name: self.name,
+ format: self.format,
+ base_size: self.base_size,
+ operands_in,
+ operands_out,
+ compute_size,
+ branch_range: self.branch_range,
+ clobbers_flags,
+ inst_predicate: self.inst_predicate,
+ isa_predicate: self.isa_predicate,
+ emit: self.emit,
+ }
+ }
+}
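+
+// Illustrative sketch (not part of the original source): typical builder usage. The format,
+// register class, and emission snippet below are hypothetical placeholders.
+//
+// let recipe = EncodingRecipeBuilder::new("binop", &formats.binary, 2)
+//     .operands_in(vec![gpr, gpr])
+//     .operands_out(vec![gpr])
+//     .emit("put_op2(bits, in_reg0, in_reg1, out_reg0, sink);")
+//     .build();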
diff --git a/third_party/rust/cranelift-codegen-meta/src/cdsl/regs.rs b/third_party/rust/cranelift-codegen-meta/src/cdsl/regs.rs
new file mode 100644
index 0000000000..864826ee43
--- /dev/null
+++ b/third_party/rust/cranelift-codegen-meta/src/cdsl/regs.rs
@@ -0,0 +1,412 @@
+use cranelift_codegen_shared::constants;
+use cranelift_entity::{entity_impl, EntityRef, PrimaryMap};
+
+#[derive(Copy, Clone, PartialEq, Eq, Hash, PartialOrd, Ord)]
+pub(crate) struct RegBankIndex(u32);
+entity_impl!(RegBankIndex);
+
+pub(crate) struct RegBank {
+ pub name: &'static str,
+ pub first_unit: u8,
+ pub units: u8,
+ pub names: Vec<&'static str>,
+ pub prefix: &'static str,
+ pub pressure_tracking: bool,
+ pub pinned_reg: Option<u16>,
+ pub toprcs: Vec<RegClassIndex>,
+ pub classes: Vec<RegClassIndex>,
+}
+
+impl RegBank {
+ pub fn new(
+ name: &'static str,
+ first_unit: u8,
+ units: u8,
+ names: Vec<&'static str>,
+ prefix: &'static str,
+ pressure_tracking: bool,
+ pinned_reg: Option<u16>,
+ ) -> Self {
+ RegBank {
+ name,
+ first_unit,
+ units,
+ names,
+ prefix,
+ pressure_tracking,
+ pinned_reg,
+ toprcs: Vec::new(),
+ classes: Vec::new(),
+ }
+ }
+
+ fn unit_by_name(&self, name: &'static str) -> u8 {
+ let unit = if let Some(found) = self.names.iter().position(|&reg_name| reg_name == name) {
+ found
+ } else {
+ // Try to match without the bank prefix.
+ assert!(name.starts_with(self.prefix));
+ let name_without_prefix = &name[self.prefix.len()..];
+ if let Some(found) = self
+ .names
+ .iter()
+ .position(|&reg_name| reg_name == name_without_prefix)
+ {
+ found
+ } else {
+ // Last resort: try to parse a trailing number and use it as an index into the bank, e.g.
+ // r15 on x86.
+ if let Ok(as_num) = name_without_prefix.parse::<u8>() {
+ assert!(
+ as_num < self.units,
+ "trying to get {}, but bank only has {} registers!",
+ name,
+ self.units
+ );
+ as_num as usize
+ } else {
+ panic!("invalid register name {}", name);
+ }
+ }
+ };
+ self.first_unit + (unit as u8)
+ }
+}
+
+#[derive(Copy, Clone, PartialEq, Eq, Hash, PartialOrd, Ord, Debug)]
+pub(crate) struct RegClassIndex(u32);
+entity_impl!(RegClassIndex);
+
+pub(crate) struct RegClass {
+ pub name: &'static str,
+ pub index: RegClassIndex,
+ pub width: u8,
+ pub bank: RegBankIndex,
+ pub toprc: RegClassIndex,
+ pub count: u8,
+ pub start: u8,
+ pub subclasses: Vec<RegClassIndex>,
+}
+
+impl RegClass {
+ pub fn new(
+ name: &'static str,
+ index: RegClassIndex,
+ width: u8,
+ bank: RegBankIndex,
+ toprc: RegClassIndex,
+ count: u8,
+ start: u8,
+ ) -> Self {
+ Self {
+ name,
+ index,
+ width,
+ bank,
+ toprc,
+ count,
+ start,
+ subclasses: Vec::new(),
+ }
+ }
+
+ /// Compute a bit-mask of subclasses, including self.
+ pub fn subclass_mask(&self) -> u64 {
+ let mut m = 1 << self.index.index();
+ for rc in self.subclasses.iter() {
+ m |= 1 << rc.index();
+ }
+ m
+ }
+
+ /// Compute a bit-mask of the register units allocated by this register class.
+ pub fn mask(&self, bank_first_unit: u8) -> Vec<u32> {
+ let mut u = (self.start + bank_first_unit) as usize;
+ let mut out_mask = vec![0, 0, 0];
+ for _ in 0..self.count {
+ out_mask[u / 32] |= 1 << (u % 32);
+ u += self.width as usize;
+ }
+ out_mask
+ }
+}
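+
+// Worked example (not part of the original source): for a class with index 2 and subclasses at
+// indices 3 and 5, `subclass_mask()` returns 0b10_1100. For a class with `start == 0`,
+// `width == 2` and `count == 2` in a bank whose first unit is 0, `mask(0)` sets bits 0 and 2 of
+// the first word, i.e. returns `vec![0b101, 0, 0]`.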
+
+pub(crate) enum RegClassProto {
+ TopLevel(RegBankIndex),
+ SubClass(RegClassIndex),
+}
+
+pub(crate) struct RegClassBuilder {
+ pub name: &'static str,
+ pub width: u8,
+ pub count: u8,
+ pub start: u8,
+ pub proto: RegClassProto,
+}
+
+impl RegClassBuilder {
+ pub fn new_toplevel(name: &'static str, bank: RegBankIndex) -> Self {
+ Self {
+ name,
+ width: 1,
+ count: 0,
+ start: 0,
+ proto: RegClassProto::TopLevel(bank),
+ }
+ }
+ pub fn subclass_of(
+ name: &'static str,
+ parent_index: RegClassIndex,
+ start: u8,
+ stop: u8,
+ ) -> Self {
+ assert!(stop >= start);
+ Self {
+ name,
+ width: 0,
+ count: stop - start,
+ start,
+ proto: RegClassProto::SubClass(parent_index),
+ }
+ }
+ pub fn count(mut self, count: u8) -> Self {
+ self.count = count;
+ self
+ }
+ pub fn width(mut self, width: u8) -> Self {
+ match self.proto {
+ RegClassProto::TopLevel(_) => self.width = width,
+ RegClassProto::SubClass(_) => panic!("Subclasses inherit their parent's width."),
+ }
+ self
+ }
+}
+
+pub(crate) struct RegBankBuilder {
+ pub name: &'static str,
+ pub units: u8,
+ pub names: Vec<&'static str>,
+ pub prefix: &'static str,
+ pub pressure_tracking: Option<bool>,
+ pub pinned_reg: Option<u16>,
+}
+
+impl RegBankBuilder {
+ pub fn new(name: &'static str, prefix: &'static str) -> Self {
+ Self {
+ name,
+ units: 0,
+ names: vec![],
+ prefix,
+ pressure_tracking: None,
+ pinned_reg: None,
+ }
+ }
+ pub fn units(mut self, units: u8) -> Self {
+ self.units = units;
+ self
+ }
+ pub fn names(mut self, names: Vec<&'static str>) -> Self {
+ self.names = names;
+ self
+ }
+ pub fn track_pressure(mut self, track: bool) -> Self {
+ self.pressure_tracking = Some(track);
+ self
+ }
+ pub fn pinned_reg(mut self, unit: u16) -> Self {
+ assert!(unit < u16::from(self.units));
+ self.pinned_reg = Some(unit);
+ self
+ }
+}
+
+pub(crate) struct IsaRegsBuilder {
+ pub banks: PrimaryMap<RegBankIndex, RegBank>,
+ pub classes: PrimaryMap<RegClassIndex, RegClass>,
+}
+
+impl IsaRegsBuilder {
+ pub fn new() -> Self {
+ Self {
+ banks: PrimaryMap::new(),
+ classes: PrimaryMap::new(),
+ }
+ }
+
+ pub fn add_bank(&mut self, builder: RegBankBuilder) -> RegBankIndex {
+ let first_unit = if self.banks.is_empty() {
+ 0
+ } else {
+ let last = &self.banks.last().unwrap();
+ let first_available_unit = (last.first_unit + last.units) as i8;
+ let units = builder.units;
+ let align = if units.is_power_of_two() {
+ units
+ } else {
+ units.next_power_of_two()
+ } as i8;
+ (first_available_unit + align - 1) & -align
+ } as u8;
+
+ self.banks.push(RegBank::new(
+ builder.name,
+ first_unit,
+ builder.units,
+ builder.names,
+ builder.prefix,
+ builder
+ .pressure_tracking
+ .expect("Pressure tracking must be explicitly set"),
+ builder.pinned_reg,
+ ))
+ }
+
+ pub fn add_class(&mut self, builder: RegClassBuilder) -> RegClassIndex {
+ let class_index = self.classes.next_key();
+
+ // Finish delayed construction of RegClass.
+ let (bank, toprc, start, width) = match builder.proto {
+ RegClassProto::TopLevel(bank_index) => {
+ self.banks
+ .get_mut(bank_index)
+ .unwrap()
+ .toprcs
+ .push(class_index);
+ (bank_index, class_index, builder.start, builder.width)
+ }
+ RegClassProto::SubClass(parent_class_index) => {
+ assert!(builder.width == 0);
+ let (bank, toprc, start, width) = {
+ let parent = self.classes.get(parent_class_index).unwrap();
+ (parent.bank, parent.toprc, parent.start, parent.width)
+ };
+ for reg_class in self.classes.values_mut() {
+ if reg_class.toprc == toprc {
+ reg_class.subclasses.push(class_index);
+ }
+ }
+ let subclass_start = start + builder.start * width;
+ (bank, toprc, subclass_start, width)
+ }
+ };
+
+ let reg_bank_units = self.banks.get(bank).unwrap().units;
+ assert!(start < reg_bank_units);
+
+ let count = if builder.count != 0 {
+ builder.count
+ } else {
+ reg_bank_units / width
+ };
+
+ let reg_class = RegClass::new(builder.name, class_index, width, bank, toprc, count, start);
+ self.classes.push(reg_class);
+
+ let reg_bank = self.banks.get_mut(bank).unwrap();
+ reg_bank.classes.push(class_index);
+
+ class_index
+ }
+
+ /// Checks that the set of register classes satisfies:
+ ///
+ /// 1. Closed under intersection: The intersection of any two register
+ /// classes in the set is either empty or identical to a member of the
+ /// set.
+ /// 2. There are no identical classes under different names.
+ /// 3. Classes are sorted topologically such that all subclasses have a
+ /// higher index than the superclass.
+ pub fn build(self) -> IsaRegs {
+ for reg_bank in self.banks.values() {
+ for i1 in reg_bank.classes.iter() {
+ for i2 in reg_bank.classes.iter() {
+ if i1 >= i2 {
+ continue;
+ }
+
+ let rc1 = self.classes.get(*i1).unwrap();
+ let rc2 = self.classes.get(*i2).unwrap();
+
+ let rc1_mask = rc1.mask(0);
+ let rc2_mask = rc2.mask(0);
+
+ assert!(
+ rc1.width != rc2.width || rc1_mask != rc2_mask,
+ "no duplicates"
+ );
+ if rc1.width != rc2.width {
+ continue;
+ }
+
+ let mut intersect = Vec::new();
+ for (a, b) in rc1_mask.iter().zip(rc2_mask.iter()) {
+ intersect.push(a & b);
+ }
+ if intersect == vec![0; intersect.len()] {
+ continue;
+ }
+
+ // Classes must be topologically ordered, so the intersection can't be the
+ // superclass.
+ assert!(intersect != rc1_mask);
+
+ // If the intersection is the second one, then it must be a subclass.
+ if intersect == rc2_mask {
+ assert!(self
+ .classes
+ .get(*i1)
+ .unwrap()
+ .subclasses
+ .iter()
+ .any(|x| *x == *i2));
+ }
+ }
+ }
+ }
+
+ assert!(
+ self.classes.len() <= constants::MAX_NUM_REG_CLASSES,
+ "Too many register classes"
+ );
+
+ let num_toplevel = self
+ .classes
+ .values()
+ .filter(|x| x.toprc == x.index && self.banks.get(x.bank).unwrap().pressure_tracking)
+ .count();
+
+ assert!(
+ num_toplevel <= constants::MAX_TRACKED_TOP_RCS,
+ "Too many top-level register classes"
+ );
+
+ IsaRegs::new(self.banks, self.classes)
+ }
+}
+
+pub(crate) struct IsaRegs {
+ pub banks: PrimaryMap<RegBankIndex, RegBank>,
+ pub classes: PrimaryMap<RegClassIndex, RegClass>,
+}
+
+impl IsaRegs {
+ fn new(
+ banks: PrimaryMap<RegBankIndex, RegBank>,
+ classes: PrimaryMap<RegClassIndex, RegClass>,
+ ) -> Self {
+ Self { banks, classes }
+ }
+
+ pub fn class_by_name(&self, name: &str) -> RegClassIndex {
+ self.classes
+ .values()
+ .find(|&class| class.name == name)
+ .unwrap_or_else(|| panic!("register class {} not found", name))
+ .index
+ }
+
+ pub fn regunit_by_name(&self, class_index: RegClassIndex, name: &'static str) -> u8 {
+ let bank_index = self.classes.get(class_index).unwrap().bank;
+ self.banks.get(bank_index).unwrap().unit_by_name(name)
+ }
+}
diff --git a/third_party/rust/cranelift-codegen-meta/src/cdsl/settings.rs b/third_party/rust/cranelift-codegen-meta/src/cdsl/settings.rs
new file mode 100644
index 0000000000..217bad9955
--- /dev/null
+++ b/third_party/rust/cranelift-codegen-meta/src/cdsl/settings.rs
@@ -0,0 +1,407 @@
+use std::iter;
+
+#[derive(Clone, Copy, Hash, PartialEq, Eq)]
+pub(crate) struct BoolSettingIndex(usize);
+
+#[derive(Hash, PartialEq, Eq)]
+pub(crate) struct BoolSetting {
+ pub default: bool,
+ pub bit_offset: u8,
+ pub predicate_number: u8,
+}
+
+#[derive(Hash, PartialEq, Eq)]
+pub(crate) enum SpecificSetting {
+ Bool(BoolSetting),
+ Enum(Vec<&'static str>),
+ Num(u8),
+}
+
+#[derive(Hash, PartialEq, Eq)]
+pub(crate) struct Setting {
+ pub name: &'static str,
+ pub comment: &'static str,
+ pub specific: SpecificSetting,
+ pub byte_offset: u8,
+}
+
+impl Setting {
+ pub fn default_byte(&self) -> u8 {
+ match self.specific {
+ SpecificSetting::Bool(BoolSetting {
+ default,
+ bit_offset,
+ ..
+ }) => {
+ if default {
+ 1 << bit_offset
+ } else {
+ 0
+ }
+ }
+ SpecificSetting::Enum(_) => 0,
+ SpecificSetting::Num(default) => default,
+ }
+ }
+
+ fn byte_for_value(&self, v: bool) -> u8 {
+ match self.specific {
+ SpecificSetting::Bool(BoolSetting { bit_offset, .. }) => {
+ if v {
+ 1 << bit_offset
+ } else {
+ 0
+ }
+ }
+ _ => panic!("byte_for_value shouldn't be used for non-boolean settings."),
+ }
+ }
+
+ fn byte_mask(&self) -> u8 {
+ match self.specific {
+ SpecificSetting::Bool(BoolSetting { bit_offset, .. }) => 1 << bit_offset,
+ _ => panic!("byte_for_value shouldn't be used for non-boolean settings."),
+ }
+ }
+}
+
+#[derive(Hash, PartialEq, Eq)]
+pub(crate) struct PresetIndex(usize);
+
+#[derive(Hash, PartialEq, Eq)]
+pub(crate) enum PresetType {
+ BoolSetting(BoolSettingIndex),
+ OtherPreset(PresetIndex),
+}
+
+impl Into<PresetType> for BoolSettingIndex {
+ fn into(self) -> PresetType {
+ PresetType::BoolSetting(self)
+ }
+}
+impl Into<PresetType> for PresetIndex {
+ fn into(self) -> PresetType {
+ PresetType::OtherPreset(self)
+ }
+}
+
+#[derive(Hash, PartialEq, Eq)]
+pub(crate) struct Preset {
+ pub name: &'static str,
+ values: Vec<BoolSettingIndex>,
+}
+
+impl Preset {
+ pub fn layout(&self, group: &SettingGroup) -> Vec<(u8, u8)> {
+ let mut layout: Vec<(u8, u8)> = iter::repeat((0, 0))
+ .take(group.settings_size as usize)
+ .collect();
+ for bool_index in &self.values {
+ let setting = &group.settings[bool_index.0];
+ let mask = setting.byte_mask();
+ let val = setting.byte_for_value(true);
+ assert!((val & !mask) == 0);
+ let (ref mut l_mask, ref mut l_val) =
+ *layout.get_mut(setting.byte_offset as usize).unwrap();
+ *l_mask |= mask;
+ *l_val = (*l_val & !mask) | val;
+ }
+ layout
+ }
+}
+
+pub(crate) struct SettingGroup {
+ pub name: &'static str,
+ pub settings: Vec<Setting>,
+ pub bool_start_byte_offset: u8,
+ pub settings_size: u8,
+ pub presets: Vec<Preset>,
+ pub predicates: Vec<Predicate>,
+}
+
+impl SettingGroup {
+ fn num_bool_settings(&self) -> u8 {
+ self.settings
+ .iter()
+ .filter(|s| {
+ if let SpecificSetting::Bool(_) = s.specific {
+ true
+ } else {
+ false
+ }
+ })
+ .count() as u8
+ }
+
+ pub fn byte_size(&self) -> u8 {
+ let num_predicates = self.num_bool_settings() + (self.predicates.len() as u8);
+ self.bool_start_byte_offset + (num_predicates + 7) / 8
+ }
+
+ pub fn get_bool(&self, name: &'static str) -> (BoolSettingIndex, &Self) {
+ for (i, s) in self.settings.iter().enumerate() {
+ if let SpecificSetting::Bool(_) = s.specific {
+ if s.name == name {
+ return (BoolSettingIndex(i), self);
+ }
+ }
+ }
+ panic!("Should have found bool setting by name.");
+ }
+
+ pub fn predicate_by_name(&self, name: &'static str) -> SettingPredicateNumber {
+ self.predicates
+ .iter()
+ .find(|pred| pred.name == name)
+ .unwrap_or_else(|| panic!("unknown predicate {}", name))
+ .number
+ }
+}
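+
+// Worked example (not part of the original source): with 2 non-boolean settings, 5 boolean
+// settings, and 4 named predicates, `bool_start_byte_offset` is 2 and `byte_size()` is
+// 2 + (5 + 4 + 7) / 8 = 4 bytes.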
+
+/// This is the basic information needed to track the specific parts of a setting when building
+/// them.
+pub(crate) enum ProtoSpecificSetting {
+ Bool(bool),
+ Enum(Vec<&'static str>),
+ Num(u8),
+}
+
+/// This is the information provided during building for a setting.
+struct ProtoSetting {
+ name: &'static str,
+ comment: &'static str,
+ specific: ProtoSpecificSetting,
+}
+
+#[derive(Hash, PartialEq, Eq)]
+pub(crate) enum PredicateNode {
+ OwnedBool(BoolSettingIndex),
+ SharedBool(&'static str, &'static str),
+ Not(Box<PredicateNode>),
+ And(Box<PredicateNode>, Box<PredicateNode>),
+}
+
+impl Into<PredicateNode> for BoolSettingIndex {
+ fn into(self) -> PredicateNode {
+ PredicateNode::OwnedBool(self)
+ }
+}
+impl<'a> Into<PredicateNode> for (BoolSettingIndex, &'a SettingGroup) {
+ fn into(self) -> PredicateNode {
+ let (index, group) = (self.0, self.1);
+ let setting = &group.settings[index.0];
+ PredicateNode::SharedBool(group.name, setting.name)
+ }
+}
+
+impl PredicateNode {
+ fn render(&self, group: &SettingGroup) -> String {
+ match *self {
+ PredicateNode::OwnedBool(bool_setting_index) => format!(
+ "{}.{}()",
+ group.name, group.settings[bool_setting_index.0].name
+ ),
+ PredicateNode::SharedBool(ref group_name, ref bool_name) => {
+ format!("{}.{}()", group_name, bool_name)
+ }
+ PredicateNode::And(ref lhs, ref rhs) => {
+ format!("{} && {}", lhs.render(group), rhs.render(group))
+ }
+ PredicateNode::Not(ref node) => format!("!({})", node.render(group)),
+ }
+ }
+}
+
+struct ProtoPredicate {
+ pub name: &'static str,
+ node: PredicateNode,
+}
+
+pub(crate) type SettingPredicateNumber = u8;
+
+pub(crate) struct Predicate {
+ pub name: &'static str,
+ node: PredicateNode,
+ pub number: SettingPredicateNumber,
+}
+
+impl Predicate {
+ pub fn render(&self, group: &SettingGroup) -> String {
+ self.node.render(group)
+ }
+}
+
+pub(crate) struct SettingGroupBuilder {
+ name: &'static str,
+ settings: Vec<ProtoSetting>,
+ presets: Vec<Preset>,
+ predicates: Vec<ProtoPredicate>,
+}
+
+impl SettingGroupBuilder {
+ pub fn new(name: &'static str) -> Self {
+ Self {
+ name,
+ settings: Vec::new(),
+ presets: Vec::new(),
+ predicates: Vec::new(),
+ }
+ }
+
+ fn add_setting(
+ &mut self,
+ name: &'static str,
+ comment: &'static str,
+ specific: ProtoSpecificSetting,
+ ) {
+ self.settings.push(ProtoSetting {
+ name,
+ comment,
+ specific,
+ })
+ }
+
+ pub fn add_bool(
+ &mut self,
+ name: &'static str,
+ comment: &'static str,
+ default: bool,
+ ) -> BoolSettingIndex {
+ assert!(
+ self.predicates.is_empty(),
+ "predicates must be added after the boolean settings"
+ );
+ self.add_setting(name, comment, ProtoSpecificSetting::Bool(default));
+ BoolSettingIndex(self.settings.len() - 1)
+ }
+
+ pub fn add_enum(
+ &mut self,
+ name: &'static str,
+ comment: &'static str,
+ values: Vec<&'static str>,
+ ) {
+ self.add_setting(name, comment, ProtoSpecificSetting::Enum(values));
+ }
+
+ pub fn add_num(&mut self, name: &'static str, comment: &'static str, default: u8) {
+ self.add_setting(name, comment, ProtoSpecificSetting::Num(default));
+ }
+
+ pub fn add_predicate(&mut self, name: &'static str, node: PredicateNode) {
+ self.predicates.push(ProtoPredicate { name, node });
+ }
+
+ pub fn add_preset(&mut self, name: &'static str, args: Vec<PresetType>) -> PresetIndex {
+ let mut values = Vec::new();
+ for arg in args {
+ match arg {
+ PresetType::OtherPreset(index) => {
+ values.extend(self.presets[index.0].values.iter());
+ }
+ PresetType::BoolSetting(index) => values.push(index),
+ }
+ }
+ self.presets.push(Preset { name, values });
+ PresetIndex(self.presets.len() - 1)
+ }
+
+ /// Compute the layout of the byte vector used to represent this settings
+ /// group.
+ ///
+ /// The byte vector contains the following entries in order:
+ ///
+ /// 1. Byte-sized settings like `NumSetting` and `EnumSetting`.
+ /// 2. `BoolSetting` settings.
+ /// 3. Precomputed named predicates.
+ /// 4. Other numbered predicates, including parent predicates that need to be accessible by
+ /// number.
+ ///
+ /// Set `self.settings_size` to the length of the byte vector prefix that
+ /// contains the settings. All bytes after that are computed, not
+ /// configured.
+ ///
+ /// Set `self.bool_start_byte_offset` to the beginning of the `BoolSetting` settings,
+ /// item 2 in the list above.
+ ///
+ /// Assign `byte_offset` and `bit_offset` fields in all settings.
+ pub fn build(self) -> SettingGroup {
+ let mut group = SettingGroup {
+ name: self.name,
+ settings: Vec::new(),
+ bool_start_byte_offset: 0,
+ settings_size: 0,
+ presets: Vec::new(),
+ predicates: Vec::new(),
+ };
+
+ let mut byte_offset = 0;
+
+ // Assign the non-boolean settings first.
+ for s in &self.settings {
+ let specific = match s.specific {
+ ProtoSpecificSetting::Bool(..) => continue,
+ ProtoSpecificSetting::Enum(ref values) => SpecificSetting::Enum(values.clone()),
+ ProtoSpecificSetting::Num(default) => SpecificSetting::Num(default),
+ };
+
+ group.settings.push(Setting {
+ name: s.name,
+ comment: s.comment,
+ byte_offset,
+ specific,
+ });
+
+ byte_offset += 1;
+ }
+
+ group.bool_start_byte_offset = byte_offset;
+
+ let mut predicate_number = 0;
+
+ // Then the boolean settings.
+ for s in &self.settings {
+ let default = match s.specific {
+ ProtoSpecificSetting::Bool(default) => default,
+ ProtoSpecificSetting::Enum(_) | ProtoSpecificSetting::Num(_) => continue,
+ };
+ group.settings.push(Setting {
+ name: s.name,
+ comment: s.comment,
+ byte_offset: byte_offset + predicate_number / 8,
+ specific: SpecificSetting::Bool(BoolSetting {
+ default,
+ bit_offset: predicate_number % 8,
+ predicate_number,
+ }),
+ });
+ predicate_number += 1;
+ }
+
+ assert!(
+ group.predicates.is_empty(),
+ "settings_size is the byte size before adding predicates"
+ );
+ group.settings_size = group.byte_size();
+
+ // Sort predicates by name to ensure the same order as the Python code.
+ let mut predicates = self.predicates;
+ predicates.sort_by_key(|predicate| predicate.name);
+
+ group
+ .predicates
+ .extend(predicates.into_iter().map(|predicate| {
+ let number = predicate_number;
+ predicate_number += 1;
+ Predicate {
+ name: predicate.name,
+ node: predicate.node,
+ number,
+ }
+ }));
+
+ group.presets.extend(self.presets);
+
+ group
+ }
+}
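+
+// A minimal usage sketch of the builder above (illustrative only; the setting
+// names are invented for the example, but the layout assertions follow directly
+// from `build()`):
+#[cfg(test)]
+mod builder_sketch {
+ use super::*;
+
+ #[test]
+ fn byte_layout_of_a_small_group() {
+ let mut builder = SettingGroupBuilder::new("example");
+ // Enum and numeric settings are byte-sized and laid out first.
+ builder.add_enum("opt_level", "Optimization level.", vec!["none", "speed"]);
+ // Boolean settings follow, packed eight to a byte.
+ let simd = builder.add_bool("enable_simd", "Enable SIMD.", false);
+ builder.add_bool("enable_atomics", "Enable atomics.", true);
+ builder.add_preset("fast", vec![PresetType::BoolSetting(simd)]);
+
+ let group = builder.build();
+ assert_eq!(group.settings.len(), 3);
+ // The single enum setting occupies byte 0, so the booleans start at byte 1.
+ assert_eq!(group.bool_start_byte_offset, 1);
+ }
+}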
diff --git a/third_party/rust/cranelift-codegen-meta/src/cdsl/type_inference.rs b/third_party/rust/cranelift-codegen-meta/src/cdsl/type_inference.rs
new file mode 100644
index 0000000000..25a07a9b84
--- /dev/null
+++ b/third_party/rust/cranelift-codegen-meta/src/cdsl/type_inference.rs
@@ -0,0 +1,660 @@
+use crate::cdsl::ast::{Def, DefIndex, DefPool, Var, VarIndex, VarPool};
+use crate::cdsl::typevar::{DerivedFunc, TypeSet, TypeVar};
+
+use std::collections::{HashMap, HashSet};
+use std::iter::FromIterator;
+
+#[derive(Debug, Hash, PartialEq, Eq)]
+pub(crate) enum Constraint {
+ /// Constraint specifying that a type var tv1 must be wider than or equal to type var tv2 at
+ /// runtime. This requires that:
+ /// 1) they have the same number of lanes, and
+ /// 2) in each lane, tv1 has at least as many bits as tv2.
+ WiderOrEq(TypeVar, TypeVar),
+
+ /// Constraint specifying that two derived type vars must have the same runtime type.
+ Eq(TypeVar, TypeVar),
+
+ /// Constraint specifying that a type var must belong to some typeset.
+ InTypeset(TypeVar, TypeSet),
+}
+
+impl Constraint {
+ fn translate_with<F: Fn(&TypeVar) -> TypeVar>(&self, func: F) -> Constraint {
+ match self {
+ Constraint::WiderOrEq(lhs, rhs) => {
+ let lhs = func(&lhs);
+ let rhs = func(&rhs);
+ Constraint::WiderOrEq(lhs, rhs)
+ }
+ Constraint::Eq(lhs, rhs) => {
+ let lhs = func(&lhs);
+ let rhs = func(&rhs);
+ Constraint::Eq(lhs, rhs)
+ }
+ Constraint::InTypeset(tv, ts) => {
+ let tv = func(&tv);
+ Constraint::InTypeset(tv, ts.clone())
+ }
+ }
+ }
+
+ /// Creates a new constraint by replacing type vars by their hashmap equivalent.
+ fn translate_with_map(
+ &self,
+ original_to_own_typevar: &HashMap<&TypeVar, TypeVar>,
+ ) -> Constraint {
+ self.translate_with(|tv| substitute(original_to_own_typevar, tv))
+ }
+
+ /// Creates a new constraint by replacing type vars by their canonical equivalent.
+ fn translate_with_env(&self, type_env: &TypeEnvironment) -> Constraint {
+ self.translate_with(|tv| type_env.get_equivalent(tv))
+ }
+
+ fn is_trivial(&self) -> bool {
+ match self {
+ Constraint::WiderOrEq(lhs, rhs) => {
+ // Trivially true.
+ if lhs == rhs {
+ return true;
+ }
+
+ let ts1 = lhs.get_typeset();
+ let ts2 = rhs.get_typeset();
+
+ // Trivially true.
+ if ts1.is_wider_or_equal(&ts2) {
+ return true;
+ }
+
+ // Trivially false.
+ if ts1.is_narrower(&ts2) {
+ return true;
+ }
+
+ // Trivially false.
+ if (&ts1.lanes & &ts2.lanes).is_empty() {
+ return true;
+ }
+
+ self.is_concrete()
+ }
+ Constraint::Eq(lhs, rhs) => lhs == rhs || self.is_concrete(),
+ Constraint::InTypeset(_, _) => {
+ // The way InTypeset constraints are constructed, they would always be trivial if we
+ // applied the same logic as the Python code did, so ignore this case.
+ self.is_concrete()
+ }
+ }
+ }
+
+ /// Returns true iff all the referenced type vars are singletons.
+ fn is_concrete(&self) -> bool {
+ match self {
+ Constraint::WiderOrEq(lhs, rhs) => {
+ lhs.singleton_type().is_some() && rhs.singleton_type().is_some()
+ }
+ Constraint::Eq(lhs, rhs) => {
+ lhs.singleton_type().is_some() && rhs.singleton_type().is_some()
+ }
+ Constraint::InTypeset(tv, _) => tv.singleton_type().is_some(),
+ }
+ }
+
+ fn typevar_args(&self) -> Vec<&TypeVar> {
+ match self {
+ Constraint::WiderOrEq(lhs, rhs) => vec![lhs, rhs],
+ Constraint::Eq(lhs, rhs) => vec![lhs, rhs],
+ Constraint::InTypeset(tv, _) => vec![tv],
+ }
+ }
+}
+
+#[derive(Clone, Copy)]
+enum TypeEnvRank {
+ Singleton = 5,
+ Input = 4,
+ Intermediate = 3,
+ Output = 2,
+ Temp = 1,
+ Internal = 0,
+}
+
+/// Class encapsulating the necessary bookkeeping for type inference.
+pub(crate) struct TypeEnvironment {
+ vars: HashSet<VarIndex>,
+ ranks: HashMap<TypeVar, TypeEnvRank>,
+ equivalency_map: HashMap<TypeVar, TypeVar>,
+ pub constraints: Vec<Constraint>,
+}
+
+impl TypeEnvironment {
+ fn new() -> Self {
+ TypeEnvironment {
+ vars: HashSet::new(),
+ ranks: HashMap::new(),
+ equivalency_map: HashMap::new(),
+ constraints: Vec::new(),
+ }
+ }
+
+ fn register(&mut self, var_index: VarIndex, var: &mut Var) {
+ self.vars.insert(var_index);
+ let rank = if var.is_input() {
+ TypeEnvRank::Input
+ } else if var.is_intermediate() {
+ TypeEnvRank::Intermediate
+ } else if var.is_output() {
+ TypeEnvRank::Output
+ } else {
+ assert!(var.is_temp());
+ TypeEnvRank::Temp
+ };
+ self.ranks.insert(var.get_or_create_typevar(), rank);
+ }
+
+ fn add_constraint(&mut self, constraint: Constraint) {
+ if self.constraints.iter().any(|item| *item == constraint) {
+ return;
+ }
+
+ // Check extra conditions for InTypeset constraints.
+ if let Constraint::InTypeset(tv, _) = &constraint {
+ assert!(
+ tv.base.is_none(),
+ "type variable is {:?}, while expecting none",
+ tv
+ );
+ assert!(
+ tv.name.starts_with("typeof_"),
+ "Name \"{}\" should start with \"typeof_\"",
+ tv.name
+ );
+ }
+
+ self.constraints.push(constraint);
+ }
+
+ /// Returns the canonical representative of the given type variable's equivalence class, following
+ /// the map transitively; derived type variables are rebuilt on top of their canonicalized base.
+ pub fn get_equivalent(&self, tv: &TypeVar) -> TypeVar {
+ let mut tv = tv;
+ while let Some(found) = self.equivalency_map.get(tv) {
+ tv = found;
+ }
+ match &tv.base {
+ Some(parent) => self
+ .get_equivalent(&parent.type_var)
+ .derived(parent.derived_func),
+ None => tv.clone(),
+ }
+ }
+
+ /// Get the rank of tv in the partial order:
+ /// - TVs directly associated with a Var get their rank from the Var (see register()).
+ /// - Internally generated non-derived TVs implicitly get the lowest rank (0).
+ /// - Derived variables get their rank from their free typevar.
+ /// - Singletons have the highest rank.
+ /// - TVs associated with vars in a source pattern have a higher rank than TVs associated with
+ /// temporary vars.
+ fn rank(&self, tv: &TypeVar) -> u8 {
+ let actual_tv = match tv.base {
+ Some(_) => tv.free_typevar(),
+ None => Some(tv.clone()),
+ };
+
+ let rank = match actual_tv {
+ Some(actual_tv) => match self.ranks.get(&actual_tv) {
+ Some(rank) => Some(*rank),
+ None => {
+ assert!(
+ !actual_tv.name.starts_with("typeof_"),
+ format!("variable {} should be explicitly ranked", actual_tv.name)
+ );
+ None
+ }
+ },
+ None => None,
+ };
+
+ let rank = match rank {
+ Some(rank) => rank,
+ None => {
+ if tv.singleton_type().is_some() {
+ TypeEnvRank::Singleton
+ } else {
+ TypeEnvRank::Internal
+ }
+ }
+ };
+
+ rank as u8
+ }
+
+ /// Record the fact that the free tv1 is part of the same equivalence class as tv2. The
+ /// canonical representative of the merged class is tv2's canonical representative.
+ fn record_equivalent(&mut self, tv1: TypeVar, tv2: TypeVar) {
+ assert!(tv1.base.is_none());
+ assert!(self.get_equivalent(&tv1) == tv1);
+ if let Some(tv2_base) = &tv2.base {
+ // Ensure there are no cycles.
+ assert!(self.get_equivalent(&tv2_base.type_var) != tv1);
+ }
+ self.equivalency_map.insert(tv1, tv2);
+ }
+
+ /// Get the free typevars in the current type environment.
+ pub fn free_typevars(&self, var_pool: &mut VarPool) -> Vec<TypeVar> {
+ let mut typevars = Vec::new();
+ typevars.extend(self.equivalency_map.keys().cloned());
+ typevars.extend(
+ self.vars
+ .iter()
+ .map(|&var_index| var_pool.get_mut(var_index).get_or_create_typevar()),
+ );
+
+ let set: HashSet<TypeVar> = HashSet::from_iter(
+ typevars
+ .iter()
+ .map(|tv| self.get_equivalent(tv).free_typevar())
+ .filter(|opt_tv| {
+ // Filter out singleton types.
+ opt_tv.is_some()
+ })
+ .map(|tv| tv.unwrap()),
+ );
+ Vec::from_iter(set)
+ }
+
+ /// Normalize by collapsing any roots that don't correspond to a concrete type var AND have a
+ /// single type var derived from them or equivalent to them.
+ ///
+ /// e.g. if we have a root of the tree that looks like:
+ ///
+ ///   typeof_a   typeof_b
+ ///       \\      /
+ ///        typeof_x
+ ///           |
+ ///      half_width(1)
+ ///           |
+ ///           1
+ ///
+ /// we want to collapse the linear path between 1 and typeof_x. The resulting graph is:
+ ///
+ ///   typeof_a   typeof_b
+ ///       \\      /
+ ///        typeof_x
+ fn normalize(&mut self, var_pool: &mut VarPool) {
+ let source_tvs: HashSet<TypeVar> = HashSet::from_iter(
+ self.vars
+ .iter()
+ .map(|&var_index| var_pool.get_mut(var_index).get_or_create_typevar()),
+ );
+
+ let mut children: HashMap<TypeVar, HashSet<TypeVar>> = HashMap::new();
+
+ // Insert all the parents found by the derivation relationship.
+ for type_var in self.equivalency_map.values() {
+ if type_var.base.is_none() {
+ continue;
+ }
+
+ let parent_tv = type_var.free_typevar();
+ if parent_tv.is_none() {
+ // Ignore this type variable, it's a singleton.
+ continue;
+ }
+ let parent_tv = parent_tv.unwrap();
+
+ children
+ .entry(parent_tv)
+ .or_insert_with(HashSet::new)
+ .insert(type_var.clone());
+ }
+
+ // Insert all the explicit equivalency links.
+ for (equivalent_tv, canon_tv) in self.equivalency_map.iter() {
+ children
+ .entry(canon_tv.clone())
+ .or_insert_with(HashSet::new)
+ .insert(equivalent_tv.clone());
+ }
+
+ // Remove links that are straight paths up to typevar of variables.
+ for free_root in self.free_typevars(var_pool) {
+ let mut root = &free_root;
+ while !source_tvs.contains(&root)
+ && children.contains_key(&root)
+ && children.get(&root).unwrap().len() == 1
+ {
+ let child = children.get(&root).unwrap().iter().next().unwrap();
+ assert_eq!(self.equivalency_map[child], root.clone());
+ self.equivalency_map.remove(child);
+ root = child;
+ }
+ }
+ }
+
+ /// Extract a clean type environment from `self` that only mentions type vars associated with
+ /// real variables.
+ fn extract(self, var_pool: &mut VarPool) -> TypeEnvironment {
+ let vars_tv: HashSet<TypeVar> = HashSet::from_iter(
+ self.vars
+ .iter()
+ .map(|&var_index| var_pool.get_mut(var_index).get_or_create_typevar()),
+ );
+
+ let mut new_equivalency_map: HashMap<TypeVar, TypeVar> = HashMap::new();
+ for tv in &vars_tv {
+ let canon_tv = self.get_equivalent(tv);
+ if *tv != canon_tv {
+ new_equivalency_map.insert(tv.clone(), canon_tv.clone());
+ }
+
+ // Sanity check: the translated type map should only refer to real variables.
+ assert!(vars_tv.contains(tv));
+ let canon_free_tv = canon_tv.free_typevar();
+ assert!(canon_free_tv.is_none() || vars_tv.contains(&canon_free_tv.unwrap()));
+ }
+
+ let mut new_constraints: HashSet<Constraint> = HashSet::new();
+ for constraint in &self.constraints {
+ let constraint = constraint.translate_with_env(&self);
+ if constraint.is_trivial() || new_constraints.contains(&constraint) {
+ continue;
+ }
+
+ // Sanity check: translated constraints should refer only to real variables.
+ for arg in constraint.typevar_args() {
+ let arg_free_tv = arg.free_typevar();
+ assert!(arg_free_tv.is_none() || vars_tv.contains(&arg_free_tv.unwrap()));
+ }
+
+ new_constraints.insert(constraint);
+ }
+
+ TypeEnvironment {
+ vars: self.vars,
+ ranks: self.ranks,
+ equivalency_map: new_equivalency_map,
+ constraints: Vec::from_iter(new_constraints),
+ }
+ }
+}
+
+/// Replaces an external type variable according to the following rules:
+/// - if a local copy is present in the map, return it.
+/// - or if it's derived, create a local derived one that recursively substitutes the parent.
+/// - or return itself.
+fn substitute(map: &HashMap<&TypeVar, TypeVar>, external_type_var: &TypeVar) -> TypeVar {
+ match map.get(&external_type_var) {
+ Some(own_type_var) => own_type_var.clone(),
+ None => match &external_type_var.base {
+ Some(parent) => {
+ let parent_substitute = substitute(map, &parent.type_var);
+ TypeVar::derived(&parent_substitute, parent.derived_func)
+ }
+ None => external_type_var.clone(),
+ },
+ }
+}
+
+/// Normalize a (potentially derived) typevar using the following rules:
+///
+/// - vector and width derived functions commute
+/// {HALF,DOUBLE}VECTOR({HALF,DOUBLE}WIDTH(base)) ->
+/// {HALF,DOUBLE}WIDTH({HALF,DOUBLE}VECTOR(base))
+///
+/// - half/double pairs collapse
+/// {HALF,DOUBLE}WIDTH({DOUBLE,HALF}WIDTH(base)) -> base
+/// {HALF,DOUBLE}VECTOR({DOUBLE,HALF}VECTOR(base)) -> base
+fn canonicalize_derivations(tv: TypeVar) -> TypeVar {
+ let base = match &tv.base {
+ Some(base) => base,
+ None => return tv,
+ };
+
+ let derived_func = base.derived_func;
+
+ if let Some(base_base) = &base.type_var.base {
+ let base_base_tv = &base_base.type_var;
+ match (derived_func, base_base.derived_func) {
+ (DerivedFunc::HalfWidth, DerivedFunc::DoubleWidth)
+ | (DerivedFunc::DoubleWidth, DerivedFunc::HalfWidth)
+ | (DerivedFunc::HalfVector, DerivedFunc::DoubleVector)
+ | (DerivedFunc::DoubleVector, DerivedFunc::HalfVector) => {
+ // Cancelling bijective transformations. This doesn't hide any overflow issues
+ // since derived type sets are checked upon derivation, and base typesets are only
+ // allowed to shrink.
+ return canonicalize_derivations(base_base_tv.clone());
+ }
+ (DerivedFunc::HalfWidth, DerivedFunc::HalfVector)
+ | (DerivedFunc::HalfWidth, DerivedFunc::DoubleVector)
+ | (DerivedFunc::DoubleWidth, DerivedFunc::DoubleVector)
+ | (DerivedFunc::DoubleWidth, DerivedFunc::HalfVector) => {
+ // Arbitrarily put WIDTH derivations before VECTOR derivations, since they commute.
+ return canonicalize_derivations(
+ base_base_tv
+ .derived(derived_func)
+ .derived(base_base.derived_func),
+ );
+ }
+ _ => {}
+ };
+ }
+
+ canonicalize_derivations(base.type_var.clone()).derived(derived_func)
+}
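+
+// A small sketch of the collapsing rule above (illustrative only): deriving
+// DOUBLEWIDTH and then HALFWIDTH from the same base cancels back to the base
+// type variable. The `ints(32..32)` type set is an arbitrary choice that keeps
+// the assertions in `TypeVar::derived` satisfied.
+#[cfg(test)]
+mod canonicalize_sketch {
+ use super::*;
+ use crate::cdsl::typevar::TypeSetBuilder;
+
+ #[test]
+ fn half_of_double_collapses() {
+ let tv = TypeVar::new("x", "example", TypeSetBuilder::new().ints(32..32).build());
+ let roundtrip = tv.double_width().half_width();
+ assert_eq!(canonicalize_derivations(roundtrip), tv);
+ }
+}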
+
+/// Given typevars tv1 and tv2 (which could be derived from one another), constrain their typesets
+/// to be the same. When one is derived from the other, repeat the constraining process until
+/// a fixed point is reached.
+fn constrain_fixpoint(tv1: &TypeVar, tv2: &TypeVar) {
+ loop {
+ let old_tv1_ts = tv1.get_typeset().clone();
+ tv2.constrain_types(tv1.clone());
+ if tv1.get_typeset() == old_tv1_ts {
+ break;
+ }
+ }
+
+ let old_tv2_ts = tv2.get_typeset();
+ tv1.constrain_types(tv2.clone());
+ // The above loop should ensure that all reference cycles have been handled.
+ assert!(old_tv2_ts == tv2.get_typeset());
+}
+
+/// Unify tv1 and tv2 in the given type environment. The arguments are swapped if necessary so
+/// that tv1 ends up with a rank lower than or equal to tv2's, making tv2 the canonical
+/// representative of the merged equivalence class.
+fn unify(tv1: &TypeVar, tv2: &TypeVar, type_env: &mut TypeEnvironment) -> Result<(), String> {
+ let tv1 = canonicalize_derivations(type_env.get_equivalent(tv1));
+ let tv2 = canonicalize_derivations(type_env.get_equivalent(tv2));
+
+ if tv1 == tv2 {
+ // Already unified.
+ return Ok(());
+ }
+
+ if type_env.rank(&tv2) < type_env.rank(&tv1) {
+ // Make sure tv1 always has the smallest rank, since real variables have the higher rank
+ // and we want them to be the canonical representatives of their equivalency classes.
+ return unify(&tv2, &tv1, type_env);
+ }
+
+ constrain_fixpoint(&tv1, &tv2);
+
+ if tv1.get_typeset().size() == 0 || tv2.get_typeset().size() == 0 {
+ return Err(format!(
+ "Error: empty type created when unifying {} and {}",
+ tv1.name, tv2.name
+ ));
+ }
+
+ let base = match &tv1.base {
+ Some(base) => base,
+ None => {
+ type_env.record_equivalent(tv1, tv2);
+ return Ok(());
+ }
+ };
+
+ if let Some(inverse) = base.derived_func.inverse() {
+ return unify(&base.type_var, &tv2.derived(inverse), type_env);
+ }
+
+ type_env.add_constraint(Constraint::Eq(tv1, tv2));
+ Ok(())
+}
+
+/// Perform type inference on one Def in the current type environment and return an updated type
+/// environment or error.
+///
+/// At a high level this works by creating fresh copies of each formal type var in the Def's
+/// instruction's signature, and unifying the formal typevar with the corresponding actual typevar.
+fn infer_definition(
+ def: &Def,
+ var_pool: &mut VarPool,
+ type_env: TypeEnvironment,
+ last_type_index: &mut usize,
+) -> Result<TypeEnvironment, String> {
+ let apply = &def.apply;
+ let inst = &apply.inst;
+
+ let mut type_env = type_env;
+ let free_formal_tvs = inst.all_typevars();
+
+ let mut original_to_own_typevar: HashMap<&TypeVar, TypeVar> = HashMap::new();
+ for &tv in &free_formal_tvs {
+ assert!(original_to_own_typevar
+ .insert(
+ tv,
+ TypeVar::copy_from(tv, format!("own_{}", last_type_index))
+ )
+ .is_none());
+ *last_type_index += 1;
+ }
+
+ // Update the mapping with any explicitly bound type vars:
+ for (i, value_type) in apply.value_types.iter().enumerate() {
+ let singleton = TypeVar::new_singleton(value_type.clone());
+ assert!(original_to_own_typevar
+ .insert(free_formal_tvs[i], singleton)
+ .is_some());
+ }
+
+ // Get fresh copies for each typevar in the signature (both free and derived).
+ let mut formal_tvs = Vec::new();
+ formal_tvs.extend(inst.value_results.iter().map(|&i| {
+ substitute(
+ &original_to_own_typevar,
+ inst.operands_out[i].type_var().unwrap(),
+ )
+ }));
+ formal_tvs.extend(inst.value_opnums.iter().map(|&i| {
+ substitute(
+ &original_to_own_typevar,
+ inst.operands_in[i].type_var().unwrap(),
+ )
+ }));
+
+ // Get the list of actual vars.
+ let mut actual_vars = Vec::new();
+ actual_vars.extend(inst.value_results.iter().map(|&i| def.defined_vars[i]));
+ actual_vars.extend(
+ inst.value_opnums
+ .iter()
+ .map(|&i| apply.args[i].unwrap_var()),
+ );
+
+ // Get the list of the actual TypeVars.
+ let mut actual_tvs = Vec::new();
+ for var_index in actual_vars {
+ let var = var_pool.get_mut(var_index);
+ type_env.register(var_index, var);
+ actual_tvs.push(var.get_or_create_typevar());
+ }
+
+ // Make sure we start unifying with the control type variable first, by putting it at the
+ // front of both vectors.
+ if let Some(poly) = &inst.polymorphic_info {
+ let own_ctrl_tv = &original_to_own_typevar[&poly.ctrl_typevar];
+ let ctrl_index = formal_tvs.iter().position(|tv| tv == own_ctrl_tv).unwrap();
+ if ctrl_index != 0 {
+ formal_tvs.swap(0, ctrl_index);
+ actual_tvs.swap(0, ctrl_index);
+ }
+ }
+
+ // Unify each actual type variable with the corresponding formal type variable.
+ for (actual_tv, formal_tv) in actual_tvs.iter().zip(&formal_tvs) {
+ if let Err(msg) = unify(actual_tv, formal_tv, &mut type_env) {
+ return Err(format!(
+ "fail ti on {} <: {}: {}",
+ actual_tv.name, formal_tv.name, msg
+ ));
+ }
+ }
+
+ // Add any instruction specific constraints.
+ for constraint in &inst.constraints {
+ type_env.add_constraint(constraint.translate_with_map(&original_to_own_typevar));
+ }
+
+ Ok(type_env)
+}
+
+/// Perform type inference on a transformation. Return an updated type environment or error.
+pub(crate) fn infer_transform(
+ src: DefIndex,
+ dst: &[DefIndex],
+ def_pool: &DefPool,
+ var_pool: &mut VarPool,
+) -> Result<TypeEnvironment, String> {
+ let mut type_env = TypeEnvironment::new();
+ let mut last_type_index = 0;
+
+ // Execute type inference on the source pattern.
+ type_env = infer_definition(def_pool.get(src), var_pool, type_env, &mut last_type_index)
+ .map_err(|err| format!("In src pattern: {}", err))?;
+
+ // Collect the type sets once after applying the source pattern; we'll compare the typesets
+ // after we've also considered the destination pattern, and will emit supplementary InTypeset
+ // checks if they don't match.
+ let src_typesets = type_env
+ .vars
+ .iter()
+ .map(|&var_index| {
+ let var = var_pool.get_mut(var_index);
+ let tv = type_env.get_equivalent(&var.get_or_create_typevar());
+ (var_index, tv.get_typeset())
+ })
+ .collect::<Vec<_>>();
+
+ // Execute type inference on the destination pattern.
+ for (i, &def_index) in dst.iter().enumerate() {
+ let def = def_pool.get(def_index);
+ type_env = infer_definition(def, var_pool, type_env, &mut last_type_index)
+ .map_err(|err| format!("line {}: {}", i, err))?;
+ }
+
+ for (var_index, src_typeset) in src_typesets {
+ let var = var_pool.get(var_index);
+ if !var.has_free_typevar() {
+ continue;
+ }
+ let tv = type_env.get_equivalent(&var.get_typevar().unwrap());
+ let new_typeset = tv.get_typeset();
+ assert!(
+ new_typeset.is_subset(&src_typeset),
+ "type sets can only get narrower"
+ );
+ if new_typeset != src_typeset {
+ type_env.add_constraint(Constraint::InTypeset(tv.clone(), new_typeset.clone()));
+ }
+ }
+
+ type_env.normalize(var_pool);
+
+ Ok(type_env.extract(var_pool))
+}
diff --git a/third_party/rust/cranelift-codegen-meta/src/cdsl/types.rs b/third_party/rust/cranelift-codegen-meta/src/cdsl/types.rs
new file mode 100644
index 0000000000..7e03c873db
--- /dev/null
+++ b/third_party/rust/cranelift-codegen-meta/src/cdsl/types.rs
@@ -0,0 +1,587 @@
+//! Cranelift ValueType hierarchy
+
+use std::fmt;
+
+use crate::shared::types as shared_types;
+use cranelift_codegen_shared::constants;
+
+// Rust name prefix used for the `rust_name` method.
+static _RUST_NAME_PREFIX: &str = "ir::types::";
+
+// ValueType variants (i8, i32, ...) are provided in `shared::types.rs`.
+
+/// A concrete SSA value type.
+///
+/// All SSA values have a type that is described by an instance of `ValueType`
+/// or one of its subclasses.
+#[derive(Clone, Debug, PartialEq, Eq, Hash)]
+pub(crate) enum ValueType {
+ Lane(LaneType),
+ Reference(ReferenceType),
+ Special(SpecialType),
+ Vector(VectorType),
+}
+
+impl ValueType {
+ /// Iterate through all of the lane types.
+ pub fn all_lane_types() -> LaneTypeIterator {
+ LaneTypeIterator::new()
+ }
+
+ /// Iterate through all of the special types (neither lanes nor vectors).
+ pub fn all_special_types() -> SpecialTypeIterator {
+ SpecialTypeIterator::new()
+ }
+
+ pub fn all_reference_types() -> ReferenceTypeIterator {
+ ReferenceTypeIterator::new()
+ }
+
+ /// Return a string containing the documentation comment for this type.
+ pub fn doc(&self) -> String {
+ match *self {
+ ValueType::Lane(l) => l.doc(),
+ ValueType::Reference(r) => r.doc(),
+ ValueType::Special(s) => s.doc(),
+ ValueType::Vector(ref v) => v.doc(),
+ }
+ }
+
+ /// Return the number of bits in a lane.
+ pub fn lane_bits(&self) -> u64 {
+ match *self {
+ ValueType::Lane(l) => l.lane_bits(),
+ ValueType::Reference(r) => r.lane_bits(),
+ ValueType::Special(s) => s.lane_bits(),
+ ValueType::Vector(ref v) => v.lane_bits(),
+ }
+ }
+
+ /// Return the number of lanes.
+ pub fn lane_count(&self) -> u64 {
+ match *self {
+ ValueType::Vector(ref v) => v.lane_count(),
+ _ => 1,
+ }
+ }
+
+ /// Find the number of bytes that this type occupies in memory.
+ pub fn membytes(&self) -> u64 {
+ self.width() / 8
+ }
+
+ /// Find the unique number associated with this type.
+ pub fn number(&self) -> Option<u8> {
+ match *self {
+ ValueType::Lane(l) => Some(l.number()),
+ ValueType::Reference(r) => Some(r.number()),
+ ValueType::Special(s) => Some(s.number()),
+ ValueType::Vector(ref v) => Some(v.number()),
+ }
+ }
+
+ /// Return the name of this type for generated Rust source files.
+ pub fn rust_name(&self) -> String {
+ format!("{}{}", _RUST_NAME_PREFIX, self.to_string().to_uppercase())
+ }
+
+ /// Return true iff:
+ /// 1. self and other have equal number of lanes
+ /// 2. each lane in self has at least as many bits as a lane in other
+ pub fn _wider_or_equal(&self, rhs: &ValueType) -> bool {
+ (self.lane_count() == rhs.lane_count()) && (self.lane_bits() >= rhs.lane_bits())
+ }
+
+ /// Return the total number of bits of an instance of this type.
+ pub fn width(&self) -> u64 {
+ self.lane_count() * self.lane_bits()
+ }
+}
+
+impl fmt::Display for ValueType {
+ fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
+ match *self {
+ ValueType::Lane(l) => l.fmt(f),
+ ValueType::Reference(r) => r.fmt(f),
+ ValueType::Special(s) => s.fmt(f),
+ ValueType::Vector(ref v) => v.fmt(f),
+ }
+ }
+}
+
+/// Create a ValueType from a given lane type.
+impl From<LaneType> for ValueType {
+ fn from(lane: LaneType) -> Self {
+ ValueType::Lane(lane)
+ }
+}
+
+/// Create a ValueType from a given reference type.
+impl From<ReferenceType> for ValueType {
+ fn from(reference: ReferenceType) -> Self {
+ ValueType::Reference(reference)
+ }
+}
+
+/// Create a ValueType from a given special type.
+impl From<SpecialType> for ValueType {
+ fn from(spec: SpecialType) -> Self {
+ ValueType::Special(spec)
+ }
+}
+
+/// Create a ValueType from a given vector type.
+impl From<VectorType> for ValueType {
+ fn from(vector: VectorType) -> Self {
+ ValueType::Vector(vector)
+ }
+}
+
+/// A concrete scalar type that can appear as a vector lane too.
+#[derive(Clone, Copy, PartialEq, Eq, Hash)]
+pub(crate) enum LaneType {
+ Bool(shared_types::Bool),
+ Float(shared_types::Float),
+ Int(shared_types::Int),
+}
+
+impl LaneType {
+ /// Return a string containing the documentation comment for this lane type.
+ pub fn doc(self) -> String {
+ match self {
+ LaneType::Bool(_) => format!("A boolean type with {} bits.", self.lane_bits()),
+ LaneType::Float(shared_types::Float::F32) => String::from(
+ "A 32-bit floating point type represented in the IEEE 754-2008
+ *binary32* interchange format. This corresponds to the :c:type:`float`
+ type in most C implementations.",
+ ),
+ LaneType::Float(shared_types::Float::F64) => String::from(
+ "A 64-bit floating point type represented in the IEEE 754-2008
+ *binary64* interchange format. This corresponds to the :c:type:`double`
+ type in most C implementations.",
+ ),
+ LaneType::Int(_) if self.lane_bits() < 32 => format!(
+ "An integer type with {} bits.
+ WARNING: arithmetic on {}-bit integers is incomplete",
+ self.lane_bits(),
+ self.lane_bits()
+ ),
+ LaneType::Int(_) => format!("An integer type with {} bits.", self.lane_bits()),
+ }
+ }
+
+ /// Return the number of bits in a lane.
+ pub fn lane_bits(self) -> u64 {
+ match self {
+ LaneType::Bool(ref b) => *b as u64,
+ LaneType::Float(ref f) => *f as u64,
+ LaneType::Int(ref i) => *i as u64,
+ }
+ }
+
+ /// Find the unique number associated with this lane type.
+ pub fn number(self) -> u8 {
+ constants::LANE_BASE
+ + match self {
+ LaneType::Bool(shared_types::Bool::B1) => 0,
+ LaneType::Bool(shared_types::Bool::B8) => 1,
+ LaneType::Bool(shared_types::Bool::B16) => 2,
+ LaneType::Bool(shared_types::Bool::B32) => 3,
+ LaneType::Bool(shared_types::Bool::B64) => 4,
+ LaneType::Bool(shared_types::Bool::B128) => 5,
+ LaneType::Int(shared_types::Int::I8) => 6,
+ LaneType::Int(shared_types::Int::I16) => 7,
+ LaneType::Int(shared_types::Int::I32) => 8,
+ LaneType::Int(shared_types::Int::I64) => 9,
+ LaneType::Int(shared_types::Int::I128) => 10,
+ LaneType::Float(shared_types::Float::F32) => 11,
+ LaneType::Float(shared_types::Float::F64) => 12,
+ }
+ }
+
+ pub fn bool_from_bits(num_bits: u16) -> LaneType {
+ LaneType::Bool(match num_bits {
+ 1 => shared_types::Bool::B1,
+ 8 => shared_types::Bool::B8,
+ 16 => shared_types::Bool::B16,
+ 32 => shared_types::Bool::B32,
+ 64 => shared_types::Bool::B64,
+ 128 => shared_types::Bool::B128,
+ _ => unreachable!("unxpected num bits for bool"),
+ })
+ }
+
+ pub fn int_from_bits(num_bits: u16) -> LaneType {
+ LaneType::Int(match num_bits {
+ 8 => shared_types::Int::I8,
+ 16 => shared_types::Int::I16,
+ 32 => shared_types::Int::I32,
+ 64 => shared_types::Int::I64,
+ 128 => shared_types::Int::I128,
+ _ => unreachable!("unxpected num bits for int"),
+ })
+ }
+
+ pub fn float_from_bits(num_bits: u16) -> LaneType {
+ LaneType::Float(match num_bits {
+ 32 => shared_types::Float::F32,
+ 64 => shared_types::Float::F64,
+ _ => unreachable!("unxpected num bits for float"),
+ })
+ }
+
+ pub fn by(self, lanes: u16) -> ValueType {
+ if lanes == 1 {
+ self.into()
+ } else {
+ ValueType::Vector(VectorType::new(self, lanes.into()))
+ }
+ }
+
+ pub fn is_float(self) -> bool {
+ match self {
+ LaneType::Float(_) => true,
+ _ => false,
+ }
+ }
+
+ pub fn is_int(self) -> bool {
+ match self {
+ LaneType::Int(_) => true,
+ _ => false,
+ }
+ }
+}
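+
+// A brief sketch of how these constructors compose (illustrative only): a lane
+// type combined with a lane count through `by()` yields either a scalar or a
+// vector `ValueType`, from which the naming and sizing helpers follow.
+#[cfg(test)]
+mod lane_type_sketch {
+ use super::*;
+
+ #[test]
+ fn by_builds_scalars_and_vectors() {
+ let i32_lane = LaneType::int_from_bits(32);
+ assert_eq!(i32_lane.by(1).to_string(), "i32");
+
+ let i32x4 = i32_lane.by(4);
+ assert_eq!(i32x4.to_string(), "i32x4");
+ assert_eq!(i32x4.rust_name(), "ir::types::I32X4");
+ assert_eq!(i32x4.lane_count(), 4);
+ assert_eq!(i32x4.width(), 128);
+ assert_eq!(i32x4.membytes(), 16);
+ }
+}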
+
+impl fmt::Display for LaneType {
+ fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
+ match *self {
+ LaneType::Bool(_) => write!(f, "b{}", self.lane_bits()),
+ LaneType::Float(_) => write!(f, "f{}", self.lane_bits()),
+ LaneType::Int(_) => write!(f, "i{}", self.lane_bits()),
+ }
+ }
+}
+
+impl fmt::Debug for LaneType {
+ fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
+ let inner_msg = format!("bits={}", self.lane_bits());
+ write!(
+ f,
+ "{}",
+ match *self {
+ LaneType::Bool(_) => format!("BoolType({})", inner_msg),
+ LaneType::Float(_) => format!("FloatType({})", inner_msg),
+ LaneType::Int(_) => format!("IntType({})", inner_msg),
+ }
+ )
+ }
+}
+
+/// Create a LaneType from a given bool variant.
+impl From<shared_types::Bool> for LaneType {
+ fn from(b: shared_types::Bool) -> Self {
+ LaneType::Bool(b)
+ }
+}
+
+/// Create a LaneType from a given float variant.
+impl From<shared_types::Float> for LaneType {
+ fn from(f: shared_types::Float) -> Self {
+ LaneType::Float(f)
+ }
+}
+
+/// Create a LaneType from a given int variant.
+impl From<shared_types::Int> for LaneType {
+ fn from(i: shared_types::Int) -> Self {
+ LaneType::Int(i)
+ }
+}
+
+/// An iterator for different lane types.
+pub(crate) struct LaneTypeIterator {
+ bool_iter: shared_types::BoolIterator,
+ int_iter: shared_types::IntIterator,
+ float_iter: shared_types::FloatIterator,
+}
+
+impl LaneTypeIterator {
+ /// Create a new lane type iterator.
+ fn new() -> Self {
+ Self {
+ bool_iter: shared_types::BoolIterator::new(),
+ int_iter: shared_types::IntIterator::new(),
+ float_iter: shared_types::FloatIterator::new(),
+ }
+ }
+}
+
+impl Iterator for LaneTypeIterator {
+ type Item = LaneType;
+ fn next(&mut self) -> Option<Self::Item> {
+ if let Some(b) = self.bool_iter.next() {
+ Some(LaneType::from(b))
+ } else if let Some(i) = self.int_iter.next() {
+ Some(LaneType::from(i))
+ } else if let Some(f) = self.float_iter.next() {
+ Some(LaneType::from(f))
+ } else {
+ None
+ }
+ }
+}
+
+/// A concrete SIMD vector type.
+///
+/// A vector type has a lane type which is an instance of `LaneType`,
+/// and a positive number of lanes.
+#[derive(Clone, PartialEq, Eq, Hash)]
+pub(crate) struct VectorType {
+ base: LaneType,
+ lanes: u64,
+}
+
+impl VectorType {
+ /// Initialize a new vector type with `lanes` lanes of lane type `base`.
+ pub fn new(base: LaneType, lanes: u64) -> Self {
+ Self { base, lanes }
+ }
+
+ /// Return a string containing the documentation comment for this vector type.
+ pub fn doc(&self) -> String {
+ format!(
+ "A SIMD vector with {} lanes containing a `{}` each.",
+ self.lane_count(),
+ self.base
+ )
+ }
+
+ /// Return the number of bits in a lane.
+ pub fn lane_bits(&self) -> u64 {
+ self.base.lane_bits()
+ }
+
+ /// Return the number of lanes.
+ pub fn lane_count(&self) -> u64 {
+ self.lanes
+ }
+
+ /// Return the lane type.
+ pub fn lane_type(&self) -> LaneType {
+ self.base
+ }
+
+ /// Find the unique number associated with this vector type.
+ ///
+ /// Vector types are encoded with the lane type in the low 4 bits and
+ /// log2(lanes) in the high 4 bits, giving a range of 2-256 lanes.
+ pub fn number(&self) -> u8 {
+ let lanes_log_2: u32 = 63 - self.lane_count().leading_zeros();
+ let base_num = u32::from(self.base.number());
+ let num = (lanes_log_2 << 4) + base_num;
+ num as u8
+ }
+}
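+
+// A worked example of the numbering scheme documented on `number()` above
+// (illustrative only): doubling the lane count increments the log2(lanes) nibble,
+// i.e. it adds 0x10 to the encoded number.
+#[cfg(test)]
+mod vector_number_sketch {
+ use super::*;
+
+ #[test]
+ fn doubling_lanes_adds_0x10() {
+ let i32x4 = VectorType::new(LaneType::int_from_bits(32), 4);
+ let i32x8 = VectorType::new(LaneType::int_from_bits(32), 8);
+ assert_eq!(i32x8.number(), i32x4.number() + 0x10);
+ }
+}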
+
+impl fmt::Display for VectorType {
+ fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
+ write!(f, "{}x{}", self.base, self.lane_count())
+ }
+}
+
+impl fmt::Debug for VectorType {
+ fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
+ write!(
+ f,
+ "VectorType(base={}, lanes={})",
+ self.base,
+ self.lane_count()
+ )
+ }
+}
+
+/// A concrete scalar type that is neither a vector nor a lane type.
+///
+/// Special types cannot be used to form vectors.
+#[derive(Clone, Copy, PartialEq, Eq, Hash)]
+pub(crate) enum SpecialType {
+ Flag(shared_types::Flag),
+ // FIXME remove once the old style backends are removed.
+ StructArgument,
+}
+
+impl SpecialType {
+ /// Return a string containing the documentation comment for this special type.
+ pub fn doc(self) -> String {
+ match self {
+ SpecialType::Flag(shared_types::Flag::IFlags) => String::from(
+ "CPU flags representing the result of an integer comparison. These flags
+ can be tested with an :type:`intcc` condition code.",
+ ),
+ SpecialType::Flag(shared_types::Flag::FFlags) => String::from(
+ "CPU flags representing the result of a floating point comparison. These
+ flags can be tested with a :type:`floatcc` condition code.",
+ ),
+ SpecialType::StructArgument => {
+ String::from("After legalization sarg_t arguments will get this type.")
+ }
+ }
+ }
+
+ /// Return the number of bits in a lane.
+ pub fn lane_bits(self) -> u64 {
+ match self {
+ SpecialType::Flag(_) => 0,
+ SpecialType::StructArgument => 0,
+ }
+ }
+
+ /// Find the unique number associated with this special type.
+ pub fn number(self) -> u8 {
+ match self {
+ SpecialType::Flag(shared_types::Flag::IFlags) => 1,
+ SpecialType::Flag(shared_types::Flag::FFlags) => 2,
+ SpecialType::StructArgument => 3,
+ }
+ }
+}
+
+impl fmt::Display for SpecialType {
+ fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
+ match *self {
+ SpecialType::Flag(shared_types::Flag::IFlags) => write!(f, "iflags"),
+ SpecialType::Flag(shared_types::Flag::FFlags) => write!(f, "fflags"),
+ SpecialType::StructArgument => write!(f, "sarg_t"),
+ }
+ }
+}
+
+impl fmt::Debug for SpecialType {
+ fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
+ write!(
+ f,
+ "{}",
+ match *self {
+ SpecialType::Flag(_) => format!("FlagsType({})", self),
+ SpecialType::StructArgument => format!("StructArgument"),
+ }
+ )
+ }
+}
+
+impl From<shared_types::Flag> for SpecialType {
+ fn from(f: shared_types::Flag) -> Self {
+ SpecialType::Flag(f)
+ }
+}
+
+pub(crate) struct SpecialTypeIterator {
+ flag_iter: shared_types::FlagIterator,
+ done: bool,
+}
+
+impl SpecialTypeIterator {
+ fn new() -> Self {
+ Self {
+ flag_iter: shared_types::FlagIterator::new(),
+ done: false,
+ }
+ }
+}
+
+impl Iterator for SpecialTypeIterator {
+ type Item = SpecialType;
+ fn next(&mut self) -> Option<Self::Item> {
+ if let Some(f) = self.flag_iter.next() {
+ Some(SpecialType::from(f))
+ } else {
+ if !self.done {
+ self.done = true;
+ Some(SpecialType::StructArgument)
+ } else {
+ None
+ }
+ }
+ }
+}
+
+/// A reference type is a scalar type, but not a lane type.
+#[derive(Clone, Copy, PartialEq, Eq, Hash)]
+pub(crate) struct ReferenceType(pub shared_types::Reference);
+
+impl ReferenceType {
+ /// Return a string containing the documentation comment for this reference type.
+ pub fn doc(self) -> String {
+ format!("An opaque reference type with {} bits.", self.lane_bits())
+ }
+
+ /// Return the number of bits in a lane.
+ pub fn lane_bits(self) -> u64 {
+ match self.0 {
+ shared_types::Reference::R32 => 32,
+ shared_types::Reference::R64 => 64,
+ }
+ }
+
+ /// Find the unique number associated with this reference type.
+ pub fn number(self) -> u8 {
+ constants::REFERENCE_BASE
+ + match self {
+ ReferenceType(shared_types::Reference::R32) => 0,
+ ReferenceType(shared_types::Reference::R64) => 1,
+ }
+ }
+
+ pub fn ref_from_bits(num_bits: u16) -> ReferenceType {
+ ReferenceType(match num_bits {
+ 32 => shared_types::Reference::R32,
+ 64 => shared_types::Reference::R64,
+ _ => unreachable!("unexpected number of bits for a reference type"),
+ })
+ }
+}
+
+impl fmt::Display for ReferenceType {
+ fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
+ write!(f, "r{}", self.lane_bits())
+ }
+}
+
+impl fmt::Debug for ReferenceType {
+ fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
+ write!(f, "ReferenceType(bits={})", self.lane_bits())
+ }
+}
+
+/// Create a ReferenceType from a given reference variant.
+impl From<shared_types::Reference> for ReferenceType {
+ fn from(r: shared_types::Reference) -> Self {
+ ReferenceType(r)
+ }
+}
+
+/// An iterator for different reference types.
+pub(crate) struct ReferenceTypeIterator {
+ reference_iter: shared_types::ReferenceIterator,
+}
+
+impl ReferenceTypeIterator {
+ /// Create a new reference type iterator.
+ fn new() -> Self {
+ Self {
+ reference_iter: shared_types::ReferenceIterator::new(),
+ }
+ }
+}
+
+impl Iterator for ReferenceTypeIterator {
+ type Item = ReferenceType;
+ fn next(&mut self) -> Option<Self::Item> {
+ if let Some(r) = self.reference_iter.next() {
+ Some(ReferenceType::from(r))
+ } else {
+ None
+ }
+ }
+}
diff --git a/third_party/rust/cranelift-codegen-meta/src/cdsl/typevar.rs b/third_party/rust/cranelift-codegen-meta/src/cdsl/typevar.rs
new file mode 100644
index 0000000000..c1027bf847
--- /dev/null
+++ b/third_party/rust/cranelift-codegen-meta/src/cdsl/typevar.rs
@@ -0,0 +1,1274 @@
+use std::cell::RefCell;
+use std::collections::{BTreeSet, HashSet};
+use std::fmt;
+use std::hash;
+use std::iter::FromIterator;
+use std::ops;
+use std::rc::Rc;
+
+use crate::cdsl::types::{LaneType, ReferenceType, SpecialType, ValueType};
+
+const MAX_LANES: u16 = 256;
+const MAX_BITS: u16 = 128;
+const MAX_FLOAT_BITS: u16 = 64;
+
+/// Type variables can be used in place of concrete types when defining
+/// instructions. This makes the instructions *polymorphic*.
+///
+/// A type variable is restricted to vary over a subset of the value types.
+/// This subset is specified by a set of flags that control the permitted base
+/// types and whether the type variable can assume scalar or vector types, or
+/// both.
+#[derive(Debug)]
+pub(crate) struct TypeVarContent {
+ /// Short name of type variable used in instruction descriptions.
+ pub name: String,
+
+ /// Documentation string.
+ pub doc: String,
+
+ /// Type set associated to the type variable.
+ /// This field must remain private; use `get_typeset()` or `get_raw_typeset()` to get the
+ /// information you want.
+ type_set: TypeSet,
+
+ pub base: Option<TypeVarParent>,
+}
+
+#[derive(Clone, Debug)]
+pub(crate) struct TypeVar {
+ content: Rc<RefCell<TypeVarContent>>,
+}
+
+impl TypeVar {
+ pub fn new(name: impl Into<String>, doc: impl Into<String>, type_set: TypeSet) -> Self {
+ Self {
+ content: Rc::new(RefCell::new(TypeVarContent {
+ name: name.into(),
+ doc: doc.into(),
+ type_set,
+ base: None,
+ })),
+ }
+ }
+
+ pub fn new_singleton(value_type: ValueType) -> Self {
+ let (name, doc) = (value_type.to_string(), value_type.doc());
+ let mut builder = TypeSetBuilder::new();
+
+ let (scalar_type, num_lanes) = match value_type {
+ ValueType::Special(special_type) => {
+ return TypeVar::new(name, doc, builder.specials(vec![special_type]).build());
+ }
+ ValueType::Reference(ReferenceType(reference_type)) => {
+ let bits = reference_type as RangeBound;
+ return TypeVar::new(name, doc, builder.refs(bits..bits).build());
+ }
+ ValueType::Lane(lane_type) => (lane_type, 1),
+ ValueType::Vector(vec_type) => {
+ (vec_type.lane_type(), vec_type.lane_count() as RangeBound)
+ }
+ };
+
+ builder = builder.simd_lanes(num_lanes..num_lanes);
+
+ let builder = match scalar_type {
+ LaneType::Int(int_type) => {
+ let bits = int_type as RangeBound;
+ builder.ints(bits..bits)
+ }
+ LaneType::Float(float_type) => {
+ let bits = float_type as RangeBound;
+ builder.floats(bits..bits)
+ }
+ LaneType::Bool(bool_type) => {
+ let bits = bool_type as RangeBound;
+ builder.bools(bits..bits)
+ }
+ };
+ TypeVar::new(name, doc, builder.build())
+ }
+
+ /// Get a fresh copy of `other`, named `name`. Can only be called on non-derived type variables.
+ pub fn copy_from(other: &TypeVar, name: String) -> TypeVar {
+ assert!(
+ other.base.is_none(),
+ "copy_from() can only be called on non-derived type variables"
+ );
+ TypeVar {
+ content: Rc::new(RefCell::new(TypeVarContent {
+ name,
+ doc: "".into(),
+ type_set: other.type_set.clone(),
+ base: None,
+ })),
+ }
+ }
+
+ /// Returns the typeset for this TV. If the TV is derived, computes it recursively from the
+ /// derived function and the base's typeset.
+ /// Note this can't be done non-lazily in the constructor, because the TypeSet of the base may
+ /// change over time.
+ pub fn get_typeset(&self) -> TypeSet {
+ match &self.base {
+ Some(base) => base.type_var.get_typeset().image(base.derived_func),
+ None => self.type_set.clone(),
+ }
+ }
+
+ /// Returns this typevar's type set, assuming this type var has no parent.
+ pub fn get_raw_typeset(&self) -> &TypeSet {
+ assert_eq!(self.type_set, self.get_typeset());
+ &self.type_set
+ }
+
+ /// If the associated typeset has a single type return it. Otherwise return None.
+ pub fn singleton_type(&self) -> Option<ValueType> {
+ let type_set = self.get_typeset();
+ if type_set.size() == 1 {
+ Some(type_set.get_singleton())
+ } else {
+ None
+ }
+ }
+
+ /// Get the free type variable controlling this one.
+ pub fn free_typevar(&self) -> Option<TypeVar> {
+ match &self.base {
+ Some(base) => base.type_var.free_typevar(),
+ None => {
+ match self.singleton_type() {
+ // A singleton type isn't a proper free variable.
+ Some(_) => None,
+ None => Some(self.clone()),
+ }
+ }
+ }
+ }
+
+ /// Create a type variable that is a function of another.
+ pub fn derived(&self, derived_func: DerivedFunc) -> TypeVar {
+ let ts = self.get_typeset();
+
+ // Safety checks to avoid over/underflows.
+ assert!(ts.specials.is_empty(), "can't derive from special types");
+ match derived_func {
+ DerivedFunc::HalfWidth => {
+ assert!(
+ ts.ints.is_empty() || *ts.ints.iter().min().unwrap() > 8,
+ "can't halve all integer types"
+ );
+ assert!(
+ ts.floats.is_empty() || *ts.floats.iter().min().unwrap() > 32,
+ "can't halve all float types"
+ );
+ assert!(
+ ts.bools.is_empty() || *ts.bools.iter().min().unwrap() > 8,
+ "can't halve all boolean types"
+ );
+ }
+ DerivedFunc::DoubleWidth => {
+ assert!(
+ ts.ints.is_empty() || *ts.ints.iter().max().unwrap() < MAX_BITS,
+ "can't double all integer types"
+ );
+ assert!(
+ ts.floats.is_empty() || *ts.floats.iter().max().unwrap() < MAX_FLOAT_BITS,
+ "can't double all float types"
+ );
+ assert!(
+ ts.bools.is_empty() || *ts.bools.iter().max().unwrap() < MAX_BITS,
+ "can't double all boolean types"
+ );
+ }
+ DerivedFunc::HalfVector => {
+ assert!(
+ *ts.lanes.iter().min().unwrap() > 1,
+ "can't halve a scalar type"
+ );
+ }
+ DerivedFunc::DoubleVector => {
+ assert!(
+ *ts.lanes.iter().max().unwrap() < MAX_LANES,
+ "can't double 256 lanes"
+ );
+ }
+ DerivedFunc::SplitLanes => {
+ assert!(
+ ts.ints.is_empty() || *ts.ints.iter().min().unwrap() > 8,
+ "can't halve all integer types"
+ );
+ assert!(
+ ts.floats.is_empty() || *ts.floats.iter().min().unwrap() > 32,
+ "can't halve all float types"
+ );
+ assert!(
+ ts.bools.is_empty() || *ts.bools.iter().min().unwrap() > 8,
+ "can't halve all boolean types"
+ );
+ assert!(
+ *ts.lanes.iter().max().unwrap() < MAX_LANES,
+ "can't double 256 lanes"
+ );
+ }
+ DerivedFunc::MergeLanes => {
+ assert!(
+ ts.ints.is_empty() || *ts.ints.iter().max().unwrap() < MAX_BITS,
+ "can't double all integer types"
+ );
+ assert!(
+ ts.floats.is_empty() || *ts.floats.iter().max().unwrap() < MAX_FLOAT_BITS,
+ "can't double all float types"
+ );
+ assert!(
+ ts.bools.is_empty() || *ts.bools.iter().max().unwrap() < MAX_BITS,
+ "can't double all boolean types"
+ );
+ assert!(
+ *ts.lanes.iter().min().unwrap() > 1,
+ "can't halve a scalar type"
+ );
+ }
+ DerivedFunc::LaneOf | DerivedFunc::AsBool => { /* no particular assertions */ }
+ }
+
+ TypeVar {
+ content: Rc::new(RefCell::new(TypeVarContent {
+ name: format!("{}({})", derived_func.name(), self.name),
+ doc: "".into(),
+ type_set: ts,
+ base: Some(TypeVarParent {
+ type_var: self.clone(),
+ derived_func,
+ }),
+ })),
+ }
+ }
+
+ pub fn lane_of(&self) -> TypeVar {
+ self.derived(DerivedFunc::LaneOf)
+ }
+ pub fn as_bool(&self) -> TypeVar {
+ self.derived(DerivedFunc::AsBool)
+ }
+ pub fn half_width(&self) -> TypeVar {
+ self.derived(DerivedFunc::HalfWidth)
+ }
+ pub fn double_width(&self) -> TypeVar {
+ self.derived(DerivedFunc::DoubleWidth)
+ }
+ pub fn half_vector(&self) -> TypeVar {
+ self.derived(DerivedFunc::HalfVector)
+ }
+ pub fn double_vector(&self) -> TypeVar {
+ self.derived(DerivedFunc::DoubleVector)
+ }
+ pub fn split_lanes(&self) -> TypeVar {
+ self.derived(DerivedFunc::SplitLanes)
+ }
+ pub fn merge_lanes(&self) -> TypeVar {
+ self.derived(DerivedFunc::MergeLanes)
+ }
+
+ /// Constrain the range of types this variable can assume to a subset of those in the typeset
+ /// `type_set`.
+ /// May mutate itself if it's not derived, or its parent if it is.
+ pub fn constrain_types_by_ts(&self, type_set: TypeSet) {
+ match &self.base {
+ Some(base) => {
+ base.type_var
+ .constrain_types_by_ts(type_set.preimage(base.derived_func));
+ }
+ None => {
+ self.content
+ .borrow_mut()
+ .type_set
+ .inplace_intersect_with(&type_set);
+ }
+ }
+ }
+
+ /// Constrain the range of types this variable can assume to a subset of those `other` can
+ /// assume.
+ /// May mutate itself if it's not derived, or its parent if it is.
+ pub fn constrain_types(&self, other: TypeVar) {
+ if self == &other {
+ return;
+ }
+ self.constrain_types_by_ts(other.get_typeset());
+ }
+
+ /// Get a Rust expression that computes the type of this type variable.
+ pub fn to_rust_code(&self) -> String {
+ match &self.base {
+ Some(base) => format!(
+ "{}.{}().unwrap()",
+ base.type_var.to_rust_code(),
+ base.derived_func.name()
+ ),
+ None => {
+ if let Some(singleton) = self.singleton_type() {
+ singleton.rust_name()
+ } else {
+ self.name.clone()
+ }
+ }
+ }
+ }
+}
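+
+// A short sketch of how a derived type variable computes its type set lazily from
+// its base (illustrative only; the concrete ranges are arbitrary):
+#[cfg(test)]
+mod typevar_sketch {
+ use super::*;
+
+ #[test]
+ fn half_width_narrows_the_typeset() {
+ let tv = TypeVar::new("x", "example", TypeSetBuilder::new().ints(16..64).build());
+ let half = tv.half_width();
+
+ // The base covers i16/i32/i64, so the half-width image covers i8/i16/i32.
+ let ints = half.get_typeset().ints;
+ assert_eq!(ints.len(), 3);
+ assert!(ints.contains(&8) && ints.contains(&16) && ints.contains(&32));
+
+ // The derived variable is still controlled by its free base variable.
+ assert_eq!(half.free_typevar(), Some(tv));
+ }
+}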
+
+impl Into<TypeVar> for &TypeVar {
+ fn into(self) -> TypeVar {
+ self.clone()
+ }
+}
+impl Into<TypeVar> for ValueType {
+ fn into(self) -> TypeVar {
+ TypeVar::new_singleton(self)
+ }
+}
+
+// Hash TypeVars by pointers.
+// There might be a better way to do this, but since TypeVar's content (namely TypeSet) can be
+// mutated, it makes sense to use pointer equality/hashing here.
+impl hash::Hash for TypeVar {
+ fn hash<H: hash::Hasher>(&self, h: &mut H) {
+ match &self.base {
+ Some(base) => {
+ base.type_var.hash(h);
+ base.derived_func.hash(h);
+ }
+ None => {
+ (&**self as *const TypeVarContent).hash(h);
+ }
+ }
+ }
+}
+
+impl PartialEq for TypeVar {
+ fn eq(&self, other: &TypeVar) -> bool {
+ match (&self.base, &other.base) {
+ (Some(base1), Some(base2)) => {
+ base1.type_var.eq(&base2.type_var) && base1.derived_func == base2.derived_func
+ }
+ (None, None) => Rc::ptr_eq(&self.content, &other.content),
+ _ => false,
+ }
+ }
+}
+
+// Allow TypeVar as map keys, based on pointer equality (see also above PartialEq impl).
+impl Eq for TypeVar {}
+
+impl ops::Deref for TypeVar {
+ type Target = TypeVarContent;
+ fn deref(&self) -> &Self::Target {
+ unsafe { self.content.as_ptr().as_ref().unwrap() }
+ }
+}
+
+#[derive(Clone, Copy, Debug, Hash, PartialEq)]
+pub(crate) enum DerivedFunc {
+ LaneOf,
+ AsBool,
+ HalfWidth,
+ DoubleWidth,
+ HalfVector,
+ DoubleVector,
+ SplitLanes,
+ MergeLanes,
+}
+
+impl DerivedFunc {
+ pub fn name(self) -> &'static str {
+ match self {
+ DerivedFunc::LaneOf => "lane_of",
+ DerivedFunc::AsBool => "as_bool",
+ DerivedFunc::HalfWidth => "half_width",
+ DerivedFunc::DoubleWidth => "double_width",
+ DerivedFunc::HalfVector => "half_vector",
+ DerivedFunc::DoubleVector => "double_vector",
+ DerivedFunc::SplitLanes => "split_lanes",
+ DerivedFunc::MergeLanes => "merge_lanes",
+ }
+ }
+
+ /// Returns the inverse function of this one, if it is a bijection.
+ pub fn inverse(self) -> Option<DerivedFunc> {
+ match self {
+ DerivedFunc::HalfWidth => Some(DerivedFunc::DoubleWidth),
+ DerivedFunc::DoubleWidth => Some(DerivedFunc::HalfWidth),
+ DerivedFunc::HalfVector => Some(DerivedFunc::DoubleVector),
+ DerivedFunc::DoubleVector => Some(DerivedFunc::HalfVector),
+ DerivedFunc::MergeLanes => Some(DerivedFunc::SplitLanes),
+ DerivedFunc::SplitLanes => Some(DerivedFunc::MergeLanes),
+ _ => None,
+ }
+ }
+}
+
+#[derive(Debug, Hash)]
+pub(crate) struct TypeVarParent {
+ pub type_var: TypeVar,
+ pub derived_func: DerivedFunc,
+}
+
+/// A set of types.
+///
+/// We don't allow arbitrary subsets of types, but use a parametrized approach
+/// instead.
+///
+/// Objects of this type can be used as map keys.
+///
+/// Parametrized type sets are specified in terms of ranges:
+/// - The permitted range of vector lanes, where 1 indicates a scalar type.
+/// - The permitted range of integer types.
+/// - The permitted range of floating point types, and
+/// - The permitted range of boolean types.
+///
+/// The ranges are inclusive from smallest bit-width to largest bit-width.
+///
+/// Finally, a type set can contain special types (derived from `SpecialType`)
+/// which can't appear as lane types.
+
+type RangeBound = u16;
+type Range = ops::Range<RangeBound>;
+type NumSet = BTreeSet<RangeBound>;
+
+macro_rules! num_set {
+ ($($expr:expr),*) => {
+ NumSet::from_iter(vec![$($expr),*])
+ };
+}
+
+#[derive(Clone, PartialEq, Eq, Hash)]
+pub(crate) struct TypeSet {
+ pub lanes: NumSet,
+ pub ints: NumSet,
+ pub floats: NumSet,
+ pub bools: NumSet,
+ pub refs: NumSet,
+ pub specials: Vec<SpecialType>,
+}
+
+impl TypeSet {
+ fn new(
+ lanes: NumSet,
+ ints: NumSet,
+ floats: NumSet,
+ bools: NumSet,
+ refs: NumSet,
+ specials: Vec<SpecialType>,
+ ) -> Self {
+ Self {
+ lanes,
+ ints,
+ floats,
+ bools,
+ refs,
+ specials,
+ }
+ }
+
+ /// Return the number of concrete types represented by this typeset.
+ pub fn size(&self) -> usize {
+ self.lanes.len()
+ * (self.ints.len() + self.floats.len() + self.bools.len() + self.refs.len())
+ + self.specials.len()
+ }
+
+ /// Return the image of self across the derived function func.
+ fn image(&self, derived_func: DerivedFunc) -> TypeSet {
+ match derived_func {
+ DerivedFunc::LaneOf => self.lane_of(),
+ DerivedFunc::AsBool => self.as_bool(),
+ DerivedFunc::HalfWidth => self.half_width(),
+ DerivedFunc::DoubleWidth => self.double_width(),
+ DerivedFunc::HalfVector => self.half_vector(),
+ DerivedFunc::DoubleVector => self.double_vector(),
+ DerivedFunc::SplitLanes => self.half_width().double_vector(),
+ DerivedFunc::MergeLanes => self.double_width().half_vector(),
+ }
+ }
+
+ /// Return a TypeSet describing the image of self across lane_of.
+ fn lane_of(&self) -> TypeSet {
+ let mut copy = self.clone();
+ copy.lanes = num_set![1];
+ copy
+ }
+
+ /// Return a TypeSet describing the image of self across as_bool.
+ fn as_bool(&self) -> TypeSet {
+ let mut copy = self.clone();
+ copy.ints = NumSet::new();
+ copy.floats = NumSet::new();
+ copy.refs = NumSet::new();
+ if !(&self.lanes - &num_set![1]).is_empty() {
+ copy.bools = &self.ints | &self.floats;
+ copy.bools = &copy.bools | &self.bools;
+ }
+ if self.lanes.contains(&1) {
+ copy.bools.insert(1);
+ }
+ copy
+ }
+
+ /// Return a TypeSet describing the image of self across halfwidth.
+ fn half_width(&self) -> TypeSet {
+ let mut copy = self.clone();
+ copy.ints = NumSet::from_iter(self.ints.iter().filter(|&&x| x > 8).map(|&x| x / 2));
+ copy.floats = NumSet::from_iter(self.floats.iter().filter(|&&x| x > 32).map(|&x| x / 2));
+ copy.bools = NumSet::from_iter(self.bools.iter().filter(|&&x| x > 8).map(|&x| x / 2));
+ copy.specials = Vec::new();
+ copy
+ }
+
+ /// Return a TypeSet describing the image of self across doublewidth.
+ fn double_width(&self) -> TypeSet {
+ let mut copy = self.clone();
+ copy.ints = NumSet::from_iter(self.ints.iter().filter(|&&x| x < MAX_BITS).map(|&x| x * 2));
+ copy.floats = NumSet::from_iter(
+ self.floats
+ .iter()
+ .filter(|&&x| x < MAX_FLOAT_BITS)
+ .map(|&x| x * 2),
+ );
+ copy.bools = NumSet::from_iter(
+ self.bools
+ .iter()
+ .filter(|&&x| x < MAX_BITS)
+ .map(|&x| x * 2)
+ .filter(|x| legal_bool(*x)),
+ );
+ copy.specials = Vec::new();
+ copy
+ }
+
+ /// Return a TypeSet describing the image of self across halfvector.
+ fn half_vector(&self) -> TypeSet {
+ let mut copy = self.clone();
+ copy.lanes = NumSet::from_iter(self.lanes.iter().filter(|&&x| x > 1).map(|&x| x / 2));
+ copy.specials = Vec::new();
+ copy
+ }
+
+ /// Return a TypeSet describing the image of self across doublevector.
+ fn double_vector(&self) -> TypeSet {
+ let mut copy = self.clone();
+ copy.lanes = NumSet::from_iter(
+ self.lanes
+ .iter()
+ .filter(|&&x| x < MAX_LANES)
+ .map(|&x| x * 2),
+ );
+ copy.specials = Vec::new();
+ copy
+ }
+
+ fn concrete_types(&self) -> Vec<ValueType> {
+ let mut ret = Vec::new();
+ for &num_lanes in &self.lanes {
+ for &bits in &self.ints {
+ ret.push(LaneType::int_from_bits(bits).by(num_lanes));
+ }
+ for &bits in &self.floats {
+ ret.push(LaneType::float_from_bits(bits).by(num_lanes));
+ }
+ for &bits in &self.bools {
+ ret.push(LaneType::bool_from_bits(bits).by(num_lanes));
+ }
+ for &bits in &self.refs {
+ ret.push(ReferenceType::ref_from_bits(bits).into());
+ }
+ }
+ for &special in &self.specials {
+ ret.push(special.into());
+ }
+ ret
+ }
+
+ /// Return the singleton type represented by self. Can only be called on typesets containing exactly one type.
+ fn get_singleton(&self) -> ValueType {
+ let mut types = self.concrete_types();
+ assert_eq!(types.len(), 1);
+ types.remove(0)
+ }
+
+ /// Return the inverse image of self across the derived function func.
+ fn preimage(&self, func: DerivedFunc) -> TypeSet {
+ if self.size() == 0 {
+ // The inverse of the empty set is itself.
+ return self.clone();
+ }
+
+ match func {
+ DerivedFunc::LaneOf => {
+ let mut copy = self.clone();
+ copy.lanes =
+ NumSet::from_iter((0..=MAX_LANES.trailing_zeros()).map(|i| u16::pow(2, i)));
+ copy
+ }
+ DerivedFunc::AsBool => {
+ let mut copy = self.clone();
+ if self.bools.contains(&1) {
+ copy.ints = NumSet::from_iter(vec![8, 16, 32, 64, 128]);
+ copy.floats = NumSet::from_iter(vec![32, 64]);
+ } else {
+ copy.ints = &self.bools - &NumSet::from_iter(vec![1]);
+ copy.floats = &self.bools & &NumSet::from_iter(vec![32, 64]);
+ // If b1 is not in our typeset, then lanes=1 cannot be in the pre-image, as
+ // as_bool() of scalars is always b1.
+ copy.lanes = &self.lanes - &NumSet::from_iter(vec![1]);
+ }
+ copy
+ }
+ DerivedFunc::HalfWidth => self.double_width(),
+ DerivedFunc::DoubleWidth => self.half_width(),
+ DerivedFunc::HalfVector => self.double_vector(),
+ DerivedFunc::DoubleVector => self.half_vector(),
+ DerivedFunc::SplitLanes => self.double_width().half_vector(),
+ DerivedFunc::MergeLanes => self.half_width().double_vector(),
+ }
+ }
+
+ pub fn inplace_intersect_with(&mut self, other: &TypeSet) {
+ self.lanes = &self.lanes & &other.lanes;
+ self.ints = &self.ints & &other.ints;
+ self.floats = &self.floats & &other.floats;
+ self.bools = &self.bools & &other.bools;
+ self.refs = &self.refs & &other.refs;
+
+ let mut new_specials = Vec::new();
+ for spec in &self.specials {
+ if let Some(spec) = other.specials.iter().find(|&other_spec| other_spec == spec) {
+ new_specials.push(*spec);
+ }
+ }
+ self.specials = new_specials;
+ }
+
+ pub fn is_subset(&self, other: &TypeSet) -> bool {
+ self.lanes.is_subset(&other.lanes)
+ && self.ints.is_subset(&other.ints)
+ && self.floats.is_subset(&other.floats)
+ && self.bools.is_subset(&other.bools)
+ && self.refs.is_subset(&other.refs)
+ && {
+ let specials: HashSet<SpecialType> = HashSet::from_iter(self.specials.clone());
+ let other_specials = HashSet::from_iter(other.specials.clone());
+ specials.is_subset(&other_specials)
+ }
+ }
+
+ pub fn is_wider_or_equal(&self, other: &TypeSet) -> bool {
+ set_wider_or_equal(&self.ints, &other.ints)
+ && set_wider_or_equal(&self.floats, &other.floats)
+ && set_wider_or_equal(&self.bools, &other.bools)
+ && set_wider_or_equal(&self.refs, &other.refs)
+ }
+
+ pub fn is_narrower(&self, other: &TypeSet) -> bool {
+ set_narrower(&self.ints, &other.ints)
+ && set_narrower(&self.floats, &other.floats)
+ && set_narrower(&self.bools, &other.bools)
+ && set_narrower(&self.refs, &other.refs)
+ }
+}
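+
+// A sketch of the `as_bool` image (illustrative only): vector lanes keep their
+// width when mapped to booleans, while the scalar case always maps to b1.
+#[cfg(test)]
+mod as_bool_sketch {
+ use super::*;
+
+ #[test]
+ fn as_bool_image_of_ints() {
+ let ts = TypeSetBuilder::new().ints(32..32).simd_lanes(1..4).build();
+ let bools = ts.as_bool();
+ assert!(bools.ints.is_empty() && bools.floats.is_empty());
+ // 2- and 4-lane vectors yield b32 lanes; the scalar case yields b1.
+ assert!(bools.bools.contains(&32) && bools.bools.contains(&1));
+ }
+}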
+
+fn set_wider_or_equal(s1: &NumSet, s2: &NumSet) -> bool {
+ !s1.is_empty() && !s2.is_empty() && s1.iter().min() >= s2.iter().max()
+}
+
+fn set_narrower(s1: &NumSet, s2: &NumSet) -> bool {
+ !s1.is_empty() && !s2.is_empty() && s1.iter().min() < s2.iter().max()
+}
+
+impl fmt::Debug for TypeSet {
+ fn fmt(&self, fmt: &mut fmt::Formatter) -> Result<(), fmt::Error> {
+ write!(fmt, "TypeSet(")?;
+
+ let mut subsets = Vec::new();
+ if !self.lanes.is_empty() {
+ subsets.push(format!(
+ "lanes={{{}}}",
+ Vec::from_iter(self.lanes.iter().map(|x| x.to_string())).join(", ")
+ ));
+ }
+ if !self.ints.is_empty() {
+ subsets.push(format!(
+ "ints={{{}}}",
+ Vec::from_iter(self.ints.iter().map(|x| x.to_string())).join(", ")
+ ));
+ }
+ if !self.floats.is_empty() {
+ subsets.push(format!(
+ "floats={{{}}}",
+ Vec::from_iter(self.floats.iter().map(|x| x.to_string())).join(", ")
+ ));
+ }
+ if !self.bools.is_empty() {
+ subsets.push(format!(
+ "bools={{{}}}",
+ Vec::from_iter(self.bools.iter().map(|x| x.to_string())).join(", ")
+ ));
+ }
+ if !self.refs.is_empty() {
+ subsets.push(format!(
+ "refs={{{}}}",
+ Vec::from_iter(self.refs.iter().map(|x| x.to_string())).join(", ")
+ ));
+ }
+ if !self.specials.is_empty() {
+ subsets.push(format!(
+ "specials={{{}}}",
+ Vec::from_iter(self.specials.iter().map(|x| x.to_string())).join(", ")
+ ));
+ }
+
+ write!(fmt, "{})", subsets.join(", "))?;
+ Ok(())
+ }
+}
+
+pub(crate) struct TypeSetBuilder {
+ ints: Interval,
+ floats: Interval,
+ bools: Interval,
+ refs: Interval,
+ includes_scalars: bool,
+ simd_lanes: Interval,
+ specials: Vec<SpecialType>,
+}
+
+impl TypeSetBuilder {
+ pub fn new() -> Self {
+ Self {
+ ints: Interval::None,
+ floats: Interval::None,
+ bools: Interval::None,
+ refs: Interval::None,
+ includes_scalars: true,
+ simd_lanes: Interval::None,
+ specials: Vec::new(),
+ }
+ }
+
+ pub fn ints(mut self, interval: impl Into<Interval>) -> Self {
+ assert!(self.ints == Interval::None);
+ self.ints = interval.into();
+ self
+ }
+ pub fn floats(mut self, interval: impl Into<Interval>) -> Self {
+ assert!(self.floats == Interval::None);
+ self.floats = interval.into();
+ self
+ }
+ pub fn bools(mut self, interval: impl Into<Interval>) -> Self {
+ assert!(self.bools == Interval::None);
+ self.bools = interval.into();
+ self
+ }
+ pub fn refs(mut self, interval: impl Into<Interval>) -> Self {
+ assert!(self.refs == Interval::None);
+ self.refs = interval.into();
+ self
+ }
+ pub fn includes_scalars(mut self, includes_scalars: bool) -> Self {
+ self.includes_scalars = includes_scalars;
+ self
+ }
+ pub fn simd_lanes(mut self, interval: impl Into<Interval>) -> Self {
+ assert!(self.simd_lanes == Interval::None);
+ self.simd_lanes = interval.into();
+ self
+ }
+ pub fn specials(mut self, specials: Vec<SpecialType>) -> Self {
+ assert!(self.specials.is_empty());
+ self.specials = specials;
+ self
+ }
+
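+    // Note: interval bounds are inclusive at both ends and are expanded to every power of two
+    // they contain; e.g. `ints(16..64)` produces the widths {16, 32, 64} (see `range_to_set`).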
+ pub fn build(self) -> TypeSet {
+ let min_lanes = if self.includes_scalars { 1 } else { 2 };
+
+ let bools = range_to_set(self.bools.to_range(1..MAX_BITS, None))
+ .into_iter()
+ .filter(|x| legal_bool(*x))
+ .collect();
+
+ TypeSet::new(
+ range_to_set(self.simd_lanes.to_range(min_lanes..MAX_LANES, Some(1))),
+ range_to_set(self.ints.to_range(8..MAX_BITS, None)),
+ range_to_set(self.floats.to_range(32..64, None)),
+ bools,
+ range_to_set(self.refs.to_range(32..64, None)),
+ self.specials,
+ )
+ }
+
+ pub fn all() -> TypeSet {
+ TypeSetBuilder::new()
+ .ints(Interval::All)
+ .floats(Interval::All)
+ .bools(Interval::All)
+ .refs(Interval::All)
+ .simd_lanes(Interval::All)
+ .specials(ValueType::all_special_types().collect())
+ .includes_scalars(true)
+ .build()
+ }
+}
+
+#[derive(PartialEq)]
+pub(crate) enum Interval {
+ None,
+ All,
+ Range(Range),
+}
+
+impl Interval {
+ fn to_range(&self, full_range: Range, default: Option<RangeBound>) -> Option<Range> {
+ match self {
+ Interval::None => {
+ if let Some(default_val) = default {
+ Some(default_val..default_val)
+ } else {
+ None
+ }
+ }
+
+ Interval::All => Some(full_range),
+
+ Interval::Range(range) => {
+ let (low, high) = (range.start, range.end);
+ assert!(low.is_power_of_two());
+ assert!(high.is_power_of_two());
+ assert!(low <= high);
+ assert!(low >= full_range.start);
+ assert!(high <= full_range.end);
+ Some(low..high)
+ }
+ }
+ }
+}
+
+impl Into<Interval> for Range {
+ fn into(self) -> Interval {
+ Interval::Range(self)
+ }
+}
+
+fn legal_bool(bits: RangeBound) -> bool {
+ // Only allow legal bit widths for bool types.
+ bits == 1 || (bits >= 8 && bits <= MAX_BITS && bits.is_power_of_two())
+}
+
+/// Generates a set with all the powers of two included in the range.
+fn range_to_set(range: Option<Range>) -> NumSet {
+ let mut set = NumSet::new();
+
+ let (low, high) = match range {
+ Some(range) => (range.start, range.end),
+ None => return set,
+ };
+
+ assert!(low.is_power_of_two());
+ assert!(high.is_power_of_two());
+ assert!(low <= high);
+
+ for i in low.trailing_zeros()..=high.trailing_zeros() {
+ assert!(1 << i <= RangeBound::max_value());
+ set.insert(1 << i);
+ }
+ set
+}
+
+#[test]
+fn test_typevar_builder() {
+ let type_set = TypeSetBuilder::new().ints(Interval::All).build();
+ assert_eq!(type_set.lanes, num_set![1]);
+ assert!(type_set.floats.is_empty());
+ assert_eq!(type_set.ints, num_set![8, 16, 32, 64, 128]);
+ assert!(type_set.bools.is_empty());
+ assert!(type_set.specials.is_empty());
+
+ let type_set = TypeSetBuilder::new().bools(Interval::All).build();
+ assert_eq!(type_set.lanes, num_set![1]);
+ assert!(type_set.floats.is_empty());
+ assert!(type_set.ints.is_empty());
+ assert_eq!(type_set.bools, num_set![1, 8, 16, 32, 64, 128]);
+ assert!(type_set.specials.is_empty());
+
+ let type_set = TypeSetBuilder::new().floats(Interval::All).build();
+ assert_eq!(type_set.lanes, num_set![1]);
+ assert_eq!(type_set.floats, num_set![32, 64]);
+ assert!(type_set.ints.is_empty());
+ assert!(type_set.bools.is_empty());
+ assert!(type_set.specials.is_empty());
+
+ let type_set = TypeSetBuilder::new()
+ .floats(Interval::All)
+ .simd_lanes(Interval::All)
+ .includes_scalars(false)
+ .build();
+ assert_eq!(type_set.lanes, num_set![2, 4, 8, 16, 32, 64, 128, 256]);
+ assert_eq!(type_set.floats, num_set![32, 64]);
+ assert!(type_set.ints.is_empty());
+ assert!(type_set.bools.is_empty());
+ assert!(type_set.specials.is_empty());
+
+ let type_set = TypeSetBuilder::new()
+ .floats(Interval::All)
+ .simd_lanes(Interval::All)
+ .includes_scalars(true)
+ .build();
+ assert_eq!(type_set.lanes, num_set![1, 2, 4, 8, 16, 32, 64, 128, 256]);
+ assert_eq!(type_set.floats, num_set![32, 64]);
+ assert!(type_set.ints.is_empty());
+ assert!(type_set.bools.is_empty());
+ assert!(type_set.specials.is_empty());
+
+ let type_set = TypeSetBuilder::new().ints(16..64).build();
+ assert_eq!(type_set.lanes, num_set![1]);
+ assert_eq!(type_set.ints, num_set![16, 32, 64]);
+ assert!(type_set.floats.is_empty());
+ assert!(type_set.bools.is_empty());
+ assert!(type_set.specials.is_empty());
+}
+
+#[test]
+#[should_panic]
+fn test_typevar_builder_too_high_bound_panic() {
+ TypeSetBuilder::new().ints(16..2 * MAX_BITS).build();
+}
+
+#[test]
+#[should_panic]
+fn test_typevar_builder_inverted_bounds_panic() {
+ TypeSetBuilder::new().ints(32..16).build();
+}
+
+#[test]
+fn test_as_bool() {
+ let a = TypeSetBuilder::new()
+ .simd_lanes(2..8)
+ .ints(8..8)
+ .floats(32..32)
+ .build();
+ assert_eq!(
+ a.lane_of(),
+ TypeSetBuilder::new().ints(8..8).floats(32..32).build()
+ );
+
+ // Test as_bool with disjoint intervals.
+ let mut a_as_bool = TypeSetBuilder::new().simd_lanes(2..8).build();
+ a_as_bool.bools = num_set![8, 32];
+ assert_eq!(a.as_bool(), a_as_bool);
+
+ let b = TypeSetBuilder::new()
+ .simd_lanes(1..8)
+ .ints(8..8)
+ .floats(32..32)
+ .build();
+ let mut b_as_bool = TypeSetBuilder::new().simd_lanes(1..8).build();
+ b_as_bool.bools = num_set![1, 8, 32];
+ assert_eq!(b.as_bool(), b_as_bool);
+}
+
+#[test]
+fn test_forward_images() {
+ let empty_set = TypeSetBuilder::new().build();
+
+ // Half vector.
+ assert_eq!(
+ TypeSetBuilder::new()
+ .simd_lanes(1..32)
+ .build()
+ .half_vector(),
+ TypeSetBuilder::new().simd_lanes(1..16).build()
+ );
+
+ // Double vector.
+ assert_eq!(
+ TypeSetBuilder::new()
+ .simd_lanes(1..32)
+ .build()
+ .double_vector(),
+ TypeSetBuilder::new().simd_lanes(2..64).build()
+ );
+ assert_eq!(
+ TypeSetBuilder::new()
+ .simd_lanes(128..256)
+ .build()
+ .double_vector(),
+ TypeSetBuilder::new().simd_lanes(256..256).build()
+ );
+
+ // Half width.
+ assert_eq!(
+ TypeSetBuilder::new().ints(8..32).build().half_width(),
+ TypeSetBuilder::new().ints(8..16).build()
+ );
+ assert_eq!(
+ TypeSetBuilder::new().floats(32..32).build().half_width(),
+ empty_set
+ );
+ assert_eq!(
+ TypeSetBuilder::new().floats(32..64).build().half_width(),
+ TypeSetBuilder::new().floats(32..32).build()
+ );
+ assert_eq!(
+ TypeSetBuilder::new().bools(1..8).build().half_width(),
+ empty_set
+ );
+ assert_eq!(
+ TypeSetBuilder::new().bools(1..32).build().half_width(),
+ TypeSetBuilder::new().bools(8..16).build()
+ );
+
+ // Double width.
+ assert_eq!(
+ TypeSetBuilder::new().ints(8..32).build().double_width(),
+ TypeSetBuilder::new().ints(16..64).build()
+ );
+ assert_eq!(
+ TypeSetBuilder::new().ints(32..64).build().double_width(),
+ TypeSetBuilder::new().ints(64..128).build()
+ );
+ assert_eq!(
+ TypeSetBuilder::new().floats(32..32).build().double_width(),
+ TypeSetBuilder::new().floats(64..64).build()
+ );
+ assert_eq!(
+ TypeSetBuilder::new().floats(32..64).build().double_width(),
+ TypeSetBuilder::new().floats(64..64).build()
+ );
+ assert_eq!(
+ TypeSetBuilder::new().bools(1..16).build().double_width(),
+ TypeSetBuilder::new().bools(16..32).build()
+ );
+ assert_eq!(
+ TypeSetBuilder::new().bools(32..64).build().double_width(),
+ TypeSetBuilder::new().bools(64..128).build()
+ );
+}
+
+#[test]
+fn test_backward_images() {
+ let empty_set = TypeSetBuilder::new().build();
+
+ // LaneOf.
+ assert_eq!(
+ TypeSetBuilder::new()
+ .simd_lanes(1..1)
+ .ints(8..8)
+ .floats(32..32)
+ .build()
+ .preimage(DerivedFunc::LaneOf),
+ TypeSetBuilder::new()
+ .simd_lanes(Interval::All)
+ .ints(8..8)
+ .floats(32..32)
+ .build()
+ );
+ assert_eq!(empty_set.preimage(DerivedFunc::LaneOf), empty_set);
+
+ // AsBool.
+ assert_eq!(
+ TypeSetBuilder::new()
+ .simd_lanes(1..4)
+ .bools(1..128)
+ .build()
+ .preimage(DerivedFunc::AsBool),
+ TypeSetBuilder::new()
+ .simd_lanes(1..4)
+ .ints(Interval::All)
+ .bools(Interval::All)
+ .floats(Interval::All)
+ .build()
+ );
+
+ // Double vector.
+ assert_eq!(
+ TypeSetBuilder::new()
+ .simd_lanes(1..1)
+ .ints(8..8)
+ .build()
+ .preimage(DerivedFunc::DoubleVector)
+ .size(),
+ 0
+ );
+ assert_eq!(
+ TypeSetBuilder::new()
+ .simd_lanes(1..16)
+ .ints(8..16)
+ .floats(32..32)
+ .build()
+ .preimage(DerivedFunc::DoubleVector),
+ TypeSetBuilder::new()
+ .simd_lanes(1..8)
+ .ints(8..16)
+ .floats(32..32)
+ .build(),
+ );
+
+ // Half vector.
+ assert_eq!(
+ TypeSetBuilder::new()
+ .simd_lanes(256..256)
+ .ints(8..8)
+ .build()
+ .preimage(DerivedFunc::HalfVector)
+ .size(),
+ 0
+ );
+ assert_eq!(
+ TypeSetBuilder::new()
+ .simd_lanes(64..128)
+ .bools(1..32)
+ .build()
+ .preimage(DerivedFunc::HalfVector),
+ TypeSetBuilder::new()
+ .simd_lanes(128..256)
+ .bools(1..32)
+ .build(),
+ );
+
+ // Half width.
+ assert_eq!(
+ TypeSetBuilder::new()
+ .ints(128..128)
+ .floats(64..64)
+ .bools(128..128)
+ .build()
+ .preimage(DerivedFunc::HalfWidth)
+ .size(),
+ 0
+ );
+ assert_eq!(
+ TypeSetBuilder::new()
+ .simd_lanes(64..256)
+ .bools(1..64)
+ .build()
+ .preimage(DerivedFunc::HalfWidth),
+ TypeSetBuilder::new()
+ .simd_lanes(64..256)
+ .bools(16..128)
+ .build(),
+ );
+
+ // Double width.
+ assert_eq!(
+ TypeSetBuilder::new()
+ .ints(8..8)
+ .floats(32..32)
+ .bools(1..8)
+ .build()
+ .preimage(DerivedFunc::DoubleWidth)
+ .size(),
+ 0
+ );
+ assert_eq!(
+ TypeSetBuilder::new()
+ .simd_lanes(1..16)
+ .ints(8..16)
+ .floats(32..64)
+ .build()
+ .preimage(DerivedFunc::DoubleWidth),
+ TypeSetBuilder::new()
+ .simd_lanes(1..16)
+ .ints(8..8)
+ .floats(32..32)
+ .build()
+ );
+}
+
+#[test]
+#[should_panic]
+fn test_typeset_singleton_panic_nonsingleton_types() {
+ TypeSetBuilder::new()
+ .ints(8..8)
+ .floats(32..32)
+ .build()
+ .get_singleton();
+}
+
+#[test]
+#[should_panic]
+fn test_typeset_singleton_panic_nonsingleton_lanes() {
+ TypeSetBuilder::new()
+ .simd_lanes(1..2)
+ .floats(32..32)
+ .build()
+ .get_singleton();
+}
+
+#[test]
+fn test_typeset_singleton() {
+ use crate::shared::types as shared_types;
+ assert_eq!(
+ TypeSetBuilder::new().ints(16..16).build().get_singleton(),
+ ValueType::Lane(shared_types::Int::I16.into())
+ );
+ assert_eq!(
+ TypeSetBuilder::new().floats(64..64).build().get_singleton(),
+ ValueType::Lane(shared_types::Float::F64.into())
+ );
+ assert_eq!(
+ TypeSetBuilder::new().bools(1..1).build().get_singleton(),
+ ValueType::Lane(shared_types::Bool::B1.into())
+ );
+ assert_eq!(
+ TypeSetBuilder::new()
+ .simd_lanes(4..4)
+ .ints(32..32)
+ .build()
+ .get_singleton(),
+ LaneType::from(shared_types::Int::I32).by(4)
+ );
+}
+
+#[test]
+fn test_typevar_functions() {
+ let x = TypeVar::new(
+ "x",
+ "i16 and up",
+ TypeSetBuilder::new().ints(16..64).build(),
+ );
+ assert_eq!(x.half_width().name, "half_width(x)");
+ assert_eq!(
+ x.half_width().double_width().name,
+ "double_width(half_width(x))"
+ );
+
+ let x = TypeVar::new("x", "up to i32", TypeSetBuilder::new().ints(8..32).build());
+ assert_eq!(x.double_width().name, "double_width(x)");
+}
+
+#[test]
+fn test_typevar_singleton() {
+ use crate::cdsl::types::VectorType;
+ use crate::shared::types as shared_types;
+
+ // Test i32.
+ let typevar = TypeVar::new_singleton(ValueType::Lane(LaneType::Int(shared_types::Int::I32)));
+ assert_eq!(typevar.name, "i32");
+ assert_eq!(typevar.type_set.ints, num_set![32]);
+ assert!(typevar.type_set.floats.is_empty());
+ assert!(typevar.type_set.bools.is_empty());
+ assert!(typevar.type_set.specials.is_empty());
+ assert_eq!(typevar.type_set.lanes, num_set![1]);
+
+ // Test f32x4.
+ let typevar = TypeVar::new_singleton(ValueType::Vector(VectorType::new(
+ LaneType::Float(shared_types::Float::F32),
+ 4,
+ )));
+ assert_eq!(typevar.name, "f32x4");
+ assert!(typevar.type_set.ints.is_empty());
+ assert_eq!(typevar.type_set.floats, num_set![32]);
+ assert_eq!(typevar.type_set.lanes, num_set![4]);
+ assert!(typevar.type_set.bools.is_empty());
+ assert!(typevar.type_set.specials.is_empty());
+}
diff --git a/third_party/rust/cranelift-codegen-meta/src/cdsl/xform.rs b/third_party/rust/cranelift-codegen-meta/src/cdsl/xform.rs
new file mode 100644
index 0000000000..d21e93128d
--- /dev/null
+++ b/third_party/rust/cranelift-codegen-meta/src/cdsl/xform.rs
@@ -0,0 +1,484 @@
+use crate::cdsl::ast::{
+ Apply, BlockPool, ConstPool, DefIndex, DefPool, DummyDef, DummyExpr, Expr, PatternPosition,
+ VarIndex, VarPool,
+};
+use crate::cdsl::instructions::Instruction;
+use crate::cdsl::type_inference::{infer_transform, TypeEnvironment};
+use crate::cdsl::typevar::TypeVar;
+
+use cranelift_entity::{entity_impl, PrimaryMap};
+
+use std::collections::{HashMap, HashSet};
+use std::iter::FromIterator;
+
+/// An instruction transformation consists of a source and destination pattern.
+///
+/// Patterns are expressed in *register transfer language* as tuples of Def or Expr nodes. A
+/// pattern may optionally have a sequence of TypeConstraints that additionally limit the set of
+/// cases when it applies.
+///
+/// The source pattern can contain only a single instruction.
+pub(crate) struct Transform {
+ pub src: DefIndex,
+ pub dst: Vec<DefIndex>,
+ pub var_pool: VarPool,
+ pub def_pool: DefPool,
+ pub block_pool: BlockPool,
+ pub const_pool: ConstPool,
+ pub type_env: TypeEnvironment,
+}
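+
+// For illustration (not part of the original source): the `DummyDef`s passed to
+// `TransformGroupBuilder::legalize` are usually built with the `def!` macro, roughly like:
+//
+//     group.legalize(
+//         def!(a = iadd_imm(x, y)),
+//         vec![def!(a1 = iconst(y)), def!(a = iadd(x, a1))],
+//     );
+//
+// where the single-instruction source pattern is rewritten into the destination sequence.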
+
+type SymbolTable = HashMap<String, VarIndex>;
+
+impl Transform {
+ fn new(src: DummyDef, dst: Vec<DummyDef>) -> Self {
+ let mut var_pool = VarPool::new();
+ let mut def_pool = DefPool::new();
+ let mut block_pool = BlockPool::new();
+ let mut const_pool = ConstPool::new();
+
+ let mut input_vars: Vec<VarIndex> = Vec::new();
+ let mut defined_vars: Vec<VarIndex> = Vec::new();
+
+ // Maps variable names to our own Var copies.
+ let mut symbol_table: SymbolTable = SymbolTable::new();
+
+ // Rewrite variables in src and dst using our own copies.
+ let src = rewrite_def_list(
+ PatternPosition::Source,
+ vec![src],
+ &mut symbol_table,
+ &mut input_vars,
+ &mut defined_vars,
+ &mut var_pool,
+ &mut def_pool,
+ &mut block_pool,
+ &mut const_pool,
+ )[0];
+
+ let num_src_inputs = input_vars.len();
+
+ let dst = rewrite_def_list(
+ PatternPosition::Destination,
+ dst,
+ &mut symbol_table,
+ &mut input_vars,
+ &mut defined_vars,
+ &mut var_pool,
+ &mut def_pool,
+ &mut block_pool,
+ &mut const_pool,
+ );
+
+ // Sanity checks.
+ for &var_index in &input_vars {
+ assert!(
+ var_pool.get(var_index).is_input(),
+ format!("'{:?}' used as both input and def", var_pool.get(var_index))
+ );
+ }
+ assert!(
+ input_vars.len() == num_src_inputs,
+ format!(
+ "extra input vars in dst pattern: {:?}",
+ input_vars
+ .iter()
+ .map(|&i| var_pool.get(i))
+ .skip(num_src_inputs)
+ .collect::<Vec<_>>()
+ )
+ );
+
+ // Perform type inference and cleanup.
+ let type_env = infer_transform(src, &dst, &def_pool, &mut var_pool).unwrap();
+
+ // Sanity check: the set of inferred free type variables should be a subset of the type
+ // variables corresponding to Vars appearing in the source pattern.
+ {
+ let free_typevars: HashSet<TypeVar> =
+ HashSet::from_iter(type_env.free_typevars(&mut var_pool));
+ let src_tvs = HashSet::from_iter(
+ input_vars
+ .clone()
+ .iter()
+ .chain(
+ defined_vars
+ .iter()
+ .filter(|&&var_index| !var_pool.get(var_index).is_temp()),
+ )
+ .map(|&var_index| var_pool.get(var_index).get_typevar())
+ .filter(|maybe_var| maybe_var.is_some())
+ .map(|var| var.unwrap()),
+ );
+ if !free_typevars.is_subset(&src_tvs) {
+ let missing_tvs = (&free_typevars - &src_tvs)
+ .iter()
+ .map(|tv| tv.name.clone())
+ .collect::<Vec<_>>()
+ .join(", ");
+ panic!("Some free vars don't appear in src: {}", missing_tvs);
+ }
+ }
+
+ for &var_index in input_vars.iter().chain(defined_vars.iter()) {
+ let var = var_pool.get_mut(var_index);
+ let canon_tv = type_env.get_equivalent(&var.get_or_create_typevar());
+ var.set_typevar(canon_tv);
+ }
+
+ Self {
+ src,
+ dst,
+ var_pool,
+ def_pool,
+ block_pool,
+ const_pool,
+ type_env,
+ }
+ }
+
+ fn verify_legalize(&self) {
+ let def = self.def_pool.get(self.src);
+ for &var_index in def.defined_vars.iter() {
+ let defined_var = self.var_pool.get(var_index);
+ assert!(
+ defined_var.is_output(),
+ format!("{:?} not defined in the destination pattern", defined_var)
+ );
+ }
+ }
+}
+
+/// Returns the index in `var_pool` of the variable named `name`, creating it if it is not already
+/// present in `symbol_table`. Newly created variables are also appended to `defined_vars`.
+fn var_index(
+ name: &str,
+ symbol_table: &mut SymbolTable,
+ defined_vars: &mut Vec<VarIndex>,
+ var_pool: &mut VarPool,
+) -> VarIndex {
+ let name = name.to_string();
+ match symbol_table.get(&name) {
+ Some(&existing_var) => existing_var,
+ None => {
+ // Materialize the variable.
+ let new_var = var_pool.create(name.clone());
+ symbol_table.insert(name, new_var);
+ defined_vars.push(new_var);
+ new_var
+ }
+ }
+}
+
+/// Given a list of symbols defined in a Def, rewrite them to local symbols. Yield the new locals.
+fn rewrite_defined_vars(
+ position: PatternPosition,
+ dummy_def: &DummyDef,
+ def_index: DefIndex,
+ symbol_table: &mut SymbolTable,
+ defined_vars: &mut Vec<VarIndex>,
+ var_pool: &mut VarPool,
+) -> Vec<VarIndex> {
+ let mut new_defined_vars = Vec::new();
+ for var in &dummy_def.defined_vars {
+ let own_var = var_index(&var.name, symbol_table, defined_vars, var_pool);
+ var_pool.get_mut(own_var).set_def(position, def_index);
+ new_defined_vars.push(own_var);
+ }
+ new_defined_vars
+}
+
+/// Find all uses of variables in `expr` and replace them with our own local symbols.
+fn rewrite_expr(
+ position: PatternPosition,
+ dummy_expr: DummyExpr,
+ symbol_table: &mut SymbolTable,
+ input_vars: &mut Vec<VarIndex>,
+ var_pool: &mut VarPool,
+ const_pool: &mut ConstPool,
+) -> Apply {
+ let (apply_target, dummy_args) = if let DummyExpr::Apply(apply_target, dummy_args) = dummy_expr
+ {
+ (apply_target, dummy_args)
+ } else {
+ panic!("we only rewrite apply expressions");
+ };
+
+ assert_eq!(
+ apply_target.inst().operands_in.len(),
+ dummy_args.len(),
+ "number of arguments in instruction {} is incorrect\nexpected: {:?}",
+ apply_target.inst().name,
+ apply_target
+ .inst()
+ .operands_in
+ .iter()
+ .map(|operand| format!("{}: {}", operand.name, operand.kind.rust_type))
+ .collect::<Vec<_>>(),
+ );
+
+ let mut args = Vec::new();
+ for (i, arg) in dummy_args.into_iter().enumerate() {
+ match arg {
+ DummyExpr::Var(var) => {
+ let own_var = var_index(&var.name, symbol_table, input_vars, var_pool);
+ let var = var_pool.get(own_var);
+ assert!(
+ var.is_input() || var.get_def(position).is_some(),
+ format!("{:?} used as both input and def", var)
+ );
+ args.push(Expr::Var(own_var));
+ }
+ DummyExpr::Literal(literal) => {
+ assert!(!apply_target.inst().operands_in[i].is_value());
+ args.push(Expr::Literal(literal));
+ }
+ DummyExpr::Constant(constant) => {
+ let const_name = const_pool.insert(constant.0);
+ // Here we abuse var_index by passing an empty, immediately-dropped vector to
+ // `defined_vars`; the reason for this is that unlike the `Var` case above,
+ // constants will create a variable that is not an input variable (it is tracked
+ // instead by ConstPool).
+ let const_var = var_index(&const_name, symbol_table, &mut vec![], var_pool);
+ args.push(Expr::Var(const_var));
+ }
+ DummyExpr::Apply(..) => {
+ panic!("Recursive apply is not allowed.");
+ }
+ DummyExpr::Block(_block) => {
+ panic!("Blocks are not valid arguments.");
+ }
+ }
+ }
+
+ Apply::new(apply_target, args)
+}
+
+#[allow(clippy::too_many_arguments)]
+fn rewrite_def_list(
+ position: PatternPosition,
+ dummy_defs: Vec<DummyDef>,
+ symbol_table: &mut SymbolTable,
+ input_vars: &mut Vec<VarIndex>,
+ defined_vars: &mut Vec<VarIndex>,
+ var_pool: &mut VarPool,
+ def_pool: &mut DefPool,
+ block_pool: &mut BlockPool,
+ const_pool: &mut ConstPool,
+) -> Vec<DefIndex> {
+ let mut new_defs = Vec::new();
+    // Register the variable names of new blocks first, since a block name can be used to jump
+    // forward; registering it up front avoids misinterpreting it as an input variable.
+ for dummy_def in dummy_defs.iter() {
+ if let DummyExpr::Block(ref var) = dummy_def.expr {
+ var_index(&var.name, symbol_table, defined_vars, var_pool);
+ }
+ }
+
+    // Iterate over the definitions and blocks, mapping variable names to inputs or outputs.
+ for dummy_def in dummy_defs {
+ let def_index = def_pool.next_index();
+
+ let new_defined_vars = rewrite_defined_vars(
+ position,
+ &dummy_def,
+ def_index,
+ symbol_table,
+ defined_vars,
+ var_pool,
+ );
+ if let DummyExpr::Block(var) = dummy_def.expr {
+ let var_index = *symbol_table
+ .get(&var.name)
+ .or_else(|| {
+ panic!(
+ "Block {} was not registered during the first visit",
+ var.name
+ )
+ })
+ .unwrap();
+ var_pool.get_mut(var_index).set_def(position, def_index);
+ block_pool.create_block(var_index, def_index);
+ } else {
+ let new_apply = rewrite_expr(
+ position,
+ dummy_def.expr,
+ symbol_table,
+ input_vars,
+ var_pool,
+ const_pool,
+ );
+
+ assert!(
+ def_pool.next_index() == def_index,
+ "shouldn't have created new defs in the meanwhile"
+ );
+ assert_eq!(
+ new_apply.inst.value_results.len(),
+ new_defined_vars.len(),
+ "number of Var results in instruction is incorrect"
+ );
+
+ new_defs.push(def_pool.create_inst(new_apply, new_defined_vars));
+ }
+ }
+ new_defs
+}
+
+/// A group of related transformations.
+pub(crate) struct TransformGroup {
+ pub name: &'static str,
+ pub doc: &'static str,
+ pub chain_with: Option<TransformGroupIndex>,
+ pub isa_name: Option<&'static str>,
+ pub id: TransformGroupIndex,
+
+    /// Maps Instruction camel_case names to custom legalization function names.
+ pub custom_legalizes: HashMap<String, &'static str>,
+ pub transforms: Vec<Transform>,
+}
+
+impl TransformGroup {
+ pub fn rust_name(&self) -> String {
+ match self.isa_name {
+ Some(_) => {
+ // This is a function in the same module as the LEGALIZE_ACTIONS table referring to
+ // it.
+ self.name.to_string()
+ }
+ None => format!("crate::legalizer::{}", self.name),
+ }
+ }
+}
+
+#[derive(Copy, Clone, PartialEq, Eq, Hash, PartialOrd, Ord)]
+pub(crate) struct TransformGroupIndex(u32);
+entity_impl!(TransformGroupIndex);
+
+pub(crate) struct TransformGroupBuilder {
+ name: &'static str,
+ doc: &'static str,
+ chain_with: Option<TransformGroupIndex>,
+ isa_name: Option<&'static str>,
+ pub custom_legalizes: HashMap<String, &'static str>,
+ pub transforms: Vec<Transform>,
+}
+
+impl TransformGroupBuilder {
+ pub fn new(name: &'static str, doc: &'static str) -> Self {
+ Self {
+ name,
+ doc,
+ chain_with: None,
+ isa_name: None,
+ custom_legalizes: HashMap::new(),
+ transforms: Vec::new(),
+ }
+ }
+
+ pub fn chain_with(mut self, next_id: TransformGroupIndex) -> Self {
+ assert!(self.chain_with.is_none());
+ self.chain_with = Some(next_id);
+ self
+ }
+
+ pub fn isa(mut self, isa_name: &'static str) -> Self {
+ assert!(self.isa_name.is_none());
+ self.isa_name = Some(isa_name);
+ self
+ }
+
+ /// Add a custom legalization action for `inst`.
+ ///
+ /// The `func_name` parameter is the fully qualified name of a Rust function which takes the
+ /// same arguments as the `isa::Legalize` actions.
+ ///
+ /// The custom function will be called to legalize `inst` and any return value is ignored.
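+    ///
+    /// Illustrative call (the instruction and function names here are hypothetical, not taken
+    /// from this crate):
+    ///
+    /// ```text
+    /// group.custom_legalize(&my_inst, "expand_my_inst");
+    /// ```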
+ pub fn custom_legalize(&mut self, inst: &Instruction, func_name: &'static str) {
+ assert!(
+ self.custom_legalizes
+ .insert(inst.camel_name.clone(), func_name)
+ .is_none(),
+ format!(
+ "custom legalization action for {} inserted twice",
+ inst.name
+ )
+ );
+ }
+
+ /// Add a legalization pattern to this group.
+ pub fn legalize(&mut self, src: DummyDef, dst: Vec<DummyDef>) {
+ let transform = Transform::new(src, dst);
+ transform.verify_legalize();
+ self.transforms.push(transform);
+ }
+
+ pub fn build_and_add_to(self, owner: &mut TransformGroups) -> TransformGroupIndex {
+ let next_id = owner.next_key();
+ owner.add(TransformGroup {
+ name: self.name,
+ doc: self.doc,
+ isa_name: self.isa_name,
+ id: next_id,
+ chain_with: self.chain_with,
+ custom_legalizes: self.custom_legalizes,
+ transforms: self.transforms,
+ })
+ }
+}
+
+pub(crate) struct TransformGroups {
+ groups: PrimaryMap<TransformGroupIndex, TransformGroup>,
+}
+
+impl TransformGroups {
+ pub fn new() -> Self {
+ Self {
+ groups: PrimaryMap::new(),
+ }
+ }
+ pub fn add(&mut self, new_group: TransformGroup) -> TransformGroupIndex {
+ for group in self.groups.values() {
+ assert!(
+ group.name != new_group.name,
+ format!("trying to insert {} for the second time", new_group.name)
+ );
+ }
+ self.groups.push(new_group)
+ }
+ pub fn get(&self, id: TransformGroupIndex) -> &TransformGroup {
+ &self.groups[id]
+ }
+ fn next_key(&self) -> TransformGroupIndex {
+ self.groups.next_key()
+ }
+ pub fn by_name(&self, name: &'static str) -> &TransformGroup {
+ for group in self.groups.values() {
+ if group.name == name {
+ return group;
+ }
+ }
+ panic!(format!("transform group with name {} not found", name));
+ }
+}
+
+#[test]
+#[should_panic]
+fn test_double_custom_legalization() {
+ use crate::cdsl::formats::InstructionFormatBuilder;
+ use crate::cdsl::instructions::{AllInstructions, InstructionBuilder, InstructionGroupBuilder};
+
+ let nullary = InstructionFormatBuilder::new("nullary").build();
+
+ let mut dummy_all = AllInstructions::new();
+ let mut inst_group = InstructionGroupBuilder::new(&mut dummy_all);
+ inst_group.push(InstructionBuilder::new("dummy", "doc", &nullary));
+
+ let inst_group = inst_group.build();
+ let dummy_inst = inst_group.by_name("dummy");
+
+ let mut transform_group = TransformGroupBuilder::new("test", "doc");
+ transform_group.custom_legalize(&dummy_inst, "custom 1");
+ transform_group.custom_legalize(&dummy_inst, "custom 2");
+}
diff --git a/third_party/rust/cranelift-codegen-meta/src/default_map.rs b/third_party/rust/cranelift-codegen-meta/src/default_map.rs
new file mode 100644
index 0000000000..3a2be05dac
--- /dev/null
+++ b/third_party/rust/cranelift-codegen-meta/src/default_map.rs
@@ -0,0 +1,20 @@
+//! Trait for extending `HashMap` with `get_or_default`.
+use std::collections::HashMap;
+use std::hash::Hash;
+
+pub(crate) trait MapWithDefault<K, V: Default> {
+ fn get_or_default(&mut self, k: K) -> &mut V;
+}
+
+impl<K: Eq + Hash, V: Default> MapWithDefault<K, V> for HashMap<K, V> {
+ fn get_or_default(&mut self, k: K) -> &mut V {
+ self.entry(k).or_insert_with(V::default)
+ }
+}
+
+#[test]
+fn test_default() {
+ let mut hash_map = HashMap::new();
+ hash_map.insert(42, "hello");
+ assert_eq!(*hash_map.get_or_default(43), "");
+}
diff --git a/third_party/rust/cranelift-codegen-meta/src/error.rs b/third_party/rust/cranelift-codegen-meta/src/error.rs
new file mode 100644
index 0000000000..4cbf3d8285
--- /dev/null
+++ b/third_party/rust/cranelift-codegen-meta/src/error.rs
@@ -0,0 +1,48 @@
+//! Error returned during meta code-generation.
+use std::fmt;
+use std::io;
+
+/// An error that occurred when the cranelift_codegen_meta crate was generating
+/// source files for the cranelift_codegen crate.
+#[derive(Debug)]
+pub struct Error {
+ inner: Box<ErrorInner>,
+}
+
+impl Error {
+ /// Create a new error object with the given message.
+ pub fn with_msg<S: Into<String>>(msg: S) -> Error {
+ Error {
+ inner: Box::new(ErrorInner::Msg(msg.into())),
+ }
+ }
+}
+
+impl fmt::Display for Error {
+ fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
+ write!(f, "{}", self.inner)
+ }
+}
+
+impl From<io::Error> for Error {
+ fn from(e: io::Error) -> Self {
+ Error {
+ inner: Box::new(ErrorInner::IoError(e)),
+ }
+ }
+}
+
+#[derive(Debug)]
+enum ErrorInner {
+ Msg(String),
+ IoError(io::Error),
+}
+
+impl fmt::Display for ErrorInner {
+ fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
+ match *self {
+ ErrorInner::Msg(ref s) => write!(f, "{}", s),
+ ErrorInner::IoError(ref e) => write!(f, "{}", e),
+ }
+ }
+}
diff --git a/third_party/rust/cranelift-codegen-meta/src/gen_binemit.rs b/third_party/rust/cranelift-codegen-meta/src/gen_binemit.rs
new file mode 100644
index 0000000000..f67aa9b5a9
--- /dev/null
+++ b/third_party/rust/cranelift-codegen-meta/src/gen_binemit.rs
@@ -0,0 +1,224 @@
+//! Generate binary emission code for each ISA.
+
+use cranelift_entity::EntityRef;
+
+use crate::error;
+use crate::srcgen::Formatter;
+
+use crate::cdsl::recipes::{EncodingRecipe, OperandConstraint, Recipes};
+
+/// Generate code to handle a single recipe.
+///
+/// - Unpack the instruction data, knowing the format.
+/// - Determine register locations for operands with register constraints.
+/// - Determine stack slot locations for operands with stack constraints.
+/// - Call hand-written code for the actual emission.
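+///
+/// As an illustration only (the recipe name below is hypothetical), the code emitted for a
+/// two-input, one-output register recipe on a `Binary` format looks roughly like:
+///
+/// ```text
+/// if let InstructionData::Binary { opcode, ref args, .. } = *inst_data {
+///     let in_reg0 = divert.reg(args[0], &func.locations);
+///     let in_reg1 = divert.reg(args[1], &func.locations);
+///     let results = [func.dfg.first_result(inst)];
+///     let out_reg0 = divert.reg(results[0], &func.locations);
+///     return recipe_binop(func, inst, sink, bits, in_reg0, in_reg1, out_reg0);
+/// }
+/// ```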
+fn gen_recipe(recipe: &EncodingRecipe, fmt: &mut Formatter) {
+ let inst_format = &recipe.format;
+ let num_value_ops = inst_format.num_value_operands;
+
+ // TODO: Set want_args to true for only MultiAry instructions instead of all formats with value
+ // list.
+ let want_args = inst_format.has_value_list
+ || recipe.operands_in.iter().any(|c| match c {
+ OperandConstraint::RegClass(_) | OperandConstraint::Stack(_) => true,
+ OperandConstraint::FixedReg(_) | OperandConstraint::TiedInput(_) => false,
+ });
+ assert!(!want_args || num_value_ops > 0 || inst_format.has_value_list);
+
+ let want_outs = recipe.operands_out.iter().any(|c| match c {
+ OperandConstraint::RegClass(_) | OperandConstraint::Stack(_) => true,
+ OperandConstraint::FixedReg(_) | OperandConstraint::TiedInput(_) => false,
+ });
+
+ let is_regmove = ["RegMove", "RegSpill", "RegFill"].contains(&inst_format.name);
+
+ // Unpack the instruction data.
+ fmtln!(fmt, "if let InstructionData::{} {{", inst_format.name);
+ fmt.indent(|fmt| {
+ fmt.line("opcode,");
+ for f in &inst_format.imm_fields {
+ fmtln!(fmt, "{},", f.member);
+ }
+ if want_args {
+ if inst_format.has_value_list || num_value_ops > 1 {
+ fmt.line("ref args,");
+ } else {
+ fmt.line("arg,");
+ }
+ }
+ fmt.line("..");
+
+ fmt.outdented_line("} = *inst_data {");
+
+ // Pass recipe arguments in this order: inputs, imm_fields, outputs.
+ let mut args = String::new();
+
+ if want_args && !is_regmove {
+ if inst_format.has_value_list {
+ fmt.line("let args = args.as_slice(&func.dfg.value_lists);");
+ } else if num_value_ops == 1 {
+ fmt.line("let args = [arg];");
+ }
+ args += &unwrap_values(&recipe.operands_in, "in", "args", fmt);
+ }
+
+ for f in &inst_format.imm_fields {
+ args += &format!(", {}", f.member);
+ }
+
+ // Unwrap interesting output arguments.
+ if want_outs {
+ if recipe.operands_out.len() == 1 {
+ fmt.line("let results = [func.dfg.first_result(inst)];")
+ } else {
+ fmt.line("let results = func.dfg.inst_results(inst);");
+ }
+ args += &unwrap_values(&recipe.operands_out, "out", "results", fmt);
+ }
+
+ // Optimization: Only update the register diversion tracker for regmove instructions.
+ if is_regmove {
+ fmt.line("divert.apply(inst_data);")
+ }
+
+ match &recipe.emit {
+ Some(emit) => {
+ fmt.multi_line(emit);
+ fmt.line("return;");
+ }
+ None => {
+ fmtln!(
+ fmt,
+ "return recipe_{}(func, inst, sink, bits{});",
+ recipe.name.to_lowercase(),
+ args
+ );
+ }
+ }
+ });
+ fmt.line("}");
+}
+
+/// Emit code that unwraps values living in registers or stack slots.
+///
+/// - `args`: input or output constraints.
+/// - `prefix`: prefix used for the generated local variables.
+/// - `values_slice`: name of the slice containing the values to be unwrapped.
+///
+/// Returns a comma-separated list of the generated variables.
+fn unwrap_values(
+ args: &[OperandConstraint],
+ prefix: &str,
+ values_slice: &str,
+ fmt: &mut Formatter,
+) -> String {
+ let mut varlist = String::new();
+ for (i, cst) in args.iter().enumerate() {
+ match cst {
+ OperandConstraint::RegClass(_reg_class) => {
+ let v = format!("{}_reg{}", prefix, i);
+ varlist += &format!(", {}", v);
+ fmtln!(
+ fmt,
+ "let {} = divert.reg({}[{}], &func.locations);",
+ v,
+ values_slice,
+ i
+ );
+ }
+ OperandConstraint::Stack(stack) => {
+ let v = format!("{}_stk{}", prefix, i);
+ varlist += &format!(", {}", v);
+ fmtln!(fmt, "let {} = StackRef::masked(", v);
+ fmt.indent(|fmt| {
+ fmtln!(
+ fmt,
+ "divert.stack({}[{}], &func.locations),",
+ values_slice,
+ i
+ );
+ fmt.line(format!("{},", stack.stack_base_mask()));
+ fmt.line("&func.stack_slots,");
+ });
+ fmt.line(").unwrap();");
+ }
+ _ => {}
+ }
+ }
+ varlist
+}
+
+fn gen_isa(isa_name: &str, recipes: &Recipes, fmt: &mut Formatter) {
+ fmt.doc_comment(format!(
+ "Emit binary machine code for `inst` for the {} ISA.",
+ isa_name
+ ));
+
+ if recipes.is_empty() {
+ fmt.line("pub fn emit_inst<CS: CodeSink + ?Sized>(");
+ fmt.indent(|fmt| {
+ fmt.line("func: &Function,");
+ fmt.line("inst: Inst,");
+ fmt.line("_divert: &mut RegDiversions,");
+ fmt.line("_sink: &mut CS,");
+ fmt.line("_isa: &dyn TargetIsa,");
+ });
+ fmt.line(") {");
+ fmt.indent(|fmt| {
+ // No encoding recipes: Emit a stub.
+ fmt.line("bad_encoding(func, inst)");
+ });
+ fmt.line("}");
+ return;
+ }
+
+ fmt.line("#[allow(unused_variables, unreachable_code)]");
+ fmt.line("pub fn emit_inst<CS: CodeSink + ?Sized>(");
+ fmt.indent(|fmt| {
+ fmt.line("func: &Function,");
+ fmt.line("inst: Inst,");
+ fmt.line("divert: &mut RegDiversions,");
+ fmt.line("sink: &mut CS,");
+ fmt.line("isa: &dyn TargetIsa,")
+ });
+
+ fmt.line(") {");
+ fmt.indent(|fmt| {
+ fmt.line("let encoding = func.encodings[inst];");
+ fmt.line("let bits = encoding.bits();");
+ fmt.line("let inst_data = &func.dfg[inst];");
+ fmt.line("match encoding.recipe() {");
+ fmt.indent(|fmt| {
+ for (i, recipe) in recipes.iter() {
+ fmt.comment(format!("Recipe {}", recipe.name));
+ fmtln!(fmt, "{} => {{", i.index());
+ fmt.indent(|fmt| {
+ gen_recipe(recipe, fmt);
+ });
+ fmt.line("}");
+ }
+ fmt.line("_ => {},");
+ });
+ fmt.line("}");
+
+ // Allow for unencoded ghost instructions. The verifier will check details.
+ fmt.line("if encoding.is_legal() {");
+ fmt.indent(|fmt| {
+ fmt.line("bad_encoding(func, inst);");
+ });
+ fmt.line("}");
+ });
+ fmt.line("}");
+}
+
+pub(crate) fn generate(
+ isa_name: &str,
+ recipes: &Recipes,
+ binemit_filename: &str,
+ out_dir: &str,
+) -> Result<(), error::Error> {
+ let mut fmt = Formatter::new();
+ gen_isa(isa_name, recipes, &mut fmt);
+ fmt.update_file(binemit_filename, out_dir)?;
+ Ok(())
+}
diff --git a/third_party/rust/cranelift-codegen-meta/src/gen_encodings.rs b/third_party/rust/cranelift-codegen-meta/src/gen_encodings.rs
new file mode 100644
index 0000000000..d79dc66340
--- /dev/null
+++ b/third_party/rust/cranelift-codegen-meta/src/gen_encodings.rs
@@ -0,0 +1,1139 @@
+//! Generate sources for instruction encoding.
+//!
+//! The tables and functions generated here support the `TargetISA::encode()` function which
+//! determines if a given instruction is legal, and if so, its `Encoding` data which consists of a
+//! *recipe* and some *encoding* bits.
+//!
+//! The `encode` function doesn't actually generate the binary machine bits. Each recipe has a
+//! corresponding hand-written function to do that after registers are allocated.
+//!
+//! This is the information available to us:
+//!
+//! - The instruction to be encoded as an `InstructionData` reference.
+//! - The controlling type variable.
+//! - The data-flow graph giving us access to the types of all values involved. This is needed for
+//! testing any secondary type variables.
+//! - A `PredicateView` reference for the ISA-specific settings for evaluating ISA predicates.
+//! - The currently active CPU mode is determined by the ISA.
+//!
+//! ## Level 1 table lookup
+//!
+//! The CPU mode provides the first table. The key is the instruction's controlling type variable.
+//! If the instruction is not polymorphic, use `INVALID` for the type variable. The table values
+//! are level 2 tables.
+//!
+//! ## Level 2 table lookup
+//!
+//! The level 2 table is keyed by the instruction's opcode. The table values are *encoding lists*.
+//!
+//! The two-level table lookup allows the level 2 tables to be much smaller with good locality.
+//! Code in any given function usually only uses a few different types, so many of the level 2
+//! tables will be cold.
+//!
+//! ## Encoding lists
+//!
+//! An encoding list is a non-empty sequence of list entries. Each entry has one of these forms:
+//!
+//! 1. Recipe + bits. Use this encoding if the recipe predicate is satisfied.
+//! 2. Recipe + bits, final entry. Use this encoding if the recipe predicate is satisfied.
+//! Otherwise, stop with the default legalization code.
+//! 3. Stop with legalization code.
+//! 4. Predicate + skip count. Test predicate and skip N entries if it is false.
+//! 5. Predicate + stop. Test predicate and stop with the default legalization code if it is false.
+//!
+//! The instruction predicate is also used to distinguish between polymorphic instructions with
+//! different types for secondary type variables.
+
+use std::collections::btree_map;
+use std::collections::{BTreeMap, HashMap, HashSet};
+use std::convert::TryFrom;
+use std::iter::FromIterator;
+
+use cranelift_codegen_shared::constant_hash::generate_table;
+use cranelift_entity::EntityRef;
+
+use crate::error;
+use crate::srcgen::Formatter;
+
+use crate::cdsl::cpu_modes::CpuMode;
+use crate::cdsl::encodings::Encoding;
+use crate::cdsl::instructions::{Instruction, InstructionPredicate, InstructionPredicateNumber};
+use crate::cdsl::isa::TargetIsa;
+use crate::cdsl::recipes::{EncodingRecipe, OperandConstraint, Recipes, Register};
+use crate::cdsl::regs::IsaRegs;
+use crate::cdsl::settings::SettingPredicateNumber;
+use crate::cdsl::types::ValueType;
+use crate::cdsl::xform::TransformGroupIndex;
+
+use crate::shared::Definitions as SharedDefinitions;
+
+use crate::default_map::MapWithDefault;
+use crate::unique_table::UniqueSeqTable;
+
+/// Emit code for matching an instruction predicate against an `InstructionData` reference called
+/// `inst`.
+///
+/// The generated code is an `if let` pattern match that falls through if the instruction has an
+/// unexpected format. This should lead to a panic.
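+///
+/// For instance (an illustrative sketch; the format and predicate below are only examples), a
+/// predicate over an immediate field might expand to:
+///
+/// ```text
+/// if let crate::ir::InstructionData::BinaryImm { imm, .. } = *inst {
+///     return predicates::is_signed_int(imm, 8, 0);
+/// }
+/// unreachable!();
+/// ```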
+fn emit_instp(instp: &InstructionPredicate, has_func: bool, fmt: &mut Formatter) {
+ if let Some(type_predicate) = instp.type_predicate("func") {
+ fmt.line("let args = inst.arguments(&func.dfg.value_lists);");
+ fmt.line(type_predicate);
+ return;
+ }
+
+ let leaves = instp.collect_leaves();
+
+ let mut has_type_check = false;
+ let mut format_name = None;
+ let mut field_names = HashSet::new();
+
+ for leaf in leaves {
+ if leaf.is_type_predicate() {
+ has_type_check = true;
+ } else {
+ field_names.insert(leaf.format_destructuring_member_name());
+ let leaf_format_name = leaf.format_name();
+ match format_name {
+ None => format_name = Some(leaf_format_name),
+ Some(previous_format_name) => {
+ assert!(
+ previous_format_name == leaf_format_name,
+ format!("Format predicate can only operate on a single InstructionFormat; trying to use both {} and {}", previous_format_name, leaf_format_name
+ ));
+ }
+ }
+ }
+ }
+
+ let mut fields = Vec::from_iter(field_names);
+ fields.sort();
+ let fields = fields.join(", ");
+
+ let format_name = format_name.expect("There should be a format name!");
+
+ fmtln!(
+ fmt,
+ "if let crate::ir::InstructionData::{} {{ {}, .. }} = *inst {{",
+ format_name,
+ fields
+ );
+ fmt.indent(|fmt| {
+ if has_type_check {
+ // We could implement this.
+ assert!(has_func, "recipe predicates can't check type variables.");
+ fmt.line("let args = inst.arguments(&func.dfg.value_lists);");
+ } else if has_func {
+ // Silence dead argument.
+ fmt.line("let _ = func;");
+ }
+ fmtln!(fmt, "return {};", instp.rust_predicate("func").unwrap());
+ });
+ fmtln!(fmt, "}");
+
+ fmt.line("unreachable!();");
+}
+
+/// Emit private functions for checking recipe predicates as well as a static `RECIPE_PREDICATES`
+/// array indexed by recipe number.
+///
+/// A recipe predicate is a combination of an ISA predicate and an instruction predicate. Many
+/// recipes have identical predicates.
+fn emit_recipe_predicates(isa: &TargetIsa, fmt: &mut Formatter) {
+ let mut predicate_names = HashMap::new();
+
+ fmt.comment(format!("{} recipe predicates.", isa.name));
+ for recipe in isa.recipes.values() {
+ let (isap, instp) = match (&recipe.isa_predicate, &recipe.inst_predicate) {
+ (None, None) => continue,
+ (isap, instp) if predicate_names.contains_key(&(isap, instp)) => continue,
+ (isap, instp) => (isap, instp),
+ };
+
+ let func_name = format!("recipe_predicate_{}", recipe.name.to_lowercase());
+ predicate_names.insert((isap, instp), func_name.clone());
+
+ // Generate the predicate function.
+ fmtln!(
+ fmt,
+ "fn {}({}: crate::settings::PredicateView, {}: &ir::InstructionData) -> bool {{",
+ func_name,
+ if isap.is_some() { "isap" } else { "_" },
+ if instp.is_some() { "inst" } else { "_" }
+ );
+ fmt.indent(|fmt| {
+ match (isap, instp) {
+ (Some(isap), None) => {
+ fmtln!(fmt, "isap.test({})", isap);
+ }
+ (None, Some(instp)) => {
+ emit_instp(instp, /* has func */ false, fmt);
+ }
+ (Some(isap), Some(instp)) => {
+ fmtln!(fmt, "isap.test({}) &&", isap);
+ emit_instp(instp, /* has func */ false, fmt);
+ }
+ _ => panic!("skipped above"),
+ }
+ });
+ fmtln!(fmt, "}");
+ }
+ fmt.empty_line();
+
+ // Generate the static table.
+ fmt.doc_comment(format!(
+ r#"{} recipe predicate table.
+
+ One entry per recipe, set to Some only when the recipe is guarded by a predicate."#,
+ isa.name
+ ));
+ fmtln!(
+ fmt,
+ "pub static RECIPE_PREDICATES: [RecipePredicate; {}] = [",
+ isa.recipes.len()
+ );
+ fmt.indent(|fmt| {
+ for recipe in isa.recipes.values() {
+ match (&recipe.isa_predicate, &recipe.inst_predicate) {
+ (None, None) => fmt.line("None,"),
+ key => fmtln!(fmt, "Some({}),", predicate_names.get(&key).unwrap()),
+ }
+ }
+ });
+ fmtln!(fmt, "];");
+ fmt.empty_line();
+}
+
+/// Emit private functions for matching instruction predicates as well as a static
+/// `INST_PREDICATES` array indexed by predicate number.
+fn emit_inst_predicates(isa: &TargetIsa, fmt: &mut Formatter) {
+ fmt.comment(format!("{} instruction predicates.", isa.name));
+ for (id, instp) in isa.encodings_predicates.iter() {
+ fmtln!(fmt, "fn inst_predicate_{}(func: &crate::ir::Function, inst: &crate::ir::InstructionData) -> bool {{", id.index());
+ fmt.indent(|fmt| {
+ emit_instp(instp, /* has func */ true, fmt);
+ });
+ fmtln!(fmt, "}");
+ }
+ fmt.empty_line();
+
+ // Generate the static table.
+ fmt.doc_comment(format!(
+ r#"{} instruction predicate table.
+
+ One entry per instruction predicate, so the encoding bytecode can embed indexes into this
+ table."#,
+ isa.name
+ ));
+ fmtln!(
+ fmt,
+ "pub static INST_PREDICATES: [InstPredicate; {}] = [",
+ isa.encodings_predicates.len()
+ );
+ fmt.indent(|fmt| {
+ for id in isa.encodings_predicates.keys() {
+ fmtln!(fmt, "inst_predicate_{},", id.index());
+ }
+ });
+ fmtln!(fmt, "];");
+ fmt.empty_line();
+}
+
+/// Emit a table of encoding recipe names keyed by recipe number.
+///
+/// This is used for pretty-printing encodings.
+fn emit_recipe_names(isa: &TargetIsa, fmt: &mut Formatter) {
+ fmt.doc_comment(format!(
+ r#"{} recipe names, using the same recipe index spaces as the one specified by the
+ corresponding binemit file."#,
+ isa.name
+ ));
+ fmtln!(
+ fmt,
+ "static RECIPE_NAMES: [&str; {}] = [",
+ isa.recipes.len()
+ );
+ fmt.indent(|fmt| {
+ for recipe in isa.recipes.values() {
+ fmtln!(fmt, r#""{}","#, recipe.name);
+ }
+ });
+ fmtln!(fmt, "];");
+ fmt.empty_line();
+}
+
+/// Returns a set of all the registers involved in fixed register constraints.
+fn get_fixed_registers(operands_in: &[OperandConstraint]) -> HashSet<Register> {
+ HashSet::from_iter(
+ operands_in
+ .iter()
+ .map(|constraint| {
+ if let OperandConstraint::FixedReg(reg) = &constraint {
+ Some(*reg)
+ } else {
+ None
+ }
+ })
+ .filter(|opt| opt.is_some())
+ .map(|opt| opt.unwrap()),
+ )
+}
+
+/// Emit a struct field initializer for an array of operand constraints.
+///
+/// Note "fixed_registers" must refer to the other kind of operands (i.e. if we're operating on
+/// inputs, fixed_registers must contain the fixed output registers).
+fn emit_operand_constraints(
+ registers: &IsaRegs,
+ recipe: &EncodingRecipe,
+ constraints: &[OperandConstraint],
+ field_name: &'static str,
+ tied_operands: &HashMap<usize, usize>,
+ fixed_registers: &HashSet<Register>,
+ fmt: &mut Formatter,
+) {
+ if constraints.is_empty() {
+ fmtln!(fmt, "{}: &[],", field_name);
+ return;
+ }
+
+ fmtln!(fmt, "{}: &[", field_name);
+ fmt.indent(|fmt| {
+ for (n, constraint) in constraints.iter().enumerate() {
+ fmt.line("OperandConstraint {");
+ fmt.indent(|fmt| {
+ match constraint {
+ OperandConstraint::RegClass(reg_class) => {
+ if let Some(tied_input) = tied_operands.get(&n) {
+ fmtln!(fmt, "kind: ConstraintKind::Tied({}),", tied_input);
+ } else {
+ fmt.line("kind: ConstraintKind::Reg,");
+ }
+ fmtln!(
+ fmt,
+ "regclass: &{}_DATA,",
+ registers.classes[*reg_class].name
+ );
+ }
+ OperandConstraint::FixedReg(reg) => {
+ assert!(!tied_operands.contains_key(&n), "can't tie fixed registers");
+ let constraint_kind = if fixed_registers.contains(&reg) {
+ "FixedTied"
+ } else {
+ "FixedReg"
+ };
+ fmtln!(
+ fmt,
+ "kind: ConstraintKind::{}({}),",
+ constraint_kind,
+ reg.unit
+ );
+ fmtln!(
+ fmt,
+ "regclass: &{}_DATA,",
+ registers.classes[reg.regclass].name
+ );
+ }
+ OperandConstraint::TiedInput(tied_input) => {
+ // This is a tied output constraint. It should never happen
+ // for input constraints.
+ assert!(
+ tied_input == tied_operands.get(&n).unwrap(),
+ "invalid tied constraint"
+ );
+ fmtln!(fmt, "kind: ConstraintKind::Tied({}),", tied_input);
+
+ let tied_class = if let OperandConstraint::RegClass(tied_class) =
+ recipe.operands_in[*tied_input]
+ {
+ tied_class
+ } else {
+ panic!("tied constraints relate only to register inputs");
+ };
+
+ fmtln!(
+ fmt,
+ "regclass: &{}_DATA,",
+ registers.classes[tied_class].name
+ );
+ }
+ OperandConstraint::Stack(stack) => {
+ assert!(!tied_operands.contains_key(&n), "can't tie stack operand");
+ fmt.line("kind: ConstraintKind::Stack,");
+ fmtln!(
+ fmt,
+ "regclass: &{}_DATA,",
+ registers.classes[stack.regclass].name
+ );
+ }
+ }
+ });
+ fmt.line("},");
+ }
+ });
+ fmtln!(fmt, "],");
+}
+
+/// Emit a table of encoding recipe operand constraints keyed by recipe number.
+///
+/// These are used by the register allocator to pick registers that can be properly encoded.
+fn emit_recipe_constraints(isa: &TargetIsa, fmt: &mut Formatter) {
+ fmt.doc_comment(format!(
+ r#"{} recipe constraints list, using the same recipe index spaces as the one
+ specified by the corresponding binemit file. These constraints are used by register
+ allocation to select the right location to use for input and output values."#,
+ isa.name
+ ));
+ fmtln!(
+ fmt,
+ "static RECIPE_CONSTRAINTS: [RecipeConstraints; {}] = [",
+ isa.recipes.len()
+ );
+ fmt.indent(|fmt| {
+ for recipe in isa.recipes.values() {
+ // Compute a mapping of tied operands in both directions (input tied to outputs and
+ // conversely).
+ let mut tied_in_to_out = HashMap::new();
+ let mut tied_out_to_in = HashMap::new();
+ for (out_index, constraint) in recipe.operands_out.iter().enumerate() {
+ if let OperandConstraint::TiedInput(in_index) = &constraint {
+ tied_in_to_out.insert(*in_index, out_index);
+ tied_out_to_in.insert(out_index, *in_index);
+ }
+ }
+
+ // Find the sets of registers involved in fixed register constraints.
+ let fixed_inputs = get_fixed_registers(&recipe.operands_in);
+ let fixed_outputs = get_fixed_registers(&recipe.operands_out);
+
+ fmt.comment(format!("Constraints for recipe {}:", recipe.name));
+ fmt.line("RecipeConstraints {");
+ fmt.indent(|fmt| {
+ emit_operand_constraints(
+ &isa.regs,
+ recipe,
+ &recipe.operands_in,
+ "ins",
+ &tied_in_to_out,
+ &fixed_outputs,
+ fmt,
+ );
+ emit_operand_constraints(
+ &isa.regs,
+ recipe,
+ &recipe.operands_out,
+ "outs",
+ &tied_out_to_in,
+ &fixed_inputs,
+ fmt,
+ );
+ fmtln!(
+ fmt,
+ "fixed_ins: {},",
+ if !fixed_inputs.is_empty() {
+ "true"
+ } else {
+ "false"
+ }
+ );
+ fmtln!(
+ fmt,
+ "fixed_outs: {},",
+ if !fixed_outputs.is_empty() {
+ "true"
+ } else {
+ "false"
+ }
+ );
+ fmtln!(
+ fmt,
+ "tied_ops: {},",
+ if !tied_in_to_out.is_empty() {
+ "true"
+ } else {
+ "false"
+ }
+ );
+ fmtln!(
+ fmt,
+ "clobbers_flags: {},",
+ if recipe.clobbers_flags {
+ "true"
+ } else {
+ "false"
+ }
+ );
+ });
+ fmt.line("},");
+ }
+ });
+ fmtln!(fmt, "];");
+ fmt.empty_line();
+}
+
+/// Emit a table of encoding recipe code size information.
+fn emit_recipe_sizing(isa: &TargetIsa, fmt: &mut Formatter) {
+ fmt.doc_comment(format!(
+ r#"{} recipe sizing descriptors, using the same recipe index spaces as the one
+ specified by the corresponding binemit file. These are used to compute the final size of an
+ instruction, as well as to compute the range of branches."#,
+ isa.name
+ ));
+ fmtln!(
+ fmt,
+ "static RECIPE_SIZING: [RecipeSizing; {}] = [",
+ isa.recipes.len()
+ );
+ fmt.indent(|fmt| {
+ for recipe in isa.recipes.values() {
+ fmt.comment(format!("Code size information for recipe {}:", recipe.name));
+ fmt.line("RecipeSizing {");
+ fmt.indent(|fmt| {
+ fmtln!(fmt, "base_size: {},", recipe.base_size);
+ fmtln!(fmt, "compute_size: {},", recipe.compute_size);
+ if let Some(range) = &recipe.branch_range {
+ fmtln!(
+ fmt,
+ "branch_range: Some(BranchRange {{ origin: {}, bits: {} }}),",
+ range.inst_size,
+ range.range
+ );
+ } else {
+ fmt.line("branch_range: None,");
+ }
+ });
+ fmt.line("},");
+ }
+ });
+ fmtln!(fmt, "];");
+ fmt.empty_line();
+}
+
+/// Level 1 table mapping types to `Level2` objects.
+struct Level1Table<'cpu_mode> {
+ cpu_mode: &'cpu_mode CpuMode,
+ legalize_code: TransformGroupIndex,
+
+ table_map: HashMap<Option<ValueType>, usize>,
+ table_vec: Vec<Level2Table>,
+}
+
+impl<'cpu_mode> Level1Table<'cpu_mode> {
+ fn new(cpu_mode: &'cpu_mode CpuMode) -> Self {
+ Self {
+ cpu_mode,
+ legalize_code: cpu_mode.get_default_legalize_code(),
+ table_map: HashMap::new(),
+ table_vec: Vec::new(),
+ }
+ }
+
+ /// Returns the level2 table for the given type; None means monomorphic, in this context.
+ fn l2table_for(&mut self, typ: Option<ValueType>) -> &mut Level2Table {
+ let cpu_mode = &self.cpu_mode;
+ let index = match self.table_map.get(&typ) {
+ Some(&index) => index,
+ None => {
+ let legalize_code = cpu_mode.get_legalize_code_for(&typ);
+ let table = Level2Table::new(typ.clone(), legalize_code);
+ let index = self.table_vec.len();
+ self.table_map.insert(typ, index);
+ self.table_vec.push(table);
+ index
+ }
+ };
+ self.table_vec.get_mut(index).unwrap()
+ }
+
+ fn l2tables(&mut self) -> Vec<&mut Level2Table> {
+ self.table_vec
+ .iter_mut()
+ .filter(|table| !table.is_empty())
+ .collect::<Vec<_>>()
+ }
+}
+
+struct Level2HashTableEntry {
+ inst_name: String,
+ offset: usize,
+}
+
+/// Level 2 table mapping instruction opcodes to `EncList` objects.
+///
+/// A level 2 table can be completely empty if it only holds a custom legalization action for `ty`.
+struct Level2Table {
+ typ: Option<ValueType>,
+ legalize_code: TransformGroupIndex,
+ inst_to_encodings: BTreeMap<String, EncodingList>,
+ hash_table_offset: Option<usize>,
+ hash_table_len: Option<usize>,
+}
+
+impl Level2Table {
+ fn new(typ: Option<ValueType>, legalize_code: TransformGroupIndex) -> Self {
+ Self {
+ typ,
+ legalize_code,
+ inst_to_encodings: BTreeMap::new(),
+ hash_table_offset: None,
+ hash_table_len: None,
+ }
+ }
+
+ fn enclist_for(&mut self, inst: &Instruction) -> &mut EncodingList {
+ let copied_typ = self.typ.clone();
+ self.inst_to_encodings
+ .entry(inst.name.clone())
+ .or_insert_with(|| EncodingList::new(inst, copied_typ))
+ }
+
+ fn enclists(&mut self) -> btree_map::ValuesMut<'_, String, EncodingList> {
+ self.inst_to_encodings.values_mut()
+ }
+
+ fn is_empty(&self) -> bool {
+ self.inst_to_encodings.is_empty()
+ }
+
+ fn layout_hashtable(
+ &mut self,
+ level2_hashtables: &mut Vec<Option<Level2HashTableEntry>>,
+ level2_doc: &mut HashMap<usize, Vec<String>>,
+ ) {
+ let hash_table = generate_table(
+ self.inst_to_encodings.values(),
+ self.inst_to_encodings.len(),
+ // TODO the Python code wanted opcode numbers to start from 1.
+ |enc_list| enc_list.inst.opcode_number.index() + 1,
+ );
+
+ let hash_table_offset = level2_hashtables.len();
+ let hash_table_len = hash_table.len();
+
+ assert!(self.hash_table_offset.is_none());
+ assert!(self.hash_table_len.is_none());
+ self.hash_table_offset = Some(hash_table_offset);
+ self.hash_table_len = Some(hash_table_len);
+
+ level2_hashtables.extend(hash_table.iter().map(|opt_enc_list| {
+ opt_enc_list.map(|enc_list| Level2HashTableEntry {
+ inst_name: enc_list.inst.camel_name.clone(),
+ offset: enc_list.offset.unwrap(),
+ })
+ }));
+
+ let typ_comment = match &self.typ {
+ Some(ty) => ty.to_string(),
+ None => "typeless".into(),
+ };
+
+ level2_doc.get_or_default(hash_table_offset).push(format!(
+ "{:06x}: {}, {} entries",
+ hash_table_offset, typ_comment, hash_table_len
+ ));
+ }
+}
+
+/// The u16 values in an encoding list entry are interpreted as follows:
+///
+/// NR = len(all_recipes)
+///
+/// entry < 2*NR
+/// Try Encoding(entry/2, next_entry) if the recipe predicate is satisfied.
+/// If bit 0 is set, stop with the default legalization code.
+/// If bit 0 is clear, keep going down the list.
+/// entry < PRED_START
+/// Stop with legalization code `entry - 2*NR`.
+///
+/// Remaining entries are interpreted as (skip, pred) pairs, where:
+///
+/// skip = (entry - PRED_START) >> PRED_BITS
+/// pred = (entry - PRED_START) & PRED_MASK
+///
+/// If the predicate is satisfied, keep going. Otherwise skip over the next
+/// `skip` entries. If skip == 0, stop with the default legalization code.
+///
+/// The `pred` predicate number is interpreted as an instruction predicate if it
+/// is in range, otherwise an ISA predicate.
+
+/// Encoding lists are represented as u16 arrays.
+const CODE_BITS: usize = 16;
+
+/// Beginning of the predicate code words.
+const PRED_START: u16 = 0x1000;
+
+/// Number of bits used to hold a predicate number (instruction + ISA predicates).
+const PRED_BITS: usize = 12;
+
+/// Mask for extracting the predicate number.
+const PRED_MASK: usize = (1 << PRED_BITS) - 1;
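+
+// Worked example (illustrative only, assuming at least 7 recipes): with PRED_START = 0x1000 and
+// PRED_BITS = 12, the list [0x3003, 0x000a, 0x00c0, 0x000d, 0x0012] reads as:
+//
+//   0x3003 -> predicate 3, skip 2: if predicate 3 fails, skip the next two words;
+//   0x000a -> recipe 10/2 = 5, bit 0 clear: use Encoding(5, 0x00c0) if the recipe predicate
+//             holds, otherwise keep going;
+//   0x00c0 -> encoding bits for the previous entry;
+//   0x000d -> recipe 13/2 = 6, bit 0 set: use Encoding(6, 0x0012) if the recipe predicate
+//             holds, otherwise stop with the default legalization code;
+//   0x0012 -> encoding bits for the previous entry.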
+
+/// Encoder for the list format above.
+struct Encoder {
+ num_instruction_predicates: usize,
+
+ /// u16 encoding list words.
+ words: Vec<u16>,
+
+ /// Documentation comments: Index into `words` + comment.
+ docs: Vec<(usize, String)>,
+}
+
+impl Encoder {
+ fn new(num_instruction_predicates: usize) -> Self {
+ Self {
+ num_instruction_predicates,
+ words: Vec::new(),
+ docs: Vec::new(),
+ }
+ }
+
+ /// Add a recipe+bits entry to the list.
+ fn recipe(&mut self, recipes: &Recipes, enc: &Encoding, is_final: bool) {
+ let code = (2 * enc.recipe.index() + if is_final { 1 } else { 0 }) as u16;
+ assert!(code < PRED_START);
+
+ let doc = format!(
+ "--> {}{}",
+ enc.to_rust_comment(recipes),
+ if is_final { " and stop" } else { "" }
+ );
+ self.docs.push((self.words.len(), doc));
+
+ self.words.push(code);
+ self.words.push(enc.encbits);
+ }
+
+ /// Add a predicate entry.
+ fn pred(&mut self, pred_comment: String, skip: usize, n: usize) {
+ assert!(n <= PRED_MASK);
+ let entry = (PRED_START as usize) + (n | (skip << PRED_BITS));
+ assert!(entry < (1 << CODE_BITS));
+ let entry = entry as u16;
+
+ let doc = if skip == 0 {
+ "stop".to_string()
+ } else {
+ format!("skip {}", skip)
+ };
+ let doc = format!("{} unless {}", doc, pred_comment);
+
+ self.docs.push((self.words.len(), doc));
+ self.words.push(entry);
+ }
+
+ /// Add an instruction predicate entry.
+ fn inst_predicate(&mut self, pred: InstructionPredicateNumber, skip: usize) {
+ let number = pred.index();
+ let pred_comment = format!("inst_predicate_{}", number);
+ self.pred(pred_comment, skip, number);
+ }
+
+ /// Add an ISA predicate entry.
+ fn isa_predicate(&mut self, pred: SettingPredicateNumber, skip: usize) {
+ // ISA predicates follow the instruction predicates.
+ let n = self.num_instruction_predicates + (pred as usize);
+ let pred_comment = format!("PredicateView({})", pred);
+ self.pred(pred_comment, skip, n);
+ }
+}
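+
+// For example (illustrative numbers), `Encoder::pred` called with skip = 2 and
+// predicate number n = 7 pushes the entry 0x1000 + (7 | (2 << 12)) = 0x3007,
+// documented as "skip 2 unless <predicate>".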
+
+/// List of instructions for encoding a given type + opcode pair.
+///
+/// An encoding list contains a sequence of predicates and encoding recipes, all encoded as u16
+/// values.
+struct EncodingList {
+ inst: Instruction,
+ typ: Option<ValueType>,
+ encodings: Vec<Encoding>,
+ offset: Option<usize>,
+}
+
+impl EncodingList {
+ fn new(inst: &Instruction, typ: Option<ValueType>) -> Self {
+ Self {
+ inst: inst.clone(),
+ typ,
+ encodings: Default::default(),
+ offset: None,
+ }
+ }
+
+ /// Encode this list as a sequence of u16 numbers.
+ ///
+ /// Adds the sequence to `enc_lists` and records the returned offset as
+ /// `self.offset`.
+ ///
+ /// Adds comment lines to `enc_lists_doc` keyed by enc_lists offsets.
+ fn encode(
+ &mut self,
+ isa: &TargetIsa,
+ cpu_mode: &CpuMode,
+ enc_lists: &mut UniqueSeqTable<u16>,
+ enc_lists_doc: &mut HashMap<usize, Vec<String>>,
+ ) {
+ assert!(!self.encodings.is_empty());
+
+ let mut encoder = Encoder::new(isa.encodings_predicates.len());
+
+ let mut index = 0;
+ while index < self.encodings.len() {
+ let encoding = &self.encodings[index];
+
+ // Check how many of the following encodings have the same ISA predicate and
+ // instruction predicate, so as to reduce the number of tests carried out by the
+ // encoding list interpreter.
+ //
+ // Encodings sharing the same predicates are called a group below. The group includes
+ // the current encoding we're looking at.
+ let (isa_predicate, inst_predicate) =
+ (&encoding.isa_predicate, &encoding.inst_predicate);
+
+ let group_size = {
+ let mut group_size = 1;
+ while index + group_size < self.encodings.len() {
+ let next_encoding = &self.encodings[index + group_size];
+ if &next_encoding.inst_predicate != inst_predicate
+ || &next_encoding.isa_predicate != isa_predicate
+ {
+ break;
+ }
+ group_size += 1;
+ }
+ group_size
+ };
+
+ let is_last_group = index + group_size == self.encodings.len();
+
+ // The number of entries to skip when a predicate isn't satisfied is the number of
+ // entries used by both predicates (0 or 1 each) plus the size of the group, minus one
+ // (for this predicate). Each recipe entry occupies two u16 words (recipe index + bits).
+ let mut skip = if is_last_group {
+ 0
+ } else {
+ let isap_size = match isa_predicate {
+ Some(_) => 1,
+ None => 0,
+ };
+ let instp_size = match inst_predicate {
+ Some(_) => 1,
+ None => 0,
+ };
+ isap_size + instp_size + group_size * 2 - 1
+ };
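+ // For example (illustrative numbers): with both an ISA and an instruction
+ // predicate and a group of two encodings, skip = 1 + 1 + 2 * 2 - 1 = 5; after
+ // the ISA predicate entry is emitted below, the remaining skip becomes 4.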
+
+ if let Some(pred) = isa_predicate {
+ encoder.isa_predicate(*pred, skip);
+ if !is_last_group {
+ skip -= 1;
+ }
+ }
+
+ if let Some(pred) = inst_predicate {
+ encoder.inst_predicate(*pred, skip);
+ // No need to update skip, it's dead after this point.
+ }
+
+ for i in 0..group_size {
+ let encoding = &self.encodings[index + i];
+ let is_last_encoding = index + i == self.encodings.len() - 1;
+ encoder.recipe(&isa.recipes, encoding, is_last_encoding);
+ }
+
+ index += group_size;
+ }
+
+ assert!(self.offset.is_none());
+ let offset = enc_lists.add(&encoder.words);
+ self.offset = Some(offset);
+
+ // Doc comments.
+ let recipe_typ_mode_name = format!(
+ "{}{} ({})",
+ self.inst.name,
+ if let Some(typ) = &self.typ {
+ format!(".{}", typ.to_string())
+ } else {
+ "".into()
+ },
+ cpu_mode.name
+ );
+
+ enc_lists_doc
+ .get_or_default(offset)
+ .push(format!("{:06x}: {}", offset, recipe_typ_mode_name));
+ for (pos, doc) in encoder.docs {
+ enc_lists_doc.get_or_default(offset + pos).push(doc);
+ }
+ enc_lists_doc
+ .get_or_default(offset + encoder.words.len())
+ .insert(0, format!("end of {}", recipe_typ_mode_name));
+ }
+}
+
+fn make_tables(cpu_mode: &CpuMode) -> Level1Table {
+ let mut table = Level1Table::new(cpu_mode);
+
+ for encoding in &cpu_mode.encodings {
+ table
+ .l2table_for(encoding.bound_type.clone())
+ .enclist_for(encoding.inst())
+ .encodings
+ .push(encoding.clone());
+ }
+
+ // Ensure there are level 1 table entries for all types with a custom legalize action.
+ for value_type in cpu_mode.get_legalized_types() {
+ table.l2table_for(Some(value_type.clone()));
+ }
+ // ... and also for monomorphic instructions.
+ table.l2table_for(None);
+
+ table
+}
+
+/// Compute encodings and doc comments for encoding lists in `level1`.
+fn encode_enclists(
+ isa: &TargetIsa,
+ cpu_mode: &CpuMode,
+ level1: &mut Level1Table,
+ enc_lists: &mut UniqueSeqTable<u16>,
+ enc_lists_doc: &mut HashMap<usize, Vec<String>>,
+) {
+ for level2 in level1.l2tables() {
+ for enclist in level2.enclists() {
+ enclist.encode(isa, cpu_mode, enc_lists, enc_lists_doc);
+ }
+ }
+}
+
+fn encode_level2_hashtables<'a>(
+ level1: &'a mut Level1Table,
+ level2_hashtables: &mut Vec<Option<Level2HashTableEntry>>,
+ level2_doc: &mut HashMap<usize, Vec<String>>,
+) {
+ for level2 in level1.l2tables() {
+ level2.layout_hashtable(level2_hashtables, level2_doc);
+ }
+}
+
+fn emit_encoding_tables(defs: &SharedDefinitions, isa: &TargetIsa, fmt: &mut Formatter) {
+ // Level 1 tables, one per CPU mode.
+ let mut level1_tables: HashMap<&'static str, Level1Table> = HashMap::new();
+
+ // Single table containing all the level2 hash tables.
+ let mut level2_hashtables = Vec::new();
+ let mut level2_doc: HashMap<usize, Vec<String>> = HashMap::new();
+
+ // Tables for encoding lists with comments.
+ let mut enc_lists = UniqueSeqTable::new();
+ let mut enc_lists_doc = HashMap::new();
+
+ for cpu_mode in &isa.cpu_modes {
+ level2_doc
+ .get_or_default(level2_hashtables.len())
+ .push(cpu_mode.name.into());
+
+ let mut level1 = make_tables(cpu_mode);
+
+ encode_enclists(
+ isa,
+ cpu_mode,
+ &mut level1,
+ &mut enc_lists,
+ &mut enc_lists_doc,
+ );
+ encode_level2_hashtables(&mut level1, &mut level2_hashtables, &mut level2_doc);
+
+ level1_tables.insert(cpu_mode.name, level1);
+ }
+
+ // Compute an appropriate Rust integer type to use for offsets into a table of the given length.
+ let offset_type = |length: usize| {
+ if length <= 0x10000 {
+ "u16"
+ } else {
+ assert!(u32::try_from(length).is_ok(), "table too big!");
+ "u32"
+ }
+ };
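+ // For example, a table with 65_000 entries still fits `u16` offsets, while one
+ // with 70_000 entries needs `u32`.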
+
+ let level1_offset_type = offset_type(level2_hashtables.len());
+ let level2_offset_type = offset_type(enc_lists.len());
+
+ // Emit encoding lists.
+ fmt.doc_comment(
+ format!(r#"{} encoding lists.
+
+ This contains the entire encodings bytecode for every single instruction; the encodings
+ interpreter knows where to start from thanks to the initial lookup in the level 1 and level 2
+ table entries below."#, isa.name)
+ );
+ fmtln!(fmt, "pub static ENCLISTS: [u16; {}] = [", enc_lists.len());
+ fmt.indent(|fmt| {
+ let mut line = Vec::new();
+ for (index, entry) in enc_lists.iter().enumerate() {
+ if let Some(comments) = enc_lists_doc.get(&index) {
+ if !line.is_empty() {
+ fmtln!(fmt, "{},", line.join(", "));
+ line.clear();
+ }
+ for comment in comments {
+ fmt.comment(comment);
+ }
+ }
+ line.push(format!("{:#06x}", entry));
+ }
+ if !line.is_empty() {
+ fmtln!(fmt, "{},", line.join(", "));
+ }
+ });
+ fmtln!(fmt, "];");
+ fmt.empty_line();
+
+ // Emit the full concatenation of level 2 hash tables.
+ fmt.doc_comment(format!(
+ r#"{} level 2 hash tables.
+
+ This hash table, keyed by instruction opcode, contains all the starting offsets for the
+ encodings interpreter, for all the CPU modes. It is jumped to after a lookup on the
+ instruction's controlling type in the level 1 hash table."#,
+ isa.name
+ ));
+ fmtln!(
+ fmt,
+ "pub static LEVEL2: [Level2Entry<{}>; {}] = [",
+ level2_offset_type,
+ level2_hashtables.len()
+ );
+ fmt.indent(|fmt| {
+ for (offset, entry) in level2_hashtables.iter().enumerate() {
+ if let Some(comments) = level2_doc.get(&offset) {
+ for comment in comments {
+ fmt.comment(comment);
+ }
+ }
+ if let Some(entry) = entry {
+ fmtln!(
+ fmt,
+ "Level2Entry {{ opcode: Some(crate::ir::Opcode::{}), offset: {:#08x} }},",
+ entry.inst_name,
+ entry.offset
+ );
+ } else {
+ fmt.line("Level2Entry { opcode: None, offset: 0 },");
+ }
+ }
+ });
+ fmtln!(fmt, "];");
+ fmt.empty_line();
+
+ // Emit a level 1 hash table for each CPU mode.
+ for cpu_mode in &isa.cpu_modes {
+ let level1 = &level1_tables.get(cpu_mode.name).unwrap();
+ let hash_table = generate_table(
+ level1.table_vec.iter(),
+ level1.table_vec.len(),
+ |level2_table| {
+ if let Some(typ) = &level2_table.typ {
+ typ.number().expect("type without a number") as usize
+ } else {
+ 0
+ }
+ },
+ );
+
+ fmt.doc_comment(format!(
+ r#"{} level 1 hash table for the CPU mode {}.
+
+ This hash table, keyed by instruction controlling type, contains all the level 2
+ hash-tables offsets for the given CPU mode, as well as a legalization identifier indicating
+ which legalization scheme to apply when the instruction doesn't have any valid encoding for
+ this CPU mode.
+ "#,
+ isa.name, cpu_mode.name
+ ));
+ fmtln!(
+ fmt,
+ "pub static LEVEL1_{}: [Level1Entry<{}>; {}] = [",
+ cpu_mode.name.to_uppercase(),
+ level1_offset_type,
+ hash_table.len()
+ );
+ fmt.indent(|fmt| {
+ for opt_level2 in hash_table {
+ let level2 = match opt_level2 {
+ None => {
+ // Empty hash table entry. Include the default legalization action.
+ fmtln!(fmt, "Level1Entry {{ ty: ir::types::INVALID, log2len: !0, offset: 0, legalize: {} }},",
+ isa.translate_group_index(level1.legalize_code));
+ continue;
+ }
+ Some(level2) => level2,
+ };
+
+ let legalize_comment = defs.transform_groups.get(level2.legalize_code).name;
+ let legalize_code = isa.translate_group_index(level2.legalize_code);
+
+ let typ_name = if let Some(typ) = &level2.typ {
+ typ.rust_name()
+ } else {
+ "ir::types::INVALID".into()
+ };
+
+ if level2.is_empty() {
+ // Empty level 2 table: Only a specialized legalization action, no actual
+ // table.
+ // Set an offset that is out of bounds, but make sure it doesn't overflow its
+ // type when adding `1<<log2len`.
+ fmtln!(fmt, "Level1Entry {{ ty: {}, log2len: 0, offset: !0 - 1, legalize: {} }}, // {}",
+ typ_name, legalize_code, legalize_comment);
+ continue;
+ }
+
+ // Proper level 2 hash table.
+ let l2l = (level2.hash_table_len.unwrap() as f64).log2() as i32;
+ assert!(l2l > 0, "Level2 hash table was too small.");
+ fmtln!(fmt, "Level1Entry {{ ty: {}, log2len: {}, offset: {:#08x}, legalize: {} }}, // {}",
+ typ_name, l2l, level2.hash_table_offset.unwrap(), legalize_code, legalize_comment);
+ }
+ });
+ fmtln!(fmt, "];");
+ fmt.empty_line();
+ }
+}
+
+fn gen_isa(defs: &SharedDefinitions, isa: &TargetIsa, fmt: &mut Formatter) {
+ // Make the `RECIPE_PREDICATES` table.
+ emit_recipe_predicates(isa, fmt);
+
+ // Make the `INST_PREDICATES` table.
+ emit_inst_predicates(isa, fmt);
+
+ emit_encoding_tables(defs, isa, fmt);
+
+ emit_recipe_names(isa, fmt);
+ emit_recipe_constraints(isa, fmt);
+ emit_recipe_sizing(isa, fmt);
+
+ // Finally, tie it all together in an `EncInfo`.
+ fmt.line("pub static INFO: isa::EncInfo = isa::EncInfo {");
+ fmt.indent(|fmt| {
+ fmt.line("constraints: &RECIPE_CONSTRAINTS,");
+ fmt.line("sizing: &RECIPE_SIZING,");
+ fmt.line("names: &RECIPE_NAMES,");
+ });
+ fmt.line("};");
+}
+
+pub(crate) fn generate(
+ defs: &SharedDefinitions,
+ isa: &TargetIsa,
+ filename: &str,
+ out_dir: &str,
+) -> Result<(), error::Error> {
+ let mut fmt = Formatter::new();
+ gen_isa(defs, isa, &mut fmt);
+ fmt.update_file(filename, out_dir)?;
+ Ok(())
+}
diff --git a/third_party/rust/cranelift-codegen-meta/src/gen_inst.rs b/third_party/rust/cranelift-codegen-meta/src/gen_inst.rs
new file mode 100644
index 0000000000..a2760b34d7
--- /dev/null
+++ b/third_party/rust/cranelift-codegen-meta/src/gen_inst.rs
@@ -0,0 +1,1184 @@
+//! Generate instruction data (including opcodes, formats, builders, etc.).
+use std::fmt;
+
+use cranelift_codegen_shared::constant_hash;
+use cranelift_entity::EntityRef;
+
+use crate::cdsl::camel_case;
+use crate::cdsl::formats::InstructionFormat;
+use crate::cdsl::instructions::{AllInstructions, Instruction};
+use crate::cdsl::operands::Operand;
+use crate::cdsl::typevar::{TypeSet, TypeVar};
+
+use crate::error;
+use crate::srcgen::{Formatter, Match};
+use crate::unique_table::{UniqueSeqTable, UniqueTable};
+
+// TypeSet indexes are encoded in 8 bits, with `0xff` reserved.
+const TYPESET_LIMIT: usize = 0xff;
+
+/// Generate an instruction format enumeration.
+fn gen_formats(formats: &[&InstructionFormat], fmt: &mut Formatter) {
+ fmt.doc_comment(
+ r#"
+ An instruction format
+
+ Every opcode has a corresponding instruction format
+ which is represented by both the `InstructionFormat`
+ and the `InstructionData` enums.
+ "#,
+ );
+ fmt.line("#[derive(Copy, Clone, PartialEq, Eq, Debug)]");
+ fmt.line("pub enum InstructionFormat {");
+ fmt.indent(|fmt| {
+ for format in formats {
+ fmt.doc_comment(format.to_string());
+ fmtln!(fmt, "{},", format.name);
+ }
+ });
+ fmt.line("}");
+ fmt.empty_line();
+
+ // Emit a From<InstructionData> which also serves to verify that
+ // InstructionFormat and InstructionData are in sync.
+ fmt.line("impl<'a> From<&'a InstructionData> for InstructionFormat {");
+ fmt.indent(|fmt| {
+ fmt.line("fn from(inst: &'a InstructionData) -> Self {");
+ fmt.indent(|fmt| {
+ let mut m = Match::new("*inst");
+ for format in formats {
+ m.arm(
+ format!("InstructionData::{}", format.name),
+ vec![".."],
+ format!("Self::{}", format.name),
+ );
+ }
+ fmt.add_match(m);
+ });
+ fmt.line("}");
+ });
+ fmt.line("}");
+ fmt.empty_line();
+}
+
+/// Generate the InstructionData enum.
+///
+/// Every variant must contain an `opcode` field. The size of `InstructionData` should be kept at
+/// 16 bytes on 64-bit architectures. If more space is needed to represent an instruction, use a
+/// `ValueList` to store the additional information out of line.
+fn gen_instruction_data(formats: &[&InstructionFormat], fmt: &mut Formatter) {
+ fmt.line("#[derive(Clone, Debug)]");
+ fmt.line("#[allow(missing_docs)]");
+ fmt.line("pub enum InstructionData {");
+ fmt.indent(|fmt| {
+ for format in formats {
+ fmtln!(fmt, "{} {{", format.name);
+ fmt.indent(|fmt| {
+ fmt.line("opcode: Opcode,");
+ if format.typevar_operand.is_some() {
+ if format.has_value_list {
+ fmt.line("args: ValueList,");
+ } else if format.num_value_operands == 1 {
+ fmt.line("arg: Value,");
+ } else {
+ fmtln!(fmt, "args: [Value; {}],", format.num_value_operands);
+ }
+ }
+ for field in &format.imm_fields {
+ fmtln!(fmt, "{}: {},", field.member, field.kind.rust_type);
+ }
+ });
+ fmtln!(fmt, "},");
+ }
+ });
+ fmt.line("}");
+}
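+
+// As a sketch, a format with two fixed value operands and a single immediate
+// field would come out roughly like the following variant (the format and
+// member names here are hypothetical):
+//
+//     BinaryImm {
+//         opcode: Opcode,
+//         args: [Value; 2],
+//         imm: ir::immediates::Imm64,
+//     },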
+
+fn gen_arguments_method(formats: &[&InstructionFormat], fmt: &mut Formatter, is_mut: bool) {
+ let (method, mut_, rslice, as_slice) = if is_mut {
+ (
+ "arguments_mut",
+ "mut ",
+ "core::slice::from_mut",
+ "as_mut_slice",
+ )
+ } else {
+ ("arguments", "", "core::slice::from_ref", "as_slice")
+ };
+
+ fmtln!(
+ fmt,
+ "pub fn {}<'a>(&'a {}self, pool: &'a {}ir::ValueListPool) -> &{}[Value] {{",
+ method,
+ mut_,
+ mut_,
+ mut_
+ );
+ fmt.indent(|fmt| {
+ let mut m = Match::new("*self");
+ for format in formats {
+ let name = format!("Self::{}", format.name);
+
+ // Formats with a value list put all of their arguments in the list. We don't split
+ // them up; we just return them all as variable arguments. (I expect the distinction to
+ // go away.)
+ if format.has_value_list {
+ m.arm(
+ name,
+ vec![format!("ref {}args", mut_), "..".to_string()],
+ format!("args.{}(pool)", as_slice),
+ );
+ continue;
+ }
+
+ // Fixed args.
+ let mut fields = Vec::new();
+ let arg = if format.num_value_operands == 0 {
+ format!("&{}[]", mut_)
+ } else if format.num_value_operands == 1 {
+ fields.push(format!("ref {}arg", mut_));
+ format!("{}(arg)", rslice)
+ } else {
+ let arg = format!("args_arity{}", format.num_value_operands);
+ fields.push(format!("args: ref {}{}", mut_, arg));
+ arg
+ };
+ fields.push("..".into());
+
+ m.arm(name, fields, arg);
+ }
+ fmt.add_match(m);
+ });
+ fmtln!(fmt, "}");
+}
+
+/// Generate the boring parts of the InstructionData implementation.
+///
+/// These methods in `impl InstructionData` can be generated automatically from the instruction
+/// formats:
+///
+/// - `pub fn opcode(&self) -> Opcode`
+/// - `pub fn arguments(&self, &pool) -> &[Value]`
+/// - `pub fn arguments_mut(&mut self, &pool) -> &mut [Value]`
+/// - `pub fn take_value_list(&mut self) -> Option<ir::ValueList>`
+/// - `pub fn put_value_list(&mut self, args: ir::ValueList)`
+/// - `pub fn eq(&self, other: &Self, &pool) -> bool`
+/// - `pub fn hash<H: Hasher>(&self, state: &mut H, &pool)`
+fn gen_instruction_data_impl(formats: &[&InstructionFormat], fmt: &mut Formatter) {
+ fmt.line("impl InstructionData {");
+ fmt.indent(|fmt| {
+ fmt.doc_comment("Get the opcode of this instruction.");
+ fmt.line("pub fn opcode(&self) -> Opcode {");
+ fmt.indent(|fmt| {
+ let mut m = Match::new("*self");
+ for format in formats {
+ m.arm(format!("Self::{}", format.name), vec!["opcode", ".."],
+ "opcode".to_string());
+ }
+ fmt.add_match(m);
+ });
+ fmt.line("}");
+ fmt.empty_line();
+
+ fmt.doc_comment("Get the controlling type variable operand.");
+ fmt.line("pub fn typevar_operand(&self, pool: &ir::ValueListPool) -> Option<Value> {");
+ fmt.indent(|fmt| {
+ let mut m = Match::new("*self");
+ for format in formats {
+ let name = format!("Self::{}", format.name);
+ if format.typevar_operand.is_none() {
+ m.arm(name, vec![".."], "None".to_string());
+ } else if format.has_value_list {
+ // We keep all arguments in a value list.
+ m.arm(name, vec!["ref args", ".."], format!("args.get({}, pool)", format.typevar_operand.unwrap()));
+ } else if format.num_value_operands == 1 {
+ m.arm(name, vec!["arg", ".."], "Some(arg)".to_string());
+ } else {
+ // We have multiple value operands and an array `args`.
+ // Which `args` index to use?
+ let args = format!("args_arity{}", format.num_value_operands);
+ m.arm(name, vec![format!("args: ref {}", args), "..".to_string()],
+ format!("Some({}[{}])", args, format.typevar_operand.unwrap()));
+ }
+ }
+ fmt.add_match(m);
+ });
+ fmt.line("}");
+ fmt.empty_line();
+
+ fmt.doc_comment("Get the value arguments to this instruction.");
+ gen_arguments_method(formats, fmt, false);
+ fmt.empty_line();
+
+ fmt.doc_comment(r#"Get mutable references to the value arguments to this
+ instruction."#);
+ gen_arguments_method(formats, fmt, true);
+ fmt.empty_line();
+
+ fmt.doc_comment(r#"
+ Take out the value list with all the value arguments and return
+ it.
+
+ This leaves the value list in the instruction empty. Use
+ `put_value_list` to put the value list back.
+ "#);
+ fmt.line("pub fn take_value_list(&mut self) -> Option<ir::ValueList> {");
+ fmt.indent(|fmt| {
+ let mut m = Match::new("*self");
+
+ for format in formats {
+ if format.has_value_list {
+ m.arm(format!("Self::{}", format.name),
+ vec!["ref mut args", ".."],
+ "Some(args.take())".to_string());
+ }
+ }
+
+ m.arm_no_fields("_", "None");
+
+ fmt.add_match(m);
+ });
+ fmt.line("}");
+ fmt.empty_line();
+
+ fmt.doc_comment(r#"
+ Put back a value list.
+
+ After removing a value list with `take_value_list()`, use this
+ method to put it back. It is required that this instruction has
+ a format that accepts a value list, and that the existing value
+ list is empty. This avoids leaking list pool memory.
+ "#);
+ fmt.line("pub fn put_value_list(&mut self, vlist: ir::ValueList) {");
+ fmt.indent(|fmt| {
+ fmt.line("let args = match *self {");
+ fmt.indent(|fmt| {
+ for format in formats {
+ if format.has_value_list {
+ fmtln!(fmt, "Self::{} {{ ref mut args, .. }} => args,", format.name);
+ }
+ }
+ fmt.line("_ => panic!(\"No value list: {:?}\", self),");
+ });
+ fmt.line("};");
+ fmt.line("debug_assert!(args.is_empty(), \"Value list already in use\");");
+ fmt.line("*args = vlist;");
+ });
+ fmt.line("}");
+ fmt.empty_line();
+
+ fmt.doc_comment(r#"
+ Compare two `InstructionData` for equality.
+
+ This operation requires a reference to a `ValueListPool` to
+ determine if the contents of any `ValueLists` are equal.
+ "#);
+ fmt.line("pub fn eq(&self, other: &Self, pool: &ir::ValueListPool) -> bool {");
+ fmt.indent(|fmt| {
+ fmt.line("if ::core::mem::discriminant(self) != ::core::mem::discriminant(other) {");
+ fmt.indent(|fmt| {
+ fmt.line("return false;");
+ });
+ fmt.line("}");
+
+ fmt.line("match (self, other) {");
+ fmt.indent(|fmt| {
+ for format in formats {
+ let name = format!("&Self::{}", format.name);
+ let mut members = vec!["opcode"];
+
+ let args_eq = if format.typevar_operand.is_none() {
+ None
+ } else if format.has_value_list {
+ members.push("args");
+ Some("args1.as_slice(pool) == args2.as_slice(pool)")
+ } else if format.num_value_operands == 1 {
+ members.push("arg");
+ Some("arg1 == arg2")
+ } else {
+ members.push("args");
+ Some("args1 == args2")
+ };
+
+ for field in &format.imm_fields {
+ members.push(field.member);
+ }
+
+ let pat1 = members.iter().map(|x| format!("{}: ref {}1", x, x)).collect::<Vec<_>>().join(", ");
+ let pat2 = members.iter().map(|x| format!("{}: ref {}2", x, x)).collect::<Vec<_>>().join(", ");
+ fmtln!(fmt, "({} {{ {} }}, {} {{ {} }}) => {{", name, pat1, name, pat2);
+ fmt.indent(|fmt| {
+ fmt.line("opcode1 == opcode2");
+ for field in &format.imm_fields {
+ fmtln!(fmt, "&& {}1 == {}2", field.member, field.member);
+ }
+ if let Some(args_eq) = args_eq {
+ fmtln!(fmt, "&& {}", args_eq);
+ }
+ });
+ fmtln!(fmt, "}");
+ }
+ fmt.line("_ => unreachable!()");
+ });
+ fmt.line("}");
+ });
+ fmt.line("}");
+ fmt.empty_line();
+
+ fmt.doc_comment(r#"
+ Hash an `InstructionData`.
+
+ This operation requires a reference to a `ValueListPool` to
+ hash the contents of any `ValueLists`.
+ "#);
+ fmt.line("pub fn hash<H: ::core::hash::Hasher>(&self, state: &mut H, pool: &ir::ValueListPool) {");
+ fmt.indent(|fmt| {
+ fmt.line("match *self {");
+ fmt.indent(|fmt| {
+ for format in formats {
+ let name = format!("Self::{}", format.name);
+ let mut members = vec!["opcode"];
+
+ let args = if format.typevar_operand.is_none() {
+ "&()"
+ } else if format.has_value_list {
+ members.push("ref args");
+ "args.as_slice(pool)"
+ } else if format.num_value_operands == 1 {
+ members.push("ref arg");
+ "arg"
+ } else {
+ members.push("ref args");
+ "args"
+ };
+
+ for field in &format.imm_fields {
+ members.push(field.member);
+ }
+ let members = members.join(", ");
+
+ fmtln!(fmt, "{}{{{}}} => {{", name, members ); // beware the moustaches
+ fmt.indent(|fmt| {
+ fmt.line("::core::hash::Hash::hash( &::core::mem::discriminant(self), state);");
+ fmt.line("::core::hash::Hash::hash(&opcode, state);");
+ for field in &format.imm_fields {
+ fmtln!(fmt, "::core::hash::Hash::hash(&{}, state);", field.member);
+ }
+ fmtln!(fmt, "::core::hash::Hash::hash({}, state);", args);
+ });
+ fmtln!(fmt, "}");
+ }
+ });
+ fmt.line("}");
+ });
+ fmt.line("}");
+ });
+ fmt.line("}");
+}
+
+fn gen_bool_accessor<T: Fn(&Instruction) -> bool>(
+ all_inst: &AllInstructions,
+ get_attr: T,
+ name: &'static str,
+ doc: &'static str,
+ fmt: &mut Formatter,
+) {
+ fmt.doc_comment(doc);
+ fmtln!(fmt, "pub fn {}(self) -> bool {{", name);
+ fmt.indent(|fmt| {
+ let mut m = Match::new("self");
+ for inst in all_inst.values() {
+ if get_attr(inst) {
+ m.arm_no_fields(format!("Self::{}", inst.camel_name), "true");
+ }
+ }
+ m.arm_no_fields("_", "false");
+ fmt.add_match(m);
+ });
+ fmtln!(fmt, "}");
+ fmt.empty_line();
+}
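+
+// Each generated accessor has roughly this shape (the opcode arms depend on
+// which instructions carry the attribute):
+//
+//     pub fn can_trap(self) -> bool {
+//         match self {
+//             // ... one arm per opcode with the attribute set => true ...
+//             _ => false,
+//         }
+//     }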
+
+fn gen_opcodes(all_inst: &AllInstructions, fmt: &mut Formatter) {
+ fmt.doc_comment(
+ r#"
+ An instruction opcode.
+
+ All instructions from all supported ISAs are present.
+ "#,
+ );
+ fmt.line("#[repr(u16)]");
+ fmt.line("#[derive(Copy, Clone, PartialEq, Eq, Debug, Hash)]");
+ fmt.line(
+ r#"#[cfg_attr(feature = "enable-peepmatic", derive(serde::Serialize, serde::Deserialize))]"#
+ );
+
+ // We explicitly set the discriminant of the first variant to 1, which allows us to take
+ // advantage of the NonZero optimization, meaning that wrapping enums can use the 0
+ // discriminant instead of increasing the size of the whole type, and so the size of
+ // Option<Opcode> is the same as Opcode's.
+ fmt.line("pub enum Opcode {");
+ fmt.indent(|fmt| {
+ let mut is_first_opcode = true;
+ for inst in all_inst.values() {
+ fmt.doc_comment(format!("`{}`. ({})", inst, inst.format.name));
+
+ // Document polymorphism.
+ if let Some(poly) = &inst.polymorphic_info {
+ if poly.use_typevar_operand {
+ let op_num = inst.value_opnums[inst.format.typevar_operand.unwrap()];
+ fmt.doc_comment(format!(
+ "Type inferred from `{}`.",
+ inst.operands_in[op_num].name
+ ));
+ }
+ }
+
+ // Enum variant itself.
+ if is_first_opcode {
+ assert!(inst.opcode_number.index() == 0);
+ // TODO: the Python code requires opcode numbers to start from one.
+ fmtln!(fmt, "{} = 1,", inst.camel_name);
+ is_first_opcode = false;
+ } else {
+ fmtln!(fmt, "{},", inst.camel_name)
+ }
+ }
+ });
+ fmt.line("}");
+ fmt.empty_line();
+
+ fmt.line("impl Opcode {");
+ fmt.indent(|fmt| {
+ gen_bool_accessor(
+ all_inst,
+ |inst| inst.is_terminator,
+ "is_terminator",
+ "True for instructions that terminate the block",
+ fmt,
+ );
+ gen_bool_accessor(
+ all_inst,
+ |inst| inst.is_branch,
+ "is_branch",
+ "True for all branch or jump instructions.",
+ fmt,
+ );
+ gen_bool_accessor(
+ all_inst,
+ |inst| inst.is_indirect_branch,
+ "is_indirect_branch",
+ "True for all indirect branch or jump instructions.",
+ fmt,
+ );
+ gen_bool_accessor(
+ all_inst,
+ |inst| inst.is_call,
+ "is_call",
+ "Is this a call instruction?",
+ fmt,
+ );
+ gen_bool_accessor(
+ all_inst,
+ |inst| inst.is_return,
+ "is_return",
+ "Is this a return instruction?",
+ fmt,
+ );
+ gen_bool_accessor(
+ all_inst,
+ |inst| inst.is_ghost,
+ "is_ghost",
+ "Is this a ghost instruction?",
+ fmt,
+ );
+ gen_bool_accessor(
+ all_inst,
+ |inst| inst.can_load,
+ "can_load",
+ "Can this instruction read from memory?",
+ fmt,
+ );
+ gen_bool_accessor(
+ all_inst,
+ |inst| inst.can_store,
+ "can_store",
+ "Can this instruction write to memory?",
+ fmt,
+ );
+ gen_bool_accessor(
+ all_inst,
+ |inst| inst.can_trap,
+ "can_trap",
+ "Can this instruction cause a trap?",
+ fmt,
+ );
+ gen_bool_accessor(
+ all_inst,
+ |inst| inst.other_side_effects,
+ "other_side_effects",
+ "Does this instruction have other side effects besides can_* flags?",
+ fmt,
+ );
+ gen_bool_accessor(
+ all_inst,
+ |inst| inst.writes_cpu_flags,
+ "writes_cpu_flags",
+ "Does this instruction write to CPU flags?",
+ fmt,
+ );
+ gen_bool_accessor(
+ all_inst,
+ |inst| inst.clobbers_all_regs,
+ "clobbers_all_regs",
+ "Should this opcode be considered to clobber all the registers, during regalloc?",
+ fmt,
+ );
+ });
+ fmt.line("}");
+ fmt.empty_line();
+
+ // Generate a private opcode_format table.
+ fmtln!(
+ fmt,
+ "const OPCODE_FORMAT: [InstructionFormat; {}] = [",
+ all_inst.len()
+ );
+ fmt.indent(|fmt| {
+ for inst in all_inst.values() {
+ fmtln!(
+ fmt,
+ "InstructionFormat::{}, // {}",
+ inst.format.name,
+ inst.name
+ );
+ }
+ });
+ fmtln!(fmt, "];");
+ fmt.empty_line();
+
+ // Generate a private opcode_name function.
+ fmt.line("fn opcode_name(opc: Opcode) -> &\'static str {");
+ fmt.indent(|fmt| {
+ let mut m = Match::new("opc");
+ for inst in all_inst.values() {
+ m.arm_no_fields(
+ format!("Opcode::{}", inst.camel_name),
+ format!("\"{}\"", inst.name),
+ );
+ }
+ fmt.add_match(m);
+ });
+ fmt.line("}");
+ fmt.empty_line();
+
+ // Generate an opcode hash table for looking up opcodes by name.
+ let hash_table = constant_hash::generate_table(all_inst.values(), all_inst.len(), |inst| {
+ constant_hash::simple_hash(&inst.name)
+ });
+ fmtln!(
+ fmt,
+ "const OPCODE_HASH_TABLE: [Option<Opcode>; {}] = [",
+ hash_table.len()
+ );
+ fmt.indent(|fmt| {
+ for i in hash_table {
+ match i {
+ Some(i) => fmtln!(fmt, "Some(Opcode::{}),", i.camel_name),
+ None => fmtln!(fmt, "None,"),
+ }
+ }
+ });
+ fmtln!(fmt, "];");
+ fmt.empty_line();
+}
+
+fn gen_try_from(all_inst: &AllInstructions, fmt: &mut Formatter) {
+ fmt.line("impl core::convert::TryFrom<u16> for Opcode {");
+ fmt.indent(|fmt| {
+ fmt.line("type Error = ();");
+ fmt.line("#[inline]");
+ fmt.line("fn try_from(x: u16) -> Result<Self, ()> {");
+ fmt.indent(|fmt| {
+ fmtln!(fmt, "if 0 < x && x <= {} {{", all_inst.len());
+ fmt.indent(|fmt| fmt.line("Ok(unsafe { core::mem::transmute(x) })"));
+ fmt.line("} else {");
+ fmt.indent(|fmt| fmt.line("Err(())"));
+ fmt.line("}");
+ });
+ fmt.line("}");
+ });
+ fmt.line("}");
+}
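+
+// The emitted conversion looks roughly like this, with `N` standing for the
+// number of instructions; it relies on opcode discriminants forming the
+// contiguous range 1..=N, as arranged in `gen_opcodes` above:
+//
+//     impl core::convert::TryFrom<u16> for Opcode {
+//         type Error = ();
+//         #[inline]
+//         fn try_from(x: u16) -> Result<Self, ()> {
+//             if 0 < x && x <= N {
+//                 Ok(unsafe { core::mem::transmute(x) })
+//             } else {
+//                 Err(())
+//             }
+//         }
+//     }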
+
+/// Get the value type constraint for an SSA value operand, where
+/// `ctrl_typevar` is the controlling type variable.
+///
+/// Each operand constraint is represented as a string, one of:
+/// - `Concrete(vt)`, where `vt` is a value type name.
+/// - `Free(idx)` where `idx` is an index into `type_sets`.
+/// - `Same`, `Lane`, `AsBool` for controlling typevar-derived constraints.
+fn get_constraint<'entries, 'table>(
+ operand: &'entries Operand,
+ ctrl_typevar: Option<&TypeVar>,
+ type_sets: &'table mut UniqueTable<'entries, TypeSet>,
+) -> String {
+ assert!(operand.is_value());
+ let type_var = operand.type_var().unwrap();
+
+ if let Some(typ) = type_var.singleton_type() {
+ return format!("Concrete({})", typ.rust_name());
+ }
+
+ if let Some(free_typevar) = type_var.free_typevar() {
+ if ctrl_typevar.is_some() && free_typevar != *ctrl_typevar.unwrap() {
+ assert!(type_var.base.is_none());
+ return format!("Free({})", type_sets.add(&type_var.get_raw_typeset()));
+ }
+ }
+
+ if let Some(base) = &type_var.base {
+ assert!(base.type_var == *ctrl_typevar.unwrap());
+ return camel_case(base.derived_func.name());
+ }
+
+ assert!(type_var == ctrl_typevar.unwrap());
+ "Same".into()
+}
+
+fn gen_bitset<'a, T: IntoIterator<Item = &'a u16>>(
+ iterable: T,
+ name: &'static str,
+ field_size: u8,
+ fmt: &mut Formatter,
+) {
+ let bits = iterable.into_iter().fold(0, |acc, x| {
+ assert!(x.is_power_of_two());
+ assert!(u32::from(*x) < (1 << u32::from(field_size)));
+ acc | x
+ });
+ fmtln!(fmt, "{}: BitSet::<u{}>({}),", name, field_size, bits);
+}
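+
+// For example (illustrative values), the `ints` member {8, 16, 32, 64} with an
+// 8-bit field folds into 8 | 16 | 32 | 64 = 120, emitted as
+// `ints: BitSet::<u8>(120),`.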
+
+fn iterable_to_string<I: fmt::Display, T: IntoIterator<Item = I>>(iterable: T) -> String {
+ let elems = iterable
+ .into_iter()
+ .map(|x| x.to_string())
+ .collect::<Vec<_>>()
+ .join(", ");
+ format!("{{{}}}", elems)
+}
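+
+// For example, `iterable_to_string(&[8, 16, 32])` yields the string "{8, 16, 32}".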
+
+fn typeset_to_string(ts: &TypeSet) -> String {
+ let mut result = format!("TypeSet(lanes={}", iterable_to_string(&ts.lanes));
+ if !ts.ints.is_empty() {
+ result += &format!(", ints={}", iterable_to_string(&ts.ints));
+ }
+ if !ts.floats.is_empty() {
+ result += &format!(", floats={}", iterable_to_string(&ts.floats));
+ }
+ if !ts.bools.is_empty() {
+ result += &format!(", bools={}", iterable_to_string(&ts.bools));
+ }
+ if !ts.specials.is_empty() {
+ result += &format!(", specials=[{}]", iterable_to_string(&ts.specials));
+ }
+ if !ts.refs.is_empty() {
+ result += &format!(", refs={}", iterable_to_string(&ts.refs));
+ }
+ result += ")";
+ result
+}
+
+/// Generate the table of ValueTypeSets described by type_sets.
+pub(crate) fn gen_typesets_table(type_sets: &UniqueTable<TypeSet>, fmt: &mut Formatter) {
+ if type_sets.len() == 0 {
+ return;
+ }
+
+ fmt.comment("Table of value type sets.");
+ assert!(type_sets.len() <= TYPESET_LIMIT, "Too many type sets!");
+ fmtln!(
+ fmt,
+ "const TYPE_SETS: [ir::instructions::ValueTypeSet; {}] = [",
+ type_sets.len()
+ );
+ fmt.indent(|fmt| {
+ for ts in type_sets.iter() {
+ fmt.line("ir::instructions::ValueTypeSet {");
+ fmt.indent(|fmt| {
+ fmt.comment(typeset_to_string(ts));
+ gen_bitset(&ts.lanes, "lanes", 16, fmt);
+ gen_bitset(&ts.ints, "ints", 8, fmt);
+ gen_bitset(&ts.floats, "floats", 8, fmt);
+ gen_bitset(&ts.bools, "bools", 8, fmt);
+ gen_bitset(&ts.refs, "refs", 8, fmt);
+ });
+ fmt.line("},");
+ }
+ });
+ fmtln!(fmt, "];");
+}
+
+/// Generate value type constraints for all instructions.
+/// - Emit a compact constant table of ValueTypeSet objects.
+/// - Emit a compact constant table of OperandConstraint objects.
+/// - Emit an opcode-indexed table of instruction constraints.
+fn gen_type_constraints(all_inst: &AllInstructions, fmt: &mut Formatter) {
+ // Table of TypeSet instances.
+ let mut type_sets = UniqueTable::new();
+
+ // Table of operand constraint sequences (as tuples). Each operand
+ // constraint is represented as a string, one of:
+ // - `Concrete(vt)`, where `vt` is a value type name.
+ // - `Free(idx)` where `idx` is an index into `type_sets`.
+ // - `Same`, `Lane`, `AsBool` for controlling typevar-derived constraints.
+ let mut operand_seqs = UniqueSeqTable::new();
+
+ // Preload table with constraints for typical binops.
+ #[allow(clippy::useless_vec)]
+ operand_seqs.add(&vec!["Same".to_string(); 3]);
+
+ fmt.comment("Table of opcode constraints.");
+ fmtln!(
+ fmt,
+ "const OPCODE_CONSTRAINTS: [OpcodeConstraints; {}] = [",
+ all_inst.len()
+ );
+ fmt.indent(|fmt| {
+ for inst in all_inst.values() {
+ let (ctrl_typevar, ctrl_typeset) = if let Some(poly) = &inst.polymorphic_info {
+ let index = type_sets.add(&*poly.ctrl_typevar.get_raw_typeset());
+ (Some(&poly.ctrl_typevar), index)
+ } else {
+ (None, TYPESET_LIMIT)
+ };
+
+ // Collect constraints for the value results, not including `variable_args` results
+ // which are always special cased.
+ let mut constraints = Vec::new();
+ for &index in &inst.value_results {
+ constraints.push(get_constraint(&inst.operands_out[index], ctrl_typevar, &mut type_sets));
+ }
+ for &index in &inst.value_opnums {
+ constraints.push(get_constraint(&inst.operands_in[index], ctrl_typevar, &mut type_sets));
+ }
+
+ let constraint_offset = operand_seqs.add(&constraints);
+
+ let fixed_results = inst.value_results.len();
+ let fixed_values = inst.value_opnums.len();
+
+ // Can the controlling type variable be inferred from the designated operand?
+ let use_typevar_operand = if let Some(poly) = &inst.polymorphic_info {
+ poly.use_typevar_operand
+ } else {
+ false
+ };
+
+ // Can the controlling type variable be inferred from the result?
+ let use_result = fixed_results > 0 && inst.operands_out[inst.value_results[0]].type_var() == ctrl_typevar;
+
+ // Are we required to use the designated operand instead of the result?
+ let requires_typevar_operand = use_typevar_operand && !use_result;
+
+ fmt.comment(
+ format!("{}: fixed_results={}, use_typevar_operand={}, requires_typevar_operand={}, fixed_values={}",
+ inst.camel_name,
+ fixed_results,
+ use_typevar_operand,
+ requires_typevar_operand,
+ fixed_values)
+ );
+ fmt.comment(format!("Constraints=[{}]", constraints
+ .iter()
+ .map(|x| format!("'{}'", x))
+ .collect::<Vec<_>>()
+ .join(", ")));
+ if let Some(poly) = &inst.polymorphic_info {
+ fmt.comment(format!("Polymorphic over {}", typeset_to_string(&poly.ctrl_typevar.get_raw_typeset())));
+ }
+
+ // Compute the bit field encoding, c.f. instructions.rs.
+ assert!(fixed_results < 8 && fixed_values < 8, "Bit field encoding too tight");
+ let mut flags = fixed_results; // 3 bits
+ if use_typevar_operand {
+ flags |= 1<<3; // 4th bit
+ }
+ if requires_typevar_operand {
+ flags |= 1<<4; // 5th bit
+ }
+ flags |= fixed_values << 5; // 6th bit and more
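+ // For example (illustrative values): one fixed result, an inferable typevar
+ // operand that isn't required, and two fixed values yield
+ // flags = 1 | (1 << 3) | (2 << 5) = 0x49.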
+
+ fmt.line("OpcodeConstraints {");
+ fmt.indent(|fmt| {
+ fmtln!(fmt, "flags: {:#04x},", flags);
+ fmtln!(fmt, "typeset_offset: {},", ctrl_typeset);
+ fmtln!(fmt, "constraint_offset: {},", constraint_offset);
+ });
+ fmt.line("},");
+ }
+ });
+ fmtln!(fmt, "];");
+ fmt.empty_line();
+
+ gen_typesets_table(&type_sets, fmt);
+ fmt.empty_line();
+
+ fmt.comment("Table of operand constraint sequences.");
+ fmtln!(
+ fmt,
+ "const OPERAND_CONSTRAINTS: [OperandConstraint; {}] = [",
+ operand_seqs.len()
+ );
+ fmt.indent(|fmt| {
+ for constraint in operand_seqs.iter() {
+ fmtln!(fmt, "OperandConstraint::{},", constraint);
+ }
+ });
+ fmtln!(fmt, "];");
+}
+
+/// Emit member initializers for an instruction format.
+fn gen_member_inits(format: &InstructionFormat, fmt: &mut Formatter) {
+ // Immediate operands.
+ // We have local variables with the same names as the members.
+ for f in &format.imm_fields {
+ fmtln!(fmt, "{},", f.member);
+ }
+
+ // Value operands.
+ if format.has_value_list {
+ fmt.line("args,");
+ } else if format.num_value_operands == 1 {
+ fmt.line("arg: arg0,");
+ } else if format.num_value_operands > 1 {
+ let mut args = Vec::new();
+ for i in 0..format.num_value_operands {
+ args.push(format!("arg{}", i));
+ }
+ fmtln!(fmt, "args: [{}],", args.join(", "));
+ }
+}
+
+/// Emit a method for creating and inserting an instruction format.
+///
+/// All instruction formats take an `opcode` argument and a `ctrl_typevar` argument for deducing
+/// the result types.
+fn gen_format_constructor(format: &InstructionFormat, fmt: &mut Formatter) {
+ // Construct method arguments.
+ let mut args = vec![
+ "self".to_string(),
+ "opcode: Opcode".into(),
+ "ctrl_typevar: Type".into(),
+ ];
+
+ // Normal operand arguments. Start with the immediate operands.
+ for f in &format.imm_fields {
+ args.push(format!("{}: {}", f.member, f.kind.rust_type));
+ }
+
+ // Then the value operands.
+ if format.has_value_list {
+ // Take all value arguments as a finished value list. The value lists
+ // are created by the individual instruction constructors.
+ args.push("args: ir::ValueList".into());
+ } else {
+ // Take a fixed number of value operands.
+ for i in 0..format.num_value_operands {
+ args.push(format!("arg{}: Value", i));
+ }
+ }
+
+ let proto = format!(
+ "{}({}) -> (Inst, &'f mut ir::DataFlowGraph)",
+ format.name,
+ args.join(", ")
+ );
+
+ let imms_need_sign_extension = format
+ .imm_fields
+ .iter()
+ .any(|f| f.kind.rust_type == "ir::immediates::Imm64");
+
+ fmt.doc_comment(format.to_string());
+ fmt.line("#[allow(non_snake_case)]");
+ fmtln!(fmt, "fn {} {{", proto);
+ fmt.indent(|fmt| {
+ // Generate the instruction data.
+ fmtln!(
+ fmt,
+ "let{} data = ir::InstructionData::{} {{",
+ if imms_need_sign_extension { " mut" } else { "" },
+ format.name
+ );
+ fmt.indent(|fmt| {
+ fmt.line("opcode,");
+ gen_member_inits(format, fmt);
+ });
+ fmtln!(fmt, "};");
+
+ if imms_need_sign_extension {
+ fmtln!(fmt, "data.sign_extend_immediates(ctrl_typevar);");
+ }
+
+ fmt.line("self.build(data, ctrl_typevar)");
+ });
+ fmtln!(fmt, "}");
+}
+
+/// Emit a method for generating the instruction `inst`.
+///
+/// The method will create and insert an instruction, then return the result values, or the
+/// instruction reference itself for instructions that don't have results.
+fn gen_inst_builder(inst: &Instruction, format: &InstructionFormat, fmt: &mut Formatter) {
+ // Construct method arguments.
+ let mut args = vec![if format.has_value_list {
+ "mut self"
+ } else {
+ "self"
+ }
+ .to_string()];
+
+ let mut args_doc = Vec::new();
+ let mut rets_doc = Vec::new();
+
+ // The controlling type variable will be inferred from the input values if
+ // possible. Otherwise, it is the first method argument.
+ if let Some(poly) = &inst.polymorphic_info {
+ if !poly.use_typevar_operand {
+ args.push(format!("{}: crate::ir::Type", poly.ctrl_typevar.name));
+ args_doc.push(format!(
+ "- {} (controlling type variable): {}",
+ poly.ctrl_typevar.name, poly.ctrl_typevar.doc
+ ));
+ }
+ }
+
+ let mut tmpl_types = Vec::new();
+ let mut into_args = Vec::new();
+ for op in &inst.operands_in {
+ let t = if op.is_immediate() {
+ let t = format!("T{}", tmpl_types.len() + 1);
+ tmpl_types.push(format!("{}: Into<{}>", t, op.kind.rust_type));
+ into_args.push(op.name);
+ t
+ } else {
+ op.kind.rust_type.to_string()
+ };
+ args.push(format!("{}: {}", op.name, t));
+ args_doc.push(format!(
+ "- {}: {}",
+ op.name,
+ op.doc()
+ .expect("every instruction's input operand must be documented")
+ ));
+ }
+
+ for op in &inst.operands_out {
+ rets_doc.push(format!(
+ "- {}: {}",
+ op.name,
+ op.doc()
+ .expect("every instruction's output operand must be documented")
+ ));
+ }
+
+ let rtype = match inst.value_results.len() {
+ 0 => "Inst".into(),
+ 1 => "Value".into(),
+ _ => format!("({})", vec!["Value"; inst.value_results.len()].join(", ")),
+ };
+
+ let tmpl = if !tmpl_types.is_empty() {
+ format!("<{}>", tmpl_types.join(", "))
+ } else {
+ "".into()
+ };
+
+ let proto = format!(
+ "{}{}({}) -> {}",
+ inst.snake_name(),
+ tmpl,
+ args.join(", "),
+ rtype
+ );
+
+ fmt.doc_comment(&inst.doc);
+ if !args_doc.is_empty() {
+ fmt.line("///");
+ fmt.doc_comment("Inputs:");
+ fmt.line("///");
+ for doc_line in args_doc {
+ fmt.doc_comment(doc_line);
+ }
+ }
+ if !rets_doc.is_empty() {
+ fmt.line("///");
+ fmt.doc_comment("Outputs:");
+ fmt.line("///");
+ for doc_line in rets_doc {
+ fmt.doc_comment(doc_line);
+ }
+ }
+
+ fmt.line("#[allow(non_snake_case)]");
+ fmtln!(fmt, "fn {} {{", proto);
+ fmt.indent(|fmt| {
+ // Convert all of the `Into<>` arguments.
+ for arg in &into_args {
+ fmtln!(fmt, "let {} = {}.into();", arg, arg);
+ }
+
+ // Arguments for instruction constructor.
+ let first_arg = format!("Opcode::{}", inst.camel_name);
+ let mut args = vec![first_arg.as_str()];
+ if let Some(poly) = &inst.polymorphic_info {
+ if poly.use_typevar_operand {
+ // Infer the controlling type variable from the input operands.
+ let op_num = inst.value_opnums[format.typevar_operand.unwrap()];
+ fmtln!(
+ fmt,
+ "let ctrl_typevar = self.data_flow_graph().value_type({});",
+ inst.operands_in[op_num].name
+ );
+
+ // The format constructor will resolve the result types from the type var.
+ args.push("ctrl_typevar");
+ } else {
+ // This was an explicit method argument.
+ args.push(&poly.ctrl_typevar.name);
+ }
+ } else {
+ // No controlling type variable needed.
+ args.push("types::INVALID");
+ }
+
+ // Now add all of the immediate operands to the constructor arguments.
+ for &op_num in &inst.imm_opnums {
+ args.push(inst.operands_in[op_num].name);
+ }
+
+ // Finally, the value operands.
+ if format.has_value_list {
+ // We need to build a value list with all the arguments.
+ fmt.line("let mut vlist = ir::ValueList::default();");
+ args.push("vlist");
+ fmt.line("{");
+ fmt.indent(|fmt| {
+ fmt.line("let pool = &mut self.data_flow_graph_mut().value_lists;");
+ for op in &inst.operands_in {
+ if op.is_value() {
+ fmtln!(fmt, "vlist.push({}, pool);", op.name);
+ } else if op.is_varargs() {
+ fmtln!(fmt, "vlist.extend({}.iter().cloned(), pool);", op.name);
+ }
+ }
+ });
+ fmt.line("}");
+ } else {
+ // With no value list, we're guaranteed to just have a set of fixed value operands.
+ for &op_num in &inst.value_opnums {
+ args.push(inst.operands_in[op_num].name);
+ }
+ }
+
+ // Call the format constructor.
+ let fcall = format!("self.{}({})", format.name, args.join(", "));
+
+ if inst.value_results.is_empty() {
+ fmtln!(fmt, "{}.0", fcall);
+ return;
+ }
+
+ fmtln!(fmt, "let (inst, dfg) = {};", fcall);
+ if inst.value_results.len() == 1 {
+ fmt.line("dfg.first_result(inst)");
+ } else {
+ fmtln!(
+ fmt,
+ "let results = &dfg.inst_results(inst)[0..{}];",
+ inst.value_results.len()
+ );
+ fmtln!(
+ fmt,
+ "({})",
+ inst.value_results
+ .iter()
+ .enumerate()
+ .map(|(i, _)| format!("results[{}]", i))
+ .collect::<Vec<_>>()
+ .join(", ")
+ );
+ }
+ });
+ fmtln!(fmt, "}")
+}
+
+/// Generate a Builder trait with methods for all instructions.
+fn gen_builder(
+ instructions: &AllInstructions,
+ formats: &[&InstructionFormat],
+ fmt: &mut Formatter,
+) {
+ fmt.doc_comment(
+ r#"
+ Convenience methods for building instructions.
+
+ The `InstBuilder` trait has one method per instruction opcode for
+ conveniently constructing the instruction with minimum arguments.
+ Polymorphic instructions infer their result types from the input
+ arguments when possible. In some cases, an explicit `ctrl_typevar`
+ argument is required.
+
+ The opcode methods return the new instruction's result values, or
+ the `Inst` itself for instructions that don't have any results.
+
+ There is also a method per instruction format. These methods all
+ return an `Inst`.
+ "#,
+ );
+ fmt.line("pub trait InstBuilder<'f>: InstBuilderBase<'f> {");
+ fmt.indent(|fmt| {
+ for inst in instructions.values() {
+ gen_inst_builder(inst, &*inst.format, fmt);
+ fmt.empty_line();
+ }
+ for (i, format) in formats.iter().enumerate() {
+ gen_format_constructor(format, fmt);
+ if i + 1 != formats.len() {
+ fmt.empty_line();
+ }
+ }
+ });
+ fmt.line("}");
+}
+
+pub(crate) fn generate(
+ formats: Vec<&InstructionFormat>,
+ all_inst: &AllInstructions,
+ opcode_filename: &str,
+ inst_builder_filename: &str,
+ out_dir: &str,
+) -> Result<(), error::Error> {
+ // Opcodes.
+ let mut fmt = Formatter::new();
+ gen_formats(&formats, &mut fmt);
+ gen_instruction_data(&formats, &mut fmt);
+ fmt.empty_line();
+ gen_instruction_data_impl(&formats, &mut fmt);
+ fmt.empty_line();
+ gen_opcodes(all_inst, &mut fmt);
+ fmt.empty_line();
+ gen_type_constraints(all_inst, &mut fmt);
+ fmt.empty_line();
+ gen_try_from(all_inst, &mut fmt);
+ fmt.update_file(opcode_filename, out_dir)?;
+
+ // Instruction builder.
+ let mut fmt = Formatter::new();
+ gen_builder(all_inst, &formats, &mut fmt);
+ fmt.update_file(inst_builder_filename, out_dir)?;
+
+ Ok(())
+}
diff --git a/third_party/rust/cranelift-codegen-meta/src/gen_legalizer.rs b/third_party/rust/cranelift-codegen-meta/src/gen_legalizer.rs
new file mode 100644
index 0000000000..7b56b8db48
--- /dev/null
+++ b/third_party/rust/cranelift-codegen-meta/src/gen_legalizer.rs
@@ -0,0 +1,734 @@
+//! Generate transformations to legalize instructions without encodings.
+use crate::cdsl::ast::{Def, DefPool, Expr, VarPool};
+use crate::cdsl::isa::TargetIsa;
+use crate::cdsl::operands::Operand;
+use crate::cdsl::type_inference::Constraint;
+use crate::cdsl::typevar::{TypeSet, TypeVar};
+use crate::cdsl::xform::{Transform, TransformGroup, TransformGroups};
+
+use crate::error;
+use crate::gen_inst::gen_typesets_table;
+use crate::srcgen::Formatter;
+use crate::unique_table::UniqueTable;
+
+use std::collections::{HashMap, HashSet};
+use std::iter::FromIterator;
+
+/// Given a `Def` node, emit code that extracts all the instruction fields from
+/// `pos.func.dfg[inst]`.
+///
+/// Create local variables named after the `Var` instances in `node`.
+///
+/// Also create a local variable named `predicate` with the value of the evaluated instruction
+/// predicate, or `true` if the node has no predicate.
+fn unwrap_inst(transform: &Transform, fmt: &mut Formatter) -> bool {
+ let var_pool = &transform.var_pool;
+ let def_pool = &transform.def_pool;
+
+ let def = def_pool.get(transform.src);
+ let apply = &def.apply;
+ let inst = &apply.inst;
+ let iform = &inst.format;
+
+ fmt.comment(format!(
+ "Unwrap fields from instruction format {}",
+ def.to_comment_string(&transform.var_pool)
+ ));
+
+ // Extract the Var arguments.
+ let arg_names = apply
+ .args
+ .iter()
+ .enumerate()
+ .filter(|(arg_num, _)| {
+ // Variable args are specially handled after extracting args.
+ !inst.operands_in[*arg_num].is_varargs()
+ })
+ .map(|(arg_num, arg)| match &arg {
+ Expr::Var(var_index) => var_pool.get(*var_index).name.as_ref(),
+ Expr::Literal(_) => {
+ let n = inst.imm_opnums.iter().position(|&i| i == arg_num).unwrap();
+ iform.imm_fields[n].member
+ }
+ })
+ .collect::<Vec<_>>()
+ .join(", ");
+
+ // Will we need "args" among the values consumed by predicates?
+ let emit_args = iform.num_value_operands >= 1 || iform.has_value_list;
+
+ // We need a tuple:
+ // - if there's at least one value operand, then we emit a variable for the value, and the
+ // value list as args.
+ // - otherwise, if the number of immediate operands plus the presence of a value list exceeds one.
+ let need_tuple = if iform.num_value_operands >= 1 {
+ true
+ } else {
+ let mut imm_and_varargs = inst
+ .operands_in
+ .iter()
+ .filter(|op| op.is_immediate_or_entityref())
+ .count();
+ if iform.has_value_list {
+ imm_and_varargs += 1;
+ }
+ imm_and_varargs > 1
+ };
+
+ let maybe_args = if emit_args { ", args" } else { "" };
+ let defined_values = format!("{}{}", arg_names, maybe_args);
+
+ let tuple_or_value = if need_tuple {
+ format!("({})", defined_values)
+ } else {
+ defined_values
+ };
+
+ fmtln!(
+ fmt,
+ "let {} = if let ir::InstructionData::{} {{",
+ tuple_or_value,
+ iform.name
+ );
+
+ fmt.indent(|fmt| {
+ // Fields are encoded directly.
+ for field in &iform.imm_fields {
+ fmtln!(fmt, "{},", field.member);
+ }
+
+ if iform.has_value_list || iform.num_value_operands > 1 {
+ fmt.line("ref args,");
+ } else if iform.num_value_operands == 1 {
+ fmt.line("arg,");
+ }
+
+ fmt.line("..");
+ fmt.outdented_line("} = pos.func.dfg[inst] {");
+
+ if iform.has_value_list {
+ fmt.line("let args = args.as_slice(&pos.func.dfg.value_lists);");
+ } else if iform.num_value_operands == 1 {
+ fmt.line("let args = [arg];")
+ }
+
+ // Generate the values for the tuple.
+ let emit_one_value =
+ |fmt: &mut Formatter, needs_comma: bool, op_num: usize, op: &Operand| {
+ let comma = if needs_comma { "," } else { "" };
+ if op.is_immediate_or_entityref() {
+ let n = inst.imm_opnums.iter().position(|&i| i == op_num).unwrap();
+ fmtln!(fmt, "{}{}", iform.imm_fields[n].member, comma);
+ } else if op.is_value() {
+ let n = inst.value_opnums.iter().position(|&i| i == op_num).unwrap();
+ fmtln!(fmt, "pos.func.dfg.resolve_aliases(args[{}]),", n);
+ } else {
+ // This is a value list argument or a varargs.
+ assert!(iform.has_value_list || op.is_varargs());
+ }
+ };
+
+ if need_tuple {
+ fmt.line("(");
+ fmt.indent(|fmt| {
+ for (op_num, op) in inst.operands_in.iter().enumerate() {
+ let needs_comma = emit_args || op_num + 1 < inst.operands_in.len();
+ emit_one_value(fmt, needs_comma, op_num, op);
+ }
+ if emit_args {
+ fmt.line("args");
+ }
+ });
+ fmt.line(")");
+ } else {
+ // Only one of these can be true at the same time, otherwise we'd need a tuple.
+ emit_one_value(fmt, false, 0, &inst.operands_in[0]);
+ if emit_args {
+ fmt.line("args");
+ }
+ }
+
+ fmt.outdented_line("} else {");
+ fmt.line(r#"unreachable!("bad instruction format")"#);
+ });
+ fmtln!(fmt, "};");
+ fmt.empty_line();
+
+ assert_eq!(inst.operands_in.len(), apply.args.len());
+ for (i, op) in inst.operands_in.iter().enumerate() {
+ if op.is_varargs() {
+ let name = &var_pool
+ .get(apply.args[i].maybe_var().expect("vararg without name"))
+ .name;
+ let n = inst
+ .imm_opnums
+ .iter()
+ .chain(inst.value_opnums.iter())
+ .max()
+ .copied()
+ .unwrap_or(0);
+ fmtln!(fmt, "let {} = &Vec::from(&args[{}..]);", name, n);
+ }
+ }
+
+ for &op_num in &inst.value_opnums {
+ let arg = &apply.args[op_num];
+ if let Some(var_index) = arg.maybe_var() {
+ let var = var_pool.get(var_index);
+ if var.has_free_typevar() {
+ fmtln!(
+ fmt,
+ "let typeof_{} = pos.func.dfg.value_type({});",
+ var.name,
+ var.name
+ );
+ }
+ }
+ }
+
+ // If the definition creates results, detach the values and place them in locals.
+ let mut replace_inst = false;
+ if !def.defined_vars.is_empty() {
+ if def.defined_vars
+ == def_pool
+ .get(var_pool.get(def.defined_vars[0]).dst_def.unwrap())
+ .defined_vars
+ {
+ // Special case: The replacement instruction defines the exact same values.
+ fmt.comment(format!(
+ "Results handled by {}.",
+ def_pool
+ .get(var_pool.get(def.defined_vars[0]).dst_def.unwrap())
+ .to_comment_string(var_pool)
+ ));
+
+ fmt.line("let r = pos.func.dfg.inst_results(inst);");
+ for (i, &var_index) in def.defined_vars.iter().enumerate() {
+ let var = var_pool.get(var_index);
+ fmtln!(fmt, "let {} = &r[{}];", var.name, i);
+ fmtln!(
+ fmt,
+ "let typeof_{} = pos.func.dfg.value_type(*{});",
+ var.name,
+ var.name
+ );
+ }
+
+ replace_inst = true;
+ } else {
+ // Boring case: Detach the result values, capture them in locals.
+ for &var_index in &def.defined_vars {
+ fmtln!(fmt, "let {};", var_pool.get(var_index).name);
+ }
+
+ fmt.line("{");
+ fmt.indent(|fmt| {
+ fmt.line("let r = pos.func.dfg.inst_results(inst);");
+ for i in 0..def.defined_vars.len() {
+ let var = var_pool.get(def.defined_vars[i]);
+ fmtln!(fmt, "{} = r[{}];", var.name, i);
+ }
+ });
+ fmt.line("}");
+
+ for &var_index in &def.defined_vars {
+ let var = var_pool.get(var_index);
+ if var.has_free_typevar() {
+ fmtln!(
+ fmt,
+ "let typeof_{} = pos.func.dfg.value_type({});",
+ var.name,
+ var.name
+ );
+ }
+ }
+ }
+ }
+ replace_inst
+}
+
+fn build_derived_expr(tv: &TypeVar) -> String {
+ let base = match &tv.base {
+ Some(base) => base,
+ None => {
+ assert!(tv.name.starts_with("typeof_"));
+ return format!("Some({})", tv.name);
+ }
+ };
+ let base_expr = build_derived_expr(&base.type_var);
+ format!(
+ "{}.map(|t: crate::ir::Type| t.{}())",
+ base_expr,
+ base.derived_func.name()
+ )
+}
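+
+// For instance, for a type variable derived with a `half_width` function from a
+// free variable `typeof_x` (names chosen for illustration), this builds the
+// expression `Some(typeof_x).map(|t: crate::ir::Type| t.half_width())`.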
+
+/// Emit rust code for the given check.
+///
+/// The emitted code is a statement redefining the `predicate` variable like this:
+/// let predicate = predicate && ...
+fn emit_runtime_typecheck<'a>(
+ constraint: &'a Constraint,
+ type_sets: &mut UniqueTable<'a, TypeSet>,
+ fmt: &mut Formatter,
+) {
+ match constraint {
+ Constraint::InTypeset(tv, ts) => {
+ let ts_index = type_sets.add(&ts);
+ fmt.comment(format!(
+ "{} must belong to {:?}",
+ tv.name,
+ type_sets.get(ts_index)
+ ));
+ fmtln!(
+ fmt,
+ "let predicate = predicate && TYPE_SETS[{}].contains({});",
+ ts_index,
+ tv.name
+ );
+ }
+ Constraint::Eq(tv1, tv2) => {
+ fmtln!(
+ fmt,
+ "let predicate = predicate && match ({}, {}) {{",
+ build_derived_expr(tv1),
+ build_derived_expr(tv2)
+ );
+ fmt.indent(|fmt| {
+ fmt.line("(Some(a), Some(b)) => a == b,");
+ fmt.comment("On overflow, constraint doesn\'t apply");
+ fmt.line("_ => false,");
+ });
+ fmtln!(fmt, "};");
+ }
+ Constraint::WiderOrEq(tv1, tv2) => {
+ fmtln!(
+ fmt,
+ "let predicate = predicate && match ({}, {}) {{",
+ build_derived_expr(tv1),
+ build_derived_expr(tv2)
+ );
+ fmt.indent(|fmt| {
+ fmt.line("(Some(a), Some(b)) => a.wider_or_equal(b),");
+ fmt.comment("On overflow, constraint doesn\'t apply");
+ fmt.line("_ => false,");
+ });
+ fmtln!(fmt, "};");
+ }
+ }
+}
+
+/// Determine if `def` represents one of the value splitting instructions: `isplit` or `vsplit`.
+/// These instructions are lowered specially by the `legalizer::split` module.
+fn is_value_split(def: &Def) -> bool {
+ let name = &def.apply.inst.name;
+ name == "isplit" || name == "vsplit"
+}
+
+fn emit_dst_inst(def: &Def, def_pool: &DefPool, var_pool: &VarPool, fmt: &mut Formatter) {
+ let defined_vars = {
+ let vars = def
+ .defined_vars
+ .iter()
+ .map(|&var_index| var_pool.get(var_index).name.as_ref())
+ .collect::<Vec<&str>>();
+ if vars.len() == 1 {
+ vars[0].to_string()
+ } else {
+ format!("({})", vars.join(", "))
+ }
+ };
+
+ if is_value_split(def) {
+ // Split instructions are not emitted with the builder, but by calling special functions in
+ // the `legalizer::split` module. These functions will eliminate concat-split patterns.
+ fmt.line("let curpos = pos.position();");
+ fmt.line("let srcloc = pos.srcloc();");
+ fmtln!(
+ fmt,
+ "let {} = split::{}(pos.func, cfg, curpos, srcloc, {});",
+ defined_vars,
+ def.apply.inst.snake_name(),
+ def.apply.args[0].to_rust_code(var_pool)
+ );
+ return;
+ }
+
+ if def.defined_vars.is_empty() {
+ // This node doesn't define any values, so just insert the new instruction.
+ fmtln!(
+ fmt,
+ "pos.ins().{};",
+ def.apply.rust_builder(&def.defined_vars, var_pool)
+ );
+ return;
+ }
+
+ if let Some(src_def0) = var_pool.get(def.defined_vars[0]).src_def {
+ if def.defined_vars == def_pool.get(src_def0).defined_vars {
+ // The replacement instruction defines the exact same values as the source pattern.
+ // Unwrapping would have left the results intact. Replace the whole instruction.
+ fmtln!(
+ fmt,
+ "let {} = pos.func.dfg.replace(inst).{};",
+ defined_vars,
+ def.apply.rust_builder(&def.defined_vars, var_pool)
+ );
+
+ // We need to bump the cursor so following instructions are inserted *after* the
+ // replaced instruction.
+ fmt.line("if pos.current_inst() == Some(inst) {");
+ fmt.indent(|fmt| {
+ fmt.line("pos.next_inst();");
+ });
+ fmt.line("}");
+ return;
+ }
+ }
+
+ // Insert a new instruction.
+ let mut builder = format!("let {} = pos.ins()", defined_vars);
+
+ if def.defined_vars.len() == 1 && var_pool.get(def.defined_vars[0]).is_output() {
+ // Reuse the single source result value.
+ builder = format!(
+ "{}.with_result({})",
+ builder,
+ var_pool.get(def.defined_vars[0]).to_rust_code()
+ );
+ } else if def
+ .defined_vars
+ .iter()
+ .any(|&var_index| var_pool.get(var_index).is_output())
+ {
+ // There is more than one output value that can be reused.
+ let array = def
+ .defined_vars
+ .iter()
+ .map(|&var_index| {
+ let var = var_pool.get(var_index);
+ if var.is_output() {
+ format!("Some({})", var.name)
+ } else {
+ "None".into()
+ }
+ })
+ .collect::<Vec<_>>()
+ .join(", ");
+ builder = format!("{}.with_results([{}])", builder, array);
+ }
+
+ fmtln!(
+ fmt,
+ "{}.{};",
+ builder,
+ def.apply.rust_builder(&def.defined_vars, var_pool)
+ );
+}
+
+/// Emit code for `transform`, assuming that the opcode of transform's root instruction
+/// has already been matched.
+///
+/// `inst: Inst` is the variable to be replaced. It is pointed to by `pos: Cursor`.
+/// `dfg: DataFlowGraph` is available and mutable.
+fn gen_transform<'a>(
+ replace_inst: bool,
+ transform: &'a Transform,
+ type_sets: &mut UniqueTable<'a, TypeSet>,
+ fmt: &mut Formatter,
+) {
+ // Evaluate the instruction predicate if any.
+ let apply = &transform.def_pool.get(transform.src).apply;
+
+ let inst_predicate = apply
+ .inst_predicate_with_ctrl_typevar(&transform.var_pool)
+ .rust_predicate("pos.func");
+
+ let has_extra_constraints = !transform.type_env.constraints.is_empty();
+ if has_extra_constraints {
+ // Extra constraints rely on the predicate being a variable that we can rebind as we add
+ // more constraint predicates.
+ if let Some(pred) = &inst_predicate {
+ fmt.multi_line(&format!("let predicate = {};", pred));
+ } else {
+ fmt.line("let predicate = true;");
+ }
+ }
+
+ // Emit any runtime checks; these will rebind `predicate` emitted right above.
+ for constraint in &transform.type_env.constraints {
+ emit_runtime_typecheck(constraint, type_sets, fmt);
+ }
+
+ let do_expand = |fmt: &mut Formatter| {
+ // Emit any constants that must be created before use.
+ for (name, value) in transform.const_pool.iter() {
+ fmtln!(
+ fmt,
+ "let {} = pos.func.dfg.constants.insert(vec!{:?}.into());",
+ name,
+ value
+ );
+ }
+
+        // If we are adding new blocks, remember the original block so that we can recompute its
+        // CFG information later.
+ if !transform.block_pool.is_empty() {
+ fmt.line("let orig_block = pos.current_block().unwrap();");
+ }
+
+ // If we're going to delete `inst`, we need to detach its results first so they can be
+ // reattached during pattern expansion.
+ if !replace_inst {
+ fmt.line("pos.func.dfg.clear_results(inst);");
+ }
+
+ // Emit new block creation.
+ for block in &transform.block_pool {
+ let var = transform.var_pool.get(block.name);
+ fmtln!(fmt, "let {} = pos.func.dfg.make_block();", var.name);
+ }
+
+ // Emit the destination pattern.
+ for &def_index in &transform.dst {
+ if let Some(block) = transform.block_pool.get(def_index) {
+ let var = transform.var_pool.get(block.name);
+ fmtln!(fmt, "pos.insert_block({});", var.name);
+ }
+ emit_dst_inst(
+ transform.def_pool.get(def_index),
+ &transform.def_pool,
+ &transform.var_pool,
+ fmt,
+ );
+ }
+
+ // Insert a new block after the last instruction, if needed.
+ let def_next_index = transform.def_pool.next_index();
+ if let Some(block) = transform.block_pool.get(def_next_index) {
+ let var = transform.var_pool.get(block.name);
+ fmtln!(fmt, "pos.insert_block({});", var.name);
+ }
+
+ // Delete the original instruction if we didn't have an opportunity to replace it.
+ if !replace_inst {
+ fmt.line("let removed = pos.remove_inst();");
+ fmt.line("debug_assert_eq!(removed, inst);");
+ }
+
+ if transform.block_pool.is_empty() {
+ if transform.def_pool.get(transform.src).apply.inst.is_branch {
+ // A branch might have been legalized into multiple branches, so we need to recompute
+ // the cfg.
+ fmt.line("cfg.recompute_block(pos.func, pos.current_block().unwrap());");
+ }
+ } else {
+ // Update CFG for the new blocks.
+ fmt.line("cfg.recompute_block(pos.func, orig_block);");
+ for block in &transform.block_pool {
+ let var = transform.var_pool.get(block.name);
+ fmtln!(fmt, "cfg.recompute_block(pos.func, {});", var.name);
+ }
+ }
+
+ fmt.line("return true;");
+ };
+
+ // Guard the actual expansion by `predicate`.
+ if has_extra_constraints {
+ fmt.line("if predicate {");
+ fmt.indent(|fmt| {
+ do_expand(fmt);
+ });
+ fmt.line("}");
+ } else if let Some(pred) = &inst_predicate {
+ fmt.multi_line(&format!("if {} {{", pred));
+ fmt.indent(|fmt| {
+ do_expand(fmt);
+ });
+ fmt.line("}");
+ } else {
+ // Unconditional transform (there was no predicate), just emit it.
+ do_expand(fmt);
+ }
+}
+
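+// For orientation, each transform group compiles down to a function of roughly this shape
+// (a sketch only; the real body depends on the group's transforms and its `chain_with` group):
+//
+//   pub fn expand(inst: crate::ir::Inst, func: &mut crate::ir::Function,
+//                 cfg: &mut crate::flowgraph::ControlFlowGraph,
+//                 isa: &dyn crate::isa::TargetIsa) -> bool {
+//       let mut pos = FuncCursor::new(func).at_inst(inst);
+//       pos.use_srcloc(inst);
+//       match pos.func.dfg[inst].opcode() {
+//           ir::Opcode::Iadd => { /* unwrap operands, run predicated expansions, return true */ }
+//           _ => {},
+//       }
+//       // Nothing matched: fall through to the chained group, or report failure.
+//       false
+//   }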
+fn gen_transform_group<'a>(
+ group: &'a TransformGroup,
+ transform_groups: &TransformGroups,
+ type_sets: &mut UniqueTable<'a, TypeSet>,
+ fmt: &mut Formatter,
+) {
+ fmt.doc_comment(group.doc);
+ fmt.line("#[allow(unused_variables,unused_assignments,unused_imports,non_snake_case)]");
+
+ // Function arguments.
+ fmtln!(fmt, "pub fn {}(", group.name);
+ fmt.indent(|fmt| {
+ fmt.line("inst: crate::ir::Inst,");
+ fmt.line("func: &mut crate::ir::Function,");
+ fmt.line("cfg: &mut crate::flowgraph::ControlFlowGraph,");
+ fmt.line("isa: &dyn crate::isa::TargetIsa,");
+ });
+ fmtln!(fmt, ") -> bool {");
+
+ // Function body.
+ fmt.indent(|fmt| {
+ fmt.line("use crate::ir::InstBuilder;");
+ fmt.line("use crate::cursor::{Cursor, FuncCursor};");
+ fmt.line("let mut pos = FuncCursor::new(func).at_inst(inst);");
+ fmt.line("pos.use_srcloc(inst);");
+
+ // Group the transforms by opcode so we can generate a big switch.
+ // Preserve ordering.
+ let mut inst_to_transforms = HashMap::new();
+ for transform in &group.transforms {
+ let def_index = transform.src;
+ let inst = &transform.def_pool.get(def_index).apply.inst;
+ inst_to_transforms
+ .entry(inst.camel_name.clone())
+ .or_insert_with(Vec::new)
+ .push(transform);
+ }
+
+ let mut sorted_inst_names = Vec::from_iter(inst_to_transforms.keys());
+ sorted_inst_names.sort();
+
+ fmt.line("{");
+ fmt.indent(|fmt| {
+ fmt.line("match pos.func.dfg[inst].opcode() {");
+ fmt.indent(|fmt| {
+ for camel_name in sorted_inst_names {
+ fmtln!(fmt, "ir::Opcode::{} => {{", camel_name);
+ fmt.indent(|fmt| {
+ let transforms = inst_to_transforms.get(camel_name).unwrap();
+
+ // Unwrap the source instruction, create local variables for the input variables.
+ let replace_inst = unwrap_inst(&transforms[0], fmt);
+ fmt.empty_line();
+
+ for (i, transform) in transforms.iter().enumerate() {
+ if i > 0 {
+ fmt.empty_line();
+ }
+ gen_transform(replace_inst, transform, type_sets, fmt);
+ }
+ });
+ fmtln!(fmt, "}");
+ fmt.empty_line();
+ }
+
+ // Emit the custom transforms. The Rust compiler will complain about any overlap with
+ // the normal transforms.
+ let mut sorted_custom_legalizes = Vec::from_iter(&group.custom_legalizes);
+ sorted_custom_legalizes.sort();
+ for (inst_camel_name, func_name) in sorted_custom_legalizes {
+ fmtln!(fmt, "ir::Opcode::{} => {{", inst_camel_name);
+ fmt.indent(|fmt| {
+ fmtln!(fmt, "{}(inst, func, cfg, isa);", func_name);
+ fmt.line("return true;");
+ });
+ fmtln!(fmt, "}");
+ fmt.empty_line();
+ }
+
+ // We'll assume there are uncovered opcodes.
+ fmt.line("_ => {},");
+ });
+ fmt.line("}");
+ });
+ fmt.line("}");
+
+ // If we fall through, nothing was expanded; call the chain if any.
+ match &group.chain_with {
+ Some(group_id) => fmtln!(
+ fmt,
+ "{}(inst, func, cfg, isa)",
+ transform_groups.get(*group_id).rust_name()
+ ),
+ None => fmt.line("false"),
+ };
+ });
+ fmtln!(fmt, "}");
+ fmt.empty_line();
+}
+
+/// Generate legalization functions for `isa` and add any shared `TransformGroup`s
+/// encountered to `shared_group_names`.
+///
+/// Generate `TYPE_SETS` and `LEGALIZE_ACTIONS` tables.
+fn gen_isa(
+ isa: &TargetIsa,
+ transform_groups: &TransformGroups,
+ shared_group_names: &mut HashSet<&'static str>,
+ fmt: &mut Formatter,
+) {
+ let mut type_sets = UniqueTable::new();
+ for group_index in isa.transitive_transform_groups(transform_groups) {
+ let group = transform_groups.get(group_index);
+ match group.isa_name {
+ Some(isa_name) => {
+ assert!(
+ isa_name == isa.name,
+ "ISA-specific legalizations must be used by the same ISA"
+ );
+ gen_transform_group(group, transform_groups, &mut type_sets, fmt);
+ }
+ None => {
+ shared_group_names.insert(group.name);
+ }
+ }
+ }
+
+ gen_typesets_table(&type_sets, fmt);
+
+ let direct_groups = isa.direct_transform_groups();
+ fmtln!(
+ fmt,
+ "pub static LEGALIZE_ACTIONS: [isa::Legalize; {}] = [",
+ direct_groups.len()
+ );
+ fmt.indent(|fmt| {
+ for &group_index in direct_groups {
+ fmtln!(fmt, "{},", transform_groups.get(group_index).rust_name());
+ }
+ });
+ fmtln!(fmt, "];");
+}
+
+/// Generate the legalizer files.
+pub(crate) fn generate(
+ isas: &[TargetIsa],
+ transform_groups: &TransformGroups,
+ extra_legalization_groups: &[&'static str],
+ filename_prefix: &str,
+ out_dir: &str,
+) -> Result<(), error::Error> {
+ let mut shared_group_names = HashSet::new();
+
+ for isa in isas {
+ let mut fmt = Formatter::new();
+ gen_isa(isa, transform_groups, &mut shared_group_names, &mut fmt);
+ fmt.update_file(format!("{}-{}.rs", filename_prefix, isa.name), out_dir)?;
+ }
+
+ // Add extra legalization groups that were explicitly requested.
+ for group in extra_legalization_groups {
+ shared_group_names.insert(group);
+ }
+
+ // Generate shared legalize groups.
+ let mut fmt = Formatter::new();
+ let mut type_sets = UniqueTable::new();
+ let mut sorted_shared_group_names = Vec::from_iter(shared_group_names);
+ sorted_shared_group_names.sort();
+ for group_name in &sorted_shared_group_names {
+ let group = transform_groups.by_name(group_name);
+ gen_transform_group(group, transform_groups, &mut type_sets, &mut fmt);
+ }
+ gen_typesets_table(&type_sets, &mut fmt);
+ fmt.update_file(format!("{}r.rs", filename_prefix), out_dir)?;
+
+ Ok(())
+}
diff --git a/third_party/rust/cranelift-codegen-meta/src/gen_registers.rs b/third_party/rust/cranelift-codegen-meta/src/gen_registers.rs
new file mode 100644
index 0000000000..bd5ac95ae0
--- /dev/null
+++ b/third_party/rust/cranelift-codegen-meta/src/gen_registers.rs
@@ -0,0 +1,148 @@
+//! Generate the ISA-specific registers.
+use crate::cdsl::isa::TargetIsa;
+use crate::cdsl::regs::{RegBank, RegClass};
+use crate::error;
+use crate::srcgen::Formatter;
+use cranelift_entity::EntityRef;
+
+fn gen_regbank(fmt: &mut Formatter, reg_bank: &RegBank) {
+ let names = if !reg_bank.names.is_empty() {
+ format!(r#""{}""#, reg_bank.names.join(r#"", ""#))
+ } else {
+ "".to_string()
+ };
+ fmtln!(fmt, "RegBank {");
+ fmt.indent(|fmt| {
+ fmtln!(fmt, r#"name: "{}","#, reg_bank.name);
+ fmtln!(fmt, "first_unit: {},", reg_bank.first_unit);
+ fmtln!(fmt, "units: {},", reg_bank.units);
+ fmtln!(fmt, "names: &[{}],", names);
+ fmtln!(fmt, r#"prefix: "{}","#, reg_bank.prefix);
+ fmtln!(fmt, "first_toprc: {},", reg_bank.toprcs[0].index());
+ fmtln!(fmt, "num_toprcs: {},", reg_bank.toprcs.len());
+ fmtln!(
+ fmt,
+ "pressure_tracking: {},",
+ if reg_bank.pressure_tracking {
+ "true"
+ } else {
+ "false"
+ }
+ );
+ });
+ fmtln!(fmt, "},");
+}
+
+fn gen_regclass(isa: &TargetIsa, reg_class: &RegClass, fmt: &mut Formatter) {
+ let reg_bank = isa.regs.banks.get(reg_class.bank).unwrap();
+
+ let mask: Vec<String> = reg_class
+ .mask(reg_bank.first_unit)
+ .iter()
+ .map(|x| format!("0x{:08x}", x))
+ .collect();
+ let mask = mask.join(", ");
+
+ fmtln!(
+ fmt,
+ "pub static {}_DATA: RegClassData = RegClassData {{",
+ reg_class.name
+ );
+ fmt.indent(|fmt| {
+ fmtln!(fmt, r#"name: "{}","#, reg_class.name);
+ fmtln!(fmt, "index: {},", reg_class.index.index());
+ fmtln!(fmt, "width: {},", reg_class.width);
+ fmtln!(fmt, "bank: {},", reg_class.bank.index());
+ fmtln!(fmt, "toprc: {},", reg_class.toprc.index());
+ fmtln!(fmt, "first: {},", reg_bank.first_unit + reg_class.start);
+ fmtln!(fmt, "subclasses: {:#x},", reg_class.subclass_mask());
+ fmtln!(fmt, "mask: [{}],", mask);
+ fmtln!(
+ fmt,
+ "pinned_reg: {:?},",
+ reg_bank
+ .pinned_reg
+ .map(|index| index + reg_bank.first_unit as u16 + reg_class.start as u16)
+ );
+ fmtln!(fmt, "info: &INFO,");
+ });
+ fmtln!(fmt, "};");
+
+ fmtln!(fmt, "#[allow(dead_code)]");
+ fmtln!(
+ fmt,
+ "pub static {}: RegClass = &{}_DATA;",
+ reg_class.name,
+ reg_class.name
+ );
+}
+
+fn gen_regbank_units(reg_bank: &RegBank, fmt: &mut Formatter) {
+ for unit in 0..reg_bank.units {
+ let v = unit + reg_bank.first_unit;
+ if (unit as usize) < reg_bank.names.len() {
+ fmtln!(fmt, "{} = {},", reg_bank.names[unit as usize], v);
+ continue;
+ }
+ fmtln!(fmt, "{}{} = {},", reg_bank.prefix, unit, v);
+ }
+}
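+
+// Example of the emitted variants: for the arm32 `FloatRegs` bank defined in this crate
+// (prefix "s", 64 unnamed units), this produces `s0 = 0,` through `s63 = 63,`, with the
+// right-hand side offset by the bank's `first_unit` when the bank does not start at unit 0.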
+
+fn gen_isa(isa: &TargetIsa, fmt: &mut Formatter) {
+ // Emit RegInfo.
+ fmtln!(fmt, "pub static INFO: RegInfo = RegInfo {");
+
+ fmt.indent(|fmt| {
+ fmtln!(fmt, "banks: &[");
+ // Bank descriptors.
+ fmt.indent(|fmt| {
+ for reg_bank in isa.regs.banks.values() {
+ gen_regbank(fmt, &reg_bank);
+ }
+ });
+ fmtln!(fmt, "],");
+ // References to register classes.
+ fmtln!(fmt, "classes: &[");
+ fmt.indent(|fmt| {
+ for reg_class in isa.regs.classes.values() {
+ fmtln!(fmt, "&{}_DATA,", reg_class.name);
+ }
+ });
+ fmtln!(fmt, "],");
+ });
+ fmtln!(fmt, "};");
+
+ // Register class descriptors.
+ for rc in isa.regs.classes.values() {
+ gen_regclass(&isa, rc, fmt);
+ }
+
+ // Emit constants for all the register units.
+ fmtln!(fmt, "#[allow(dead_code, non_camel_case_types)]");
+ fmtln!(fmt, "#[derive(Clone, Copy)]");
+ fmtln!(fmt, "pub enum RU {");
+ fmt.indent(|fmt| {
+ for reg_bank in isa.regs.banks.values() {
+ gen_regbank_units(reg_bank, fmt);
+ }
+ });
+ fmtln!(fmt, "}");
+
+ // Emit Into conversion for the RU class.
+ fmtln!(fmt, "impl Into<RegUnit> for RU {");
+ fmt.indent(|fmt| {
+ fmtln!(fmt, "fn into(self) -> RegUnit {");
+ fmt.indent(|fmt| {
+ fmtln!(fmt, "self as RegUnit");
+ });
+ fmtln!(fmt, "}");
+ });
+ fmtln!(fmt, "}");
+}
+
+pub(crate) fn generate(isa: &TargetIsa, filename: &str, out_dir: &str) -> Result<(), error::Error> {
+ let mut fmt = Formatter::new();
+ gen_isa(&isa, &mut fmt);
+ fmt.update_file(filename, out_dir)?;
+ Ok(())
+}
diff --git a/third_party/rust/cranelift-codegen-meta/src/gen_settings.rs b/third_party/rust/cranelift-codegen-meta/src/gen_settings.rs
new file mode 100644
index 0000000000..2ed5941b80
--- /dev/null
+++ b/third_party/rust/cranelift-codegen-meta/src/gen_settings.rs
@@ -0,0 +1,447 @@
+//! Generate the ISA-specific settings.
+use std::collections::HashMap;
+
+use cranelift_codegen_shared::constant_hash::{generate_table, simple_hash};
+
+use crate::cdsl::camel_case;
+use crate::cdsl::settings::{
+ BoolSetting, Predicate, Preset, Setting, SettingGroup, SpecificSetting,
+};
+use crate::error;
+use crate::srcgen::{Formatter, Match};
+use crate::unique_table::UniqueSeqTable;
+
+pub(crate) enum ParentGroup {
+ None,
+ Shared,
+}
+
+/// Emits the constructor of the Flags structure.
+fn gen_constructor(group: &SettingGroup, parent: ParentGroup, fmt: &mut Formatter) {
+ let args = match parent {
+ ParentGroup::None => "builder: Builder",
+ ParentGroup::Shared => "shared: &settings::Flags, builder: Builder",
+ };
+ fmtln!(fmt, "impl Flags {");
+ fmt.indent(|fmt| {
+ fmt.doc_comment(format!("Create flags {} settings group.", group.name));
+ fmtln!(fmt, "#[allow(unused_variables)]");
+ fmtln!(fmt, "pub fn new({}) -> Self {{", args);
+ fmt.indent(|fmt| {
+ fmtln!(fmt, "let bvec = builder.state_for(\"{}\");", group.name);
+ fmtln!(
+ fmt,
+ "let mut {} = Self {{ bytes: [0; {}] }};",
+ group.name,
+ group.byte_size()
+ );
+ fmtln!(
+ fmt,
+ "debug_assert_eq!(bvec.len(), {});",
+ group.settings_size
+ );
+ fmtln!(
+ fmt,
+ "{}.bytes[0..{}].copy_from_slice(&bvec);",
+ group.name,
+ group.settings_size
+ );
+
+ // Now compute the predicates.
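+            // Worked example: predicate number 10 is stored in
+            // `bytes[bool_start_byte_offset + 1]` at bit 2, since 10 / 8 == 1 and 10 % 8 == 2.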
+ for p in &group.predicates {
+ fmt.comment(format!("Precompute #{}.", p.number));
+ fmtln!(fmt, "if {} {{", p.render(group));
+ fmt.indent(|fmt| {
+ fmtln!(
+ fmt,
+ "{}.bytes[{}] |= 1 << {};",
+ group.name,
+ group.bool_start_byte_offset + p.number / 8,
+ p.number % 8
+ );
+ });
+ fmtln!(fmt, "}");
+ }
+
+ fmtln!(fmt, group.name);
+ });
+ fmtln!(fmt, "}");
+ });
+ fmtln!(fmt, "}");
+}
+
+/// Emit Display and FromStr implementations for enum settings.
+fn gen_to_and_from_str(name: &str, values: &[&'static str], fmt: &mut Formatter) {
+ fmtln!(fmt, "impl fmt::Display for {} {{", name);
+ fmt.indent(|fmt| {
+ fmtln!(
+ fmt,
+ "fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {"
+ );
+ fmt.indent(|fmt| {
+ fmtln!(fmt, "f.write_str(match *self {");
+ fmt.indent(|fmt| {
+ for v in values.iter() {
+ fmtln!(fmt, "Self::{} => \"{}\",", camel_case(v), v);
+ }
+ });
+ fmtln!(fmt, "})");
+ });
+ fmtln!(fmt, "}");
+ });
+ fmtln!(fmt, "}");
+
+ fmtln!(fmt, "impl str::FromStr for {} {{", name);
+ fmt.indent(|fmt| {
+ fmtln!(fmt, "type Err = ();");
+ fmtln!(fmt, "fn from_str(s: &str) -> Result<Self, Self::Err> {");
+ fmt.indent(|fmt| {
+ fmtln!(fmt, "match s {");
+ fmt.indent(|fmt| {
+ for v in values.iter() {
+ fmtln!(fmt, "\"{}\" => Ok(Self::{}),", v, camel_case(v));
+ }
+ fmtln!(fmt, "_ => Err(()),");
+ });
+ fmtln!(fmt, "}");
+ });
+ fmtln!(fmt, "}");
+ });
+ fmtln!(fmt, "}");
+}
+
+/// Emit real enum for the Enum settings.
+fn gen_enum_types(group: &SettingGroup, fmt: &mut Formatter) {
+ for setting in group.settings.iter() {
+ let values = match setting.specific {
+ SpecificSetting::Bool(_) | SpecificSetting::Num(_) => continue,
+ SpecificSetting::Enum(ref values) => values,
+ };
+ let name = camel_case(setting.name);
+
+ fmt.doc_comment(format!("Values for `{}.{}`.", group.name, setting.name));
+ fmtln!(fmt, "#[derive(Debug, Copy, Clone, PartialEq, Eq, Hash)]");
+ fmtln!(fmt, "pub enum {} {{", name);
+ fmt.indent(|fmt| {
+ for v in values.iter() {
+ fmt.doc_comment(format!("`{}`.", v));
+ fmtln!(fmt, "{},", camel_case(v));
+ }
+ });
+ fmtln!(fmt, "}");
+
+ gen_to_and_from_str(&name, values, fmt);
+ }
+}
+
+/// Emit a getter function for `setting`.
+fn gen_getter(setting: &Setting, fmt: &mut Formatter) {
+ fmt.doc_comment(setting.comment);
+ match setting.specific {
+ SpecificSetting::Bool(BoolSetting {
+ predicate_number, ..
+ }) => {
+ fmtln!(fmt, "pub fn {}(&self) -> bool {{", setting.name);
+ fmt.indent(|fmt| {
+ fmtln!(fmt, "self.numbered_predicate({})", predicate_number);
+ });
+ fmtln!(fmt, "}");
+ }
+ SpecificSetting::Enum(ref values) => {
+ let ty = camel_case(setting.name);
+ fmtln!(fmt, "pub fn {}(&self) -> {} {{", setting.name, ty);
+ fmt.indent(|fmt| {
+ let mut m = Match::new(format!("self.bytes[{}]", setting.byte_offset));
+ for (i, v) in values.iter().enumerate() {
+ m.arm_no_fields(format!("{}", i), format!("{}::{}", ty, camel_case(v)));
+ }
+ m.arm_no_fields("_", "panic!(\"Invalid enum value\")");
+ fmt.add_match(m);
+ });
+ fmtln!(fmt, "}");
+ }
+ SpecificSetting::Num(_) => {
+ fmtln!(fmt, "pub fn {}(&self) -> u8 {{", setting.name);
+ fmt.indent(|fmt| {
+ fmtln!(fmt, "self.bytes[{}]", setting.byte_offset);
+ });
+ fmtln!(fmt, "}");
+ }
+ }
+}
+
+fn gen_pred_getter(predicate: &Predicate, group: &SettingGroup, fmt: &mut Formatter) {
+ fmt.doc_comment(format!("Computed predicate `{}`.", predicate.render(group)));
+ fmtln!(fmt, "pub fn {}(&self) -> bool {{", predicate.name);
+ fmt.indent(|fmt| {
+ fmtln!(fmt, "self.numbered_predicate({})", predicate.number);
+ });
+ fmtln!(fmt, "}");
+}
+
+/// Emits getters for each setting value.
+fn gen_getters(group: &SettingGroup, fmt: &mut Formatter) {
+ fmt.doc_comment("User-defined settings.");
+ fmtln!(fmt, "#[allow(dead_code)]");
+ fmtln!(fmt, "impl Flags {");
+ fmt.indent(|fmt| {
+ fmt.doc_comment("Get a view of the boolean predicates.");
+ fmtln!(
+ fmt,
+ "pub fn predicate_view(&self) -> crate::settings::PredicateView {"
+ );
+ fmt.indent(|fmt| {
+ fmtln!(
+ fmt,
+ "crate::settings::PredicateView::new(&self.bytes[{}..])",
+ group.bool_start_byte_offset
+ );
+ });
+ fmtln!(fmt, "}");
+
+ if !group.settings.is_empty() {
+ fmt.doc_comment("Dynamic numbered predicate getter.");
+ fmtln!(fmt, "fn numbered_predicate(&self, p: usize) -> bool {");
+ fmt.indent(|fmt| {
+ fmtln!(
+ fmt,
+ "self.bytes[{} + p / 8] & (1 << (p % 8)) != 0",
+ group.bool_start_byte_offset
+ );
+ });
+ fmtln!(fmt, "}");
+ }
+
+ for setting in &group.settings {
+ gen_getter(&setting, fmt);
+ }
+ for predicate in &group.predicates {
+ gen_pred_getter(&predicate, &group, fmt);
+ }
+ });
+ fmtln!(fmt, "}");
+}
+
+#[derive(Hash, PartialEq, Eq)]
+enum SettingOrPreset<'a> {
+ Setting(&'a Setting),
+ Preset(&'a Preset),
+}
+
+impl<'a> SettingOrPreset<'a> {
+ fn name(&self) -> &str {
+ match *self {
+ SettingOrPreset::Setting(s) => s.name,
+ SettingOrPreset::Preset(p) => p.name,
+ }
+ }
+}
+
+/// Emits DESCRIPTORS, ENUMERATORS, HASH_TABLE and PRESETS.
+fn gen_descriptors(group: &SettingGroup, fmt: &mut Formatter) {
+ let mut enum_table = UniqueSeqTable::new();
+
+ let mut descriptor_index_map: HashMap<SettingOrPreset, usize> = HashMap::new();
+
+ // Generate descriptors.
+ fmtln!(
+ fmt,
+ "static DESCRIPTORS: [detail::Descriptor; {}] = [",
+ group.settings.len() + group.presets.len()
+ );
+ fmt.indent(|fmt| {
+ for (idx, setting) in group.settings.iter().enumerate() {
+ fmtln!(fmt, "detail::Descriptor {");
+ fmt.indent(|fmt| {
+ fmtln!(fmt, "name: \"{}\",", setting.name);
+ fmtln!(fmt, "offset: {},", setting.byte_offset);
+ match setting.specific {
+ SpecificSetting::Bool(BoolSetting { bit_offset, .. }) => {
+ fmtln!(
+ fmt,
+ "detail: detail::Detail::Bool {{ bit: {} }},",
+ bit_offset
+ );
+ }
+ SpecificSetting::Enum(ref values) => {
+ let offset = enum_table.add(values);
+ fmtln!(
+ fmt,
+ "detail: detail::Detail::Enum {{ last: {}, enumerators: {} }},",
+ values.len() - 1,
+ offset
+ );
+ }
+ SpecificSetting::Num(_) => {
+ fmtln!(fmt, "detail: detail::Detail::Num,");
+ }
+ }
+
+ descriptor_index_map.insert(SettingOrPreset::Setting(setting), idx);
+ });
+ fmtln!(fmt, "},");
+ }
+
+ for (idx, preset) in group.presets.iter().enumerate() {
+ fmtln!(fmt, "detail::Descriptor {");
+ fmt.indent(|fmt| {
+ fmtln!(fmt, "name: \"{}\",", preset.name);
+ fmtln!(fmt, "offset: {},", (idx as u8) * group.settings_size);
+ fmtln!(fmt, "detail: detail::Detail::Preset,");
+ });
+ fmtln!(fmt, "},");
+
+ let whole_idx = idx + group.settings.len();
+ descriptor_index_map.insert(SettingOrPreset::Preset(preset), whole_idx);
+ }
+ });
+ fmtln!(fmt, "];");
+
+ // Generate enumerators.
+ fmtln!(fmt, "static ENUMERATORS: [&str; {}] = [", enum_table.len());
+ fmt.indent(|fmt| {
+ for enum_val in enum_table.iter() {
+ fmtln!(fmt, "\"{}\",", enum_val);
+ }
+ });
+ fmtln!(fmt, "];");
+
+ // Generate hash table.
+ let mut hash_entries: Vec<SettingOrPreset> = Vec::new();
+ hash_entries.extend(group.settings.iter().map(|x| SettingOrPreset::Setting(x)));
+ hash_entries.extend(group.presets.iter().map(|x| SettingOrPreset::Preset(x)));
+
+ let hash_table = generate_table(hash_entries.iter(), hash_entries.len(), |entry| {
+ simple_hash(entry.name())
+ });
+ fmtln!(fmt, "static HASH_TABLE: [u16; {}] = [", hash_table.len());
+ fmt.indent(|fmt| {
+ for h in &hash_table {
+ match *h {
+ Some(setting_or_preset) => fmtln!(
+ fmt,
+ "{},",
+ &descriptor_index_map
+ .get(setting_or_preset)
+ .unwrap()
+ .to_string()
+ ),
+ None => fmtln!(fmt, "0xffff,"),
+ }
+ }
+ });
+ fmtln!(fmt, "];");
+
+ // Generate presets.
+ fmtln!(
+ fmt,
+ "static PRESETS: [(u8, u8); {}] = [",
+ group.presets.len() * (group.settings_size as usize)
+ );
+ fmt.indent(|fmt| {
+ for preset in &group.presets {
+ fmt.comment(preset.name);
+ for (mask, value) in preset.layout(&group) {
+ fmtln!(fmt, "(0b{:08b}, 0b{:08b}),", mask, value);
+ }
+ }
+ });
+ fmtln!(fmt, "];");
+}
+
+fn gen_template(group: &SettingGroup, fmt: &mut Formatter) {
+ let mut default_bytes: Vec<u8> = vec![0; group.settings_size as usize];
+ for setting in &group.settings {
+ *default_bytes.get_mut(setting.byte_offset as usize).unwrap() |= setting.default_byte();
+ }
+
+ let default_bytes: Vec<String> = default_bytes
+ .iter()
+ .map(|x| format!("{:#04x}", x))
+ .collect();
+ let default_bytes_str = default_bytes.join(", ");
+
+ fmtln!(
+ fmt,
+ "static TEMPLATE: detail::Template = detail::Template {"
+ );
+ fmt.indent(|fmt| {
+ fmtln!(fmt, "name: \"{}\",", group.name);
+ fmtln!(fmt, "descriptors: &DESCRIPTORS,");
+ fmtln!(fmt, "enumerators: &ENUMERATORS,");
+ fmtln!(fmt, "hash_table: &HASH_TABLE,");
+ fmtln!(fmt, "defaults: &[{}],", default_bytes_str);
+ fmtln!(fmt, "presets: &PRESETS,");
+ });
+ fmtln!(fmt, "};");
+
+ fmt.doc_comment(format!(
+ "Create a `settings::Builder` for the {} settings group.",
+ group.name
+ ));
+ fmtln!(fmt, "pub fn builder() -> Builder {");
+ fmt.indent(|fmt| {
+ fmtln!(fmt, "Builder::new(&TEMPLATE)");
+ });
+ fmtln!(fmt, "}");
+}
+
+fn gen_display(group: &SettingGroup, fmt: &mut Formatter) {
+ fmtln!(fmt, "impl fmt::Display for Flags {");
+ fmt.indent(|fmt| {
+ fmtln!(
+ fmt,
+ "fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {"
+ );
+ fmt.indent(|fmt| {
+ fmtln!(fmt, "writeln!(f, \"[{}]\")?;", group.name);
+ fmtln!(fmt, "for d in &DESCRIPTORS {");
+ fmt.indent(|fmt| {
+ fmtln!(fmt, "if !d.detail.is_preset() {");
+ fmt.indent(|fmt| {
+ fmtln!(fmt, "write!(f, \"{} = \", d.name)?;");
+ fmtln!(
+ fmt,
+ "TEMPLATE.format_toml_value(d.detail, self.bytes[d.offset as usize], f)?;",
+ );
+ fmtln!(fmt, "writeln!(f)?;");
+ });
+ fmtln!(fmt, "}");
+ });
+ fmtln!(fmt, "}");
+ fmtln!(fmt, "Ok(())");
+ });
+ fmtln!(fmt, "}")
+ });
+ fmtln!(fmt, "}");
+}
+
+fn gen_group(group: &SettingGroup, parent: ParentGroup, fmt: &mut Formatter) {
+ // Generate struct.
+ fmtln!(fmt, "#[derive(Clone)]");
+ fmt.doc_comment(format!("Flags group `{}`.", group.name));
+ fmtln!(fmt, "pub struct Flags {");
+ fmt.indent(|fmt| {
+ fmtln!(fmt, "bytes: [u8; {}],", group.byte_size());
+ });
+ fmtln!(fmt, "}");
+
+ gen_constructor(group, parent, fmt);
+ gen_enum_types(group, fmt);
+ gen_getters(group, fmt);
+ gen_descriptors(group, fmt);
+ gen_template(group, fmt);
+ gen_display(group, fmt);
+}
+
+pub(crate) fn generate(
+ settings: &SettingGroup,
+ parent_group: ParentGroup,
+ filename: &str,
+ out_dir: &str,
+) -> Result<(), error::Error> {
+ let mut fmt = Formatter::new();
+ gen_group(&settings, parent_group, &mut fmt);
+ fmt.update_file(filename, out_dir)?;
+ Ok(())
+}
diff --git a/third_party/rust/cranelift-codegen-meta/src/gen_types.rs b/third_party/rust/cranelift-codegen-meta/src/gen_types.rs
new file mode 100644
index 0000000000..6ced212b8d
--- /dev/null
+++ b/third_party/rust/cranelift-codegen-meta/src/gen_types.rs
@@ -0,0 +1,76 @@
+//! Generate sources with type info.
+//!
+//! This generates a `types.rs` file which is included in
+//! `cranelift-codegen/ir/types.rs`. The file provides constant definitions for the
+//! most commonly used types, including all of the scalar types.
+//!
+//! This ensures that the metaprogram and the generated program see the same
+//! type numbering.
+
+use crate::cdsl::types as cdsl_types;
+use crate::error;
+use crate::srcgen;
+
+/// Emit a constant definition of a single value type.
+fn emit_type(ty: &cdsl_types::ValueType, fmt: &mut srcgen::Formatter) -> Result<(), error::Error> {
+ let name = ty.to_string().to_uppercase();
+ let number = ty.number().ok_or_else(|| {
+ error::Error::with_msg(format!(
+ "Could not emit type `{}` which has no number.",
+ name
+ ))
+ })?;
+
+ fmt.doc_comment(&ty.doc());
+ fmtln!(fmt, "pub const {}: Type = Type({:#x});\n", name, number);
+
+ Ok(())
+}
+
+/// Emit definition for all vector types with `bits` total size.
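+///
+/// Worked example: with `bits = 128`, `vec_size` is 16 bytes, so an `i32` lane (4 bytes)
+/// yields 16 / 4 = 4 lanes and the `I32X4` constant is emitted; lanes as wide as the whole
+/// vector are filtered out, so no single-lane "vector" is defined here.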
+fn emit_vectors(bits: u64, fmt: &mut srcgen::Formatter) -> Result<(), error::Error> {
+ let vec_size: u64 = bits / 8;
+ for vec in cdsl_types::ValueType::all_lane_types()
+ .map(|ty| (ty, cdsl_types::ValueType::from(ty).membytes()))
+ .filter(|&(_, lane_size)| lane_size != 0 && lane_size < vec_size)
+ .map(|(ty, lane_size)| (ty, vec_size / lane_size))
+ .map(|(ty, lanes)| cdsl_types::VectorType::new(ty, lanes))
+ {
+ emit_type(&cdsl_types::ValueType::from(vec), fmt)?;
+ }
+
+ Ok(())
+}
+
+/// Emit types using the given formatter object.
+fn emit_types(fmt: &mut srcgen::Formatter) -> Result<(), error::Error> {
+ // Emit all of the special types, such as types for CPU flags.
+ for spec in cdsl_types::ValueType::all_special_types().map(cdsl_types::ValueType::from) {
+ emit_type(&spec, fmt)?;
+ }
+
+    // Emit all of the lane types, such as integers, floats, and booleans.
+ for ty in cdsl_types::ValueType::all_lane_types().map(cdsl_types::ValueType::from) {
+ emit_type(&ty, fmt)?;
+ }
+
+ // Emit all reference types.
+ for ty in cdsl_types::ValueType::all_reference_types().map(cdsl_types::ValueType::from) {
+ emit_type(&ty, fmt)?;
+ }
+
+ // Emit vector definitions for common SIMD sizes.
+ for vec_size in &[64_u64, 128, 256, 512] {
+ emit_vectors(*vec_size, fmt)?;
+ }
+
+ Ok(())
+}
+
+/// Generate the types file.
+pub(crate) fn generate(filename: &str, out_dir: &str) -> Result<(), error::Error> {
+ let mut fmt = srcgen::Formatter::new();
+ emit_types(&mut fmt)?;
+ fmt.update_file(filename, out_dir)?;
+ Ok(())
+}
diff --git a/third_party/rust/cranelift-codegen-meta/src/isa/arm32/mod.rs b/third_party/rust/cranelift-codegen-meta/src/isa/arm32/mod.rs
new file mode 100644
index 0000000000..f699ece8eb
--- /dev/null
+++ b/third_party/rust/cranelift-codegen-meta/src/isa/arm32/mod.rs
@@ -0,0 +1,88 @@
+use crate::cdsl::cpu_modes::CpuMode;
+use crate::cdsl::instructions::{InstructionGroupBuilder, InstructionPredicateMap};
+use crate::cdsl::isa::TargetIsa;
+use crate::cdsl::recipes::Recipes;
+use crate::cdsl::regs::{IsaRegs, IsaRegsBuilder, RegBankBuilder, RegClassBuilder};
+use crate::cdsl::settings::{SettingGroup, SettingGroupBuilder};
+
+use crate::shared::Definitions as SharedDefinitions;
+
+fn define_settings(_shared: &SettingGroup) -> SettingGroup {
+ let setting = SettingGroupBuilder::new("arm32");
+ setting.build()
+}
+
+fn define_regs() -> IsaRegs {
+ let mut regs = IsaRegsBuilder::new();
+
+ let builder = RegBankBuilder::new("FloatRegs", "s")
+ .units(64)
+ .track_pressure(true);
+ let float_regs = regs.add_bank(builder);
+
+ let builder = RegBankBuilder::new("IntRegs", "r")
+ .units(16)
+ .track_pressure(true);
+ let int_regs = regs.add_bank(builder);
+
+ let builder = RegBankBuilder::new("FlagRegs", "")
+ .units(1)
+ .names(vec!["nzcv"])
+ .track_pressure(false);
+ let flag_reg = regs.add_bank(builder);
+
+ let builder = RegClassBuilder::new_toplevel("S", float_regs).count(32);
+ regs.add_class(builder);
+
+ let builder = RegClassBuilder::new_toplevel("D", float_regs).width(2);
+ regs.add_class(builder);
+
+ let builder = RegClassBuilder::new_toplevel("Q", float_regs).width(4);
+ regs.add_class(builder);
+
+ let builder = RegClassBuilder::new_toplevel("GPR", int_regs);
+ regs.add_class(builder);
+
+ let builder = RegClassBuilder::new_toplevel("FLAG", flag_reg);
+ regs.add_class(builder);
+
+ regs.build()
+}
+
+pub(crate) fn define(shared_defs: &mut SharedDefinitions) -> TargetIsa {
+ let settings = define_settings(&shared_defs.settings);
+ let regs = define_regs();
+
+ let inst_group = InstructionGroupBuilder::new(&mut shared_defs.all_instructions).build();
+
+ // CPU modes for 32-bit ARM and Thumb2.
+ let mut a32 = CpuMode::new("A32");
+ let mut t32 = CpuMode::new("T32");
+
+ // TODO refine these.
+ let narrow_flags = shared_defs.transform_groups.by_name("narrow_flags");
+ a32.legalize_default(narrow_flags);
+ t32.legalize_default(narrow_flags);
+
+ // Make sure that the expand code is used, thus generated.
+ let expand = shared_defs.transform_groups.by_name("expand");
+ a32.legalize_monomorphic(expand);
+
+ let cpu_modes = vec![a32, t32];
+
+ // TODO implement arm32 recipes.
+ let recipes = Recipes::new();
+
+ // TODO implement arm32 encodings and predicates.
+ let encodings_predicates = InstructionPredicateMap::new();
+
+ TargetIsa::new(
+ "arm32",
+ inst_group,
+ settings,
+ regs,
+ recipes,
+ cpu_modes,
+ encodings_predicates,
+ )
+}
diff --git a/third_party/rust/cranelift-codegen-meta/src/isa/arm64/mod.rs b/third_party/rust/cranelift-codegen-meta/src/isa/arm64/mod.rs
new file mode 100644
index 0000000000..5d8bc76fc4
--- /dev/null
+++ b/third_party/rust/cranelift-codegen-meta/src/isa/arm64/mod.rs
@@ -0,0 +1,79 @@
+use crate::cdsl::cpu_modes::CpuMode;
+use crate::cdsl::instructions::{InstructionGroupBuilder, InstructionPredicateMap};
+use crate::cdsl::isa::TargetIsa;
+use crate::cdsl::recipes::Recipes;
+use crate::cdsl::regs::{IsaRegs, IsaRegsBuilder, RegBankBuilder, RegClassBuilder};
+use crate::cdsl::settings::{SettingGroup, SettingGroupBuilder};
+
+use crate::shared::Definitions as SharedDefinitions;
+
+fn define_settings(_shared: &SettingGroup) -> SettingGroup {
+ let setting = SettingGroupBuilder::new("arm64");
+ setting.build()
+}
+
+fn define_registers() -> IsaRegs {
+ let mut regs = IsaRegsBuilder::new();
+
+ // The `x31` regunit serves as the stack pointer / zero register depending on context. We
+ // reserve it and don't model the difference.
+ let builder = RegBankBuilder::new("IntRegs", "x")
+ .units(32)
+ .track_pressure(true);
+ let int_regs = regs.add_bank(builder);
+
+ let builder = RegBankBuilder::new("FloatRegs", "v")
+ .units(32)
+ .track_pressure(true);
+ let float_regs = regs.add_bank(builder);
+
+ let builder = RegBankBuilder::new("FlagRegs", "")
+ .units(1)
+ .names(vec!["nzcv"])
+ .track_pressure(false);
+ let flag_reg = regs.add_bank(builder);
+
+ let builder = RegClassBuilder::new_toplevel("GPR", int_regs);
+ regs.add_class(builder);
+
+ let builder = RegClassBuilder::new_toplevel("FPR", float_regs);
+ regs.add_class(builder);
+
+ let builder = RegClassBuilder::new_toplevel("FLAG", flag_reg);
+ regs.add_class(builder);
+
+ regs.build()
+}
+
+pub(crate) fn define(shared_defs: &mut SharedDefinitions) -> TargetIsa {
+ let settings = define_settings(&shared_defs.settings);
+ let regs = define_registers();
+
+ let inst_group = InstructionGroupBuilder::new(&mut shared_defs.all_instructions).build();
+
+ let mut a64 = CpuMode::new("A64");
+
+ // TODO refine these.
+ let expand_flags = shared_defs.transform_groups.by_name("expand_flags");
+ let narrow_flags = shared_defs.transform_groups.by_name("narrow_flags");
+ a64.legalize_monomorphic(expand_flags);
+ a64.legalize_default(narrow_flags);
+
+ let cpu_modes = vec![a64];
+
+ // TODO implement arm64 recipes.
+ let recipes = Recipes::new();
+
+ // TODO implement arm64 encodings and predicates.
+ let encodings_predicates = InstructionPredicateMap::new();
+
+ TargetIsa::new(
+ "arm64",
+ inst_group,
+ settings,
+ regs,
+ recipes,
+ cpu_modes,
+ encodings_predicates,
+ )
+}
diff --git a/third_party/rust/cranelift-codegen-meta/src/isa/mod.rs b/third_party/rust/cranelift-codegen-meta/src/isa/mod.rs
new file mode 100644
index 0000000000..ed8db85f0d
--- /dev/null
+++ b/third_party/rust/cranelift-codegen-meta/src/isa/mod.rs
@@ -0,0 +1,67 @@
+//! Define supported ISAs; includes ISA-specific instructions, encodings, registers, settings, etc.
+use crate::cdsl::isa::TargetIsa;
+use crate::shared::Definitions as SharedDefinitions;
+use std::fmt;
+
+mod arm32;
+mod arm64;
+mod riscv;
+pub(crate) mod x86;
+
+/// Represents a known ISA target.
+#[derive(PartialEq, Copy, Clone)]
+pub enum Isa {
+ Riscv,
+ X86,
+ Arm32,
+ Arm64,
+}
+
+impl Isa {
+    /// Creates an ISA target from its name.
+ pub fn from_name(name: &str) -> Option<Self> {
+ Isa::all()
+ .iter()
+ .cloned()
+ .find(|isa| isa.to_string() == name)
+ }
+
+    /// Creates an ISA target from an architecture name.
+ pub fn from_arch(arch: &str) -> Option<Self> {
+ match arch {
+ "riscv" => Some(Isa::Riscv),
+ "aarch64" => Some(Isa::Arm64),
+ x if ["x86_64", "i386", "i586", "i686"].contains(&x) => Some(Isa::X86),
+ x if x.starts_with("arm") || arch.starts_with("thumb") => Some(Isa::Arm32),
+ _ => None,
+ }
+ }
+
+    /// Returns all supported ISA targets.
+ pub fn all() -> &'static [Isa] {
+ &[Isa::Riscv, Isa::X86, Isa::Arm32, Isa::Arm64]
+ }
+}
+
+impl fmt::Display for Isa {
+ // These names should be kept in sync with the crate features.
+ fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
+ match *self {
+ Isa::Riscv => write!(f, "riscv"),
+ Isa::X86 => write!(f, "x86"),
+ Isa::Arm32 => write!(f, "arm32"),
+ Isa::Arm64 => write!(f, "arm64"),
+ }
+ }
+}
+
+pub(crate) fn define(isas: &[Isa], shared_defs: &mut SharedDefinitions) -> Vec<TargetIsa> {
+ isas.iter()
+ .map(|isa| match isa {
+ Isa::Riscv => riscv::define(shared_defs),
+ Isa::X86 => x86::define(shared_defs),
+ Isa::Arm32 => arm32::define(shared_defs),
+ Isa::Arm64 => arm64::define(shared_defs),
+ })
+ .collect()
+}
diff --git a/third_party/rust/cranelift-codegen-meta/src/isa/riscv/encodings.rs b/third_party/rust/cranelift-codegen-meta/src/isa/riscv/encodings.rs
new file mode 100644
index 0000000000..c255ddb483
--- /dev/null
+++ b/third_party/rust/cranelift-codegen-meta/src/isa/riscv/encodings.rs
@@ -0,0 +1,431 @@
+use crate::cdsl::ast::{Apply, Expr, Literal, VarPool};
+use crate::cdsl::encodings::{Encoding, EncodingBuilder};
+use crate::cdsl::instructions::{
+ Bindable, BoundInstruction, InstSpec, InstructionPredicateNode, InstructionPredicateRegistry,
+};
+use crate::cdsl::recipes::{EncodingRecipeNumber, Recipes};
+use crate::cdsl::settings::SettingGroup;
+
+use crate::shared::types::Bool::B1;
+use crate::shared::types::Float::{F32, F64};
+use crate::shared::types::Int::{I16, I32, I64, I8};
+use crate::shared::types::Reference::{R32, R64};
+use crate::shared::Definitions as SharedDefinitions;
+
+use super::recipes::RecipeGroup;
+
+pub(crate) struct PerCpuModeEncodings<'defs> {
+ pub inst_pred_reg: InstructionPredicateRegistry,
+ pub enc32: Vec<Encoding>,
+ pub enc64: Vec<Encoding>,
+ recipes: &'defs Recipes,
+}
+
+impl<'defs> PerCpuModeEncodings<'defs> {
+ fn new(recipes: &'defs Recipes) -> Self {
+ Self {
+ inst_pred_reg: InstructionPredicateRegistry::new(),
+ enc32: Vec::new(),
+ enc64: Vec::new(),
+ recipes,
+ }
+ }
+ fn enc(
+ &self,
+ inst: impl Into<InstSpec>,
+ recipe: EncodingRecipeNumber,
+ bits: u16,
+ ) -> EncodingBuilder {
+ EncodingBuilder::new(inst.into(), recipe, bits)
+ }
+ fn add32(&mut self, encoding: EncodingBuilder) {
+ self.enc32
+ .push(encoding.build(self.recipes, &mut self.inst_pred_reg));
+ }
+ fn add64(&mut self, encoding: EncodingBuilder) {
+ self.enc64
+ .push(encoding.build(self.recipes, &mut self.inst_pred_reg));
+ }
+}
+
+// The low 7 bits of a RISC-V instruction form the base opcode. All 32-bit instructions have 11 as
+// the two low bits, with bits 6:2 determining the base opcode.
+//
+// Encbits for the 32-bit recipes are opcode[6:2] | (funct3 << 5) | ...
+// The functions below encode the encbits.
+
+fn load_bits(funct3: u16) -> u16 {
+ assert!(funct3 <= 0b111);
+ funct3 << 5
+}
+
+fn store_bits(funct3: u16) -> u16 {
+ assert!(funct3 <= 0b111);
+ 0b01000 | (funct3 << 5)
+}
+
+fn branch_bits(funct3: u16) -> u16 {
+ assert!(funct3 <= 0b111);
+ 0b11000 | (funct3 << 5)
+}
+
+fn jalr_bits() -> u16 {
+    // This previously accepted a 3-bit funct3 argument and used the formula:
+    // 0b11001 | (funct3 << 5)
+ 0b11001
+}
+
+fn jal_bits() -> u16 {
+ 0b11011
+}
+
+fn opimm_bits(funct3: u16, funct7: u16) -> u16 {
+ assert!(funct3 <= 0b111);
+ 0b00100 | (funct3 << 5) | (funct7 << 8)
+}
+
+fn opimm32_bits(funct3: u16, funct7: u16) -> u16 {
+ assert!(funct3 <= 0b111);
+ 0b00110 | (funct3 << 5) | (funct7 << 8)
+}
+
+fn op_bits(funct3: u16, funct7: u16) -> u16 {
+ assert!(funct3 <= 0b111);
+ assert!(funct7 <= 0b111_1111);
+ 0b01100 | (funct3 << 5) | (funct7 << 8)
+}
+
+fn op32_bits(funct3: u16, funct7: u16) -> u16 {
+ assert!(funct3 <= 0b111);
+ assert!(funct7 <= 0b111_1111);
+ 0b01110 | (funct3 << 5) | (funct7 << 8)
+}
+
+fn lui_bits() -> u16 {
+ 0b01101
+}
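+
+// Illustrative sanity check of how the encbits above compose out of the base opcode, funct3 and
+// funct7 fields; a minimal sketch relying only on the helpers in this file, kept in a test module
+// so it has no effect on the generated encodings.
+#[cfg(test)]
+mod encbits_examples {
+    use super::*;
+
+    #[test]
+    fn encbits_compose_as_documented() {
+        // R-type `isub` uses funct3 = 0b000 and funct7 = 0b010_0000 (see `define` below):
+        // the base opcode occupies bits 0..5, funct3 bits 5..8, and funct7 bits 8..15.
+        assert_eq!(op_bits(0b000, 0b010_0000), 0b01100 | (0b010_0000 << 8));
+        // Stores use base opcode 0b01000, with funct3 selecting the access width.
+        assert_eq!(store_bits(0b010), 0b01000 | (0b010 << 5));
+    }
+}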
+
+pub(crate) fn define<'defs>(
+ shared_defs: &'defs SharedDefinitions,
+ isa_settings: &SettingGroup,
+ recipes: &'defs RecipeGroup,
+) -> PerCpuModeEncodings<'defs> {
+ // Instructions shorthands.
+ let shared = &shared_defs.instructions;
+
+ let band = shared.by_name("band");
+ let band_imm = shared.by_name("band_imm");
+ let bor = shared.by_name("bor");
+ let bor_imm = shared.by_name("bor_imm");
+ let br_icmp = shared.by_name("br_icmp");
+ let brz = shared.by_name("brz");
+ let brnz = shared.by_name("brnz");
+ let bxor = shared.by_name("bxor");
+ let bxor_imm = shared.by_name("bxor_imm");
+ let call = shared.by_name("call");
+ let call_indirect = shared.by_name("call_indirect");
+ let copy = shared.by_name("copy");
+ let copy_nop = shared.by_name("copy_nop");
+ let copy_to_ssa = shared.by_name("copy_to_ssa");
+ let fill = shared.by_name("fill");
+ let fill_nop = shared.by_name("fill_nop");
+ let iadd = shared.by_name("iadd");
+ let iadd_imm = shared.by_name("iadd_imm");
+ let iconst = shared.by_name("iconst");
+ let icmp = shared.by_name("icmp");
+ let icmp_imm = shared.by_name("icmp_imm");
+ let imul = shared.by_name("imul");
+ let ishl = shared.by_name("ishl");
+ let ishl_imm = shared.by_name("ishl_imm");
+ let isub = shared.by_name("isub");
+ let jump = shared.by_name("jump");
+ let regmove = shared.by_name("regmove");
+ let spill = shared.by_name("spill");
+ let sshr = shared.by_name("sshr");
+ let sshr_imm = shared.by_name("sshr_imm");
+ let ushr = shared.by_name("ushr");
+ let ushr_imm = shared.by_name("ushr_imm");
+ let return_ = shared.by_name("return");
+
+ // Recipes shorthands, prefixed with r_.
+ let r_copytossa = recipes.by_name("copytossa");
+ let r_fillnull = recipes.by_name("fillnull");
+ let r_icall = recipes.by_name("Icall");
+ let r_icopy = recipes.by_name("Icopy");
+ let r_ii = recipes.by_name("Ii");
+ let r_iicmp = recipes.by_name("Iicmp");
+ let r_iret = recipes.by_name("Iret");
+ let r_irmov = recipes.by_name("Irmov");
+ let r_iz = recipes.by_name("Iz");
+ let r_gp_sp = recipes.by_name("GPsp");
+ let r_gp_fi = recipes.by_name("GPfi");
+ let r_r = recipes.by_name("R");
+ let r_ricmp = recipes.by_name("Ricmp");
+ let r_rshamt = recipes.by_name("Rshamt");
+ let r_sb = recipes.by_name("SB");
+ let r_sb_zero = recipes.by_name("SBzero");
+ let r_stacknull = recipes.by_name("stacknull");
+ let r_u = recipes.by_name("U");
+ let r_uj = recipes.by_name("UJ");
+ let r_uj_call = recipes.by_name("UJcall");
+
+ // Predicates shorthands.
+ let use_m = isa_settings.predicate_by_name("use_m");
+
+ // Definitions.
+ let mut e = PerCpuModeEncodings::new(&recipes.recipes);
+
+ // Basic arithmetic binary instructions are encoded in an R-type instruction.
+ for &(inst, inst_imm, f3, f7) in &[
+ (iadd, Some(iadd_imm), 0b000, 0b000_0000),
+ (isub, None, 0b000, 0b010_0000),
+ (bxor, Some(bxor_imm), 0b100, 0b000_0000),
+ (bor, Some(bor_imm), 0b110, 0b000_0000),
+ (band, Some(band_imm), 0b111, 0b000_0000),
+ ] {
+ e.add32(e.enc(inst.bind(I32), r_r, op_bits(f3, f7)));
+ e.add64(e.enc(inst.bind(I64), r_r, op_bits(f3, f7)));
+
+ // Immediate versions for add/xor/or/and.
+ if let Some(inst_imm) = inst_imm {
+ e.add32(e.enc(inst_imm.bind(I32), r_ii, opimm_bits(f3, 0)));
+ e.add64(e.enc(inst_imm.bind(I64), r_ii, opimm_bits(f3, 0)));
+ }
+ }
+
+ // 32-bit ops in RV64.
+ e.add64(e.enc(iadd.bind(I32), r_r, op32_bits(0b000, 0b000_0000)));
+ e.add64(e.enc(isub.bind(I32), r_r, op32_bits(0b000, 0b010_0000)));
+ // There are no andiw/oriw/xoriw variations.
+ e.add64(e.enc(iadd_imm.bind(I32), r_ii, opimm32_bits(0b000, 0)));
+
+ // Use iadd_imm with %x0 to materialize constants.
+ e.add32(e.enc(iconst.bind(I32), r_iz, opimm_bits(0b0, 0)));
+ e.add64(e.enc(iconst.bind(I32), r_iz, opimm_bits(0b0, 0)));
+ e.add64(e.enc(iconst.bind(I64), r_iz, opimm_bits(0b0, 0)));
+
+ // Dynamic shifts have the same masking semantics as the clif base instructions.
+ for &(inst, inst_imm, f3, f7) in &[
+ (ishl, ishl_imm, 0b1, 0b0),
+ (ushr, ushr_imm, 0b101, 0b0),
+ (sshr, sshr_imm, 0b101, 0b10_0000),
+ ] {
+ e.add32(e.enc(inst.bind(I32).bind(I32), r_r, op_bits(f3, f7)));
+ e.add64(e.enc(inst.bind(I64).bind(I64), r_r, op_bits(f3, f7)));
+ e.add64(e.enc(inst.bind(I32).bind(I32), r_r, op32_bits(f3, f7)));
+ // Allow i32 shift amounts in 64-bit shifts.
+ e.add64(e.enc(inst.bind(I64).bind(I32), r_r, op_bits(f3, f7)));
+ e.add64(e.enc(inst.bind(I32).bind(I64), r_r, op32_bits(f3, f7)));
+
+ // Immediate shifts.
+ e.add32(e.enc(inst_imm.bind(I32), r_rshamt, opimm_bits(f3, f7)));
+ e.add64(e.enc(inst_imm.bind(I64), r_rshamt, opimm_bits(f3, f7)));
+ e.add64(e.enc(inst_imm.bind(I32), r_rshamt, opimm32_bits(f3, f7)));
+ }
+
+ // Signed and unsigned integer 'less than'. There are no 'w' variants for comparing 32-bit
+ // numbers in RV64.
+ {
+ let mut var_pool = VarPool::new();
+
+ // Helper that creates an instruction predicate for an instruction in the icmp family.
+ let mut icmp_instp = |bound_inst: &BoundInstruction,
+ intcc_field: &'static str|
+ -> InstructionPredicateNode {
+ let x = var_pool.create("x");
+ let y = var_pool.create("y");
+ let cc = Literal::enumerator_for(&shared_defs.imm.intcc, intcc_field);
+ Apply::new(
+ bound_inst.clone().into(),
+ vec![Expr::Literal(cc), Expr::Var(x), Expr::Var(y)],
+ )
+ .inst_predicate(&var_pool)
+ .unwrap()
+ };
+
+ let icmp_i32 = icmp.bind(I32);
+ let icmp_i64 = icmp.bind(I64);
+ e.add32(
+ e.enc(icmp_i32.clone(), r_ricmp, op_bits(0b010, 0b000_0000))
+ .inst_predicate(icmp_instp(&icmp_i32, "slt")),
+ );
+ e.add64(
+ e.enc(icmp_i64.clone(), r_ricmp, op_bits(0b010, 0b000_0000))
+ .inst_predicate(icmp_instp(&icmp_i64, "slt")),
+ );
+
+ e.add32(
+ e.enc(icmp_i32.clone(), r_ricmp, op_bits(0b011, 0b000_0000))
+ .inst_predicate(icmp_instp(&icmp_i32, "ult")),
+ );
+ e.add64(
+ e.enc(icmp_i64.clone(), r_ricmp, op_bits(0b011, 0b000_0000))
+ .inst_predicate(icmp_instp(&icmp_i64, "ult")),
+ );
+
+ // Immediate variants.
+ let icmp_i32 = icmp_imm.bind(I32);
+ let icmp_i64 = icmp_imm.bind(I64);
+ e.add32(
+ e.enc(icmp_i32.clone(), r_iicmp, opimm_bits(0b010, 0))
+ .inst_predicate(icmp_instp(&icmp_i32, "slt")),
+ );
+ e.add64(
+ e.enc(icmp_i64.clone(), r_iicmp, opimm_bits(0b010, 0))
+ .inst_predicate(icmp_instp(&icmp_i64, "slt")),
+ );
+
+ e.add32(
+ e.enc(icmp_i32.clone(), r_iicmp, opimm_bits(0b011, 0))
+ .inst_predicate(icmp_instp(&icmp_i32, "ult")),
+ );
+ e.add64(
+ e.enc(icmp_i64.clone(), r_iicmp, opimm_bits(0b011, 0))
+ .inst_predicate(icmp_instp(&icmp_i64, "ult")),
+ );
+ }
+
+ // Integer constants with the low 12 bits clear are materialized by lui.
+ e.add32(e.enc(iconst.bind(I32), r_u, lui_bits()));
+ e.add64(e.enc(iconst.bind(I32), r_u, lui_bits()));
+ e.add64(e.enc(iconst.bind(I64), r_u, lui_bits()));
+
+ // "M" Standard Extension for Integer Multiplication and Division.
+ // Gated by the `use_m` flag.
+ e.add32(
+ e.enc(imul.bind(I32), r_r, op_bits(0b000, 0b0000_0001))
+ .isa_predicate(use_m),
+ );
+ e.add64(
+ e.enc(imul.bind(I64), r_r, op_bits(0b000, 0b0000_0001))
+ .isa_predicate(use_m),
+ );
+ e.add64(
+ e.enc(imul.bind(I32), r_r, op32_bits(0b000, 0b0000_0001))
+ .isa_predicate(use_m),
+ );
+
+ // Control flow.
+
+ // Unconditional branches.
+ e.add32(e.enc(jump, r_uj, jal_bits()));
+ e.add64(e.enc(jump, r_uj, jal_bits()));
+ e.add32(e.enc(call, r_uj_call, jal_bits()));
+ e.add64(e.enc(call, r_uj_call, jal_bits()));
+
+ // Conditional branches.
+ {
+ let mut var_pool = VarPool::new();
+
+ // Helper that creates an instruction predicate for an instruction in the icmp family.
+ let mut br_icmp_instp = |bound_inst: &BoundInstruction,
+ intcc_field: &'static str|
+ -> InstructionPredicateNode {
+ let x = var_pool.create("x");
+ let y = var_pool.create("y");
+ let dest = var_pool.create("dest");
+ let args = var_pool.create("args");
+ let cc = Literal::enumerator_for(&shared_defs.imm.intcc, intcc_field);
+ Apply::new(
+ bound_inst.clone().into(),
+ vec![
+ Expr::Literal(cc),
+ Expr::Var(x),
+ Expr::Var(y),
+ Expr::Var(dest),
+ Expr::Var(args),
+ ],
+ )
+ .inst_predicate(&var_pool)
+ .unwrap()
+ };
+
+ let br_icmp_i32 = br_icmp.bind(I32);
+ let br_icmp_i64 = br_icmp.bind(I64);
+ for &(cond, f3) in &[
+ ("eq", 0b000),
+ ("ne", 0b001),
+ ("slt", 0b100),
+ ("sge", 0b101),
+ ("ult", 0b110),
+ ("uge", 0b111),
+ ] {
+ e.add32(
+ e.enc(br_icmp_i32.clone(), r_sb, branch_bits(f3))
+ .inst_predicate(br_icmp_instp(&br_icmp_i32, cond)),
+ );
+ e.add64(
+ e.enc(br_icmp_i64.clone(), r_sb, branch_bits(f3))
+ .inst_predicate(br_icmp_instp(&br_icmp_i64, cond)),
+ );
+ }
+ }
+
+ for &(inst, f3) in &[(brz, 0b000), (brnz, 0b001)] {
+ e.add32(e.enc(inst.bind(I32), r_sb_zero, branch_bits(f3)));
+ e.add64(e.enc(inst.bind(I64), r_sb_zero, branch_bits(f3)));
+ e.add32(e.enc(inst.bind(B1), r_sb_zero, branch_bits(f3)));
+ e.add64(e.enc(inst.bind(B1), r_sb_zero, branch_bits(f3)));
+ }
+
+ // Returns are a special case of jalr_bits using %x1 to hold the return address.
+ // The return address is provided by a special-purpose `link` return value that
+ // is added by legalize_signature().
+ e.add32(e.enc(return_, r_iret, jalr_bits()));
+ e.add64(e.enc(return_, r_iret, jalr_bits()));
+ e.add32(e.enc(call_indirect.bind(I32), r_icall, jalr_bits()));
+ e.add64(e.enc(call_indirect.bind(I64), r_icall, jalr_bits()));
+
+ // Spill and fill.
+ e.add32(e.enc(spill.bind(I32), r_gp_sp, store_bits(0b010)));
+ e.add64(e.enc(spill.bind(I32), r_gp_sp, store_bits(0b010)));
+ e.add64(e.enc(spill.bind(I64), r_gp_sp, store_bits(0b011)));
+ e.add32(e.enc(fill.bind(I32), r_gp_fi, load_bits(0b010)));
+ e.add64(e.enc(fill.bind(I32), r_gp_fi, load_bits(0b010)));
+ e.add64(e.enc(fill.bind(I64), r_gp_fi, load_bits(0b011)));
+
+ // No-op fills, created by late-stage redundant-fill removal.
+ for &ty in &[I64, I32] {
+ e.add64(e.enc(fill_nop.bind(ty), r_fillnull, 0));
+ e.add32(e.enc(fill_nop.bind(ty), r_fillnull, 0));
+ }
+ e.add64(e.enc(fill_nop.bind(B1), r_fillnull, 0));
+ e.add32(e.enc(fill_nop.bind(B1), r_fillnull, 0));
+
+ // Register copies.
+ e.add32(e.enc(copy.bind(I32), r_icopy, opimm_bits(0b000, 0)));
+ e.add64(e.enc(copy.bind(I64), r_icopy, opimm_bits(0b000, 0)));
+ e.add64(e.enc(copy.bind(I32), r_icopy, opimm32_bits(0b000, 0)));
+
+ e.add32(e.enc(regmove.bind(I32), r_irmov, opimm_bits(0b000, 0)));
+ e.add64(e.enc(regmove.bind(I64), r_irmov, opimm_bits(0b000, 0)));
+ e.add64(e.enc(regmove.bind(I32), r_irmov, opimm32_bits(0b000, 0)));
+
+ e.add32(e.enc(copy.bind(B1), r_icopy, opimm_bits(0b000, 0)));
+ e.add64(e.enc(copy.bind(B1), r_icopy, opimm_bits(0b000, 0)));
+ e.add32(e.enc(regmove.bind(B1), r_irmov, opimm_bits(0b000, 0)));
+ e.add64(e.enc(regmove.bind(B1), r_irmov, opimm_bits(0b000, 0)));
+
+ // Stack-slot-to-the-same-stack-slot copy, which is guaranteed to turn
+ // into a no-op.
+ // The same encoding is generated for both the 64- and 32-bit architectures.
+ for &ty in &[I64, I32, I16, I8] {
+ e.add32(e.enc(copy_nop.bind(ty), r_stacknull, 0));
+ e.add64(e.enc(copy_nop.bind(ty), r_stacknull, 0));
+ }
+ for &ty in &[F64, F32] {
+ e.add32(e.enc(copy_nop.bind(ty), r_stacknull, 0));
+ e.add64(e.enc(copy_nop.bind(ty), r_stacknull, 0));
+ }
+
+ // Copy-to-SSA
+ e.add32(e.enc(copy_to_ssa.bind(I32), r_copytossa, opimm_bits(0b000, 0)));
+ e.add64(e.enc(copy_to_ssa.bind(I64), r_copytossa, opimm_bits(0b000, 0)));
+ e.add64(e.enc(copy_to_ssa.bind(I32), r_copytossa, opimm32_bits(0b000, 0)));
+ e.add32(e.enc(copy_to_ssa.bind(B1), r_copytossa, opimm_bits(0b000, 0)));
+ e.add64(e.enc(copy_to_ssa.bind(B1), r_copytossa, opimm_bits(0b000, 0)));
+ e.add32(e.enc(copy_to_ssa.bind(R32), r_copytossa, opimm_bits(0b000, 0)));
+ e.add64(e.enc(copy_to_ssa.bind(R64), r_copytossa, opimm_bits(0b000, 0)));
+
+ e
+}
diff --git a/third_party/rust/cranelift-codegen-meta/src/isa/riscv/mod.rs b/third_party/rust/cranelift-codegen-meta/src/isa/riscv/mod.rs
new file mode 100644
index 0000000000..801e61a3d2
--- /dev/null
+++ b/third_party/rust/cranelift-codegen-meta/src/isa/riscv/mod.rs
@@ -0,0 +1,134 @@
+use crate::cdsl::cpu_modes::CpuMode;
+use crate::cdsl::instructions::InstructionGroupBuilder;
+use crate::cdsl::isa::TargetIsa;
+use crate::cdsl::regs::{IsaRegs, IsaRegsBuilder, RegBankBuilder, RegClassBuilder};
+use crate::cdsl::settings::{PredicateNode, SettingGroup, SettingGroupBuilder};
+
+use crate::shared::types::Float::{F32, F64};
+use crate::shared::types::Int::{I32, I64};
+use crate::shared::Definitions as SharedDefinitions;
+
+mod encodings;
+mod recipes;
+
+fn define_settings(shared: &SettingGroup) -> SettingGroup {
+ let mut setting = SettingGroupBuilder::new("riscv");
+
+ let supports_m = setting.add_bool(
+ "supports_m",
+ "CPU supports the 'M' extension (mul/div)",
+ false,
+ );
+ let supports_a = setting.add_bool(
+ "supports_a",
+ "CPU supports the 'A' extension (atomics)",
+ false,
+ );
+ let supports_f = setting.add_bool(
+ "supports_f",
+ "CPU supports the 'F' extension (float)",
+ false,
+ );
+ let supports_d = setting.add_bool(
+ "supports_d",
+ "CPU supports the 'D' extension (double)",
+ false,
+ );
+
+ let enable_m = setting.add_bool(
+ "enable_m",
+ "Enable the use of 'M' instructions if available",
+ true,
+ );
+
+ setting.add_bool(
+ "enable_e",
+ "Enable the 'RV32E' instruction set with only 16 registers",
+ false,
+ );
+
+ let shared_enable_atomics = shared.get_bool("enable_atomics");
+ let shared_enable_float = shared.get_bool("enable_float");
+ let shared_enable_simd = shared.get_bool("enable_simd");
+
+ setting.add_predicate("use_m", predicate!(supports_m && enable_m));
+ setting.add_predicate("use_a", predicate!(supports_a && shared_enable_atomics));
+ setting.add_predicate("use_f", predicate!(supports_f && shared_enable_float));
+ setting.add_predicate("use_d", predicate!(supports_d && shared_enable_float));
+ setting.add_predicate(
+ "full_float",
+ predicate!(shared_enable_simd && supports_f && supports_d),
+ );
+
+ setting.build()
+}
+
+fn define_registers() -> IsaRegs {
+ let mut regs = IsaRegsBuilder::new();
+
+ let builder = RegBankBuilder::new("IntRegs", "x")
+ .units(32)
+ .track_pressure(true);
+ let int_regs = regs.add_bank(builder);
+
+ let builder = RegBankBuilder::new("FloatRegs", "f")
+ .units(32)
+ .track_pressure(true);
+ let float_regs = regs.add_bank(builder);
+
+ let builder = RegClassBuilder::new_toplevel("GPR", int_regs);
+ regs.add_class(builder);
+
+ let builder = RegClassBuilder::new_toplevel("FPR", float_regs);
+ regs.add_class(builder);
+
+ regs.build()
+}
+
+pub(crate) fn define(shared_defs: &mut SharedDefinitions) -> TargetIsa {
+ let settings = define_settings(&shared_defs.settings);
+ let regs = define_registers();
+
+ let inst_group = InstructionGroupBuilder::new(&mut shared_defs.all_instructions).build();
+
+ // CPU modes for 32-bit and 64-bit operation.
+ let mut rv_32 = CpuMode::new("RV32");
+ let mut rv_64 = CpuMode::new("RV64");
+
+ let expand = shared_defs.transform_groups.by_name("expand");
+ let narrow_no_flags = shared_defs.transform_groups.by_name("narrow_no_flags");
+
+ rv_32.legalize_monomorphic(expand);
+ rv_32.legalize_default(narrow_no_flags);
+ rv_32.legalize_type(I32, expand);
+ rv_32.legalize_type(F32, expand);
+ rv_32.legalize_type(F64, expand);
+
+ rv_64.legalize_monomorphic(expand);
+ rv_64.legalize_default(narrow_no_flags);
+ rv_64.legalize_type(I32, expand);
+ rv_64.legalize_type(I64, expand);
+ rv_64.legalize_type(F32, expand);
+ rv_64.legalize_type(F64, expand);
+
+ let recipes = recipes::define(shared_defs, &regs);
+
+ let encodings = encodings::define(shared_defs, &settings, &recipes);
+ rv_32.set_encodings(encodings.enc32);
+ rv_64.set_encodings(encodings.enc64);
+ let encodings_predicates = encodings.inst_pred_reg.extract();
+
+ let recipes = recipes.collect();
+
+ let cpu_modes = vec![rv_32, rv_64];
+
+ TargetIsa::new(
+ "riscv",
+ inst_group,
+ settings,
+ regs,
+ recipes,
+ cpu_modes,
+ encodings_predicates,
+ )
+}
diff --git a/third_party/rust/cranelift-codegen-meta/src/isa/riscv/recipes.rs b/third_party/rust/cranelift-codegen-meta/src/isa/riscv/recipes.rs
new file mode 100644
index 0000000000..47acdbb042
--- /dev/null
+++ b/third_party/rust/cranelift-codegen-meta/src/isa/riscv/recipes.rs
@@ -0,0 +1,279 @@
+use std::collections::HashMap;
+
+use crate::cdsl::instructions::InstructionPredicate;
+use crate::cdsl::recipes::{EncodingRecipeBuilder, EncodingRecipeNumber, Recipes, Stack};
+use crate::cdsl::regs::IsaRegs;
+use crate::shared::Definitions as SharedDefinitions;
+
+/// A helper to create recipes and use them when defining the RISC-V encodings.
+pub(crate) struct RecipeGroup {
+    /// The actual list of recipes explicitly created in this file.
+ pub recipes: Recipes,
+
+ /// Provides fast lookup from a name to an encoding recipe.
+ name_to_recipe: HashMap<String, EncodingRecipeNumber>,
+}
+
+impl RecipeGroup {
+ fn new() -> Self {
+ Self {
+ recipes: Recipes::new(),
+ name_to_recipe: HashMap::new(),
+ }
+ }
+
+ fn push(&mut self, builder: EncodingRecipeBuilder) {
+ assert!(
+ self.name_to_recipe.get(&builder.name).is_none(),
+ format!("riscv recipe '{}' created twice", builder.name)
+ );
+ let name = builder.name.clone();
+ let number = self.recipes.push(builder.build());
+ self.name_to_recipe.insert(name, number);
+ }
+
+ pub fn by_name(&self, name: &str) -> EncodingRecipeNumber {
+ *self
+ .name_to_recipe
+ .get(name)
+ .unwrap_or_else(|| panic!("unknown riscv recipe name {}", name))
+ }
+
+ pub fn collect(self) -> Recipes {
+ self.recipes
+ }
+}
+
+pub(crate) fn define(shared_defs: &SharedDefinitions, regs: &IsaRegs) -> RecipeGroup {
+ let formats = &shared_defs.formats;
+
+ // Register classes shorthands.
+ let gpr = regs.class_by_name("GPR");
+
+ // Definitions.
+ let mut recipes = RecipeGroup::new();
+
+ // R-type 32-bit instructions: these are mostly binary arithmetic instructions.
+ // The encoding bits are `opcode[6:2] | (funct3 << 5) | (funct7 << 8)`.
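+ // Illustrative sketch (an assumption about the emitter, not taken from this crate): given the
+ // standard RISC-V R-type layout, a `put_r`-style helper would expand these bits back into a
+ // 32-bit instruction word roughly as
+ //
+ //     let opcode5 = bits & 0x1f; // opcode[6:2]
+ //     let funct3 = (bits >> 5) & 0x7;
+ //     let funct7 = (bits >> 8) & 0x7f;
+ //     let word = 0b11 // opcode[1:0] is 0b11 for all 32-bit instructions
+ //         | (opcode5 << 2) | (rd << 7) | (funct3 << 12)
+ //         | (rs1 << 15) | (rs2 << 20) | (funct7 << 25);
+ //
+ // The real `put_r` lives in the code generator proper, outside this meta crate.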
+ recipes.push(
+ EncodingRecipeBuilder::new("R", &formats.binary, 4)
+ .operands_in(vec![gpr, gpr])
+ .operands_out(vec![gpr])
+ .emit("put_r(bits, in_reg0, in_reg1, out_reg0, sink);"),
+ );
+
+ // R-type with an immediate shift amount instead of rs2.
+ recipes.push(
+ EncodingRecipeBuilder::new("Rshamt", &formats.binary_imm64, 4)
+ .operands_in(vec![gpr])
+ .operands_out(vec![gpr])
+ .emit("put_rshamt(bits, in_reg0, imm.into(), out_reg0, sink);"),
+ );
+
+ // R-type encoding of an integer comparison.
+ recipes.push(
+ EncodingRecipeBuilder::new("Ricmp", &formats.int_compare, 4)
+ .operands_in(vec![gpr, gpr])
+ .operands_out(vec![gpr])
+ .emit("put_r(bits, in_reg0, in_reg1, out_reg0, sink);"),
+ );
+
+ recipes.push(
+ EncodingRecipeBuilder::new("Ii", &formats.binary_imm64, 4)
+ .operands_in(vec![gpr])
+ .operands_out(vec![gpr])
+ .inst_predicate(InstructionPredicate::new_is_signed_int(
+ &*formats.binary_imm64,
+ "imm",
+ 12,
+ 0,
+ ))
+ .emit("put_i(bits, in_reg0, imm.into(), out_reg0, sink);"),
+ );
+
+ // I-type instruction with a hardcoded %x0 rs1.
+ recipes.push(
+ EncodingRecipeBuilder::new("Iz", &formats.unary_imm, 4)
+ .operands_out(vec![gpr])
+ .inst_predicate(InstructionPredicate::new_is_signed_int(
+ &formats.unary_imm,
+ "imm",
+ 12,
+ 0,
+ ))
+ .emit("put_i(bits, 0, imm.into(), out_reg0, sink);"),
+ );
+
+ // I-type encoding of an integer comparison.
+ recipes.push(
+ EncodingRecipeBuilder::new("Iicmp", &formats.int_compare_imm, 4)
+ .operands_in(vec![gpr])
+ .operands_out(vec![gpr])
+ .inst_predicate(InstructionPredicate::new_is_signed_int(
+ &formats.int_compare_imm,
+ "imm",
+ 12,
+ 0,
+ ))
+ .emit("put_i(bits, in_reg0, imm.into(), out_reg0, sink);"),
+ );
+
+ // I-type encoding for `jalr` as a return instruction. We won't use the immediate offset. The
+ // variable return values are not encoded.
+ recipes.push(
+ EncodingRecipeBuilder::new("Iret", &formats.multiary, 4).emit(
+ r#"
+ // Return instructions are always a jalr to %x1.
+ // The return address is provided as a special-purpose link argument.
+ put_i(
+ bits,
+ 1, // rs1 = %x1
+ 0, // no offset.
+ 0, // rd = %x0: no address written.
+ sink,
+ );
+ "#,
+ ),
+ );
+
+ // I-type encoding for `jalr` as a call_indirect.
+ recipes.push(
+ EncodingRecipeBuilder::new("Icall", &formats.call_indirect, 4)
+ .operands_in(vec![gpr])
+ .emit(
+ r#"
+ // call_indirect instructions are jalr with rd=%x1.
+ put_i(
+ bits,
+ in_reg0,
+ 0, // no offset.
+ 1, // rd = %x1: link register.
+ sink,
+ );
+ "#,
+ ),
+ );
+
+ // Copy of a GPR is implemented as addi x, 0.
+ recipes.push(
+ EncodingRecipeBuilder::new("Icopy", &formats.unary, 4)
+ .operands_in(vec![gpr])
+ .operands_out(vec![gpr])
+ .emit("put_i(bits, in_reg0, 0, out_reg0, sink);"),
+ );
+
+ // Same for a GPR regmove.
+ recipes.push(
+ EncodingRecipeBuilder::new("Irmov", &formats.reg_move, 4)
+ .operands_in(vec![gpr])
+ .emit("put_i(bits, src, 0, dst, sink);"),
+ );
+
+ // Same for copy-to-SSA -- GPR regmove.
+ recipes.push(
+ EncodingRecipeBuilder::new("copytossa", &formats.copy_to_ssa, 4)
+ // No operands_in to mention, because a source register is specified directly.
+ .operands_out(vec![gpr])
+ .emit("put_i(bits, src, 0, out_reg0, sink);"),
+ );
+
+ // U-type instructions have a 20-bit immediate that targets bits 12-31.
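+ // The `new_is_signed_int(.., 32, 12)` predicate below should therefore accept immediates that
+ // fit in a signed 32-bit value and have their low 12 bits clear (i.e. multiples of 4096).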
+ recipes.push(
+ EncodingRecipeBuilder::new("U", &formats.unary_imm, 4)
+ .operands_out(vec![gpr])
+ .inst_predicate(InstructionPredicate::new_is_signed_int(
+ &formats.unary_imm,
+ "imm",
+ 32,
+ 12,
+ ))
+ .emit("put_u(bits, imm.into(), out_reg0, sink);"),
+ );
+
+ // UJ-type unconditional branch instructions.
+ recipes.push(
+ EncodingRecipeBuilder::new("UJ", &formats.jump, 4)
+ .branch_range((0, 21))
+ .emit(
+ r#"
+ let dest = i64::from(func.offsets[destination]);
+ let disp = dest - i64::from(sink.offset());
+ put_uj(bits, disp, 0, sink);
+ "#,
+ ),
+ );
+
+ recipes.push(EncodingRecipeBuilder::new("UJcall", &formats.call, 4).emit(
+ r#"
+ sink.reloc_external(func.srclocs[inst],
+ Reloc::RiscvCall,
+ &func.dfg.ext_funcs[func_ref].name,
+ 0);
+ // rd=%x1 is the standard link register.
+ put_uj(bits, 0, 1, sink);
+ "#,
+ ));
+
+ // SB-type branch instructions.
+ recipes.push(
+ EncodingRecipeBuilder::new("SB", &formats.branch_icmp, 4)
+ .operands_in(vec![gpr, gpr])
+ .branch_range((0, 13))
+ .emit(
+ r#"
+ let dest = i64::from(func.offsets[destination]);
+ let disp = dest - i64::from(sink.offset());
+ put_sb(bits, disp, in_reg0, in_reg1, sink);
+ "#,
+ ),
+ );
+
+ // SB-type branch instruction with rs2 fixed to zero.
+ recipes.push(
+ EncodingRecipeBuilder::new("SBzero", &formats.branch, 4)
+ .operands_in(vec![gpr])
+ .branch_range((0, 13))
+ .emit(
+ r#"
+ let dest = i64::from(func.offsets[destination]);
+ let disp = dest - i64::from(sink.offset());
+ put_sb(bits, disp, in_reg0, 0, sink);
+ "#,
+ ),
+ );
+
+ // Spill of a GPR.
+ recipes.push(
+ EncodingRecipeBuilder::new("GPsp", &formats.unary, 4)
+ .operands_in(vec![gpr])
+ .operands_out(vec![Stack::new(gpr)])
+ .emit("unimplemented!();"),
+ );
+
+ // Fill of a GPR.
+ recipes.push(
+ EncodingRecipeBuilder::new("GPfi", &formats.unary, 4)
+ .operands_in(vec![Stack::new(gpr)])
+ .operands_out(vec![gpr])
+ .emit("unimplemented!();"),
+ );
+
+ // Stack-slot to same stack-slot copy, which is guaranteed to turn into a no-op.
+ recipes.push(
+ EncodingRecipeBuilder::new("stacknull", &formats.unary, 0)
+ .operands_in(vec![Stack::new(gpr)])
+ .operands_out(vec![Stack::new(gpr)])
+ .emit(""),
+ );
+
+ // No-op fills, created by late-stage redundant-fill removal.
+ recipes.push(
+ EncodingRecipeBuilder::new("fillnull", &formats.unary, 0)
+ .operands_in(vec![Stack::new(gpr)])
+ .operands_out(vec![gpr])
+ .clobbers_flags(false)
+ .emit(""),
+ );
+
+ recipes
+}
diff --git a/third_party/rust/cranelift-codegen-meta/src/isa/x86/encodings.rs b/third_party/rust/cranelift-codegen-meta/src/isa/x86/encodings.rs
new file mode 100644
index 0000000000..9ee12656c0
--- /dev/null
+++ b/third_party/rust/cranelift-codegen-meta/src/isa/x86/encodings.rs
@@ -0,0 +1,2726 @@
+#![allow(non_snake_case)]
+
+use cranelift_codegen_shared::condcodes::IntCC;
+use std::collections::HashMap;
+
+use crate::cdsl::encodings::{Encoding, EncodingBuilder};
+use crate::cdsl::instructions::{
+ vector, Bindable, Immediate, InstSpec, Instruction, InstructionGroup, InstructionPredicate,
+ InstructionPredicateNode, InstructionPredicateRegistry,
+};
+use crate::cdsl::recipes::{EncodingRecipe, EncodingRecipeNumber, Recipes};
+use crate::cdsl::settings::{SettingGroup, SettingPredicateNumber};
+use crate::cdsl::types::{LaneType, ValueType};
+use crate::shared::types::Bool::{B1, B16, B32, B64, B8};
+use crate::shared::types::Float::{F32, F64};
+use crate::shared::types::Int::{I16, I32, I64, I8};
+use crate::shared::types::Reference::{R32, R64};
+use crate::shared::Definitions as SharedDefinitions;
+
+use crate::isa::x86::opcodes::*;
+
+use super::recipes::{RecipeGroup, Template};
+use crate::cdsl::instructions::BindParameter::Any;
+
+pub(crate) struct PerCpuModeEncodings {
+ pub enc32: Vec<Encoding>,
+ pub enc64: Vec<Encoding>,
+ pub recipes: Recipes,
+ recipes_by_name: HashMap<String, EncodingRecipeNumber>,
+ pub inst_pred_reg: InstructionPredicateRegistry,
+}
+
+impl PerCpuModeEncodings {
+ fn new() -> Self {
+ Self {
+ enc32: Vec::new(),
+ enc64: Vec::new(),
+ recipes: Recipes::new(),
+ recipes_by_name: HashMap::new(),
+ inst_pred_reg: InstructionPredicateRegistry::new(),
+ }
+ }
+
+ fn add_recipe(&mut self, recipe: EncodingRecipe) -> EncodingRecipeNumber {
+ if let Some(found_index) = self.recipes_by_name.get(&recipe.name) {
+ assert!(
+ self.recipes[*found_index] == recipe,
+ "trying to insert different recipes with the same name ({})",
+ recipe.name
+ );
+ *found_index
+ } else {
+ let recipe_name = recipe.name.clone();
+ let index = self.recipes.push(recipe);
+ self.recipes_by_name.insert(recipe_name, index);
+ index
+ }
+ }
+
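+ /// Builds an `Encoding` from an instruction spec and a recipe `Template`: the template is split
+ /// into its recipe and encoding bits, the recipe is de-duplicated through `add_recipe`, and the
+ /// caller-provided closure may attach instruction or ISA predicates before the encoding is built.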
+ fn make_encoding<T>(
+ &mut self,
+ inst: InstSpec,
+ template: Template,
+ builder_closure: T,
+ ) -> Encoding
+ where
+ T: FnOnce(EncodingBuilder) -> EncodingBuilder,
+ {
+ let (recipe, bits) = template.build();
+ let recipe_number = self.add_recipe(recipe);
+ let builder = EncodingBuilder::new(inst, recipe_number, bits);
+ builder_closure(builder).build(&self.recipes, &mut self.inst_pred_reg)
+ }
+
+ fn enc32_func<T>(&mut self, inst: impl Into<InstSpec>, template: Template, builder_closure: T)
+ where
+ T: FnOnce(EncodingBuilder) -> EncodingBuilder,
+ {
+ let encoding = self.make_encoding(inst.into(), template, builder_closure);
+ self.enc32.push(encoding);
+ }
+ fn enc32(&mut self, inst: impl Into<InstSpec>, template: Template) {
+ self.enc32_func(inst, template, |x| x);
+ }
+ fn enc32_isap(
+ &mut self,
+ inst: impl Into<InstSpec>,
+ template: Template,
+ isap: SettingPredicateNumber,
+ ) {
+ self.enc32_func(inst, template, |encoding| encoding.isa_predicate(isap));
+ }
+ fn enc32_instp(
+ &mut self,
+ inst: impl Into<InstSpec>,
+ template: Template,
+ instp: InstructionPredicateNode,
+ ) {
+ self.enc32_func(inst, template, |encoding| encoding.inst_predicate(instp));
+ }
+ fn enc32_rec(&mut self, inst: impl Into<InstSpec>, recipe: &EncodingRecipe, bits: u16) {
+ let recipe_number = self.add_recipe(recipe.clone());
+ let builder = EncodingBuilder::new(inst.into(), recipe_number, bits);
+ let encoding = builder.build(&self.recipes, &mut self.inst_pred_reg);
+ self.enc32.push(encoding);
+ }
+
+ fn enc64_func<T>(&mut self, inst: impl Into<InstSpec>, template: Template, builder_closure: T)
+ where
+ T: FnOnce(EncodingBuilder) -> EncodingBuilder,
+ {
+ let encoding = self.make_encoding(inst.into(), template, builder_closure);
+ self.enc64.push(encoding);
+ }
+ fn enc64(&mut self, inst: impl Into<InstSpec>, template: Template) {
+ self.enc64_func(inst, template, |x| x);
+ }
+ fn enc64_isap(
+ &mut self,
+ inst: impl Into<InstSpec>,
+ template: Template,
+ isap: SettingPredicateNumber,
+ ) {
+ self.enc64_func(inst, template, |encoding| encoding.isa_predicate(isap));
+ }
+ fn enc64_instp(
+ &mut self,
+ inst: impl Into<InstSpec>,
+ template: Template,
+ instp: InstructionPredicateNode,
+ ) {
+ self.enc64_func(inst, template, |encoding| encoding.inst_predicate(instp));
+ }
+ fn enc64_rec(&mut self, inst: impl Into<InstSpec>, recipe: &EncodingRecipe, bits: u16) {
+ let recipe_number = self.add_recipe(recipe.clone());
+ let builder = EncodingBuilder::new(inst.into(), recipe_number, bits);
+ let encoding = builder.build(&self.recipes, &mut self.inst_pred_reg);
+ self.enc64.push(encoding);
+ }
+
+ /// Adds I32/I64 encodings as appropriate for a typed instruction.
+ /// The REX prefix is always inferred at runtime.
+ ///
+ /// Add encodings for `inst.i32` to X86_32.
+ /// Add encodings for `inst.i32` to X86_64 with optional, inferred REX.
+ /// Add encodings for `inst.i64` to X86_64 with a REX.W prefix.
+ fn enc_i32_i64(&mut self, inst: impl Into<InstSpec>, template: Template) {
+ let inst: InstSpec = inst.into();
+
+ // I32 on x86: no REX prefix.
+ self.enc32(inst.bind(I32), template.infer_rex());
+
+ // I32 on x86_64: REX.W unset; REX.RXB determined at runtime from registers.
+ self.enc64(inst.bind(I32), template.infer_rex());
+
+ // I64 on x86_64: REX.W set; REX.RXB determined at runtime from registers.
+ self.enc64(inst.bind(I64), template.rex().w());
+ }
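+
+ // For example, `define_alu` below uses this as `e.enc_i32_i64(iadd, rec_rr.opcodes(&ADD));`,
+ // which fans a single definition out into both the 32-bit and 64-bit encoding tables.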
+
+ /// Adds I32/I64 encodings as appropriate for a typed instruction.
+ /// All variants of REX prefix are explicitly emitted, not inferred.
+ ///
+ /// Add encodings for `inst.i32` to X86_32.
+ /// Add encodings for `inst.i32` to X86_64 with and without REX.
+ /// Add encodings for `inst.i64` to X86_64 with and without REX.
+ fn enc_i32_i64_explicit_rex(&mut self, inst: impl Into<InstSpec>, template: Template) {
+ let inst: InstSpec = inst.into();
+ self.enc32(inst.bind(I32), template.nonrex());
+
+ // REX-less encoding must come after REX encoding so we don't use it by default.
+ // Otherwise reg-alloc would never use r8 and up.
+ self.enc64(inst.bind(I32), template.rex());
+ self.enc64(inst.bind(I32), template.nonrex());
+ self.enc64(inst.bind(I64), template.rex().w());
+ }
+
+ /// Adds B32/B64 encodings as appropriate for a typed instruction.
+ /// The REX prefix is always inferred at runtime.
+ ///
+ /// Adds encoding for `inst.b32` to X86_32.
+ /// Adds encoding for `inst.b32` to X86_64 with optional, inferred REX.
+ /// Adds encoding for `inst.b64` to X86_64 with a REX.W prefix.
+ fn enc_b32_b64(&mut self, inst: impl Into<InstSpec>, template: Template) {
+ let inst: InstSpec = inst.into();
+
+ // B32 on x86: no REX prefix.
+ self.enc32(inst.bind(B32), template.infer_rex());
+
+ // B32 on x86_64: REX.W unset; REX.RXB determined at runtime from registers.
+ self.enc64(inst.bind(B32), template.infer_rex());
+
+ // B64 on x86_64: REX.W set; REX.RXB determined at runtime from registers.
+ self.enc64(inst.bind(B64), template.rex().w());
+ }
+
+ /// Add encodings for `inst.i32` to X86_32.
+ /// Add encodings for `inst.i32` to X86_64 with a REX prefix.
+ /// Add encodings for `inst.i64` to X86_64 with a REX.W prefix.
+ fn enc_i32_i64_rex_only(&mut self, inst: impl Into<InstSpec>, template: Template) {
+ let inst: InstSpec = inst.into();
+ self.enc32(inst.bind(I32), template.nonrex());
+ self.enc64(inst.bind(I32), template.rex());
+ self.enc64(inst.bind(I64), template.rex().w());
+ }
+
+ /// Add encodings for `inst.i32` to X86_32.
+ /// Add encodings for `inst.i32` to X86_64 with and without REX.
+ /// Add encodings for `inst.i64` to X86_64 with a REX.W prefix.
+ fn enc_i32_i64_instp(
+ &mut self,
+ inst: &Instruction,
+ template: Template,
+ instp: InstructionPredicateNode,
+ ) {
+ self.enc32_func(inst.bind(I32), template.nonrex(), |builder| {
+ builder.inst_predicate(instp.clone())
+ });
+
+ // REX-less encoding must come after REX encoding so we don't use it by default. Otherwise
+ // reg-alloc would never use r8 and up.
+ self.enc64_func(inst.bind(I32), template.rex(), |builder| {
+ builder.inst_predicate(instp.clone())
+ });
+ self.enc64_func(inst.bind(I32), template.nonrex(), |builder| {
+ builder.inst_predicate(instp.clone())
+ });
+ self.enc64_func(inst.bind(I64), template.rex().w(), |builder| {
+ builder.inst_predicate(instp)
+ });
+ }
+
+ /// Add encodings for `inst.r32` to X86_32.
+ /// Add encodings for `inst.r32` to X86_64 with and without REX.
+ /// Add encodings for `inst.r64` to X86_64 with a REX.W prefix.
+ fn enc_r32_r64_instp(
+ &mut self,
+ inst: &Instruction,
+ template: Template,
+ instp: InstructionPredicateNode,
+ ) {
+ self.enc32_func(inst.bind(R32), template.nonrex(), |builder| {
+ builder.inst_predicate(instp.clone())
+ });
+
+ // REX-less encoding must come after REX encoding so we don't use it by default. Otherwise
+ // reg-alloc would never use r8 and up.
+ self.enc64_func(inst.bind(R32), template.rex(), |builder| {
+ builder.inst_predicate(instp.clone())
+ });
+ self.enc64_func(inst.bind(R32), template.nonrex(), |builder| {
+ builder.inst_predicate(instp.clone())
+ });
+ self.enc64_func(inst.bind(R64), template.rex().w(), |builder| {
+ builder.inst_predicate(instp)
+ });
+ }
+
+ /// Add encodings for `inst.r32` to X86_32.
+ /// Add encodings for `inst.r64` to X86_64 with a REX.W prefix.
+ fn enc_r32_r64_rex_only(&mut self, inst: impl Into<InstSpec>, template: Template) {
+ let inst: InstSpec = inst.into();
+ self.enc32(inst.bind(R32), template.nonrex());
+ self.enc64(inst.bind(R64), template.rex().w());
+ }
+
+ fn enc_r32_r64_ld_st(&mut self, inst: &Instruction, w_bit: bool, template: Template) {
+ self.enc32(inst.clone().bind(R32).bind(Any), template.clone());
+
+ // REX-less encoding must come after REX encoding so we don't use it by
+ // default. Otherwise reg-alloc would never use r8 and up.
+ self.enc64(inst.clone().bind(R32).bind(Any), template.clone().rex());
+ self.enc64(inst.clone().bind(R32).bind(Any), template.clone());
+
+ if w_bit {
+ self.enc64(inst.clone().bind(R64).bind(Any), template.rex().w());
+ } else {
+ self.enc64(inst.clone().bind(R64).bind(Any), template.clone().rex());
+ self.enc64(inst.clone().bind(R64).bind(Any), template);
+ }
+ }
+
+ /// Add encodings for `inst` to X86_64 with and without a REX prefix.
+ fn enc_x86_64(&mut self, inst: impl Into<InstSpec> + Clone, template: Template) {
+ // See above comment about the ordering of rex vs non-rex encodings.
+ self.enc64(inst.clone(), template.rex());
+ self.enc64(inst, template);
+ }
+
+ /// Add encodings for `inst` to X86_64 with and without a REX prefix.
+ fn enc_x86_64_instp(
+ &mut self,
+ inst: impl Clone + Into<InstSpec>,
+ template: Template,
+ instp: InstructionPredicateNode,
+ ) {
+ // See above comment about the ordering of rex vs non-rex encodings.
+ self.enc64_func(inst.clone(), template.rex(), |builder| {
+ builder.inst_predicate(instp.clone())
+ });
+ self.enc64_func(inst, template, |builder| builder.inst_predicate(instp));
+ }
+ fn enc_x86_64_isap(
+ &mut self,
+ inst: impl Clone + Into<InstSpec>,
+ template: Template,
+ isap: SettingPredicateNumber,
+ ) {
+ // See above comment about the ordering of rex vs non-rex encodings.
+ self.enc64_isap(inst.clone(), template.rex(), isap);
+ self.enc64_isap(inst, template, isap);
+ }
+
+ /// Add all three encodings for `inst`:
+ /// - X86_32
+ /// - X86_64 with and without the REX prefix.
+ fn enc_both(&mut self, inst: impl Clone + Into<InstSpec>, template: Template) {
+ self.enc32(inst.clone(), template.clone());
+ self.enc_x86_64(inst, template);
+ }
+ fn enc_both_isap(
+ &mut self,
+ inst: impl Clone + Into<InstSpec>,
+ template: Template,
+ isap: SettingPredicateNumber,
+ ) {
+ self.enc32_isap(inst.clone(), template.clone(), isap);
+ self.enc_x86_64_isap(inst, template, isap);
+ }
+ fn enc_both_instp(
+ &mut self,
+ inst: impl Clone + Into<InstSpec>,
+ template: Template,
+ instp: InstructionPredicateNode,
+ ) {
+ self.enc32_instp(inst.clone(), template.clone(), instp.clone());
+ self.enc_x86_64_instp(inst, template, instp);
+ }
+
+ /// Add two encodings for `inst`:
+ /// - X86_32, no REX prefix, since this is not valid in 32-bit mode.
+ /// - X86_64, dynamically infer the REX prefix.
+ fn enc_both_inferred(&mut self, inst: impl Clone + Into<InstSpec>, template: Template) {
+ self.enc32(inst.clone(), template.clone());
+ self.enc64(inst, template.infer_rex());
+ }
+ fn enc_both_inferred_maybe_isap(
+ &mut self,
+ inst: impl Clone + Into<InstSpec>,
+ template: Template,
+ isap: Option<SettingPredicateNumber>,
+ ) {
+ self.enc32_maybe_isap(inst.clone(), template.clone(), isap);
+ self.enc64_maybe_isap(inst, template.infer_rex(), isap);
+ }
+
+ /// Add two encodings for `inst`:
+ /// - X86_32
+ /// - X86_64 with the REX prefix.
+ fn enc_both_rex_only(&mut self, inst: impl Clone + Into<InstSpec>, template: Template) {
+ self.enc32(inst.clone(), template.clone());
+ self.enc64(inst, template.rex());
+ }
+
+ /// Add encodings for `inst.i32` to X86_32.
+ /// Add encodings for `inst.i32` to X86_64 with and without REX.
+ /// Add encodings for `inst.i64` to X86_64 with a REX prefix, using the `w_bit`
+ /// argument to determine whether or not to set the REX.W bit.
+ fn enc_i32_i64_ld_st(&mut self, inst: &Instruction, w_bit: bool, template: Template) {
+ self.enc32(inst.clone().bind(I32).bind(Any), template.clone());
+
+ // REX-less encoding must come after REX encoding so we don't use it by
+ // default. Otherwise reg-alloc would never use r8 and up.
+ self.enc64(inst.clone().bind(I32).bind(Any), template.clone().rex());
+ self.enc64(inst.clone().bind(I32).bind(Any), template.clone());
+
+ if w_bit {
+ self.enc64(inst.clone().bind(I64).bind(Any), template.rex().w());
+ } else {
+ self.enc64(inst.clone().bind(I64).bind(Any), template.clone().rex());
+ self.enc64(inst.clone().bind(I64).bind(Any), template);
+ }
+ }
+
+ /// Add the same encoding/recipe pairing to both X86_32 and X86_64
+ fn enc_32_64_rec(
+ &mut self,
+ inst: impl Clone + Into<InstSpec>,
+ recipe: &EncodingRecipe,
+ bits: u16,
+ ) {
+ self.enc32_rec(inst.clone(), recipe, bits);
+ self.enc64_rec(inst, recipe, bits);
+ }
+
+ /// Add the same encoding to both X86_32 and X86_64; assumes configuration (e.g. REX, operand binding) has already happened
+ fn enc_32_64_func<T>(
+ &mut self,
+ inst: impl Clone + Into<InstSpec>,
+ template: Template,
+ builder_closure: T,
+ ) where
+ T: FnOnce(EncodingBuilder) -> EncodingBuilder,
+ {
+ let encoding = self.make_encoding(inst.into(), template, builder_closure);
+ self.enc32.push(encoding.clone());
+ self.enc64.push(encoding);
+ }
+
+ /// Add the same encoding to both X86_32 and X86_64; assumes configuration (e.g. REX, operand
+ /// binding) has already happened.
+ fn enc_32_64_maybe_isap(
+ &mut self,
+ inst: impl Clone + Into<InstSpec>,
+ template: Template,
+ isap: Option<SettingPredicateNumber>,
+ ) {
+ self.enc32_maybe_isap(inst.clone(), template.clone(), isap);
+ self.enc64_maybe_isap(inst, template, isap);
+ }
+
+ fn enc32_maybe_isap(
+ &mut self,
+ inst: impl Into<InstSpec>,
+ template: Template,
+ isap: Option<SettingPredicateNumber>,
+ ) {
+ match isap {
+ None => self.enc32(inst, template),
+ Some(isap) => self.enc32_isap(inst, template, isap),
+ }
+ }
+
+ fn enc64_maybe_isap(
+ &mut self,
+ inst: impl Into<InstSpec>,
+ template: Template,
+ isap: Option<SettingPredicateNumber>,
+ ) {
+ match isap {
+ None => self.enc64(inst, template),
+ Some(isap) => self.enc64_isap(inst, template, isap),
+ }
+ }
+}
+
+// Definitions.
+
+#[inline(never)]
+fn define_moves(e: &mut PerCpuModeEncodings, shared_defs: &SharedDefinitions, r: &RecipeGroup) {
+ let shared = &shared_defs.instructions;
+ let formats = &shared_defs.formats;
+
+ // Shorthands for instructions.
+ let bconst = shared.by_name("bconst");
+ let bint = shared.by_name("bint");
+ let copy = shared.by_name("copy");
+ let copy_special = shared.by_name("copy_special");
+ let copy_to_ssa = shared.by_name("copy_to_ssa");
+ let get_pinned_reg = shared.by_name("get_pinned_reg");
+ let iconst = shared.by_name("iconst");
+ let ireduce = shared.by_name("ireduce");
+ let regmove = shared.by_name("regmove");
+ let sextend = shared.by_name("sextend");
+ let set_pinned_reg = shared.by_name("set_pinned_reg");
+ let uextend = shared.by_name("uextend");
+ let dummy_sarg_t = shared.by_name("dummy_sarg_t");
+
+ // Shorthands for recipes.
+ let rec_copysp = r.template("copysp");
+ let rec_furm_reg_to_ssa = r.template("furm_reg_to_ssa");
+ let rec_get_pinned_reg = r.recipe("get_pinned_reg");
+ let rec_null = r.recipe("null");
+ let rec_pu_id = r.template("pu_id");
+ let rec_pu_id_bool = r.template("pu_id_bool");
+ let rec_pu_iq = r.template("pu_iq");
+ let rec_rmov = r.template("rmov");
+ let rec_set_pinned_reg = r.template("set_pinned_reg");
+ let rec_u_id = r.template("u_id");
+ let rec_u_id_z = r.template("u_id_z");
+ let rec_umr = r.template("umr");
+ let rec_umr_reg_to_ssa = r.template("umr_reg_to_ssa");
+ let rec_urm_noflags = r.template("urm_noflags");
+ let rec_urm_noflags_abcd = r.template("urm_noflags_abcd");
+ let rec_dummy_sarg_t = r.recipe("dummy_sarg_t");
+
+ // The pinned register is fixed and its value is entirely user-controlled, so `get_pinned_reg` generates no code!
+ e.enc64_rec(get_pinned_reg.bind(I64), rec_get_pinned_reg, 0);
+ e.enc_x86_64(
+ set_pinned_reg.bind(I64),
+ rec_set_pinned_reg.opcodes(&MOV_STORE).rex().w(),
+ );
+
+ e.enc_i32_i64(copy, rec_umr.opcodes(&MOV_STORE));
+ e.enc_r32_r64_rex_only(copy, rec_umr.opcodes(&MOV_STORE));
+ e.enc_both(copy.bind(B1), rec_umr.opcodes(&MOV_STORE));
+ e.enc_both(copy.bind(I8), rec_umr.opcodes(&MOV_STORE));
+ e.enc_both(copy.bind(I16), rec_umr.opcodes(&MOV_STORE));
+
+ // TODO For x86-64, only define REX forms for now, since we can't describe the
+ // special regunit immediate operands with the current constraint language.
+ for &ty in &[I8, I16, I32] {
+ e.enc32(regmove.bind(ty), rec_rmov.opcodes(&MOV_STORE));
+ e.enc64(regmove.bind(ty), rec_rmov.opcodes(&MOV_STORE).rex());
+ }
+ for &ty in &[B8, B16, B32] {
+ e.enc32(regmove.bind(ty), rec_rmov.opcodes(&MOV_STORE));
+ e.enc64(regmove.bind(ty), rec_rmov.opcodes(&MOV_STORE).rex());
+ }
+ e.enc64(regmove.bind(I64), rec_rmov.opcodes(&MOV_STORE).rex().w());
+ e.enc_both(regmove.bind(B1), rec_rmov.opcodes(&MOV_STORE));
+ e.enc_both(regmove.bind(I8), rec_rmov.opcodes(&MOV_STORE));
+ e.enc32(regmove.bind(R32), rec_rmov.opcodes(&MOV_STORE));
+ e.enc64(regmove.bind(R32), rec_rmov.opcodes(&MOV_STORE).rex());
+ e.enc64(regmove.bind(R64), rec_rmov.opcodes(&MOV_STORE).rex().w());
+
+ // Immediate constants.
+ e.enc32(iconst.bind(I32), rec_pu_id.opcodes(&MOV_IMM));
+
+ e.enc64(iconst.bind(I32), rec_pu_id.rex().opcodes(&MOV_IMM));
+ e.enc64(iconst.bind(I32), rec_pu_id.opcodes(&MOV_IMM));
+
+ // The 32-bit immediate movl also zero-extends to 64 bits, so `iconst.i64` values that fit in an unsigned 32-bit immediate can use it.
+ let is_unsigned_int32 =
+ InstructionPredicate::new_is_unsigned_int(&*formats.unary_imm, "imm", 32, 0);
+
+ e.enc64_func(
+ iconst.bind(I64),
+ rec_pu_id.opcodes(&MOV_IMM).rex(),
+ |encoding| encoding.inst_predicate(is_unsigned_int32.clone()),
+ );
+ e.enc64_func(iconst.bind(I64), rec_pu_id.opcodes(&MOV_IMM), |encoding| {
+ encoding.inst_predicate(is_unsigned_int32)
+ });
+
+ // Sign-extended 32-bit immediate.
+ e.enc64(
+ iconst.bind(I64),
+ rec_u_id.rex().opcodes(&MOV_IMM_SIGNEXTEND).rrr(0).w(),
+ );
+
+ // Finally, the MOV_IMM opcode takes an 8-byte immediate with a REX.W prefix.
+ e.enc64(iconst.bind(I64), rec_pu_iq.opcodes(&MOV_IMM).rex().w());
+
+ // Bool constants (uses MOV)
+ for &ty in &[B1, B8, B16, B32] {
+ e.enc_both(bconst.bind(ty), rec_pu_id_bool.opcodes(&MOV_IMM));
+ }
+ e.enc64(bconst.bind(B64), rec_pu_id_bool.opcodes(&MOV_IMM).rex());
+
+ let is_zero_int = InstructionPredicate::new_is_zero_int(&formats.unary_imm, "imm");
+ e.enc_both_instp(
+ iconst.bind(I8),
+ rec_u_id_z.opcodes(&XORB),
+ is_zero_int.clone(),
+ );
+
+ // You may expect that i16 encodings would have a 0x66 prefix on the opcode to indicate that
+ // the encoding operates on 16-bit operands (e.g., "xor %ax, %ax"). Cranelift currently does not
+ // know that it can drop the 0x66 prefix and clear the upper half of a 32-bit register in these
+ // scenarios, so we explicitly select a wider but permissible opcode.
+ //
+ // This effectively formalizes the i16->i32 widening that Cranelift performs when there isn't
+ // an appropriate i16 encoding available.
+ e.enc_both_instp(
+ iconst.bind(I16),
+ rec_u_id_z.opcodes(&XOR),
+ is_zero_int.clone(),
+ );
+ e.enc_both_instp(
+ iconst.bind(I32),
+ rec_u_id_z.opcodes(&XOR),
+ is_zero_int.clone(),
+ );
+ e.enc_x86_64_instp(iconst.bind(I64), rec_u_id_z.opcodes(&XOR), is_zero_int);
+
+ // Numerical conversions.
+
+ // Reducing an integer is a no-op.
+ e.enc32_rec(ireduce.bind(I8).bind(I16), rec_null, 0);
+ e.enc32_rec(ireduce.bind(I8).bind(I32), rec_null, 0);
+ e.enc32_rec(ireduce.bind(I16).bind(I32), rec_null, 0);
+
+ e.enc64_rec(ireduce.bind(I8).bind(I16), rec_null, 0);
+ e.enc64_rec(ireduce.bind(I8).bind(I32), rec_null, 0);
+ e.enc64_rec(ireduce.bind(I16).bind(I32), rec_null, 0);
+ e.enc64_rec(ireduce.bind(I8).bind(I64), rec_null, 0);
+ e.enc64_rec(ireduce.bind(I16).bind(I64), rec_null, 0);
+ e.enc64_rec(ireduce.bind(I32).bind(I64), rec_null, 0);
+
+ // TODO: Add encodings for cbw, cwde, cdqe, which are sign-extending
+ // instructions for %al/%ax/%eax to %ax/%eax/%rax.
+
+ // movsbl
+ e.enc32(
+ sextend.bind(I32).bind(I8),
+ rec_urm_noflags_abcd.opcodes(&MOVSX_BYTE),
+ );
+ e.enc64(
+ sextend.bind(I32).bind(I8),
+ rec_urm_noflags.opcodes(&MOVSX_BYTE).rex(),
+ );
+ e.enc64(
+ sextend.bind(I32).bind(I8),
+ rec_urm_noflags_abcd.opcodes(&MOVSX_BYTE),
+ );
+
+ // movswl
+ e.enc32(
+ sextend.bind(I32).bind(I16),
+ rec_urm_noflags.opcodes(&MOVSX_WORD),
+ );
+ e.enc64(
+ sextend.bind(I32).bind(I16),
+ rec_urm_noflags.opcodes(&MOVSX_WORD).rex(),
+ );
+ e.enc64(
+ sextend.bind(I32).bind(I16),
+ rec_urm_noflags.opcodes(&MOVSX_WORD),
+ );
+
+ // movsbq
+ e.enc64(
+ sextend.bind(I64).bind(I8),
+ rec_urm_noflags.opcodes(&MOVSX_BYTE).rex().w(),
+ );
+
+ // movswq
+ e.enc64(
+ sextend.bind(I64).bind(I16),
+ rec_urm_noflags.opcodes(&MOVSX_WORD).rex().w(),
+ );
+
+ // movslq
+ e.enc64(
+ sextend.bind(I64).bind(I32),
+ rec_urm_noflags.opcodes(&MOVSXD).rex().w(),
+ );
+
+ // movzbl
+ e.enc32(
+ uextend.bind(I32).bind(I8),
+ rec_urm_noflags_abcd.opcodes(&MOVZX_BYTE),
+ );
+ e.enc64(
+ uextend.bind(I32).bind(I8),
+ rec_urm_noflags.opcodes(&MOVZX_BYTE).rex(),
+ );
+ e.enc64(
+ uextend.bind(I32).bind(I8),
+ rec_urm_noflags_abcd.opcodes(&MOVZX_BYTE),
+ );
+
+ // movzwl
+ e.enc32(
+ uextend.bind(I32).bind(I16),
+ rec_urm_noflags.opcodes(&MOVZX_WORD),
+ );
+ e.enc64(
+ uextend.bind(I32).bind(I16),
+ rec_urm_noflags.opcodes(&MOVZX_WORD).rex(),
+ );
+ e.enc64(
+ uextend.bind(I32).bind(I16),
+ rec_urm_noflags.opcodes(&MOVZX_WORD),
+ );
+
+ // movzbq, encoded as movzbl because it's equivalent and shorter.
+ e.enc64(
+ uextend.bind(I64).bind(I8),
+ rec_urm_noflags.opcodes(&MOVZX_BYTE).rex(),
+ );
+ e.enc64(
+ uextend.bind(I64).bind(I8),
+ rec_urm_noflags_abcd.opcodes(&MOVZX_BYTE),
+ );
+
+ // movzwq, encoded as movzwl because it's equivalent and shorter
+ e.enc64(
+ uextend.bind(I64).bind(I16),
+ rec_urm_noflags.opcodes(&MOVZX_WORD).rex(),
+ );
+ e.enc64(
+ uextend.bind(I64).bind(I16),
+ rec_urm_noflags.opcodes(&MOVZX_WORD),
+ );
+
+ // A 32-bit register copy clears the high 32 bits.
+ e.enc64(
+ uextend.bind(I64).bind(I32),
+ rec_umr.opcodes(&MOV_STORE).rex(),
+ );
+ e.enc64(uextend.bind(I64).bind(I32), rec_umr.opcodes(&MOV_STORE));
+
+ // Convert bool to int.
+ //
+ // This assumes that b1 is represented as an 8-bit low register with the value 0
+ // or 1.
+ //
+ // Encode movzbq as movzbl, because it's equivalent and shorter.
+ for &to in &[I8, I16, I32, I64] {
+ for &from in &[B1, B8] {
+ e.enc64(
+ bint.bind(to).bind(from),
+ rec_urm_noflags.opcodes(&MOVZX_BYTE).rex(),
+ );
+ e.enc64(
+ bint.bind(to).bind(from),
+ rec_urm_noflags_abcd.opcodes(&MOVZX_BYTE),
+ );
+ if to != I64 {
+ e.enc32(
+ bint.bind(to).bind(from),
+ rec_urm_noflags_abcd.opcodes(&MOVZX_BYTE),
+ );
+ }
+ }
+ }
+ for (to, from) in &[(I16, B16), (I32, B32), (I64, B64)] {
+ e.enc_both(
+ bint.bind(*to).bind(*from),
+ rec_urm_noflags_abcd.opcodes(&MOVZX_BYTE),
+ );
+ }
+
+ // Copy Special
+ // For x86-64, only define REX forms for now, since we can't describe the
+ // special regunit immediate operands with the current constraint language.
+ e.enc64(copy_special, rec_copysp.opcodes(&MOV_STORE).rex().w());
+ e.enc32(copy_special, rec_copysp.opcodes(&MOV_STORE));
+
+ // Copy to SSA. These have to be done with special _rex_only encoders, because the standard
+ // machinery for deciding whether a REX.{RXB} prefix is needed doesn't take into account
+ // the source register, which is specified directly in the instruction.
+ e.enc_i32_i64_rex_only(copy_to_ssa, rec_umr_reg_to_ssa.opcodes(&MOV_STORE));
+ e.enc_r32_r64_rex_only(copy_to_ssa, rec_umr_reg_to_ssa.opcodes(&MOV_STORE));
+ e.enc_both_rex_only(copy_to_ssa.bind(B1), rec_umr_reg_to_ssa.opcodes(&MOV_STORE));
+ e.enc_both_rex_only(copy_to_ssa.bind(I8), rec_umr_reg_to_ssa.opcodes(&MOV_STORE));
+ e.enc_both_rex_only(
+ copy_to_ssa.bind(I16),
+ rec_umr_reg_to_ssa.opcodes(&MOV_STORE),
+ );
+ e.enc_both_rex_only(
+ copy_to_ssa.bind(F64),
+ rec_furm_reg_to_ssa.opcodes(&MOVSD_LOAD),
+ );
+ e.enc_both_rex_only(
+ copy_to_ssa.bind(F32),
+ rec_furm_reg_to_ssa.opcodes(&MOVSS_LOAD),
+ );
+
+ e.enc_32_64_rec(dummy_sarg_t, rec_dummy_sarg_t, 0);
+}
+
+#[inline(never)]
+fn define_memory(
+ e: &mut PerCpuModeEncodings,
+ shared_defs: &SharedDefinitions,
+ x86: &InstructionGroup,
+ r: &RecipeGroup,
+) {
+ let shared = &shared_defs.instructions;
+ let formats = &shared_defs.formats;
+
+ // Shorthands for instructions.
+ let adjust_sp_down = shared.by_name("adjust_sp_down");
+ let adjust_sp_down_imm = shared.by_name("adjust_sp_down_imm");
+ let adjust_sp_up_imm = shared.by_name("adjust_sp_up_imm");
+ let copy_nop = shared.by_name("copy_nop");
+ let fill = shared.by_name("fill");
+ let fill_nop = shared.by_name("fill_nop");
+ let istore16 = shared.by_name("istore16");
+ let istore16_complex = shared.by_name("istore16_complex");
+ let istore32 = shared.by_name("istore32");
+ let istore32_complex = shared.by_name("istore32_complex");
+ let istore8 = shared.by_name("istore8");
+ let istore8_complex = shared.by_name("istore8_complex");
+ let load = shared.by_name("load");
+ let load_complex = shared.by_name("load_complex");
+ let regfill = shared.by_name("regfill");
+ let regspill = shared.by_name("regspill");
+ let sload16 = shared.by_name("sload16");
+ let sload16_complex = shared.by_name("sload16_complex");
+ let sload32 = shared.by_name("sload32");
+ let sload32_complex = shared.by_name("sload32_complex");
+ let sload8 = shared.by_name("sload8");
+ let sload8_complex = shared.by_name("sload8_complex");
+ let spill = shared.by_name("spill");
+ let store = shared.by_name("store");
+ let store_complex = shared.by_name("store_complex");
+ let uload16 = shared.by_name("uload16");
+ let uload16_complex = shared.by_name("uload16_complex");
+ let uload32 = shared.by_name("uload32");
+ let uload32_complex = shared.by_name("uload32_complex");
+ let uload8 = shared.by_name("uload8");
+ let uload8_complex = shared.by_name("uload8_complex");
+ let x86_pop = x86.by_name("x86_pop");
+ let x86_push = x86.by_name("x86_push");
+
+ // Shorthands for recipes.
+ let rec_adjustsp = r.template("adjustsp");
+ let rec_adjustsp_ib = r.template("adjustsp_ib");
+ let rec_adjustsp_id = r.template("adjustsp_id");
+ let rec_ffillnull = r.recipe("ffillnull");
+ let rec_fillnull = r.recipe("fillnull");
+ let rec_fillSib32 = r.template("fillSib32");
+ let rec_ld = r.template("ld");
+ let rec_ldDisp32 = r.template("ldDisp32");
+ let rec_ldDisp8 = r.template("ldDisp8");
+ let rec_ldWithIndex = r.template("ldWithIndex");
+ let rec_ldWithIndexDisp32 = r.template("ldWithIndexDisp32");
+ let rec_ldWithIndexDisp8 = r.template("ldWithIndexDisp8");
+ let rec_popq = r.template("popq");
+ let rec_pushq = r.template("pushq");
+ let rec_regfill32 = r.template("regfill32");
+ let rec_regspill32 = r.template("regspill32");
+ let rec_spillSib32 = r.template("spillSib32");
+ let rec_st = r.template("st");
+ let rec_stacknull = r.recipe("stacknull");
+ let rec_stDisp32 = r.template("stDisp32");
+ let rec_stDisp32_abcd = r.template("stDisp32_abcd");
+ let rec_stDisp8 = r.template("stDisp8");
+ let rec_stDisp8_abcd = r.template("stDisp8_abcd");
+ let rec_stWithIndex = r.template("stWithIndex");
+ let rec_stWithIndexDisp32 = r.template("stWithIndexDisp32");
+ let rec_stWithIndexDisp32_abcd = r.template("stWithIndexDisp32_abcd");
+ let rec_stWithIndexDisp8 = r.template("stWithIndexDisp8");
+ let rec_stWithIndexDisp8_abcd = r.template("stWithIndexDisp8_abcd");
+ let rec_stWithIndex_abcd = r.template("stWithIndex_abcd");
+ let rec_st_abcd = r.template("st_abcd");
+
+ // Loads and stores.
+ let is_load_complex_length_two =
+ InstructionPredicate::new_length_equals(&*formats.load_complex, 2);
+
+ for recipe in &[rec_ldWithIndex, rec_ldWithIndexDisp8, rec_ldWithIndexDisp32] {
+ e.enc_i32_i64_instp(
+ load_complex,
+ recipe.opcodes(&MOV_LOAD),
+ is_load_complex_length_two.clone(),
+ );
+ e.enc_r32_r64_instp(
+ load_complex,
+ recipe.opcodes(&MOV_LOAD),
+ is_load_complex_length_two.clone(),
+ );
+ e.enc_x86_64_instp(
+ uload32_complex,
+ recipe.opcodes(&MOV_LOAD),
+ is_load_complex_length_two.clone(),
+ );
+
+ e.enc64_instp(
+ sload32_complex,
+ recipe.opcodes(&MOVSXD).rex().w(),
+ is_load_complex_length_two.clone(),
+ );
+
+ e.enc_i32_i64_instp(
+ uload16_complex,
+ recipe.opcodes(&MOVZX_WORD),
+ is_load_complex_length_two.clone(),
+ );
+ e.enc_i32_i64_instp(
+ sload16_complex,
+ recipe.opcodes(&MOVSX_WORD),
+ is_load_complex_length_two.clone(),
+ );
+
+ e.enc_i32_i64_instp(
+ uload8_complex,
+ recipe.opcodes(&MOVZX_BYTE),
+ is_load_complex_length_two.clone(),
+ );
+
+ e.enc_i32_i64_instp(
+ sload8_complex,
+ recipe.opcodes(&MOVSX_BYTE),
+ is_load_complex_length_two.clone(),
+ );
+ }
+
+ let is_store_complex_length_three =
+ InstructionPredicate::new_length_equals(&*formats.store_complex, 3);
+
+ for recipe in &[rec_stWithIndex, rec_stWithIndexDisp8, rec_stWithIndexDisp32] {
+ e.enc_i32_i64_instp(
+ store_complex,
+ recipe.opcodes(&MOV_STORE),
+ is_store_complex_length_three.clone(),
+ );
+ e.enc_r32_r64_instp(
+ store_complex,
+ recipe.opcodes(&MOV_STORE),
+ is_store_complex_length_three.clone(),
+ );
+ e.enc_x86_64_instp(
+ istore32_complex,
+ recipe.opcodes(&MOV_STORE),
+ is_store_complex_length_three.clone(),
+ );
+ e.enc_both_instp(
+ istore16_complex.bind(I32),
+ recipe.opcodes(&MOV_STORE_16),
+ is_store_complex_length_three.clone(),
+ );
+ e.enc_x86_64_instp(
+ istore16_complex.bind(I64),
+ recipe.opcodes(&MOV_STORE_16),
+ is_store_complex_length_three.clone(),
+ );
+ }
+
+ for recipe in &[
+ rec_stWithIndex_abcd,
+ rec_stWithIndexDisp8_abcd,
+ rec_stWithIndexDisp32_abcd,
+ ] {
+ e.enc_both_instp(
+ istore8_complex.bind(I32),
+ recipe.opcodes(&MOV_BYTE_STORE),
+ is_store_complex_length_three.clone(),
+ );
+ e.enc_x86_64_instp(
+ istore8_complex.bind(I64),
+ recipe.opcodes(&MOV_BYTE_STORE),
+ is_store_complex_length_three.clone(),
+ );
+ }
+
+ for recipe in &[rec_st, rec_stDisp8, rec_stDisp32] {
+ e.enc_i32_i64_ld_st(store, true, recipe.opcodes(&MOV_STORE));
+ e.enc_r32_r64_ld_st(store, true, recipe.opcodes(&MOV_STORE));
+ e.enc_x86_64(istore32.bind(I64).bind(Any), recipe.opcodes(&MOV_STORE));
+ e.enc_i32_i64_ld_st(istore16, false, recipe.opcodes(&MOV_STORE_16));
+ }
+
+ // Byte stores are more complicated because the registers they can address
+ // depend on the presence of a REX prefix. The st*_abcd recipes fall back to
+ // the corresponding st* recipes when a REX prefix is applied.
+
+ for recipe in &[rec_st_abcd, rec_stDisp8_abcd, rec_stDisp32_abcd] {
+ e.enc_both(istore8.bind(I32).bind(Any), recipe.opcodes(&MOV_BYTE_STORE));
+ e.enc_x86_64(istore8.bind(I64).bind(Any), recipe.opcodes(&MOV_BYTE_STORE));
+ }
+
+ e.enc_i32_i64_explicit_rex(spill, rec_spillSib32.opcodes(&MOV_STORE));
+ e.enc_i32_i64_explicit_rex(regspill, rec_regspill32.opcodes(&MOV_STORE));
+ e.enc_r32_r64_rex_only(spill, rec_spillSib32.opcodes(&MOV_STORE));
+ e.enc_r32_r64_rex_only(regspill, rec_regspill32.opcodes(&MOV_STORE));
+
+ // Use a 32-bit write for spilling `b1`, `i8` and `i16` to avoid
+ // constraining the permitted registers.
+ // See MIN_SPILL_SLOT_SIZE which makes this safe.
+
+ e.enc_both(spill.bind(B1), rec_spillSib32.opcodes(&MOV_STORE));
+ e.enc_both(regspill.bind(B1), rec_regspill32.opcodes(&MOV_STORE));
+ for &ty in &[I8, I16] {
+ e.enc_both(spill.bind(ty), rec_spillSib32.opcodes(&MOV_STORE));
+ e.enc_both(regspill.bind(ty), rec_regspill32.opcodes(&MOV_STORE));
+ }
+
+ for recipe in &[rec_ld, rec_ldDisp8, rec_ldDisp32] {
+ e.enc_i32_i64_ld_st(load, true, recipe.opcodes(&MOV_LOAD));
+ e.enc_r32_r64_ld_st(load, true, recipe.opcodes(&MOV_LOAD));
+ e.enc_x86_64(uload32.bind(I64), recipe.opcodes(&MOV_LOAD));
+ e.enc64(sload32.bind(I64), recipe.opcodes(&MOVSXD).rex().w());
+ e.enc_i32_i64_ld_st(uload16, true, recipe.opcodes(&MOVZX_WORD));
+ e.enc_i32_i64_ld_st(sload16, true, recipe.opcodes(&MOVSX_WORD));
+ e.enc_i32_i64_ld_st(uload8, true, recipe.opcodes(&MOVZX_BYTE));
+ e.enc_i32_i64_ld_st(sload8, true, recipe.opcodes(&MOVSX_BYTE));
+ }
+
+ e.enc_i32_i64_explicit_rex(fill, rec_fillSib32.opcodes(&MOV_LOAD));
+ e.enc_i32_i64_explicit_rex(regfill, rec_regfill32.opcodes(&MOV_LOAD));
+ e.enc_r32_r64_rex_only(fill, rec_fillSib32.opcodes(&MOV_LOAD));
+ e.enc_r32_r64_rex_only(regfill, rec_regfill32.opcodes(&MOV_LOAD));
+
+ // No-op fills, created by late-stage redundant-fill removal.
+ for &ty in &[I64, I32, I16, I8] {
+ e.enc64_rec(fill_nop.bind(ty), rec_fillnull, 0);
+ e.enc32_rec(fill_nop.bind(ty), rec_fillnull, 0);
+ }
+ e.enc64_rec(fill_nop.bind(B1), rec_fillnull, 0);
+ e.enc32_rec(fill_nop.bind(B1), rec_fillnull, 0);
+ for &ty in &[F64, F32] {
+ e.enc64_rec(fill_nop.bind(ty), rec_ffillnull, 0);
+ e.enc32_rec(fill_nop.bind(ty), rec_ffillnull, 0);
+ }
+ for &ty in &[R64, R32] {
+ e.enc64_rec(fill_nop.bind(ty), rec_fillnull, 0);
+ e.enc32_rec(fill_nop.bind(ty), rec_fillnull, 0);
+ }
+
+ // Load 32 bits from `b1`, `i8` and `i16` spill slots. See `spill.b1` above.
+
+ e.enc_both(fill.bind(B1), rec_fillSib32.opcodes(&MOV_LOAD));
+ e.enc_both(regfill.bind(B1), rec_regfill32.opcodes(&MOV_LOAD));
+ for &ty in &[I8, I16] {
+ e.enc_both(fill.bind(ty), rec_fillSib32.opcodes(&MOV_LOAD));
+ e.enc_both(regfill.bind(ty), rec_regfill32.opcodes(&MOV_LOAD));
+ }
+
+ // Push and Pop.
+ e.enc32(x86_push.bind(I32), rec_pushq.opcodes(&PUSH_REG));
+ e.enc_x86_64(x86_push.bind(I64), rec_pushq.opcodes(&PUSH_REG));
+
+ e.enc32(x86_pop.bind(I32), rec_popq.opcodes(&POP_REG));
+ e.enc_x86_64(x86_pop.bind(I64), rec_popq.opcodes(&POP_REG));
+
+ // Stack-slot-to-the-same-stack-slot copy, which is guaranteed to turn
+ // into a no-op.
+ // The same encoding is generated for both the 64- and 32-bit architectures.
+ for &ty in &[I64, I32, I16, I8] {
+ e.enc64_rec(copy_nop.bind(ty), rec_stacknull, 0);
+ e.enc32_rec(copy_nop.bind(ty), rec_stacknull, 0);
+ }
+ for &ty in &[F64, F32] {
+ e.enc64_rec(copy_nop.bind(ty), rec_stacknull, 0);
+ e.enc32_rec(copy_nop.bind(ty), rec_stacknull, 0);
+ }
+
+ // Adjust SP down by a dynamic value (or up, with a negative operand).
+ e.enc32(adjust_sp_down.bind(I32), rec_adjustsp.opcodes(&SUB));
+ e.enc64(
+ adjust_sp_down.bind(I64),
+ rec_adjustsp.opcodes(&SUB).rex().w(),
+ );
+
+ // Adjust SP up by an immediate (or down, with a negative immediate).
+ e.enc32(adjust_sp_up_imm, rec_adjustsp_ib.opcodes(&CMP_IMM8));
+ e.enc32(adjust_sp_up_imm, rec_adjustsp_id.opcodes(&CMP_IMM));
+ e.enc64(
+ adjust_sp_up_imm,
+ rec_adjustsp_ib.opcodes(&CMP_IMM8).rex().w(),
+ );
+ e.enc64(
+ adjust_sp_up_imm,
+ rec_adjustsp_id.opcodes(&CMP_IMM).rex().w(),
+ );
+
+ // Adjust SP down by an immediate (or up, with a negative immediate).
+ e.enc32(
+ adjust_sp_down_imm,
+ rec_adjustsp_ib.opcodes(&CMP_IMM8).rrr(5),
+ );
+ e.enc32(adjust_sp_down_imm, rec_adjustsp_id.opcodes(&CMP_IMM).rrr(5));
+ e.enc64(
+ adjust_sp_down_imm,
+ rec_adjustsp_ib.opcodes(&CMP_IMM8).rrr(5).rex().w(),
+ );
+ e.enc64(
+ adjust_sp_down_imm,
+ rec_adjustsp_id.opcodes(&CMP_IMM).rrr(5).rex().w(),
+ );
+}
+
+#[inline(never)]
+fn define_fpu_moves(e: &mut PerCpuModeEncodings, shared_defs: &SharedDefinitions, r: &RecipeGroup) {
+ let shared = &shared_defs.instructions;
+
+ // Shorthands for instructions.
+ let bitcast = shared.by_name("bitcast");
+ let copy = shared.by_name("copy");
+ let regmove = shared.by_name("regmove");
+
+ // Shorthands for recipes.
+ let rec_frmov = r.template("frmov");
+ let rec_frurm = r.template("frurm");
+ let rec_furm = r.template("furm");
+ let rec_rfumr = r.template("rfumr");
+
+ // Floating-point moves.
+ // movd
+ e.enc_both(
+ bitcast.bind(F32).bind(I32),
+ rec_frurm.opcodes(&MOVD_LOAD_XMM),
+ );
+ e.enc_both(
+ bitcast.bind(I32).bind(F32),
+ rec_rfumr.opcodes(&MOVD_STORE_XMM),
+ );
+
+ // movq
+ e.enc64(
+ bitcast.bind(F64).bind(I64),
+ rec_frurm.opcodes(&MOVD_LOAD_XMM).rex().w(),
+ );
+ e.enc64(
+ bitcast.bind(I64).bind(F64),
+ rec_rfumr.opcodes(&MOVD_STORE_XMM).rex().w(),
+ );
+
+ // movaps
+ e.enc_both(copy.bind(F32), rec_furm.opcodes(&MOVAPS_LOAD));
+ e.enc_both(copy.bind(F64), rec_furm.opcodes(&MOVAPS_LOAD));
+
+ // TODO For x86-64, only define REX forms for now, since we can't describe the special regunit
+ // immediate operands with the current constraint language.
+ e.enc32(regmove.bind(F32), rec_frmov.opcodes(&MOVAPS_LOAD));
+ e.enc64(regmove.bind(F32), rec_frmov.opcodes(&MOVAPS_LOAD).rex());
+
+ // TODO For x86-64, only define REX forms for now, since we can't describe the special regunit
+ // immediate operands with the current constraint language.
+ e.enc32(regmove.bind(F64), rec_frmov.opcodes(&MOVAPS_LOAD));
+ e.enc64(regmove.bind(F64), rec_frmov.opcodes(&MOVAPS_LOAD).rex());
+}
+
+#[inline(never)]
+fn define_fpu_memory(
+ e: &mut PerCpuModeEncodings,
+ shared_defs: &SharedDefinitions,
+ r: &RecipeGroup,
+) {
+ let shared = &shared_defs.instructions;
+
+ // Shorthands for instructions.
+ let fill = shared.by_name("fill");
+ let load = shared.by_name("load");
+ let load_complex = shared.by_name("load_complex");
+ let regfill = shared.by_name("regfill");
+ let regspill = shared.by_name("regspill");
+ let spill = shared.by_name("spill");
+ let store = shared.by_name("store");
+ let store_complex = shared.by_name("store_complex");
+
+ // Shorthands for recipes.
+ let rec_ffillSib32 = r.template("ffillSib32");
+ let rec_fld = r.template("fld");
+ let rec_fldDisp32 = r.template("fldDisp32");
+ let rec_fldDisp8 = r.template("fldDisp8");
+ let rec_fldWithIndex = r.template("fldWithIndex");
+ let rec_fldWithIndexDisp32 = r.template("fldWithIndexDisp32");
+ let rec_fldWithIndexDisp8 = r.template("fldWithIndexDisp8");
+ let rec_fregfill32 = r.template("fregfill32");
+ let rec_fregspill32 = r.template("fregspill32");
+ let rec_fspillSib32 = r.template("fspillSib32");
+ let rec_fst = r.template("fst");
+ let rec_fstDisp32 = r.template("fstDisp32");
+ let rec_fstDisp8 = r.template("fstDisp8");
+ let rec_fstWithIndex = r.template("fstWithIndex");
+ let rec_fstWithIndexDisp32 = r.template("fstWithIndexDisp32");
+ let rec_fstWithIndexDisp8 = r.template("fstWithIndexDisp8");
+
+ // Float loads and stores.
+ e.enc_both(load.bind(F32).bind(Any), rec_fld.opcodes(&MOVSS_LOAD));
+ e.enc_both(load.bind(F32).bind(Any), rec_fldDisp8.opcodes(&MOVSS_LOAD));
+ e.enc_both(load.bind(F32).bind(Any), rec_fldDisp32.opcodes(&MOVSS_LOAD));
+
+ e.enc_both(
+ load_complex.bind(F32),
+ rec_fldWithIndex.opcodes(&MOVSS_LOAD),
+ );
+ e.enc_both(
+ load_complex.bind(F32),
+ rec_fldWithIndexDisp8.opcodes(&MOVSS_LOAD),
+ );
+ e.enc_both(
+ load_complex.bind(F32),
+ rec_fldWithIndexDisp32.opcodes(&MOVSS_LOAD),
+ );
+
+ e.enc_both(load.bind(F64).bind(Any), rec_fld.opcodes(&MOVSD_LOAD));
+ e.enc_both(load.bind(F64).bind(Any), rec_fldDisp8.opcodes(&MOVSD_LOAD));
+ e.enc_both(load.bind(F64).bind(Any), rec_fldDisp32.opcodes(&MOVSD_LOAD));
+
+ e.enc_both(
+ load_complex.bind(F64),
+ rec_fldWithIndex.opcodes(&MOVSD_LOAD),
+ );
+ e.enc_both(
+ load_complex.bind(F64),
+ rec_fldWithIndexDisp8.opcodes(&MOVSD_LOAD),
+ );
+ e.enc_both(
+ load_complex.bind(F64),
+ rec_fldWithIndexDisp32.opcodes(&MOVSD_LOAD),
+ );
+
+ e.enc_both(store.bind(F32).bind(Any), rec_fst.opcodes(&MOVSS_STORE));
+ e.enc_both(
+ store.bind(F32).bind(Any),
+ rec_fstDisp8.opcodes(&MOVSS_STORE),
+ );
+ e.enc_both(
+ store.bind(F32).bind(Any),
+ rec_fstDisp32.opcodes(&MOVSS_STORE),
+ );
+
+ e.enc_both(
+ store_complex.bind(F32),
+ rec_fstWithIndex.opcodes(&MOVSS_STORE),
+ );
+ e.enc_both(
+ store_complex.bind(F32),
+ rec_fstWithIndexDisp8.opcodes(&MOVSS_STORE),
+ );
+ e.enc_both(
+ store_complex.bind(F32),
+ rec_fstWithIndexDisp32.opcodes(&MOVSS_STORE),
+ );
+
+ e.enc_both(store.bind(F64).bind(Any), rec_fst.opcodes(&MOVSD_STORE));
+ e.enc_both(
+ store.bind(F64).bind(Any),
+ rec_fstDisp8.opcodes(&MOVSD_STORE),
+ );
+ e.enc_both(
+ store.bind(F64).bind(Any),
+ rec_fstDisp32.opcodes(&MOVSD_STORE),
+ );
+
+ e.enc_both(
+ store_complex.bind(F64),
+ rec_fstWithIndex.opcodes(&MOVSD_STORE),
+ );
+ e.enc_both(
+ store_complex.bind(F64),
+ rec_fstWithIndexDisp8.opcodes(&MOVSD_STORE),
+ );
+ e.enc_both(
+ store_complex.bind(F64),
+ rec_fstWithIndexDisp32.opcodes(&MOVSD_STORE),
+ );
+
+ e.enc_both(fill.bind(F32), rec_ffillSib32.opcodes(&MOVSS_LOAD));
+ e.enc_both(regfill.bind(F32), rec_fregfill32.opcodes(&MOVSS_LOAD));
+ e.enc_both(fill.bind(F64), rec_ffillSib32.opcodes(&MOVSD_LOAD));
+ e.enc_both(regfill.bind(F64), rec_fregfill32.opcodes(&MOVSD_LOAD));
+
+ e.enc_both(spill.bind(F32), rec_fspillSib32.opcodes(&MOVSS_STORE));
+ e.enc_both(regspill.bind(F32), rec_fregspill32.opcodes(&MOVSS_STORE));
+ e.enc_both(spill.bind(F64), rec_fspillSib32.opcodes(&MOVSD_STORE));
+ e.enc_both(regspill.bind(F64), rec_fregspill32.opcodes(&MOVSD_STORE));
+}
+
+#[inline(never)]
+fn define_fpu_ops(
+ e: &mut PerCpuModeEncodings,
+ shared_defs: &SharedDefinitions,
+ settings: &SettingGroup,
+ x86: &InstructionGroup,
+ r: &RecipeGroup,
+) {
+ let shared = &shared_defs.instructions;
+ let formats = &shared_defs.formats;
+
+ // Shorthands for instructions.
+ let ceil = shared.by_name("ceil");
+ let f32const = shared.by_name("f32const");
+ let f64const = shared.by_name("f64const");
+ let fadd = shared.by_name("fadd");
+ let fcmp = shared.by_name("fcmp");
+ let fcvt_from_sint = shared.by_name("fcvt_from_sint");
+ let fdemote = shared.by_name("fdemote");
+ let fdiv = shared.by_name("fdiv");
+ let ffcmp = shared.by_name("ffcmp");
+ let floor = shared.by_name("floor");
+ let fmul = shared.by_name("fmul");
+ let fpromote = shared.by_name("fpromote");
+ let fsub = shared.by_name("fsub");
+ let nearest = shared.by_name("nearest");
+ let sqrt = shared.by_name("sqrt");
+ let trunc = shared.by_name("trunc");
+ let x86_cvtt2si = x86.by_name("x86_cvtt2si");
+ let x86_fmax = x86.by_name("x86_fmax");
+ let x86_fmin = x86.by_name("x86_fmin");
+
+ // Shorthands for recipes.
+ let rec_f32imm_z = r.template("f32imm_z");
+ let rec_f64imm_z = r.template("f64imm_z");
+ let rec_fa = r.template("fa");
+ let rec_fcmp = r.template("fcmp");
+ let rec_fcscc = r.template("fcscc");
+ let rec_frurm = r.template("frurm");
+ let rec_furm = r.template("furm");
+ let rec_furmi_rnd = r.template("furmi_rnd");
+ let rec_rfurm = r.template("rfurm");
+
+ // Predicates shorthands.
+ let use_sse41 = settings.predicate_by_name("use_sse41");
+
+ // Floating-point constants equal to 0.0 can be encoded using either `xorps` or `xorpd`, for
+ // 32-bit and 64-bit floats respectively.
+ let is_zero_32_bit_float =
+ InstructionPredicate::new_is_zero_32bit_float(&*formats.unary_ieee32, "imm");
+ e.enc32_instp(
+ f32const,
+ rec_f32imm_z.opcodes(&XORPS),
+ is_zero_32_bit_float.clone(),
+ );
+
+ let is_zero_64_bit_float =
+ InstructionPredicate::new_is_zero_64bit_float(&*formats.unary_ieee64, "imm");
+ e.enc32_instp(
+ f64const,
+ rec_f64imm_z.opcodes(&XORPD),
+ is_zero_64_bit_float.clone(),
+ );
+
+ e.enc_x86_64_instp(f32const, rec_f32imm_z.opcodes(&XORPS), is_zero_32_bit_float);
+ e.enc_x86_64_instp(f64const, rec_f64imm_z.opcodes(&XORPD), is_zero_64_bit_float);
+
+ // cvtsi2ss
+ e.enc_i32_i64(fcvt_from_sint.bind(F32), rec_frurm.opcodes(&CVTSI2SS));
+
+ // cvtsi2sd
+ e.enc_i32_i64(fcvt_from_sint.bind(F64), rec_frurm.opcodes(&CVTSI2SD));
+
+ // cvtss2sd
+ e.enc_both(fpromote.bind(F64).bind(F32), rec_furm.opcodes(&CVTSS2SD));
+
+ // cvtsd2ss
+ e.enc_both(fdemote.bind(F32).bind(F64), rec_furm.opcodes(&CVTSD2SS));
+
+ // cvttss2si
+ e.enc_both(
+ x86_cvtt2si.bind(I32).bind(F32),
+ rec_rfurm.opcodes(&CVTTSS2SI),
+ );
+ e.enc64(
+ x86_cvtt2si.bind(I64).bind(F32),
+ rec_rfurm.opcodes(&CVTTSS2SI).rex().w(),
+ );
+
+ // cvttsd2si
+ e.enc_both(
+ x86_cvtt2si.bind(I32).bind(F64),
+ rec_rfurm.opcodes(&CVTTSD2SI),
+ );
+ e.enc64(
+ x86_cvtt2si.bind(I64).bind(F64),
+ rec_rfurm.opcodes(&CVTTSD2SI).rex().w(),
+ );
+
+ // Exact square roots.
+ e.enc_both(sqrt.bind(F32), rec_furm.opcodes(&SQRTSS));
+ e.enc_both(sqrt.bind(F64), rec_furm.opcodes(&SQRTSD));
+
+ // Rounding. The recipe looks at the opcode to pick an immediate.
+ for inst in &[nearest, floor, ceil, trunc] {
+ e.enc_both_isap(inst.bind(F32), rec_furmi_rnd.opcodes(&ROUNDSS), use_sse41);
+ e.enc_both_isap(inst.bind(F64), rec_furmi_rnd.opcodes(&ROUNDSD), use_sse41);
+ }
+
+ // Binary arithmetic ops.
+ e.enc_both(fadd.bind(F32), rec_fa.opcodes(&ADDSS));
+ e.enc_both(fadd.bind(F64), rec_fa.opcodes(&ADDSD));
+
+ e.enc_both(fsub.bind(F32), rec_fa.opcodes(&SUBSS));
+ e.enc_both(fsub.bind(F64), rec_fa.opcodes(&SUBSD));
+
+ e.enc_both(fmul.bind(F32), rec_fa.opcodes(&MULSS));
+ e.enc_both(fmul.bind(F64), rec_fa.opcodes(&MULSD));
+
+ e.enc_both(fdiv.bind(F32), rec_fa.opcodes(&DIVSS));
+ e.enc_both(fdiv.bind(F64), rec_fa.opcodes(&DIVSD));
+
+ e.enc_both(x86_fmin.bind(F32), rec_fa.opcodes(&MINSS));
+ e.enc_both(x86_fmin.bind(F64), rec_fa.opcodes(&MINSD));
+
+ e.enc_both(x86_fmax.bind(F32), rec_fa.opcodes(&MAXSS));
+ e.enc_both(x86_fmax.bind(F64), rec_fa.opcodes(&MAXSD));
+
+ // Comparisons.
+ //
+ // This only covers the condition codes in `supported_floatccs`, the rest are
+ // handled by legalization patterns.
+ e.enc_both(fcmp.bind(F32), rec_fcscc.opcodes(&UCOMISS));
+ e.enc_both(fcmp.bind(F64), rec_fcscc.opcodes(&UCOMISD));
+ e.enc_both(ffcmp.bind(F32), rec_fcmp.opcodes(&UCOMISS));
+ e.enc_both(ffcmp.bind(F64), rec_fcmp.opcodes(&UCOMISD));
+}
+
+#[inline(never)]
+fn define_alu(
+ e: &mut PerCpuModeEncodings,
+ shared_defs: &SharedDefinitions,
+ settings: &SettingGroup,
+ x86: &InstructionGroup,
+ r: &RecipeGroup,
+) {
+ let shared = &shared_defs.instructions;
+
+ // Shorthands for instructions.
+ let clz = shared.by_name("clz");
+ let ctz = shared.by_name("ctz");
+ let icmp = shared.by_name("icmp");
+ let icmp_imm = shared.by_name("icmp_imm");
+ let ifcmp = shared.by_name("ifcmp");
+ let ifcmp_imm = shared.by_name("ifcmp_imm");
+ let ifcmp_sp = shared.by_name("ifcmp_sp");
+ let ishl = shared.by_name("ishl");
+ let ishl_imm = shared.by_name("ishl_imm");
+ let popcnt = shared.by_name("popcnt");
+ let rotl = shared.by_name("rotl");
+ let rotl_imm = shared.by_name("rotl_imm");
+ let rotr = shared.by_name("rotr");
+ let rotr_imm = shared.by_name("rotr_imm");
+ let selectif = shared.by_name("selectif");
+ let selectif_spectre_guard = shared.by_name("selectif_spectre_guard");
+ let sshr = shared.by_name("sshr");
+ let sshr_imm = shared.by_name("sshr_imm");
+ let trueff = shared.by_name("trueff");
+ let trueif = shared.by_name("trueif");
+ let ushr = shared.by_name("ushr");
+ let ushr_imm = shared.by_name("ushr_imm");
+ let x86_bsf = x86.by_name("x86_bsf");
+ let x86_bsr = x86.by_name("x86_bsr");
+
+ // Shorthands for recipes.
+ let rec_bsf_and_bsr = r.template("bsf_and_bsr");
+ let rec_cmov = r.template("cmov");
+ let rec_icscc = r.template("icscc");
+ let rec_icscc_ib = r.template("icscc_ib");
+ let rec_icscc_id = r.template("icscc_id");
+ let rec_rcmp = r.template("rcmp");
+ let rec_rcmp_ib = r.template("rcmp_ib");
+ let rec_rcmp_id = r.template("rcmp_id");
+ let rec_rcmp_sp = r.template("rcmp_sp");
+ let rec_rc = r.template("rc");
+ let rec_setf_abcd = r.template("setf_abcd");
+ let rec_seti_abcd = r.template("seti_abcd");
+ let rec_urm = r.template("urm");
+
+ // Predicates shorthands.
+ let use_popcnt = settings.predicate_by_name("use_popcnt");
+ let use_lzcnt = settings.predicate_by_name("use_lzcnt");
+ let use_bmi1 = settings.predicate_by_name("use_bmi1");
+
+ let band = shared.by_name("band");
+ let band_imm = shared.by_name("band_imm");
+ let band_not = shared.by_name("band_not");
+ let bnot = shared.by_name("bnot");
+ let bor = shared.by_name("bor");
+ let bor_imm = shared.by_name("bor_imm");
+ let bxor = shared.by_name("bxor");
+ let bxor_imm = shared.by_name("bxor_imm");
+ let iadd = shared.by_name("iadd");
+ let iadd_ifcarry = shared.by_name("iadd_ifcarry");
+ let iadd_ifcin = shared.by_name("iadd_ifcin");
+ let iadd_ifcout = shared.by_name("iadd_ifcout");
+ let iadd_imm = shared.by_name("iadd_imm");
+ let imul = shared.by_name("imul");
+ let isub = shared.by_name("isub");
+ let isub_ifbin = shared.by_name("isub_ifbin");
+ let isub_ifborrow = shared.by_name("isub_ifborrow");
+ let isub_ifbout = shared.by_name("isub_ifbout");
+ let x86_sdivmodx = x86.by_name("x86_sdivmodx");
+ let x86_smulx = x86.by_name("x86_smulx");
+ let x86_udivmodx = x86.by_name("x86_udivmodx");
+ let x86_umulx = x86.by_name("x86_umulx");
+
+ let rec_div = r.template("div");
+ let rec_fa = r.template("fa");
+ let rec_fax = r.template("fax");
+ let rec_mulx = r.template("mulx");
+ let rec_r_ib = r.template("r_ib");
+ let rec_r_id = r.template("r_id");
+ let rec_rin = r.template("rin");
+ let rec_rio = r.template("rio");
+ let rec_rout = r.template("rout");
+ let rec_rr = r.template("rr");
+ let rec_rrx = r.template("rrx");
+ let rec_ur = r.template("ur");
+
+ e.enc_i32_i64(iadd, rec_rr.opcodes(&ADD));
+ e.enc_i32_i64(iadd_ifcout, rec_rout.opcodes(&ADD));
+ e.enc_i32_i64(iadd_ifcin, rec_rin.opcodes(&ADC));
+ e.enc_i32_i64(iadd_ifcarry, rec_rio.opcodes(&ADC));
+ e.enc_i32_i64(iadd_imm, rec_r_ib.opcodes(&ADD_IMM8_SIGN_EXTEND).rrr(0));
+ e.enc_i32_i64(iadd_imm, rec_r_id.opcodes(&ADD_IMM).rrr(0));
+
+ e.enc_i32_i64(isub, rec_rr.opcodes(&SUB));
+ e.enc_i32_i64(isub_ifbout, rec_rout.opcodes(&SUB));
+ e.enc_i32_i64(isub_ifbin, rec_rin.opcodes(&SBB));
+ e.enc_i32_i64(isub_ifborrow, rec_rio.opcodes(&SBB));
+
+ e.enc_i32_i64(band, rec_rr.opcodes(&AND));
+ e.enc_b32_b64(band, rec_rr.opcodes(&AND));
+
+ // TODO: band_imm.i64 with an unsigned 32-bit immediate can be encoded as band_imm.i32. Can
+ // even use the single-byte immediate for 0xffff_ffXX masks.
+
+ e.enc_i32_i64(band_imm, rec_r_ib.opcodes(&AND_IMM8_SIGN_EXTEND).rrr(4));
+ e.enc_i32_i64(band_imm, rec_r_id.opcodes(&AND_IMM).rrr(4));
+
+ e.enc_i32_i64(bor, rec_rr.opcodes(&OR));
+ e.enc_b32_b64(bor, rec_rr.opcodes(&OR));
+ e.enc_i32_i64(bor_imm, rec_r_ib.opcodes(&OR_IMM8_SIGN_EXTEND).rrr(1));
+ e.enc_i32_i64(bor_imm, rec_r_id.opcodes(&OR_IMM).rrr(1));
+
+ e.enc_i32_i64(bxor, rec_rr.opcodes(&XOR));
+ e.enc_b32_b64(bxor, rec_rr.opcodes(&XOR));
+ e.enc_i32_i64(bxor_imm, rec_r_ib.opcodes(&XOR_IMM8_SIGN_EXTEND).rrr(6));
+ e.enc_i32_i64(bxor_imm, rec_r_id.opcodes(&XOR_IMM).rrr(6));
+
+ // x86 has a bitwise not instruction NOT.
+ e.enc_i32_i64(bnot, rec_ur.opcodes(&NOT).rrr(2));
+ e.enc_b32_b64(bnot, rec_ur.opcodes(&NOT).rrr(2));
+ e.enc_both(bnot.bind(B1), rec_ur.opcodes(&NOT).rrr(2));
+
+ // Also add `b1` encodings for the logic instructions.
+ // TODO: Should this be done with 8-bit instructions? It would improve partial register
+ // dependencies.
+ e.enc_both(band.bind(B1), rec_rr.opcodes(&AND));
+ e.enc_both(bor.bind(B1), rec_rr.opcodes(&OR));
+ e.enc_both(bxor.bind(B1), rec_rr.opcodes(&XOR));
+
+ e.enc_i32_i64(imul, rec_rrx.opcodes(&IMUL));
+ e.enc_i32_i64(x86_sdivmodx, rec_div.opcodes(&IDIV).rrr(7));
+ e.enc_i32_i64(x86_udivmodx, rec_div.opcodes(&DIV).rrr(6));
+
+ e.enc_i32_i64(x86_smulx, rec_mulx.opcodes(&IMUL_RDX_RAX).rrr(5));
+ e.enc_i32_i64(x86_umulx, rec_mulx.opcodes(&MUL).rrr(4));
+
+ // Binary bitwise ops.
+ //
+ // The F64 version is intentionally encoded using the single-precision opcode:
+ // the operation is identical and the encoding is one byte shorter.
+ e.enc_both(band.bind(F32), rec_fa.opcodes(&ANDPS));
+ e.enc_both(band.bind(F64), rec_fa.opcodes(&ANDPS));
+
+ e.enc_both(bor.bind(F32), rec_fa.opcodes(&ORPS));
+ e.enc_both(bor.bind(F64), rec_fa.opcodes(&ORPS));
+
+ e.enc_both(bxor.bind(F32), rec_fa.opcodes(&XORPS));
+ e.enc_both(bxor.bind(F64), rec_fa.opcodes(&XORPS));
+
+ // The `andnps(x,y)` instruction computes `~x&y`, while `band_not(x,y)` is `x&~y`.
+ e.enc_both(band_not.bind(F32), rec_fax.opcodes(&ANDNPS));
+ e.enc_both(band_not.bind(F64), rec_fax.opcodes(&ANDNPS));
+
+ // Shifts and rotates.
+ // Note that the dynamic shift amount is only masked by 5 or 6 bits; the 8-bit
+ // and 16-bit shifts would need explicit masking.
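+ // The `rc` recipe uses the CL-count forms of these opcodes, i.e. the shift amount is
+ // read from %cl.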
+
+ for &(inst, rrr) in &[(rotl, 0), (rotr, 1), (ishl, 4), (ushr, 5), (sshr, 7)] {
+ // Cannot use enc_i32_i64 for this pattern because these instructions also
+ // require the shift-amount type to be bound (including `Any` in 64-bit mode).
+ e.enc32(inst.bind(I32).bind(I8), rec_rc.opcodes(&ROTATE_CL).rrr(rrr));
+ e.enc32(
+ inst.bind(I32).bind(I16),
+ rec_rc.opcodes(&ROTATE_CL).rrr(rrr),
+ );
+ e.enc32(
+ inst.bind(I32).bind(I32),
+ rec_rc.opcodes(&ROTATE_CL).rrr(rrr),
+ );
+ e.enc64(
+ inst.bind(I64).bind(Any),
+ rec_rc.opcodes(&ROTATE_CL).rrr(rrr).rex().w(),
+ );
+ e.enc64(
+ inst.bind(I32).bind(Any),
+ rec_rc.opcodes(&ROTATE_CL).rrr(rrr).rex(),
+ );
+ e.enc64(
+ inst.bind(I32).bind(Any),
+ rec_rc.opcodes(&ROTATE_CL).rrr(rrr),
+ );
+ }
+
+ e.enc_i32_i64(rotl_imm, rec_r_ib.opcodes(&ROTATE_IMM8).rrr(0));
+ e.enc_i32_i64(rotr_imm, rec_r_ib.opcodes(&ROTATE_IMM8).rrr(1));
+ e.enc_i32_i64(ishl_imm, rec_r_ib.opcodes(&ROTATE_IMM8).rrr(4));
+ e.enc_i32_i64(ushr_imm, rec_r_ib.opcodes(&ROTATE_IMM8).rrr(5));
+ e.enc_i32_i64(sshr_imm, rec_r_ib.opcodes(&ROTATE_IMM8).rrr(7));
+
+ // Population count.
+ e.enc32_isap(popcnt.bind(I32), rec_urm.opcodes(&POPCNT), use_popcnt);
+ e.enc64_isap(
+ popcnt.bind(I64),
+ rec_urm.opcodes(&POPCNT).rex().w(),
+ use_popcnt,
+ );
+ e.enc64_isap(popcnt.bind(I32), rec_urm.opcodes(&POPCNT).rex(), use_popcnt);
+ e.enc64_isap(popcnt.bind(I32), rec_urm.opcodes(&POPCNT), use_popcnt);
+
+ // Count leading zero bits.
+ e.enc32_isap(clz.bind(I32), rec_urm.opcodes(&LZCNT), use_lzcnt);
+ e.enc64_isap(clz.bind(I64), rec_urm.opcodes(&LZCNT).rex().w(), use_lzcnt);
+ e.enc64_isap(clz.bind(I32), rec_urm.opcodes(&LZCNT).rex(), use_lzcnt);
+ e.enc64_isap(clz.bind(I32), rec_urm.opcodes(&LZCNT), use_lzcnt);
+
+ // Count trailing zero bits.
+ e.enc32_isap(ctz.bind(I32), rec_urm.opcodes(&TZCNT), use_bmi1);
+ e.enc64_isap(ctz.bind(I64), rec_urm.opcodes(&TZCNT).rex().w(), use_bmi1);
+ e.enc64_isap(ctz.bind(I32), rec_urm.opcodes(&TZCNT).rex(), use_bmi1);
+ e.enc64_isap(ctz.bind(I32), rec_urm.opcodes(&TZCNT), use_bmi1);
+
+ // Bit scan forward and reverse.
+ e.enc_i32_i64(x86_bsf, rec_bsf_and_bsr.opcodes(&BIT_SCAN_FORWARD));
+ e.enc_i32_i64(x86_bsr, rec_bsf_and_bsr.opcodes(&BIT_SCAN_REVERSE));
+
+ // Comparisons
+ e.enc_i32_i64(icmp, rec_icscc.opcodes(&CMP_REG));
+ e.enc_i32_i64(icmp_imm, rec_icscc_ib.opcodes(&CMP_IMM8).rrr(7));
+ e.enc_i32_i64(icmp_imm, rec_icscc_id.opcodes(&CMP_IMM).rrr(7));
+ e.enc_i32_i64(ifcmp, rec_rcmp.opcodes(&CMP_REG));
+ e.enc_i32_i64(ifcmp_imm, rec_rcmp_ib.opcodes(&CMP_IMM8).rrr(7));
+ e.enc_i32_i64(ifcmp_imm, rec_rcmp_id.opcodes(&CMP_IMM).rrr(7));
+ // TODO: We could special-case ifcmp_imm(x, 0) to TEST(x, x).
+
+ e.enc32(ifcmp_sp.bind(I32), rec_rcmp_sp.opcodes(&CMP_REG));
+ e.enc64(ifcmp_sp.bind(I64), rec_rcmp_sp.opcodes(&CMP_REG).rex().w());
+
+ // Convert flags to bool.
+ // This encodes `b1` as an 8-bit low register with the value 0 or 1.
+ e.enc_both(trueif, rec_seti_abcd.opcodes(&SET_BYTE_IF_OVERFLOW));
+ e.enc_both(trueff, rec_setf_abcd.opcodes(&SET_BYTE_IF_OVERFLOW));
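+ // (The *_IF_OVERFLOW / CMOV_OVERFLOW constants are just the cc=0 base opcodes; the
+ // setCC/cmov recipes appear to substitute the actual condition code at emission.)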
+
+ // Conditional move (a.k.a. integer select).
+ e.enc_i32_i64(selectif, rec_cmov.opcodes(&CMOV_OVERFLOW));
+ // A Spectre-guard integer select is exactly the same as a selectif, but
+ // is not associated with any other legalization rules and is not
+ // recognized by any optimizations, so it must arrive here unmodified
+ // and in its original place.
+ e.enc_i32_i64(selectif_spectre_guard, rec_cmov.opcodes(&CMOV_OVERFLOW));
+}
+
+#[inline(never)]
+#[allow(clippy::cognitive_complexity)]
+fn define_simd(
+ e: &mut PerCpuModeEncodings,
+ shared_defs: &SharedDefinitions,
+ settings: &SettingGroup,
+ x86: &InstructionGroup,
+ r: &RecipeGroup,
+) {
+ let shared = &shared_defs.instructions;
+ let formats = &shared_defs.formats;
+
+ // Shorthands for instructions.
+ let avg_round = shared.by_name("avg_round");
+ let bitcast = shared.by_name("bitcast");
+ let bor = shared.by_name("bor");
+ let bxor = shared.by_name("bxor");
+ let copy = shared.by_name("copy");
+ let copy_nop = shared.by_name("copy_nop");
+ let copy_to_ssa = shared.by_name("copy_to_ssa");
+ let fadd = shared.by_name("fadd");
+ let fcmp = shared.by_name("fcmp");
+ let fcvt_from_sint = shared.by_name("fcvt_from_sint");
+ let fdiv = shared.by_name("fdiv");
+ let fill = shared.by_name("fill");
+ let fill_nop = shared.by_name("fill_nop");
+ let fmul = shared.by_name("fmul");
+ let fsub = shared.by_name("fsub");
+ let iabs = shared.by_name("iabs");
+ let iadd = shared.by_name("iadd");
+ let icmp = shared.by_name("icmp");
+ let imul = shared.by_name("imul");
+ let ishl_imm = shared.by_name("ishl_imm");
+ let load = shared.by_name("load");
+ let load_complex = shared.by_name("load_complex");
+ let raw_bitcast = shared.by_name("raw_bitcast");
+ let regfill = shared.by_name("regfill");
+ let regmove = shared.by_name("regmove");
+ let regspill = shared.by_name("regspill");
+ let sadd_sat = shared.by_name("sadd_sat");
+ let scalar_to_vector = shared.by_name("scalar_to_vector");
+ let sload8x8 = shared.by_name("sload8x8");
+ let sload8x8_complex = shared.by_name("sload8x8_complex");
+ let sload16x4 = shared.by_name("sload16x4");
+ let sload16x4_complex = shared.by_name("sload16x4_complex");
+ let sload32x2 = shared.by_name("sload32x2");
+ let sload32x2_complex = shared.by_name("sload32x2_complex");
+ let spill = shared.by_name("spill");
+ let sqrt = shared.by_name("sqrt");
+ let sshr_imm = shared.by_name("sshr_imm");
+ let ssub_sat = shared.by_name("ssub_sat");
+ let store = shared.by_name("store");
+ let store_complex = shared.by_name("store_complex");
+ let swiden_low = shared.by_name("swiden_low");
+ let uadd_sat = shared.by_name("uadd_sat");
+ let uload8x8 = shared.by_name("uload8x8");
+ let uload8x8_complex = shared.by_name("uload8x8_complex");
+ let uload16x4 = shared.by_name("uload16x4");
+ let uload16x4_complex = shared.by_name("uload16x4_complex");
+ let uload32x2 = shared.by_name("uload32x2");
+ let uload32x2_complex = shared.by_name("uload32x2_complex");
+ let snarrow = shared.by_name("snarrow");
+ let unarrow = shared.by_name("unarrow");
+ let uwiden_low = shared.by_name("uwiden_low");
+ let ushr_imm = shared.by_name("ushr_imm");
+ let usub_sat = shared.by_name("usub_sat");
+ let vconst = shared.by_name("vconst");
+ let vselect = shared.by_name("vselect");
+ let x86_cvtt2si = x86.by_name("x86_cvtt2si");
+ let x86_insertps = x86.by_name("x86_insertps");
+ let x86_fmax = x86.by_name("x86_fmax");
+ let x86_fmin = x86.by_name("x86_fmin");
+ let x86_movlhps = x86.by_name("x86_movlhps");
+ let x86_movsd = x86.by_name("x86_movsd");
+ let x86_pblendw = x86.by_name("x86_pblendw");
+ let x86_pextr = x86.by_name("x86_pextr");
+ let x86_pinsr = x86.by_name("x86_pinsr");
+ let x86_pmaxs = x86.by_name("x86_pmaxs");
+ let x86_pmaxu = x86.by_name("x86_pmaxu");
+ let x86_pmins = x86.by_name("x86_pmins");
+ let x86_pminu = x86.by_name("x86_pminu");
+ let x86_pmullq = x86.by_name("x86_pmullq");
+ let x86_pmuludq = x86.by_name("x86_pmuludq");
+ let x86_palignr = x86.by_name("x86_palignr");
+ let x86_pshufb = x86.by_name("x86_pshufb");
+ let x86_pshufd = x86.by_name("x86_pshufd");
+ let x86_psll = x86.by_name("x86_psll");
+ let x86_psra = x86.by_name("x86_psra");
+ let x86_psrl = x86.by_name("x86_psrl");
+ let x86_ptest = x86.by_name("x86_ptest");
+ let x86_punpckh = x86.by_name("x86_punpckh");
+ let x86_punpckl = x86.by_name("x86_punpckl");
+ let x86_vcvtudq2ps = x86.by_name("x86_vcvtudq2ps");
+
+ // Shorthands for recipes.
+ let rec_blend = r.template("blend");
+ let rec_evex_reg_vvvv_rm_128 = r.template("evex_reg_vvvv_rm_128");
+ let rec_evex_reg_rm_128 = r.template("evex_reg_rm_128");
+ let rec_f_ib = r.template("f_ib");
+ let rec_fa = r.template("fa");
+ let rec_fa_ib = r.template("fa_ib");
+ let rec_fax = r.template("fax");
+ let rec_fcmp = r.template("fcmp");
+ let rec_ffillSib32 = r.template("ffillSib32");
+ let rec_ffillnull = r.recipe("ffillnull");
+ let rec_fld = r.template("fld");
+ let rec_fldDisp32 = r.template("fldDisp32");
+ let rec_fldDisp8 = r.template("fldDisp8");
+ let rec_fldWithIndex = r.template("fldWithIndex");
+ let rec_fldWithIndexDisp32 = r.template("fldWithIndexDisp32");
+ let rec_fldWithIndexDisp8 = r.template("fldWithIndexDisp8");
+ let rec_fregfill32 = r.template("fregfill32");
+ let rec_fregspill32 = r.template("fregspill32");
+ let rec_frmov = r.template("frmov");
+ let rec_frurm = r.template("frurm");
+ let rec_fspillSib32 = r.template("fspillSib32");
+ let rec_fst = r.template("fst");
+ let rec_fstDisp32 = r.template("fstDisp32");
+ let rec_fstDisp8 = r.template("fstDisp8");
+ let rec_fstWithIndex = r.template("fstWithIndex");
+ let rec_fstWithIndexDisp32 = r.template("fstWithIndexDisp32");
+ let rec_fstWithIndexDisp8 = r.template("fstWithIndexDisp8");
+ let rec_furm = r.template("furm");
+ let rec_furm_reg_to_ssa = r.template("furm_reg_to_ssa");
+ let rec_icscc_fpr = r.template("icscc_fpr");
+ let rec_null_fpr = r.recipe("null_fpr");
+ let rec_pfcmp = r.template("pfcmp");
+ let rec_r_ib_unsigned_fpr = r.template("r_ib_unsigned_fpr");
+ let rec_r_ib_unsigned_gpr = r.template("r_ib_unsigned_gpr");
+ let rec_r_ib_unsigned_r = r.template("r_ib_unsigned_r");
+ let rec_stacknull = r.recipe("stacknull");
+ let rec_vconst = r.template("vconst");
+ let rec_vconst_optimized = r.template("vconst_optimized");
+
+ // Shorthands for predicates.
+ settings.predicate_by_name("all_ones_funcaddrs_and_not_is_pic");
+ settings.predicate_by_name("not_all_ones_funcaddrs_and_not_is_pic");
+ let use_ssse3_simd = settings.predicate_by_name("use_ssse3_simd");
+ let use_sse41_simd = settings.predicate_by_name("use_sse41_simd");
+ let use_sse42_simd = settings.predicate_by_name("use_sse42_simd");
+ let use_avx512dq_simd = settings.predicate_by_name("use_avx512dq_simd");
+ let use_avx512vl_simd = settings.predicate_by_name("use_avx512vl_simd");
+
+ // SIMD vector size: eventually multiple vector sizes may be supported but for now only
+ // SSE-sized vectors are available.
+ let sse_vector_size: u64 = 128;
+
+ // SIMD splat: before x86 can use vector data, it must be moved to XMM registers; see
+ // legalize.rs for how this is done; once there, x86_pshuf* (below) is used for broadcasting the
+ // value across the register.
+
+ let allowed_simd_type = |t: &LaneType| t.lane_bits() >= 8 && t.lane_bits() < 128;
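+ // (i.e. lanes of 8, 16, 32, or 64 bits: 16x8 down to 2x64 at the 128-bit SSE width)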
+
+ // PSHUFB, 8-bit shuffle using two XMM registers.
+ for ty in ValueType::all_lane_types().filter(allowed_simd_type) {
+ let instruction = x86_pshufb.bind(vector(ty, sse_vector_size));
+ let template = rec_fa.opcodes(&PSHUFB);
+ e.enc_both_inferred_maybe_isap(instruction.clone(), template.clone(), Some(use_ssse3_simd));
+ }
+
+ // PSHUFD, 32-bit shuffle using one XMM register and a u8 immediate.
+ for ty in ValueType::all_lane_types().filter(|t| t.lane_bits() == 32) {
+ let instruction = x86_pshufd.bind(vector(ty, sse_vector_size));
+ let template = rec_r_ib_unsigned_fpr.opcodes(&PSHUFD);
+ e.enc_both_inferred(instruction, template);
+ }
+
+ // SIMD vselect; controlling value of vselect is a boolean vector, so each lane should be
+ // either all ones or all zeroes, which makes it possible to always use 8-bit PBLENDVB;
+ // for 32/64-bit lanes we can also use BLENDVPS and BLENDVPD.
+ for ty in ValueType::all_lane_types().filter(allowed_simd_type) {
+ let opcode = match ty.lane_bits() {
+ 32 => &BLENDVPS,
+ 64 => &BLENDVPD,
+ _ => &PBLENDVB,
+ };
+ let instruction = vselect.bind(vector(ty, sse_vector_size));
+ let template = rec_blend.opcodes(opcode);
+ e.enc_both_inferred_maybe_isap(instruction, template, Some(use_sse41_simd));
+ }
+
+ // PBLENDW, select lanes using a u8 immediate.
+ for ty in ValueType::all_lane_types().filter(|t| t.lane_bits() == 16) {
+ let instruction = x86_pblendw.bind(vector(ty, sse_vector_size));
+ let template = rec_fa_ib.opcodes(&PBLENDW);
+ e.enc_both_inferred_maybe_isap(instruction, template, Some(use_sse41_simd));
+ }
+
+ // SIMD scalar_to_vector; this uses MOV to copy the scalar value to an XMM register; according
+ // to the Intel manual: "When the destination operand is an XMM register, the source operand is
+ // written to the low doubleword of the register and the register is zero-extended to 128 bits."
+ for ty in ValueType::all_lane_types().filter(allowed_simd_type) {
+ let instruction = scalar_to_vector.bind(vector(ty, sse_vector_size));
+ if ty.is_float() {
+ // No need to move floats--they already live in XMM registers.
+ e.enc_32_64_rec(instruction, rec_null_fpr, 0);
+ } else {
+ let template = rec_frurm.opcodes(&MOVD_LOAD_XMM);
+ if ty.lane_bits() < 64 {
+ e.enc_both_inferred(instruction, template);
+ } else {
+ // No 32-bit encodings for 64-bit widths.
+ assert_eq!(ty.lane_bits(), 64);
+ e.enc64(instruction, template.rex().w());
+ }
+ }
+ }
+
+ // SIMD insertlane
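+ // PINSRW is available from SSE2, while PINSRB and PINSRD/PINSRQ arrived with SSE4.1,
+ // hence the per-size ISA predicates below.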
+ for ty in ValueType::all_lane_types().filter(allowed_simd_type) {
+ let (opcode, isap): (&[_], _) = match ty.lane_bits() {
+ 8 => (&PINSRB, Some(use_sse41_simd)),
+ 16 => (&PINSRW, None),
+ 32 | 64 => (&PINSR, Some(use_sse41_simd)),
+ _ => panic!("invalid size for SIMD insertlane"),
+ };
+
+ let instruction = x86_pinsr.bind(vector(ty, sse_vector_size));
+ let template = rec_r_ib_unsigned_r.opcodes(opcode);
+ if ty.lane_bits() < 64 {
+ e.enc_both_inferred_maybe_isap(instruction, template, isap);
+ } else {
+ // It turns out the 64-bit widths have REX.W encodings and are only available
+ // on x86_64.
+ e.enc64_maybe_isap(instruction, template.rex().w(), isap);
+ }
+ }
+
+ // For legalizing insertlane with floats, INSERTPS from SSE4.1.
+ {
+ let instruction = x86_insertps.bind(vector(F32, sse_vector_size));
+ let template = rec_fa_ib.opcodes(&INSERTPS);
+ e.enc_both_inferred_maybe_isap(instruction, template, Some(use_sse41_simd));
+ }
+
+ // For legalizing insertlane with floats, MOVSD from SSE2.
+ {
+ let instruction = x86_movsd.bind(vector(F64, sse_vector_size));
+ let template = rec_fa.opcodes(&MOVSD_LOAD);
+ e.enc_both_inferred(instruction, template); // from SSE2
+ }
+
+ // For legalizing insertlane with floats, MOVLHPS from SSE.
+ {
+ let instruction = x86_movlhps.bind(vector(F64, sse_vector_size));
+ let template = rec_fa.opcodes(&MOVLHPS);
+ e.enc_both_inferred(instruction, template); // from SSE
+ }
+
+ // SIMD extractlane
+ for ty in ValueType::all_lane_types().filter(allowed_simd_type) {
+ let opcode = match ty.lane_bits() {
+ 8 => &PEXTRB,
+ 16 => &PEXTRW,
+ 32 | 64 => &PEXTR,
+ _ => panic!("invalid size for SIMD extractlane"),
+ };
+
+ let instruction = x86_pextr.bind(vector(ty, sse_vector_size));
+ let template = rec_r_ib_unsigned_gpr.opcodes(opcode);
+ if ty.lane_bits() < 64 {
+ e.enc_both_inferred_maybe_isap(instruction, template, Some(use_sse41_simd));
+ } else {
+ // It turns out the 64-bit widths have REX.W encodings and are only available
+ // on x86_64.
+ e.enc64_maybe_isap(instruction, template.rex().w(), Some(use_sse41_simd));
+ }
+ }
+
+ // SIMD packing/unpacking
+ for ty in ValueType::all_lane_types().filter(allowed_simd_type) {
+ let (high, low) = match ty.lane_bits() {
+ 8 => (&PUNPCKHBW, &PUNPCKLBW),
+ 16 => (&PUNPCKHWD, &PUNPCKLWD),
+ 32 => (&PUNPCKHDQ, &PUNPCKLDQ),
+ 64 => (&PUNPCKHQDQ, &PUNPCKLQDQ),
+ _ => panic!("invalid size for SIMD packing/unpacking"),
+ };
+
+ e.enc_both_inferred(
+ x86_punpckh.bind(vector(ty, sse_vector_size)),
+ rec_fa.opcodes(high),
+ );
+ e.enc_both_inferred(
+ x86_punpckl.bind(vector(ty, sse_vector_size)),
+ rec_fa.opcodes(low),
+ );
+ }
+
+ // SIMD narrow/widen
+ for (ty, opcodes) in &[(I16, &PACKSSWB), (I32, &PACKSSDW)] {
+ let snarrow = snarrow.bind(vector(*ty, sse_vector_size));
+ e.enc_both_inferred(snarrow, rec_fa.opcodes(*opcodes));
+ }
+ for (ty, opcodes, isap) in &[
+ (I16, &PACKUSWB[..], None),
+ (I32, &PACKUSDW[..], Some(use_sse41_simd)),
+ ] {
+ let unarrow = unarrow.bind(vector(*ty, sse_vector_size));
+ e.enc_both_inferred_maybe_isap(unarrow, rec_fa.opcodes(*opcodes), *isap);
+ }
+ for (ty, swiden_opcode, uwiden_opcode) in &[
+ (I8, &PMOVSXBW[..], &PMOVZXBW[..]),
+ (I16, &PMOVSXWD[..], &PMOVZXWD[..]),
+ ] {
+ let isap = Some(use_sse41_simd);
+ let swiden_low = swiden_low.bind(vector(*ty, sse_vector_size));
+ e.enc_both_inferred_maybe_isap(swiden_low, rec_furm.opcodes(*swiden_opcode), isap);
+ let uwiden_low = uwiden_low.bind(vector(*ty, sse_vector_size));
+ e.enc_both_inferred_maybe_isap(uwiden_low, rec_furm.opcodes(*uwiden_opcode), isap);
+ }
+ for ty in &[I8, I16, I32, I64] {
+ e.enc_both_inferred_maybe_isap(
+ x86_palignr.bind(vector(*ty, sse_vector_size)),
+ rec_fa_ib.opcodes(&PALIGNR[..]),
+ Some(use_ssse3_simd),
+ );
+ }
+
+ // SIMD bitcast all 128-bit vectors to each other (for legalizing splat.x16x8).
+ for from_type in ValueType::all_lane_types().filter(allowed_simd_type) {
+ for to_type in
+ ValueType::all_lane_types().filter(|t| allowed_simd_type(t) && *t != from_type)
+ {
+ let instruction = raw_bitcast
+ .bind(vector(to_type, sse_vector_size))
+ .bind(vector(from_type, sse_vector_size));
+ e.enc_32_64_rec(instruction, rec_null_fpr, 0);
+ }
+ }
+
+ // SIMD raw bitcast floats to vector (and back); assumes that floats are already stored in an
+ // XMM register.
+ for float_type in &[F32, F64] {
+ for lane_type in ValueType::all_lane_types().filter(allowed_simd_type) {
+ e.enc_32_64_rec(
+ raw_bitcast
+ .bind(vector(lane_type, sse_vector_size))
+ .bind(*float_type),
+ rec_null_fpr,
+ 0,
+ );
+ e.enc_32_64_rec(
+ raw_bitcast
+ .bind(*float_type)
+ .bind(vector(lane_type, sse_vector_size)),
+ rec_null_fpr,
+ 0,
+ );
+ }
+ }
+
+ // SIMD conversions
+ {
+ let fcvt_from_sint_32 = fcvt_from_sint
+ .bind(vector(F32, sse_vector_size))
+ .bind(vector(I32, sse_vector_size));
+ e.enc_both(fcvt_from_sint_32, rec_furm.opcodes(&CVTDQ2PS));
+
+ e.enc_32_64_maybe_isap(
+ x86_vcvtudq2ps,
+ rec_evex_reg_rm_128.opcodes(&VCVTUDQ2PS),
+ Some(use_avx512vl_simd), // TODO need an OR predicate to join with AVX512F
+ );
+
+ e.enc_both_inferred(
+ x86_cvtt2si
+ .bind(vector(I32, sse_vector_size))
+ .bind(vector(F32, sse_vector_size)),
+ rec_furm.opcodes(&CVTTPS2DQ),
+ );
+ }
+
+ // SIMD vconst for special cases (all zeroes, all ones)
+ // this must be encoded prior to the MOVUPS implementation (below) so the compiler sees this
+ // encoding first
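+ // (An all-zeroes constant can be materialized with PXOR xmm,xmm and an all-ones constant
+ // with PCMPEQB xmm,xmm, avoiding a load from the constant pool.)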
+ for ty in ValueType::all_lane_types().filter(allowed_simd_type) {
+ let instruction = vconst.bind(vector(ty, sse_vector_size));
+
+ let is_zero_128bit =
+ InstructionPredicate::new_is_all_zeroes(&*formats.unary_const, "constant_handle");
+ let template = rec_vconst_optimized.opcodes(&PXOR).infer_rex();
+ e.enc_32_64_func(instruction.clone(), template, |builder| {
+ builder.inst_predicate(is_zero_128bit)
+ });
+
+ let is_ones_128bit =
+ InstructionPredicate::new_is_all_ones(&*formats.unary_const, "constant_handle");
+ let template = rec_vconst_optimized.opcodes(&PCMPEQB).infer_rex();
+ e.enc_32_64_func(instruction, template, |builder| {
+ builder.inst_predicate(is_ones_128bit)
+ });
+ }
+
+ // SIMD vconst using MOVUPS
+ // TODO it would be ideal if eventually this became the more efficient MOVAPS but we would have
+ // to guarantee that the constants are aligned when emitted and there is currently no mechanism
+ // for that; alternately, constants could be loaded into XMM registers using a sequence like:
+ // MOVQ + MOVHPD + MOVQ + MOVLPD (this allows the constants to be immediates instead of stored
+ // in memory) but some performance measurements are needed.
+ for ty in ValueType::all_lane_types().filter(allowed_simd_type) {
+ let instruction = vconst.bind(vector(ty, sse_vector_size));
+ let template = rec_vconst.opcodes(&MOVUPS_LOAD);
+ e.enc_both_inferred(instruction, template); // from SSE
+ }
+
+ // SIMD register movement: store, load, spill, fill, regmove, etc. All of these use encodings of
+ // MOVUPS and MOVAPS from SSE (TODO ideally all of these would either use MOVAPS when we have
+ // alignment or type-specific encodings, see https://github.com/bytecodealliance/wasmtime/issues/1124).
+ // Also, it would be ideal to infer REX prefixes for all of these instructions but for the
+ // time being only instructions with common recipes have `infer_rex()` support.
+ for ty in ValueType::all_lane_types().filter(allowed_simd_type) {
+ // Store
+ let bound_store = store.bind(vector(ty, sse_vector_size)).bind(Any);
+ e.enc_both_inferred(bound_store.clone(), rec_fst.opcodes(&MOVUPS_STORE));
+ e.enc_both_inferred(bound_store.clone(), rec_fstDisp8.opcodes(&MOVUPS_STORE));
+ e.enc_both_inferred(bound_store, rec_fstDisp32.opcodes(&MOVUPS_STORE));
+
+ // Store complex
+ let bound_store_complex = store_complex.bind(vector(ty, sse_vector_size));
+ e.enc_both(
+ bound_store_complex.clone(),
+ rec_fstWithIndex.opcodes(&MOVUPS_STORE),
+ );
+ e.enc_both(
+ bound_store_complex.clone(),
+ rec_fstWithIndexDisp8.opcodes(&MOVUPS_STORE),
+ );
+ e.enc_both(
+ bound_store_complex,
+ rec_fstWithIndexDisp32.opcodes(&MOVUPS_STORE),
+ );
+
+ // Load
+ let bound_load = load.bind(vector(ty, sse_vector_size)).bind(Any);
+ e.enc_both_inferred(bound_load.clone(), rec_fld.opcodes(&MOVUPS_LOAD));
+ e.enc_both_inferred(bound_load.clone(), rec_fldDisp8.opcodes(&MOVUPS_LOAD));
+ e.enc_both_inferred(bound_load, rec_fldDisp32.opcodes(&MOVUPS_LOAD));
+
+ // Load complex
+ let bound_load_complex = load_complex.bind(vector(ty, sse_vector_size));
+ e.enc_both(
+ bound_load_complex.clone(),
+ rec_fldWithIndex.opcodes(&MOVUPS_LOAD),
+ );
+ e.enc_both(
+ bound_load_complex.clone(),
+ rec_fldWithIndexDisp8.opcodes(&MOVUPS_LOAD),
+ );
+ e.enc_both(
+ bound_load_complex,
+ rec_fldWithIndexDisp32.opcodes(&MOVUPS_LOAD),
+ );
+
+ // Spill
+ let bound_spill = spill.bind(vector(ty, sse_vector_size));
+ e.enc_both(bound_spill, rec_fspillSib32.opcodes(&MOVUPS_STORE));
+ let bound_regspill = regspill.bind(vector(ty, sse_vector_size));
+ e.enc_both(bound_regspill, rec_fregspill32.opcodes(&MOVUPS_STORE));
+
+ // Fill
+ let bound_fill = fill.bind(vector(ty, sse_vector_size));
+ e.enc_both(bound_fill, rec_ffillSib32.opcodes(&MOVUPS_LOAD));
+ let bound_regfill = regfill.bind(vector(ty, sse_vector_size));
+ e.enc_both(bound_regfill, rec_fregfill32.opcodes(&MOVUPS_LOAD));
+ let bound_fill_nop = fill_nop.bind(vector(ty, sse_vector_size));
+ e.enc_32_64_rec(bound_fill_nop, rec_ffillnull, 0);
+
+ // Regmove
+ let bound_regmove = regmove.bind(vector(ty, sse_vector_size));
+ e.enc_both(bound_regmove, rec_frmov.opcodes(&MOVAPS_LOAD));
+
+ // Copy
+ let bound_copy = copy.bind(vector(ty, sse_vector_size));
+ e.enc_both(bound_copy, rec_furm.opcodes(&MOVAPS_LOAD));
+ let bound_copy_to_ssa = copy_to_ssa.bind(vector(ty, sse_vector_size));
+ e.enc_both(bound_copy_to_ssa, rec_furm_reg_to_ssa.opcodes(&MOVAPS_LOAD));
+ let bound_copy_nop = copy_nop.bind(vector(ty, sse_vector_size));
+ e.enc_32_64_rec(bound_copy_nop, rec_stacknull, 0);
+ }
+
+ // SIMD load extend
+ for (inst, opcodes) in &[
+ (uload8x8, &PMOVZXBW),
+ (uload16x4, &PMOVZXWD),
+ (uload32x2, &PMOVZXDQ),
+ (sload8x8, &PMOVSXBW),
+ (sload16x4, &PMOVSXWD),
+ (sload32x2, &PMOVSXDQ),
+ ] {
+ let isap = Some(use_sse41_simd);
+ for recipe in &[rec_fld, rec_fldDisp8, rec_fldDisp32] {
+ let inst = *inst;
+ let template = recipe.opcodes(*opcodes);
+ e.enc_both_inferred_maybe_isap(inst.clone().bind(I32), template.clone(), isap);
+ e.enc64_maybe_isap(inst.bind(I64), template.infer_rex(), isap);
+ }
+ }
+
+ // SIMD load extend (complex addressing)
+ let is_load_complex_length_two =
+ InstructionPredicate::new_length_equals(&*formats.load_complex, 2);
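+ // i.e. the complex address has exactly two components (base plus index), which is what
+ // the *WithIndex recipes below expect.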
+ for (inst, opcodes) in &[
+ (uload8x8_complex, &PMOVZXBW),
+ (uload16x4_complex, &PMOVZXWD),
+ (uload32x2_complex, &PMOVZXDQ),
+ (sload8x8_complex, &PMOVSXBW),
+ (sload16x4_complex, &PMOVSXWD),
+ (sload32x2_complex, &PMOVSXDQ),
+ ] {
+ for recipe in &[
+ rec_fldWithIndex,
+ rec_fldWithIndexDisp8,
+ rec_fldWithIndexDisp32,
+ ] {
+ let template = recipe.opcodes(*opcodes);
+ let predicate = |encoding: EncodingBuilder| {
+ encoding
+ .isa_predicate(use_sse41_simd)
+ .inst_predicate(is_load_complex_length_two.clone())
+ };
+ e.enc32_func(inst.clone(), template.clone(), predicate);
+ // No infer_rex calculator for these recipes; place REX version first as in enc_x86_64.
+ e.enc64_func(inst.clone(), template.rex(), predicate);
+ e.enc64_func(inst.clone(), template, predicate);
+ }
+ }
+
+ // SIMD integer addition
+ for (ty, opcodes) in &[(I8, &PADDB), (I16, &PADDW), (I32, &PADDD), (I64, &PADDQ)] {
+ let iadd = iadd.bind(vector(*ty, sse_vector_size));
+ e.enc_both_inferred(iadd, rec_fa.opcodes(*opcodes));
+ }
+
+ // SIMD integer saturating addition
+ e.enc_both_inferred(
+ sadd_sat.bind(vector(I8, sse_vector_size)),
+ rec_fa.opcodes(&PADDSB),
+ );
+ e.enc_both_inferred(
+ sadd_sat.bind(vector(I16, sse_vector_size)),
+ rec_fa.opcodes(&PADDSW),
+ );
+ e.enc_both_inferred(
+ uadd_sat.bind(vector(I8, sse_vector_size)),
+ rec_fa.opcodes(&PADDUSB),
+ );
+ e.enc_both_inferred(
+ uadd_sat.bind(vector(I16, sse_vector_size)),
+ rec_fa.opcodes(&PADDUSW),
+ );
+
+ // SIMD integer subtraction
+ let isub = shared.by_name("isub");
+ for (ty, opcodes) in &[(I8, &PSUBB), (I16, &PSUBW), (I32, &PSUBD), (I64, &PSUBQ)] {
+ let isub = isub.bind(vector(*ty, sse_vector_size));
+ e.enc_both_inferred(isub, rec_fa.opcodes(*opcodes));
+ }
+
+ // SIMD integer saturating subtraction
+ e.enc_both_inferred(
+ ssub_sat.bind(vector(I8, sse_vector_size)),
+ rec_fa.opcodes(&PSUBSB),
+ );
+ e.enc_both_inferred(
+ ssub_sat.bind(vector(I16, sse_vector_size)),
+ rec_fa.opcodes(&PSUBSW),
+ );
+ e.enc_both_inferred(
+ usub_sat.bind(vector(I8, sse_vector_size)),
+ rec_fa.opcodes(&PSUBUSB),
+ );
+ e.enc_both_inferred(
+ usub_sat.bind(vector(I16, sse_vector_size)),
+ rec_fa.opcodes(&PSUBUSW),
+ );
+
+ // SIMD integer multiplication: the x86 ISA does not have instructions for multiplying I8x16
+ // and I64x2, and these are (at the time of writing) not necessary for WASM SIMD.
+ for (ty, opcodes, isap) in &[
+ (I16, &PMULLW[..], None),
+ (I32, &PMULLD[..], Some(use_sse41_simd)),
+ ] {
+ let imul = imul.bind(vector(*ty, sse_vector_size));
+ e.enc_both_inferred_maybe_isap(imul, rec_fa.opcodes(opcodes), *isap);
+ }
+
+ // SIMD multiplication with lane expansion.
+ e.enc_both_inferred(x86_pmuludq, rec_fa.opcodes(&PMULUDQ));
+
+ // SIMD integer multiplication for I64x2 using an AVX512 instruction.
+ {
+ e.enc_32_64_maybe_isap(
+ x86_pmullq,
+ rec_evex_reg_vvvv_rm_128.opcodes(&VPMULLQ).w(),
+ Some(use_avx512dq_simd), // TODO need an OR predicate to join with AVX512VL
+ );
+ }
+
+ // SIMD integer average with rounding.
+ for (ty, opcodes) in &[(I8, &PAVGB[..]), (I16, &PAVGW[..])] {
+ let avgr = avg_round.bind(vector(*ty, sse_vector_size));
+ e.enc_both_inferred(avgr, rec_fa.opcodes(opcodes));
+ }
+
+ // SIMD integer absolute value.
+ for (ty, opcodes) in &[(I8, &PABSB[..]), (I16, &PABSW[..]), (I32, &PABSD)] {
+ let iabs = iabs.bind(vector(*ty, sse_vector_size));
+ e.enc_both_inferred_maybe_isap(iabs, rec_furm.opcodes(opcodes), Some(use_ssse3_simd));
+ }
+
+ // SIMD logical operations
+ let band = shared.by_name("band");
+ let band_not = shared.by_name("band_not");
+ for ty in ValueType::all_lane_types().filter(allowed_simd_type) {
+ // and
+ let band = band.bind(vector(ty, sse_vector_size));
+ e.enc_both_inferred(band, rec_fa.opcodes(&PAND));
+
+ // and not (note flipped recipe operands to match band_not order)
+ let band_not = band_not.bind(vector(ty, sse_vector_size));
+ e.enc_both_inferred(band_not, rec_fax.opcodes(&PANDN));
+
+ // or
+ let bor = bor.bind(vector(ty, sse_vector_size));
+ e.enc_both_inferred(bor, rec_fa.opcodes(&POR));
+
+ // xor
+ let bxor = bxor.bind(vector(ty, sse_vector_size));
+ e.enc_both_inferred(bxor, rec_fa.opcodes(&PXOR));
+
+ // ptest
+ let x86_ptest = x86_ptest.bind(vector(ty, sse_vector_size));
+ e.enc_both_inferred_maybe_isap(x86_ptest, rec_fcmp.opcodes(&PTEST), Some(use_sse41_simd));
+ }
+
+ // SIMD bitcast from I32/I64 to the low bits of a vector (e.g. I64x2); this register movement
+ // allows SIMD shifts to be legalized more easily. TODO ideally this would be typed as an
+ // I128x1 but restrictions on the type builder prevent this; the general idea here is that
+ // the upper bits are all zeroed and do not form part of any separate lane. See
+ // https://github.com/bytecodealliance/wasmtime/issues/1140.
+ e.enc_both_inferred(
+ bitcast.bind(vector(I64, sse_vector_size)).bind(I32),
+ rec_frurm.opcodes(&MOVD_LOAD_XMM),
+ );
+ e.enc64(
+ bitcast.bind(vector(I64, sse_vector_size)).bind(I64),
+ rec_frurm.opcodes(&MOVD_LOAD_XMM).rex().w(),
+ );
+
+ // SIMD shift left
+ for (ty, opcodes) in &[(I16, &PSLLW), (I32, &PSLLD), (I64, &PSLLQ)] {
+ let x86_psll = x86_psll.bind(vector(*ty, sse_vector_size));
+ e.enc_both_inferred(x86_psll, rec_fa.opcodes(*opcodes));
+ }
+
+ // SIMD shift right (logical)
+ for (ty, opcodes) in &[(I16, &PSRLW), (I32, &PSRLD), (I64, &PSRLQ)] {
+ let x86_psrl = x86_psrl.bind(vector(*ty, sse_vector_size));
+ e.enc_both_inferred(x86_psrl, rec_fa.opcodes(*opcodes));
+ }
+
+ // SIMD shift right (arithmetic)
+ for (ty, opcodes) in &[(I16, &PSRAW), (I32, &PSRAD)] {
+ let x86_psra = x86_psra.bind(vector(*ty, sse_vector_size));
+ e.enc_both_inferred(x86_psra, rec_fa.opcodes(*opcodes));
+ }
+
+ // SIMD immediate shift
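+ // The PS*_IMM group opcodes share one opcode byte; the ModR/M reg field selects the
+ // operation: /6 = shift left, /2 = logical shift right, /4 = arithmetic shift right.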
+ for (ty, opcodes) in &[(I16, &PS_W_IMM), (I32, &PS_D_IMM), (I64, &PS_Q_IMM)] {
+ let ishl_imm = ishl_imm.bind(vector(*ty, sse_vector_size));
+ e.enc_both_inferred(ishl_imm, rec_f_ib.opcodes(*opcodes).rrr(6));
+
+ let ushr_imm = ushr_imm.bind(vector(*ty, sse_vector_size));
+ e.enc_both_inferred(ushr_imm, rec_f_ib.opcodes(*opcodes).rrr(2));
+
+ // One exception: PSRAQ does not exist for 64x2 in SSE2; it requires a higher CPU feature set.
+ if *ty != I64 {
+ let sshr_imm = sshr_imm.bind(vector(*ty, sse_vector_size));
+ e.enc_both_inferred(sshr_imm, rec_f_ib.opcodes(*opcodes).rrr(4));
+ }
+ }
+
+ // SIMD integer comparisons
+ {
+ use IntCC::*;
+ for (ty, cc, opcodes, isa_predicate) in &[
+ (I8, Equal, &PCMPEQB[..], None),
+ (I16, Equal, &PCMPEQW[..], None),
+ (I32, Equal, &PCMPEQD[..], None),
+ (I64, Equal, &PCMPEQQ[..], Some(use_sse41_simd)),
+ (I8, SignedGreaterThan, &PCMPGTB[..], None),
+ (I16, SignedGreaterThan, &PCMPGTW[..], None),
+ (I32, SignedGreaterThan, &PCMPGTD[..], None),
+ (I64, SignedGreaterThan, &PCMPGTQ, Some(use_sse42_simd)),
+ ] {
+ let instruction = icmp
+ .bind(Immediate::IntCC(*cc))
+ .bind(vector(*ty, sse_vector_size));
+ let template = rec_icscc_fpr.opcodes(opcodes);
+ e.enc_both_inferred_maybe_isap(instruction, template, *isa_predicate);
+ }
+ }
+
+ // SIMD min/max
+ for (ty, inst, opcodes, isa_predicate) in &[
+ (I8, x86_pmaxs, &PMAXSB[..], Some(use_sse41_simd)),
+ (I16, x86_pmaxs, &PMAXSW[..], None),
+ (I32, x86_pmaxs, &PMAXSD[..], Some(use_sse41_simd)),
+ (I8, x86_pmaxu, &PMAXUB[..], None),
+ (I16, x86_pmaxu, &PMAXUW[..], Some(use_sse41_simd)),
+ (I32, x86_pmaxu, &PMAXUD[..], Some(use_sse41_simd)),
+ (I8, x86_pmins, &PMINSB[..], Some(use_sse41_simd)),
+ (I16, x86_pmins, &PMINSW[..], None),
+ (I32, x86_pmins, &PMINSD[..], Some(use_sse41_simd)),
+ (I8, x86_pminu, &PMINUB[..], None),
+ (I16, x86_pminu, &PMINUW[..], Some(use_sse41_simd)),
+ (I32, x86_pminu, &PMINUD[..], Some(use_sse41_simd)),
+ ] {
+ let inst = inst.bind(vector(*ty, sse_vector_size));
+ e.enc_both_inferred_maybe_isap(inst, rec_fa.opcodes(opcodes), *isa_predicate);
+ }
+
+ // SIMD float comparisons
+ e.enc_both_inferred(
+ fcmp.bind(vector(F32, sse_vector_size)),
+ rec_pfcmp.opcodes(&CMPPS),
+ );
+ e.enc_both_inferred(
+ fcmp.bind(vector(F64, sse_vector_size)),
+ rec_pfcmp.opcodes(&CMPPD),
+ );
+
+ // SIMD float arithmetic
+ for (ty, inst, opcodes) in &[
+ (F32, fadd, &ADDPS[..]),
+ (F64, fadd, &ADDPD[..]),
+ (F32, fsub, &SUBPS[..]),
+ (F64, fsub, &SUBPD[..]),
+ (F32, fmul, &MULPS[..]),
+ (F64, fmul, &MULPD[..]),
+ (F32, fdiv, &DIVPS[..]),
+ (F64, fdiv, &DIVPD[..]),
+ (F32, x86_fmin, &MINPS[..]),
+ (F64, x86_fmin, &MINPD[..]),
+ (F32, x86_fmax, &MAXPS[..]),
+ (F64, x86_fmax, &MAXPD[..]),
+ ] {
+ let inst = inst.bind(vector(*ty, sse_vector_size));
+ e.enc_both_inferred(inst, rec_fa.opcodes(opcodes));
+ }
+ for (ty, inst, opcodes) in &[(F32, sqrt, &SQRTPS[..]), (F64, sqrt, &SQRTPD[..])] {
+ let inst = inst.bind(vector(*ty, sse_vector_size));
+ e.enc_both_inferred(inst, rec_furm.opcodes(opcodes));
+ }
+}
+
+#[inline(never)]
+fn define_entity_ref(
+ e: &mut PerCpuModeEncodings,
+ shared_defs: &SharedDefinitions,
+ settings: &SettingGroup,
+ r: &RecipeGroup,
+) {
+ let shared = &shared_defs.instructions;
+ let formats = &shared_defs.formats;
+
+ // Shorthands for instructions.
+ let const_addr = shared.by_name("const_addr");
+ let func_addr = shared.by_name("func_addr");
+ let stack_addr = shared.by_name("stack_addr");
+ let symbol_value = shared.by_name("symbol_value");
+
+ // Shorthands for recipes.
+ let rec_allones_fnaddr4 = r.template("allones_fnaddr4");
+ let rec_allones_fnaddr8 = r.template("allones_fnaddr8");
+ let rec_fnaddr4 = r.template("fnaddr4");
+ let rec_fnaddr8 = r.template("fnaddr8");
+ let rec_const_addr = r.template("const_addr");
+ let rec_got_fnaddr8 = r.template("got_fnaddr8");
+ let rec_got_gvaddr8 = r.template("got_gvaddr8");
+ let rec_gvaddr4 = r.template("gvaddr4");
+ let rec_gvaddr8 = r.template("gvaddr8");
+ let rec_pcrel_fnaddr8 = r.template("pcrel_fnaddr8");
+ let rec_pcrel_gvaddr8 = r.template("pcrel_gvaddr8");
+ let rec_spaddr_id = r.template("spaddr_id");
+
+ // Shorthands for predicates.
+ let all_ones_funcaddrs_and_not_is_pic =
+ settings.predicate_by_name("all_ones_funcaddrs_and_not_is_pic");
+ let is_pic = settings.predicate_by_name("is_pic");
+ let not_all_ones_funcaddrs_and_not_is_pic =
+ settings.predicate_by_name("not_all_ones_funcaddrs_and_not_is_pic");
+ let not_is_pic = settings.predicate_by_name("not_is_pic");
+
+ // Function addresses.
+
+ // Non-PIC, not-all-ones funcaddresses.
+ e.enc32_isap(
+ func_addr.bind(I32),
+ rec_fnaddr4.opcodes(&MOV_IMM),
+ not_all_ones_funcaddrs_and_not_is_pic,
+ );
+ e.enc64_isap(
+ func_addr.bind(I64),
+ rec_fnaddr8.opcodes(&MOV_IMM).rex().w(),
+ not_all_ones_funcaddrs_and_not_is_pic,
+ );
+
+ // Non-PIC, all-ones funcaddresses.
+ e.enc32_isap(
+ func_addr.bind(I32),
+ rec_allones_fnaddr4.opcodes(&MOV_IMM),
+ all_ones_funcaddrs_and_not_is_pic,
+ );
+ e.enc64_isap(
+ func_addr.bind(I64),
+ rec_allones_fnaddr8.opcodes(&MOV_IMM).rex().w(),
+ all_ones_funcaddrs_and_not_is_pic,
+ );
+
+ // 64-bit, colocated, both PIC and non-PIC. Use the lea instruction's pc-relative field.
+ let is_colocated_func =
+ InstructionPredicate::new_is_colocated_func(&*formats.func_addr, "func_ref");
+ e.enc64_instp(
+ func_addr.bind(I64),
+ rec_pcrel_fnaddr8.opcodes(&LEA).rex().w(),
+ is_colocated_func,
+ );
+
+ // 64-bit, non-colocated, PIC.
+ e.enc64_isap(
+ func_addr.bind(I64),
+ rec_got_fnaddr8.opcodes(&MOV_LOAD).rex().w(),
+ is_pic,
+ );
+
+ // Global addresses.
+
+ // Non-PIC.
+ e.enc32_isap(
+ symbol_value.bind(I32),
+ rec_gvaddr4.opcodes(&MOV_IMM),
+ not_is_pic,
+ );
+ e.enc64_isap(
+ symbol_value.bind(I64),
+ rec_gvaddr8.opcodes(&MOV_IMM).rex().w(),
+ not_is_pic,
+ );
+
+ // PIC, colocated.
+ e.enc64_func(
+ symbol_value.bind(I64),
+ rec_pcrel_gvaddr8.opcodes(&LEA).rex().w(),
+ |encoding| {
+ encoding
+ .isa_predicate(is_pic)
+ .inst_predicate(InstructionPredicate::new_is_colocated_data(formats))
+ },
+ );
+
+ // PIC, non-colocated.
+ e.enc64_isap(
+ symbol_value.bind(I64),
+ rec_got_gvaddr8.opcodes(&MOV_LOAD).rex().w(),
+ is_pic,
+ );
+
+ // Stack addresses.
+ //
+ // TODO: Add encoding rules for stack_load and stack_store, so that they
+ // don't get legalized to stack_addr + load/store.
+ e.enc64(stack_addr.bind(I64), rec_spaddr_id.opcodes(&LEA).rex().w());
+ e.enc32(stack_addr.bind(I32), rec_spaddr_id.opcodes(&LEA));
+
+ // Constant addresses (PIC).
+ e.enc64(const_addr.bind(I64), rec_const_addr.opcodes(&LEA).rex().w());
+ e.enc32(const_addr.bind(I32), rec_const_addr.opcodes(&LEA));
+}
+
+/// Control flow opcodes.
+#[inline(never)]
+fn define_control_flow(
+ e: &mut PerCpuModeEncodings,
+ shared_defs: &SharedDefinitions,
+ settings: &SettingGroup,
+ r: &RecipeGroup,
+) {
+ let shared = &shared_defs.instructions;
+ let formats = &shared_defs.formats;
+
+ // Shorthands for instructions.
+ let brff = shared.by_name("brff");
+ let brif = shared.by_name("brif");
+ let brnz = shared.by_name("brnz");
+ let brz = shared.by_name("brz");
+ let call = shared.by_name("call");
+ let call_indirect = shared.by_name("call_indirect");
+ let debugtrap = shared.by_name("debugtrap");
+ let indirect_jump_table_br = shared.by_name("indirect_jump_table_br");
+ let jump = shared.by_name("jump");
+ let jump_table_base = shared.by_name("jump_table_base");
+ let jump_table_entry = shared.by_name("jump_table_entry");
+ let return_ = shared.by_name("return");
+ let trap = shared.by_name("trap");
+ let trapff = shared.by_name("trapff");
+ let trapif = shared.by_name("trapif");
+ let resumable_trap = shared.by_name("resumable_trap");
+
+ // Shorthands for recipes.
+ let rec_brfb = r.template("brfb");
+ let rec_brfd = r.template("brfd");
+ let rec_brib = r.template("brib");
+ let rec_brid = r.template("brid");
+ let rec_call_id = r.template("call_id");
+ let rec_call_plt_id = r.template("call_plt_id");
+ let rec_call_r = r.template("call_r");
+ let rec_debugtrap = r.recipe("debugtrap");
+ let rec_indirect_jmp = r.template("indirect_jmp");
+ let rec_jmpb = r.template("jmpb");
+ let rec_jmpd = r.template("jmpd");
+ let rec_jt_base = r.template("jt_base");
+ let rec_jt_entry = r.template("jt_entry");
+ let rec_ret = r.template("ret");
+ let rec_t8jccb_abcd = r.template("t8jccb_abcd");
+ let rec_t8jccd_abcd = r.template("t8jccd_abcd");
+ let rec_t8jccd_long = r.template("t8jccd_long");
+ let rec_tjccb = r.template("tjccb");
+ let rec_tjccd = r.template("tjccd");
+ let rec_trap = r.template("trap");
+ let rec_trapif = r.recipe("trapif");
+ let rec_trapff = r.recipe("trapff");
+
+ // Shorthands for predicates.
+ let is_pic = settings.predicate_by_name("is_pic");
+
+ // Call/return
+
+ // 32-bit, both PIC and non-PIC.
+ e.enc32(call, rec_call_id.opcodes(&CALL_RELATIVE));
+
+ // 64-bit, colocated, both PIC and non-PIC. Use the call instruction's pc-relative field.
+ let is_colocated_func = InstructionPredicate::new_is_colocated_func(&*formats.call, "func_ref");
+ e.enc64_instp(call, rec_call_id.opcodes(&CALL_RELATIVE), is_colocated_func);
+
+ // 64-bit, non-colocated, PIC. There is no 64-bit non-colocated non-PIC version, since non-PIC
+ // is currently using the large model, which requires calls be lowered to
+ // func_addr+call_indirect.
+ e.enc64_isap(call, rec_call_plt_id.opcodes(&CALL_RELATIVE), is_pic);
+
+ e.enc32(
+ call_indirect.bind(I32),
+ rec_call_r.opcodes(&JUMP_ABSOLUTE).rrr(2),
+ );
+ e.enc64(
+ call_indirect.bind(I64),
+ rec_call_r.opcodes(&JUMP_ABSOLUTE).rrr(2).rex(),
+ );
+ e.enc64(
+ call_indirect.bind(I64),
+ rec_call_r.opcodes(&JUMP_ABSOLUTE).rrr(2),
+ );
+
+ e.enc32(return_, rec_ret.opcodes(&RET_NEAR));
+ e.enc64(return_, rec_ret.opcodes(&RET_NEAR));
+
+ // Branches.
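+ // Both the rel8 (short) and rel32 (near) forms are registered; branch relaxation later
+ // picks whichever form reaches the target.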
+ e.enc32(jump, rec_jmpb.opcodes(&JUMP_SHORT));
+ e.enc64(jump, rec_jmpb.opcodes(&JUMP_SHORT));
+ e.enc32(jump, rec_jmpd.opcodes(&JUMP_NEAR_RELATIVE));
+ e.enc64(jump, rec_jmpd.opcodes(&JUMP_NEAR_RELATIVE));
+
+ e.enc_both(brif, rec_brib.opcodes(&JUMP_SHORT_IF_OVERFLOW));
+ e.enc_both(brif, rec_brid.opcodes(&JUMP_NEAR_IF_OVERFLOW));
+
+ // Not all float condition codes are legal, see `supported_floatccs`.
+ e.enc_both(brff, rec_brfb.opcodes(&JUMP_SHORT_IF_OVERFLOW));
+ e.enc_both(brff, rec_brfd.opcodes(&JUMP_NEAR_IF_OVERFLOW));
+
+ // Note that the tjccd opcode will be prefixed with 0x0f.
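+ // The single-byte TEST opcodes double as Jcc rel32 opcodes once 0x0f-prefixed (e.g. 0x84
+ // is TEST r/m8,r8 on its own but JE rel32 as 0F 84), which is presumably why the TEST_*
+ // constants reappear for the tjccd forms here.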
+ e.enc_i32_i64_explicit_rex(brz, rec_tjccb.opcodes(&JUMP_SHORT_IF_EQUAL));
+ e.enc_i32_i64_explicit_rex(brz, rec_tjccd.opcodes(&TEST_BYTE_REG));
+ e.enc_i32_i64_explicit_rex(brnz, rec_tjccb.opcodes(&JUMP_SHORT_IF_NOT_EQUAL));
+ e.enc_i32_i64_explicit_rex(brnz, rec_tjccd.opcodes(&TEST_REG));
+
+ // Branch on a b1 value in a register only looks at the low 8 bits. See also
+ // bint encodings below.
+ //
+ // Start with the worst-case encoding for X86_32 only. The register allocator
+ // can't handle a branch with an ABCD-constrained operand.
+ e.enc32(brz.bind(B1), rec_t8jccd_long.opcodes(&TEST_BYTE_REG));
+ e.enc32(brnz.bind(B1), rec_t8jccd_long.opcodes(&TEST_REG));
+
+ e.enc_both(brz.bind(B1), rec_t8jccb_abcd.opcodes(&JUMP_SHORT_IF_EQUAL));
+ e.enc_both(brz.bind(B1), rec_t8jccd_abcd.opcodes(&TEST_BYTE_REG));
+ e.enc_both(
+ brnz.bind(B1),
+ rec_t8jccb_abcd.opcodes(&JUMP_SHORT_IF_NOT_EQUAL),
+ );
+ e.enc_both(brnz.bind(B1), rec_t8jccd_abcd.opcodes(&TEST_REG));
+
+ // Jump tables.
+ e.enc64(
+ jump_table_entry.bind(I64),
+ rec_jt_entry.opcodes(&MOVSXD).rex().w(),
+ );
+ e.enc32(jump_table_entry.bind(I32), rec_jt_entry.opcodes(&MOV_LOAD));
+
+ e.enc64(
+ jump_table_base.bind(I64),
+ rec_jt_base.opcodes(&LEA).rex().w(),
+ );
+ e.enc32(jump_table_base.bind(I32), rec_jt_base.opcodes(&LEA));
+
+ e.enc_x86_64(
+ indirect_jump_table_br.bind(I64),
+ rec_indirect_jmp.opcodes(&JUMP_ABSOLUTE).rrr(4),
+ );
+ e.enc32(
+ indirect_jump_table_br.bind(I32),
+ rec_indirect_jmp.opcodes(&JUMP_ABSOLUTE).rrr(4),
+ );
+
+ // Trap as ud2
+ e.enc32(trap, rec_trap.opcodes(&UNDEFINED2));
+ e.enc64(trap, rec_trap.opcodes(&UNDEFINED2));
+ e.enc32(resumable_trap, rec_trap.opcodes(&UNDEFINED2));
+ e.enc64(resumable_trap, rec_trap.opcodes(&UNDEFINED2));
+
+ // Debug trap as int3
+ e.enc32_rec(debugtrap, rec_debugtrap, 0);
+ e.enc64_rec(debugtrap, rec_debugtrap, 0);
+
+ e.enc32_rec(trapif, rec_trapif, 0);
+ e.enc64_rec(trapif, rec_trapif, 0);
+ e.enc32_rec(trapff, rec_trapff, 0);
+ e.enc64_rec(trapff, rec_trapff, 0);
+}
+
+/// Reference type instructions.
+#[inline(never)]
+fn define_reftypes(e: &mut PerCpuModeEncodings, shared_defs: &SharedDefinitions, r: &RecipeGroup) {
+ let shared = &shared_defs.instructions;
+
+ let is_null = shared.by_name("is_null");
+ let is_invalid = shared.by_name("is_invalid");
+ let null = shared.by_name("null");
+ let safepoint = shared.by_name("safepoint");
+
+ let rec_is_zero = r.template("is_zero");
+ let rec_is_invalid = r.template("is_invalid");
+ let rec_pu_id_ref = r.template("pu_id_ref");
+ let rec_safepoint = r.recipe("safepoint");
+
+ // Null references implemented as iconst 0.
+ e.enc32(null.bind(R32), rec_pu_id_ref.opcodes(&MOV_IMM));
+
+ e.enc64(null.bind(R64), rec_pu_id_ref.rex().opcodes(&MOV_IMM));
+ e.enc64(null.bind(R64), rec_pu_id_ref.opcodes(&MOV_IMM));
+
+ // is_null, implemented by testing whether the value is 0.
+ e.enc_r32_r64_rex_only(is_null, rec_is_zero.opcodes(&TEST_REG));
+
+ // is_invalid, implemented by testing whether the value is -1.
+ e.enc_r32_r64_rex_only(is_invalid, rec_is_invalid.opcodes(&CMP_IMM8).rrr(7));
+
+ // safepoint instruction calls sink, no actual encoding.
+ e.enc32_rec(safepoint, rec_safepoint, 0);
+ e.enc64_rec(safepoint, rec_safepoint, 0);
+}
+
+#[allow(clippy::cognitive_complexity)]
+pub(crate) fn define(
+ shared_defs: &SharedDefinitions,
+ settings: &SettingGroup,
+ x86: &InstructionGroup,
+ r: &RecipeGroup,
+) -> PerCpuModeEncodings {
+ // Definitions.
+ let mut e = PerCpuModeEncodings::new();
+
+ define_moves(&mut e, shared_defs, r);
+ define_memory(&mut e, shared_defs, x86, r);
+ define_fpu_moves(&mut e, shared_defs, r);
+ define_fpu_memory(&mut e, shared_defs, r);
+ define_fpu_ops(&mut e, shared_defs, settings, x86, r);
+ define_alu(&mut e, shared_defs, settings, x86, r);
+ define_simd(&mut e, shared_defs, settings, x86, r);
+ define_entity_ref(&mut e, shared_defs, settings, r);
+ define_control_flow(&mut e, shared_defs, settings, r);
+ define_reftypes(&mut e, shared_defs, r);
+
+ let x86_elf_tls_get_addr = x86.by_name("x86_elf_tls_get_addr");
+ let x86_macho_tls_get_addr = x86.by_name("x86_macho_tls_get_addr");
+
+ let rec_elf_tls_get_addr = r.recipe("elf_tls_get_addr");
+ let rec_macho_tls_get_addr = r.recipe("macho_tls_get_addr");
+
+ e.enc64_rec(x86_elf_tls_get_addr, rec_elf_tls_get_addr, 0);
+ e.enc64_rec(x86_macho_tls_get_addr, rec_macho_tls_get_addr, 0);
+
+ e
+}
diff --git a/third_party/rust/cranelift-codegen-meta/src/isa/x86/instructions.rs b/third_party/rust/cranelift-codegen-meta/src/isa/x86/instructions.rs
new file mode 100644
index 0000000000..7acd2e2c50
--- /dev/null
+++ b/third_party/rust/cranelift-codegen-meta/src/isa/x86/instructions.rs
@@ -0,0 +1,723 @@
+#![allow(non_snake_case)]
+
+use crate::cdsl::instructions::{
+ AllInstructions, InstructionBuilder as Inst, InstructionGroup, InstructionGroupBuilder,
+};
+use crate::cdsl::operands::Operand;
+use crate::cdsl::types::ValueType;
+use crate::cdsl::typevar::{Interval, TypeSetBuilder, TypeVar};
+use crate::shared::entities::EntityRefs;
+use crate::shared::formats::Formats;
+use crate::shared::immediates::Immediates;
+use crate::shared::types;
+
+#[allow(clippy::many_single_char_names)]
+pub(crate) fn define(
+ mut all_instructions: &mut AllInstructions,
+ formats: &Formats,
+ immediates: &Immediates,
+ entities: &EntityRefs,
+) -> InstructionGroup {
+ let mut ig = InstructionGroupBuilder::new(&mut all_instructions);
+
+ let iflags: &TypeVar = &ValueType::Special(types::Flag::IFlags.into()).into();
+
+ let iWord = &TypeVar::new(
+ "iWord",
+ "A scalar integer machine word",
+ TypeSetBuilder::new().ints(32..64).build(),
+ );
+ let nlo = &Operand::new("nlo", iWord).with_doc("Low part of numerator");
+ let nhi = &Operand::new("nhi", iWord).with_doc("High part of numerator");
+ let d = &Operand::new("d", iWord).with_doc("Denominator");
+ let q = &Operand::new("q", iWord).with_doc("Quotient");
+ let r = &Operand::new("r", iWord).with_doc("Remainder");
+
+ ig.push(
+ Inst::new(
+ "x86_udivmodx",
+ r#"
+ Extended unsigned division.
+
+ Concatenate the bits in `nhi` and `nlo` to form the numerator.
+ Interpret the bits as an unsigned number and divide by the unsigned
+ denominator `d`. Trap when `d` is zero or if the quotient is larger
+ than the range of the output.
+
+ Return both quotient and remainder.
+ "#,
+ &formats.ternary,
+ )
+ .operands_in(vec![nlo, nhi, d])
+ .operands_out(vec![q, r])
+ .can_trap(true),
+ );
+
+ ig.push(
+ Inst::new(
+ "x86_sdivmodx",
+ r#"
+ Extended signed division.
+
+ Concatenate the bits in `nhi` and `nlo` to form the numerator.
+ Interpret the bits as a signed number and divide by the signed
+ denominator `d`. Trap when `d` is zero or if the quotient is outside
+ the range of the output.
+
+ Return both quotient and remainder.
+ "#,
+ &formats.ternary,
+ )
+ .operands_in(vec![nlo, nhi, d])
+ .operands_out(vec![q, r])
+ .can_trap(true),
+ );
+
+ let argL = &Operand::new("argL", iWord);
+ let argR = &Operand::new("argR", iWord);
+ let resLo = &Operand::new("resLo", iWord);
+ let resHi = &Operand::new("resHi", iWord);
+
+ ig.push(
+ Inst::new(
+ "x86_umulx",
+ r#"
+ Unsigned integer multiplication, producing a double-length result.
+
+ Polymorphic over all scalar integer types, but does not support vector
+ types.
+ "#,
+ &formats.binary,
+ )
+ .operands_in(vec![argL, argR])
+ .operands_out(vec![resLo, resHi]),
+ );
+
+ ig.push(
+ Inst::new(
+ "x86_smulx",
+ r#"
+ Signed integer multiplication, producing a double-length result.
+
+ Polymorphic over all scalar integer types, but does not support vector
+ types.
+ "#,
+ &formats.binary,
+ )
+ .operands_in(vec![argL, argR])
+ .operands_out(vec![resLo, resHi]),
+ );
+
+ let Float = &TypeVar::new(
+ "Float",
+ "A scalar or vector floating point number",
+ TypeSetBuilder::new()
+ .floats(Interval::All)
+ .simd_lanes(Interval::All)
+ .build(),
+ );
+ let IntTo = &TypeVar::new(
+ "IntTo",
+ "An integer type with the same number of lanes",
+ TypeSetBuilder::new()
+ .ints(32..64)
+ .simd_lanes(Interval::All)
+ .build(),
+ );
+ let x = &Operand::new("x", Float);
+ let a = &Operand::new("a", IntTo);
+
+ ig.push(
+ Inst::new(
+ "x86_cvtt2si",
+ r#"
+ Convert with truncation floating point to signed integer.
+
+ The source floating point operand is converted to a signed integer by
+ rounding towards zero. If the result can't be represented in the output
+ type, returns the smallest signed value the output type can represent.
+
+ This instruction does not trap.
+ "#,
+ &formats.unary,
+ )
+ .operands_in(vec![x])
+ .operands_out(vec![a]),
+ );
+
+ let f32x4 = &TypeVar::new(
+ "f32x4",
+ "A floating point number",
+ TypeSetBuilder::new()
+ .floats(32..32)
+ .simd_lanes(4..4)
+ .build(),
+ );
+ let i32x4 = &TypeVar::new(
+ "i32x4",
+ "An integer type with the same number of lanes",
+ TypeSetBuilder::new().ints(32..32).simd_lanes(4..4).build(),
+ );
+ let x = &Operand::new("x", i32x4);
+ let a = &Operand::new("a", f32x4);
+
+ ig.push(
+ Inst::new(
+ "x86_vcvtudq2ps",
+ r#"
+ Convert unsigned integer to floating point.
+
+ Convert packed doubleword unsigned integers to packed single-precision floating-point
+ values. This instruction does not trap.
+ "#,
+ &formats.unary,
+ )
+ .operands_in(vec![x])
+ .operands_out(vec![a]),
+ );
+
+ let x = &Operand::new("x", Float);
+ let a = &Operand::new("a", Float);
+ let y = &Operand::new("y", Float);
+
+ ig.push(
+ Inst::new(
+ "x86_fmin",
+ r#"
+ Floating point minimum with x86 semantics.
+
+ This is equivalent to the C ternary operator `x < y ? x : y` which
+ differs from `fmin` when either operand is NaN or when comparing
+ +0.0 to -0.0.
+
+ When the two operands don't compare as LT, `y` is returned unchanged,
+ even if it is a signalling NaN.
+ "#,
+ &formats.binary,
+ )
+ .operands_in(vec![x, y])
+ .operands_out(vec![a]),
+ );
+
+ ig.push(
+ Inst::new(
+ "x86_fmax",
+ r#"
+ Floating point maximum with x86 semantics.
+
+ This is equivalent to the C ternary operator `x > y ? x : y` which
+ differs from `fmax` when either operand is NaN or when comparing
+ +0.0 to -0.0.
+
+ When the two operands don't compare as GT, `y` is returned unchanged,
+ even if it is a signalling NaN.
+ "#,
+ &formats.binary,
+ )
+ .operands_in(vec![x, y])
+ .operands_out(vec![a]),
+ );
+
+ let x = &Operand::new("x", iWord);
+
+ ig.push(
+ Inst::new(
+ "x86_push",
+ r#"
+ Pushes a value onto the stack.
+
+ Decrements the stack pointer and stores the specified value on to the top.
+
+ This is polymorphic in i32 and i64. However, it is only implemented for i64
+ in 64-bit mode, and only for i32 in 32-bit mode.
+ "#,
+ &formats.unary,
+ )
+ .operands_in(vec![x])
+ .other_side_effects(true)
+ .can_store(true),
+ );
+
+ ig.push(
+ Inst::new(
+ "x86_pop",
+ r#"
+ Pops a value from the stack.
+
+ Loads a value from the top of the stack and then increments the stack
+ pointer.
+
+ This is polymorphic in i32 and i64. However, it is only implemented for i64
+ in 64-bit mode, and only for i32 in 32-bit mode.
+ "#,
+ &formats.nullary,
+ )
+ .operands_out(vec![x])
+ .other_side_effects(true)
+ .can_load(true),
+ );
+
+ let y = &Operand::new("y", iWord);
+ let rflags = &Operand::new("rflags", iflags);
+
+ ig.push(
+ Inst::new(
+ "x86_bsr",
+ r#"
+ Bit Scan Reverse -- returns the bit-index of the most significant 1
+ in the word. Result is undefined if the argument is zero. However, it
+ sets the Z flag depending on the argument, so it is at least easy to
+ detect and handle that case.
+
+ This is polymorphic in i32 and i64. It is implemented for both i64 and
+ i32 in 64-bit mode, and only for i32 in 32-bit mode.
+ "#,
+ &formats.unary,
+ )
+ .operands_in(vec![x])
+ .operands_out(vec![y, rflags]),
+ );
+
+ ig.push(
+ Inst::new(
+ "x86_bsf",
+ r#"
+ Bit Scan Forwards -- returns the bit-index of the least significant 1
+ in the word. Is otherwise identical to 'bsr', just above.
+ "#,
+ &formats.unary,
+ )
+ .operands_in(vec![x])
+ .operands_out(vec![y, rflags]),
+ );
+
+ let uimm8 = &immediates.uimm8;
+ let TxN = &TypeVar::new(
+ "TxN",
+ "A SIMD vector type",
+ TypeSetBuilder::new()
+ .ints(Interval::All)
+ .floats(Interval::All)
+ .bools(Interval::All)
+ .simd_lanes(Interval::All)
+ .includes_scalars(false)
+ .build(),
+ );
+ let a = &Operand::new("a", TxN).with_doc("A vector value (i.e. held in an XMM register)");
+ let b = &Operand::new("b", TxN).with_doc("A vector value (i.e. held in an XMM register)");
+ let i = &Operand::new("i", uimm8).with_doc("An ordering operand controlling the copying of data from the source to the destination; see PSHUFD in Intel manual for details");
+
+ ig.push(
+ Inst::new(
+ "x86_pshufd",
+ r#"
+ Packed Shuffle Doublewords -- copies data from either memory or lanes in an extended
+ register and re-orders the data according to the passed immediate byte.
+ "#,
+ &formats.binary_imm8,
+ )
+ .operands_in(vec![a, i]) // TODO allow copying from memory here (need more permissive type than TxN)
+ .operands_out(vec![a]),
+ );
+
+ ig.push(
+ Inst::new(
+ "x86_pshufb",
+ r#"
+ Packed Shuffle Bytes -- re-orders data in an extended register using a shuffle
+ mask from either memory or another extended register
+ "#,
+ &formats.binary,
+ )
+ .operands_in(vec![a, b]) // TODO allow re-ordering from memory here (need more permissive type than TxN)
+ .operands_out(vec![a]),
+ );
+
+ let mask = &Operand::new("mask", uimm8).with_doc("mask to select lanes from b");
+ ig.push(
+ Inst::new(
+ "x86_pblendw",
+ r#"
+ Blend packed words using an immediate mask. Each bit of the 8-bit immediate corresponds to a
+ lane in ``b``: if the bit is set, the lane is copied into ``a``.
+ "#,
+ &formats.ternary_imm8,
+ )
+ .operands_in(vec![a, b, mask])
+ .operands_out(vec![a]),
+ );
+
+ let Idx = &Operand::new("Idx", uimm8).with_doc("Lane index");
+ let x = &Operand::new("x", TxN);
+ let a = &Operand::new("a", &TxN.lane_of());
+
+ ig.push(
+ Inst::new(
+ "x86_pextr",
+ r#"
+ Extract lane ``Idx`` from ``x``.
+ The lane index, ``Idx``, is an immediate value, not an SSA value. It
+ must indicate a valid lane index for the type of ``x``.
+ "#,
+ &formats.binary_imm8,
+ )
+ .operands_in(vec![x, Idx])
+ .operands_out(vec![a]),
+ );
+
+ let IBxN = &TypeVar::new(
+ "IBxN",
+ "A SIMD vector type containing only booleans and integers",
+ TypeSetBuilder::new()
+ .ints(Interval::All)
+ .bools(Interval::All)
+ .simd_lanes(Interval::All)
+ .includes_scalars(false)
+ .build(),
+ );
+ let x = &Operand::new("x", IBxN);
+ let y = &Operand::new("y", &IBxN.lane_of()).with_doc("New lane value");
+ let a = &Operand::new("a", IBxN);
+
+ ig.push(
+ Inst::new(
+ "x86_pinsr",
+ r#"
+ Insert ``y`` into ``x`` at lane ``Idx``.
+ The lane index, ``Idx``, is an immediate value, not an SSA value. It
+ must indicate a valid lane index for the type of ``x``.
+ "#,
+ &formats.ternary_imm8,
+ )
+ .operands_in(vec![x, y, Idx])
+ .operands_out(vec![a]),
+ );
+
+ let FxN = &TypeVar::new(
+ "FxN",
+ "A SIMD vector type containing floats",
+ TypeSetBuilder::new()
+ .floats(Interval::All)
+ .simd_lanes(Interval::All)
+ .includes_scalars(false)
+ .build(),
+ );
+ let x = &Operand::new("x", FxN);
+ let y = &Operand::new("y", &FxN.lane_of()).with_doc("New lane value");
+ let a = &Operand::new("a", FxN);
+
+ ig.push(
+ Inst::new(
+ "x86_insertps",
+ r#"
+        Insert a lane of ``y`` into ``x``, using ``Idx`` to encode both the lane the value is
+        extracted from and the lane it is inserted into. This is similar to x86_pinsr, but it
+        inserts floats, which are already stored in an XMM register.
+ "#,
+ &formats.ternary_imm8,
+ )
+ .operands_in(vec![x, y, Idx])
+ .operands_out(vec![a]),
+ );
+
+ let x = &Operand::new("x", TxN);
+ let y = &Operand::new("y", TxN);
+ let a = &Operand::new("a", TxN);
+
+ ig.push(
+ Inst::new(
+ "x86_punpckh",
+ r#"
+ Unpack the high-order lanes of ``x`` and ``y`` and interleave into ``a``. With notional
+ i8x4 vectors, where ``x = [x3, x2, x1, x0]`` and ``y = [y3, y2, y1, y0]``, this operation
+ would result in ``a = [y3, x3, y2, x2]`` (using the Intel manual's right-to-left lane
+ ordering).
+ "#,
+ &formats.binary,
+ )
+ .operands_in(vec![x, y])
+ .operands_out(vec![a]),
+ );
+
+ ig.push(
+ Inst::new(
+ "x86_punpckl",
+ r#"
+ Unpack the low-order lanes of ``x`` and ``y`` and interleave into ``a``. With notional
+ i8x4 vectors, where ``x = [x3, x2, x1, x0]`` and ``y = [y3, y2, y1, y0]``, this operation
+ would result in ``a = [y1, x1, y0, x0]`` (using the Intel manual's right-to-left lane
+ ordering).
+ "#,
+ &formats.binary,
+ )
+ .operands_in(vec![x, y])
+ .operands_out(vec![a]),
+ );
+
+ let x = &Operand::new("x", FxN);
+ let y = &Operand::new("y", FxN);
+ let a = &Operand::new("a", FxN);
+
+ ig.push(
+ Inst::new(
+ "x86_movsd",
+ r#"
+ Move the low 64 bits of the float vector ``y`` to the low 64 bits of float vector ``x``
+ "#,
+ &formats.binary,
+ )
+ .operands_in(vec![x, y])
+ .operands_out(vec![a]),
+ );
+
+ ig.push(
+ Inst::new(
+ "x86_movlhps",
+ r#"
+ Move the low 64 bits of the float vector ``y`` to the high 64 bits of float vector ``x``
+ "#,
+ &formats.binary,
+ )
+ .operands_in(vec![x, y])
+ .operands_out(vec![a]),
+ );
+
+ let IxN = &TypeVar::new(
+ "IxN",
+ "A SIMD vector type containing integers",
+ TypeSetBuilder::new()
+ .ints(Interval::All)
+ .simd_lanes(Interval::All)
+ .includes_scalars(false)
+ .build(),
+ );
+ let I128 = &TypeVar::new(
+ "I128",
+ "A SIMD vector type containing one large integer (due to Cranelift type constraints, \
+ this uses the Cranelift I64X2 type but should be understood as one large value, i.e., the \
+ upper lane is concatenated with the lower lane to form the integer)",
+ TypeSetBuilder::new()
+ .ints(64..64)
+ .simd_lanes(2..2)
+ .includes_scalars(false)
+ .build(),
+ );
+
+ let x = &Operand::new("x", IxN).with_doc("Vector value to shift");
+ let y = &Operand::new("y", I128).with_doc("Number of bits to shift");
+ let a = &Operand::new("a", IxN);
+
+ ig.push(
+ Inst::new(
+ "x86_psll",
+ r#"
+ Shift Packed Data Left Logical -- This implements the behavior of the shared instruction
+ ``ishl`` but alters the shift operand to live in an XMM register as expected by the PSLL*
+ family of instructions.
+ "#,
+ &formats.binary,
+ )
+ .operands_in(vec![x, y])
+ .operands_out(vec![a]),
+ );
+
+ ig.push(
+ Inst::new(
+ "x86_psrl",
+ r#"
+ Shift Packed Data Right Logical -- This implements the behavior of the shared instruction
+ ``ushr`` but alters the shift operand to live in an XMM register as expected by the PSRL*
+ family of instructions.
+ "#,
+ &formats.binary,
+ )
+ .operands_in(vec![x, y])
+ .operands_out(vec![a]),
+ );
+
+ ig.push(
+ Inst::new(
+ "x86_psra",
+ r#"
+ Shift Packed Data Right Arithmetic -- This implements the behavior of the shared
+ instruction ``sshr`` but alters the shift operand to live in an XMM register as expected by
+ the PSRA* family of instructions.
+ "#,
+ &formats.binary,
+ )
+ .operands_in(vec![x, y])
+ .operands_out(vec![a]),
+ );
+
+ let I64x2 = &TypeVar::new(
+ "I64x2",
+ "A SIMD vector type containing two 64-bit integers",
+ TypeSetBuilder::new()
+ .ints(64..64)
+ .simd_lanes(2..2)
+ .includes_scalars(false)
+ .build(),
+ );
+
+ let x = &Operand::new("x", I64x2);
+ let y = &Operand::new("y", I64x2);
+ let a = &Operand::new("a", I64x2);
+ ig.push(
+ Inst::new(
+ "x86_pmullq",
+ r#"
+ Multiply Packed Integers -- Multiply two 64x2 integers and receive a 64x2 result with
+ lane-wise wrapping if the result overflows. This instruction is necessary to add distinct
+ encodings for CPUs with newer vector features.
+ "#,
+ &formats.binary,
+ )
+ .operands_in(vec![x, y])
+ .operands_out(vec![a]),
+ );
+
+ ig.push(
+ Inst::new(
+ "x86_pmuludq",
+ r#"
+ Multiply Packed Integers -- Using only the bottom 32 bits in each lane, multiply two 64x2
+ unsigned integers and receive a 64x2 result. This instruction avoids the need for handling
+ overflow as in `x86_pmullq`.
+ "#,
+ &formats.binary,
+ )
+ .operands_in(vec![x, y])
+ .operands_out(vec![a]),
+ );
+
+ let x = &Operand::new("x", TxN);
+ let y = &Operand::new("y", TxN);
+ let f = &Operand::new("f", iflags);
+ ig.push(
+ Inst::new(
+ "x86_ptest",
+ r#"
+        Logical Compare -- PTEST sets the ZF flag if the bitwise AND of the first source operand
+        and the second source operand is all zeros, and sets the CF flag if the bitwise AND of
+        the second source operand and the logical NOT of the first (destination) operand is all
+        zeros.
+ "#,
+ &formats.binary,
+ )
+ .operands_in(vec![x, y])
+ .operands_out(vec![f]),
+ );
+
+ let x = &Operand::new("x", IxN);
+ let y = &Operand::new("y", IxN);
+ let a = &Operand::new("a", IxN);
+ ig.push(
+ Inst::new(
+ "x86_pmaxs",
+ r#"
+ Maximum of Packed Signed Integers -- Compare signed integers in the first and second
+ operand and return the maximum values.
+ "#,
+ &formats.binary,
+ )
+ .operands_in(vec![x, y])
+ .operands_out(vec![a]),
+ );
+
+ ig.push(
+ Inst::new(
+ "x86_pmaxu",
+ r#"
+ Maximum of Packed Unsigned Integers -- Compare unsigned integers in the first and second
+ operand and return the maximum values.
+ "#,
+ &formats.binary,
+ )
+ .operands_in(vec![x, y])
+ .operands_out(vec![a]),
+ );
+
+ ig.push(
+ Inst::new(
+ "x86_pmins",
+ r#"
+ Minimum of Packed Signed Integers -- Compare signed integers in the first and second
+ operand and return the minimum values.
+ "#,
+ &formats.binary,
+ )
+ .operands_in(vec![x, y])
+ .operands_out(vec![a]),
+ );
+
+ ig.push(
+ Inst::new(
+ "x86_pminu",
+ r#"
+ Minimum of Packed Unsigned Integers -- Compare unsigned integers in the first and second
+ operand and return the minimum values.
+ "#,
+ &formats.binary,
+ )
+ .operands_in(vec![x, y])
+ .operands_out(vec![a]),
+ );
+
+ let c = &Operand::new("c", uimm8)
+ .with_doc("The number of bytes to shift right; see PALIGNR in Intel manual for details");
+ ig.push(
+ Inst::new(
+ "x86_palignr",
+ r#"
+ Concatenate destination and source operands, extracting a byte-aligned result shifted to
+ the right by `c`.
+ "#,
+ &formats.ternary_imm8,
+ )
+ .operands_in(vec![x, y, c])
+ .operands_out(vec![a]),
+ );
+
+ let i64_t = &TypeVar::new(
+ "i64_t",
+ "A scalar 64bit integer",
+ TypeSetBuilder::new().ints(64..64).build(),
+ );
+
+ let GV = &Operand::new("GV", &entities.global_value);
+ let addr = &Operand::new("addr", i64_t);
+
+ ig.push(
+ Inst::new(
+ "x86_elf_tls_get_addr",
+ r#"
+ Elf tls get addr -- This implements the GD TLS model for ELF. The clobber output should
+ not be used.
+ "#,
+ &formats.unary_global_value,
+ )
+ // This is a bit overly broad to mark as clobbering *all* the registers, because it should
+ // only preserve caller-saved registers. There's no way to indicate this to register
+ // allocation yet, though, so mark as clobbering all registers instead.
+ .clobbers_all_regs(true)
+ .operands_in(vec![GV])
+ .operands_out(vec![addr]),
+ );
+ ig.push(
+ Inst::new(
+ "x86_macho_tls_get_addr",
+ r#"
+ Mach-O tls get addr -- This implements TLS access for Mach-O. The clobber output should
+ not be used.
+ "#,
+ &formats.unary_global_value,
+ )
+ // See above comment for x86_elf_tls_get_addr.
+ .clobbers_all_regs(true)
+ .operands_in(vec![GV])
+ .operands_out(vec![addr]),
+ );
+
+ ig.build()
+}
diff --git a/third_party/rust/cranelift-codegen-meta/src/isa/x86/legalize.rs b/third_party/rust/cranelift-codegen-meta/src/isa/x86/legalize.rs
new file mode 100644
index 0000000000..681b3104d5
--- /dev/null
+++ b/third_party/rust/cranelift-codegen-meta/src/isa/x86/legalize.rs
@@ -0,0 +1,829 @@
+use crate::cdsl::ast::{constant, var, ExprBuilder, Literal};
+use crate::cdsl::instructions::{vector, Bindable, InstructionGroup};
+use crate::cdsl::types::{LaneType, ValueType};
+use crate::cdsl::xform::TransformGroupBuilder;
+use crate::shared::types::Float::{F32, F64};
+use crate::shared::types::Int::{I16, I32, I64, I8};
+use crate::shared::Definitions as SharedDefinitions;
+
+#[allow(clippy::many_single_char_names)]
+pub(crate) fn define(shared: &mut SharedDefinitions, x86_instructions: &InstructionGroup) {
+ let mut expand = TransformGroupBuilder::new(
+ "x86_expand",
+ r#"
+ Legalize instructions by expansion.
+
+ Use x86-specific instructions if needed."#,
+ )
+ .isa("x86")
+ .chain_with(shared.transform_groups.by_name("expand_flags").id);
+
+ let mut narrow = TransformGroupBuilder::new(
+ "x86_narrow",
+ r#"
+ Legalize instructions by narrowing.
+
+ Use x86-specific instructions if needed."#,
+ )
+ .isa("x86")
+ .chain_with(shared.transform_groups.by_name("narrow_flags").id);
+
+ let mut narrow_avx = TransformGroupBuilder::new(
+ "x86_narrow_avx",
+ r#"
+ Legalize instructions by narrowing with CPU feature checks.
+
+ This special case converts using x86 AVX instructions where available."#,
+ )
+ .isa("x86");
+ // We cannot chain with the x86_narrow group until this group is built, see bottom of this
+ // function for where this is chained.
+
+ let mut widen = TransformGroupBuilder::new(
+ "x86_widen",
+ r#"
+ Legalize instructions by widening.
+
+ Use x86-specific instructions if needed."#,
+ )
+ .isa("x86")
+ .chain_with(shared.transform_groups.by_name("widen").id);
+
+ // List of instructions.
+ let insts = &shared.instructions;
+ let band = insts.by_name("band");
+ let bor = insts.by_name("bor");
+ let clz = insts.by_name("clz");
+ let ctz = insts.by_name("ctz");
+ let fcmp = insts.by_name("fcmp");
+ let fcvt_from_uint = insts.by_name("fcvt_from_uint");
+ let fcvt_to_sint = insts.by_name("fcvt_to_sint");
+ let fcvt_to_uint = insts.by_name("fcvt_to_uint");
+ let fcvt_to_sint_sat = insts.by_name("fcvt_to_sint_sat");
+ let fcvt_to_uint_sat = insts.by_name("fcvt_to_uint_sat");
+ let fmax = insts.by_name("fmax");
+ let fmin = insts.by_name("fmin");
+ let iadd = insts.by_name("iadd");
+ let iconst = insts.by_name("iconst");
+ let imul = insts.by_name("imul");
+ let ineg = insts.by_name("ineg");
+ let isub = insts.by_name("isub");
+ let ishl = insts.by_name("ishl");
+ let ireduce = insts.by_name("ireduce");
+ let popcnt = insts.by_name("popcnt");
+ let sdiv = insts.by_name("sdiv");
+ let selectif = insts.by_name("selectif");
+ let smulhi = insts.by_name("smulhi");
+ let srem = insts.by_name("srem");
+ let tls_value = insts.by_name("tls_value");
+ let udiv = insts.by_name("udiv");
+ let umulhi = insts.by_name("umulhi");
+ let ushr = insts.by_name("ushr");
+ let ushr_imm = insts.by_name("ushr_imm");
+ let urem = insts.by_name("urem");
+
+ let x86_bsf = x86_instructions.by_name("x86_bsf");
+ let x86_bsr = x86_instructions.by_name("x86_bsr");
+ let x86_umulx = x86_instructions.by_name("x86_umulx");
+ let x86_smulx = x86_instructions.by_name("x86_smulx");
+
+ let imm = &shared.imm;
+
+    // The shift amount is interpreted modulo the operand's bit width, so a 64-bit shift amount
+    // can safely be reduced to 32 bits. This is useful for x86_32, where an I64 shift amount is
+ // not encodable.
+ let a = var("a");
+ let x = var("x");
+ let y = var("y");
+ let z = var("z");
+
+ for &ty in &[I8, I16, I32] {
+ let ishl_by_i64 = ishl.bind(ty).bind(I64);
+ let ireduce = ireduce.bind(I32);
+ expand.legalize(
+ def!(a = ishl_by_i64(x, y)),
+ vec![def!(z = ireduce(y)), def!(a = ishl(x, z))],
+ );
+ }
+
+ for &ty in &[I8, I16, I32] {
+ let ushr_by_i64 = ushr.bind(ty).bind(I64);
+ let ireduce = ireduce.bind(I32);
+ expand.legalize(
+ def!(a = ushr_by_i64(x, y)),
+            vec![def!(z = ireduce(y)), def!(a = ushr(x, z))],
+ );
+ }
+
+ // Division and remainder.
+ //
+ // The srem expansion requires custom code because srem INT_MIN, -1 is not
+ // allowed to trap. The other ops need to check avoid_div_traps.
+ expand.custom_legalize(sdiv, "expand_sdivrem");
+ expand.custom_legalize(srem, "expand_sdivrem");
+ expand.custom_legalize(udiv, "expand_udivrem");
+ expand.custom_legalize(urem, "expand_udivrem");
+
+ // Double length (widening) multiplication.
+ let a = var("a");
+ let x = var("x");
+ let y = var("y");
+ let a1 = var("a1");
+ let a2 = var("a2");
+ let res_lo = var("res_lo");
+ let res_hi = var("res_hi");
+
+ expand.legalize(
+ def!(res_hi = umulhi(x, y)),
+ vec![def!((res_lo, res_hi) = x86_umulx(x, y))],
+ );
+
+ expand.legalize(
+ def!(res_hi = smulhi(x, y)),
+ vec![def!((res_lo, res_hi) = x86_smulx(x, y))],
+ );
+
+ // Floating point condition codes.
+ //
+ // The 8 condition codes in `supported_floatccs` are directly supported by a
+ // `ucomiss` or `ucomisd` instruction. The remaining codes need legalization
+ // patterns.
+
+ let floatcc_eq = Literal::enumerator_for(&imm.floatcc, "eq");
+ let floatcc_ord = Literal::enumerator_for(&imm.floatcc, "ord");
+ let floatcc_ueq = Literal::enumerator_for(&imm.floatcc, "ueq");
+ let floatcc_ne = Literal::enumerator_for(&imm.floatcc, "ne");
+ let floatcc_uno = Literal::enumerator_for(&imm.floatcc, "uno");
+ let floatcc_one = Literal::enumerator_for(&imm.floatcc, "one");
+
+ // Equality needs an explicit `ord` test which checks the parity bit.
+ expand.legalize(
+ def!(a = fcmp(floatcc_eq, x, y)),
+ vec![
+ def!(a1 = fcmp(floatcc_ord, x, y)),
+ def!(a2 = fcmp(floatcc_ueq, x, y)),
+ def!(a = band(a1, a2)),
+ ],
+ );
+ expand.legalize(
+ def!(a = fcmp(floatcc_ne, x, y)),
+ vec![
+ def!(a1 = fcmp(floatcc_uno, x, y)),
+ def!(a2 = fcmp(floatcc_one, x, y)),
+ def!(a = bor(a1, a2)),
+ ],
+ );
+
+ let floatcc_lt = &Literal::enumerator_for(&imm.floatcc, "lt");
+ let floatcc_gt = &Literal::enumerator_for(&imm.floatcc, "gt");
+ let floatcc_le = &Literal::enumerator_for(&imm.floatcc, "le");
+ let floatcc_ge = &Literal::enumerator_for(&imm.floatcc, "ge");
+ let floatcc_ugt = &Literal::enumerator_for(&imm.floatcc, "ugt");
+ let floatcc_ult = &Literal::enumerator_for(&imm.floatcc, "ult");
+ let floatcc_uge = &Literal::enumerator_for(&imm.floatcc, "uge");
+ let floatcc_ule = &Literal::enumerator_for(&imm.floatcc, "ule");
+
+ // Inequalities that need to be reversed.
+ for &(cc, rev_cc) in &[
+ (floatcc_lt, floatcc_gt),
+ (floatcc_le, floatcc_ge),
+ (floatcc_ugt, floatcc_ult),
+ (floatcc_uge, floatcc_ule),
+ ] {
+ expand.legalize(def!(a = fcmp(cc, x, y)), vec![def!(a = fcmp(rev_cc, y, x))]);
+ }
+
+ // We need to modify the CFG for min/max legalization.
+ expand.custom_legalize(fmin, "expand_minmax");
+ expand.custom_legalize(fmax, "expand_minmax");
+
+ // Conversions from unsigned need special handling.
+ expand.custom_legalize(fcvt_from_uint, "expand_fcvt_from_uint");
+ // Conversions from float to int can trap and modify the control flow graph.
+ expand.custom_legalize(fcvt_to_sint, "expand_fcvt_to_sint");
+ expand.custom_legalize(fcvt_to_uint, "expand_fcvt_to_uint");
+ expand.custom_legalize(fcvt_to_sint_sat, "expand_fcvt_to_sint_sat");
+ expand.custom_legalize(fcvt_to_uint_sat, "expand_fcvt_to_uint_sat");
+
+ // Count leading and trailing zeroes, for baseline x86_64
+ let c_minus_one = var("c_minus_one");
+ let c_thirty_one = var("c_thirty_one");
+ let c_thirty_two = var("c_thirty_two");
+ let c_sixty_three = var("c_sixty_three");
+ let c_sixty_four = var("c_sixty_four");
+ let index1 = var("index1");
+ let r2flags = var("r2flags");
+ let index2 = var("index2");
+
+ let intcc_eq = Literal::enumerator_for(&imm.intcc, "eq");
+ let imm64_minus_one = Literal::constant(&imm.imm64, -1);
+ let imm64_63 = Literal::constant(&imm.imm64, 63);
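+    // clz is computed as (width - 1) - bsr(x). BSR leaves the index undefined and sets ZF when
+    // the input is zero, so selectif substitutes -1 in that case, giving clz(0) == width.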
+ expand.legalize(
+ def!(a = clz.I64(x)),
+ vec![
+ def!(c_minus_one = iconst(imm64_minus_one)),
+ def!(c_sixty_three = iconst(imm64_63)),
+ def!((index1, r2flags) = x86_bsr(x)),
+ def!(index2 = selectif(intcc_eq, r2flags, c_minus_one, index1)),
+ def!(a = isub(c_sixty_three, index2)),
+ ],
+ );
+
+ let imm64_31 = Literal::constant(&imm.imm64, 31);
+ expand.legalize(
+ def!(a = clz.I32(x)),
+ vec![
+ def!(c_minus_one = iconst(imm64_minus_one)),
+ def!(c_thirty_one = iconst(imm64_31)),
+ def!((index1, r2flags) = x86_bsr(x)),
+ def!(index2 = selectif(intcc_eq, r2flags, c_minus_one, index1)),
+ def!(a = isub(c_thirty_one, index2)),
+ ],
+ );
+
+ let imm64_64 = Literal::constant(&imm.imm64, 64);
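+    // ctz is simply bsf(x), except that BSF leaves the index undefined and sets ZF for a zero
+    // input, so selectif substitutes the operand width (64 or 32) to give ctz(0) == width.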
+ expand.legalize(
+ def!(a = ctz.I64(x)),
+ vec![
+ def!(c_sixty_four = iconst(imm64_64)),
+ def!((index1, r2flags) = x86_bsf(x)),
+ def!(a = selectif(intcc_eq, r2flags, c_sixty_four, index1)),
+ ],
+ );
+
+ let imm64_32 = Literal::constant(&imm.imm64, 32);
+ expand.legalize(
+ def!(a = ctz.I32(x)),
+ vec![
+ def!(c_thirty_two = iconst(imm64_32)),
+ def!((index1, r2flags) = x86_bsf(x)),
+ def!(a = selectif(intcc_eq, r2flags, c_thirty_two, index1)),
+ ],
+ );
+
+ // Population count for baseline x86_64
+ let x = var("x");
+ let r = var("r");
+
+ let qv3 = var("qv3");
+ let qv4 = var("qv4");
+ let qv5 = var("qv5");
+ let qv6 = var("qv6");
+ let qv7 = var("qv7");
+ let qv8 = var("qv8");
+ let qv9 = var("qv9");
+ let qv10 = var("qv10");
+ let qv11 = var("qv11");
+ let qv12 = var("qv12");
+ let qv13 = var("qv13");
+ let qv14 = var("qv14");
+ let qv15 = var("qv15");
+ let qc77 = var("qc77");
+ #[allow(non_snake_case)]
+ let qc0F = var("qc0F");
+ let qc01 = var("qc01");
+
+ let imm64_1 = Literal::constant(&imm.imm64, 1);
+ let imm64_4 = Literal::constant(&imm.imm64, 4);
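+    // Branch-free SWAR popcount: repeatedly subtract shifted-and-masked copies of x so that each
+    // nibble of qv11 ends up holding the popcount of the corresponding nibble of x, sum adjacent
+    // nibbles into bytes, then accumulate all byte counts into the top byte with the
+    // 0x0101_0101_0101_0101 multiply and extract it with the final shift by 56.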
+ expand.legalize(
+ def!(r = popcnt.I64(x)),
+ vec![
+ def!(qv3 = ushr_imm(x, imm64_1)),
+ def!(qc77 = iconst(Literal::constant(&imm.imm64, 0x7777_7777_7777_7777))),
+ def!(qv4 = band(qv3, qc77)),
+ def!(qv5 = isub(x, qv4)),
+ def!(qv6 = ushr_imm(qv4, imm64_1)),
+ def!(qv7 = band(qv6, qc77)),
+ def!(qv8 = isub(qv5, qv7)),
+ def!(qv9 = ushr_imm(qv7, imm64_1)),
+ def!(qv10 = band(qv9, qc77)),
+ def!(qv11 = isub(qv8, qv10)),
+ def!(qv12 = ushr_imm(qv11, imm64_4)),
+ def!(qv13 = iadd(qv11, qv12)),
+ def!(qc0F = iconst(Literal::constant(&imm.imm64, 0x0F0F_0F0F_0F0F_0F0F))),
+ def!(qv14 = band(qv13, qc0F)),
+ def!(qc01 = iconst(Literal::constant(&imm.imm64, 0x0101_0101_0101_0101))),
+ def!(qv15 = imul(qv14, qc01)),
+ def!(r = ushr_imm(qv15, Literal::constant(&imm.imm64, 56))),
+ ],
+ );
+
+ let lv3 = var("lv3");
+ let lv4 = var("lv4");
+ let lv5 = var("lv5");
+ let lv6 = var("lv6");
+ let lv7 = var("lv7");
+ let lv8 = var("lv8");
+ let lv9 = var("lv9");
+ let lv10 = var("lv10");
+ let lv11 = var("lv11");
+ let lv12 = var("lv12");
+ let lv13 = var("lv13");
+ let lv14 = var("lv14");
+ let lv15 = var("lv15");
+ let lc77 = var("lc77");
+ #[allow(non_snake_case)]
+ let lc0F = var("lc0F");
+ let lc01 = var("lc01");
+
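+    // Same scheme as the 64-bit expansion above, with 32-bit masks and a final shift by 24.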
+ expand.legalize(
+ def!(r = popcnt.I32(x)),
+ vec![
+ def!(lv3 = ushr_imm(x, imm64_1)),
+ def!(lc77 = iconst(Literal::constant(&imm.imm64, 0x7777_7777))),
+ def!(lv4 = band(lv3, lc77)),
+ def!(lv5 = isub(x, lv4)),
+ def!(lv6 = ushr_imm(lv4, imm64_1)),
+ def!(lv7 = band(lv6, lc77)),
+ def!(lv8 = isub(lv5, lv7)),
+ def!(lv9 = ushr_imm(lv7, imm64_1)),
+ def!(lv10 = band(lv9, lc77)),
+ def!(lv11 = isub(lv8, lv10)),
+ def!(lv12 = ushr_imm(lv11, imm64_4)),
+ def!(lv13 = iadd(lv11, lv12)),
+ def!(lc0F = iconst(Literal::constant(&imm.imm64, 0x0F0F_0F0F))),
+ def!(lv14 = band(lv13, lc0F)),
+ def!(lc01 = iconst(Literal::constant(&imm.imm64, 0x0101_0101))),
+ def!(lv15 = imul(lv14, lc01)),
+ def!(r = ushr_imm(lv15, Literal::constant(&imm.imm64, 24))),
+ ],
+ );
+
+ expand.custom_legalize(ineg, "convert_ineg");
+ expand.custom_legalize(tls_value, "expand_tls_value");
+ widen.custom_legalize(ineg, "convert_ineg");
+
+ // To reduce compilation times, separate out large blocks of legalizations by theme.
+ define_simd(shared, x86_instructions, &mut narrow, &mut narrow_avx);
+
+ expand.build_and_add_to(&mut shared.transform_groups);
+ let narrow_id = narrow.build_and_add_to(&mut shared.transform_groups);
+ narrow_avx
+ .chain_with(narrow_id)
+ .build_and_add_to(&mut shared.transform_groups);
+ widen.build_and_add_to(&mut shared.transform_groups);
+}
+
+fn define_simd(
+ shared: &mut SharedDefinitions,
+ x86_instructions: &InstructionGroup,
+ narrow: &mut TransformGroupBuilder,
+ narrow_avx: &mut TransformGroupBuilder,
+) {
+ let insts = &shared.instructions;
+ let band = insts.by_name("band");
+ let band_not = insts.by_name("band_not");
+ let bitcast = insts.by_name("bitcast");
+ let bitselect = insts.by_name("bitselect");
+ let bor = insts.by_name("bor");
+ let bnot = insts.by_name("bnot");
+ let bxor = insts.by_name("bxor");
+ let extractlane = insts.by_name("extractlane");
+ let fabs = insts.by_name("fabs");
+ let fcmp = insts.by_name("fcmp");
+ let fcvt_from_uint = insts.by_name("fcvt_from_uint");
+ let fcvt_to_sint_sat = insts.by_name("fcvt_to_sint_sat");
+ let fcvt_to_uint_sat = insts.by_name("fcvt_to_uint_sat");
+ let fmax = insts.by_name("fmax");
+ let fmin = insts.by_name("fmin");
+ let fneg = insts.by_name("fneg");
+ let iadd_imm = insts.by_name("iadd_imm");
+ let icmp = insts.by_name("icmp");
+ let imax = insts.by_name("imax");
+ let imin = insts.by_name("imin");
+ let imul = insts.by_name("imul");
+ let ineg = insts.by_name("ineg");
+ let insertlane = insts.by_name("insertlane");
+ let ishl = insts.by_name("ishl");
+ let ishl_imm = insts.by_name("ishl_imm");
+ let load_splat = insts.by_name("load_splat");
+ let raw_bitcast = insts.by_name("raw_bitcast");
+ let scalar_to_vector = insts.by_name("scalar_to_vector");
+ let splat = insts.by_name("splat");
+ let shuffle = insts.by_name("shuffle");
+ let sshr = insts.by_name("sshr");
+ let swizzle = insts.by_name("swizzle");
+ let trueif = insts.by_name("trueif");
+ let uadd_sat = insts.by_name("uadd_sat");
+ let umax = insts.by_name("umax");
+ let umin = insts.by_name("umin");
+ let snarrow = insts.by_name("snarrow");
+ let swiden_high = insts.by_name("swiden_high");
+ let swiden_low = insts.by_name("swiden_low");
+ let ushr_imm = insts.by_name("ushr_imm");
+ let ushr = insts.by_name("ushr");
+ let uwiden_high = insts.by_name("uwiden_high");
+ let uwiden_low = insts.by_name("uwiden_low");
+ let vconst = insts.by_name("vconst");
+ let vall_true = insts.by_name("vall_true");
+ let vany_true = insts.by_name("vany_true");
+ let vselect = insts.by_name("vselect");
+
+ let x86_palignr = x86_instructions.by_name("x86_palignr");
+ let x86_pmaxs = x86_instructions.by_name("x86_pmaxs");
+ let x86_pmaxu = x86_instructions.by_name("x86_pmaxu");
+ let x86_pmins = x86_instructions.by_name("x86_pmins");
+ let x86_pminu = x86_instructions.by_name("x86_pminu");
+ let x86_pshufb = x86_instructions.by_name("x86_pshufb");
+ let x86_pshufd = x86_instructions.by_name("x86_pshufd");
+ let x86_psra = x86_instructions.by_name("x86_psra");
+ let x86_ptest = x86_instructions.by_name("x86_ptest");
+ let x86_punpckh = x86_instructions.by_name("x86_punpckh");
+ let x86_punpckl = x86_instructions.by_name("x86_punpckl");
+
+ let imm = &shared.imm;
+
+ // Set up variables and immediates.
+ let uimm8_zero = Literal::constant(&imm.uimm8, 0x00);
+ let uimm8_one = Literal::constant(&imm.uimm8, 0x01);
+ let uimm8_eight = Literal::constant(&imm.uimm8, 8);
+ let u128_zeroes = constant(vec![0x00; 16]);
+ let u128_ones = constant(vec![0xff; 16]);
+ let u128_seventies = constant(vec![0x70; 16]);
+ let a = var("a");
+ let b = var("b");
+ let c = var("c");
+ let d = var("d");
+ let e = var("e");
+ let f = var("f");
+ let g = var("g");
+ let h = var("h");
+ let x = var("x");
+ let y = var("y");
+ let z = var("z");
+
+ // Limit the SIMD vector size: eventually multiple vector sizes may be supported
+ // but for now only SSE-sized vectors are available.
+ let sse_vector_size: u64 = 128;
+ let allowed_simd_type = |t: &LaneType| t.lane_bits() >= 8 && t.lane_bits() < 128;
+
+ // SIMD splat: 8-bits
+ for ty in ValueType::all_lane_types().filter(|t| t.lane_bits() == 8) {
+ let splat_any8x16 = splat.bind(vector(ty, sse_vector_size));
+ narrow.legalize(
+ def!(y = splat_any8x16(x)),
+ vec![
+ // Move into the lowest 8 bits of an XMM register.
+ def!(a = scalar_to_vector(x)),
+ // Zero out a different XMM register; the shuffle mask for moving the lowest byte
+ // to all other byte lanes is 0x0.
+ def!(b = vconst(u128_zeroes)),
+ // PSHUFB takes two XMM operands, one of which is a shuffle mask (i.e. b).
+ def!(y = x86_pshufb(a, b)),
+ ],
+ );
+ }
+
+ // SIMD splat: 16-bits
+ for ty in ValueType::all_lane_types().filter(|t| t.lane_bits() == 16) {
+ let splat_x16x8 = splat.bind(vector(ty, sse_vector_size));
+ let raw_bitcast_any16x8_to_i32x4 = raw_bitcast
+ .bind(vector(I32, sse_vector_size))
+ .bind(vector(ty, sse_vector_size));
+ let raw_bitcast_i32x4_to_any16x8 = raw_bitcast
+ .bind(vector(ty, sse_vector_size))
+ .bind(vector(I32, sse_vector_size));
+ narrow.legalize(
+ def!(y = splat_x16x8(x)),
+ vec![
+ // Move into the lowest 16 bits of an XMM register.
+ def!(a = scalar_to_vector(x)),
+ // Insert the value again but in the next lowest 16 bits.
+ def!(b = insertlane(a, x, uimm8_one)),
+ // No instruction emitted; pretend this is an I32x4 so we can use PSHUFD.
+ def!(c = raw_bitcast_any16x8_to_i32x4(b)),
+ // Broadcast the bytes in the XMM register with PSHUFD.
+ def!(d = x86_pshufd(c, uimm8_zero)),
+ // No instruction emitted; pretend this is an X16x8 again.
+ def!(y = raw_bitcast_i32x4_to_any16x8(d)),
+ ],
+ );
+ }
+
+ // SIMD splat: 32-bits
+ for ty in ValueType::all_lane_types().filter(|t| t.lane_bits() == 32) {
+ let splat_any32x4 = splat.bind(vector(ty, sse_vector_size));
+ narrow.legalize(
+ def!(y = splat_any32x4(x)),
+ vec![
+ // Translate to an x86 MOV to get the value in an XMM register.
+ def!(a = scalar_to_vector(x)),
+ // Broadcast the bytes in the XMM register with PSHUFD.
+ def!(y = x86_pshufd(a, uimm8_zero)),
+ ],
+ );
+ }
+
+ // SIMD splat: 64-bits
+ for ty in ValueType::all_lane_types().filter(|t| t.lane_bits() == 64) {
+ let splat_any64x2 = splat.bind(vector(ty, sse_vector_size));
+ narrow.legalize(
+ def!(y = splat_any64x2(x)),
+ vec![
+ // Move into the lowest 64 bits of an XMM register.
+ def!(a = scalar_to_vector(x)),
+ // Move into the highest 64 bits of the same XMM register.
+ def!(y = insertlane(a, x, uimm8_one)),
+ ],
+ );
+ }
+
+ // SIMD swizzle; the following inefficient implementation is due to the Wasm SIMD spec requiring
+    // mask indexes greater than 15 to select a zero lane rather than wrapping around. For the
+    // spec discussion,
+ // see https://github.com/WebAssembly/simd/issues/93.
+ {
+ let swizzle = swizzle.bind(vector(I8, sse_vector_size));
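+        // Adding 0x70 to each mask byte with unsigned saturation leaves in-range indices
+        // (0..=15) with their low nibble intact while pushing any out-of-range index to 0x80 or
+        // above; PSHUFB then zeroes lanes whose mask byte has its top bit set.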
+ narrow.legalize(
+ def!(a = swizzle(x, y)),
+ vec![
+ def!(b = vconst(u128_seventies)),
+ def!(c = uadd_sat(y, b)),
+ def!(a = x86_pshufb(x, c)),
+ ],
+ );
+ }
+
+ // SIMD bnot
+ for ty in ValueType::all_lane_types().filter(allowed_simd_type) {
+ let bnot = bnot.bind(vector(ty, sse_vector_size));
+ narrow.legalize(
+ def!(y = bnot(x)),
+ vec![def!(a = vconst(u128_ones)), def!(y = bxor(a, x))],
+ );
+ }
+
+ // SIMD shift right (arithmetic, i16x8 and i32x4)
+ for ty in &[I16, I32] {
+ let sshr = sshr.bind(vector(*ty, sse_vector_size));
+ let bitcast_i64x2 = bitcast.bind(vector(I64, sse_vector_size));
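+        // The PSRA* instructions take the shift amount in an XMM register, so bitcast the scalar
+        // amount into an i64x2 vector before shifting.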
+ narrow.legalize(
+ def!(a = sshr(x, y)),
+ vec![def!(b = bitcast_i64x2(y)), def!(a = x86_psra(x, b))],
+ );
+ }
+ // SIMD shift right (arithmetic, i8x16)
+ {
+ let sshr = sshr.bind(vector(I8, sse_vector_size));
+ let bitcast_i64x2 = bitcast.bind(vector(I64, sse_vector_size));
+ let raw_bitcast_i16x8 = raw_bitcast.bind(vector(I16, sse_vector_size));
+ let raw_bitcast_i16x8_again = raw_bitcast.bind(vector(I16, sse_vector_size));
+ narrow.legalize(
+ def!(z = sshr(x, y)),
+ vec![
+ // Since we will use the high byte of each 16x8 lane, shift an extra 8 bits.
+ def!(a = iadd_imm(y, uimm8_eight)),
+ def!(b = bitcast_i64x2(a)),
+ // Take the low 8 bytes of x, duplicate them in 16x8 lanes, then shift right.
+ def!(c = x86_punpckl(x, x)),
+ def!(d = raw_bitcast_i16x8(c)),
+ def!(e = x86_psra(d, b)),
+ // Take the high 8 bytes of x, duplicate them in 16x8 lanes, then shift right.
+ def!(f = x86_punpckh(x, x)),
+ def!(g = raw_bitcast_i16x8_again(f)),
+ def!(h = x86_psra(g, b)),
+ // Re-pack the vector.
+ def!(z = snarrow(e, h)),
+ ],
+ );
+ }
+ // SIMD shift right (arithmetic, i64x2)
+ {
+ let sshr_vector = sshr.bind(vector(I64, sse_vector_size));
+ let sshr_scalar_lane0 = sshr.bind(I64);
+ let sshr_scalar_lane1 = sshr.bind(I64);
+ narrow.legalize(
+ def!(z = sshr_vector(x, y)),
+ vec![
+ // Use scalar operations to shift the first lane.
+ def!(a = extractlane(x, uimm8_zero)),
+ def!(b = sshr_scalar_lane0(a, y)),
+ def!(c = insertlane(x, b, uimm8_zero)),
+ // Do the same for the second lane.
+ def!(d = extractlane(x, uimm8_one)),
+ def!(e = sshr_scalar_lane1(d, y)),
+ def!(z = insertlane(c, e, uimm8_one)),
+ ],
+ );
+ }
+
+ // SIMD select
+ for ty in ValueType::all_lane_types().filter(allowed_simd_type) {
+ let bitselect = bitselect.bind(vector(ty, sse_vector_size)); // must bind both x/y and c
+ narrow.legalize(
+ def!(d = bitselect(c, x, y)),
+ vec![
+ def!(a = band(x, c)),
+ def!(b = band_not(y, c)),
+ def!(d = bor(a, b)),
+ ],
+ );
+ }
+
+ // SIMD vselect; replace with bitselect if BLEND* instructions are not available.
+    // This works because each lane of a boolean vector is filled with all zeroes or all ones.
+ for ty in ValueType::all_lane_types().filter(allowed_simd_type) {
+ let vselect = vselect.bind(vector(ty, sse_vector_size));
+ let raw_bitcast = raw_bitcast.bind(vector(ty, sse_vector_size));
+ narrow.legalize(
+ def!(d = vselect(c, x, y)),
+ vec![def!(a = raw_bitcast(c)), def!(d = bitselect(a, x, y))],
+ );
+ }
+
+ // SIMD vany_true
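+    // PTEST x, x sets ZF only when x is entirely zero, so "not equal" on the resulting flags
+    // means at least one lane is non-zero.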
+ let ne = Literal::enumerator_for(&imm.intcc, "ne");
+ for ty in ValueType::all_lane_types().filter(allowed_simd_type) {
+ let vany_true = vany_true.bind(vector(ty, sse_vector_size));
+ narrow.legalize(
+ def!(y = vany_true(x)),
+ vec![def!(a = x86_ptest(x, x)), def!(y = trueif(ne, a))],
+ );
+ }
+
+ // SIMD vall_true
+ let eq = Literal::enumerator_for(&imm.intcc, "eq");
+ for ty in ValueType::all_lane_types().filter(allowed_simd_type) {
+ let vall_true = vall_true.bind(vector(ty, sse_vector_size));
+ if ty.is_int() {
+ // In the common case (Wasm's integer-only all_true), we do not require a
+ // bitcast.
+ narrow.legalize(
+ def!(y = vall_true(x)),
+ vec![
+ def!(a = vconst(u128_zeroes)),
+ def!(c = icmp(eq, x, a)),
+ def!(d = x86_ptest(c, c)),
+ def!(y = trueif(eq, d)),
+ ],
+ );
+ } else {
+ // However, to support other types we must bitcast them to an integer vector to
+ // use icmp.
+ let lane_type_as_int = LaneType::int_from_bits(ty.lane_bits() as u16);
+ let raw_bitcast_to_int = raw_bitcast.bind(vector(lane_type_as_int, sse_vector_size));
+ narrow.legalize(
+ def!(y = vall_true(x)),
+ vec![
+ def!(a = vconst(u128_zeroes)),
+ def!(b = raw_bitcast_to_int(x)),
+ def!(c = icmp(eq, b, a)),
+ def!(d = x86_ptest(c, c)),
+ def!(y = trueif(eq, d)),
+ ],
+ );
+ }
+ }
+
+ // SIMD icmp ne
+ let ne = Literal::enumerator_for(&imm.intcc, "ne");
+ for ty in ValueType::all_lane_types().filter(|ty| allowed_simd_type(ty) && ty.is_int()) {
+ let icmp_ = icmp.bind(vector(ty, sse_vector_size));
+ narrow.legalize(
+ def!(c = icmp_(ne, a, b)),
+ vec![def!(x = icmp(eq, a, b)), def!(c = bnot(x))],
+ );
+ }
+
+ // SIMD icmp greater-/less-than
+ let sgt = Literal::enumerator_for(&imm.intcc, "sgt");
+ let ugt = Literal::enumerator_for(&imm.intcc, "ugt");
+ let sge = Literal::enumerator_for(&imm.intcc, "sge");
+ let uge = Literal::enumerator_for(&imm.intcc, "uge");
+ let slt = Literal::enumerator_for(&imm.intcc, "slt");
+ let ult = Literal::enumerator_for(&imm.intcc, "ult");
+ let sle = Literal::enumerator_for(&imm.intcc, "sle");
+ let ule = Literal::enumerator_for(&imm.intcc, "ule");
+ for ty in &[I8, I16, I32] {
+ // greater-than
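+        // a >u b  <=>  pmaxu(a, b) != b: take the unsigned max, compare it for equality with b,
+        // and invert the result.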
+ let icmp_ = icmp.bind(vector(*ty, sse_vector_size));
+ narrow.legalize(
+ def!(c = icmp_(ugt, a, b)),
+ vec![
+ def!(x = x86_pmaxu(a, b)),
+ def!(y = icmp(eq, x, b)),
+ def!(c = bnot(y)),
+ ],
+ );
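+        // a >=s b  <=>  pmins(a, b) == b, and likewise a >=u b  <=>  pminu(a, b) == b.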
+ let icmp_ = icmp.bind(vector(*ty, sse_vector_size));
+ narrow.legalize(
+ def!(c = icmp_(sge, a, b)),
+ vec![def!(x = x86_pmins(a, b)), def!(c = icmp(eq, x, b))],
+ );
+ let icmp_ = icmp.bind(vector(*ty, sse_vector_size));
+ narrow.legalize(
+ def!(c = icmp_(uge, a, b)),
+ vec![def!(x = x86_pminu(a, b)), def!(c = icmp(eq, x, b))],
+ );
+
+ // less-than
+ let icmp_ = icmp.bind(vector(*ty, sse_vector_size));
+ narrow.legalize(def!(c = icmp_(slt, a, b)), vec![def!(c = icmp(sgt, b, a))]);
+ let icmp_ = icmp.bind(vector(*ty, sse_vector_size));
+ narrow.legalize(def!(c = icmp_(ult, a, b)), vec![def!(c = icmp(ugt, b, a))]);
+ let icmp_ = icmp.bind(vector(*ty, sse_vector_size));
+ narrow.legalize(def!(c = icmp_(sle, a, b)), vec![def!(c = icmp(sge, b, a))]);
+ let icmp_ = icmp.bind(vector(*ty, sse_vector_size));
+ narrow.legalize(def!(c = icmp_(ule, a, b)), vec![def!(c = icmp(uge, b, a))]);
+ }
+
+ // SIMD integer min/max
+ for ty in &[I8, I16, I32] {
+ let imin = imin.bind(vector(*ty, sse_vector_size));
+ narrow.legalize(def!(c = imin(a, b)), vec![def!(c = x86_pmins(a, b))]);
+ let umin = umin.bind(vector(*ty, sse_vector_size));
+ narrow.legalize(def!(c = umin(a, b)), vec![def!(c = x86_pminu(a, b))]);
+ let imax = imax.bind(vector(*ty, sse_vector_size));
+ narrow.legalize(def!(c = imax(a, b)), vec![def!(c = x86_pmaxs(a, b))]);
+ let umax = umax.bind(vector(*ty, sse_vector_size));
+ narrow.legalize(def!(c = umax(a, b)), vec![def!(c = x86_pmaxu(a, b))]);
+ }
+
+ // SIMD fcmp greater-/less-than
+ let gt = Literal::enumerator_for(&imm.floatcc, "gt");
+ let lt = Literal::enumerator_for(&imm.floatcc, "lt");
+ let ge = Literal::enumerator_for(&imm.floatcc, "ge");
+ let le = Literal::enumerator_for(&imm.floatcc, "le");
+ let ugt = Literal::enumerator_for(&imm.floatcc, "ugt");
+ let ult = Literal::enumerator_for(&imm.floatcc, "ult");
+ let uge = Literal::enumerator_for(&imm.floatcc, "uge");
+ let ule = Literal::enumerator_for(&imm.floatcc, "ule");
+ for ty in &[F32, F64] {
+ let fcmp_ = fcmp.bind(vector(*ty, sse_vector_size));
+ narrow.legalize(def!(c = fcmp_(gt, a, b)), vec![def!(c = fcmp(lt, b, a))]);
+ let fcmp_ = fcmp.bind(vector(*ty, sse_vector_size));
+ narrow.legalize(def!(c = fcmp_(ge, a, b)), vec![def!(c = fcmp(le, b, a))]);
+ let fcmp_ = fcmp.bind(vector(*ty, sse_vector_size));
+ narrow.legalize(def!(c = fcmp_(ult, a, b)), vec![def!(c = fcmp(ugt, b, a))]);
+ let fcmp_ = fcmp.bind(vector(*ty, sse_vector_size));
+ narrow.legalize(def!(c = fcmp_(ule, a, b)), vec![def!(c = fcmp(uge, b, a))]);
+ }
+
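+    // SIMD fneg: flip each lane's sign bit by XORing with a vector of sign-bit masks.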
+ for ty in &[F32, F64] {
+ let fneg = fneg.bind(vector(*ty, sse_vector_size));
+ let lane_type_as_int = LaneType::int_from_bits(LaneType::from(*ty).lane_bits() as u16);
+ let uimm8_shift = Literal::constant(&imm.uimm8, lane_type_as_int.lane_bits() as i64 - 1);
+ let vconst = vconst.bind(vector(lane_type_as_int, sse_vector_size));
+ let bitcast_to_float = raw_bitcast.bind(vector(*ty, sse_vector_size));
+ narrow.legalize(
+ def!(b = fneg(a)),
+ vec![
+ def!(c = vconst(u128_ones)),
+ def!(d = ishl_imm(c, uimm8_shift)), // Create a mask of all 0s except the MSB.
+ def!(e = bitcast_to_float(d)), // Cast mask to the floating-point type.
+ def!(b = bxor(a, e)), // Flip the MSB.
+ ],
+ );
+ }
+
+ // SIMD fabs
+ for ty in &[F32, F64] {
+ let fabs = fabs.bind(vector(*ty, sse_vector_size));
+ let lane_type_as_int = LaneType::int_from_bits(LaneType::from(*ty).lane_bits() as u16);
+ let vconst = vconst.bind(vector(lane_type_as_int, sse_vector_size));
+ let bitcast_to_float = raw_bitcast.bind(vector(*ty, sse_vector_size));
+ narrow.legalize(
+ def!(b = fabs(a)),
+ vec![
+ def!(c = vconst(u128_ones)),
+ def!(d = ushr_imm(c, uimm8_one)), // Create a mask of all 1s except the MSB.
+ def!(e = bitcast_to_float(d)), // Cast mask to the floating-point type.
+ def!(b = band(a, e)), // Unset the MSB.
+ ],
+ );
+ }
+
+ // SIMD widen
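+    // The *_high variants rotate the upper eight bytes into the lower half with PALIGNR and then
+    // reuse the corresponding *_low widening instruction.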
+ for ty in &[I8, I16] {
+ let swiden_high = swiden_high.bind(vector(*ty, sse_vector_size));
+ narrow.legalize(
+ def!(b = swiden_high(a)),
+ vec![
+ def!(c = x86_palignr(a, a, uimm8_eight)),
+ def!(b = swiden_low(c)),
+ ],
+ );
+ let uwiden_high = uwiden_high.bind(vector(*ty, sse_vector_size));
+ narrow.legalize(
+ def!(b = uwiden_high(a)),
+ vec![
+ def!(c = x86_palignr(a, a, uimm8_eight)),
+ def!(b = uwiden_low(c)),
+ ],
+ );
+ }
+
+ narrow.custom_legalize(shuffle, "convert_shuffle");
+ narrow.custom_legalize(extractlane, "convert_extractlane");
+ narrow.custom_legalize(insertlane, "convert_insertlane");
+ narrow.custom_legalize(ineg, "convert_ineg");
+ narrow.custom_legalize(ushr, "convert_ushr");
+ narrow.custom_legalize(ishl, "convert_ishl");
+ narrow.custom_legalize(fcvt_to_sint_sat, "expand_fcvt_to_sint_sat_vector");
+ narrow.custom_legalize(fmin, "expand_minmax_vector");
+ narrow.custom_legalize(fmax, "expand_minmax_vector");
+ narrow.custom_legalize(load_splat, "expand_load_splat");
+
+ narrow_avx.custom_legalize(imul, "convert_i64x2_imul");
+ narrow_avx.custom_legalize(fcvt_from_uint, "expand_fcvt_from_uint_vector");
+ narrow_avx.custom_legalize(fcvt_to_uint_sat, "expand_fcvt_to_uint_sat_vector");
+}
diff --git a/third_party/rust/cranelift-codegen-meta/src/isa/x86/mod.rs b/third_party/rust/cranelift-codegen-meta/src/isa/x86/mod.rs
new file mode 100644
index 0000000000..a272e83900
--- /dev/null
+++ b/third_party/rust/cranelift-codegen-meta/src/isa/x86/mod.rs
@@ -0,0 +1,88 @@
+use crate::cdsl::cpu_modes::CpuMode;
+use crate::cdsl::isa::TargetIsa;
+use crate::cdsl::types::{ReferenceType, VectorType};
+
+use crate::shared::types::Bool::B1;
+use crate::shared::types::Float::{F32, F64};
+use crate::shared::types::Int::{I16, I32, I64, I8};
+use crate::shared::types::Reference::{R32, R64};
+use crate::shared::Definitions as SharedDefinitions;
+
+mod encodings;
+mod instructions;
+mod legalize;
+mod opcodes;
+mod recipes;
+mod registers;
+pub(crate) mod settings;
+
+pub(crate) fn define(shared_defs: &mut SharedDefinitions) -> TargetIsa {
+ let settings = settings::define(&shared_defs.settings);
+ let regs = registers::define();
+
+ let inst_group = instructions::define(
+ &mut shared_defs.all_instructions,
+ &shared_defs.formats,
+ &shared_defs.imm,
+ &shared_defs.entities,
+ );
+ legalize::define(shared_defs, &inst_group);
+
+ // CPU modes for 32-bit and 64-bit operations.
+ let mut x86_64 = CpuMode::new("I64");
+ let mut x86_32 = CpuMode::new("I32");
+
+ let expand_flags = shared_defs.transform_groups.by_name("expand_flags");
+ let x86_widen = shared_defs.transform_groups.by_name("x86_widen");
+ let x86_narrow = shared_defs.transform_groups.by_name("x86_narrow");
+ let x86_narrow_avx = shared_defs.transform_groups.by_name("x86_narrow_avx");
+ let x86_expand = shared_defs.transform_groups.by_name("x86_expand");
+
+ x86_32.legalize_monomorphic(expand_flags);
+ x86_32.legalize_default(x86_narrow);
+ x86_32.legalize_type(B1, expand_flags);
+ x86_32.legalize_type(I8, x86_widen);
+ x86_32.legalize_type(I16, x86_widen);
+ x86_32.legalize_type(I32, x86_expand);
+ x86_32.legalize_value_type(ReferenceType(R32), x86_expand);
+ x86_32.legalize_type(F32, x86_expand);
+ x86_32.legalize_type(F64, x86_expand);
+ x86_32.legalize_value_type(VectorType::new(I32.into(), 4), x86_narrow_avx);
+ x86_32.legalize_value_type(VectorType::new(I64.into(), 2), x86_narrow_avx);
+ x86_32.legalize_value_type(VectorType::new(F32.into(), 4), x86_narrow_avx);
+
+ x86_64.legalize_monomorphic(expand_flags);
+ x86_64.legalize_default(x86_narrow);
+ x86_64.legalize_type(B1, expand_flags);
+ x86_64.legalize_type(I8, x86_widen);
+ x86_64.legalize_type(I16, x86_widen);
+ x86_64.legalize_type(I32, x86_expand);
+ x86_64.legalize_type(I64, x86_expand);
+ x86_64.legalize_value_type(ReferenceType(R64), x86_expand);
+ x86_64.legalize_type(F32, x86_expand);
+ x86_64.legalize_type(F64, x86_expand);
+ x86_64.legalize_value_type(VectorType::new(I32.into(), 4), x86_narrow_avx);
+ x86_64.legalize_value_type(VectorType::new(I64.into(), 2), x86_narrow_avx);
+ x86_64.legalize_value_type(VectorType::new(F32.into(), 4), x86_narrow_avx);
+
+ let recipes = recipes::define(shared_defs, &settings, &regs);
+
+ let encodings = encodings::define(shared_defs, &settings, &inst_group, &recipes);
+ x86_32.set_encodings(encodings.enc32);
+ x86_64.set_encodings(encodings.enc64);
+ let encodings_predicates = encodings.inst_pred_reg.extract();
+
+ let recipes = encodings.recipes;
+
+ let cpu_modes = vec![x86_64, x86_32];
+
+ TargetIsa::new(
+ "x86",
+ inst_group,
+ settings,
+ regs,
+ recipes,
+ cpu_modes,
+ encodings_predicates,
+ )
+}
diff --git a/third_party/rust/cranelift-codegen-meta/src/isa/x86/opcodes.rs b/third_party/rust/cranelift-codegen-meta/src/isa/x86/opcodes.rs
new file mode 100644
index 0000000000..09c07c458f
--- /dev/null
+++ b/third_party/rust/cranelift-codegen-meta/src/isa/x86/opcodes.rs
@@ -0,0 +1,721 @@
+//! Static, named definitions of instruction opcodes.
+
+/// Empty opcode for use as a default.
+pub static EMPTY: [u8; 0] = [];
+
+/// Add with carry flag r{16,32,64} to r/m of the same size.
+pub static ADC: [u8; 1] = [0x11];
+
+/// Add r{16,32,64} to r/m of the same size.
+pub static ADD: [u8; 1] = [0x01];
+
+/// Add imm{16,32} to r/m{16,32,64}, possibly sign-extended.
+pub static ADD_IMM: [u8; 1] = [0x81];
+
+/// Add sign-extended imm8 to r/m{16,32,64}.
+pub static ADD_IMM8_SIGN_EXTEND: [u8; 1] = [0x83];
+
+/// Add packed double-precision floating-point values from xmm2/mem to xmm1 and store result in
+/// xmm1 (SSE2).
+pub static ADDPD: [u8; 3] = [0x66, 0x0f, 0x58];
+
+/// Add packed single-precision floating-point values from xmm2/mem to xmm1 and store result in
+/// xmm1 (SSE).
+pub static ADDPS: [u8; 2] = [0x0f, 0x58];
+
+/// Add the low double-precision floating-point value from xmm2/mem to xmm1
+/// and store the result in xmm1.
+pub static ADDSD: [u8; 3] = [0xf2, 0x0f, 0x58];
+
+/// Add the low single-precision floating-point value from xmm2/mem to xmm1
+/// and store the result in xmm1.
+pub static ADDSS: [u8; 3] = [0xf3, 0x0f, 0x58];
+
+/// r/m{16,32,64} AND register of the same size (Intel docs have a typo).
+pub static AND: [u8; 1] = [0x21];
+
+/// imm{16,32} AND r/m{16,32,64}, possibly sign-extended.
+pub static AND_IMM: [u8; 1] = [0x81];
+
+/// r/m{16,32,64} AND sign-extended imm8.
+pub static AND_IMM8_SIGN_EXTEND: [u8; 1] = [0x83];
+
+/// Return the bitwise logical AND NOT of packed single-precision floating-point
+/// values in xmm1 and xmm2/mem.
+pub static ANDNPS: [u8; 2] = [0x0f, 0x55];
+
+/// Return the bitwise logical AND of packed single-precision floating-point values
+/// in xmm1 and xmm2/mem.
+pub static ANDPS: [u8; 2] = [0x0f, 0x54];
+
+/// Bit scan forward (stores index of first encountered 1 from the front).
+pub static BIT_SCAN_FORWARD: [u8; 2] = [0x0f, 0xbc];
+
+/// Bit scan reverse (stores index of first encountered 1 from the back).
+pub static BIT_SCAN_REVERSE: [u8; 2] = [0x0f, 0xbd];
+
+/// Select packed single-precision floating-point values from xmm1 and xmm2/m128
+/// from mask specified in XMM0 and store the values into xmm1 (SSE4.1).
+pub static BLENDVPS: [u8; 4] = [0x66, 0x0f, 0x38, 0x14];
+
+/// Select packed double-precision floating-point values from xmm1 and xmm2/m128
+/// from mask specified in XMM0 and store the values into xmm1 (SSE4.1).
+pub static BLENDVPD: [u8; 4] = [0x66, 0x0f, 0x38, 0x15];
+
+/// Call near, relative, displacement relative to next instruction (sign-extended).
+pub static CALL_RELATIVE: [u8; 1] = [0xe8];
+
+/// Move r/m{16,32,64} if overflow (OF=1).
+pub static CMOV_OVERFLOW: [u8; 2] = [0x0f, 0x40];
+
+/// Compare imm{16,32} with r/m{16,32,64} (sign-extended if 64).
+pub static CMP_IMM: [u8; 1] = [0x81];
+
+/// Compare imm8 with r/m{16,32,64}.
+pub static CMP_IMM8: [u8; 1] = [0x83];
+
+/// Compare r{16,32,64} with r/m of the same size.
+pub static CMP_REG: [u8; 1] = [0x39];
+
+/// Compare packed double-precision floating-point value in xmm2/m32 and xmm1 using bits 2:0 of
+/// imm8 as comparison predicate (SSE2).
+pub static CMPPD: [u8; 3] = [0x66, 0x0f, 0xc2];
+
+/// Compare packed single-precision floating-point value in xmm2/m32 and xmm1 using bits 2:0 of
+/// imm8 as comparison predicate (SSE).
+pub static CMPPS: [u8; 2] = [0x0f, 0xc2];
+
+/// Convert four packed signed doubleword integers from xmm2/mem to four packed single-precision
+/// floating-point values in xmm1 (SSE2).
+pub static CVTDQ2PS: [u8; 2] = [0x0f, 0x5b];
+
+/// Convert scalar double-precision floating-point value to scalar single-precision
+/// floating-point value.
+pub static CVTSD2SS: [u8; 3] = [0xf2, 0x0f, 0x5a];
+
+/// Convert doubleword integer to scalar double-precision floating-point value.
+pub static CVTSI2SD: [u8; 3] = [0xf2, 0x0f, 0x2a];
+
+/// Convert doubleword integer to scalar single-precision floating-point value.
+pub static CVTSI2SS: [u8; 3] = [0xf3, 0x0f, 0x2a];
+
+/// Convert scalar single-precision floating-point value to scalar double-precision
+/// float-point value.
+pub static CVTSS2SD: [u8; 3] = [0xf3, 0x0f, 0x5a];
+
+/// Convert four packed single-precision floating-point values from xmm2/mem to four packed signed
+/// doubleword values in xmm1 using truncation (SSE2).
+pub static CVTTPS2DQ: [u8; 3] = [0xf3, 0x0f, 0x5b];
+
+/// Convert with truncation scalar double-precision floating-point value to signed
+/// integer.
+pub static CVTTSD2SI: [u8; 3] = [0xf2, 0x0f, 0x2c];
+
+/// Convert with truncation scalar single-precision floating-point value to integer.
+pub static CVTTSS2SI: [u8; 3] = [0xf3, 0x0f, 0x2c];
+
+/// Unsigned divide for {16,32,64}-bit.
+pub static DIV: [u8; 1] = [0xf7];
+
+/// Divide packed double-precision floating-point values in xmm1 by packed double-precision
+/// floating-point values in xmm2/mem (SSE2).
+pub static DIVPD: [u8; 3] = [0x66, 0x0f, 0x5e];
+
+/// Divide packed single-precision floating-point values in xmm1 by packed single-precision
+/// floating-point values in xmm2/mem (SSE).
+pub static DIVPS: [u8; 2] = [0x0f, 0x5e];
+
+/// Divide low double-precision floating-point value in xmm1 by low double-precision
+/// floating-point value in xmm2/m64.
+pub static DIVSD: [u8; 3] = [0xf2, 0x0f, 0x5e];
+
+/// Divide low single-precision floating-point value in xmm1 by low single-precision
+/// floating-point value in xmm2/m32.
+pub static DIVSS: [u8; 3] = [0xf3, 0x0f, 0x5e];
+
+/// Signed divide for {16,32,64}-bit.
+pub static IDIV: [u8; 1] = [0xf7];
+
+/// Signed multiply for {16,32,64}-bit, generic registers.
+pub static IMUL: [u8; 2] = [0x0f, 0xaf];
+
+/// Signed multiply for {16,32,64}-bit, storing into RDX:RAX.
+pub static IMUL_RDX_RAX: [u8; 1] = [0xf7];
+
+/// Insert scalar single-precision floating-point value.
+pub static INSERTPS: [u8; 4] = [0x66, 0x0f, 0x3a, 0x21];
+
+/// Either:
+/// 1. Jump near, absolute indirect, RIP = 64-bit offset from register or memory.
+/// 2. Jump far, absolute indirect, address given in m16:64.
+pub static JUMP_ABSOLUTE: [u8; 1] = [0xff];
+
+/// Jump near, relative, RIP = RIP + 32-bit displacement sign extended to 64 bits.
+pub static JUMP_NEAR_RELATIVE: [u8; 1] = [0xe9];
+
+/// Jump near (rel32) if overflow (OF=1).
+pub static JUMP_NEAR_IF_OVERFLOW: [u8; 2] = [0x0f, 0x80];
+
+/// Jump short, relative, RIP = RIP + 8-bit displacement sign extended to 64 bits.
+pub static JUMP_SHORT: [u8; 1] = [0xeb];
+
+/// Jump short (rel8) if equal (ZF=1).
+pub static JUMP_SHORT_IF_EQUAL: [u8; 1] = [0x74];
+
+/// Jump short (rel8) if not equal (ZF=0).
+pub static JUMP_SHORT_IF_NOT_EQUAL: [u8; 1] = [0x75];
+
+/// Jump short (rel8) if overflow (OF=1).
+pub static JUMP_SHORT_IF_OVERFLOW: [u8; 1] = [0x70];
+
+/// Store effective address for m in register r{16,32,64}.
+pub static LEA: [u8; 1] = [0x8d];
+
+/// Count the number of leading zero bits.
+pub static LZCNT: [u8; 3] = [0xf3, 0x0f, 0xbd];
+
+/// Return the maximum packed double-precision floating-point values between xmm1 and xmm2/m128
+/// (SSE2).
+pub static MAXPD: [u8; 3] = [0x66, 0x0f, 0x5f];
+
+/// Return the maximum packed single-precision floating-point values between xmm1 and xmm2/m128
+/// (SSE).
+pub static MAXPS: [u8; 2] = [0x0f, 0x5f];
+
+/// Return the maximum scalar double-precision floating-point value between
+/// xmm2/m64 and xmm1.
+pub static MAXSD: [u8; 3] = [0xf2, 0x0f, 0x5f];
+
+/// Return the maximum scalar single-precision floating-point value between
+/// xmm2/m32 and xmm1.
+pub static MAXSS: [u8; 3] = [0xf3, 0x0f, 0x5f];
+
+/// Return the minimum packed double-precision floating-point values between xmm1 and xmm2/m128
+/// (SSE2).
+pub static MINPD: [u8; 3] = [0x66, 0x0f, 0x5d];
+
+/// Return the minimum packed single-precision floating-point values between xmm1 and xmm2/m128
+/// (SSE).
+pub static MINPS: [u8; 2] = [0x0f, 0x5d];
+
+/// Return the minimum scalar double-precision floating-point value between
+/// xmm2/m64 and xmm1.
+pub static MINSD: [u8; 3] = [0xf2, 0x0f, 0x5d];
+
+/// Return the minimum scalar single-precision floating-point value between
+/// xmm2/m32 and xmm1.
+pub static MINSS: [u8; 3] = [0xf3, 0x0f, 0x5d];
+
+/// Move r8 to r/m8.
+pub static MOV_BYTE_STORE: [u8; 1] = [0x88];
+
+/// Move imm{16,32,64} to same-sized register.
+pub static MOV_IMM: [u8; 1] = [0xb8];
+
+/// Move imm{16,32} to r{16,32,64}, sign-extended if 64-bit target.
+pub static MOV_IMM_SIGNEXTEND: [u8; 1] = [0xc7];
+
+/// Move {r/m16, r/m32, r/m64} to same-sized register.
+pub static MOV_LOAD: [u8; 1] = [0x8b];
+
+/// Move r16 to r/m16.
+pub static MOV_STORE_16: [u8; 2] = [0x66, 0x89];
+
+/// Move {r16, r32, r64} to same-sized register or memory.
+pub static MOV_STORE: [u8; 1] = [0x89];
+
+/// Move aligned packed single-precision floating-point values from x/m to xmm (SSE).
+pub static MOVAPS_LOAD: [u8; 2] = [0x0f, 0x28];
+
+/// Move doubleword from r/m32 to xmm (SSE2). Quadword with REX prefix.
+pub static MOVD_LOAD_XMM: [u8; 3] = [0x66, 0x0f, 0x6e];
+
+/// Move doubleword from xmm to r/m32 (SSE2). Quadword with REX prefix.
+pub static MOVD_STORE_XMM: [u8; 3] = [0x66, 0x0f, 0x7e];
+
+/// Move packed single-precision floating-point values low to high (SSE).
+pub static MOVLHPS: [u8; 2] = [0x0f, 0x16];
+
+/// Move scalar double-precision floating-point value (from reg/mem to reg).
+pub static MOVSD_LOAD: [u8; 3] = [0xf2, 0x0f, 0x10];
+
+/// Move scalar double-precision floating-point value (from reg to reg/mem).
+pub static MOVSD_STORE: [u8; 3] = [0xf2, 0x0f, 0x11];
+
+/// Move scalar single-precision floating-point value (from reg to reg/mem).
+pub static MOVSS_STORE: [u8; 3] = [0xf3, 0x0f, 0x11];
+
+/// Move scalar single-precision floating-point-value (from reg/mem to reg).
+pub static MOVSS_LOAD: [u8; 3] = [0xf3, 0x0f, 0x10];
+
+/// Move byte to register with sign-extension.
+pub static MOVSX_BYTE: [u8; 2] = [0x0f, 0xbe];
+
+/// Move word to register with sign-extension.
+pub static MOVSX_WORD: [u8; 2] = [0x0f, 0xbf];
+
+/// Move doubleword to register with sign-extension.
+pub static MOVSXD: [u8; 1] = [0x63];
+
+/// Move unaligned packed single-precision floating-point from x/m to xmm (SSE).
+pub static MOVUPS_LOAD: [u8; 2] = [0x0f, 0x10];
+
+/// Move unaligned packed single-precision floating-point value from xmm to x/m (SSE).
+pub static MOVUPS_STORE: [u8; 2] = [0x0f, 0x11];
+
+/// Move byte to register with zero-extension.
+pub static MOVZX_BYTE: [u8; 2] = [0x0f, 0xb6];
+
+/// Move word to register with zero-extension.
+pub static MOVZX_WORD: [u8; 2] = [0x0f, 0xb7];
+
+/// Unsigned multiply for {16,32,64}-bit.
+pub static MUL: [u8; 1] = [0xf7];
+
+/// Multiply packed double-precision floating-point values from xmm2/mem to xmm1 and store result
+/// in xmm1 (SSE2).
+pub static MULPD: [u8; 3] = [0x66, 0x0f, 0x59];
+
+/// Multiply packed single-precision floating-point values from xmm2/mem to xmm1 and store result
+/// in xmm1 (SSE).
+pub static MULPS: [u8; 2] = [0x0f, 0x59];
+
+/// Multiply the low double-precision floating-point value in xmm2/m64 by the
+/// low double-precision floating-point value in xmm1.
+pub static MULSD: [u8; 3] = [0xf2, 0x0f, 0x59];
+
+/// Multiply the low single-precision floating-point value in xmm2/m32 by the
+/// low single-precision floating-point value in xmm1.
+pub static MULSS: [u8; 3] = [0xf3, 0x0f, 0x59];
+
+/// Reverse each bit of r/m{16,32,64}.
+pub static NOT: [u8; 1] = [0xf7];
+
+/// r{16,32,64} OR register of same size.
+pub static OR: [u8; 1] = [0x09];
+
+/// imm{16,32} OR r/m{16,32,64}, possibly sign-extended.
+pub static OR_IMM: [u8; 1] = [0x81];
+
+/// r/m{16,32,64} OR sign-extended imm8.
+pub static OR_IMM8_SIGN_EXTEND: [u8; 1] = [0x83];
+
+/// Return the bitwise logical OR of packed single-precision values in xmm and x/m (SSE).
+pub static ORPS: [u8; 2] = [0x0f, 0x56];
+
+/// Compute the absolute value of bytes in xmm2/m128 and store the unsigned result in xmm1 (SSSE3).
+pub static PABSB: [u8; 4] = [0x66, 0x0f, 0x38, 0x1c];
+
+/// Compute the absolute value of 32-bit integers in xmm2/m128 and store the unsigned result in
+/// xmm1 (SSSE3).
+pub static PABSD: [u8; 4] = [0x66, 0x0f, 0x38, 0x1e];
+
+/// Compute the absolute value of 16-bit integers in xmm2/m128 and store the unsigned result in
+/// xmm1 (SSSE3).
+pub static PABSW: [u8; 4] = [0x66, 0x0f, 0x38, 0x1d];
+
+/// Converts 8 packed signed word integers from xmm1 and from xmm2/m128 into 16 packed signed byte
+/// integers in xmm1 using signed saturation (SSE2).
+pub static PACKSSWB: [u8; 3] = [0x66, 0x0f, 0x63];
+
+/// Converts 4 packed signed doubleword integers from xmm1 and from xmm2/m128 into 8 packed signed
+/// word integers in xmm1 using signed saturation (SSE2).
+pub static PACKSSDW: [u8; 3] = [0x66, 0x0f, 0x6b];
+
+/// Converts 8 packed signed word integers from xmm1 and from xmm2/m128 into 16 packed unsigned byte
+/// integers in xmm1 using unsigned saturation (SSE2).
+pub static PACKUSWB: [u8; 3] = [0x66, 0x0f, 0x67];
+
+/// Converts 4 packed signed doubleword integers from xmm1 and from xmm2/m128 into 8 unpacked signed
+/// word integers in xmm1 using unsigned saturation (SSE4.1).
+pub static PACKUSDW: [u8; 4] = [0x66, 0x0f, 0x38, 0x2b];
+
+/// Add packed byte integers from xmm2/m128 and xmm1 (SSE2).
+pub static PADDB: [u8; 3] = [0x66, 0x0f, 0xfc];
+
+/// Add packed doubleword integers from xmm2/m128 and xmm1 (SSE2).
+pub static PADDD: [u8; 3] = [0x66, 0x0f, 0xfe];
+
+/// Add packed quadword integers from xmm2/m128 and xmm1 (SSE2).
+pub static PADDQ: [u8; 3] = [0x66, 0x0f, 0xd4];
+
+/// Add packed word integers from xmm2/m128 and xmm1 (SSE2).
+pub static PADDW: [u8; 3] = [0x66, 0x0f, 0xfd];
+
+/// Add packed signed byte integers from xmm2/m128 and xmm1 saturate the results (SSE).
+pub static PADDSB: [u8; 3] = [0x66, 0x0f, 0xec];
+
+/// Add packed signed word integers from xmm2/m128 and xmm1 saturate the results (SSE).
+pub static PADDSW: [u8; 3] = [0x66, 0x0f, 0xed];
+
+/// Add packed unsigned byte integers from xmm2/m128 and xmm1 saturate the results (SSE).
+pub static PADDUSB: [u8; 3] = [0x66, 0x0f, 0xdc];
+
+/// Add packed unsigned word integers from xmm2/m128 and xmm1 saturate the results (SSE).
+pub static PADDUSW: [u8; 3] = [0x66, 0x0f, 0xdd];
+
+/// Concatenate destination and source operands, extract a byte-aligned result into xmm1 that is
+/// shifted to the right by the constant number of bytes in imm8 (SSSE3).
+pub static PALIGNR: [u8; 4] = [0x66, 0x0f, 0x3a, 0x0f];
+
+/// Bitwise AND of xmm2/m128 and xmm1 (SSE2).
+pub static PAND: [u8; 3] = [0x66, 0x0f, 0xdb];
+
+/// Bitwise AND NOT of xmm2/m128 and xmm1 (SSE2).
+pub static PANDN: [u8; 3] = [0x66, 0x0f, 0xdf];
+
+/// Average packed unsigned byte integers from xmm2/m128 and xmm1 with rounding (SSE2).
+pub static PAVGB: [u8; 3] = [0x66, 0x0f, 0xE0];
+
+/// Average packed unsigned word integers from xmm2/m128 and xmm1 with rounding (SSE2).
+pub static PAVGW: [u8; 3] = [0x66, 0x0f, 0xE3];
+
+/// Select byte values from xmm1 and xmm2/m128 from mask specified in the high bit of each byte
+/// in XMM0 and store the values into xmm1 (SSE4.1).
+pub static PBLENDVB: [u8; 4] = [0x66, 0x0f, 0x38, 0x10];
+
+/// Select words from xmm1 and xmm2/m128 from mask specified in imm8 and store the values into xmm1
+/// (SSE4.1).
+pub static PBLENDW: [u8; 4] = [0x66, 0x0f, 0x3a, 0x0e];
+
+/// Compare packed data for equal (SSE2).
+pub static PCMPEQB: [u8; 3] = [0x66, 0x0f, 0x74];
+
+/// Compare packed data for equal (SSE2).
+pub static PCMPEQD: [u8; 3] = [0x66, 0x0f, 0x76];
+
+/// Compare packed data for equal (SSE4.1).
+pub static PCMPEQQ: [u8; 4] = [0x66, 0x0f, 0x38, 0x29];
+
+/// Compare packed data for equal (SSE2).
+pub static PCMPEQW: [u8; 3] = [0x66, 0x0f, 0x75];
+
+/// Compare packed signed byte integers for greater than (SSE2).
+pub static PCMPGTB: [u8; 3] = [0x66, 0x0f, 0x64];
+
+/// Compare packed signed doubleword integers for greater than (SSE2).
+pub static PCMPGTD: [u8; 3] = [0x66, 0x0f, 0x66];
+
+/// Compare packed signed quadword integers for greater than (SSE4.2).
+pub static PCMPGTQ: [u8; 4] = [0x66, 0x0f, 0x38, 0x37];
+
+/// Compare packed signed word integers for greater than (SSE2).
+pub static PCMPGTW: [u8; 3] = [0x66, 0x0f, 0x65];
+
+/// Extract doubleword or quadword, depending on REX.W (SSE4.1).
+pub static PEXTR: [u8; 4] = [0x66, 0x0f, 0x3a, 0x16];
+
+/// Extract byte (SSE4.1).
+pub static PEXTRB: [u8; 4] = [0x66, 0x0f, 0x3a, 0x14];
+
+/// Extract word (SSE4.1). There is a 3-byte SSE2 variant that can also move to m/16.
+pub static PEXTRW: [u8; 4] = [0x66, 0x0f, 0x3a, 0x15];
+
+/// Insert doubleword or quadword, depending on REX.W (SSE4.1).
+pub static PINSR: [u8; 4] = [0x66, 0x0f, 0x3a, 0x22];
+
+/// Insert byte (SSE4.1).
+pub static PINSRB: [u8; 4] = [0x66, 0x0f, 0x3a, 0x20];
+
+/// Insert word (SSE2).
+pub static PINSRW: [u8; 3] = [0x66, 0x0f, 0xc4];
+
+/// Compare packed signed byte integers in xmm1 and xmm2/m128 and store packed maximum values in
+/// xmm1 (SSE4.1).
+pub static PMAXSB: [u8; 4] = [0x66, 0x0f, 0x38, 0x3c];
+
+/// Compare packed signed doubleword integers in xmm1 and xmm2/m128 and store packed maximum
+/// values in xmm1 (SSE4.1).
+pub static PMAXSD: [u8; 4] = [0x66, 0x0f, 0x38, 0x3d];
+
+/// Compare packed signed word integers in xmm1 and xmm2/m128 and store packed maximum values in
+/// xmm1 (SSE2).
+pub static PMAXSW: [u8; 3] = [0x66, 0x0f, 0xee];
+
+/// Compare packed unsigned byte integers in xmm1 and xmm2/m128 and store packed maximum values in
+/// xmm1 (SSE2).
+pub static PMAXUB: [u8; 3] = [0x66, 0x0f, 0xde];
+
+/// Compare packed unsigned doubleword integers in xmm1 and xmm2/m128 and store packed maximum
+/// values in xmm1 (SSE4.1).
+pub static PMAXUD: [u8; 4] = [0x66, 0x0f, 0x38, 0x3f];
+
+/// Compare packed unsigned word integers in xmm1 and xmm2/m128 and store packed maximum values in
+/// xmm1 (SSE4.1).
+pub static PMAXUW: [u8; 4] = [0x66, 0x0f, 0x38, 0x3e];
+
+/// Compare packed signed byte integers in xmm1 and xmm2/m128 and store packed minimum values in
+/// xmm1 (SSE4.1).
+pub static PMINSB: [u8; 4] = [0x66, 0x0f, 0x38, 0x38];
+
+/// Compare packed signed doubleword integers in xmm1 and xmm2/m128 and store packed minimum
+/// values in xmm1 (SSE4.1).
+pub static PMINSD: [u8; 4] = [0x66, 0x0f, 0x38, 0x39];
+
+/// Compare packed signed word integers in xmm1 and xmm2/m128 and store packed minimum values in
+/// xmm1 (SSE2).
+pub static PMINSW: [u8; 3] = [0x66, 0x0f, 0xea];
+
+/// Compare packed unsigned byte integers in xmm1 and xmm2/m128 and store packed minimum values in
+/// xmm1 (SSE2).
+pub static PMINUB: [u8; 3] = [0x66, 0x0f, 0xda];
+
+/// Compare packed unsigned doubleword integers in xmm1 and xmm2/m128 and store packed minimum
+/// values in xmm1 (SSE4.1).
+pub static PMINUD: [u8; 4] = [0x66, 0x0f, 0x38, 0x3b];
+
+/// Compare packed unsigned word integers in xmm1 and xmm2/m128 and store packed minimum values in
+/// xmm1 (SSE4.1).
+pub static PMINUW: [u8; 4] = [0x66, 0x0f, 0x38, 0x3a];
+
+/// Sign extend 8 packed 8-bit integers in the low 8 bytes of xmm2/m64 to 8 packed 16-bit
+/// integers in xmm1 (SSE4.1).
+pub static PMOVSXBW: [u8; 4] = [0x66, 0x0f, 0x38, 0x20];
+
+/// Sign extend 4 packed 16-bit integers in the low 8 bytes of xmm2/m64 to 4 packed 32-bit
+/// integers in xmm1 (SSE4.1).
+pub static PMOVSXWD: [u8; 4] = [0x66, 0x0f, 0x38, 0x23];
+
+/// Sign extend 2 packed 32-bit integers in the low 8 bytes of xmm2/m64 to 2 packed 64-bit
+/// integers in xmm1 (SSE4.1).
+pub static PMOVSXDQ: [u8; 4] = [0x66, 0x0f, 0x38, 0x25];
+
+/// Zero extend 8 packed 8-bit integers in the low 8 bytes of xmm2/m64 to 8 packed 16-bit
+/// integers in xmm1 (SSE4.1).
+pub static PMOVZXBW: [u8; 4] = [0x66, 0x0f, 0x38, 0x30];
+
+/// Zero extend 4 packed 16-bit integers in the low 8 bytes of xmm2/m64 to 4 packed 32-bit
+/// integers in xmm1 (SSE4.1).
+pub static PMOVZXWD: [u8; 4] = [0x66, 0x0f, 0x38, 0x33];
+
+/// Zero extend 2 packed 32-bit integers in the low 8 bytes of xmm2/m64 to 2 packed 64-bit
+/// integers in xmm1 (SSE4.1).
+pub static PMOVZXDQ: [u8; 4] = [0x66, 0x0f, 0x38, 0x35];
+
+/// Multiply the packed signed word integers in xmm1 and xmm2/m128, and store the low 16 bits of
+/// the results in xmm1 (SSE2).
+pub static PMULLW: [u8; 3] = [0x66, 0x0f, 0xd5];
+
+/// Multiply the packed doubleword signed integers in xmm1 and xmm2/m128 and store the low 32
+/// bits of each product in xmm1 (SSE4.1).
+pub static PMULLD: [u8; 4] = [0x66, 0x0f, 0x38, 0x40];
+
+/// Multiply the packed quadword signed integers in xmm2 and xmm3/m128 and store the low 64
+/// bits of each product in xmm1 (AVX512VL/DQ). Requires an EVEX encoding.
+pub static VPMULLQ: [u8; 4] = [0x66, 0x0f, 0x38, 0x40];
+
+/// Multiply packed unsigned doubleword integers in xmm1 by packed unsigned doubleword integers
+/// in xmm2/m128, and store the quadword results in xmm1 (SSE2).
+pub static PMULUDQ: [u8; 3] = [0x66, 0x0f, 0xf4];
+
+/// Pop top of stack into r{16,32,64}; increment stack pointer.
+pub static POP_REG: [u8; 1] = [0x58];
+
+/// Returns the number of bits set to 1.
+pub static POPCNT: [u8; 3] = [0xf3, 0x0f, 0xb8];
+
+/// Bitwise OR of xmm2/m128 and xmm1 (SSE2).
+pub static POR: [u8; 3] = [0x66, 0x0f, 0xeb];
+
+/// Shuffle bytes in xmm1 according to contents of xmm2/m128 (SSSE3).
+pub static PSHUFB: [u8; 4] = [0x66, 0x0f, 0x38, 0x00];
+
+/// Shuffle the doublewords in xmm2/m128 based on the encoding in imm8 and
+/// store the result in xmm1 (SSE2).
+pub static PSHUFD: [u8; 3] = [0x66, 0x0f, 0x70];
+
+/// Shift words in xmm1 by imm8; the direction and sign-bit behavior is controlled by the RRR
+/// digit used in the ModR/M byte (SSE2).
+pub static PS_W_IMM: [u8; 3] = [0x66, 0x0f, 0x71];
+
+/// Shift doublewords in xmm1 by imm8; the direction and sign-bit behavior is controlled by the RRR
+/// digit used in the ModR/M byte (SSE2).
+pub static PS_D_IMM: [u8; 3] = [0x66, 0x0f, 0x72];
+
+/// Shift quadwords in xmm1 by imm8; the direction and sign-bit behavior is controlled by the RRR
+/// digit used in the ModR/M byte (SSE2).
+pub static PS_Q_IMM: [u8; 3] = [0x66, 0x0f, 0x73];
+
+/// Shift words in xmm1 left by xmm2/m128 while shifting in 0s (SSE2).
+pub static PSLLW: [u8; 3] = [0x66, 0x0f, 0xf1];
+
+/// Shift doublewords in xmm1 left by xmm2/m128 while shifting in 0s (SSE2).
+pub static PSLLD: [u8; 3] = [0x66, 0x0f, 0xf2];
+
+/// Shift quadwords in xmm1 left by xmm2/m128 while shifting in 0s (SSE2).
+pub static PSLLQ: [u8; 3] = [0x66, 0x0f, 0xf3];
+
+/// Shift words in xmm1 right by xmm2/m128 while shifting in 0s (SSE2).
+pub static PSRLW: [u8; 3] = [0x66, 0x0f, 0xd1];
+
+/// Shift doublewords in xmm1 right by xmm2/m128 while shifting in 0s (SSE2).
+pub static PSRLD: [u8; 3] = [0x66, 0x0f, 0xd2];
+
+/// Shift quadwords in xmm1 right by xmm2/m128 while shifting in 0s (SSE2).
+pub static PSRLQ: [u8; 3] = [0x66, 0x0f, 0xd3];
+
+/// Shift words in xmm1 right by xmm2/m128 while shifting in sign bits (SSE2).
+pub static PSRAW: [u8; 3] = [0x66, 0x0f, 0xe1];
+
+/// Shift doublewords in xmm1 right by xmm2/m128 while shifting in sign bits (SSE2).
+pub static PSRAD: [u8; 3] = [0x66, 0x0f, 0xe2];
+
+/// Subtract packed byte integers in xmm2/m128 from packed byte integers in xmm1 (SSE2).
+pub static PSUBB: [u8; 3] = [0x66, 0x0f, 0xf8];
+
+/// Subtract packed word integers in xmm2/m128 from packed word integers in xmm1 (SSE2).
+pub static PSUBW: [u8; 3] = [0x66, 0x0f, 0xf9];
+
+/// Subtract packed doubleword integers in xmm2/m128 from packed doubleword integers in xmm1 (SSE2).
+pub static PSUBD: [u8; 3] = [0x66, 0x0f, 0xfa];
+
+/// Subtract packed quadword integers in xmm2/m128 from xmm1 (SSE2).
+pub static PSUBQ: [u8; 3] = [0x66, 0x0f, 0xfb];
+
+/// Subtract packed signed byte integers in xmm2/m128 from packed signed byte integers in xmm1
+/// and saturate results (SSE2).
+pub static PSUBSB: [u8; 3] = [0x66, 0x0f, 0xe8];
+
+/// Subtract packed signed word integers in xmm2/m128 from packed signed word integers in xmm1
+/// and saturate results (SSE2).
+pub static PSUBSW: [u8; 3] = [0x66, 0x0f, 0xe9];
+
+/// Subtract packed unsigned byte integers in xmm2/m128 from packed unsigned byte integers in xmm1
+/// and saturate results (SSE2).
+pub static PSUBUSB: [u8; 3] = [0x66, 0x0f, 0xd8];
+
+/// Subtract packed unsigned word integers in xmm2/m128 from packed unsigned word integers in xmm1
+/// and saturate results (SSE2).
+pub static PSUBUSW: [u8; 3] = [0x66, 0x0f, 0xd9];
+
+/// Set ZF if xmm2/m128 AND xmm1 result is all 0s; set CF if xmm2/m128 AND NOT xmm1 result is all
+/// 0s (SSE4.1).
+pub static PTEST: [u8; 4] = [0x66, 0x0f, 0x38, 0x17];
+
+/// Unpack and interleave high-order bytes from xmm1 and xmm2/m128 into xmm1 (SSE2).
+pub static PUNPCKHBW: [u8; 3] = [0x66, 0x0f, 0x68];
+
+/// Unpack and interleave high-order words from xmm1 and xmm2/m128 into xmm1 (SSE2).
+pub static PUNPCKHWD: [u8; 3] = [0x66, 0x0f, 0x69];
+
+/// Unpack and interleave high-order doublewords from xmm1 and xmm2/m128 into xmm1 (SSE2).
+pub static PUNPCKHDQ: [u8; 3] = [0x66, 0x0f, 0x6A];
+
+/// Unpack and interleave high-order quadwords from xmm1 and xmm2/m128 into xmm1 (SSE2).
+pub static PUNPCKHQDQ: [u8; 3] = [0x66, 0x0f, 0x6D];
+
+/// Unpack and interleave low-order bytes from xmm1 and xmm2/m128 into xmm1 (SSE2).
+pub static PUNPCKLBW: [u8; 3] = [0x66, 0x0f, 0x60];
+
+/// Unpack and interleave low-order words from xmm1 and xmm2/m128 into xmm1 (SSE2).
+pub static PUNPCKLWD: [u8; 3] = [0x66, 0x0f, 0x61];
+
+/// Unpack and interleave low-order doublewords from xmm1 and xmm2/m128 into xmm1 (SSE2).
+pub static PUNPCKLDQ: [u8; 3] = [0x66, 0x0f, 0x62];
+
+/// Unpack and interleave low-order quadwords from xmm1 and xmm2/m128 into xmm1 (SSE2).
+pub static PUNPCKLQDQ: [u8; 3] = [0x66, 0x0f, 0x6C];
+
+/// Push r{16,32,64}.
+pub static PUSH_REG: [u8; 1] = [0x50];
+
+/// Logical exclusive OR (SSE2).
+pub static PXOR: [u8; 3] = [0x66, 0x0f, 0xef];
+
+/// Near return to calling procedure.
+pub static RET_NEAR: [u8; 1] = [0xc3];
+
+/// General rotation opcode. Kind of rotation depends on encoding.
+pub static ROTATE_CL: [u8; 1] = [0xd3];
+
+/// General rotation opcode. Kind of rotation depends on encoding.
+pub static ROTATE_IMM8: [u8; 1] = [0xc1];
+
+/// Round scalar double-precision floating-point values.
+pub static ROUNDSD: [u8; 4] = [0x66, 0x0f, 0x3a, 0x0b];
+
+/// Round scalar single-precision floating-point values.
+pub static ROUNDSS: [u8; 4] = [0x66, 0x0f, 0x3a, 0x0a];
+
+/// Subtract with borrow r{16,32,64} from r/m of the same size.
+pub static SBB: [u8; 1] = [0x19];
+
+/// Set byte if overflow (OF=1).
+pub static SET_BYTE_IF_OVERFLOW: [u8; 2] = [0x0f, 0x90];
+
+/// Compute the square root of the packed double-precision floating-point values and store the
+/// result in xmm1 (SSE2).
+pub static SQRTPD: [u8; 3] = [0x66, 0x0f, 0x51];
+
+/// Compute the square root of the packed single-precision floating-point values and store the
+/// result in xmm1 (SSE).
+pub static SQRTPS: [u8; 2] = [0x0f, 0x51];
+
+/// Compute square root of scalar double-precision floating-point value.
+pub static SQRTSD: [u8; 3] = [0xf2, 0x0f, 0x51];
+
+/// Compute square root of scalar single-precision floating-point value.
+pub static SQRTSS: [u8; 3] = [0xf3, 0x0f, 0x51];
+
+/// Subtract r{16,32,64} from r/m of same size.
+pub static SUB: [u8; 1] = [0x29];
+
+/// Subtract packed double-precision floating-point values in xmm2/mem from xmm1 and store result
+/// in xmm1 (SSE2).
+pub static SUBPD: [u8; 3] = [0x66, 0x0f, 0x5c];
+
+/// Subtract packed single-precision floating-point values in xmm2/mem from xmm1 and store result
+/// in xmm1 (SSE).
+pub static SUBPS: [u8; 2] = [0x0f, 0x5c];
+
+/// Subtract the low double-precision floating-point value in xmm2/m64 from xmm1
+/// and store the result in xmm1.
+pub static SUBSD: [u8; 3] = [0xf2, 0x0f, 0x5c];
+
+/// Subtract the low single-precision floating-point value in xmm2/m32 from xmm1
+/// and store the result in xmm1.
+pub static SUBSS: [u8; 3] = [0xf3, 0x0f, 0x5c];
+
+/// AND r8 with r/m8; set SF, ZF, PF according to result.
+pub static TEST_BYTE_REG: [u8; 1] = [0x84];
+
+/// AND {r16, r32, r64} with r/m of the same size; set SF, ZF, PF according to result.
+pub static TEST_REG: [u8; 1] = [0x85];
+
+/// Count the number of trailing zero bits.
+pub static TZCNT: [u8; 3] = [0xf3, 0x0f, 0xbc];
+
+/// Compare low double-precision floating-point values in xmm1 and xmm2/mem64
+/// and set the EFLAGS flags accordingly.
+pub static UCOMISD: [u8; 3] = [0x66, 0x0f, 0x2e];
+
+/// Compare low single-precision floating-point values in xmm1 and xmm2/mem32
+/// and set the EFLAGS flags accordingly.
+pub static UCOMISS: [u8; 2] = [0x0f, 0x2e];
+
+/// Raise an invalid opcode exception (UD2).
+pub static UNDEFINED2: [u8; 2] = [0x0f, 0x0b];
+
+/// Convert four packed unsigned doubleword integers from xmm2/m128/m32bcst to packed
+/// single-precision floating-point values in xmm1 with writemask k1. Rounding behavior
+/// is controlled by MXCSR but can be overridden by EVEX.L'L in static rounding mode
+/// (AVX512VL, AVX512F).
+pub static VCVTUDQ2PS: [u8; 3] = [0xf2, 0x0f, 0x7a];
+
+/// imm{16,32} XOR r/m{16,32,64}, possibly sign-extended.
+pub static XOR_IMM: [u8; 1] = [0x81];
+
+/// r/m{16,32,64} XOR sign-extended imm8.
+pub static XOR_IMM8_SIGN_EXTEND: [u8; 1] = [0x83];
+
+/// r/m{16,32,64} XOR register of the same size.
+pub static XOR: [u8; 1] = [0x31];
+
+/// r/m8 XOR r8.
+pub static XORB: [u8; 1] = [0x30];
+
+/// Bitwise logical XOR of packed double-precision floating-point values.
+pub static XORPD: [u8; 3] = [0x66, 0x0f, 0x57];
+
+/// Bitwise logical XOR of packed single-precision floating-point values.
+pub static XORPS: [u8; 2] = [0x0f, 0x57];
diff --git a/third_party/rust/cranelift-codegen-meta/src/isa/x86/recipes.rs b/third_party/rust/cranelift-codegen-meta/src/isa/x86/recipes.rs
new file mode 100644
index 0000000000..f45f8dc673
--- /dev/null
+++ b/third_party/rust/cranelift-codegen-meta/src/isa/x86/recipes.rs
@@ -0,0 +1,3445 @@
+//! Encoding recipes for x86/x86_64.
+use std::rc::Rc;
+
+use cranelift_codegen_shared::isa::x86::EncodingBits;
+
+use crate::cdsl::ast::Literal;
+use crate::cdsl::formats::InstructionFormat;
+use crate::cdsl::instructions::InstructionPredicate;
+use crate::cdsl::recipes::{
+ EncodingRecipe, EncodingRecipeBuilder, OperandConstraint, Register, Stack,
+};
+use crate::cdsl::regs::IsaRegs;
+use crate::cdsl::settings::SettingGroup;
+use crate::shared::Definitions as SharedDefinitions;
+
+use crate::isa::x86::opcodes;
+
+/// Helper data structure to create recipes and template recipes.
+/// It contains all the recipes and recipe templates that might be used in the encodings module of
+/// this same directory.
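+///
+/// Lookups are by name; e.g. (illustrative) `recipes.recipe("null")` returns one of the plain
+/// recipes defined below, while `recipes.template("rr")` returns a recipe template.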
+pub(crate) struct RecipeGroup<'builder> {
+ /// Memoized registers description, to pass it to builders later.
+ regs: &'builder IsaRegs,
+
+ /// All the recipes explicitly created in this file. This is different from the final set of
+ /// recipes, which is definitive only once encodings have generated new recipes on the fly.
+ recipes: Vec<EncodingRecipe>,
+
+ /// All the recipe templates created in this file.
+ templates: Vec<Rc<Template<'builder>>>,
+}
+
+impl<'builder> RecipeGroup<'builder> {
+ fn new(regs: &'builder IsaRegs) -> Self {
+ Self {
+ regs,
+ recipes: Vec::new(),
+ templates: Vec::new(),
+ }
+ }
+ fn add_recipe(&mut self, recipe: EncodingRecipeBuilder) {
+ self.recipes.push(recipe.build());
+ }
+ fn add_template_recipe(&mut self, recipe: EncodingRecipeBuilder) -> Rc<Template<'builder>> {
+ let template = Rc::new(Template::new(recipe, self.regs));
+ self.templates.push(template.clone());
+ template
+ }
+ fn add_template_inferred(
+ &mut self,
+ recipe: EncodingRecipeBuilder,
+ infer_function: &'static str,
+ ) -> Rc<Template<'builder>> {
+ let template =
+ Rc::new(Template::new(recipe, self.regs).inferred_rex_compute_size(infer_function));
+ self.templates.push(template.clone());
+ template
+ }
+ fn add_template(&mut self, template: Template<'builder>) -> Rc<Template<'builder>> {
+ let template = Rc::new(template);
+ self.templates.push(template.clone());
+ template
+ }
+ pub fn recipe(&self, name: &str) -> &EncodingRecipe {
+ self.recipes
+ .iter()
+ .find(|recipe| recipe.name == name)
+ .unwrap_or_else(|| panic!("unknown recipe name: {}. Try template?", name))
+ }
+ pub fn template(&self, name: &str) -> &Template {
+ self.templates
+ .iter()
+ .find(|recipe| recipe.name() == name)
+ .unwrap_or_else(|| panic!("unknown template name: {}. Try recipe?", name))
+ }
+}
+
+// Opcode representation.
+//
+// Cranelift requires each recipe to have a single encoding size in bytes, and x86 opcodes are
+// variable length, so we use separate recipes for different styles of opcodes and prefixes. The
+// opcode format is indicated by the recipe name prefix.
+//
+// The match case below does not include the REX prefix which goes after the mandatory prefix.
+// VEX/XOP and EVEX prefixes are not yet supported. Encodings using any of these prefixes are
+// represented by separate recipes.
+//
+// The encoding bits are:
+//
+// 0-7: The opcode byte <op>.
+// 8-9: pp, mandatory prefix:
+// 00 none (Op*)
+// 01 66 (Mp*)
+// 10 F3 (Mp*)
+// 11 F2 (Mp*)
+// 10-11: mm, opcode map:
+// 00 <op> (Op1/Mp1)
+// 01 0F <op> (Op2/Mp2)
+// 10 0F 38 <op> (Op3/Mp3)
+// 11 0F 3A <op> (Op3/Mp3)
+// 12-14: rrr, opcode bits for the ModR/M byte for certain opcodes.
+// 15: REX.W bit (or VEX.W/E)
+//
+// There is some redundancy between bits 8-11 and the recipe names, but we have enough bits, and
+// the pp+mm format is ready for supporting VEX prefixes.
+//
+// TODO Cranelift no longer requires each recipe to have a single encoding size, so this
+// could be simplified.
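+//
+// As a worked example (illustrative, derived from the layout above): the PABSW opcode bytes
+// [0x66, 0x0f, 0x38, 0x1d] use mandatory prefix 66 (pp=01) and opcode map 0F 38 (mm=10), so
+// with rrr=0 and W=0 the recipe name prefix is "Mp3" and the encoding bits are
+// 0x1d | (0b01 << 8) | (0b10 << 10) = 0x091d.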
+
+/// Given a sequence of opcode bytes, compute the recipe name prefix and encoding bits.
+fn decode_opcodes(op_bytes: &[u8], rrr: u16, w: u16) -> (&'static str, u16) {
+ let enc = EncodingBits::new(op_bytes, rrr, w);
+ (enc.prefix().recipe_name_prefix(), enc.bits())
+}
+
+/// Given a snippet of Rust code (or None), replace the `PUT_OP` macro with the
+/// corresponding `put_*` function from the `binemit.rs` module.
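+///
+/// For example (illustrative), with prefix "Mp2" the snippet
+/// `{{PUT_OP}}(bits, rex2(in_reg0, out_reg0), sink);` becomes
+/// `put_mp2(bits, rex2(in_reg0, out_reg0), sink);`.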
+fn replace_put_op(code: Option<String>, prefix: &str) -> Option<String> {
+ code.map(|code| code.replace("{{PUT_OP}}", &format!("put_{}", prefix.to_lowercase())))
+}
+
+/// Replaces register class constraints that may require a REX prefix with the equivalent non-REX classes.
+fn replace_nonrex_constraints(
+ regs: &IsaRegs,
+ constraints: Vec<OperandConstraint>,
+) -> Vec<OperandConstraint> {
+ constraints
+ .into_iter()
+ .map(|constraint| match constraint {
+ OperandConstraint::RegClass(rc_index) => {
+ let new_rc_index = if rc_index == regs.class_by_name("GPR") {
+ regs.class_by_name("GPR8")
+ } else if rc_index == regs.class_by_name("FPR") {
+ regs.class_by_name("FPR8")
+ } else {
+ rc_index
+ };
+ OperandConstraint::RegClass(new_rc_index)
+ }
+ _ => constraint,
+ })
+ .collect()
+}
+
+fn replace_evex_constraints(
+ _: &IsaRegs,
+ constraints: Vec<OperandConstraint>,
+) -> Vec<OperandConstraint> {
+ constraints
+ .into_iter()
+ .map(|constraint| match constraint {
+ OperandConstraint::RegClass(rc_index) => {
+ // FIXME(#1306) this should be able to upgrade the register class to FPR32 as in
+ // `replace_nonrex_constraints` above, e.g. When FPR32 is re-added, add back in the
+ // rc_index conversion to FPR32. In the meantime, this is effectively a no-op
+ // conversion--the register class stays the same.
+ OperandConstraint::RegClass(rc_index)
+ }
+ _ => constraint,
+ })
+ .collect()
+}
+
+/// Specifies how the prefix (e.g. REX) is emitted by a Recipe.
+#[derive(Copy, Clone, PartialEq)]
+pub enum RecipePrefixKind {
+ /// The REX emission behavior is not hardcoded for the Recipe
+ /// and may be overridden when using the Template.
+ Unspecified,
+
+ /// The Recipe must hardcode the non-emission of the REX prefix.
+ NeverEmitRex,
+
+ /// The Recipe must hardcode the emission of the REX prefix.
+ AlwaysEmitRex,
+
+ /// The Recipe should infer the emission of the REX.RXB bits from registers,
+ /// and the REX.W bit from the EncodingBits.
+ ///
+ /// Because such a Recipe has a non-constant instruction size, it must have
+ /// a special `compute_size` handler for the inferrable-REX case.
+ InferRex,
+
+ /// The Recipe must hardcode the emission of an EVEX prefix.
+ Evex,
+}
+
+impl Default for RecipePrefixKind {
+ fn default() -> Self {
+ Self::Unspecified
+ }
+}
+
+/// Previously called a TailRecipe in the Python meta language, this allows creating multiple
+/// variants of a single base EncodingRecipe (REX prefix, specialized W/RRR bits, different
+/// opcodes). It serves as a prototype of an EncodingRecipe that is then used when actually
+/// creating Encodings, in encodings.rs. This is an idiosyncrasy of the x86 meta-language, and
+/// could be reconsidered later.
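+///
+/// For example (illustrative), `template.opcodes(&opcodes::PADDD).nonrex()` yields a copy of
+/// the template specialized to the PADDD opcode bytes that never emits a REX prefix.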
+#[derive(Clone)]
+pub(crate) struct Template<'builder> {
+ /// Description of registers, used in the build() method.
+ regs: &'builder IsaRegs,
+
+ /// The recipe template, which is to be specialized (by copy).
+ recipe: EncodingRecipeBuilder,
+
+ /// How is the REX prefix emitted?
+ rex_kind: RecipePrefixKind,
+
+ /// Function for `compute_size()` when REX is inferrable.
+ inferred_rex_compute_size: Option<&'static str>,
+
+ /// Other recipe to use when REX-prefixed.
+ when_prefixed: Option<Rc<Template<'builder>>>,
+
+ // Parameters passed in the EncodingBits.
+ /// Value of the W bit (0 or 1), stored in the EncodingBits.
+ w_bit: u16,
+ /// Value of the RRR bits (between 0 and 0b111).
+ rrr_bits: u16,
+ /// Opcode bytes.
+ op_bytes: &'static [u8],
+}
+
+impl<'builder> Template<'builder> {
+ fn new(recipe: EncodingRecipeBuilder, regs: &'builder IsaRegs) -> Self {
+ Self {
+ regs,
+ recipe,
+ rex_kind: RecipePrefixKind::default(),
+ inferred_rex_compute_size: None,
+ when_prefixed: None,
+ w_bit: 0,
+ rrr_bits: 0,
+ op_bytes: &opcodes::EMPTY,
+ }
+ }
+
+ fn name(&self) -> &str {
+ &self.recipe.name
+ }
+ fn rex_kind(self, kind: RecipePrefixKind) -> Self {
+ Self {
+ rex_kind: kind,
+ ..self
+ }
+ }
+ fn inferred_rex_compute_size(self, function: &'static str) -> Self {
+ Self {
+ inferred_rex_compute_size: Some(function),
+ ..self
+ }
+ }
+ fn when_prefixed(self, template: Rc<Template<'builder>>) -> Self {
+ assert!(self.when_prefixed.is_none());
+ Self {
+ when_prefixed: Some(template),
+ ..self
+ }
+ }
+
+ // Copy setters.
+ pub fn opcodes(&self, op_bytes: &'static [u8]) -> Self {
+ assert!(!op_bytes.is_empty());
+ let mut copy = self.clone();
+ copy.op_bytes = op_bytes;
+ copy
+ }
+ pub fn w(&self) -> Self {
+ let mut copy = self.clone();
+ copy.w_bit = 1;
+ copy
+ }
+ pub fn rrr(&self, value: u16) -> Self {
+ assert!(value <= 0b111);
+ let mut copy = self.clone();
+ copy.rrr_bits = value;
+ copy
+ }
+ pub fn nonrex(&self) -> Self {
+ assert!(
+ self.rex_kind != RecipePrefixKind::AlwaysEmitRex,
+ "Template requires REX prefix."
+ );
+ let mut copy = self.clone();
+ copy.rex_kind = RecipePrefixKind::NeverEmitRex;
+ copy
+ }
+ pub fn rex(&self) -> Self {
+ assert!(
+ self.rex_kind != RecipePrefixKind::NeverEmitRex,
+ "Template requires no REX prefix."
+ );
+ if let Some(prefixed) = &self.when_prefixed {
+ let mut ret = prefixed.rex();
+ // Forward specialized parameters.
+ ret.op_bytes = self.op_bytes;
+ ret.w_bit = self.w_bit;
+ ret.rrr_bits = self.rrr_bits;
+ return ret;
+ }
+ let mut copy = self.clone();
+ copy.rex_kind = RecipePrefixKind::AlwaysEmitRex;
+ copy
+ }
+ pub fn infer_rex(&self) -> Self {
+ assert!(
+ self.rex_kind != RecipePrefixKind::NeverEmitRex,
+ "Template requires no REX prefix."
+ );
+ assert!(
+ self.when_prefixed.is_none(),
+ "infer_rex used with when_prefixed()."
+ );
+ let mut copy = self.clone();
+ copy.rex_kind = RecipePrefixKind::InferRex;
+ copy
+ }
+
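+    /// Build the final `EncodingRecipe` and its encoding bits: the opcode/prefix name (e.g.
+    /// "Op1", "RexOp1", "DynRexOp1") is prepended to the recipe name and a size addendum for
+    /// the opcode and prefix bytes is added to the base size. For example (illustrative), an
+    /// "rr" recipe built from a single-byte opcode becomes "Op1rr", or "RexOp1rr" with
+    /// `rex()`, or "DynRexOp1rr" with `infer_rex()`.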
+ pub fn build(mut self) -> (EncodingRecipe, u16) {
+ let (opcode, bits) = decode_opcodes(&self.op_bytes, self.rrr_bits, self.w_bit);
+
+ let (recipe_name, size_addendum) = match self.rex_kind {
+ RecipePrefixKind::Unspecified | RecipePrefixKind::NeverEmitRex => {
+ // Ensure the operands are limited to non-REX constraints.
+ let operands_in = self.recipe.operands_in.unwrap_or_default();
+ self.recipe.operands_in = Some(replace_nonrex_constraints(self.regs, operands_in));
+ let operands_out = self.recipe.operands_out.unwrap_or_default();
+ self.recipe.operands_out =
+ Some(replace_nonrex_constraints(self.regs, operands_out));
+
+ (opcode.into(), self.op_bytes.len() as u64)
+ }
+ RecipePrefixKind::AlwaysEmitRex => {
+ ("Rex".to_string() + opcode, self.op_bytes.len() as u64 + 1)
+ }
+ RecipePrefixKind::InferRex => {
+ assert_eq!(self.w_bit, 0, "A REX.W bit always requires a REX prefix; avoid using `infer_rex().w()` and use `rex().w()` instead.");
+ // Hook up the right function for inferred compute_size().
+ assert!(
+ self.inferred_rex_compute_size.is_some(),
+ "InferRex recipe '{}' needs an inferred_rex_compute_size function.",
+ &self.recipe.name
+ );
+ self.recipe.compute_size = self.inferred_rex_compute_size;
+
+ ("DynRex".to_string() + opcode, self.op_bytes.len() as u64)
+ }
+ RecipePrefixKind::Evex => {
+ // Allow the operands to expand limits to EVEX constraints.
+ let operands_in = self.recipe.operands_in.unwrap_or_default();
+ self.recipe.operands_in = Some(replace_evex_constraints(self.regs, operands_in));
+ let operands_out = self.recipe.operands_out.unwrap_or_default();
+ self.recipe.operands_out = Some(replace_evex_constraints(self.regs, operands_out));
+
+ ("Evex".to_string() + opcode, 4 + 1)
+ }
+ };
+
+ self.recipe.base_size += size_addendum;
+
+ // Branch ranges are relative to the end of the instruction.
+ // For InferRex, the range should be the minimum, assuming no REX.
+ if let Some(range) = self.recipe.branch_range.as_mut() {
+ range.inst_size += size_addendum;
+ }
+
+ self.recipe.emit = replace_put_op(self.recipe.emit, &recipe_name);
+ self.recipe.name = recipe_name + &self.recipe.name;
+
+ (self.recipe.build(), bits)
+ }
+}
+
+/// Returns a predicate checking that the "cond" field of the instruction contains one of the
+/// directly supported floating point condition codes.
+fn supported_floatccs_predicate(
+ supported_cc: &[Literal],
+ format: &InstructionFormat,
+) -> InstructionPredicate {
+ supported_cc
+ .iter()
+ .fold(InstructionPredicate::new(), |pred, literal| {
+ pred.or(InstructionPredicate::new_is_field_equal(
+ format,
+ "cond",
+ literal.to_rust_code(),
+ ))
+ })
+}
+
+/// Return an instruction predicate that checks if `iform.imm` is a valid `scale` for a SIB byte.
+fn valid_scale(format: &InstructionFormat) -> InstructionPredicate {
+ ["1", "2", "4", "8"]
+ .iter()
+ .fold(InstructionPredicate::new(), |pred, &literal| {
+ pred.or(InstructionPredicate::new_is_field_equal(
+ format,
+ "imm",
+ literal.into(),
+ ))
+ })
+}
+
+pub(crate) fn define<'shared>(
+ shared_defs: &'shared SharedDefinitions,
+ settings: &'shared SettingGroup,
+ regs: &'shared IsaRegs,
+) -> RecipeGroup<'shared> {
+ // The set of floating point condition codes that are directly supported.
+ // Other condition codes need to be reversed or expressed as two tests.
+ let floatcc = &shared_defs.imm.floatcc;
+ let supported_floatccs: Vec<Literal> = ["ord", "uno", "one", "ueq", "gt", "ge", "ult", "ule"]
+ .iter()
+ .map(|name| Literal::enumerator_for(floatcc, name))
+ .collect();
+
+ // Register classes shorthands.
+ let abcd = regs.class_by_name("ABCD");
+ let gpr = regs.class_by_name("GPR");
+ let fpr = regs.class_by_name("FPR");
+ let flag = regs.class_by_name("FLAG");
+
+ // Operand constraints shorthands.
+ let reg_rflags = Register::new(flag, regs.regunit_by_name(flag, "rflags"));
+ let reg_rax = Register::new(gpr, regs.regunit_by_name(gpr, "rax"));
+ let reg_rcx = Register::new(gpr, regs.regunit_by_name(gpr, "rcx"));
+ let reg_rdx = Register::new(gpr, regs.regunit_by_name(gpr, "rdx"));
+ let reg_r15 = Register::new(gpr, regs.regunit_by_name(gpr, "r15"));
+ let reg_xmm0 = Register::new(fpr, regs.regunit_by_name(fpr, "xmm0"));
+
+ // Stack operand with a 32-bit signed displacement from either RBP or RSP.
+ let stack_gpr32 = Stack::new(gpr);
+ let stack_fpr32 = Stack::new(fpr);
+
+ let formats = &shared_defs.formats;
+
+ // Predicates shorthands.
+ let use_sse41 = settings.predicate_by_name("use_sse41");
+
+ // Definitions.
+ let mut recipes = RecipeGroup::new(regs);
+
+ // A null unary instruction that takes a GPR register. Can be used for identity copies and
+ // no-op conversions.
+ recipes.add_recipe(
+ EncodingRecipeBuilder::new("null", &formats.unary, 0)
+ .operands_in(vec![gpr])
+ .operands_out(vec![0])
+ .emit(""),
+ );
+ recipes.add_recipe(
+ EncodingRecipeBuilder::new("null_fpr", &formats.unary, 0)
+ .operands_in(vec![fpr])
+ .operands_out(vec![0])
+ .emit(""),
+ );
+ recipes.add_recipe(
+ EncodingRecipeBuilder::new("stacknull", &formats.unary, 0)
+ .operands_in(vec![stack_gpr32])
+ .operands_out(vec![stack_gpr32])
+ .emit(""),
+ );
+
+ recipes.add_recipe(
+ EncodingRecipeBuilder::new("get_pinned_reg", &formats.nullary, 0)
+ .operands_out(vec![reg_r15])
+ .emit(""),
+ );
+ // umr with a fixed register output that's r15.
+ recipes.add_template_recipe(
+ EncodingRecipeBuilder::new("set_pinned_reg", &formats.unary, 1)
+ .operands_in(vec![gpr])
+ .clobbers_flags(false)
+ .emit(
+ r#"
+ let r15 = RU::r15.into();
+ {{PUT_OP}}(bits, rex2(r15, in_reg0), sink);
+ modrm_rr(r15, in_reg0, sink);
+ "#,
+ ),
+ );
+
+ // No-op fills, created by late-stage redundant-fill removal.
+ recipes.add_recipe(
+ EncodingRecipeBuilder::new("fillnull", &formats.unary, 0)
+ .operands_in(vec![stack_gpr32])
+ .operands_out(vec![gpr])
+ .clobbers_flags(false)
+ .emit(""),
+ );
+ recipes.add_recipe(
+ EncodingRecipeBuilder::new("ffillnull", &formats.unary, 0)
+ .operands_in(vec![stack_gpr32])
+ .operands_out(vec![fpr])
+ .clobbers_flags(false)
+ .emit(""),
+ );
+
+ recipes.add_recipe(
+ EncodingRecipeBuilder::new("debugtrap", &formats.nullary, 1).emit("sink.put1(0xcc);"),
+ );
+
+ // XX opcode, no ModR/M.
+ recipes.add_template_recipe(EncodingRecipeBuilder::new("trap", &formats.trap, 0).emit(
+ r#"
+ sink.trap(code, func.srclocs[inst]);
+ {{PUT_OP}}(bits, BASE_REX, sink);
+ "#,
+ ));
+
+ // Macro: conditional jump over a ud2.
+ recipes.add_recipe(
+ EncodingRecipeBuilder::new("trapif", &formats.int_cond_trap, 4)
+ .operands_in(vec![reg_rflags])
+ .clobbers_flags(false)
+ .emit(
+ r#"
+ // Jump over a 2-byte ud2.
+ sink.put1(0x70 | (icc2opc(cond.inverse()) as u8));
+ sink.put1(2);
+ // ud2.
+ sink.trap(code, func.srclocs[inst]);
+ sink.put1(0x0f);
+ sink.put1(0x0b);
+ "#,
+ ),
+ );
+
+ recipes.add_recipe(
+ EncodingRecipeBuilder::new("trapff", &formats.float_cond_trap, 4)
+ .operands_in(vec![reg_rflags])
+ .clobbers_flags(false)
+ .inst_predicate(supported_floatccs_predicate(
+ &supported_floatccs,
+ &*formats.float_cond_trap,
+ ))
+ .emit(
+ r#"
+ // Jump over a 2-byte ud2.
+ sink.put1(0x70 | (fcc2opc(cond.inverse()) as u8));
+ sink.put1(2);
+ // ud2.
+ sink.trap(code, func.srclocs[inst]);
+ sink.put1(0x0f);
+ sink.put1(0x0b);
+ "#,
+ ),
+ );
+
+ // XX /r
+ recipes.add_template_inferred(
+ EncodingRecipeBuilder::new("rr", &formats.binary, 1)
+ .operands_in(vec![gpr, gpr])
+ .operands_out(vec![0])
+ .emit(
+ r#"
+ {{PUT_OP}}(bits, rex2(in_reg0, in_reg1), sink);
+ modrm_rr(in_reg0, in_reg1, sink);
+ "#,
+ ),
+ "size_with_inferred_rex_for_inreg0_inreg1",
+ );
+
+ // XX /r with operands swapped. (RM form).
+ recipes.add_template_inferred(
+ EncodingRecipeBuilder::new("rrx", &formats.binary, 1)
+ .operands_in(vec![gpr, gpr])
+ .operands_out(vec![0])
+ .emit(
+ r#"
+ {{PUT_OP}}(bits, rex2(in_reg1, in_reg0), sink);
+ modrm_rr(in_reg1, in_reg0, sink);
+ "#,
+ ),
+ "size_with_inferred_rex_for_inreg0_inreg1",
+ );
+
+ // XX /r with FPR ins and outs. A form.
+ recipes.add_template_inferred(
+ EncodingRecipeBuilder::new("fa", &formats.binary, 1)
+ .operands_in(vec![fpr, fpr])
+ .operands_out(vec![0])
+ .emit(
+ r#"
+ {{PUT_OP}}(bits, rex2(in_reg1, in_reg0), sink);
+ modrm_rr(in_reg1, in_reg0, sink);
+ "#,
+ ),
+ "size_with_inferred_rex_for_inreg0_inreg1",
+ );
+
+ // XX /r with FPR ins and outs. A form with input operands swapped.
+ recipes.add_template_inferred(
+ EncodingRecipeBuilder::new("fax", &formats.binary, 1)
+ .operands_in(vec![fpr, fpr])
+ .operands_out(vec![1])
+ .emit(
+ r#"
+ {{PUT_OP}}(bits, rex2(in_reg0, in_reg1), sink);
+ modrm_rr(in_reg0, in_reg1, sink);
+ "#,
+ ),
+ // The operand order does not matter for calculating whether a REX prefix is needed.
+ "size_with_inferred_rex_for_inreg0_inreg1",
+ );
+
+ // XX /r with FPR ins and outs. A form with a byte immediate.
+ {
+ recipes.add_template_inferred(
+ EncodingRecipeBuilder::new("fa_ib", &formats.ternary_imm8, 2)
+ .operands_in(vec![fpr, fpr])
+ .operands_out(vec![0])
+ .inst_predicate(InstructionPredicate::new_is_unsigned_int(
+ &*formats.ternary_imm8,
+ "imm",
+ 8,
+ 0,
+ ))
+ .emit(
+ r#"
+ {{PUT_OP}}(bits, rex2(in_reg1, in_reg0), sink);
+ modrm_rr(in_reg1, in_reg0, sink);
+ let imm: i64 = imm.into();
+ sink.put1(imm as u8);
+ "#,
+ ),
+ "size_with_inferred_rex_for_inreg0_inreg1",
+ );
+ }
+
+ // XX /n for a unary operation with extension bits.
+ recipes.add_template(
+ Template::new(
+ EncodingRecipeBuilder::new("ur", &formats.unary, 1)
+ .operands_in(vec![gpr])
+ .operands_out(vec![0])
+ .emit(
+ r#"
+ {{PUT_OP}}(bits, rex1(in_reg0), sink);
+ modrm_r_bits(in_reg0, bits, sink);
+ "#,
+ ),
+ regs,
+ )
+ .inferred_rex_compute_size("size_with_inferred_rex_for_inreg0"),
+ );
+
+ // XX /r, but for a unary operator with separate input/output register, like
+ // copies. MR form, preserving flags.
+ recipes.add_template(
+ Template::new(
+ EncodingRecipeBuilder::new("umr", &formats.unary, 1)
+ .operands_in(vec![gpr])
+ .operands_out(vec![gpr])
+ .clobbers_flags(false)
+ .emit(
+ r#"
+ {{PUT_OP}}(bits, rex2(out_reg0, in_reg0), sink);
+ modrm_rr(out_reg0, in_reg0, sink);
+ "#,
+ ),
+ regs,
+ )
+ .inferred_rex_compute_size("size_with_inferred_rex_for_inreg0_outreg0"),
+ );
+
+ // Same as umr, but with FPR -> GPR registers.
+ recipes.add_template_recipe(
+ EncodingRecipeBuilder::new("rfumr", &formats.unary, 1)
+ .operands_in(vec![fpr])
+ .operands_out(vec![gpr])
+ .clobbers_flags(false)
+ .emit(
+ r#"
+ {{PUT_OP}}(bits, rex2(out_reg0, in_reg0), sink);
+ modrm_rr(out_reg0, in_reg0, sink);
+ "#,
+ ),
+ );
+
+ // Same as umr, but with the source register specified directly.
+ recipes.add_template_recipe(
+ EncodingRecipeBuilder::new("umr_reg_to_ssa", &formats.copy_to_ssa, 1)
+ // No operands_in to mention, because a source register is specified directly.
+ .operands_out(vec![gpr])
+ .clobbers_flags(false)
+ .emit(
+ r#"
+ {{PUT_OP}}(bits, rex2(out_reg0, src), sink);
+ modrm_rr(out_reg0, src, sink);
+ "#,
+ ),
+ );
+
+ // XX /r, but for a unary operator with separate input/output register.
+ // RM form. Clobbers FLAGS.
+ recipes.add_template_recipe(
+ EncodingRecipeBuilder::new("urm", &formats.unary, 1)
+ .operands_in(vec![gpr])
+ .operands_out(vec![gpr])
+ .emit(
+ r#"
+ {{PUT_OP}}(bits, rex2(in_reg0, out_reg0), sink);
+ modrm_rr(in_reg0, out_reg0, sink);
+ "#,
+ ),
+ );
+
+ // XX /r. Same as urm, but doesn't clobber FLAGS.
+ let urm_noflags = recipes.add_template_recipe(
+ EncodingRecipeBuilder::new("urm_noflags", &formats.unary, 1)
+ .operands_in(vec![gpr])
+ .operands_out(vec![gpr])
+ .clobbers_flags(false)
+ .emit(
+ r#"
+ {{PUT_OP}}(bits, rex2(in_reg0, out_reg0), sink);
+ modrm_rr(in_reg0, out_reg0, sink);
+ "#,
+ ),
+ );
+
+ // XX /r. Same as urm_noflags, but input limited to ABCD.
+ recipes.add_template(
+ Template::new(
+ EncodingRecipeBuilder::new("urm_noflags_abcd", &formats.unary, 1)
+ .operands_in(vec![abcd])
+ .operands_out(vec![gpr])
+ .clobbers_flags(false)
+ .emit(
+ r#"
+ {{PUT_OP}}(bits, rex2(in_reg0, out_reg0), sink);
+ modrm_rr(in_reg0, out_reg0, sink);
+ "#,
+ ),
+ regs,
+ )
+ .when_prefixed(urm_noflags),
+ );
+
+ // XX /r, RM form, FPR -> FPR.
+ recipes.add_template_inferred(
+ EncodingRecipeBuilder::new("furm", &formats.unary, 1)
+ .operands_in(vec![fpr])
+ .operands_out(vec![fpr])
+ .clobbers_flags(false)
+ .emit(
+ r#"
+ {{PUT_OP}}(bits, rex2(in_reg0, out_reg0), sink);
+ modrm_rr(in_reg0, out_reg0, sink);
+ "#,
+ ),
+ "size_with_inferred_rex_for_inreg0_outreg0",
+ );
+
+ // Same as furm, but with the source register specified directly.
+ recipes.add_template_recipe(
+ EncodingRecipeBuilder::new("furm_reg_to_ssa", &formats.copy_to_ssa, 1)
+ // No operands_in to mention, because a source register is specified directly.
+ .operands_out(vec![fpr])
+ .clobbers_flags(false)
+ .emit(
+ r#"
+ {{PUT_OP}}(bits, rex2(src, out_reg0), sink);
+ modrm_rr(src, out_reg0, sink);
+ "#,
+ ),
+ );
+
+ // XX /r, RM form, GPR -> FPR.
+ recipes.add_template_inferred(
+ EncodingRecipeBuilder::new("frurm", &formats.unary, 1)
+ .operands_in(vec![gpr])
+ .operands_out(vec![fpr])
+ .clobbers_flags(false)
+ .emit(
+ r#"
+ {{PUT_OP}}(bits, rex2(in_reg0, out_reg0), sink);
+ modrm_rr(in_reg0, out_reg0, sink);
+ "#,
+ ),
+ "size_with_inferred_rex_for_inreg0_outreg0",
+ );
+
+ // XX /r, RM form, FPR -> GPR.
+ recipes.add_template_recipe(
+ EncodingRecipeBuilder::new("rfurm", &formats.unary, 1)
+ .operands_in(vec![fpr])
+ .operands_out(vec![gpr])
+ .clobbers_flags(false)
+ .emit(
+ r#"
+ {{PUT_OP}}(bits, rex2(in_reg0, out_reg0), sink);
+ modrm_rr(in_reg0, out_reg0, sink);
+ "#,
+ ),
+ );
+
+ // XX /r, RMI form for one of the roundXX SSE 4.1 instructions.
+ recipes.add_template_recipe(
+ EncodingRecipeBuilder::new("furmi_rnd", &formats.unary, 2)
+ .operands_in(vec![fpr])
+ .operands_out(vec![fpr])
+ .isa_predicate(use_sse41)
+ .emit(
+ r#"
+ {{PUT_OP}}(bits, rex2(in_reg0, out_reg0), sink);
+ modrm_rr(in_reg0, out_reg0, sink);
+ sink.put1(match opcode {
+ Opcode::Nearest => 0b00,
+ Opcode::Floor => 0b01,
+ Opcode::Ceil => 0b10,
+ Opcode::Trunc => 0b11,
+ x => panic!("{} unexpected for furmi_rnd", opcode),
+ });
+ "#,
+ ),
+ );
+
+ // XX /r, for regmove instructions.
+ recipes.add_template_recipe(
+ EncodingRecipeBuilder::new("rmov", &formats.reg_move, 1)
+ .operands_in(vec![gpr])
+ .clobbers_flags(false)
+ .emit(
+ r#"
+ {{PUT_OP}}(bits, rex2(dst, src), sink);
+ modrm_rr(dst, src, sink);
+ "#,
+ ),
+ );
+
+ // XX /r, for regmove instructions (FPR version, RM encoded).
+ recipes.add_template_recipe(
+ EncodingRecipeBuilder::new("frmov", &formats.reg_move, 1)
+ .operands_in(vec![fpr])
+ .clobbers_flags(false)
+ .emit(
+ r#"
+ {{PUT_OP}}(bits, rex2(src, dst), sink);
+ modrm_rr(src, dst, sink);
+ "#,
+ ),
+ );
+
+ // XX /n with one arg in %rcx, for shifts.
+ recipes.add_template_recipe(
+ EncodingRecipeBuilder::new("rc", &formats.binary, 1)
+ .operands_in(vec![
+ OperandConstraint::RegClass(gpr),
+ OperandConstraint::FixedReg(reg_rcx),
+ ])
+ .operands_out(vec![0])
+ .emit(
+ r#"
+ {{PUT_OP}}(bits, rex1(in_reg0), sink);
+ modrm_r_bits(in_reg0, bits, sink);
+ "#,
+ ),
+ );
+
+ // XX /n for division: inputs in %rax, %rdx, r. Outputs in %rax, %rdx.
+ recipes.add_template(
+ Template::new(
+ EncodingRecipeBuilder::new("div", &formats.ternary, 1)
+ .operands_in(vec![
+ OperandConstraint::FixedReg(reg_rax),
+ OperandConstraint::FixedReg(reg_rdx),
+ OperandConstraint::RegClass(gpr),
+ ])
+ .operands_out(vec![reg_rax, reg_rdx])
+ .emit(
+ r#"
+ sink.trap(TrapCode::IntegerDivisionByZero, func.srclocs[inst]);
+ {{PUT_OP}}(bits, rex1(in_reg2), sink);
+ modrm_r_bits(in_reg2, bits, sink);
+ "#,
+ ),
+ regs,
+ )
+ .inferred_rex_compute_size("size_with_inferred_rex_for_inreg2"),
+ );
+
+ // XX /n for {s,u}mulx: inputs in %rax, r. Outputs in %rdx(hi):%rax(lo)
+ recipes.add_template(
+ Template::new(
+ EncodingRecipeBuilder::new("mulx", &formats.binary, 1)
+ .operands_in(vec![
+ OperandConstraint::FixedReg(reg_rax),
+ OperandConstraint::RegClass(gpr),
+ ])
+ .operands_out(vec![
+ OperandConstraint::FixedReg(reg_rax),
+ OperandConstraint::FixedReg(reg_rdx),
+ ])
+ .emit(
+ r#"
+ {{PUT_OP}}(bits, rex1(in_reg1), sink);
+ modrm_r_bits(in_reg1, bits, sink);
+ "#,
+ ),
+ regs,
+ )
+ .inferred_rex_compute_size("size_with_inferred_rex_for_inreg1"),
+ );
+
+ // XX /r for BLEND* instructions
+ recipes.add_template_inferred(
+ EncodingRecipeBuilder::new("blend", &formats.ternary, 1)
+ .operands_in(vec![
+ OperandConstraint::FixedReg(reg_xmm0),
+ OperandConstraint::RegClass(fpr),
+ OperandConstraint::RegClass(fpr),
+ ])
+ .operands_out(vec![2])
+ .emit(
+ r#"
+ {{PUT_OP}}(bits, rex2(in_reg1, in_reg2), sink);
+ modrm_rr(in_reg1, in_reg2, sink);
+ "#,
+ ),
+ "size_with_inferred_rex_for_inreg1_inreg2",
+ );
+
+ // XX /n ib with 8-bit immediate sign-extended.
+ {
+ recipes.add_template_inferred(
+ EncodingRecipeBuilder::new("r_ib", &formats.binary_imm64, 2)
+ .operands_in(vec![gpr])
+ .operands_out(vec![0])
+ .inst_predicate(InstructionPredicate::new_is_signed_int(
+ &*formats.binary_imm64,
+ "imm",
+ 8,
+ 0,
+ ))
+ .emit(
+ r#"
+ {{PUT_OP}}(bits, rex1(in_reg0), sink);
+ modrm_r_bits(in_reg0, bits, sink);
+ let imm: i64 = imm.into();
+ sink.put1(imm as u8);
+ "#,
+ ),
+ "size_with_inferred_rex_for_inreg0",
+ );
+
+ recipes.add_template_inferred(
+ EncodingRecipeBuilder::new("f_ib", &formats.binary_imm64, 2)
+ .operands_in(vec![fpr])
+ .operands_out(vec![0])
+ .inst_predicate(InstructionPredicate::new_is_signed_int(
+ &*formats.binary_imm64,
+ "imm",
+ 8,
+ 0,
+ ))
+ .emit(
+ r#"
+ {{PUT_OP}}(bits, rex1(in_reg0), sink);
+ modrm_r_bits(in_reg0, bits, sink);
+ let imm: i64 = imm.into();
+ sink.put1(imm as u8);
+ "#,
+ ),
+ "size_with_inferred_rex_for_inreg0",
+ );
+
+ // XX /n id with 32-bit immediate sign-extended.
+ recipes.add_template(
+ Template::new(
+ EncodingRecipeBuilder::new("r_id", &formats.binary_imm64, 5)
+ .operands_in(vec![gpr])
+ .operands_out(vec![0])
+ .inst_predicate(InstructionPredicate::new_is_signed_int(
+ &*formats.binary_imm64,
+ "imm",
+ 32,
+ 0,
+ ))
+ .emit(
+ r#"
+ {{PUT_OP}}(bits, rex1(in_reg0), sink);
+ modrm_r_bits(in_reg0, bits, sink);
+ let imm: i64 = imm.into();
+ sink.put4(imm as u32);
+ "#,
+ ),
+ regs,
+ )
+ .inferred_rex_compute_size("size_with_inferred_rex_for_inreg0"),
+ );
+ }
+
+ // XX /r ib with 8-bit unsigned immediate (e.g. for pshufd)
+ {
+ recipes.add_template_inferred(
+ EncodingRecipeBuilder::new("r_ib_unsigned_fpr", &formats.binary_imm8, 2)
+ .operands_in(vec![fpr])
+ .operands_out(vec![fpr])
+ .inst_predicate(InstructionPredicate::new_is_unsigned_int(
+ &*formats.binary_imm8,
+ "imm",
+ 8,
+ 0,
+ ))
+ .emit(
+ r#"
+ {{PUT_OP}}(bits, rex2(in_reg0, out_reg0), sink);
+ modrm_rr(in_reg0, out_reg0, sink);
+ let imm: i64 = imm.into();
+ sink.put1(imm as u8);
+ "#,
+ ),
+ "size_with_inferred_rex_for_inreg0_outreg0",
+ );
+ }
+
+ // XX /r ib with 8-bit unsigned immediate (e.g. for extractlane)
+ {
+ recipes.add_template_inferred(
+ EncodingRecipeBuilder::new("r_ib_unsigned_gpr", &formats.binary_imm8, 2)
+ .operands_in(vec![fpr])
+ .operands_out(vec![gpr])
+ .inst_predicate(InstructionPredicate::new_is_unsigned_int(
+ &*formats.binary_imm8, "imm", 8, 0,
+ ))
+ .emit(
+ r#"
+ {{PUT_OP}}(bits, rex2(out_reg0, in_reg0), sink);
+ modrm_rr(out_reg0, in_reg0, sink); // note the flipped register in the ModR/M byte
+ let imm: i64 = imm.into();
+ sink.put1(imm as u8);
+ "#,
+ ), "size_with_inferred_rex_for_inreg0_outreg0"
+ );
+ }
+
+ // XX /r ib with 8-bit unsigned immediate (e.g. for insertlane)
+ {
+ recipes.add_template_inferred(
+ EncodingRecipeBuilder::new("r_ib_unsigned_r", &formats.ternary_imm8, 2)
+ .operands_in(vec![fpr, gpr])
+ .operands_out(vec![0])
+ .inst_predicate(InstructionPredicate::new_is_unsigned_int(
+ &*formats.ternary_imm8,
+ "imm",
+ 8,
+ 0,
+ ))
+ .emit(
+ r#"
+ {{PUT_OP}}(bits, rex2(in_reg1, in_reg0), sink);
+ modrm_rr(in_reg1, in_reg0, sink);
+ let imm: i64 = imm.into();
+ sink.put1(imm as u8);
+ "#,
+ ),
+ "size_with_inferred_rex_for_inreg0_inreg1",
+ );
+ }
+
+ {
+ // XX /n id with 32-bit immediate sign-extended. UnaryImm version.
+ recipes.add_template_recipe(
+ EncodingRecipeBuilder::new("u_id", &formats.unary_imm, 5)
+ .operands_out(vec![gpr])
+ .inst_predicate(InstructionPredicate::new_is_signed_int(
+ &*formats.unary_imm,
+ "imm",
+ 32,
+ 0,
+ ))
+ .emit(
+ r#"
+ {{PUT_OP}}(bits, rex1(out_reg0), sink);
+ modrm_r_bits(out_reg0, bits, sink);
+ let imm: i64 = imm.into();
+ sink.put4(imm as u32);
+ "#,
+ ),
+ );
+ }
+
+ // XX+rd id unary with 32-bit immediate. Note no recipe predicate.
+ recipes.add_template_recipe(
+ EncodingRecipeBuilder::new("pu_id", &formats.unary_imm, 4)
+ .operands_out(vec![gpr])
+ .emit(
+ r#"
+ // The destination register is encoded in the low bits of the opcode.
+ // No ModR/M.
+ {{PUT_OP}}(bits | (out_reg0 & 7), rex1(out_reg0), sink);
+ let imm: i64 = imm.into();
+ sink.put4(imm as u32);
+ "#,
+ ),
+ );
+
+ // XX+rd id unary with bool immediate. Note no recipe predicate.
+ recipes.add_template_recipe(
+ EncodingRecipeBuilder::new("pu_id_bool", &formats.unary_bool, 4)
+ .operands_out(vec![gpr])
+ .emit(
+ r#"
+ // The destination register is encoded in the low bits of the opcode.
+ // No ModR/M.
+ {{PUT_OP}}(bits | (out_reg0 & 7), rex1(out_reg0), sink);
+ let imm: u32 = if imm { 1 } else { 0 };
+ sink.put4(imm);
+ "#,
+ ),
+ );
+
+ // XX+rd id nullary with 0 as 32-bit immediate. Note no recipe predicate.
+ recipes.add_template_recipe(
+ EncodingRecipeBuilder::new("pu_id_ref", &formats.nullary, 4)
+ .operands_out(vec![gpr])
+ .emit(
+ r#"
+ // The destination register is encoded in the low bits of the opcode.
+ // No ModR/M.
+ {{PUT_OP}}(bits | (out_reg0 & 7), rex1(out_reg0), sink);
+ sink.put4(0);
+ "#,
+ ),
+ );
+
+ // XX+rd iq unary with 64-bit immediate.
+ recipes.add_template_recipe(
+ EncodingRecipeBuilder::new("pu_iq", &formats.unary_imm, 8)
+ .operands_out(vec![gpr])
+ .emit(
+ r#"
+ {{PUT_OP}}(bits | (out_reg0 & 7), rex1(out_reg0), sink);
+ let imm: i64 = imm.into();
+ sink.put8(imm as u64);
+ "#,
+ ),
+ );
+
+ // XX+rd id unary with zero immediate.
+ recipes.add_template_recipe(
+ EncodingRecipeBuilder::new("u_id_z", &formats.unary_imm, 1)
+ .operands_out(vec![gpr])
+ .emit(
+ r#"
+ {{PUT_OP}}(bits, rex2(out_reg0, out_reg0), sink);
+ modrm_rr(out_reg0, out_reg0, sink);
+ "#,
+ ),
+ );
+
+ // XX /n Unary with floating point 32-bit immediate equal to zero.
+ {
+ recipes.add_template_recipe(
+ EncodingRecipeBuilder::new("f32imm_z", &formats.unary_ieee32, 1)
+ .operands_out(vec![fpr])
+ .inst_predicate(InstructionPredicate::new_is_zero_32bit_float(
+ &*formats.unary_ieee32,
+ "imm",
+ ))
+ .emit(
+ r#"
+ {{PUT_OP}}(bits, rex2(out_reg0, out_reg0), sink);
+ modrm_rr(out_reg0, out_reg0, sink);
+ "#,
+ ),
+ );
+ }
+
+ // XX /n Unary with floating point 64-bit immediate equal to zero.
+ {
+ recipes.add_template_recipe(
+ EncodingRecipeBuilder::new("f64imm_z", &formats.unary_ieee64, 1)
+ .operands_out(vec![fpr])
+ .inst_predicate(InstructionPredicate::new_is_zero_64bit_float(
+ &*formats.unary_ieee64,
+ "imm",
+ ))
+ .emit(
+ r#"
+ {{PUT_OP}}(bits, rex2(out_reg0, out_reg0), sink);
+ modrm_rr(out_reg0, out_reg0, sink);
+ "#,
+ ),
+ );
+ }
+
+ recipes.add_template_recipe(
+ EncodingRecipeBuilder::new("pushq", &formats.unary, 0)
+ .operands_in(vec![gpr])
+ .emit(
+ r#"
+ sink.trap(TrapCode::StackOverflow, func.srclocs[inst]);
+ {{PUT_OP}}(bits | (in_reg0 & 7), rex1(in_reg0), sink);
+ "#,
+ ),
+ );
+
+ recipes.add_template_recipe(
+ EncodingRecipeBuilder::new("popq", &formats.nullary, 0)
+ .operands_out(vec![gpr])
+ .emit(
+ r#"
+ {{PUT_OP}}(bits | (out_reg0 & 7), rex1(out_reg0), sink);
+ "#,
+ ),
+ );
+
+ // XX /r, for regmove instructions.
+ recipes.add_template_recipe(
+ EncodingRecipeBuilder::new("copysp", &formats.copy_special, 1)
+ .clobbers_flags(false)
+ .emit(
+ r#"
+ {{PUT_OP}}(bits, rex2(dst, src), sink);
+ modrm_rr(dst, src, sink);
+ "#,
+ ),
+ );
+
+ recipes.add_template_recipe(
+ EncodingRecipeBuilder::new("adjustsp", &formats.unary, 1)
+ .operands_in(vec![gpr])
+ .emit(
+ r#"
+ {{PUT_OP}}(bits, rex2(RU::rsp.into(), in_reg0), sink);
+ modrm_rr(RU::rsp.into(), in_reg0, sink);
+ "#,
+ ),
+ );
+
+ {
+ recipes.add_template_recipe(
+ EncodingRecipeBuilder::new("adjustsp_ib", &formats.unary_imm, 2)
+ .inst_predicate(InstructionPredicate::new_is_signed_int(
+ &*formats.unary_imm,
+ "imm",
+ 8,
+ 0,
+ ))
+ .emit(
+ r#"
+ {{PUT_OP}}(bits, rex1(RU::rsp.into()), sink);
+ modrm_r_bits(RU::rsp.into(), bits, sink);
+ let imm: i64 = imm.into();
+ sink.put1(imm as u8);
+ "#,
+ ),
+ );
+
+ recipes.add_template_recipe(
+ EncodingRecipeBuilder::new("adjustsp_id", &formats.unary_imm, 5)
+ .inst_predicate(InstructionPredicate::new_is_signed_int(
+ &*formats.unary_imm,
+ "imm",
+ 32,
+ 0,
+ ))
+ .emit(
+ r#"
+ {{PUT_OP}}(bits, rex1(RU::rsp.into()), sink);
+ modrm_r_bits(RU::rsp.into(), bits, sink);
+ let imm: i64 = imm.into();
+ sink.put4(imm as u32);
+ "#,
+ ),
+ );
+ }
+
+ recipes.add_recipe(
+ EncodingRecipeBuilder::new("dummy_sarg_t", &formats.nullary, 0)
+ .operands_out(vec![Stack::new(gpr)])
+ .emit(""),
+ );
+
+ // XX+rd id with Abs4 function relocation.
+ recipes.add_template_recipe(
+ EncodingRecipeBuilder::new("fnaddr4", &formats.func_addr, 4)
+ .operands_out(vec![gpr])
+ .emit(
+ r#"
+ {{PUT_OP}}(bits | (out_reg0 & 7), rex1(out_reg0), sink);
+ sink.reloc_external(func.srclocs[inst],
+ Reloc::Abs4,
+ &func.dfg.ext_funcs[func_ref].name,
+ 0);
+ sink.put4(0);
+ "#,
+ ),
+ );
+
+ // XX+rd iq with Abs8 function relocation.
+ recipes.add_template_recipe(
+ EncodingRecipeBuilder::new("fnaddr8", &formats.func_addr, 8)
+ .operands_out(vec![gpr])
+ .emit(
+ r#"
+ {{PUT_OP}}(bits | (out_reg0 & 7), rex1(out_reg0), sink);
+ sink.reloc_external(func.srclocs[inst],
+ Reloc::Abs8,
+ &func.dfg.ext_funcs[func_ref].name,
+ 0);
+ sink.put8(0);
+ "#,
+ ),
+ );
+
+ // Similar to fnaddr4, but writes !0 (this is used by BaldrMonkey).
+ recipes.add_template_recipe(
+ EncodingRecipeBuilder::new("allones_fnaddr4", &formats.func_addr, 4)
+ .operands_out(vec![gpr])
+ .emit(
+ r#"
+ {{PUT_OP}}(bits | (out_reg0 & 7), rex1(out_reg0), sink);
+ sink.reloc_external(func.srclocs[inst],
+ Reloc::Abs4,
+ &func.dfg.ext_funcs[func_ref].name,
+ 0);
+ // Write the immediate as `!0` for the benefit of BaldrMonkey.
+ sink.put4(!0);
+ "#,
+ ),
+ );
+
+ // Similar to fnaddr8, but writes !0 (this is used by BaldrMonkey).
+ recipes.add_template_recipe(
+ EncodingRecipeBuilder::new("allones_fnaddr8", &formats.func_addr, 8)
+ .operands_out(vec![gpr])
+ .emit(
+ r#"
+ {{PUT_OP}}(bits | (out_reg0 & 7), rex1(out_reg0), sink);
+ sink.reloc_external(func.srclocs[inst],
+ Reloc::Abs8,
+ &func.dfg.ext_funcs[func_ref].name,
+ 0);
+ // Write the immediate as `!0` for the benefit of BaldrMonkey.
+ sink.put8(!0);
+ "#,
+ ),
+ );
+
+ recipes.add_template_recipe(
+ EncodingRecipeBuilder::new("pcrel_fnaddr8", &formats.func_addr, 5)
+ .operands_out(vec![gpr])
+ // rex2 gets passed 0 for r/m register because the upper bit of
+ // r/m doesn't get decoded when in rip-relative addressing mode.
+ .emit(
+ r#"
+ {{PUT_OP}}(bits, rex2(0, out_reg0), sink);
+ modrm_riprel(out_reg0, sink);
+ // The addend adjusts for the difference between the end of the
+ // instruction and the beginning of the immediate field.
+ sink.reloc_external(func.srclocs[inst],
+ Reloc::X86PCRel4,
+ &func.dfg.ext_funcs[func_ref].name,
+ -4);
+ sink.put4(0);
+ "#,
+ ),
+ );
+
+ recipes.add_template_recipe(
+ EncodingRecipeBuilder::new("got_fnaddr8", &formats.func_addr, 5)
+ .operands_out(vec![gpr])
+ // rex2 gets passed 0 for r/m register because the upper bit of
+ // r/m doesn't get decoded when in rip-relative addressing mode.
+ .emit(
+ r#"
+ {{PUT_OP}}(bits, rex2(0, out_reg0), sink);
+ modrm_riprel(out_reg0, sink);
+ // The addend adjusts for the difference between the end of the
+ // instruction and the beginning of the immediate field.
+ sink.reloc_external(func.srclocs[inst],
+ Reloc::X86GOTPCRel4,
+ &func.dfg.ext_funcs[func_ref].name,
+ -4);
+ sink.put4(0);
+ "#,
+ ),
+ );
+
+ // XX+rd id with Abs4 globalsym relocation.
+ recipes.add_template_recipe(
+ EncodingRecipeBuilder::new("gvaddr4", &formats.unary_global_value, 4)
+ .operands_out(vec![gpr])
+ .emit(
+ r#"
+ {{PUT_OP}}(bits | (out_reg0 & 7), rex1(out_reg0), sink);
+ sink.reloc_external(func.srclocs[inst],
+ Reloc::Abs4,
+ &func.global_values[global_value].symbol_name(),
+ 0);
+ sink.put4(0);
+ "#,
+ ),
+ );
+
+ // XX+rd iq with Abs8 globalsym relocation.
+ recipes.add_template_recipe(
+ EncodingRecipeBuilder::new("gvaddr8", &formats.unary_global_value, 8)
+ .operands_out(vec![gpr])
+ .emit(
+ r#"
+ {{PUT_OP}}(bits | (out_reg0 & 7), rex1(out_reg0), sink);
+ sink.reloc_external(func.srclocs[inst],
+ Reloc::Abs8,
+ &func.global_values[global_value].symbol_name(),
+ 0);
+ sink.put8(0);
+ "#,
+ ),
+ );
+
+ // XX+rd iq with PCRel4 globalsym relocation.
+ recipes.add_template_recipe(
+ EncodingRecipeBuilder::new("pcrel_gvaddr8", &formats.unary_global_value, 5)
+ .operands_out(vec![gpr])
+ .emit(
+ r#"
+ {{PUT_OP}}(bits, rex2(0, out_reg0), sink);
+ modrm_rm(5, out_reg0, sink);
+ // The addend adjusts for the difference between the end of the
+ // instruction and the beginning of the immediate field.
+ sink.reloc_external(func.srclocs[inst],
+ Reloc::X86PCRel4,
+ &func.global_values[global_value].symbol_name(),
+ -4);
+ sink.put4(0);
+ "#,
+ ),
+ );
+
+ // XX+rd iq with Abs8 globalsym relocation.
+ recipes.add_template_recipe(
+ EncodingRecipeBuilder::new("got_gvaddr8", &formats.unary_global_value, 5)
+ .operands_out(vec![gpr])
+ .emit(
+ r#"
+ {{PUT_OP}}(bits, rex2(0, out_reg0), sink);
+ modrm_rm(5, out_reg0, sink);
+ // The addend adjusts for the difference between the end of the
+ // instruction and the beginning of the immediate field.
+ sink.reloc_external(func.srclocs[inst],
+ Reloc::X86GOTPCRel4,
+ &func.global_values[global_value].symbol_name(),
+ -4);
+ sink.put4(0);
+ "#,
+ ),
+ );
+
+ // Stack addresses.
+ //
+ // TODO Alternative forms for 8-bit immediates, when applicable.
+
+ recipes.add_template_recipe(
+ EncodingRecipeBuilder::new("spaddr_id", &formats.stack_load, 6)
+ .operands_out(vec![gpr])
+ .emit(
+ r#"
+ let sp = StackRef::sp(stack_slot, &func.stack_slots);
+ let base = stk_base(sp.base);
+ {{PUT_OP}}(bits, rex2(base, out_reg0), sink);
+ modrm_sib_disp32(out_reg0, sink);
+ sib_noindex(base, sink);
+ let imm : i32 = offset.into();
+ sink.put4(sp.offset.checked_add(imm).unwrap() as u32);
+ "#,
+ ),
+ );
+
+ // Constant addresses.
+
+ recipes.add_template_recipe(
+ EncodingRecipeBuilder::new("const_addr", &formats.unary_const, 5)
+ .operands_out(vec![gpr])
+ .clobbers_flags(false)
+ .emit(
+ r#"
+ {{PUT_OP}}(bits, rex2(0, out_reg0), sink);
+ modrm_riprel(out_reg0, sink);
+ const_disp4(constant_handle, func, sink);
+ "#,
+ ),
+ );
+
+ // Store recipes.
+
+ {
+ // Simple stores.
+
+ // A predicate asking if the offset is zero.
+ let has_no_offset =
+ InstructionPredicate::new_is_field_equal(&*formats.store, "offset", "0".into());
+
+ // XX /r register-indirect store with no offset.
+ let st = recipes.add_template_recipe(
+ EncodingRecipeBuilder::new("st", &formats.store, 1)
+ .operands_in(vec![gpr, gpr])
+ .inst_predicate(has_no_offset.clone())
+ .clobbers_flags(false)
+ .compute_size("size_plus_maybe_sib_or_offset_for_inreg_1")
+ .emit(
+ r#"
+ if !flags.notrap() {
+ sink.trap(TrapCode::HeapOutOfBounds, func.srclocs[inst]);
+ }
+ {{PUT_OP}}(bits, rex2(in_reg1, in_reg0), sink);
+ if needs_sib_byte(in_reg1) {
+ modrm_sib(in_reg0, sink);
+ sib_noindex(in_reg1, sink);
+ } else if needs_offset(in_reg1) {
+ modrm_disp8(in_reg1, in_reg0, sink);
+ sink.put1(0);
+ } else {
+ modrm_rm(in_reg1, in_reg0, sink);
+ }
+ "#,
+ ),
+ );
+
+ // XX /r register-indirect store with no offset.
+ // Only ABCD allowed for stored value. This is for byte stores with no REX.
+ recipes.add_template(
+ Template::new(
+ EncodingRecipeBuilder::new("st_abcd", &formats.store, 1)
+ .operands_in(vec![abcd, gpr])
+ .inst_predicate(has_no_offset.clone())
+ .clobbers_flags(false)
+ .compute_size("size_plus_maybe_sib_or_offset_for_inreg_1")
+ .emit(
+ r#"
+ if !flags.notrap() {
+ sink.trap(TrapCode::HeapOutOfBounds, func.srclocs[inst]);
+ }
+ {{PUT_OP}}(bits, rex2(in_reg1, in_reg0), sink);
+ if needs_sib_byte(in_reg1) {
+ modrm_sib(in_reg0, sink);
+ sib_noindex(in_reg1, sink);
+ } else if needs_offset(in_reg1) {
+ modrm_disp8(in_reg1, in_reg0, sink);
+ sink.put1(0);
+ } else {
+ modrm_rm(in_reg1, in_reg0, sink);
+ }
+ "#,
+ ),
+ regs,
+ )
+ .when_prefixed(st),
+ );
+
+ // XX /r register-indirect store of FPR with no offset.
+ recipes.add_template_inferred(
+ EncodingRecipeBuilder::new("fst", &formats.store, 1)
+ .operands_in(vec![fpr, gpr])
+ .inst_predicate(has_no_offset)
+ .clobbers_flags(false)
+ .compute_size("size_plus_maybe_sib_or_offset_for_inreg_1")
+ .emit(
+ r#"
+ if !flags.notrap() {
+ sink.trap(TrapCode::HeapOutOfBounds, func.srclocs[inst]);
+ }
+ {{PUT_OP}}(bits, rex2(in_reg1, in_reg0), sink);
+ if needs_sib_byte(in_reg1) {
+ modrm_sib(in_reg0, sink);
+ sib_noindex(in_reg1, sink);
+ } else if needs_offset(in_reg1) {
+ modrm_disp8(in_reg1, in_reg0, sink);
+ sink.put1(0);
+ } else {
+ modrm_rm(in_reg1, in_reg0, sink);
+ }
+ "#,
+ ),
+ "size_plus_maybe_sib_or_offset_inreg1_plus_rex_prefix_for_inreg0_inreg1",
+ );
+
+ let has_small_offset =
+ InstructionPredicate::new_is_signed_int(&*formats.store, "offset", 8, 0);
+
+ // XX /r register-indirect store with 8-bit offset.
+ let st_disp8 = recipes.add_template_recipe(
+ EncodingRecipeBuilder::new("stDisp8", &formats.store, 2)
+ .operands_in(vec![gpr, gpr])
+ .inst_predicate(has_small_offset.clone())
+ .clobbers_flags(false)
+ .compute_size("size_plus_maybe_sib_for_inreg_1")
+ .emit(
+ r#"
+ if !flags.notrap() {
+ sink.trap(TrapCode::HeapOutOfBounds, func.srclocs[inst]);
+ }
+ {{PUT_OP}}(bits, rex2(in_reg1, in_reg0), sink);
+ if needs_sib_byte(in_reg1) {
+ modrm_sib_disp8(in_reg0, sink);
+ sib_noindex(in_reg1, sink);
+ } else {
+ modrm_disp8(in_reg1, in_reg0, sink);
+ }
+ let offset: i32 = offset.into();
+ sink.put1(offset as u8);
+ "#,
+ ),
+ );
+
+ // XX /r register-indirect store with 8-bit offset.
+ // Only ABCD allowed for stored value. This is for byte stores with no REX.
+ recipes.add_template(
+ Template::new(
+ EncodingRecipeBuilder::new("stDisp8_abcd", &formats.store, 2)
+ .operands_in(vec![abcd, gpr])
+ .inst_predicate(has_small_offset.clone())
+ .clobbers_flags(false)
+ .compute_size("size_plus_maybe_sib_for_inreg_1")
+ .emit(
+ r#"
+ if !flags.notrap() {
+ sink.trap(TrapCode::HeapOutOfBounds, func.srclocs[inst]);
+ }
+ {{PUT_OP}}(bits, rex2(in_reg1, in_reg0), sink);
+ if needs_sib_byte(in_reg1) {
+ modrm_sib_disp8(in_reg0, sink);
+ sib_noindex(in_reg1, sink);
+ } else {
+ modrm_disp8(in_reg1, in_reg0, sink);
+ }
+ let offset: i32 = offset.into();
+ sink.put1(offset as u8);
+ "#,
+ ),
+ regs,
+ )
+ .when_prefixed(st_disp8),
+ );
+
+ // XX /r register-indirect store with 8-bit offset of FPR.
+ recipes.add_template_inferred(
+ EncodingRecipeBuilder::new("fstDisp8", &formats.store, 2)
+ .operands_in(vec![fpr, gpr])
+ .inst_predicate(has_small_offset)
+ .clobbers_flags(false)
+ .compute_size("size_plus_maybe_sib_for_inreg_1")
+ .emit(
+ r#"
+ if !flags.notrap() {
+ sink.trap(TrapCode::HeapOutOfBounds, func.srclocs[inst]);
+ }
+ {{PUT_OP}}(bits, rex2(in_reg1, in_reg0), sink);
+ if needs_sib_byte(in_reg1) {
+ modrm_sib_disp8(in_reg0, sink);
+ sib_noindex(in_reg1, sink);
+ } else {
+ modrm_disp8(in_reg1, in_reg0, sink);
+ }
+ let offset: i32 = offset.into();
+ sink.put1(offset as u8);
+ "#,
+ ),
+ "size_plus_maybe_sib_inreg1_plus_rex_prefix_for_inreg0_inreg1",
+ );
+
+ // XX /r register-indirect store with 32-bit offset.
+ let st_disp32 = recipes.add_template_recipe(
+ EncodingRecipeBuilder::new("stDisp32", &formats.store, 5)
+ .operands_in(vec![gpr, gpr])
+ .clobbers_flags(false)
+ .compute_size("size_plus_maybe_sib_for_inreg_1")
+ .emit(
+ r#"
+ if !flags.notrap() {
+ sink.trap(TrapCode::HeapOutOfBounds, func.srclocs[inst]);
+ }
+ {{PUT_OP}}(bits, rex2(in_reg1, in_reg0), sink);
+ if needs_sib_byte(in_reg1) {
+ modrm_sib_disp32(in_reg0, sink);
+ sib_noindex(in_reg1, sink);
+ } else {
+ modrm_disp32(in_reg1, in_reg0, sink);
+ }
+ let offset: i32 = offset.into();
+ sink.put4(offset as u32);
+ "#,
+ ),
+ );
+
+ // XX /r register-indirect store with 32-bit offset.
+ // Only ABCD allowed for stored value. This is for byte stores with no REX.
+ recipes.add_template(
+ Template::new(
+ EncodingRecipeBuilder::new("stDisp32_abcd", &formats.store, 5)
+ .operands_in(vec![abcd, gpr])
+ .clobbers_flags(false)
+ .compute_size("size_plus_maybe_sib_for_inreg_1")
+ .emit(
+ r#"
+ if !flags.notrap() {
+ sink.trap(TrapCode::HeapOutOfBounds, func.srclocs[inst]);
+ }
+ {{PUT_OP}}(bits, rex2(in_reg1, in_reg0), sink);
+ if needs_sib_byte(in_reg1) {
+ modrm_sib_disp32(in_reg0, sink);
+ sib_noindex(in_reg1, sink);
+ } else {
+ modrm_disp32(in_reg1, in_reg0, sink);
+ }
+ let offset: i32 = offset.into();
+ sink.put4(offset as u32);
+ "#,
+ ),
+ regs,
+ )
+ .when_prefixed(st_disp32),
+ );
+
+ // XX /r register-indirect store with 32-bit offset of FPR.
+ recipes.add_template_inferred(
+ EncodingRecipeBuilder::new("fstDisp32", &formats.store, 5)
+ .operands_in(vec![fpr, gpr])
+ .clobbers_flags(false)
+ .compute_size("size_plus_maybe_sib_for_inreg_1")
+ .emit(
+ r#"
+ if !flags.notrap() {
+ sink.trap(TrapCode::HeapOutOfBounds, func.srclocs[inst]);
+ }
+ {{PUT_OP}}(bits, rex2(in_reg1, in_reg0), sink);
+ if needs_sib_byte(in_reg1) {
+ modrm_sib_disp32(in_reg0, sink);
+ sib_noindex(in_reg1, sink);
+ } else {
+ modrm_disp32(in_reg1, in_reg0, sink);
+ }
+ let offset: i32 = offset.into();
+ sink.put4(offset as u32);
+ "#,
+ ),
+ "size_plus_maybe_sib_inreg1_plus_rex_prefix_for_inreg0_inreg1",
+ );
+ }
+
+ {
+ // Complex stores.
+
+ // A predicate asking if the offset is zero.
+ let has_no_offset =
+ InstructionPredicate::new_is_field_equal(&*formats.store_complex, "offset", "0".into());
+
+ // XX /r register-indirect store with index and no offset.
+ recipes.add_template_recipe(
+ EncodingRecipeBuilder::new("stWithIndex", &formats.store_complex, 2)
+ .operands_in(vec![gpr, gpr, gpr])
+ .inst_predicate(has_no_offset.clone())
+ .clobbers_flags(false)
+ .compute_size("size_plus_maybe_offset_for_inreg_1")
+ .emit(
+ r#"
+ if !flags.notrap() {
+ sink.trap(TrapCode::HeapOutOfBounds, func.srclocs[inst]);
+ }
+ {{PUT_OP}}(bits, rex3(in_reg1, in_reg0, in_reg2), sink);
+ // The else branch always inserts an SIB byte.
+ if needs_offset(in_reg1) {
+ modrm_sib_disp8(in_reg0, sink);
+ sib(0, in_reg2, in_reg1, sink);
+ sink.put1(0);
+ } else {
+ modrm_sib(in_reg0, sink);
+ sib(0, in_reg2, in_reg1, sink);
+ }
+ "#,
+ ),
+ );
+
+ // XX /r register-indirect store with index and no offset.
+ // Only ABCD allowed for stored value. This is for byte stores with no REX.
+ recipes.add_template_recipe(
+ EncodingRecipeBuilder::new("stWithIndex_abcd", &formats.store_complex, 2)
+ .operands_in(vec![abcd, gpr, gpr])
+ .inst_predicate(has_no_offset.clone())
+ .clobbers_flags(false)
+ .compute_size("size_plus_maybe_offset_for_inreg_1")
+ .emit(
+ r#"
+ if !flags.notrap() {
+ sink.trap(TrapCode::HeapOutOfBounds, func.srclocs[inst]);
+ }
+ {{PUT_OP}}(bits, rex3(in_reg1, in_reg0, in_reg2), sink);
+ // The else branch always inserts an SIB byte.
+ if needs_offset(in_reg1) {
+ modrm_sib_disp8(in_reg0, sink);
+ sib(0, in_reg2, in_reg1, sink);
+ sink.put1(0);
+ } else {
+ modrm_sib(in_reg0, sink);
+ sib(0, in_reg2, in_reg1, sink);
+ }
+ "#,
+ ),
+ );
+
+ // XX /r register-indirect store with index and no offset of FPR.
+ recipes.add_template_recipe(
+ EncodingRecipeBuilder::new("fstWithIndex", &formats.store_complex, 2)
+ .operands_in(vec![fpr, gpr, gpr])
+ .inst_predicate(has_no_offset)
+ .clobbers_flags(false)
+ .compute_size("size_plus_maybe_offset_for_inreg_1")
+ .emit(
+ r#"
+ if !flags.notrap() {
+ sink.trap(TrapCode::HeapOutOfBounds, func.srclocs[inst]);
+ }
+ {{PUT_OP}}(bits, rex3(in_reg1, in_reg0, in_reg2), sink);
+ // The else branch always inserts an SIB byte.
+ if needs_offset(in_reg1) {
+ modrm_sib_disp8(in_reg0, sink);
+ sib(0, in_reg2, in_reg1, sink);
+ sink.put1(0);
+ } else {
+ modrm_sib(in_reg0, sink);
+ sib(0, in_reg2, in_reg1, sink);
+ }
+ "#,
+ ),
+ );
+
+ let has_small_offset =
+ InstructionPredicate::new_is_signed_int(&*formats.store_complex, "offset", 8, 0);
+
+ // XX /r register-indirect store with index and 8-bit offset.
+ recipes.add_template_recipe(
+ EncodingRecipeBuilder::new("stWithIndexDisp8", &formats.store_complex, 3)
+ .operands_in(vec![gpr, gpr, gpr])
+ .inst_predicate(has_small_offset.clone())
+ .clobbers_flags(false)
+ .emit(
+ r#"
+ if !flags.notrap() {
+ sink.trap(TrapCode::HeapOutOfBounds, func.srclocs[inst]);
+ }
+ {{PUT_OP}}(bits, rex3(in_reg1, in_reg0, in_reg2), sink);
+ modrm_sib_disp8(in_reg0, sink);
+ sib(0, in_reg2, in_reg1, sink);
+ let offset: i32 = offset.into();
+ sink.put1(offset as u8);
+ "#,
+ ),
+ );
+
+ // XX /r register-indirect store with index and 8-bit offset.
+ // Only ABCD allowed for stored value. This is for byte stores with no REX.
+ recipes.add_template_recipe(
+ EncodingRecipeBuilder::new("stWithIndexDisp8_abcd", &formats.store_complex, 3)
+ .operands_in(vec![abcd, gpr, gpr])
+ .inst_predicate(has_small_offset.clone())
+ .clobbers_flags(false)
+ .emit(
+ r#"
+ if !flags.notrap() {
+ sink.trap(TrapCode::HeapOutOfBounds, func.srclocs[inst]);
+ }
+ {{PUT_OP}}(bits, rex3(in_reg1, in_reg0, in_reg2), sink);
+ modrm_sib_disp8(in_reg0, sink);
+ sib(0, in_reg2, in_reg1, sink);
+ let offset: i32 = offset.into();
+ sink.put1(offset as u8);
+ "#,
+ ),
+ );
+
+ // XX /r register-indirect store with index and 8-bit offset of FPR.
+ recipes.add_template_recipe(
+ EncodingRecipeBuilder::new("fstWithIndexDisp8", &formats.store_complex, 3)
+ .operands_in(vec![fpr, gpr, gpr])
+ .inst_predicate(has_small_offset)
+ .clobbers_flags(false)
+ .emit(
+ r#"
+ if !flags.notrap() {
+ sink.trap(TrapCode::HeapOutOfBounds, func.srclocs[inst]);
+ }
+ {{PUT_OP}}(bits, rex3(in_reg1, in_reg0, in_reg2), sink);
+ modrm_sib_disp8(in_reg0, sink);
+ sib(0, in_reg2, in_reg1, sink);
+ let offset: i32 = offset.into();
+ sink.put1(offset as u8);
+ "#,
+ ),
+ );
+
+ let has_big_offset =
+ InstructionPredicate::new_is_signed_int(&*formats.store_complex, "offset", 32, 0);
+
+ // XX /r register-indirect store with index and 32-bit offset.
+ recipes.add_template_recipe(
+ EncodingRecipeBuilder::new("stWithIndexDisp32", &formats.store_complex, 6)
+ .operands_in(vec![gpr, gpr, gpr])
+ .inst_predicate(has_big_offset.clone())
+ .clobbers_flags(false)
+ .emit(
+ r#"
+ if !flags.notrap() {
+ sink.trap(TrapCode::HeapOutOfBounds, func.srclocs[inst]);
+ }
+ {{PUT_OP}}(bits, rex3(in_reg1, in_reg0, in_reg2), sink);
+ modrm_sib_disp32(in_reg0, sink);
+ sib(0, in_reg2, in_reg1, sink);
+ let offset: i32 = offset.into();
+ sink.put4(offset as u32);
+ "#,
+ ),
+ );
+
+ // XX /r register-indirect store with index and 32-bit offset.
+ // Only ABCD allowed for stored value. This is for byte stores with no REX.
+ recipes.add_template_recipe(
+ EncodingRecipeBuilder::new("stWithIndexDisp32_abcd", &formats.store_complex, 6)
+ .operands_in(vec![abcd, gpr, gpr])
+ .inst_predicate(has_big_offset.clone())
+ .clobbers_flags(false)
+ .emit(
+ r#"
+ if !flags.notrap() {
+ sink.trap(TrapCode::HeapOutOfBounds, func.srclocs[inst]);
+ }
+ {{PUT_OP}}(bits, rex3(in_reg1, in_reg0, in_reg2), sink);
+ modrm_sib_disp32(in_reg0, sink);
+ sib(0, in_reg2, in_reg1, sink);
+ let offset: i32 = offset.into();
+ sink.put4(offset as u32);
+ "#,
+ ),
+ );
+
+ // XX /r register-indirect store with index and 32-bit offset of FPR.
+ recipes.add_template_recipe(
+ EncodingRecipeBuilder::new("fstWithIndexDisp32", &formats.store_complex, 6)
+ .operands_in(vec![fpr, gpr, gpr])
+ .inst_predicate(has_big_offset)
+ .clobbers_flags(false)
+ .emit(
+ r#"
+ if !flags.notrap() {
+ sink.trap(TrapCode::HeapOutOfBounds, func.srclocs[inst]);
+ }
+ {{PUT_OP}}(bits, rex3(in_reg1, in_reg0, in_reg2), sink);
+ modrm_sib_disp32(in_reg0, sink);
+ sib(0, in_reg2, in_reg1, sink);
+ let offset: i32 = offset.into();
+ sink.put4(offset as u32);
+ "#,
+ ),
+ );
+ }
+
+ // Unary spill with SIB and 32-bit displacement.
+ recipes.add_template_recipe(
+ EncodingRecipeBuilder::new("spillSib32", &formats.unary, 6)
+ .operands_in(vec![gpr])
+ .operands_out(vec![stack_gpr32])
+ .clobbers_flags(false)
+ .emit(
+ r#"
+ sink.trap(TrapCode::StackOverflow, func.srclocs[inst]);
+ let base = stk_base(out_stk0.base);
+ {{PUT_OP}}(bits, rex2(base, in_reg0), sink);
+ modrm_sib_disp32(in_reg0, sink);
+ sib_noindex(base, sink);
+ sink.put4(out_stk0.offset as u32);
+ "#,
+ ),
+ );
+
+ // Like spillSib32, but targeting an FPR rather than a GPR.
+ recipes.add_template_recipe(
+ EncodingRecipeBuilder::new("fspillSib32", &formats.unary, 6)
+ .operands_in(vec![fpr])
+ .operands_out(vec![stack_fpr32])
+ .clobbers_flags(false)
+ .emit(
+ r#"
+ sink.trap(TrapCode::StackOverflow, func.srclocs[inst]);
+ let base = stk_base(out_stk0.base);
+ {{PUT_OP}}(bits, rex2(base, in_reg0), sink);
+ modrm_sib_disp32(in_reg0, sink);
+ sib_noindex(base, sink);
+ sink.put4(out_stk0.offset as u32);
+ "#,
+ ),
+ );
+
+ // Regspill using RSP-relative addressing.
+ recipes.add_template_recipe(
+ EncodingRecipeBuilder::new("regspill32", &formats.reg_spill, 6)
+ .operands_in(vec![gpr])
+ .clobbers_flags(false)
+ .emit(
+ r#"
+ sink.trap(TrapCode::StackOverflow, func.srclocs[inst]);
+ let dst = StackRef::sp(dst, &func.stack_slots);
+ let base = stk_base(dst.base);
+ {{PUT_OP}}(bits, rex2(base, src), sink);
+ modrm_sib_disp32(src, sink);
+ sib_noindex(base, sink);
+ sink.put4(dst.offset as u32);
+ "#,
+ ),
+ );
+
+ // Like regspill32, but targeting an FPR rather than a GPR.
+ recipes.add_template_recipe(
+ EncodingRecipeBuilder::new("fregspill32", &formats.reg_spill, 6)
+ .operands_in(vec![fpr])
+ .clobbers_flags(false)
+ .emit(
+ r#"
+ sink.trap(TrapCode::StackOverflow, func.srclocs[inst]);
+ let dst = StackRef::sp(dst, &func.stack_slots);
+ let base = stk_base(dst.base);
+ {{PUT_OP}}(bits, rex2(base, src), sink);
+ modrm_sib_disp32(src, sink);
+ sib_noindex(base, sink);
+ sink.put4(dst.offset as u32);
+ "#,
+ ),
+ );
+
+ // Load recipes.
+
+ {
+ // Simple loads.
+
+ // A predicate asking if the offset is zero.
+ let has_no_offset =
+ InstructionPredicate::new_is_field_equal(&*formats.load, "offset", "0".into());
+
+ // XX /r load with no offset.
+ recipes.add_template_recipe(
+ EncodingRecipeBuilder::new("ld", &formats.load, 1)
+ .operands_in(vec![gpr])
+ .operands_out(vec![gpr])
+ .inst_predicate(has_no_offset.clone())
+ .clobbers_flags(false)
+ .compute_size("size_plus_maybe_sib_or_offset_for_inreg_0")
+ .emit(
+ r#"
+ if !flags.notrap() {
+ sink.trap(TrapCode::HeapOutOfBounds, func.srclocs[inst]);
+ }
+ {{PUT_OP}}(bits, rex2(in_reg0, out_reg0), sink);
+ if needs_sib_byte(in_reg0) {
+ modrm_sib(out_reg0, sink);
+ sib_noindex(in_reg0, sink);
+ } else if needs_offset(in_reg0) {
+ modrm_disp8(in_reg0, out_reg0, sink);
+ sink.put1(0);
+ } else {
+ modrm_rm(in_reg0, out_reg0, sink);
+ }
+ "#,
+ ),
+ );
+
+ // XX /r float load with no offset.
+ recipes.add_template_inferred(
+ EncodingRecipeBuilder::new("fld", &formats.load, 1)
+ .operands_in(vec![gpr])
+ .operands_out(vec![fpr])
+ .inst_predicate(has_no_offset)
+ .clobbers_flags(false)
+ .compute_size("size_plus_maybe_sib_or_offset_for_inreg_0")
+ .emit(
+ r#"
+ if !flags.notrap() {
+ sink.trap(TrapCode::HeapOutOfBounds, func.srclocs[inst]);
+ }
+ {{PUT_OP}}(bits, rex2(in_reg0, out_reg0), sink);
+ if needs_sib_byte(in_reg0) {
+ modrm_sib(out_reg0, sink);
+ sib_noindex(in_reg0, sink);
+ } else if needs_offset(in_reg0) {
+ modrm_disp8(in_reg0, out_reg0, sink);
+ sink.put1(0);
+ } else {
+ modrm_rm(in_reg0, out_reg0, sink);
+ }
+ "#,
+ ),
+ "size_plus_maybe_sib_or_offset_for_inreg_0_plus_rex_prefix_for_inreg0_outreg0",
+ );
+
+ let has_small_offset =
+ InstructionPredicate::new_is_signed_int(&*formats.load, "offset", 8, 0);
+
+ // XX /r load with 8-bit offset.
+ recipes.add_template_recipe(
+ EncodingRecipeBuilder::new("ldDisp8", &formats.load, 2)
+ .operands_in(vec![gpr])
+ .operands_out(vec![gpr])
+ .inst_predicate(has_small_offset.clone())
+ .clobbers_flags(false)
+ .compute_size("size_plus_maybe_sib_for_inreg_0")
+ .emit(
+ r#"
+ if !flags.notrap() {
+ sink.trap(TrapCode::HeapOutOfBounds, func.srclocs[inst]);
+ }
+ {{PUT_OP}}(bits, rex2(in_reg0, out_reg0), sink);
+ if needs_sib_byte(in_reg0) {
+ modrm_sib_disp8(out_reg0, sink);
+ sib_noindex(in_reg0, sink);
+ } else {
+ modrm_disp8(in_reg0, out_reg0, sink);
+ }
+ let offset: i32 = offset.into();
+ sink.put1(offset as u8);
+ "#,
+ ),
+ );
+
+ // XX /r float load with 8-bit offset.
+ recipes.add_template_inferred(
+ EncodingRecipeBuilder::new("fldDisp8", &formats.load, 2)
+ .operands_in(vec![gpr])
+ .operands_out(vec![fpr])
+ .inst_predicate(has_small_offset)
+ .clobbers_flags(false)
+ .compute_size("size_plus_maybe_sib_for_inreg_0")
+ .emit(
+ r#"
+ if !flags.notrap() {
+ sink.trap(TrapCode::HeapOutOfBounds, func.srclocs[inst]);
+ }
+ {{PUT_OP}}(bits, rex2(in_reg0, out_reg0), sink);
+ if needs_sib_byte(in_reg0) {
+ modrm_sib_disp8(out_reg0, sink);
+ sib_noindex(in_reg0, sink);
+ } else {
+ modrm_disp8(in_reg0, out_reg0, sink);
+ }
+ let offset: i32 = offset.into();
+ sink.put1(offset as u8);
+ "#,
+ ),
+ "size_plus_maybe_sib_for_inreg_0_plus_rex_prefix_for_inreg0_outreg0",
+ );
+
+ let has_big_offset =
+ InstructionPredicate::new_is_signed_int(&*formats.load, "offset", 32, 0);
+
+ // XX /r load with 32-bit offset.
+ recipes.add_template_recipe(
+ EncodingRecipeBuilder::new("ldDisp32", &formats.load, 5)
+ .operands_in(vec![gpr])
+ .operands_out(vec![gpr])
+ .inst_predicate(has_big_offset.clone())
+ .clobbers_flags(false)
+ .compute_size("size_plus_maybe_sib_for_inreg_0")
+ .emit(
+ r#"
+ if !flags.notrap() {
+ sink.trap(TrapCode::HeapOutOfBounds, func.srclocs[inst]);
+ }
+ {{PUT_OP}}(bits, rex2(in_reg0, out_reg0), sink);
+ if needs_sib_byte(in_reg0) {
+ modrm_sib_disp32(out_reg0, sink);
+ sib_noindex(in_reg0, sink);
+ } else {
+ modrm_disp32(in_reg0, out_reg0, sink);
+ }
+ let offset: i32 = offset.into();
+ sink.put4(offset as u32);
+ "#,
+ ),
+ );
+
+ // XX /r float load with 32-bit offset.
+ recipes.add_template_inferred(
+ EncodingRecipeBuilder::new("fldDisp32", &formats.load, 5)
+ .operands_in(vec![gpr])
+ .operands_out(vec![fpr])
+ .inst_predicate(has_big_offset)
+ .clobbers_flags(false)
+ .compute_size("size_plus_maybe_sib_for_inreg_0")
+ .emit(
+ r#"
+ if !flags.notrap() {
+ sink.trap(TrapCode::HeapOutOfBounds, func.srclocs[inst]);
+ }
+ {{PUT_OP}}(bits, rex2(in_reg0, out_reg0), sink);
+ if needs_sib_byte(in_reg0) {
+ modrm_sib_disp32(out_reg0, sink);
+ sib_noindex(in_reg0, sink);
+ } else {
+ modrm_disp32(in_reg0, out_reg0, sink);
+ }
+ let offset: i32 = offset.into();
+ sink.put4(offset as u32);
+ "#,
+ ),
+ "size_plus_maybe_sib_for_inreg_0_plus_rex_prefix_for_inreg0_outreg0",
+ );
+ }
+
+ {
+ // Complex loads.
+
+ // A predicate asking if the offset is zero.
+ let has_no_offset =
+ InstructionPredicate::new_is_field_equal(&*formats.load_complex, "offset", "0".into());
+
+ // XX /r load with index and no offset.
+ recipes.add_template_recipe(
+ EncodingRecipeBuilder::new("ldWithIndex", &formats.load_complex, 2)
+ .operands_in(vec![gpr, gpr])
+ .operands_out(vec![gpr])
+ .inst_predicate(has_no_offset.clone())
+ .clobbers_flags(false)
+ .compute_size("size_plus_maybe_offset_for_inreg_0")
+ .emit(
+ r#"
+ if !flags.notrap() {
+ sink.trap(TrapCode::HeapOutOfBounds, func.srclocs[inst]);
+ }
+ {{PUT_OP}}(bits, rex3(in_reg0, out_reg0, in_reg1), sink);
+ // The else branch always inserts an SIB byte.
+ if needs_offset(in_reg0) {
+ modrm_sib_disp8(out_reg0, sink);
+ sib(0, in_reg1, in_reg0, sink);
+ sink.put1(0);
+ } else {
+ modrm_sib(out_reg0, sink);
+ sib(0, in_reg1, in_reg0, sink);
+ }
+ "#,
+ ),
+ );
+
+ // XX /r float load with index and no offset.
+ recipes.add_template_recipe(
+ EncodingRecipeBuilder::new("fldWithIndex", &formats.load_complex, 2)
+ .operands_in(vec![gpr, gpr])
+ .operands_out(vec![fpr])
+ .inst_predicate(has_no_offset)
+ .clobbers_flags(false)
+ .compute_size("size_plus_maybe_offset_for_inreg_0")
+ .emit(
+ r#"
+ if !flags.notrap() {
+ sink.trap(TrapCode::HeapOutOfBounds, func.srclocs[inst]);
+ }
+ {{PUT_OP}}(bits, rex3(in_reg0, out_reg0, in_reg1), sink);
+ // The else branch always inserts an SIB byte.
+ if needs_offset(in_reg0) {
+ modrm_sib_disp8(out_reg0, sink);
+ sib(0, in_reg1, in_reg0, sink);
+ sink.put1(0);
+ } else {
+ modrm_sib(out_reg0, sink);
+ sib(0, in_reg1, in_reg0, sink);
+ }
+ "#,
+ ),
+ );
+
+ let has_small_offset =
+ InstructionPredicate::new_is_signed_int(&*formats.load_complex, "offset", 8, 0);
+
+ // XX /r load with index and 8-bit offset.
+ recipes.add_template_recipe(
+ EncodingRecipeBuilder::new("ldWithIndexDisp8", &formats.load_complex, 3)
+ .operands_in(vec![gpr, gpr])
+ .operands_out(vec![gpr])
+ .inst_predicate(has_small_offset.clone())
+ .clobbers_flags(false)
+ .emit(
+ r#"
+ if !flags.notrap() {
+ sink.trap(TrapCode::HeapOutOfBounds, func.srclocs[inst]);
+ }
+ {{PUT_OP}}(bits, rex3(in_reg0, out_reg0, in_reg1), sink);
+ modrm_sib_disp8(out_reg0, sink);
+ sib(0, in_reg1, in_reg0, sink);
+ let offset: i32 = offset.into();
+ sink.put1(offset as u8);
+ "#,
+ ),
+ );
+
+ // XX /r float load with 8-bit offset.
+ recipes.add_template_recipe(
+ EncodingRecipeBuilder::new("fldWithIndexDisp8", &formats.load_complex, 3)
+ .operands_in(vec![gpr, gpr])
+ .operands_out(vec![fpr])
+ .inst_predicate(has_small_offset)
+ .clobbers_flags(false)
+ .emit(
+ r#"
+ if !flags.notrap() {
+ sink.trap(TrapCode::HeapOutOfBounds, func.srclocs[inst]);
+ }
+ {{PUT_OP}}(bits, rex3(in_reg0, out_reg0, in_reg1), sink);
+ modrm_sib_disp8(out_reg0, sink);
+ sib(0, in_reg1, in_reg0, sink);
+ let offset: i32 = offset.into();
+ sink.put1(offset as u8);
+ "#,
+ ),
+ );
+
+ let has_big_offset =
+ InstructionPredicate::new_is_signed_int(&*formats.load_complex, "offset", 32, 0);
+
+ // XX /r load with index and 32-bit offset.
+ recipes.add_template_recipe(
+ EncodingRecipeBuilder::new("ldWithIndexDisp32", &formats.load_complex, 6)
+ .operands_in(vec![gpr, gpr])
+ .operands_out(vec![gpr])
+ .inst_predicate(has_big_offset.clone())
+ .clobbers_flags(false)
+ .emit(
+ r#"
+ if !flags.notrap() {
+ sink.trap(TrapCode::HeapOutOfBounds, func.srclocs[inst]);
+ }
+ {{PUT_OP}}(bits, rex3(in_reg0, out_reg0, in_reg1), sink);
+ modrm_sib_disp32(out_reg0, sink);
+ sib(0, in_reg1, in_reg0, sink);
+ let offset: i32 = offset.into();
+ sink.put4(offset as u32);
+ "#,
+ ),
+ );
+
+ // XX /r float load with index and 32-bit offset.
+ recipes.add_template_recipe(
+ EncodingRecipeBuilder::new("fldWithIndexDisp32", &formats.load_complex, 6)
+ .operands_in(vec![gpr, gpr])
+ .operands_out(vec![fpr])
+ .inst_predicate(has_big_offset)
+ .clobbers_flags(false)
+ .emit(
+ r#"
+ if !flags.notrap() {
+ sink.trap(TrapCode::HeapOutOfBounds, func.srclocs[inst]);
+ }
+ {{PUT_OP}}(bits, rex3(in_reg0, out_reg0, in_reg1), sink);
+ modrm_sib_disp32(out_reg0, sink);
+ sib(0, in_reg1, in_reg0, sink);
+ let offset: i32 = offset.into();
+ sink.put4(offset as u32);
+ "#,
+ ),
+ );
+ }
+
+ // Unary fill with SIB and 32-bit displacement.
+ recipes.add_template_recipe(
+ EncodingRecipeBuilder::new("fillSib32", &formats.unary, 6)
+ .operands_in(vec![stack_gpr32])
+ .operands_out(vec![gpr])
+ .clobbers_flags(false)
+ .emit(
+ r#"
+ let base = stk_base(in_stk0.base);
+ {{PUT_OP}}(bits, rex2(base, out_reg0), sink);
+ modrm_sib_disp32(out_reg0, sink);
+ sib_noindex(base, sink);
+ sink.put4(in_stk0.offset as u32);
+ "#,
+ ),
+ );
+
+ // Like fillSib32, but targeting an FPR rather than a GPR.
+ recipes.add_template_recipe(
+ EncodingRecipeBuilder::new("ffillSib32", &formats.unary, 6)
+ .operands_in(vec![stack_fpr32])
+ .operands_out(vec![fpr])
+ .clobbers_flags(false)
+ .emit(
+ r#"
+ let base = stk_base(in_stk0.base);
+ {{PUT_OP}}(bits, rex2(base, out_reg0), sink);
+ modrm_sib_disp32(out_reg0, sink);
+ sib_noindex(base, sink);
+ sink.put4(in_stk0.offset as u32);
+ "#,
+ ),
+ );
+
+ // Regfill with RSP-relative 32-bit displacement.
+ recipes.add_template_recipe(
+ EncodingRecipeBuilder::new("regfill32", &formats.reg_fill, 6)
+ .operands_in(vec![stack_gpr32])
+ .clobbers_flags(false)
+ .emit(
+ r#"
+ let src = StackRef::sp(src, &func.stack_slots);
+ let base = stk_base(src.base);
+ {{PUT_OP}}(bits, rex2(base, dst), sink);
+ modrm_sib_disp32(dst, sink);
+ sib_noindex(base, sink);
+ sink.put4(src.offset as u32);
+ "#,
+ ),
+ );
+
+ // Like regfill32, but targeting an FPR rather than a GPR.
+ recipes.add_template_recipe(
+ EncodingRecipeBuilder::new("fregfill32", &formats.reg_fill, 6)
+ .operands_in(vec![stack_fpr32])
+ .clobbers_flags(false)
+ .emit(
+ r#"
+ let src = StackRef::sp(src, &func.stack_slots);
+ let base = stk_base(src.base);
+ {{PUT_OP}}(bits, rex2(base, dst), sink);
+ modrm_sib_disp32(dst, sink);
+ sib_noindex(base, sink);
+ sink.put4(src.offset as u32);
+ "#,
+ ),
+ );
+
+ // Call/return.
+
+ recipes.add_template_recipe(
+ EncodingRecipeBuilder::new("call_id", &formats.call, 4).emit(
+ r#"
+ sink.trap(TrapCode::StackOverflow, func.srclocs[inst]);
+ {{PUT_OP}}(bits, BASE_REX, sink);
+ // The addend adjusts for the difference between the end of the
+ // instruction and the beginning of the immediate field.
+ sink.reloc_external(func.srclocs[inst],
+ Reloc::X86CallPCRel4,
+ &func.dfg.ext_funcs[func_ref].name,
+ -4);
+ sink.put4(0);
+ sink.add_call_site(opcode, func.srclocs[inst]);
+ "#,
+ ),
+ );
+
+ recipes.add_template_recipe(
+ EncodingRecipeBuilder::new("call_plt_id", &formats.call, 4).emit(
+ r#"
+ sink.trap(TrapCode::StackOverflow, func.srclocs[inst]);
+ {{PUT_OP}}(bits, BASE_REX, sink);
+ sink.reloc_external(func.srclocs[inst],
+ Reloc::X86CallPLTRel4,
+ &func.dfg.ext_funcs[func_ref].name,
+ -4);
+ sink.put4(0);
+ sink.add_call_site(opcode, func.srclocs[inst]);
+ "#,
+ ),
+ );
+
+ recipes.add_template_recipe(
+ EncodingRecipeBuilder::new("call_r", &formats.call_indirect, 1)
+ .operands_in(vec![gpr])
+ .emit(
+ r#"
+ sink.trap(TrapCode::StackOverflow, func.srclocs[inst]);
+ {{PUT_OP}}(bits, rex1(in_reg0), sink);
+ modrm_r_bits(in_reg0, bits, sink);
+ sink.add_call_site(opcode, func.srclocs[inst]);
+ "#,
+ ),
+ );
+
+ recipes.add_template_recipe(
+ EncodingRecipeBuilder::new("ret", &formats.multiary, 0)
+ .emit("{{PUT_OP}}(bits, BASE_REX, sink);"),
+ );
+
+ // Branches.
+
+ recipes.add_template_recipe(
+ EncodingRecipeBuilder::new("jmpb", &formats.jump, 1)
+ .branch_range((1, 8))
+ .clobbers_flags(false)
+ .emit(
+ r#"
+ {{PUT_OP}}(bits, BASE_REX, sink);
+ disp1(destination, func, sink);
+ "#,
+ ),
+ );
+
+ recipes.add_template_recipe(
+ EncodingRecipeBuilder::new("jmpd", &formats.jump, 4)
+ .branch_range((4, 32))
+ .clobbers_flags(false)
+ .emit(
+ r#"
+ {{PUT_OP}}(bits, BASE_REX, sink);
+ disp4(destination, func, sink);
+ "#,
+ ),
+ );
+
+ recipes.add_template_recipe(
+ EncodingRecipeBuilder::new("brib", &formats.branch_int, 1)
+ .operands_in(vec![reg_rflags])
+ .branch_range((1, 8))
+ .clobbers_flags(false)
+ .emit(
+ r#"
+ {{PUT_OP}}(bits | icc2opc(cond), BASE_REX, sink);
+ disp1(destination, func, sink);
+ "#,
+ ),
+ );
+
+ recipes.add_template_recipe(
+ EncodingRecipeBuilder::new("brid", &formats.branch_int, 4)
+ .operands_in(vec![reg_rflags])
+ .branch_range((4, 32))
+ .clobbers_flags(false)
+ .emit(
+ r#"
+ {{PUT_OP}}(bits | icc2opc(cond), BASE_REX, sink);
+ disp4(destination, func, sink);
+ "#,
+ ),
+ );
+
+ recipes.add_template_recipe(
+ EncodingRecipeBuilder::new("brfb", &formats.branch_float, 1)
+ .operands_in(vec![reg_rflags])
+ .branch_range((1, 8))
+ .clobbers_flags(false)
+ .inst_predicate(supported_floatccs_predicate(
+ &supported_floatccs,
+ &*formats.branch_float,
+ ))
+ .emit(
+ r#"
+ {{PUT_OP}}(bits | fcc2opc(cond), BASE_REX, sink);
+ disp1(destination, func, sink);
+ "#,
+ ),
+ );
+
+ recipes.add_template_recipe(
+ EncodingRecipeBuilder::new("brfd", &formats.branch_float, 4)
+ .operands_in(vec![reg_rflags])
+ .branch_range((4, 32))
+ .clobbers_flags(false)
+ .inst_predicate(supported_floatccs_predicate(
+ &supported_floatccs,
+ &*formats.branch_float,
+ ))
+ .emit(
+ r#"
+ {{PUT_OP}}(bits | fcc2opc(cond), BASE_REX, sink);
+ disp4(destination, func, sink);
+ "#,
+ ),
+ );
+
+ recipes.add_template_recipe(
+ EncodingRecipeBuilder::new("indirect_jmp", &formats.indirect_jump, 1)
+ .operands_in(vec![gpr])
+ .clobbers_flags(false)
+ .emit(
+ r#"
+ {{PUT_OP}}(bits, rex1(in_reg0), sink);
+ modrm_r_bits(in_reg0, bits, sink);
+ "#,
+ ),
+ );
+
+ recipes.add_template_recipe(
+ EncodingRecipeBuilder::new("jt_entry", &formats.branch_table_entry, 2)
+ .operands_in(vec![gpr, gpr])
+ .operands_out(vec![gpr])
+ .clobbers_flags(false)
+ .inst_predicate(valid_scale(&*formats.branch_table_entry))
+ .compute_size("size_plus_maybe_offset_for_inreg_1")
+ .emit(
+ r#"
+ {{PUT_OP}}(bits, rex3(in_reg1, out_reg0, in_reg0), sink);
+ if needs_offset(in_reg1) {
+ modrm_sib_disp8(out_reg0, sink);
+ sib(imm.trailing_zeros() as u8, in_reg0, in_reg1, sink);
+ sink.put1(0);
+ } else {
+ modrm_sib(out_reg0, sink);
+ sib(imm.trailing_zeros() as u8, in_reg0, in_reg1, sink);
+ }
+ "#,
+ ),
+ );
+
+ recipes.add_template_inferred(
+ EncodingRecipeBuilder::new("vconst", &formats.unary_const, 5)
+ .operands_out(vec![fpr])
+ .clobbers_flags(false)
+ .emit(
+ r#"
+ {{PUT_OP}}(bits, rex2(0, out_reg0), sink);
+ modrm_riprel(out_reg0, sink);
+ const_disp4(constant_handle, func, sink);
+ "#,
+ ),
+ "size_with_inferred_rex_for_outreg0",
+ );
+
+ recipes.add_template_inferred(
+ EncodingRecipeBuilder::new("vconst_optimized", &formats.unary_const, 1)
+ .operands_out(vec![fpr])
+ .clobbers_flags(false)
+ .emit(
+ r#"
+ {{PUT_OP}}(bits, rex2(out_reg0, out_reg0), sink);
+ modrm_rr(out_reg0, out_reg0, sink);
+ "#,
+ ),
+ "size_with_inferred_rex_for_outreg0",
+ );
+
+ recipes.add_template_recipe(
+ EncodingRecipeBuilder::new("jt_base", &formats.branch_table_base, 5)
+ .operands_out(vec![gpr])
+ .clobbers_flags(false)
+ .emit(
+ r#"
+ {{PUT_OP}}(bits, rex2(0, out_reg0), sink);
+ modrm_riprel(out_reg0, sink);
+
+ // No reloc is needed here as the jump table is emitted directly after
+ // the function body.
+ jt_disp4(table, func, sink);
+ "#,
+ ),
+ );
+
+ // Test flags and set a register.
+ //
+ // These setCC instructions only set the low 8 bits, and they can only write ABCD registers
+ // without a REX prefix.
+ //
+ // Other instruction encodings accepting `b1` inputs have the same constraints and only look at
+ // the low 8 bits of the input register.
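+ //
+ // (Background, for illustration: without a REX prefix, byte-register numbers 4-7 in the
+ // ModRM byte select AH/CH/DH/BH rather than SPL/BPL/SIL/DIL, so only the A/B/C/D
+ // registers can safely receive an 8-bit `setCC` result; with a REX prefix any GPR's low
+ // byte is addressable, which is why the plain `seti`/`setf` templates force a REX.)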
+
+ let seti = recipes.add_template(
+ Template::new(
+ EncodingRecipeBuilder::new("seti", &formats.int_cond, 1)
+ .operands_in(vec![reg_rflags])
+ .operands_out(vec![gpr])
+ .clobbers_flags(false)
+ .emit(
+ r#"
+ {{PUT_OP}}(bits | icc2opc(cond), rex1(out_reg0), sink);
+ modrm_r_bits(out_reg0, bits, sink);
+ "#,
+ ),
+ regs,
+ )
+ .rex_kind(RecipePrefixKind::AlwaysEmitRex),
+ );
+
+ recipes.add_template(
+ Template::new(
+ EncodingRecipeBuilder::new("seti_abcd", &formats.int_cond, 1)
+ .operands_in(vec![reg_rflags])
+ .operands_out(vec![abcd])
+ .clobbers_flags(false)
+ .emit(
+ r#"
+ {{PUT_OP}}(bits | icc2opc(cond), rex1(out_reg0), sink);
+ modrm_r_bits(out_reg0, bits, sink);
+ "#,
+ ),
+ regs,
+ )
+ .when_prefixed(seti),
+ );
+
+ let setf = recipes.add_template(
+ Template::new(
+ EncodingRecipeBuilder::new("setf", &formats.float_cond, 1)
+ .operands_in(vec![reg_rflags])
+ .operands_out(vec![gpr])
+ .clobbers_flags(false)
+ .emit(
+ r#"
+ {{PUT_OP}}(bits | fcc2opc(cond), rex1(out_reg0), sink);
+ modrm_r_bits(out_reg0, bits, sink);
+ "#,
+ ),
+ regs,
+ )
+ .rex_kind(RecipePrefixKind::AlwaysEmitRex),
+ );
+
+ recipes.add_template(
+ Template::new(
+ EncodingRecipeBuilder::new("setf_abcd", &formats.float_cond, 1)
+ .operands_in(vec![reg_rflags])
+ .operands_out(vec![abcd])
+ .clobbers_flags(false)
+ .emit(
+ r#"
+ {{PUT_OP}}(bits | fcc2opc(cond), rex1(out_reg0), sink);
+ modrm_r_bits(out_reg0, bits, sink);
+ "#,
+ ),
+ regs,
+ )
+ .when_prefixed(setf),
+ );
+
+ // Conditional move (a.k.a. integer select)
+ // (maybe-REX.W) 0F 4x modrm(r,r)
+ // A single modrm(r,r) byte follows the opcode.
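+ //
+ // For example (illustrative): a `cmovne r, r` selection encodes as 0F 45 /r, the 0F 4x
+ // base opcode with the condition folded into the low nibble via `icc2opc`.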
+ recipes.add_template(
+ Template::new(
+ EncodingRecipeBuilder::new("cmov", &formats.int_select, 1)
+ .operands_in(vec![
+ OperandConstraint::FixedReg(reg_rflags),
+ OperandConstraint::RegClass(gpr),
+ OperandConstraint::RegClass(gpr),
+ ])
+ .operands_out(vec![2])
+ .clobbers_flags(false)
+ .emit(
+ r#"
+ {{PUT_OP}}(bits | icc2opc(cond), rex2(in_reg1, in_reg2), sink);
+ modrm_rr(in_reg1, in_reg2, sink);
+ "#,
+ ),
+ regs,
+ )
+ .inferred_rex_compute_size("size_with_inferred_rex_for_cmov"),
+ );
+
+ // Bit scan forwards and reverse
+ recipes.add_template(
+ Template::new(
+ EncodingRecipeBuilder::new("bsf_and_bsr", &formats.unary, 1)
+ .operands_in(vec![gpr])
+ .operands_out(vec![
+ OperandConstraint::RegClass(gpr),
+ OperandConstraint::FixedReg(reg_rflags),
+ ])
+ .emit(
+ r#"
+ {{PUT_OP}}(bits, rex2(in_reg0, out_reg0), sink);
+ modrm_rr(in_reg0, out_reg0, sink);
+ "#,
+ ),
+ regs,
+ )
+ .inferred_rex_compute_size("size_with_inferred_rex_for_inreg0_outreg0"),
+ );
+
+ // Arithmetic with flag I/O.
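+ //
+ // In these recipe names, `r` denotes the register-register form and the suffix appears to
+ // describe the flags direction: `rout` produces flags, `rin` consumes them, `rio` does both.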
+
+ // XX /r, MR form. Add two GPR registers and set carry flag.
+ recipes.add_template(
+ Template::new(
+ EncodingRecipeBuilder::new("rout", &formats.binary, 1)
+ .operands_in(vec![gpr, gpr])
+ .operands_out(vec![
+ OperandConstraint::TiedInput(0),
+ OperandConstraint::FixedReg(reg_rflags),
+ ])
+ .clobbers_flags(true)
+ .emit(
+ r#"
+ {{PUT_OP}}(bits, rex2(in_reg0, in_reg1), sink);
+ modrm_rr(in_reg0, in_reg1, sink);
+ "#,
+ ),
+ regs,
+ )
+ .inferred_rex_compute_size("size_with_inferred_rex_for_inreg0_inreg1"),
+ );
+
+ // XX /r, MR form. Add two GPR registers and get carry flag.
+ recipes.add_template(
+ Template::new(
+ EncodingRecipeBuilder::new("rin", &formats.ternary, 1)
+ .operands_in(vec![
+ OperandConstraint::RegClass(gpr),
+ OperandConstraint::RegClass(gpr),
+ OperandConstraint::FixedReg(reg_rflags),
+ ])
+ .operands_out(vec![0])
+ .clobbers_flags(true)
+ .emit(
+ r#"
+ {{PUT_OP}}(bits, rex2(in_reg0, in_reg1), sink);
+ modrm_rr(in_reg0, in_reg1, sink);
+ "#,
+ ),
+ regs,
+ )
+ .inferred_rex_compute_size("size_with_inferred_rex_for_inreg0_inreg1"),
+ );
+
+ // XX /r, MR form. Add two GPR registers with carry flag.
+ recipes.add_template(
+ Template::new(
+ EncodingRecipeBuilder::new("rio", &formats.ternary, 1)
+ .operands_in(vec![
+ OperandConstraint::RegClass(gpr),
+ OperandConstraint::RegClass(gpr),
+ OperandConstraint::FixedReg(reg_rflags),
+ ])
+ .operands_out(vec![
+ OperandConstraint::TiedInput(0),
+ OperandConstraint::FixedReg(reg_rflags),
+ ])
+ .clobbers_flags(true)
+ .emit(
+ r#"
+ {{PUT_OP}}(bits, rex2(in_reg0, in_reg1), sink);
+ modrm_rr(in_reg0, in_reg1, sink);
+ "#,
+ ),
+ regs,
+ )
+ .inferred_rex_compute_size("size_with_inferred_rex_for_inreg0_inreg1"),
+ );
+
+ // Compare and set flags.
+
+ // XX /r, MR form. Compare two GPR registers and set flags.
+ recipes.add_template(
+ Template::new(
+ EncodingRecipeBuilder::new("rcmp", &formats.binary, 1)
+ .operands_in(vec![gpr, gpr])
+ .operands_out(vec![reg_rflags])
+ .emit(
+ r#"
+ {{PUT_OP}}(bits, rex2(in_reg0, in_reg1), sink);
+ modrm_rr(in_reg0, in_reg1, sink);
+ "#,
+ ),
+ regs,
+ )
+ .inferred_rex_compute_size("size_with_inferred_rex_for_inreg0_inreg1"),
+ );
+
+ // Same as rcmp, but second operand is the stack pointer.
+ recipes.add_template_recipe(
+ EncodingRecipeBuilder::new("rcmp_sp", &formats.unary, 1)
+ .operands_in(vec![gpr])
+ .operands_out(vec![reg_rflags])
+ .emit(
+ r#"
+ {{PUT_OP}}(bits, rex2(in_reg0, RU::rsp.into()), sink);
+ modrm_rr(in_reg0, RU::rsp.into(), sink);
+ "#,
+ ),
+ );
+
+ // XX /r, RM form. Compare two FPR registers and set flags.
+ recipes.add_template_inferred(
+ EncodingRecipeBuilder::new("fcmp", &formats.binary, 1)
+ .operands_in(vec![fpr, fpr])
+ .operands_out(vec![reg_rflags])
+ .emit(
+ r#"
+ {{PUT_OP}}(bits, rex2(in_reg1, in_reg0), sink);
+ modrm_rr(in_reg1, in_reg0, sink);
+ "#,
+ ),
+ "size_with_inferred_rex_for_inreg0_inreg1",
+ );
+
+ {
+ let has_small_offset =
+ InstructionPredicate::new_is_signed_int(&*formats.binary_imm64, "imm", 8, 0);
+
+ // XX /n, MI form with imm8.
+ recipes.add_template(
+ Template::new(
+ EncodingRecipeBuilder::new("rcmp_ib", &formats.binary_imm64, 2)
+ .operands_in(vec![gpr])
+ .operands_out(vec![reg_rflags])
+ .inst_predicate(has_small_offset)
+ .emit(
+ r#"
+ {{PUT_OP}}(bits, rex1(in_reg0), sink);
+ modrm_r_bits(in_reg0, bits, sink);
+ let imm: i64 = imm.into();
+ sink.put1(imm as u8);
+ "#,
+ ),
+ regs,
+ )
+ .inferred_rex_compute_size("size_with_inferred_rex_for_inreg0"),
+ );
+
+ let has_big_offset =
+ InstructionPredicate::new_is_signed_int(&*formats.binary_imm64, "imm", 32, 0);
+
+ // XX /n, MI form with imm32.
+ recipes.add_template(
+ Template::new(
+ EncodingRecipeBuilder::new("rcmp_id", &formats.binary_imm64, 5)
+ .operands_in(vec![gpr])
+ .operands_out(vec![reg_rflags])
+ .inst_predicate(has_big_offset)
+ .emit(
+ r#"
+ {{PUT_OP}}(bits, rex1(in_reg0), sink);
+ modrm_r_bits(in_reg0, bits, sink);
+ let imm: i64 = imm.into();
+ sink.put4(imm as u32);
+ "#,
+ ),
+ regs,
+ )
+ .inferred_rex_compute_size("size_with_inferred_rex_for_inreg0"),
+ );
+ }
+
+ // Test-and-branch.
+ //
+ // This recipe represents the macro fusion of a test and a conditional branch.
+ // This serves two purposes:
+ //
+ // 1. Guarantee that the test and branch get scheduled next to each other so
+ // macro fusion is guaranteed to be possible.
+ // 2. Hide the status flags from Cranelift, which doesn't currently model flags.
+ //
+ // The encoding bits affect both the test and the branch instruction:
+ //
+ // Bits 0-7 are the Jcc opcode.
+ // Bits 8-15 control the test instruction which always has opcode byte 0x85.
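+ //
+ // For example (illustrative): a short-form `brnz` encoded with `tjccb` would use a `bits`
+ // value whose low byte is 0x75 (the one-byte JNZ opcode); the emit code masks that byte
+ // off and substitutes 0x85 when emitting the TEST instruction.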
+
+ recipes.add_template(
+ Template::new(
+ EncodingRecipeBuilder::new("tjccb", &formats.branch, 1 + 2)
+ .operands_in(vec![gpr])
+ .branch_range((3, 8))
+ .emit(
+ r#"
+ // test r, r.
+ {{PUT_OP}}((bits & 0xff00) | 0x85, rex2(in_reg0, in_reg0), sink);
+ modrm_rr(in_reg0, in_reg0, sink);
+ // Jcc instruction.
+ sink.put1(bits as u8);
+ disp1(destination, func, sink);
+ "#,
+ ),
+ regs,
+ )
+ .inferred_rex_compute_size("size_with_inferred_rex_for_inreg0"),
+ );
+
+ recipes.add_template(
+ Template::new(
+ EncodingRecipeBuilder::new("tjccd", &formats.branch, 1 + 6)
+ .operands_in(vec![gpr])
+ .branch_range((7, 32))
+ .emit(
+ r#"
+ // test r, r.
+ {{PUT_OP}}((bits & 0xff00) | 0x85, rex2(in_reg0, in_reg0), sink);
+ modrm_rr(in_reg0, in_reg0, sink);
+ // Jcc instruction.
+ sink.put1(0x0f);
+ sink.put1(bits as u8);
+ disp4(destination, func, sink);
+ "#,
+ ),
+ regs,
+ )
+ .inferred_rex_compute_size("size_with_inferred_rex_for_inreg0"),
+ );
+
+ // 8-bit test-and-branch.
+
+ let t8jccb = recipes.add_template(
+ Template::new(
+ EncodingRecipeBuilder::new("t8jccb", &formats.branch, 1 + 2)
+ .operands_in(vec![gpr])
+ .branch_range((3, 8))
+ .emit(
+ r#"
+ // test8 r, r.
+ {{PUT_OP}}((bits & 0xff00) | 0x84, rex2(in_reg0, in_reg0), sink);
+ modrm_rr(in_reg0, in_reg0, sink);
+ // Jcc instruction.
+ sink.put1(bits as u8);
+ disp1(destination, func, sink);
+ "#,
+ ),
+ regs,
+ )
+ .rex_kind(RecipePrefixKind::AlwaysEmitRex),
+ );
+
+ recipes.add_template(
+ Template::new(
+ EncodingRecipeBuilder::new("t8jccb_abcd", &formats.branch, 1 + 2)
+ .operands_in(vec![abcd])
+ .branch_range((3, 8))
+ .emit(
+ r#"
+ // test8 r, r.
+ {{PUT_OP}}((bits & 0xff00) | 0x84, rex2(in_reg0, in_reg0), sink);
+ modrm_rr(in_reg0, in_reg0, sink);
+ // Jcc instruction.
+ sink.put1(bits as u8);
+ disp1(destination, func, sink);
+ "#,
+ ),
+ regs,
+ )
+ .when_prefixed(t8jccb),
+ );
+
+ let t8jccd = recipes.add_template(
+ Template::new(
+ EncodingRecipeBuilder::new("t8jccd", &formats.branch, 1 + 6)
+ .operands_in(vec![gpr])
+ .branch_range((7, 32))
+ .emit(
+ r#"
+ // test8 r, r.
+ {{PUT_OP}}((bits & 0xff00) | 0x84, rex2(in_reg0, in_reg0), sink);
+ modrm_rr(in_reg0, in_reg0, sink);
+ // Jcc instruction.
+ sink.put1(0x0f);
+ sink.put1(bits as u8);
+ disp4(destination, func, sink);
+ "#,
+ ),
+ regs,
+ )
+ .rex_kind(RecipePrefixKind::AlwaysEmitRex),
+ );
+
+ recipes.add_template(
+ Template::new(
+ EncodingRecipeBuilder::new("t8jccd_abcd", &formats.branch, 1 + 6)
+ .operands_in(vec![abcd])
+ .branch_range((7, 32))
+ .emit(
+ r#"
+ // test8 r, r.
+ {{PUT_OP}}((bits & 0xff00) | 0x84, rex2(in_reg0, in_reg0), sink);
+ modrm_rr(in_reg0, in_reg0, sink);
+ // Jcc instruction.
+ sink.put1(0x0f);
+ sink.put1(bits as u8);
+ disp4(destination, func, sink);
+ "#,
+ ),
+ regs,
+ )
+ .when_prefixed(t8jccd),
+ );
+
+ // Worst case test-and-branch recipe for brz.b1 and brnz.b1 in 32-bit mode.
+ // The register allocator can't handle a branch instruction with constrained
+ // operands like the t8jccd_abcd above. This variant can accept the b1 operand in
+ // any register, but it is larger because it uses a 32-bit test instruction with
+ // a 0xff immediate.
+
+ recipes.add_template_recipe(
+ EncodingRecipeBuilder::new("t8jccd_long", &formats.branch, 5 + 6)
+ .operands_in(vec![gpr])
+ .branch_range((11, 32))
+ .emit(
+ r#"
+ // test32 r, 0xff.
+ {{PUT_OP}}((bits & 0xff00) | 0xf7, rex1(in_reg0), sink);
+ modrm_r_bits(in_reg0, bits, sink);
+ sink.put4(0xff);
+ // Jcc instruction.
+ sink.put1(0x0f);
+ sink.put1(bits as u8);
+ disp4(destination, func, sink);
+ "#,
+ ),
+ );
+
+ // Comparison that produces a `b1` result in a GPR.
+ //
+ // This is a macro of a `cmp` instruction followed by a `setCC` instruction.
+ //
+ // TODO This is not a great solution because:
+ //
+ // - The cmp+setcc combination is not recognized by the CPU's macro fusion.
+ // - The 64-bit encoding has issues with REX prefixes. The `cmp` and `setCC`
+ // instructions may need a REX independently.
+ // - Modeling CPU flags in the type system would be better.
+ //
+ // Since the `setCC` instructions only write an 8-bit register, we use that as
+ // our `b1` representation: A `b1` value is represented as a GPR where the low 8
+ // bits are known to be 0 or 1. The high bits are undefined.
+ //
+ // This bandaid macro doesn't support a REX prefix for the final `setCC`
+ // instruction, so it is limited to the `ABCD` register class for booleans.
+ // The omission of a `when_prefixed` alternative is deliberate here.
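+ //
+ // For example (illustrative): an `icmp eq x, y` lowered through `icscc` emits a `cmp` on
+ // the two GPR inputs followed by 0F 94 /r (`sete`), leaving 0 or 1 in the low byte of the
+ // ABCD output register.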
+
+ recipes.add_template(
+ Template::new(
+ EncodingRecipeBuilder::new("icscc", &formats.int_compare, 1 + 3)
+ .operands_in(vec![gpr, gpr])
+ .operands_out(vec![abcd])
+ .emit(
+ r#"
+ // Comparison instruction.
+ {{PUT_OP}}(bits, rex2(in_reg0, in_reg1), sink);
+ modrm_rr(in_reg0, in_reg1, sink);
+ // `setCC` instruction, no REX.
+ let setcc = 0x90 | icc2opc(cond);
+ sink.put1(0x0f);
+ sink.put1(setcc as u8);
+ modrm_rr(out_reg0, 0, sink);
+ "#,
+ ),
+ regs,
+ )
+ .inferred_rex_compute_size("size_with_inferred_rex_for_inreg0_inreg1"),
+ );
+
+ recipes.add_template_inferred(
+ EncodingRecipeBuilder::new("icscc_fpr", &formats.int_compare, 1)
+ .operands_in(vec![fpr, fpr])
+ .operands_out(vec![0])
+ .emit(
+ r#"
+ // Comparison instruction.
+ {{PUT_OP}}(bits, rex2(in_reg1, in_reg0), sink);
+ modrm_rr(in_reg1, in_reg0, sink);
+ "#,
+ ),
+ "size_with_inferred_rex_for_inreg0_inreg1",
+ );
+
+ {
+ let is_small_imm =
+ InstructionPredicate::new_is_signed_int(&*formats.int_compare_imm, "imm", 8, 0);
+
+ recipes.add_template(
+ Template::new(
+ EncodingRecipeBuilder::new("icscc_ib", &formats.int_compare_imm, 2 + 3)
+ .operands_in(vec![gpr])
+ .operands_out(vec![abcd])
+ .inst_predicate(is_small_imm)
+ .emit(
+ r#"
+ // Comparison instruction.
+ {{PUT_OP}}(bits, rex1(in_reg0), sink);
+ modrm_r_bits(in_reg0, bits, sink);
+ let imm: i64 = imm.into();
+ sink.put1(imm as u8);
+ // `setCC` instruction, no REX.
+ let setcc = 0x90 | icc2opc(cond);
+ sink.put1(0x0f);
+ sink.put1(setcc as u8);
+ modrm_rr(out_reg0, 0, sink);
+ "#,
+ ),
+ regs,
+ )
+ .inferred_rex_compute_size("size_with_inferred_rex_for_inreg0"),
+ );
+
+ let is_big_imm =
+ InstructionPredicate::new_is_signed_int(&*formats.int_compare_imm, "imm", 32, 0);
+
+ recipes.add_template(
+ Template::new(
+ EncodingRecipeBuilder::new("icscc_id", &formats.int_compare_imm, 5 + 3)
+ .operands_in(vec![gpr])
+ .operands_out(vec![abcd])
+ .inst_predicate(is_big_imm)
+ .emit(
+ r#"
+ // Comparison instruction.
+ {{PUT_OP}}(bits, rex1(in_reg0), sink);
+ modrm_r_bits(in_reg0, bits, sink);
+ let imm: i64 = imm.into();
+ sink.put4(imm as u32);
+ // `setCC` instruction, no REX.
+ let setcc = 0x90 | icc2opc(cond);
+ sink.put1(0x0f);
+ sink.put1(setcc as u8);
+ modrm_rr(out_reg0, 0, sink);
+ "#,
+ ),
+ regs,
+ )
+ .inferred_rex_compute_size("size_with_inferred_rex_for_inreg0"),
+ );
+ }
+
+ // Same thing as `icscc`, but for floating point: a comparison followed by a `setCC`
+ // instruction, with a FloatCompare instruction predicate restricting it to the supported
+ // condition codes.
+ //
+ // The ucomiss/ucomisd instructions set the FLAGS bits ZF/PF/CF like this:
+ //
+ // ZPC OSA
+ // UN 111 000
+ // GT 000 000
+ // LT 001 000
+ // EQ 100 000
+ //
+ // Not all floating point condition codes are supported.
+ // The omission of a `when_prefixed` alternative is deliberate here.
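+ //
+ // For example: `Ordered` maps to `setnp` (0x9B) because, per the table above, PF is set
+ // only for the unordered outcome, so PF = 0 exactly when the operands compare ordered.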
+
+ recipes.add_template_recipe(
+ EncodingRecipeBuilder::new("fcscc", &formats.float_compare, 1 + 3)
+ .operands_in(vec![fpr, fpr])
+ .operands_out(vec![abcd])
+ .inst_predicate(supported_floatccs_predicate(
+ &supported_floatccs,
+ &*formats.float_compare,
+ ))
+ .emit(
+ r#"
+ // Comparison instruction.
+ {{PUT_OP}}(bits, rex2(in_reg1, in_reg0), sink);
+ modrm_rr(in_reg1, in_reg0, sink);
+ // `setCC` instruction, no REX.
+ use crate::ir::condcodes::FloatCC::*;
+ let setcc = match cond {
+ Ordered => 0x9b, // EQ|LT|GT => setnp (P=0)
+ Unordered => 0x9a, // UN => setp (P=1)
+ OrderedNotEqual => 0x95, // LT|GT => setne (Z=0),
+ UnorderedOrEqual => 0x94, // UN|EQ => sete (Z=1)
+ GreaterThan => 0x97, // GT => seta (C=0&Z=0)
+ GreaterThanOrEqual => 0x93, // GT|EQ => setae (C=0)
+ UnorderedOrLessThan => 0x92, // UN|LT => setb (C=1)
+ UnorderedOrLessThanOrEqual => 0x96, // UN|LT|EQ => setbe (Z=1|C=1)
+ Equal | // EQ
+ NotEqual | // UN|LT|GT
+ LessThan | // LT
+ LessThanOrEqual | // LT|EQ
+ UnorderedOrGreaterThan | // UN|GT
+ UnorderedOrGreaterThanOrEqual // UN|GT|EQ
+ => panic!("{} not supported by fcscc", cond),
+ };
+ sink.put1(0x0f);
+ sink.put1(setcc);
+ modrm_rr(out_reg0, 0, sink);
+ "#,
+ ),
+ );
+
+ {
+ let supported_floatccs: Vec<Literal> = ["eq", "lt", "le", "uno", "ne", "uge", "ugt", "ord"]
+ .iter()
+ .map(|name| Literal::enumerator_for(floatcc, name))
+ .collect();
+ recipes.add_template_inferred(
+ EncodingRecipeBuilder::new("pfcmp", &formats.float_compare, 2)
+ .operands_in(vec![fpr, fpr])
+ .operands_out(vec![0])
+ .inst_predicate(supported_floatccs_predicate(
+ &supported_floatccs[..],
+ &*formats.float_compare,
+ ))
+ .emit(
+ r#"
+ // Comparison instruction.
+ {{PUT_OP}}(bits, rex2(in_reg1, in_reg0), sink);
+ modrm_rr(in_reg1, in_reg0, sink);
+ // Add immediate byte indicating what type of comparison.
+ use crate::ir::condcodes::FloatCC::*;
+ let imm = match cond {
+ Equal => 0x00,
+ LessThan => 0x01,
+ LessThanOrEqual => 0x02,
+ Unordered => 0x03,
+ NotEqual => 0x04,
+ UnorderedOrGreaterThanOrEqual => 0x05,
+ UnorderedOrGreaterThan => 0x06,
+ Ordered => 0x07,
+ _ => panic!("{} not supported by pfcmp", cond),
+ };
+ sink.put1(imm);
+ "#,
+ ),
+ "size_with_inferred_rex_for_inreg0_inreg1",
+ );
+ }
+
+ recipes.add_template_recipe(
+ EncodingRecipeBuilder::new("is_zero", &formats.unary, 2 + 2)
+ .operands_in(vec![gpr])
+ .operands_out(vec![abcd])
+ .emit(
+ r#"
+ // Test instruction.
+ {{PUT_OP}}(bits, rex2(in_reg0, in_reg0), sink);
+ modrm_rr(in_reg0, in_reg0, sink);
+ // Check ZF = 1 flag to see if register holds 0.
+ sink.put1(0x0f);
+ sink.put1(0x94);
+ modrm_rr(out_reg0, 0, sink);
+ "#,
+ ),
+ );
+
+ recipes.add_template_recipe(
+ EncodingRecipeBuilder::new("is_invalid", &formats.unary, 2 + 3)
+ .operands_in(vec![gpr])
+ .operands_out(vec![abcd])
+ .emit(
+ r#"
+ // Comparison instruction.
+ {{PUT_OP}}(bits, rex1(in_reg0), sink);
+ modrm_r_bits(in_reg0, bits, sink);
+ sink.put1(0xff);
+ // `setCC` instruction, no REX.
+ use crate::ir::condcodes::IntCC::*;
+ let setcc = 0x90 | icc2opc(Equal);
+ sink.put1(0x0f);
+ sink.put1(setcc as u8);
+ modrm_rr(out_reg0, 0, sink);
+ "#,
+ ),
+ );
+
+ recipes.add_recipe(
+ EncodingRecipeBuilder::new("safepoint", &formats.multiary, 0).emit(
+ r#"
+ sink.add_stack_map(args, func, isa);
+ "#,
+ ),
+ );
+
+ // Both `elf_tls_get_addr` and `macho_tls_get_addr` require all caller-saved registers to be spilled.
+ // This is currently special cased in `regalloc/spilling.rs` in the `visit_inst` function.
+
+ recipes.add_recipe(
+ EncodingRecipeBuilder::new("elf_tls_get_addr", &formats.unary_global_value, 16)
+ // FIXME Correct encoding for non rax registers
+ .operands_out(vec![reg_rax])
+ .emit(
+ r#"
+ // output %rax
+ // clobbers %rdi
+
+ // Those data16 prefixes are necessary to pad to 16 bytes.
+
+ // data16 lea gv@tlsgd(%rip),%rdi
+ sink.put1(0x66); // data16
+ sink.put1(0b01001000); // rex.w
+ const LEA: u8 = 0x8d;
+ sink.put1(LEA); // lea
+ modrm_riprel(0b111/*out_reg0*/, sink); // 0x3d
+ sink.reloc_external(func.srclocs[inst],
+ Reloc::ElfX86_64TlsGd,
+ &func.global_values[global_value].symbol_name(),
+ -4);
+ sink.put4(0);
+
+ // data16 data16 callq __tls_get_addr-4
+ sink.put1(0x66); // data16
+ sink.put1(0x66); // data16
+ sink.put1(0b01001000); // rex.w
+ sink.put1(0xe8); // call
+ sink.reloc_external(func.srclocs[inst],
+ Reloc::X86CallPLTRel4,
+ &ExternalName::LibCall(LibCall::ElfTlsGetAddr),
+ -4);
+ sink.put4(0);
+ "#,
+ ),
+ );
+
+ recipes.add_recipe(
+ EncodingRecipeBuilder::new("macho_tls_get_addr", &formats.unary_global_value, 9)
+ // FIXME Correct encoding for non rax registers
+ .operands_out(vec![reg_rax])
+ .emit(
+ r#"
+ // output %rax
+ // clobbers %rdi
+
+ // movq gv@tlv(%rip), %rdi
+ sink.put1(0x48); // rex
+ sink.put1(0x8b); // mov
+ modrm_riprel(0b111/*out_reg0*/, sink); // 0x3d
+ sink.reloc_external(func.srclocs[inst],
+ Reloc::MachOX86_64Tlv,
+ &func.global_values[global_value].symbol_name(),
+ -4);
+ sink.put4(0);
+
+ // callq *(%rdi)
+ sink.put1(0xff);
+ sink.put1(0x17);
+ "#,
+ ),
+ );
+
+ recipes.add_template(
+ Template::new(
+ EncodingRecipeBuilder::new("evex_reg_vvvv_rm_128", &formats.binary, 1)
+ .operands_in(vec![fpr, fpr])
+ .operands_out(vec![fpr])
+ .emit(
+ r#"
+ // instruction encoding operands: reg (op1, w), vvvv (op2, r), rm (op3, r)
+ // this maps to: out_reg0, in_reg0, in_reg1
+ let context = EvexContext::Other { length: EvexVectorLength::V128 };
+ let masking = EvexMasking::None;
+ put_evex(bits, out_reg0, in_reg0, in_reg1, context, masking, sink); // params: reg, vvvv, rm
+ modrm_rr(in_reg1, out_reg0, sink); // params: rm, reg
+ "#,
+ ),
+ regs).rex_kind(RecipePrefixKind::Evex)
+ );
+
+ recipes.add_template(
+ Template::new(
+ EncodingRecipeBuilder::new("evex_reg_rm_128", &formats.unary, 1)
+ .operands_in(vec![fpr])
+ .operands_out(vec![fpr])
+ .emit(
+ r#"
+ // instruction encoding operands: reg (op1, w), rm (op2, r)
+ // this maps to: out_reg0, in_reg0
+ let context = EvexContext::Other { length: EvexVectorLength::V128 };
+ let masking = EvexMasking::None;
+ put_evex(bits, out_reg0, 0, in_reg0, context, masking, sink); // params: reg, vvvv, rm
+ modrm_rr(in_reg0, out_reg0, sink); // params: rm, reg
+ "#,
+ ),
+ regs).rex_kind(RecipePrefixKind::Evex)
+ );
+
+ recipes
+}
diff --git a/third_party/rust/cranelift-codegen-meta/src/isa/x86/registers.rs b/third_party/rust/cranelift-codegen-meta/src/isa/x86/registers.rs
new file mode 100644
index 0000000000..85a8965f89
--- /dev/null
+++ b/third_party/rust/cranelift-codegen-meta/src/isa/x86/registers.rs
@@ -0,0 +1,43 @@
+use crate::cdsl::regs::{IsaRegs, IsaRegsBuilder, RegBankBuilder, RegClassBuilder};
+
+pub(crate) fn define() -> IsaRegs {
+ let mut regs = IsaRegsBuilder::new();
+
+ let builder = RegBankBuilder::new("FloatRegs", "xmm")
+ .units(16)
+ .track_pressure(true);
+ let float_regs = regs.add_bank(builder);
+
+ let builder = RegBankBuilder::new("IntRegs", "r")
+ .units(16)
+ .names(vec!["rax", "rcx", "rdx", "rbx", "rsp", "rbp", "rsi", "rdi"])
+ .track_pressure(true)
+ .pinned_reg(15);
+ let int_regs = regs.add_bank(builder);
+
+ let builder = RegBankBuilder::new("FlagRegs", "")
+ .units(1)
+ .names(vec!["rflags"])
+ .track_pressure(false);
+ let flag_reg = regs.add_bank(builder);
+
+ let builder = RegClassBuilder::new_toplevel("GPR", int_regs);
+ let gpr = regs.add_class(builder);
+
+ let builder = RegClassBuilder::new_toplevel("FPR", float_regs);
+ let fpr = regs.add_class(builder);
+
+ let builder = RegClassBuilder::new_toplevel("FLAG", flag_reg);
+ regs.add_class(builder);
+
+ let builder = RegClassBuilder::subclass_of("GPR8", gpr, 0, 8);
+ let gpr8 = regs.add_class(builder);
+
+ let builder = RegClassBuilder::subclass_of("ABCD", gpr8, 0, 4);
+ regs.add_class(builder);
+
+ let builder = RegClassBuilder::subclass_of("FPR8", fpr, 0, 8);
+ regs.add_class(builder);
+
+ regs.build()
+}
diff --git a/third_party/rust/cranelift-codegen-meta/src/isa/x86/settings.rs b/third_party/rust/cranelift-codegen-meta/src/isa/x86/settings.rs
new file mode 100644
index 0000000000..dddd69abb3
--- /dev/null
+++ b/third_party/rust/cranelift-codegen-meta/src/isa/x86/settings.rs
@@ -0,0 +1,135 @@
+use crate::cdsl::settings::{PredicateNode, SettingGroup, SettingGroupBuilder};
+
+pub(crate) fn define(shared: &SettingGroup) -> SettingGroup {
+ let mut settings = SettingGroupBuilder::new("x86");
+
+ // CPUID.01H:ECX
+ let has_sse3 = settings.add_bool("has_sse3", "SSE3: CPUID.01H:ECX.SSE3[bit 0]", false);
+ let has_ssse3 = settings.add_bool("has_ssse3", "SSSE3: CPUID.01H:ECX.SSSE3[bit 9]", false);
+ let has_sse41 = settings.add_bool("has_sse41", "SSE4.1: CPUID.01H:ECX.SSE4_1[bit 19]", false);
+ let has_sse42 = settings.add_bool("has_sse42", "SSE4.2: CPUID.01H:ECX.SSE4_2[bit 20]", false);
+ let has_avx = settings.add_bool("has_avx", "AVX: CPUID.01H:ECX.AVX[bit 28]", false);
+ let has_avx2 = settings.add_bool("has_avx2", "AVX2: CPUID.07H:EBX.AVX2[bit 5]", false);
+ let has_avx512dq = settings.add_bool(
+ "has_avx512dq",
+ "AVX512DQ: CPUID.07H:EBX.AVX512DQ[bit 17]",
+ false,
+ );
+ let has_avx512vl = settings.add_bool(
+ "has_avx512vl",
+ "AVX512VL: CPUID.07H:EBX.AVX512VL[bit 31]",
+ false,
+ );
+ let has_avx512f = settings.add_bool(
+ "has_avx512f",
+ "AVX512F: CPUID.07H:EBX.AVX512F[bit 16]",
+ false,
+ );
+ let has_popcnt = settings.add_bool("has_popcnt", "POPCNT: CPUID.01H:ECX.POPCNT[bit 23]", false);
+
+ // CPUID.(EAX=07H, ECX=0H):EBX
+ let has_bmi1 = settings.add_bool(
+ "has_bmi1",
+ "BMI1: CPUID.(EAX=07H, ECX=0H):EBX.BMI1[bit 3]",
+ false,
+ );
+ let has_bmi2 = settings.add_bool(
+ "has_bmi2",
+ "BMI2: CPUID.(EAX=07H, ECX=0H):EBX.BMI2[bit 8]",
+ false,
+ );
+
+ // CPUID.EAX=80000001H:ECX
+ let has_lzcnt = settings.add_bool(
+ "has_lzcnt",
+ "LZCNT: CPUID.EAX=80000001H:ECX.LZCNT[bit 5]",
+ false,
+ );
+
+ let shared_enable_simd = shared.get_bool("enable_simd");
+
+ settings.add_predicate("use_ssse3", predicate!(has_ssse3));
+ settings.add_predicate("use_sse41", predicate!(has_sse41));
+ settings.add_predicate("use_sse42", predicate!(has_sse41 && has_sse42));
+
+ settings.add_predicate(
+ "use_ssse3_simd",
+ predicate!(shared_enable_simd && has_ssse3),
+ );
+ settings.add_predicate(
+ "use_sse41_simd",
+ predicate!(shared_enable_simd && has_sse41),
+ );
+ settings.add_predicate(
+ "use_sse42_simd",
+ predicate!(shared_enable_simd && has_sse41 && has_sse42),
+ );
+
+ settings.add_predicate("use_avx_simd", predicate!(shared_enable_simd && has_avx));
+ settings.add_predicate("use_avx2_simd", predicate!(shared_enable_simd && has_avx2));
+ settings.add_predicate(
+ "use_avx512dq_simd",
+ predicate!(shared_enable_simd && has_avx512dq),
+ );
+ settings.add_predicate(
+ "use_avx512vl_simd",
+ predicate!(shared_enable_simd && has_avx512vl),
+ );
+ settings.add_predicate(
+ "use_avx512f_simd",
+ predicate!(shared_enable_simd && has_avx512f),
+ );
+
+ settings.add_predicate("use_popcnt", predicate!(has_popcnt && has_sse42));
+ settings.add_predicate("use_bmi1", predicate!(has_bmi1));
+ settings.add_predicate("use_lzcnt", predicate!(has_lzcnt));
+
+ // Some shared boolean values are used in x86 instruction predicates, so we need to group them
+ // in the same TargetIsa, for compatibility with code generated by meta-python.
+ // TODO Once all the meta generation code has been migrated from Python to Rust, we can put it
+ // back in the shared SettingGroup, and use it in x86 instruction predicates.
+
+ let is_pic = shared.get_bool("is_pic");
+ let emit_all_ones_funcaddrs = shared.get_bool("emit_all_ones_funcaddrs");
+ settings.add_predicate("is_pic", predicate!(is_pic));
+ settings.add_predicate("not_is_pic", predicate!(!is_pic));
+ settings.add_predicate(
+ "all_ones_funcaddrs_and_not_is_pic",
+ predicate!(emit_all_ones_funcaddrs && !is_pic),
+ );
+ settings.add_predicate(
+ "not_all_ones_funcaddrs_and_not_is_pic",
+ predicate!(!emit_all_ones_funcaddrs && !is_pic),
+ );
+
+ // Presets corresponding to x86 CPUs.
+
+ settings.add_preset("baseline", preset!());
+ let nehalem = settings.add_preset(
+ "nehalem",
+ preset!(has_sse3 && has_ssse3 && has_sse41 && has_sse42 && has_popcnt),
+ );
+ let haswell = settings.add_preset(
+ "haswell",
+ preset!(nehalem && has_bmi1 && has_bmi2 && has_lzcnt),
+ );
+ let broadwell = settings.add_preset("broadwell", preset!(haswell));
+ let skylake = settings.add_preset("skylake", preset!(broadwell));
+ let cannonlake = settings.add_preset("cannonlake", preset!(skylake));
+ settings.add_preset("icelake", preset!(cannonlake));
+ settings.add_preset(
+ "znver1",
+ preset!(
+ has_sse3
+ && has_ssse3
+ && has_sse41
+ && has_sse42
+ && has_popcnt
+ && has_bmi1
+ && has_bmi2
+ && has_lzcnt
+ ),
+ );
+
+ settings.build()
+}
diff --git a/third_party/rust/cranelift-codegen-meta/src/lib.rs b/third_party/rust/cranelift-codegen-meta/src/lib.rs
new file mode 100644
index 0000000000..ead2c4442f
--- /dev/null
+++ b/third_party/rust/cranelift-codegen-meta/src/lib.rs
@@ -0,0 +1,124 @@
+//! This crate generates Rust sources for use by
+//! [`cranelift_codegen`](../cranelift_codegen/index.html).
+#[macro_use]
+mod cdsl;
+mod srcgen;
+
+pub mod error;
+pub mod isa;
+
+mod gen_binemit;
+mod gen_encodings;
+mod gen_inst;
+mod gen_legalizer;
+mod gen_registers;
+mod gen_settings;
+mod gen_types;
+
+mod default_map;
+mod shared;
+mod unique_table;
+
+/// Generate an ISA from an architecture string (e.g. "x86_64").
+pub fn isa_from_arch(arch: &str) -> Result<isa::Isa, String> {
+ isa::Isa::from_arch(arch).ok_or_else(|| format!("no supported isa found for arch `{}`", arch))
+}
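+
+// Editorial usage sketch (the exact variant returned is an assumption; the mapping lives
+// in `isa::Isa::from_arch`):
+//
+//     let isa = isa_from_arch("x86_64")?; // expected: Ok(isa::Isa::X86)
+//     let err = isa_from_arch("m68k");    // Err("no supported isa found for arch `m68k`")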
+
+/// Generates all the Rust source files used in Cranelift from the meta-language.
+pub fn generate(
+ old_backend_isas: &[isa::Isa],
+ new_backend_isas: &[isa::Isa],
+ out_dir: &str,
+) -> Result<(), error::Error> {
+ // Create all the definitions:
+ // - common definitions.
+ let mut shared_defs = shared::define();
+
+ gen_settings::generate(
+ &shared_defs.settings,
+ gen_settings::ParentGroup::None,
+ "settings.rs",
+ &out_dir,
+ )?;
+ gen_types::generate("types.rs", &out_dir)?;
+
+ // - per ISA definitions.
+ let target_isas = isa::define(old_backend_isas, &mut shared_defs);
+
+ // At this point, all definitions are done.
+ let all_formats = shared_defs.verify_instruction_formats();
+
+ // Generate all the code.
+ gen_inst::generate(
+ all_formats,
+ &shared_defs.all_instructions,
+ "opcodes.rs",
+ "inst_builder.rs",
+ &out_dir,
+ )?;
+
+ let extra_legalization_groups: &[&'static str] = if !new_backend_isas.is_empty() {
+ // The new backend only requires the "expand" legalization group.
+ &["expand"]
+ } else {
+ &[]
+ };
+
+ gen_legalizer::generate(
+ &target_isas,
+ &shared_defs.transform_groups,
+ extra_legalization_groups,
+ "legalize",
+ &out_dir,
+ )?;
+
+ for isa in target_isas {
+ gen_registers::generate(&isa, &format!("registers-{}.rs", isa.name), &out_dir)?;
+
+ gen_settings::generate(
+ &isa.settings,
+ gen_settings::ParentGroup::Shared,
+ &format!("settings-{}.rs", isa.name),
+ &out_dir,
+ )?;
+
+ gen_encodings::generate(
+ &shared_defs,
+ &isa,
+ &format!("encoding-{}.rs", isa.name),
+ &out_dir,
+ )?;
+
+ gen_binemit::generate(
+ &isa.name,
+ &isa.recipes,
+ &format!("binemit-{}.rs", isa.name),
+ &out_dir,
+ )?;
+ }
+
+ for isa in new_backend_isas {
+ match isa {
+ isa::Isa::X86 => {
+ // If the old backend ISAs contained x86, this file has already been generated.
+ if old_backend_isas.iter().any(|isa| *isa == isa::Isa::X86) {
+ continue;
+ }
+
+ let settings = crate::isa::x86::settings::define(&shared_defs.settings);
+ gen_settings::generate(
+ &settings,
+ gen_settings::ParentGroup::Shared,
+ "settings-x86.rs",
+ &out_dir,
+ )?;
+ }
+ isa::Isa::Arm64 => {
+ // aarch64 doesn't have platform-specific settings.
+ }
+ isa::Isa::Arm32 | isa::Isa::Riscv => todo!(),
+ }
+ }
+
+ Ok(())
+}
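+
+// Editorial sketch of a build-script style invocation (the chosen ISA lists and the
+// `OUT_DIR` handling are illustrative assumptions, not part of this crate):
+//
+//     let out_dir = std::env::var("OUT_DIR").unwrap();
+//     cranelift_codegen_meta::generate(&[isa::Isa::X86], &[], &out_dir)?;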
diff --git a/third_party/rust/cranelift-codegen-meta/src/shared/entities.rs b/third_party/rust/cranelift-codegen-meta/src/shared/entities.rs
new file mode 100644
index 0000000000..c3f2bc0387
--- /dev/null
+++ b/third_party/rust/cranelift-codegen-meta/src/shared/entities.rs
@@ -0,0 +1,73 @@
+use crate::cdsl::operands::{OperandKind, OperandKindFields};
+
+/// Small helper to build an entity-reference OperandKind for a given name and doc.
+fn new(format_field_name: &'static str, rust_type: &'static str, doc: &'static str) -> OperandKind {
+ OperandKind::new(format_field_name, rust_type, OperandKindFields::EntityRef).with_doc(doc)
+}
+
+pub(crate) struct EntityRefs {
+ /// A reference to a basic block in the same function.
+ /// This is primarliy used in control flow instructions.
+ pub(crate) block: OperandKind,
+
+ /// A reference to a stack slot declared in the function preamble.
+ pub(crate) stack_slot: OperandKind,
+
+ /// A reference to a global value.
+ pub(crate) global_value: OperandKind,
+
+ /// A reference to a function signature declared in the function preamble.
+ /// This is used to provide the call signature in a call_indirect instruction.
+ pub(crate) sig_ref: OperandKind,
+
+ /// A reference to an external function declared in the function preamble.
+ /// This is used to provide the callee and signature in a call instruction.
+ pub(crate) func_ref: OperandKind,
+
+ /// A reference to a jump table declared in the function preamble.
+ pub(crate) jump_table: OperandKind,
+
+ /// A reference to a heap declared in the function preamble.
+ pub(crate) heap: OperandKind,
+
+ /// A reference to a table declared in the function preamble.
+ pub(crate) table: OperandKind,
+
+ /// A variable-sized list of value operands. Used for block and function call arguments.
+ pub(crate) varargs: OperandKind,
+}
+
+impl EntityRefs {
+ pub fn new() -> Self {
+ Self {
+ block: new(
+ "destination",
+ "ir::Block",
+ "a basic block in the same function.",
+ ),
+ stack_slot: new("stack_slot", "ir::StackSlot", "A stack slot"),
+
+ global_value: new("global_value", "ir::GlobalValue", "A global value."),
+
+ sig_ref: new("sig_ref", "ir::SigRef", "A function signature."),
+
+ func_ref: new("func_ref", "ir::FuncRef", "An external function."),
+
+ jump_table: new("table", "ir::JumpTable", "A jump table."),
+
+ heap: new("heap", "ir::Heap", "A heap."),
+
+ table: new("table", "ir::Table", "A table."),
+
+ varargs: OperandKind::new("", "&[Value]", OperandKindFields::VariableArgs).with_doc(
+ r#"
+ A variable size list of `value` operands.
+
+ Use this to represent arguments passed to a function call, arguments
+ passed to a basic block, or a variable number of results
+ returned from an instruction.
+ "#,
+ ),
+ }
+ }
+}
diff --git a/third_party/rust/cranelift-codegen-meta/src/shared/formats.rs b/third_party/rust/cranelift-codegen-meta/src/shared/formats.rs
new file mode 100644
index 0000000000..3d081951a5
--- /dev/null
+++ b/third_party/rust/cranelift-codegen-meta/src/shared/formats.rs
@@ -0,0 +1,330 @@
+use crate::cdsl::formats::{InstructionFormat, InstructionFormatBuilder as Builder};
+use crate::shared::{entities::EntityRefs, immediates::Immediates};
+use std::rc::Rc;
+
+pub(crate) struct Formats {
+ pub(crate) atomic_cas: Rc<InstructionFormat>,
+ pub(crate) atomic_rmw: Rc<InstructionFormat>,
+ pub(crate) binary: Rc<InstructionFormat>,
+ pub(crate) binary_imm8: Rc<InstructionFormat>,
+ pub(crate) binary_imm64: Rc<InstructionFormat>,
+ pub(crate) branch: Rc<InstructionFormat>,
+ pub(crate) branch_float: Rc<InstructionFormat>,
+ pub(crate) branch_icmp: Rc<InstructionFormat>,
+ pub(crate) branch_int: Rc<InstructionFormat>,
+ pub(crate) branch_table: Rc<InstructionFormat>,
+ pub(crate) branch_table_base: Rc<InstructionFormat>,
+ pub(crate) branch_table_entry: Rc<InstructionFormat>,
+ pub(crate) call: Rc<InstructionFormat>,
+ pub(crate) call_indirect: Rc<InstructionFormat>,
+ pub(crate) cond_trap: Rc<InstructionFormat>,
+ pub(crate) copy_special: Rc<InstructionFormat>,
+ pub(crate) copy_to_ssa: Rc<InstructionFormat>,
+ pub(crate) float_compare: Rc<InstructionFormat>,
+ pub(crate) float_cond: Rc<InstructionFormat>,
+ pub(crate) float_cond_trap: Rc<InstructionFormat>,
+ pub(crate) func_addr: Rc<InstructionFormat>,
+ pub(crate) heap_addr: Rc<InstructionFormat>,
+ pub(crate) indirect_jump: Rc<InstructionFormat>,
+ pub(crate) int_compare: Rc<InstructionFormat>,
+ pub(crate) int_compare_imm: Rc<InstructionFormat>,
+ pub(crate) int_cond: Rc<InstructionFormat>,
+ pub(crate) int_cond_trap: Rc<InstructionFormat>,
+ pub(crate) int_select: Rc<InstructionFormat>,
+ pub(crate) jump: Rc<InstructionFormat>,
+ pub(crate) load: Rc<InstructionFormat>,
+ pub(crate) load_complex: Rc<InstructionFormat>,
+ pub(crate) load_no_offset: Rc<InstructionFormat>,
+ pub(crate) multiary: Rc<InstructionFormat>,
+ pub(crate) nullary: Rc<InstructionFormat>,
+ pub(crate) reg_fill: Rc<InstructionFormat>,
+ pub(crate) reg_move: Rc<InstructionFormat>,
+ pub(crate) reg_spill: Rc<InstructionFormat>,
+ pub(crate) shuffle: Rc<InstructionFormat>,
+ pub(crate) stack_load: Rc<InstructionFormat>,
+ pub(crate) stack_store: Rc<InstructionFormat>,
+ pub(crate) store: Rc<InstructionFormat>,
+ pub(crate) store_complex: Rc<InstructionFormat>,
+ pub(crate) store_no_offset: Rc<InstructionFormat>,
+ pub(crate) table_addr: Rc<InstructionFormat>,
+ pub(crate) ternary: Rc<InstructionFormat>,
+ pub(crate) ternary_imm8: Rc<InstructionFormat>,
+ pub(crate) trap: Rc<InstructionFormat>,
+ pub(crate) unary: Rc<InstructionFormat>,
+ pub(crate) unary_bool: Rc<InstructionFormat>,
+ pub(crate) unary_const: Rc<InstructionFormat>,
+ pub(crate) unary_global_value: Rc<InstructionFormat>,
+ pub(crate) unary_ieee32: Rc<InstructionFormat>,
+ pub(crate) unary_ieee64: Rc<InstructionFormat>,
+ pub(crate) unary_imm: Rc<InstructionFormat>,
+}
+
+impl Formats {
+ pub fn new(imm: &Immediates, entities: &EntityRefs) -> Self {
+ Self {
+ unary: Builder::new("Unary").value().build(),
+
+ unary_imm: Builder::new("UnaryImm").imm(&imm.imm64).build(),
+
+ unary_ieee32: Builder::new("UnaryIeee32").imm(&imm.ieee32).build(),
+
+ unary_ieee64: Builder::new("UnaryIeee64").imm(&imm.ieee64).build(),
+
+ unary_bool: Builder::new("UnaryBool").imm(&imm.boolean).build(),
+
+ unary_const: Builder::new("UnaryConst").imm(&imm.pool_constant).build(),
+
+ unary_global_value: Builder::new("UnaryGlobalValue")
+ .imm(&entities.global_value)
+ .build(),
+
+ binary: Builder::new("Binary").value().value().build(),
+
+ binary_imm8: Builder::new("BinaryImm8").value().imm(&imm.uimm8).build(),
+
+ binary_imm64: Builder::new("BinaryImm64").value().imm(&imm.imm64).build(),
+
+ // The select instructions are controlled by the second VALUE operand.
+ // The first VALUE operand is the controlling flag which has a derived type.
+ // The fma instruction has the same constraint on all inputs.
+ ternary: Builder::new("Ternary")
+ .value()
+ .value()
+ .value()
+ .typevar_operand(1)
+ .build(),
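+ // Editorial note: `typevar_operand(1)` encodes the choice described in the comment
+ // above; the index is zero-based over the value operands, so 1 selects the second
+ // value operand as the one that controls type inference for this format.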
+
+ ternary_imm8: Builder::new("TernaryImm8")
+ .value()
+ .imm(&imm.uimm8)
+ .value()
+ .build(),
+
+ // Catch-all for instructions with many outputs and inputs and no immediate
+ // operands.
+ multiary: Builder::new("MultiAry").varargs().build(),
+
+ nullary: Builder::new("NullAry").build(),
+
+ shuffle: Builder::new("Shuffle")
+ .value()
+ .value()
+ .imm_with_name("mask", &imm.uimm128)
+ .build(),
+
+ int_compare: Builder::new("IntCompare")
+ .imm(&imm.intcc)
+ .value()
+ .value()
+ .build(),
+
+ int_compare_imm: Builder::new("IntCompareImm")
+ .imm(&imm.intcc)
+ .value()
+ .imm(&imm.imm64)
+ .build(),
+
+ int_cond: Builder::new("IntCond").imm(&imm.intcc).value().build(),
+
+ float_compare: Builder::new("FloatCompare")
+ .imm(&imm.floatcc)
+ .value()
+ .value()
+ .build(),
+
+ float_cond: Builder::new("FloatCond").imm(&imm.floatcc).value().build(),
+
+ int_select: Builder::new("IntSelect")
+ .imm(&imm.intcc)
+ .value()
+ .value()
+ .value()
+ .build(),
+
+ jump: Builder::new("Jump").imm(&entities.block).varargs().build(),
+
+ branch: Builder::new("Branch")
+ .value()
+ .imm(&entities.block)
+ .varargs()
+ .build(),
+
+ branch_int: Builder::new("BranchInt")
+ .imm(&imm.intcc)
+ .value()
+ .imm(&entities.block)
+ .varargs()
+ .build(),
+
+ branch_float: Builder::new("BranchFloat")
+ .imm(&imm.floatcc)
+ .value()
+ .imm(&entities.block)
+ .varargs()
+ .build(),
+
+ branch_icmp: Builder::new("BranchIcmp")
+ .imm(&imm.intcc)
+ .value()
+ .value()
+ .imm(&entities.block)
+ .varargs()
+ .build(),
+
+ branch_table: Builder::new("BranchTable")
+ .value()
+ .imm(&entities.block)
+ .imm(&entities.jump_table)
+ .build(),
+
+ branch_table_entry: Builder::new("BranchTableEntry")
+ .value()
+ .value()
+ .imm(&imm.uimm8)
+ .imm(&entities.jump_table)
+ .build(),
+
+ branch_table_base: Builder::new("BranchTableBase")
+ .imm(&entities.jump_table)
+ .build(),
+
+ indirect_jump: Builder::new("IndirectJump")
+ .value()
+ .imm(&entities.jump_table)
+ .build(),
+
+ call: Builder::new("Call")
+ .imm(&entities.func_ref)
+ .varargs()
+ .build(),
+
+ call_indirect: Builder::new("CallIndirect")
+ .imm(&entities.sig_ref)
+ .value()
+ .varargs()
+ .build(),
+
+ func_addr: Builder::new("FuncAddr").imm(&entities.func_ref).build(),
+
+ atomic_rmw: Builder::new("AtomicRmw")
+ .imm(&imm.memflags)
+ .imm(&imm.atomic_rmw_op)
+ .value()
+ .value()
+ .build(),
+
+ atomic_cas: Builder::new("AtomicCas")
+ .imm(&imm.memflags)
+ .value()
+ .value()
+ .value()
+ .typevar_operand(2)
+ .build(),
+
+ load: Builder::new("Load")
+ .imm(&imm.memflags)
+ .value()
+ .imm(&imm.offset32)
+ .build(),
+
+ load_complex: Builder::new("LoadComplex")
+ .imm(&imm.memflags)
+ .varargs()
+ .imm(&imm.offset32)
+ .build(),
+
+ load_no_offset: Builder::new("LoadNoOffset")
+ .imm(&imm.memflags)
+ .value()
+ .build(),
+
+ store: Builder::new("Store")
+ .imm(&imm.memflags)
+ .value()
+ .value()
+ .imm(&imm.offset32)
+ .build(),
+
+ store_complex: Builder::new("StoreComplex")
+ .imm(&imm.memflags)
+ .value()
+ .varargs()
+ .imm(&imm.offset32)
+ .build(),
+
+ store_no_offset: Builder::new("StoreNoOffset")
+ .imm(&imm.memflags)
+ .value()
+ .value()
+ .build(),
+
+ stack_load: Builder::new("StackLoad")
+ .imm(&entities.stack_slot)
+ .imm(&imm.offset32)
+ .build(),
+
+ stack_store: Builder::new("StackStore")
+ .value()
+ .imm(&entities.stack_slot)
+ .imm(&imm.offset32)
+ .build(),
+
+ // Accessing a WebAssembly heap.
+ heap_addr: Builder::new("HeapAddr")
+ .imm(&entities.heap)
+ .value()
+ .imm(&imm.uimm32)
+ .build(),
+
+ // Accessing a WebAssembly table.
+ table_addr: Builder::new("TableAddr")
+ .imm(&entities.table)
+ .value()
+ .imm(&imm.offset32)
+ .build(),
+
+ reg_move: Builder::new("RegMove")
+ .value()
+ .imm_with_name("src", &imm.regunit)
+ .imm_with_name("dst", &imm.regunit)
+ .build(),
+
+ copy_special: Builder::new("CopySpecial")
+ .imm_with_name("src", &imm.regunit)
+ .imm_with_name("dst", &imm.regunit)
+ .build(),
+
+ copy_to_ssa: Builder::new("CopyToSsa")
+ .imm_with_name("src", &imm.regunit)
+ .build(),
+
+ reg_spill: Builder::new("RegSpill")
+ .value()
+ .imm_with_name("src", &imm.regunit)
+ .imm_with_name("dst", &entities.stack_slot)
+ .build(),
+
+ reg_fill: Builder::new("RegFill")
+ .value()
+ .imm_with_name("src", &entities.stack_slot)
+ .imm_with_name("dst", &imm.regunit)
+ .build(),
+
+ trap: Builder::new("Trap").imm(&imm.trapcode).build(),
+
+ cond_trap: Builder::new("CondTrap").value().imm(&imm.trapcode).build(),
+
+ int_cond_trap: Builder::new("IntCondTrap")
+ .imm(&imm.intcc)
+ .value()
+ .imm(&imm.trapcode)
+ .build(),
+
+ float_cond_trap: Builder::new("FloatCondTrap")
+ .imm(&imm.floatcc)
+ .value()
+ .imm(&imm.trapcode)
+ .build(),
+ }
+ }
+}
diff --git a/third_party/rust/cranelift-codegen-meta/src/shared/immediates.rs b/third_party/rust/cranelift-codegen-meta/src/shared/immediates.rs
new file mode 100644
index 0000000000..0aa4129daf
--- /dev/null
+++ b/third_party/rust/cranelift-codegen-meta/src/shared/immediates.rs
@@ -0,0 +1,175 @@
+use crate::cdsl::operands::{EnumValues, OperandKind, OperandKindFields};
+
+use std::collections::HashMap;
+
+pub(crate) struct Immediates {
+ /// A 64-bit immediate integer operand.
+ ///
+ /// This type of immediate integer can interact with SSA values with any IntType type.
+ pub imm64: OperandKind,
+
+ /// An unsigned 8-bit immediate integer operand.
+ ///
+ /// This small operand is used to indicate lane indexes in SIMD vectors and immediate bit
+ /// counts on shift instructions.
+ pub uimm8: OperandKind,
+
+ /// An unsigned 32-bit immediate integer operand.
+ pub uimm32: OperandKind,
+
+ /// An unsigned 128-bit immediate integer operand.
+ ///
+ /// This operand is used to pass entire 128-bit vectors as immediates to instructions like
+ /// const.
+ pub uimm128: OperandKind,
+
+ /// A constant stored in the constant pool.
+ ///
+ /// This operand is used to pass constants to instructions like vconst while storing the
+ /// actual bytes in the constant pool.
+ pub pool_constant: OperandKind,
+
+ /// A 32-bit immediate signed offset.
+ ///
+ /// This is used to represent an immediate address offset in load/store instructions.
+ pub offset32: OperandKind,
+
+ /// A 32-bit immediate floating point operand.
+ ///
+ /// IEEE 754-2008 binary32 interchange format.
+ pub ieee32: OperandKind,
+
+ /// A 64-bit immediate floating point operand.
+ ///
+ /// IEEE 754-2008 binary64 interchange format.
+ pub ieee64: OperandKind,
+
+ /// An immediate boolean operand.
+ ///
+ /// This type of immediate boolean can interact with SSA values with any BoolType type.
+ pub boolean: OperandKind,
+
+ /// A condition code for comparing integer values.
+ ///
+ /// This enumerated operand kind is used for the `icmp` instruction and corresponds to the
+ /// `condcodes::IntCC` Rust type.
+ pub intcc: OperandKind,
+
+ /// A condition code for comparing floating point values.
+ ///
+ /// This enumerated operand kind is used for the `fcmp` instruction and corresponds to the
+ /// `condcodes::FloatCC` Rust type.
+ pub floatcc: OperandKind,
+
+ /// Flags for memory operations like `load` and `store`.
+ pub memflags: OperandKind,
+
+ /// A register unit in the current target ISA.
+ pub regunit: OperandKind,
+
+ /// A trap code indicating the reason for trapping.
+ ///
+ /// The Rust enum type also has a `User(u16)` variant for user-provided trap codes.
+ pub trapcode: OperandKind,
+
+ /// A code indicating the arithmetic operation to perform in an atomic_rmw memory access.
+ pub atomic_rmw_op: OperandKind,
+}
+
+fn new_imm(format_field_name: &'static str, rust_type: &'static str) -> OperandKind {
+ OperandKind::new(format_field_name, rust_type, OperandKindFields::ImmValue)
+}
+fn new_enum(
+ format_field_name: &'static str,
+ rust_type: &'static str,
+ values: EnumValues,
+) -> OperandKind {
+ OperandKind::new(
+ format_field_name,
+ rust_type,
+ OperandKindFields::ImmEnum(values),
+ )
+}
+
+impl Immediates {
+ pub fn new() -> Self {
+ Self {
+ imm64: new_imm("imm", "ir::immediates::Imm64").with_doc("A 64-bit immediate integer."),
+ uimm8: new_imm("imm", "ir::immediates::Uimm8")
+ .with_doc("An 8-bit immediate unsigned integer."),
+ uimm32: new_imm("imm", "ir::immediates::Uimm32")
+ .with_doc("A 32-bit immediate unsigned integer."),
+ uimm128: new_imm("imm", "ir::Immediate")
+ .with_doc("A 128-bit immediate unsigned integer."),
+ pool_constant: new_imm("constant_handle", "ir::Constant")
+ .with_doc("A constant stored in the constant pool."),
+ offset32: new_imm("offset", "ir::immediates::Offset32")
+ .with_doc("A 32-bit immediate signed offset."),
+ ieee32: new_imm("imm", "ir::immediates::Ieee32")
+ .with_doc("A 32-bit immediate floating point number."),
+ ieee64: new_imm("imm", "ir::immediates::Ieee64")
+ .with_doc("A 64-bit immediate floating point number."),
+ boolean: new_imm("imm", "bool").with_doc("An immediate boolean."),
+ intcc: {
+ let mut intcc_values = HashMap::new();
+ intcc_values.insert("eq", "Equal");
+ intcc_values.insert("ne", "NotEqual");
+ intcc_values.insert("sge", "SignedGreaterThanOrEqual");
+ intcc_values.insert("sgt", "SignedGreaterThan");
+ intcc_values.insert("sle", "SignedLessThanOrEqual");
+ intcc_values.insert("slt", "SignedLessThan");
+ intcc_values.insert("uge", "UnsignedGreaterThanOrEqual");
+ intcc_values.insert("ugt", "UnsignedGreaterThan");
+ intcc_values.insert("ule", "UnsignedLessThanOrEqual");
+ intcc_values.insert("ult", "UnsignedLessThan");
+ intcc_values.insert("of", "Overflow");
+ intcc_values.insert("nof", "NotOverflow");
+ new_enum("cond", "ir::condcodes::IntCC", intcc_values)
+ .with_doc("An integer comparison condition code.")
+ },
+
+ floatcc: {
+ let mut floatcc_values = HashMap::new();
+ floatcc_values.insert("ord", "Ordered");
+ floatcc_values.insert("uno", "Unordered");
+ floatcc_values.insert("eq", "Equal");
+ floatcc_values.insert("ne", "NotEqual");
+ floatcc_values.insert("one", "OrderedNotEqual");
+ floatcc_values.insert("ueq", "UnorderedOrEqual");
+ floatcc_values.insert("lt", "LessThan");
+ floatcc_values.insert("le", "LessThanOrEqual");
+ floatcc_values.insert("gt", "GreaterThan");
+ floatcc_values.insert("ge", "GreaterThanOrEqual");
+ floatcc_values.insert("ult", "UnorderedOrLessThan");
+ floatcc_values.insert("ule", "UnorderedOrLessThanOrEqual");
+ floatcc_values.insert("ugt", "UnorderedOrGreaterThan");
+ floatcc_values.insert("uge", "UnorderedOrGreaterThanOrEqual");
+ new_enum("cond", "ir::condcodes::FloatCC", floatcc_values)
+ .with_doc("A floating point comparison condition code")
+ },
+
+ memflags: new_imm("flags", "ir::MemFlags").with_doc("Memory operation flags"),
+ regunit: new_imm("regunit", "isa::RegUnit")
+ .with_doc("A register unit in the target ISA"),
+ trapcode: {
+ let mut trapcode_values = HashMap::new();
+ trapcode_values.insert("stk_ovf", "StackOverflow");
+ trapcode_values.insert("heap_oob", "HeapOutOfBounds");
+ trapcode_values.insert("int_ovf", "IntegerOverflow");
+ trapcode_values.insert("int_divz", "IntegerDivisionByZero");
+ new_enum("code", "ir::TrapCode", trapcode_values).with_doc("A trap reason code.")
+ },
+ atomic_rmw_op: {
+ let mut atomic_rmw_op_values = HashMap::new();
+ atomic_rmw_op_values.insert("add", "Add");
+ atomic_rmw_op_values.insert("sub", "Sub");
+ atomic_rmw_op_values.insert("and", "And");
+ atomic_rmw_op_values.insert("or", "Or");
+ atomic_rmw_op_values.insert("xor", "Xor");
+ atomic_rmw_op_values.insert("xchg", "Xchg");
+ new_enum("op", "ir::AtomicRmwOp", atomic_rmw_op_values)
+ .with_doc("Atomic Read-Modify-Write Ops")
+ },
+ }
+ }
+}
diff --git a/third_party/rust/cranelift-codegen-meta/src/shared/instructions.rs b/third_party/rust/cranelift-codegen-meta/src/shared/instructions.rs
new file mode 100644
index 0000000000..bd1444d79c
--- /dev/null
+++ b/third_party/rust/cranelift-codegen-meta/src/shared/instructions.rs
@@ -0,0 +1,4514 @@
+#![allow(non_snake_case)]
+
+use crate::cdsl::instructions::{
+ AllInstructions, InstructionBuilder as Inst, InstructionGroup, InstructionGroupBuilder,
+};
+use crate::cdsl::operands::Operand;
+use crate::cdsl::type_inference::Constraint::WiderOrEq;
+use crate::cdsl::types::{LaneType, ValueType};
+use crate::cdsl::typevar::{Interval, TypeSetBuilder, TypeVar};
+use crate::shared::formats::Formats;
+use crate::shared::types;
+use crate::shared::{entities::EntityRefs, immediates::Immediates};
+
+#[inline(never)]
+fn define_control_flow(
+ ig: &mut InstructionGroupBuilder,
+ formats: &Formats,
+ imm: &Immediates,
+ entities: &EntityRefs,
+) {
+ let block = &Operand::new("block", &entities.block).with_doc("Destination basic block");
+ let args = &Operand::new("args", &entities.varargs).with_doc("block arguments");
+
+ ig.push(
+ Inst::new(
+ "jump",
+ r#"
+ Jump.
+
+ Unconditionally jump to a basic block, passing the specified
+ block arguments. The number and types of arguments must match the
+ destination block.
+ "#,
+ &formats.jump,
+ )
+ .operands_in(vec![block, args])
+ .is_terminator(true)
+ .is_branch(true),
+ );
+
+ ig.push(
+ Inst::new(
+ "fallthrough",
+ r#"
+ Fall through to the next block.
+
+ This is the same as `jump`, except the destination block must be
+ the next one in the layout.
+
+ Jumps are turned into fall-through instructions by the branch
+ relaxation pass. There is no reason to use this instruction outside
+ that pass.
+ "#,
+ &formats.jump,
+ )
+ .operands_in(vec![block, args])
+ .is_terminator(true)
+ .is_branch(true),
+ );
+
+ let Testable = &TypeVar::new(
+ "Testable",
+ "A scalar boolean or integer type",
+ TypeSetBuilder::new()
+ .ints(Interval::All)
+ .bools(Interval::All)
+ .build(),
+ );
+
+ {
+ let c = &Operand::new("c", Testable).with_doc("Controlling value to test");
+
+ ig.push(
+ Inst::new(
+ "brz",
+ r#"
+ Branch when zero.
+
+ If ``c`` is a `b1` value, take the branch when ``c`` is false. If
+ ``c`` is an integer value, take the branch when ``c = 0``.
+ "#,
+ &formats.branch,
+ )
+ .operands_in(vec![c, block, args])
+ .is_branch(true),
+ );
+
+ ig.push(
+ Inst::new(
+ "brnz",
+ r#"
+ Branch when non-zero.
+
+ If ``c`` is a `b1` value, take the branch when ``c`` is true. If
+ ``c`` is an integer value, take the branch when ``c != 0``.
+ "#,
+ &formats.branch,
+ )
+ .operands_in(vec![c, block, args])
+ .is_branch(true),
+ );
+ }
+
+ let iB = &TypeVar::new(
+ "iB",
+ "A scalar integer type",
+ TypeSetBuilder::new().ints(Interval::All).build(),
+ );
+ let iflags: &TypeVar = &ValueType::Special(types::Flag::IFlags.into()).into();
+ let fflags: &TypeVar = &ValueType::Special(types::Flag::FFlags.into()).into();
+
+ {
+ let Cond = &Operand::new("Cond", &imm.intcc);
+ let x = &Operand::new("x", iB);
+ let y = &Operand::new("y", iB);
+
+ ig.push(
+ Inst::new(
+ "br_icmp",
+ r#"
+ Compare scalar integers and branch.
+
+ Compare ``x`` and ``y`` in the same way as the `icmp` instruction
+ and take the branch if the condition is true:
+
+ ```text
+ br_icmp ugt v1, v2, block4(v5, v6)
+ ```
+
+ is semantically equivalent to:
+
+ ```text
+ v10 = icmp ugt, v1, v2
+ brnz v10, block4(v5, v6)
+ ```
+
+ Some RISC architectures like MIPS and RISC-V provide instructions that
+ implement all or some of the condition codes. The instruction can also
+ be used to represent *macro-op fusion* on architectures like Intel's.
+ "#,
+ &formats.branch_icmp,
+ )
+ .operands_in(vec![Cond, x, y, block, args])
+ .is_branch(true),
+ );
+
+ let f = &Operand::new("f", iflags);
+
+ ig.push(
+ Inst::new(
+ "brif",
+ r#"
+ Branch when condition is true in integer CPU flags.
+ "#,
+ &formats.branch_int,
+ )
+ .operands_in(vec![Cond, f, block, args])
+ .is_branch(true),
+ );
+ }
+
+ {
+ let Cond = &Operand::new("Cond", &imm.floatcc);
+
+ let f = &Operand::new("f", fflags);
+
+ ig.push(
+ Inst::new(
+ "brff",
+ r#"
+ Branch when condition is true in floating point CPU flags.
+ "#,
+ &formats.branch_float,
+ )
+ .operands_in(vec![Cond, f, block, args])
+ .is_branch(true),
+ );
+ }
+
+ {
+ let x = &Operand::new("x", iB).with_doc("index into jump table");
+ let JT = &Operand::new("JT", &entities.jump_table);
+
+ ig.push(
+ Inst::new(
+ "br_table",
+ r#"
+ Indirect branch via jump table.
+
+ Use ``x`` as an unsigned index into the jump table ``JT``. If a jump
+ table entry is found, branch to the corresponding block. If no entry was
+ found or the index is out-of-bounds, branch to the given default block.
+
+ Note that this branch instruction can't pass arguments to the targeted
+ blocks. Split critical edges as needed to work around this.
+
+ Do not confuse this with "tables" in WebAssembly. ``br_table`` is for
+ jump tables with destinations within the current function only -- think
+ of a ``match`` in Rust or a ``switch`` in C. If you want to call a
+ function in a dynamic library, that will typically use
+ ``call_indirect``.
+ "#,
+ &formats.branch_table,
+ )
+ .operands_in(vec![x, block, JT])
+ .is_terminator(true)
+ .is_branch(true),
+ );
+ }
+
+ let iAddr = &TypeVar::new(
+ "iAddr",
+ "An integer address type",
+ TypeSetBuilder::new().ints(32..64).refs(32..64).build(),
+ );
+
+ {
+ let x = &Operand::new("x", iAddr).with_doc("index into jump table");
+ let addr = &Operand::new("addr", iAddr);
+ let Size = &Operand::new("Size", &imm.uimm8).with_doc("Size in bytes");
+ let JT = &Operand::new("JT", &entities.jump_table);
+ let entry = &Operand::new("entry", iAddr).with_doc("entry of jump table");
+
+ ig.push(
+ Inst::new(
+ "jump_table_entry",
+ r#"
+ Get an entry from a jump table.
+
+ Load a serialized ``entry`` from a jump table ``JT`` at a given index
+ ``addr`` with a specific ``Size``. The retrieved entry may need to be
+ decoded after loading, depending upon the jump table type used.
+
+ Currently, the only type supported is entries which are relative to the
+ base of the jump table.
+ "#,
+ &formats.branch_table_entry,
+ )
+ .operands_in(vec![x, addr, Size, JT])
+ .operands_out(vec![entry])
+ .can_load(true),
+ );
+
+ ig.push(
+ Inst::new(
+ "jump_table_base",
+ r#"
+ Get the absolute base address of a jump table.
+
+ This is used for jump tables wherein the entries are stored relative to
+ the base of the jump table. In order to use these, generated code should first
+ load an entry using ``jump_table_entry``, then use this instruction to add
+ the relative base back to it.
+ "#,
+ &formats.branch_table_base,
+ )
+ .operands_in(vec![JT])
+ .operands_out(vec![addr]),
+ );
+
+ ig.push(
+ Inst::new(
+ "indirect_jump_table_br",
+ r#"
+ Branch indirectly via a jump table entry.
+
+ Unconditionally jump via a jump table entry that was previously loaded
+ with the ``jump_table_entry`` instruction.
+ "#,
+ &formats.indirect_jump,
+ )
+ .operands_in(vec![addr, JT])
+ .is_indirect_branch(true)
+ .is_terminator(true)
+ .is_branch(true),
+ );
+ }
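+
+ // Editorial pseudocode sketch of how the three jump-table primitives above chain
+ // together when lowering a table dispatch (names and wiring are illustrative only):
+ //
+ //     base   = jump_table_base JT            // absolute base address of the table
+ //     entry  = jump_table_entry x, base, Size, JT
+ //     target = base + entry                  // rebase the relative entry
+ //     indirect_jump_table_br target, JT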
+
+ ig.push(
+ Inst::new(
+ "debugtrap",
+ r#"
+ Encodes an assembly debug trap.
+ "#,
+ &formats.nullary,
+ )
+ .other_side_effects(true)
+ .can_load(true)
+ .can_store(true),
+ );
+
+ {
+ let code = &Operand::new("code", &imm.trapcode);
+ ig.push(
+ Inst::new(
+ "trap",
+ r#"
+ Terminate execution unconditionally.
+ "#,
+ &formats.trap,
+ )
+ .operands_in(vec![code])
+ .can_trap(true)
+ .is_terminator(true),
+ );
+
+ let c = &Operand::new("c", Testable).with_doc("Controlling value to test");
+ ig.push(
+ Inst::new(
+ "trapz",
+ r#"
+ Trap when zero.
+
+ If ``c`` is non-zero, execution continues at the following instruction.
+ "#,
+ &formats.cond_trap,
+ )
+ .operands_in(vec![c, code])
+ .can_trap(true),
+ );
+
+ ig.push(
+ Inst::new(
+ "resumable_trap",
+ r#"
+ A resumable trap.
+
+ This instruction allows non-conditional traps to be used as non-terminal instructions.
+ "#,
+ &formats.trap,
+ )
+ .operands_in(vec![code])
+ .can_trap(true),
+ );
+
+ let c = &Operand::new("c", Testable).with_doc("Controlling value to test");
+ ig.push(
+ Inst::new(
+ "trapnz",
+ r#"
+ Trap when non-zero.
+
+ If ``c`` is zero, execution continues at the following instruction.
+ "#,
+ &formats.cond_trap,
+ )
+ .operands_in(vec![c, code])
+ .can_trap(true),
+ );
+
+ ig.push(
+ Inst::new(
+ "resumable_trapnz",
+ r#"
+ A resumable trap to be called when the passed condition is non-zero.
+
+ If ``c`` is zero, execution continues at the following instruction.
+ "#,
+ &formats.cond_trap,
+ )
+ .operands_in(vec![c, code])
+ .can_trap(true),
+ );
+
+ let Cond = &Operand::new("Cond", &imm.intcc);
+ let f = &Operand::new("f", iflags);
+ ig.push(
+ Inst::new(
+ "trapif",
+ r#"
+ Trap when condition is true in integer CPU flags.
+ "#,
+ &formats.int_cond_trap,
+ )
+ .operands_in(vec![Cond, f, code])
+ .can_trap(true),
+ );
+
+ let Cond = &Operand::new("Cond", &imm.floatcc);
+ let f = &Operand::new("f", fflags);
+ let code = &Operand::new("code", &imm.trapcode);
+ ig.push(
+ Inst::new(
+ "trapff",
+ r#"
+ Trap when condition is true in floating point CPU flags.
+ "#,
+ &formats.float_cond_trap,
+ )
+ .operands_in(vec![Cond, f, code])
+ .can_trap(true),
+ );
+ }
+
+ let rvals = &Operand::new("rvals", &entities.varargs).with_doc("return values");
+ ig.push(
+ Inst::new(
+ "return",
+ r#"
+ Return from the function.
+
+ Unconditionally transfer control to the calling function, passing the
+ provided return values. The list of return values must match the
+ function signature's return types.
+ "#,
+ &formats.multiary,
+ )
+ .operands_in(vec![rvals])
+ .is_return(true)
+ .is_terminator(true),
+ );
+
+ let rvals = &Operand::new("rvals", &entities.varargs).with_doc("return values");
+ ig.push(
+ Inst::new(
+ "fallthrough_return",
+ r#"
+ Return from the function by fallthrough.
+
+ This is a specialized instruction for use where one wants to append
+ a custom epilogue, which will then perform the real return. This
+ instruction has no encoding.
+ "#,
+ &formats.multiary,
+ )
+ .operands_in(vec![rvals])
+ .is_return(true)
+ .is_terminator(true),
+ );
+
+ let FN = &Operand::new("FN", &entities.func_ref)
+ .with_doc("function to call, declared by `function`");
+ let args = &Operand::new("args", &entities.varargs).with_doc("call arguments");
+ let rvals = &Operand::new("rvals", &entities.varargs).with_doc("return values");
+ ig.push(
+ Inst::new(
+ "call",
+ r#"
+ Direct function call.
+
+ Call a function which has been declared in the preamble. The argument
+ types must match the function's signature.
+ "#,
+ &formats.call,
+ )
+ .operands_in(vec![FN, args])
+ .operands_out(vec![rvals])
+ .is_call(true),
+ );
+
+ let SIG = &Operand::new("SIG", &entities.sig_ref).with_doc("function signature");
+ let callee = &Operand::new("callee", iAddr).with_doc("address of function to call");
+ let args = &Operand::new("args", &entities.varargs).with_doc("call arguments");
+ let rvals = &Operand::new("rvals", &entities.varargs).with_doc("return values");
+ ig.push(
+ Inst::new(
+ "call_indirect",
+ r#"
+ Indirect function call.
+
+ Call the function pointed to by `callee` with the given arguments. The
+ called function must match the specified signature.
+
+ Note that this is different from WebAssembly's ``call_indirect``; the
+ callee is a native address, rather than a table index. For WebAssembly,
+ `table_addr` and `load` are used to obtain a native address
+ from a table.
+ "#,
+ &formats.call_indirect,
+ )
+ .operands_in(vec![SIG, callee, args])
+ .operands_out(vec![rvals])
+ .is_call(true),
+ );
+
+ let FN = &Operand::new("FN", &entities.func_ref)
+ .with_doc("function to call, declared by `function`");
+ let addr = &Operand::new("addr", iAddr);
+ ig.push(
+ Inst::new(
+ "func_addr",
+ r#"
+ Get the address of a function.
+
+ Compute the absolute address of a function declared in the preamble.
+ The returned address can be used as a ``callee`` argument to
+ `call_indirect`. This is also a method for calling functions that
+ are too far away to be addressable by a direct `call`
+ instruction.
+ "#,
+ &formats.func_addr,
+ )
+ .operands_in(vec![FN])
+ .operands_out(vec![addr]),
+ );
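+
+ // Editorial CLIF-style sketch tying `func_addr` and `call_indirect` together (value
+ // numbers made up; treat the syntax as illustrative rather than exact):
+ //
+ //     v3 = func_addr.i64 fn0
+ //     v4 = call_indirect sig0, v3(v1, v2)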
+}
+
+#[inline(never)]
+fn define_simd_lane_access(
+ ig: &mut InstructionGroupBuilder,
+ formats: &Formats,
+ imm: &Immediates,
+ _: &EntityRefs,
+) {
+ let TxN = &TypeVar::new(
+ "TxN",
+ "A SIMD vector type",
+ TypeSetBuilder::new()
+ .ints(Interval::All)
+ .floats(Interval::All)
+ .bools(Interval::All)
+ .simd_lanes(Interval::All)
+ .includes_scalars(false)
+ .build(),
+ );
+
+ let x = &Operand::new("x", &TxN.lane_of()).with_doc("Value to splat to all lanes");
+ let a = &Operand::new("a", TxN);
+
+ ig.push(
+ Inst::new(
+ "splat",
+ r#"
+ Vector splat.
+
+ Return a vector whose lanes are all ``x``.
+ "#,
+ &formats.unary,
+ )
+ .operands_in(vec![x])
+ .operands_out(vec![a]),
+ );
+
+ let I8x16 = &TypeVar::new(
+ "I8x16",
+ "A SIMD vector type consisting of 16 lanes of 8-bit integers",
+ TypeSetBuilder::new()
+ .ints(8..8)
+ .simd_lanes(16..16)
+ .includes_scalars(false)
+ .build(),
+ );
+ let x = &Operand::new("x", I8x16).with_doc("Vector to modify by re-arranging lanes");
+ let y = &Operand::new("y", I8x16).with_doc("Mask for re-arranging lanes");
+
+ ig.push(
+ Inst::new(
+ "swizzle",
+ r#"
+ Vector swizzle.
+
+ Returns a new vector with byte-width lanes selected from the lanes of the first input
+ vector ``x`` as specified by the second input vector ``y``. An index ``i`` in the range
+ ``[0, 15]`` selects the ``i``-th element of ``x``; for indices outside of that range, the
+ resulting lane is 0. Note that this operates on byte-width lanes.
+ "#,
+ &formats.binary,
+ )
+ .operands_in(vec![x, y])
+ .operands_out(vec![a]),
+ );
+
+ let x = &Operand::new("x", TxN).with_doc("The vector to modify");
+ let y = &Operand::new("y", &TxN.lane_of()).with_doc("New lane value");
+ let Idx = &Operand::new("Idx", &imm.uimm8).with_doc("Lane index");
+
+ ig.push(
+ Inst::new(
+ "insertlane",
+ r#"
+ Insert ``y`` as lane ``Idx`` in ``x``.
+
+ The lane index, ``Idx``, is an immediate value, not an SSA value. It
+ must indicate a valid lane index for the type of ``x``.
+ "#,
+ &formats.ternary_imm8,
+ )
+ .operands_in(vec![x, y, Idx])
+ .operands_out(vec![a]),
+ );
+
+ let x = &Operand::new("x", TxN);
+ let a = &Operand::new("a", &TxN.lane_of());
+
+ ig.push(
+ Inst::new(
+ "extractlane",
+ r#"
+ Extract lane ``Idx`` from ``x``.
+
+ The lane index, ``Idx``, is an immediate value, not an SSA value. It
+ must indicate a valid lane index for the type of ``x``. Note that the upper bits of ``a``
+ may or may not be zeroed depending on the ISA but the type system should prevent using
+ ``a`` as anything other than the extracted value.
+ "#,
+ &formats.binary_imm8,
+ )
+ .operands_in(vec![x, Idx])
+ .operands_out(vec![a]),
+ );
+}
+
+#[inline(never)]
+fn define_simd_arithmetic(
+ ig: &mut InstructionGroupBuilder,
+ formats: &Formats,
+ _: &Immediates,
+ _: &EntityRefs,
+) {
+ let Int = &TypeVar::new(
+ "Int",
+ "A scalar or vector integer type",
+ TypeSetBuilder::new()
+ .ints(Interval::All)
+ .simd_lanes(Interval::All)
+ .build(),
+ );
+
+ let a = &Operand::new("a", Int);
+ let x = &Operand::new("x", Int);
+ let y = &Operand::new("y", Int);
+
+ ig.push(
+ Inst::new(
+ "imin",
+ r#"
+ Signed integer minimum.
+ "#,
+ &formats.binary,
+ )
+ .operands_in(vec![x, y])
+ .operands_out(vec![a]),
+ );
+
+ ig.push(
+ Inst::new(
+ "umin",
+ r#"
+ Unsigned integer minimum.
+ "#,
+ &formats.binary,
+ )
+ .operands_in(vec![x, y])
+ .operands_out(vec![a]),
+ );
+
+ ig.push(
+ Inst::new(
+ "imax",
+ r#"
+ Signed integer maximum.
+ "#,
+ &formats.binary,
+ )
+ .operands_in(vec![x, y])
+ .operands_out(vec![a]),
+ );
+
+ ig.push(
+ Inst::new(
+ "umax",
+ r#"
+ Unsigned integer maximum.
+ "#,
+ &formats.binary,
+ )
+ .operands_in(vec![x, y])
+ .operands_out(vec![a]),
+ );
+
+ let IxN = &TypeVar::new(
+ "IxN",
+ "A SIMD vector type containing integers",
+ TypeSetBuilder::new()
+ .ints(Interval::All)
+ .simd_lanes(Interval::All)
+ .includes_scalars(false)
+ .build(),
+ );
+
+ let a = &Operand::new("a", IxN);
+ let x = &Operand::new("x", IxN);
+ let y = &Operand::new("y", IxN);
+
+ ig.push(
+ Inst::new(
+ "avg_round",
+ r#"
+ Unsigned average with rounding: `a := (x + y + 1) // 2`
+ "#,
+ &formats.binary,
+ )
+ .operands_in(vec![x, y])
+ .operands_out(vec![a]),
+ );
+}
+
+#[allow(clippy::many_single_char_names)]
+pub(crate) fn define(
+ all_instructions: &mut AllInstructions,
+ formats: &Formats,
+ imm: &Immediates,
+ entities: &EntityRefs,
+) -> InstructionGroup {
+ let mut ig = InstructionGroupBuilder::new(all_instructions);
+
+ define_control_flow(&mut ig, formats, imm, entities);
+ define_simd_lane_access(&mut ig, formats, imm, entities);
+ define_simd_arithmetic(&mut ig, formats, imm, entities);
+
+ // Operand kind shorthands.
+ let iflags: &TypeVar = &ValueType::Special(types::Flag::IFlags.into()).into();
+ let fflags: &TypeVar = &ValueType::Special(types::Flag::FFlags.into()).into();
+
+ let b1: &TypeVar = &ValueType::from(LaneType::from(types::Bool::B1)).into();
+ let f32_: &TypeVar = &ValueType::from(LaneType::from(types::Float::F32)).into();
+ let f64_: &TypeVar = &ValueType::from(LaneType::from(types::Float::F64)).into();
+
+ // Starting definitions.
+ let Int = &TypeVar::new(
+ "Int",
+ "A scalar or vector integer type",
+ TypeSetBuilder::new()
+ .ints(Interval::All)
+ .simd_lanes(Interval::All)
+ .build(),
+ );
+
+ let Bool = &TypeVar::new(
+ "Bool",
+ "A scalar or vector boolean type",
+ TypeSetBuilder::new()
+ .bools(Interval::All)
+ .simd_lanes(Interval::All)
+ .build(),
+ );
+
+ let iB = &TypeVar::new(
+ "iB",
+ "A scalar integer type",
+ TypeSetBuilder::new().ints(Interval::All).build(),
+ );
+
+ let iAddr = &TypeVar::new(
+ "iAddr",
+ "An integer address type",
+ TypeSetBuilder::new().ints(32..64).refs(32..64).build(),
+ );
+
+ let Ref = &TypeVar::new(
+ "Ref",
+ "A scalar reference type",
+ TypeSetBuilder::new().refs(Interval::All).build(),
+ );
+
+ let Testable = &TypeVar::new(
+ "Testable",
+ "A scalar boolean or integer type",
+ TypeSetBuilder::new()
+ .ints(Interval::All)
+ .bools(Interval::All)
+ .build(),
+ );
+
+ let TxN = &TypeVar::new(
+ "TxN",
+ "A SIMD vector type",
+ TypeSetBuilder::new()
+ .ints(Interval::All)
+ .floats(Interval::All)
+ .bools(Interval::All)
+ .simd_lanes(Interval::All)
+ .includes_scalars(false)
+ .build(),
+ );
+ let Any = &TypeVar::new(
+ "Any",
+ "Any integer, float, boolean, or reference scalar or vector type",
+ TypeSetBuilder::new()
+ .ints(Interval::All)
+ .floats(Interval::All)
+ .bools(Interval::All)
+ .refs(Interval::All)
+ .simd_lanes(Interval::All)
+ .includes_scalars(true)
+ .build(),
+ );
+
+ let AnyTo = &TypeVar::copy_from(Any, "AnyTo".to_string());
+
+ let Mem = &TypeVar::new(
+ "Mem",
+ "Any type that can be stored in memory",
+ TypeSetBuilder::new()
+ .ints(Interval::All)
+ .floats(Interval::All)
+ .simd_lanes(Interval::All)
+ .refs(Interval::All)
+ .build(),
+ );
+
+ let MemTo = &TypeVar::copy_from(Mem, "MemTo".to_string());
+
+ let addr = &Operand::new("addr", iAddr);
+
+ let SS = &Operand::new("SS", &entities.stack_slot);
+ let Offset = &Operand::new("Offset", &imm.offset32).with_doc("Byte offset from base address");
+ let x = &Operand::new("x", Mem).with_doc("Value to be stored");
+ let a = &Operand::new("a", Mem).with_doc("Value loaded");
+ let p = &Operand::new("p", iAddr);
+ let MemFlags = &Operand::new("MemFlags", &imm.memflags);
+ let args = &Operand::new("args", &entities.varargs).with_doc("Address arguments");
+
+ ig.push(
+ Inst::new(
+ "load",
+ r#"
+ Load from memory at ``p + Offset``.
+
+ This is a polymorphic instruction that can load any value type which
+ has a memory representation.
+ "#,
+ &formats.load,
+ )
+ .operands_in(vec![MemFlags, p, Offset])
+ .operands_out(vec![a])
+ .can_load(true),
+ );
+
+ ig.push(
+ Inst::new(
+ "load_complex",
+ r#"
+ Load from memory at ``sum(args) + Offset``.
+
+ This is a polymorphic instruction that can load any value type which
+ has a memory representation.
+ "#,
+ &formats.load_complex,
+ )
+ .operands_in(vec![MemFlags, args, Offset])
+ .operands_out(vec![a])
+ .can_load(true),
+ );
+
+ ig.push(
+ Inst::new(
+ "store",
+ r#"
+ Store ``x`` to memory at ``p + Offset``.
+
+ This is a polymorphic instruction that can store any value type with a
+ memory representation.
+ "#,
+ &formats.store,
+ )
+ .operands_in(vec![MemFlags, x, p, Offset])
+ .can_store(true),
+ );
+
+ ig.push(
+ Inst::new(
+ "store_complex",
+ r#"
+ Store ``x`` to memory at ``sum(args) + Offset``.
+
+ This is a polymorphic instruction that can store any value type with a
+ memory representation.
+ "#,
+ &formats.store_complex,
+ )
+ .operands_in(vec![MemFlags, x, args, Offset])
+ .can_store(true),
+ );
+
+ let iExt8 = &TypeVar::new(
+ "iExt8",
+ "An integer type with more than 8 bits",
+ TypeSetBuilder::new().ints(16..64).build(),
+ );
+ let x = &Operand::new("x", iExt8);
+ let a = &Operand::new("a", iExt8);
+
+ ig.push(
+ Inst::new(
+ "uload8",
+ r#"
+ Load 8 bits from memory at ``p + Offset`` and zero-extend.
+
+ This is equivalent to ``load.i8`` followed by ``uextend``.
+ "#,
+ &formats.load,
+ )
+ .operands_in(vec![MemFlags, p, Offset])
+ .operands_out(vec![a])
+ .can_load(true),
+ );
+
+ ig.push(
+ Inst::new(
+ "uload8_complex",
+ r#"
+ Load 8 bits from memory at ``sum(args) + Offset`` and zero-extend.
+
+ This is equivalent to ``load.i8`` followed by ``uextend``.
+ "#,
+ &formats.load_complex,
+ )
+ .operands_in(vec![MemFlags, args, Offset])
+ .operands_out(vec![a])
+ .can_load(true),
+ );
+
+ ig.push(
+ Inst::new(
+ "sload8",
+ r#"
+ Load 8 bits from memory at ``p + Offset`` and sign-extend.
+
+ This is equivalent to ``load.i8`` followed by ``sextend``.
+ "#,
+ &formats.load,
+ )
+ .operands_in(vec![MemFlags, p, Offset])
+ .operands_out(vec![a])
+ .can_load(true),
+ );
+
+ ig.push(
+ Inst::new(
+ "sload8_complex",
+ r#"
+ Load 8 bits from memory at ``sum(args) + Offset`` and sign-extend.
+
+ This is equivalent to ``load.i8`` followed by ``sextend``.
+ "#,
+ &formats.load_complex,
+ )
+ .operands_in(vec![MemFlags, args, Offset])
+ .operands_out(vec![a])
+ .can_load(true),
+ );
+
+ ig.push(
+ Inst::new(
+ "istore8",
+ r#"
+ Store the low 8 bits of ``x`` to memory at ``p + Offset``.
+
+ This is equivalent to ``ireduce.i8`` followed by ``store.i8``.
+ "#,
+ &formats.store,
+ )
+ .operands_in(vec![MemFlags, x, p, Offset])
+ .can_store(true),
+ );
+
+ ig.push(
+ Inst::new(
+ "istore8_complex",
+ r#"
+ Store the low 8 bits of ``x`` to memory at ``sum(args) + Offset``.
+
+ This is equivalent to ``ireduce.i8`` followed by ``store.i8``.
+ "#,
+ &formats.store_complex,
+ )
+ .operands_in(vec![MemFlags, x, args, Offset])
+ .can_store(true),
+ );
+
+ let iExt16 = &TypeVar::new(
+ "iExt16",
+ "An integer type with more than 16 bits",
+ TypeSetBuilder::new().ints(32..64).build(),
+ );
+ let x = &Operand::new("x", iExt16);
+ let a = &Operand::new("a", iExt16);
+
+ ig.push(
+ Inst::new(
+ "uload16",
+ r#"
+ Load 16 bits from memory at ``p + Offset`` and zero-extend.
+
+ This is equivalent to ``load.i16`` followed by ``uextend``.
+ "#,
+ &formats.load,
+ )
+ .operands_in(vec![MemFlags, p, Offset])
+ .operands_out(vec![a])
+ .can_load(true),
+ );
+
+ ig.push(
+ Inst::new(
+ "uload16_complex",
+ r#"
+ Load 16 bits from memory at ``sum(args) + Offset`` and zero-extend.
+
+ This is equivalent to ``load.i16`` followed by ``uextend``.
+ "#,
+ &formats.load_complex,
+ )
+ .operands_in(vec![MemFlags, args, Offset])
+ .operands_out(vec![a])
+ .can_load(true),
+ );
+
+ ig.push(
+ Inst::new(
+ "sload16",
+ r#"
+ Load 16 bits from memory at ``p + Offset`` and sign-extend.
+
+ This is equivalent to ``load.i16`` followed by ``sextend``.
+ "#,
+ &formats.load,
+ )
+ .operands_in(vec![MemFlags, p, Offset])
+ .operands_out(vec![a])
+ .can_load(true),
+ );
+
+ ig.push(
+ Inst::new(
+ "sload16_complex",
+ r#"
+ Load 16 bits from memory at ``sum(args) + Offset`` and sign-extend.
+
+ This is equivalent to ``load.i16`` followed by ``sextend``.
+ "#,
+ &formats.load_complex,
+ )
+ .operands_in(vec![MemFlags, args, Offset])
+ .operands_out(vec![a])
+ .can_load(true),
+ );
+
+ ig.push(
+ Inst::new(
+ "istore16",
+ r#"
+ Store the low 16 bits of ``x`` to memory at ``p + Offset``.
+
+ This is equivalent to ``ireduce.i16`` followed by ``store.i16``.
+ "#,
+ &formats.store,
+ )
+ .operands_in(vec![MemFlags, x, p, Offset])
+ .can_store(true),
+ );
+
+ ig.push(
+ Inst::new(
+ "istore16_complex",
+ r#"
+ Store the low 16 bits of ``x`` to memory at ``sum(args) + Offset``.
+
+ This is equivalent to ``ireduce.i16`` followed by ``store.i16``.
+ "#,
+ &formats.store_complex,
+ )
+ .operands_in(vec![MemFlags, x, args, Offset])
+ .can_store(true),
+ );
+
+ let iExt32 = &TypeVar::new(
+ "iExt32",
+ "An integer type with more than 32 bits",
+ TypeSetBuilder::new().ints(64..64).build(),
+ );
+ let x = &Operand::new("x", iExt32);
+ let a = &Operand::new("a", iExt32);
+
+ ig.push(
+ Inst::new(
+ "uload32",
+ r#"
+ Load 32 bits from memory at ``p + Offset`` and zero-extend.
+
+ This is equivalent to ``load.i32`` followed by ``uextend``.
+ "#,
+ &formats.load,
+ )
+ .operands_in(vec![MemFlags, p, Offset])
+ .operands_out(vec![a])
+ .can_load(true),
+ );
+
+ ig.push(
+ Inst::new(
+ "uload32_complex",
+ r#"
+ Load 32 bits from memory at ``sum(args) + Offset`` and zero-extend.
+
+ This is equivalent to ``load.i32`` followed by ``uextend``.
+ "#,
+ &formats.load_complex,
+ )
+ .operands_in(vec![MemFlags, args, Offset])
+ .operands_out(vec![a])
+ .can_load(true),
+ );
+
+ ig.push(
+ Inst::new(
+ "sload32",
+ r#"
+ Load 32 bits from memory at ``p + Offset`` and sign-extend.
+
+ This is equivalent to ``load.i32`` followed by ``sextend``.
+ "#,
+ &formats.load,
+ )
+ .operands_in(vec![MemFlags, p, Offset])
+ .operands_out(vec![a])
+ .can_load(true),
+ );
+
+ ig.push(
+ Inst::new(
+ "sload32_complex",
+ r#"
+ Load 32 bits from memory at ``sum(args) + Offset`` and sign-extend.
+
+ This is equivalent to ``load.i32`` followed by ``sextend``.
+ "#,
+ &formats.load_complex,
+ )
+ .operands_in(vec![MemFlags, args, Offset])
+ .operands_out(vec![a])
+ .can_load(true),
+ );
+
+ ig.push(
+ Inst::new(
+ "istore32",
+ r#"
+ Store the low 32 bits of ``x`` to memory at ``p + Offset``.
+
+ This is equivalent to ``ireduce.i32`` followed by ``store.i32``.
+ "#,
+ &formats.store,
+ )
+ .operands_in(vec![MemFlags, x, p, Offset])
+ .can_store(true),
+ );
+
+ ig.push(
+ Inst::new(
+ "istore32_complex",
+ r#"
+ Store the low 32 bits of ``x`` to memory at ``sum(args) + Offset``.
+
+ This is equivalent to ``ireduce.i32`` followed by ``store.i32``.
+ "#,
+ &formats.store_complex,
+ )
+ .operands_in(vec![MemFlags, x, args, Offset])
+ .can_store(true),
+ );
+
+ let I16x8 = &TypeVar::new(
+ "I16x8",
+ "A SIMD vector with exactly 8 lanes of 16-bit values",
+ TypeSetBuilder::new()
+ .ints(16..16)
+ .simd_lanes(8..8)
+ .includes_scalars(false)
+ .build(),
+ );
+ let a = &Operand::new("a", I16x8).with_doc("Value loaded");
+
+ ig.push(
+ Inst::new(
+ "uload8x8",
+ r#"
+ Load an 8x8 vector (64 bits) from memory at ``p + Offset`` and zero-extend into an i16x8
+ vector.
+ "#,
+ &formats.load,
+ )
+ .operands_in(vec![MemFlags, p, Offset])
+ .operands_out(vec![a])
+ .can_load(true),
+ );
+
+ ig.push(
+ Inst::new(
+ "uload8x8_complex",
+ r#"
+ Load an 8x8 vector (64 bits) from memory at ``sum(args) + Offset`` and zero-extend into an
+ i16x8 vector.
+ "#,
+ &formats.load_complex,
+ )
+ .operands_in(vec![MemFlags, args, Offset])
+ .operands_out(vec![a])
+ .can_load(true),
+ );
+
+ ig.push(
+ Inst::new(
+ "sload8x8",
+ r#"
+ Load an 8x8 vector (64 bits) from memory at ``p + Offset`` and sign-extend into an i16x8
+ vector.
+ "#,
+ &formats.load,
+ )
+ .operands_in(vec![MemFlags, p, Offset])
+ .operands_out(vec![a])
+ .can_load(true),
+ );
+
+ ig.push(
+ Inst::new(
+ "sload8x8_complex",
+ r#"
+ Load an 8x8 vector (64 bits) from memory at ``sum(args) + Offset`` and sign-extend into an
+ i16x8 vector.
+ "#,
+ &formats.load_complex,
+ )
+ .operands_in(vec![MemFlags, args, Offset])
+ .operands_out(vec![a])
+ .can_load(true),
+ );
+
+ let I32x4 = &TypeVar::new(
+ "I32x4",
+ "A SIMD vector with exactly 4 lanes of 32-bit values",
+ TypeSetBuilder::new()
+ .ints(32..32)
+ .simd_lanes(4..4)
+ .includes_scalars(false)
+ .build(),
+ );
+ let a = &Operand::new("a", I32x4).with_doc("Value loaded");
+
+ ig.push(
+ Inst::new(
+ "uload16x4",
+ r#"
+ Load a 16x4 vector (64 bits) from memory at ``p + Offset`` and zero-extend into an i32x4
+ vector.
+ "#,
+ &formats.load,
+ )
+ .operands_in(vec![MemFlags, p, Offset])
+ .operands_out(vec![a])
+ .can_load(true),
+ );
+
+ ig.push(
+ Inst::new(
+ "uload16x4_complex",
+ r#"
+ Load a 16x4 vector (64 bits) from memory at ``sum(args) + Offset`` and zero-extend into an
+ i32x4 vector.
+ "#,
+ &formats.load_complex,
+ )
+ .operands_in(vec![MemFlags, args, Offset])
+ .operands_out(vec![a])
+ .can_load(true),
+ );
+
+ ig.push(
+ Inst::new(
+ "sload16x4",
+ r#"
+ Load a 16x4 vector (64 bits) from memory at ``p + Offset`` and sign-extend into an i32x4
+ vector.
+ "#,
+ &formats.load,
+ )
+ .operands_in(vec![MemFlags, p, Offset])
+ .operands_out(vec![a])
+ .can_load(true),
+ );
+
+ ig.push(
+ Inst::new(
+ "sload16x4_complex",
+ r#"
+ Load a 16x4 vector (64 bits) from memory at ``sum(args) + Offset`` and sign-extend into an
+ i32x4 vector.
+ "#,
+ &formats.load_complex,
+ )
+ .operands_in(vec![MemFlags, args, Offset])
+ .operands_out(vec![a])
+ .can_load(true),
+ );
+
+ let I64x2 = &TypeVar::new(
+ "I64x2",
+ "A SIMD vector with exactly 2 lanes of 64-bit values",
+ TypeSetBuilder::new()
+ .ints(64..64)
+ .simd_lanes(2..2)
+ .includes_scalars(false)
+ .build(),
+ );
+ let a = &Operand::new("a", I64x2).with_doc("Value loaded");
+
+ ig.push(
+ Inst::new(
+ "uload32x2",
+ r#"
+ Load a 32x2 vector (64 bits) from memory at ``p + Offset`` and zero-extend into an i64x2
+ vector.
+ "#,
+ &formats.load,
+ )
+ .operands_in(vec![MemFlags, p, Offset])
+ .operands_out(vec![a])
+ .can_load(true),
+ );
+
+ ig.push(
+ Inst::new(
+ "uload32x2_complex",
+ r#"
+ Load a 32x2 vector (64 bits) from memory at ``sum(args) + Offset`` and zero-extend into an
+ i64x2 vector.
+ "#,
+ &formats.load_complex,
+ )
+ .operands_in(vec![MemFlags, args, Offset])
+ .operands_out(vec![a])
+ .can_load(true),
+ );
+
+ ig.push(
+ Inst::new(
+ "sload32x2",
+ r#"
+ Load a 32x2 vector (64 bits) from memory at ``p + Offset`` and sign-extend into an i64x2
+ vector.
+ "#,
+ &formats.load,
+ )
+ .operands_in(vec![MemFlags, p, Offset])
+ .operands_out(vec![a])
+ .can_load(true),
+ );
+
+ ig.push(
+ Inst::new(
+ "sload32x2_complex",
+ r#"
+ Load a 32x2 vector (64 bits) from memory at ``sum(args) + Offset`` and sign-extend into an
+ i64x2 vector.
+ "#,
+ &formats.load_complex,
+ )
+ .operands_in(vec![MemFlags, args, Offset])
+ .operands_out(vec![a])
+ .can_load(true),
+ );
+
+ let x = &Operand::new("x", Mem).with_doc("Value to be stored");
+ let a = &Operand::new("a", Mem).with_doc("Value loaded");
+ let Offset =
+ &Operand::new("Offset", &imm.offset32).with_doc("In-bounds offset into stack slot");
+
+ ig.push(
+ Inst::new(
+ "stack_load",
+ r#"
+ Load a value from a stack slot at the constant offset.
+
+ This is a polymorphic instruction that can load any value type which
+ has a memory representation.
+
+ The offset is an immediate constant, not an SSA value. The memory
+ access cannot go out of bounds, i.e.
+ `sizeof(a) + Offset <= sizeof(SS)`.
+ "#,
+ &formats.stack_load,
+ )
+ .operands_in(vec![SS, Offset])
+ .operands_out(vec![a])
+ .can_load(true),
+ );
+
+ ig.push(
+ Inst::new(
+ "stack_store",
+ r#"
+ Store a value to a stack slot at a constant offset.
+
+ This is a polymorphic instruction that can store any value type with a
+ memory representation.
+
+ The offset is an immediate constant, not an SSA value. The memory
+ access cannot go out of bounds, i.e.
+ `sizeof(a) + Offset <= sizeof(SS)`.
+ "#,
+ &formats.stack_store,
+ )
+ .operands_in(vec![x, SS, Offset])
+ .can_store(true),
+ );
+
+ ig.push(
+ Inst::new(
+ "stack_addr",
+ r#"
+ Get the address of a stack slot.
+
+ Compute the absolute address of a byte in a stack slot. The offset must
+ refer to a byte inside the stack slot:
+ `0 <= Offset < sizeof(SS)`.
+ "#,
+ &formats.stack_load,
+ )
+ .operands_in(vec![SS, Offset])
+ .operands_out(vec![addr]),
+ );
+
+ let GV = &Operand::new("GV", &entities.global_value);
+
+ ig.push(
+ Inst::new(
+ "global_value",
+ r#"
+ Compute the value of global GV.
+ "#,
+ &formats.unary_global_value,
+ )
+ .operands_in(vec![GV])
+ .operands_out(vec![a]),
+ );
+
+ ig.push(
+ Inst::new(
+ "symbol_value",
+ r#"
+ Compute the value of global GV, which is a symbolic value.
+ "#,
+ &formats.unary_global_value,
+ )
+ .operands_in(vec![GV])
+ .operands_out(vec![a]),
+ );
+
+ ig.push(
+ Inst::new(
+ "tls_value",
+ r#"
+ Compute the value of global GV, which is a TLS (thread local storage) value.
+ "#,
+ &formats.unary_global_value,
+ )
+ .operands_in(vec![GV])
+ .operands_out(vec![a]),
+ );
+
+ let HeapOffset = &TypeVar::new(
+ "HeapOffset",
+ "An unsigned heap offset",
+ TypeSetBuilder::new().ints(32..64).build(),
+ );
+
+ let H = &Operand::new("H", &entities.heap);
+ let p = &Operand::new("p", HeapOffset);
+ let Size = &Operand::new("Size", &imm.uimm32).with_doc("Size in bytes");
+
+ ig.push(
+ Inst::new(
+ "heap_addr",
+ r#"
+ Bounds check and compute absolute address of heap memory.
+
+ Verify that the offset range ``p .. p + Size - 1`` is in bounds for the
+ heap H, and generate an absolute address that is safe to dereference.
+
+ 1. If ``p + Size`` is not greater than the heap bound, return an
+ absolute address corresponding to a byte offset of ``p`` from the
+ heap's base address.
+ 2. If ``p + Size`` is greater than the heap bound, generate a trap.
+ "#,
+ &formats.heap_addr,
+ )
+ .operands_in(vec![H, p, Size])
+ .operands_out(vec![addr]),
+ );
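+
+ // Editorial pseudocode for the bounds check described above (illustrative only; the
+ // actual lowering is ISA- and heap-configuration-specific):
+ //
+ //     if p + Size > heap_bound { trap(heap_oob) } else { addr = heap_base + p }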
+
+ // Note this instruction is marked as having other side-effects, so GVN won't try to hoist it,
+ // which would result in it being subject to spilling. While not hoisting would generally hurt
+ // performance, since a computed value used many times may need to be regenerated before each
+ // use, it is not the case here: this instruction doesn't generate any code. That's because,
+ // by definition the pinned register is never used by the register allocator, but is written to
+ // and read explicitly and exclusively by set_pinned_reg and get_pinned_reg.
+ ig.push(
+ Inst::new(
+ "get_pinned_reg",
+ r#"
+ Gets the content of the pinned register, when it's enabled.
+ "#,
+ &formats.nullary,
+ )
+ .operands_out(vec![addr])
+ .other_side_effects(true),
+ );
+
+ ig.push(
+ Inst::new(
+ "set_pinned_reg",
+ r#"
+ Sets the content of the pinned register, when it's enabled.
+ "#,
+ &formats.unary,
+ )
+ .operands_in(vec![addr])
+ .other_side_effects(true),
+ );
+
+ let TableOffset = &TypeVar::new(
+ "TableOffset",
+ "An unsigned table offset",
+ TypeSetBuilder::new().ints(32..64).build(),
+ );
+ let T = &Operand::new("T", &entities.table);
+ let p = &Operand::new("p", TableOffset);
+ let Offset =
+ &Operand::new("Offset", &imm.offset32).with_doc("Byte offset from element address");
+
+ ig.push(
+ Inst::new(
+ "table_addr",
+ r#"
+ Bounds check and compute absolute address of a table entry.
+
+ Verify that the offset ``p`` is in bounds for the table T, and generate
+ an absolute address that is safe to dereference.
+
+ ``Offset`` must be less than the size of a table element.
+
+ 1. If ``p`` is not greater than the table bound, return an absolute
+ address corresponding to a byte offset of ``p`` from the table's
+ base address.
+ 2. If ``p`` is greater than the table bound, generate a trap.
+ "#,
+ &formats.table_addr,
+ )
+ .operands_in(vec![T, p, Offset])
+ .operands_out(vec![addr]),
+ );
+
+ let N = &Operand::new("N", &imm.imm64);
+ let a = &Operand::new("a", Int).with_doc("A constant integer scalar or vector value");
+
+ ig.push(
+ Inst::new(
+ "iconst",
+ r#"
+ Integer constant.
+
+ Create a scalar integer SSA value with an immediate constant value, or
+ an integer vector where all the lanes have the same value.
+ "#,
+ &formats.unary_imm,
+ )
+ .operands_in(vec![N])
+ .operands_out(vec![a]),
+ );
+
+ let N = &Operand::new("N", &imm.ieee32);
+ let a = &Operand::new("a", f32_).with_doc("A constant f32 scalar value");
+
+ ig.push(
+ Inst::new(
+ "f32const",
+ r#"
+ Floating point constant.
+
+ Create a `f32` SSA value with an immediate constant value.
+ "#,
+ &formats.unary_ieee32,
+ )
+ .operands_in(vec![N])
+ .operands_out(vec![a]),
+ );
+
+ let N = &Operand::new("N", &imm.ieee64);
+ let a = &Operand::new("a", f64_).with_doc("A constant f64 scalar value");
+
+ ig.push(
+ Inst::new(
+ "f64const",
+ r#"
+ Floating point constant.
+
+ Create a `f64` SSA value with an immediate constant value.
+ "#,
+ &formats.unary_ieee64,
+ )
+ .operands_in(vec![N])
+ .operands_out(vec![a]),
+ );
+
+ let N = &Operand::new("N", &imm.boolean);
+ let a = &Operand::new("a", Bool).with_doc("A constant boolean scalar or vector value");
+
+ ig.push(
+ Inst::new(
+ "bconst",
+ r#"
+ Boolean constant.
+
+ Create a scalar boolean SSA value with an immediate constant value, or
+ a boolean vector where all the lanes have the same value.
+ "#,
+ &formats.unary_bool,
+ )
+ .operands_in(vec![N])
+ .operands_out(vec![a]),
+ );
+
+ let N = &Operand::new("N", &imm.pool_constant)
+ .with_doc("The 16 immediate bytes of a 128-bit vector");
+ let a = &Operand::new("a", TxN).with_doc("A constant vector value");
+
+ ig.push(
+ Inst::new(
+ "vconst",
+ r#"
+ SIMD vector constant.
+
+ Construct a vector with the given immediate bytes.
+ "#,
+ &formats.unary_const,
+ )
+ .operands_in(vec![N])
+ .operands_out(vec![a]),
+ );
+
+ let constant =
+ &Operand::new("constant", &imm.pool_constant).with_doc("A constant in the constant pool");
+ let address = &Operand::new("address", iAddr);
+ ig.push(
+ Inst::new(
+ "const_addr",
+ r#"
+ Calculate the base address of a value in the constant pool.
+ "#,
+ &formats.unary_const,
+ )
+ .operands_in(vec![constant])
+ .operands_out(vec![address]),
+ );
+
+ let mask = &Operand::new("mask", &imm.uimm128)
+ .with_doc("The 16 immediate bytes used for selecting the elements to shuffle");
+ let Tx16 = &TypeVar::new(
+ "Tx16",
+ "A SIMD vector with exactly 16 lanes of 8-bit values; eventually this may support other \
+ lane counts and widths",
+ TypeSetBuilder::new()
+ .ints(8..8)
+ .bools(8..8)
+ .simd_lanes(16..16)
+ .includes_scalars(false)
+ .build(),
+ );
+ let a = &Operand::new("a", Tx16).with_doc("A vector value");
+ let b = &Operand::new("b", Tx16).with_doc("A vector value");
+
+ ig.push(
+ Inst::new(
+ "shuffle",
+ r#"
+ SIMD vector shuffle.
+
+ Shuffle two vectors using the given immediate bytes. For each of the 16 bytes of the
+ immediate, a value i of 0-15 selects the i-th element of the first vector and a value i of
+ 16-31 selects the (i-16)th element of the second vector. Immediate values outside of the
+ 0-31 range place a 0 in the resulting vector lane.
+ "#,
+ &formats.shuffle,
+ )
+ .operands_in(vec![a, b, mask])
+ .operands_out(vec![a]),
+ );
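+
+ // For intuition: with 16-lane inputs `a` and `b`, a mask byte of 3 picks lane 3
+ // of `a`, a mask byte of 19 picks lane 3 of `b` (19 - 16), and a mask byte of
+ // 0xFF (outside the 0-31 range) produces a zeroed lane, as described above.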
+
+ let a = &Operand::new("a", Ref).with_doc("A constant reference null value");
+
+ ig.push(
+ Inst::new(
+ "null",
+ r#"
+ Null constant value for reference types.
+
+ Create a scalar reference SSA value with a constant null value.
+ "#,
+ &formats.nullary,
+ )
+ .operands_out(vec![a]),
+ );
+
+ ig.push(Inst::new(
+ "nop",
+ r#"
+ Just a dummy instruction.
+
+ Note: this doesn't compile to a machine code nop.
+ "#,
+ &formats.nullary,
+ ));
+
+ let c = &Operand::new("c", Testable).with_doc("Controlling value to test");
+ let x = &Operand::new("x", Any).with_doc("Value to use when `c` is true");
+ let y = &Operand::new("y", Any).with_doc("Value to use when `c` is false");
+ let a = &Operand::new("a", Any);
+
+ ig.push(
+ Inst::new(
+ "select",
+ r#"
+ Conditional select.
+
+ This instruction selects whole values. Use `vselect` for
+ lane-wise selection.
+ "#,
+ &formats.ternary,
+ )
+ .operands_in(vec![c, x, y])
+ .operands_out(vec![a]),
+ );
+
+ let cc = &Operand::new("cc", &imm.intcc).with_doc("Controlling condition code");
+ let flags = &Operand::new("flags", iflags).with_doc("The machine's flag register");
+
+ ig.push(
+ Inst::new(
+ "selectif",
+ r#"
+ Conditional select, dependent on integer condition codes.
+ "#,
+ &formats.int_select,
+ )
+ .operands_in(vec![cc, flags, x, y])
+ .operands_out(vec![a]),
+ );
+
+ ig.push(
+ Inst::new(
+ "selectif_spectre_guard",
+ r#"
+ Conditional select intended for Spectre guards.
+
+ This operation is semantically equivalent to a selectif instruction.
+ However, it is guaranteed to not be removed or otherwise altered by any
+ optimization pass, and is guaranteed to result in a conditional-move
+ instruction, not a branch-based lowering. As such, it is suitable
+ for use when producing Spectre guards. For example, a bounds-check
+ may guard against unsafe speculation past a bounds-check conditional
+ branch by passing the address or index to be accessed through a
+ conditional move, also gated on the same condition. Because no
+ Spectre-vulnerable processors are known to perform speculation on
+ conditional move instructions, this is guaranteed to pick the
+ correct input. If the selected input in case of overflow is a "safe"
+ value, for example a null pointer that causes an exception in the
+ speculative path, this ensures that no Spectre vulnerability will
+ exist.
+ "#,
+ &formats.int_select,
+ )
+ .operands_in(vec![cc, flags, x, y])
+ .operands_out(vec![a])
+ .other_side_effects(true),
+ );
+
+ let c = &Operand::new("c", Any).with_doc("Controlling value to test");
+ ig.push(
+ Inst::new(
+ "bitselect",
+ r#"
+ Conditional select of bits.
+
+ For each bit in `c`, this instruction selects the corresponding bit from `x` if the bit
+ in `c` is 1 and the corresponding bit from `y` if the bit in `c` is 0. See also:
+ `select`, `vselect`.
+ "#,
+ &formats.ternary,
+ )
+ .operands_in(vec![c, x, y])
+ .operands_out(vec![a]),
+ );
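+
+ // Worked example of the bit-level semantics above: for c = 0b1010, x = 0b1100
+ // and y = 0b0011, the result is (x & c) | (y & !c) = 0b1000 | 0b0001 = 0b1001.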
+
+ let x = &Operand::new("x", Any);
+
+ ig.push(
+ Inst::new(
+ "copy",
+ r#"
+ Register-register copy.
+
+ This instruction copies its input, preserving the value type.
+
+ A pure SSA-form program does not need to copy values, but this
+ instruction is useful for representing intermediate stages during
+ instruction transformations, and the register allocator needs a way of
+ representing register copies.
+ "#,
+ &formats.unary,
+ )
+ .operands_in(vec![x])
+ .operands_out(vec![a]),
+ );
+
+ ig.push(
+ Inst::new(
+ "spill",
+ r#"
+ Spill a register value to a stack slot.
+
+ This instruction behaves exactly like `copy`, but the result
+ value is assigned to a spill slot.
+ "#,
+ &formats.unary,
+ )
+ .operands_in(vec![x])
+ .operands_out(vec![a])
+ .can_store(true),
+ );
+
+ ig.push(
+ Inst::new(
+ "fill",
+ r#"
+ Load a register value from a stack slot.
+
+ This instruction behaves exactly like `copy`, but creates a new
+ SSA value for the spilled input value.
+ "#,
+ &formats.unary,
+ )
+ .operands_in(vec![x])
+ .operands_out(vec![a])
+ .can_load(true),
+ );
+
+ ig.push(
+ Inst::new(
+ "fill_nop",
+ r#"
+ This is identical to `fill`, except it has no encoding, since it is a no-op.
+
+ This instruction is created only during late-stage redundant-reload removal, after all
+ registers and stack slots have been assigned. It is used to replace `fill`s that have
+ been identified as redundant.
+ "#,
+ &formats.unary,
+ )
+ .operands_in(vec![x])
+ .operands_out(vec![a])
+ .can_load(true),
+ );
+
+ let Sarg = &TypeVar::new(
+ "Sarg",
+ "Any scalar or vector type with at most 128 lanes",
+ TypeSetBuilder::new()
+ .specials(vec![crate::cdsl::types::SpecialType::StructArgument])
+ .build(),
+ );
+ let sarg_t = &Operand::new("sarg_t", Sarg);
+
+ // FIXME remove once the old style codegen backends are removed.
+ ig.push(
+ Inst::new(
+ "dummy_sarg_t",
+ r#"
+ This creates a sarg_t.
+
+ This instruction is internal and should not be created by
+ Cranelift users.
+ "#,
+ &formats.nullary,
+ )
+ .operands_in(vec![])
+ .operands_out(vec![sarg_t]),
+ );
+
+ let src = &Operand::new("src", &imm.regunit);
+ let dst = &Operand::new("dst", &imm.regunit);
+
+ ig.push(
+ Inst::new(
+ "regmove",
+ r#"
+ Temporarily divert ``x`` from ``src`` to ``dst``.
+
+ This instruction moves the location of a value from one register to
+ another without creating a new SSA value. It is used by the register
+ allocator to temporarily rearrange register assignments in order to
+ satisfy instruction constraints.
+
+ The register diversions created by this instruction must be undone
+ before the value leaves the block. At the entry to a new block, all live
+ values must be in their originally assigned registers.
+ "#,
+ &formats.reg_move,
+ )
+ .operands_in(vec![x, src, dst])
+ .other_side_effects(true),
+ );
+
+ ig.push(
+ Inst::new(
+ "copy_special",
+ r#"
+ Copies the contents of ``src`` register to ``dst`` register.
+
+ This instruction copies the contents of one register to another
+ register without involving any SSA values. This is used for copying
+ special registers, e.g. copying the stack register to the frame
+ register in a function prologue.
+ "#,
+ &formats.copy_special,
+ )
+ .operands_in(vec![src, dst])
+ .other_side_effects(true),
+ );
+
+ ig.push(
+ Inst::new(
+ "copy_to_ssa",
+ r#"
+ Copies the contents of ``src`` register to ``a`` SSA name.
+
+ This instruction copies the contents of one register, regardless of its SSA name, to
+ another register, creating a new SSA name. In that sense it is a one-sided version
+ of ``copy_special``. This instruction is internal and should not be created by
+ Cranelift users.
+ "#,
+ &formats.copy_to_ssa,
+ )
+ .operands_in(vec![src])
+ .operands_out(vec![a])
+ .other_side_effects(true),
+ );
+
+ ig.push(
+ Inst::new(
+ "copy_nop",
+ r#"
+ Stack-slot-to-the-same-stack-slot copy, which is guaranteed to turn
+ into a no-op. This instruction is for use only within Cranelift itself.
+
+ This instruction copies its input, preserving the value type.
+ "#,
+ &formats.unary,
+ )
+ .operands_in(vec![x])
+ .operands_out(vec![a]),
+ );
+
+ let delta = &Operand::new("delta", Int);
+
+ ig.push(
+ Inst::new(
+ "adjust_sp_down",
+ r#"
+ Subtracts ``delta`` offset value from the stack pointer register.
+
+ This instruction is used to adjust the stack pointer by a dynamic amount.
+ "#,
+ &formats.unary,
+ )
+ .operands_in(vec![delta])
+ .other_side_effects(true),
+ );
+
+ let Offset = &Operand::new("Offset", &imm.imm64).with_doc("Offset from current stack pointer");
+
+ ig.push(
+ Inst::new(
+ "adjust_sp_up_imm",
+ r#"
+ Adds ``Offset`` immediate offset value to the stack pointer register.
+
+ This instruction is used to adjust the stack pointer, primarily in function
+ prologues and epilogues. ``Offset`` is constrained to the size of a signed
+ 32-bit integer.
+ "#,
+ &formats.unary_imm,
+ )
+ .operands_in(vec![Offset])
+ .other_side_effects(true),
+ );
+
+ let Offset = &Operand::new("Offset", &imm.imm64).with_doc("Offset from current stack pointer");
+
+ ig.push(
+ Inst::new(
+ "adjust_sp_down_imm",
+ r#"
+ Subtracts ``Offset`` immediate offset value from the stack pointer
+ register.
+
+ This instruction is used to adjust the stack pointer, primarily in function
+ prologues and epilogues. ``Offset`` is constrained to the size of a signed
+ 32-bit integer.
+ "#,
+ &formats.unary_imm,
+ )
+ .operands_in(vec![Offset])
+ .other_side_effects(true),
+ );
+
+ let f = &Operand::new("f", iflags);
+
+ ig.push(
+ Inst::new(
+ "ifcmp_sp",
+ r#"
+ Compare ``addr`` with the stack pointer and set the CPU flags.
+
+ This is like `ifcmp` where ``addr`` is the LHS operand and the stack
+ pointer is the RHS.
+ "#,
+ &formats.unary,
+ )
+ .operands_in(vec![addr])
+ .operands_out(vec![f]),
+ );
+
+ ig.push(
+ Inst::new(
+ "regspill",
+ r#"
+ Temporarily divert ``x`` from ``src`` to ``SS``.
+
+ This instruction moves the location of a value from a register to a
+ stack slot without creating a new SSA value. It is used by the register
+ allocator to temporarily rearrange register assignments in order to
+ satisfy instruction constraints.
+
+ See also `regmove`.
+ "#,
+ &formats.reg_spill,
+ )
+ .operands_in(vec![x, src, SS])
+ .other_side_effects(true),
+ );
+
+ ig.push(
+ Inst::new(
+ "regfill",
+ r#"
+ Temporarily divert ``x`` from ``SS`` to ``dst``.
+
+ This instruction moves the location of a value from a stack slot to a
+ register without creating a new SSA value. It is used by the register
+ allocator to temporarily rearrange register assignments in order to
+ satisfy instruction constraints.
+
+ See also `regmove`.
+ "#,
+ &formats.reg_fill,
+ )
+ .operands_in(vec![x, SS, dst])
+ .other_side_effects(true),
+ );
+
+ let N =
+ &Operand::new("args", &entities.varargs).with_doc("Variable number of args for StackMap");
+
+ ig.push(
+ Inst::new(
+ "safepoint",
+ r#"
+ This instruction will provide live reference values at a point in
+ the function. It can only be used by the compiler.
+ "#,
+ &formats.multiary,
+ )
+ .operands_in(vec![N])
+ .other_side_effects(true),
+ );
+
+ let x = &Operand::new("x", TxN).with_doc("Vector to split");
+ let lo = &Operand::new("lo", &TxN.half_vector()).with_doc("Low-numbered lanes of `x`");
+ let hi = &Operand::new("hi", &TxN.half_vector()).with_doc("High-numbered lanes of `x`");
+
+ ig.push(
+ Inst::new(
+ "vsplit",
+ r#"
+ Split a vector into two halves.
+
+ Split the vector `x` into two separate values, each containing half of
+ the lanes from ``x``. The result may be two scalars if ``x`` only had
+ two lanes.
+ "#,
+ &formats.unary,
+ )
+ .operands_in(vec![x])
+ .operands_out(vec![lo, hi])
+ .is_ghost(true),
+ );
+
+ let Any128 = &TypeVar::new(
+ "Any128",
+ "Any scalar or vector type with at most 128 lanes",
+ TypeSetBuilder::new()
+ .ints(Interval::All)
+ .floats(Interval::All)
+ .bools(Interval::All)
+ .simd_lanes(1..128)
+ .includes_scalars(true)
+ .build(),
+ );
+
+ let x = &Operand::new("x", Any128).with_doc("Low-numbered lanes");
+ let y = &Operand::new("y", Any128).with_doc("High-numbered lanes");
+ let a = &Operand::new("a", &Any128.double_vector()).with_doc("Concatenation of `x` and `y`");
+
+ ig.push(
+ Inst::new(
+ "vconcat",
+ r#"
+ Vector concatenation.
+
+ Return a vector formed by concatenating ``x`` and ``y``. The resulting
+ vector type has twice as many lanes as each of the inputs. The lanes of
+ ``x`` appear as the low-numbered lanes, and the lanes of ``y`` become
+ the high-numbered lanes of ``a``.
+
+ It is possible to form a vector by concatenating two scalars.
+ "#,
+ &formats.binary,
+ )
+ .operands_in(vec![x, y])
+ .operands_out(vec![a])
+ .is_ghost(true),
+ );
+
+ let c = &Operand::new("c", &TxN.as_bool()).with_doc("Controlling vector");
+ let x = &Operand::new("x", TxN).with_doc("Value to use where `c` is true");
+ let y = &Operand::new("y", TxN).with_doc("Value to use where `c` is false");
+ let a = &Operand::new("a", TxN);
+
+ ig.push(
+ Inst::new(
+ "vselect",
+ r#"
+ Vector lane select.
+
+ Select lanes from ``x`` or ``y`` controlled by the lanes of the boolean
+ vector ``c``.
+ "#,
+ &formats.ternary,
+ )
+ .operands_in(vec![c, x, y])
+ .operands_out(vec![a]),
+ );
+
+ let s = &Operand::new("s", b1);
+
+ ig.push(
+ Inst::new(
+ "vany_true",
+ r#"
+ Reduce a vector to a scalar boolean.
+
+ Return a scalar boolean true if any lane in ``a`` is non-zero, false otherwise.
+ "#,
+ &formats.unary,
+ )
+ .operands_in(vec![a])
+ .operands_out(vec![s]),
+ );
+
+ ig.push(
+ Inst::new(
+ "vall_true",
+ r#"
+ Reduce a vector to a scalar boolean.
+
+ Return a scalar boolean true if all lanes in ``a`` are non-zero, false otherwise.
+ "#,
+ &formats.unary,
+ )
+ .operands_in(vec![a])
+ .operands_out(vec![s]),
+ );
+
+ let a = &Operand::new("a", TxN);
+ let x = &Operand::new("x", Int);
+
+ ig.push(
+ Inst::new(
+ "vhigh_bits",
+ r#"
+ Reduce a vector to a scalar integer.
+
+ Return a scalar integer, consisting of the concatenation of the most significant bit
+ of each lane of ``a``.
+ "#,
+ &formats.unary,
+ )
+ .operands_in(vec![a])
+ .operands_out(vec![x]),
+ );
+
+ let a = &Operand::new("a", &Int.as_bool());
+ let Cond = &Operand::new("Cond", &imm.intcc);
+ let x = &Operand::new("x", Int);
+ let y = &Operand::new("y", Int);
+
+ ig.push(
+ Inst::new(
+ "icmp",
+ r#"
+ Integer comparison.
+
+ The condition code determines if the operands are interpreted as signed
+ or unsigned integers.
+
+ | Signed | Unsigned | Condition |
+ |--------|----------|-----------------------|
+ | eq | eq | Equal |
+ | ne | ne | Not equal |
+ | slt | ult | Less than |
+ | sge | uge | Greater than or equal |
+ | sgt | ugt | Greater than |
+ | sle | ule | Less than or equal |
+ | of | * | Overflow |
+ | nof | * | No Overflow |
+
+ \* The unsigned version of overflow conditions have ISA-specific
+ semantics and thus have been kept as methods on the TargetIsa trait as
+ [unsigned_add_overflow_condition][isa::TargetIsa::unsigned_add_overflow_condition] and
+ [unsigned_sub_overflow_condition][isa::TargetIsa::unsigned_sub_overflow_condition].
+
+ When this instruction compares integer vectors, it returns a boolean
+ vector of lane-wise comparisons.
+ "#,
+ &formats.int_compare,
+ )
+ .operands_in(vec![Cond, x, y])
+ .operands_out(vec![a]),
+ );
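+
+ // For intuition: on i8 operands 0x80 (-128 signed, 128 unsigned) and 0x01,
+ // `icmp slt` is true while `icmp ult` is false; the condition code alone decides
+ // whether the same bit pattern is compared as signed or unsigned.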
+
+ let a = &Operand::new("a", b1);
+ let x = &Operand::new("x", iB);
+ let Y = &Operand::new("Y", &imm.imm64);
+
+ ig.push(
+ Inst::new(
+ "icmp_imm",
+ r#"
+ Compare scalar integer to a constant.
+
+ This is the same as the `icmp` instruction, except one operand is
+ an immediate constant.
+
+ This instruction can only compare scalars. Use `icmp` for
+ lane-wise vector comparisons.
+ "#,
+ &formats.int_compare_imm,
+ )
+ .operands_in(vec![Cond, x, Y])
+ .operands_out(vec![a]),
+ );
+
+ let f = &Operand::new("f", iflags);
+ let x = &Operand::new("x", iB);
+ let y = &Operand::new("y", iB);
+
+ ig.push(
+ Inst::new(
+ "ifcmp",
+ r#"
+ Compare scalar integers and return flags.
+
+ Compare two scalar integer values and return integer CPU flags
+ representing the result.
+ "#,
+ &formats.binary,
+ )
+ .operands_in(vec![x, y])
+ .operands_out(vec![f]),
+ );
+
+ ig.push(
+ Inst::new(
+ "ifcmp_imm",
+ r#"
+ Compare scalar integer to a constant and return flags.
+
+ Like `icmp_imm`, but returns integer CPU flags instead of testing
+ a specific condition code.
+ "#,
+ &formats.binary_imm64,
+ )
+ .operands_in(vec![x, Y])
+ .operands_out(vec![f]),
+ );
+
+ let a = &Operand::new("a", Int);
+ let x = &Operand::new("x", Int);
+ let y = &Operand::new("y", Int);
+
+ ig.push(
+ Inst::new(
+ "iadd",
+ r#"
+ Wrapping integer addition: `a := x + y \pmod{2^B}`.
+
+ This instruction does not depend on the signed/unsigned interpretation
+ of the operands.
+ "#,
+ &formats.binary,
+ )
+ .operands_in(vec![x, y])
+ .operands_out(vec![a]),
+ );
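+
+ // The scalar semantics match Rust's wrapping arithmetic, e.g. for 8-bit operands
+ // `0xFFu8.wrapping_add(1) == 0x00`, i.e. `a := x + y \pmod{2^B}` as above.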
+
+ ig.push(
+ Inst::new(
+ "uadd_sat",
+ r#"
+ Add with unsigned saturation.
+
+ This is similar to `iadd` but the operands are interpreted as unsigned integers and their
+ summed result, instead of wrapping, will be saturated to the highest unsigned integer for
+ the controlling type (e.g. `0xFF` for i8).
+ "#,
+ &formats.binary,
+ )
+ .operands_in(vec![x, y])
+ .operands_out(vec![a]),
+ );
+
+ ig.push(
+ Inst::new(
+ "sadd_sat",
+ r#"
+ Add with signed saturation.
+
+ This is similar to `iadd` but the operands are interpreted as signed integers and their
+ summed result, instead of wrapping, will be saturated to the lowest or highest
+ signed integer for the controlling type (e.g. `0x80` or `0x7F` for i8). For example,
+ since an `sadd_sat.i8` of `0x70` and `0x70` is greater than `0x7F`, the result will be
+ clamped to `0x7F`.
+ "#,
+ &formats.binary,
+ )
+ .operands_in(vec![x, y])
+ .operands_out(vec![a]),
+ );
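+
+ // For intuition, these behave like Rust's `saturating_add`:
+ // `0xF0u8.saturating_add(0x20) == 0xFF` (uadd_sat.i8) and
+ // `0x70i8.saturating_add(0x70) == 0x7F` (sadd_sat.i8), as in the example above.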
+
+ ig.push(
+ Inst::new(
+ "isub",
+ r#"
+ Wrapping integer subtraction: `a := x - y \pmod{2^B}`.
+
+ This instruction does not depend on the signed/unsigned interpretation
+ of the operands.
+ "#,
+ &formats.binary,
+ )
+ .operands_in(vec![x, y])
+ .operands_out(vec![a]),
+ );
+
+ ig.push(
+ Inst::new(
+ "usub_sat",
+ r#"
+ Subtract with unsigned saturation.
+
+ This is similar to `isub` but the operands are interpreted as unsigned integers and their
+ difference, instead of wrapping, will be saturated to the lowest unsigned integer for
+ the controlling type (e.g. `0x00` for i8).
+ "#,
+ &formats.binary,
+ )
+ .operands_in(vec![x, y])
+ .operands_out(vec![a]),
+ );
+
+ ig.push(
+ Inst::new(
+ "ssub_sat",
+ r#"
+ Subtract with signed saturation.
+
+ This is similar to `isub` but the operands are interpreted as signed integers and their
+ difference, instead of wrapping, will be saturated to the lowest or highest
+ signed integer for the controlling type (e.g. `0x80` or `0x7F` for i8).
+ "#,
+ &formats.binary,
+ )
+ .operands_in(vec![x, y])
+ .operands_out(vec![a]),
+ );
+
+ ig.push(
+ Inst::new(
+ "ineg",
+ r#"
+ Integer negation: `a := -x \pmod{2^B}`.
+ "#,
+ &formats.unary,
+ )
+ .operands_in(vec![x])
+ .operands_out(vec![a]),
+ );
+
+ ig.push(
+ Inst::new(
+ "iabs",
+ r#"
+ Integer absolute value with wrapping: `a := |x|`.
+ "#,
+ &formats.unary,
+ )
+ .operands_in(vec![x])
+ .operands_out(vec![a]),
+ );
+
+ ig.push(
+ Inst::new(
+ "imul",
+ r#"
+ Wrapping integer multiplication: `a := x y \pmod{2^B}`.
+
+ This instruction does not depend on the signed/unsigned interpretation
+ of the operands.
+
+ Polymorphic over all integer types (vector and scalar).
+ "#,
+ &formats.binary,
+ )
+ .operands_in(vec![x, y])
+ .operands_out(vec![a]),
+ );
+
+ ig.push(
+ Inst::new(
+ "umulhi",
+ r#"
+ Unsigned integer multiplication, producing the high half of a
+ double-length result.
+
+ Polymorphic over all scalar integer types, but does not support vector
+ types.
+ "#,
+ &formats.binary,
+ )
+ .operands_in(vec![x, y])
+ .operands_out(vec![a]),
+ );
+
+ ig.push(
+ Inst::new(
+ "smulhi",
+ r#"
+ Signed integer multiplication, producing the high half of a
+ double-length result.
+
+ Polymorphic over all scalar integer types, but does not support vector
+ types.
+ "#,
+ &formats.binary,
+ )
+ .operands_in(vec![x, y])
+ .operands_out(vec![a]),
+ );
+
+ ig.push(
+ Inst::new(
+ "udiv",
+ r#"
+ Unsigned integer division: `a := \lfloor {x \over y} \rfloor`.
+
+ This operation traps if the divisor is zero.
+ "#,
+ &formats.binary,
+ )
+ .operands_in(vec![x, y])
+ .operands_out(vec![a])
+ .can_trap(true),
+ );
+
+ ig.push(
+ Inst::new(
+ "sdiv",
+ r#"
+ Signed integer division rounded toward zero: `a := sign(xy)
+ \lfloor {|x| \over |y|}\rfloor`.
+
+ This operation traps if the divisor is zero, or if the result is not
+ representable in `B` bits two's complement. This only happens
+ when `x = -2^{B-1}, y = -1`.
+ "#,
+ &formats.binary,
+ )
+ .operands_in(vec![x, y])
+ .operands_out(vec![a])
+ .can_trap(true),
+ );
+
+ ig.push(
+ Inst::new(
+ "urem",
+ r#"
+ Unsigned integer remainder.
+
+ This operation traps if the divisor is zero.
+ "#,
+ &formats.binary,
+ )
+ .operands_in(vec![x, y])
+ .operands_out(vec![a])
+ .can_trap(true),
+ );
+
+ ig.push(
+ Inst::new(
+ "srem",
+ r#"
+ Signed integer remainder. The result has the sign of the dividend.
+
+ This operation traps if the divisor is zero.
+ "#,
+ &formats.binary,
+ )
+ .operands_in(vec![x, y])
+ .operands_out(vec![a])
+ .can_trap(true),
+ );
+
+ let a = &Operand::new("a", iB);
+ let x = &Operand::new("x", iB);
+ let Y = &Operand::new("Y", &imm.imm64);
+
+ ig.push(
+ Inst::new(
+ "iadd_imm",
+ r#"
+ Add immediate integer.
+
+ Same as `iadd`, but one operand is an immediate constant.
+
+ Polymorphic over all scalar integer types, but does not support vector
+ types.
+ "#,
+ &formats.binary_imm64,
+ )
+ .operands_in(vec![x, Y])
+ .operands_out(vec![a]),
+ );
+
+ ig.push(
+ Inst::new(
+ "imul_imm",
+ r#"
+ Integer multiplication by immediate constant.
+
+ Polymorphic over all scalar integer types, but does not support vector
+ types.
+ "#,
+ &formats.binary_imm64,
+ )
+ .operands_in(vec![x, Y])
+ .operands_out(vec![a]),
+ );
+
+ ig.push(
+ Inst::new(
+ "udiv_imm",
+ r#"
+ Unsigned integer division by an immediate constant.
+
+ This operation traps if the divisor is zero.
+ "#,
+ &formats.binary_imm64,
+ )
+ .operands_in(vec![x, Y])
+ .operands_out(vec![a]),
+ );
+
+ ig.push(
+ Inst::new(
+ "sdiv_imm",
+ r#"
+ Signed integer division by an immediate constant.
+
+ This operation traps if the divisor is zero, or if the result is not
+ representable in `B` bits two's complement. This only happens
+ when `x = -2^{B-1}, Y = -1`.
+ "#,
+ &formats.binary_imm64,
+ )
+ .operands_in(vec![x, Y])
+ .operands_out(vec![a]),
+ );
+
+ ig.push(
+ Inst::new(
+ "urem_imm",
+ r#"
+ Unsigned integer remainder with immediate divisor.
+
+ This operation traps if the divisor is zero.
+ "#,
+ &formats.binary_imm64,
+ )
+ .operands_in(vec![x, Y])
+ .operands_out(vec![a]),
+ );
+
+ ig.push(
+ Inst::new(
+ "srem_imm",
+ r#"
+ Signed integer remainder with immediate divisor.
+
+ This operation traps if the divisor is zero.
+ "#,
+ &formats.binary_imm64,
+ )
+ .operands_in(vec![x, Y])
+ .operands_out(vec![a]),
+ );
+
+ ig.push(
+ Inst::new(
+ "irsub_imm",
+ r#"
+ Immediate reverse wrapping subtraction: `a := Y - x \pmod{2^B}`.
+
+ Also works as integer negation when `Y = 0`. To compute `x - Y`
+ instead, use `iadd_imm` with a negative immediate operand.
+
+ Polymorphic over all scalar integer types, but does not support vector
+ types.
+ "#,
+ &formats.binary_imm64,
+ )
+ .operands_in(vec![x, Y])
+ .operands_out(vec![a]),
+ );
+
+ let a = &Operand::new("a", iB);
+ let x = &Operand::new("x", iB);
+ let y = &Operand::new("y", iB);
+
+ let c_in = &Operand::new("c_in", b1).with_doc("Input carry flag");
+ let c_out = &Operand::new("c_out", b1).with_doc("Output carry flag");
+ let b_in = &Operand::new("b_in", b1).with_doc("Input borrow flag");
+ let b_out = &Operand::new("b_out", b1).with_doc("Output borrow flag");
+
+ let c_if_in = &Operand::new("c_in", iflags);
+ let c_if_out = &Operand::new("c_out", iflags);
+ let b_if_in = &Operand::new("b_in", iflags);
+ let b_if_out = &Operand::new("b_out", iflags);
+
+ ig.push(
+ Inst::new(
+ "iadd_cin",
+ r#"
+ Add integers with carry in.
+
+ Same as `iadd` with an additional carry input. Computes:
+
+ ```text
+ a = x + y + c_{in} \pmod 2^B
+ ```
+
+ Polymorphic over all scalar integer types, but does not support vector
+ types.
+ "#,
+ &formats.ternary,
+ )
+ .operands_in(vec![x, y, c_in])
+ .operands_out(vec![a]),
+ );
+
+ ig.push(
+ Inst::new(
+ "iadd_ifcin",
+ r#"
+ Add integers with carry in.
+
+ Same as `iadd` with an additional carry flag input. Computes:
+
+ ```text
+ a = x + y + c_{in} \pmod 2^B
+ ```
+
+ Polymorphic over all scalar integer types, but does not support vector
+ types.
+ "#,
+ &formats.ternary,
+ )
+ .operands_in(vec![x, y, c_if_in])
+ .operands_out(vec![a]),
+ );
+
+ ig.push(
+ Inst::new(
+ "iadd_cout",
+ r#"
+ Add integers with carry out.
+
+ Same as `iadd` with an additional carry output.
+
+ ```text
+ a &= x + y \pmod 2^B \\
+ c_{out} &= x+y >= 2^B
+ ```
+
+ Polymorphic over all scalar integer types, but does not support vector
+ types.
+ "#,
+ &formats.binary,
+ )
+ .operands_in(vec![x, y])
+ .operands_out(vec![a, c_out]),
+ );
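+
+ // Worked example: for i8 operands 0xFF and 0x01, `iadd_cout` yields a = 0x00
+ // with the carry out set, mirroring Rust's `0xFFu8.overflowing_add(1) == (0, true)`.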
+
+ ig.push(
+ Inst::new(
+ "iadd_ifcout",
+ r#"
+ Add integers with carry out.
+
+ Same as `iadd` with an additional carry flag output.
+
+ ```text
+ a &= x + y \pmod 2^B \\
+ c_{out} &= x+y >= 2^B
+ ```
+
+ Polymorphic over all scalar integer types, but does not support vector
+ types.
+ "#,
+ &formats.binary,
+ )
+ .operands_in(vec![x, y])
+ .operands_out(vec![a, c_if_out]),
+ );
+
+ ig.push(
+ Inst::new(
+ "iadd_carry",
+ r#"
+ Add integers with carry in and out.
+
+ Same as `iadd` with an additional carry input and output.
+
+ ```text
+ a &= x + y + c_{in} \pmod 2^B \\
+ c_{out} &= x + y + c_{in} >= 2^B
+ ```
+
+ Polymorphic over all scalar integer types, but does not support vector
+ types.
+ "#,
+ &formats.ternary,
+ )
+ .operands_in(vec![x, y, c_in])
+ .operands_out(vec![a, c_out]),
+ );
+
+ ig.push(
+ Inst::new(
+ "iadd_ifcarry",
+ r#"
+ Add integers with carry in and out.
+
+ Same as `iadd` with an additional carry flag input and output.
+
+ ```text
+ a &= x + y + c_{in} \pmod 2^B \\
+ c_{out} &= x + y + c_{in} >= 2^B
+ ```
+
+ Polymorphic over all scalar integer types, but does not support vector
+ types.
+ "#,
+ &formats.ternary,
+ )
+ .operands_in(vec![x, y, c_if_in])
+ .operands_out(vec![a, c_if_out]),
+ );
+
+ ig.push(
+ Inst::new(
+ "isub_bin",
+ r#"
+ Subtract integers with borrow in.
+
+ Same as `isub` with an additional borrow flag input. Computes:
+
+ ```text
+ a = x - (y + b_{in}) \pmod 2^B
+ ```
+
+ Polymorphic over all scalar integer types, but does not support vector
+ types.
+ "#,
+ &formats.ternary,
+ )
+ .operands_in(vec![x, y, b_in])
+ .operands_out(vec![a]),
+ );
+
+ ig.push(
+ Inst::new(
+ "isub_ifbin",
+ r#"
+ Subtract integers with borrow in.
+
+ Same as `isub` with an additional borrow flag input. Computes:
+
+ ```text
+ a = x - (y + b_{in}) \pmod 2^B
+ ```
+
+ Polymorphic over all scalar integer types, but does not support vector
+ types.
+ "#,
+ &formats.ternary,
+ )
+ .operands_in(vec![x, y, b_if_in])
+ .operands_out(vec![a]),
+ );
+
+ ig.push(
+ Inst::new(
+ "isub_bout",
+ r#"
+ Subtract integers with borrow out.
+
+ Same as `isub` with an additional borrow flag output.
+
+ ```text
+ a &= x - y \pmod 2^B \\
+ b_{out} &= x < y
+ ```
+
+ Polymorphic over all scalar integer types, but does not support vector
+ types.
+ "#,
+ &formats.binary,
+ )
+ .operands_in(vec![x, y])
+ .operands_out(vec![a, b_out]),
+ );
+
+ ig.push(
+ Inst::new(
+ "isub_ifbout",
+ r#"
+ Subtract integers with borrow out.
+
+ Same as `isub` with an additional borrow flag output.
+
+ ```text
+ a &= x - y \pmod 2^B \\
+ b_{out} &= x < y
+ ```
+
+ Polymorphic over all scalar integer types, but does not support vector
+ types.
+ "#,
+ &formats.binary,
+ )
+ .operands_in(vec![x, y])
+ .operands_out(vec![a, b_if_out]),
+ );
+
+ ig.push(
+ Inst::new(
+ "isub_borrow",
+ r#"
+ Subtract integers with borrow in and out.
+
+ Same as `isub` with an additional borrow flag input and output.
+
+ ```text
+ a &= x - (y + b_{in}) \pmod 2^B \\
+ b_{out} &= x < y + b_{in}
+ ```
+
+ Polymorphic over all scalar integer types, but does not support vector
+ types.
+ "#,
+ &formats.ternary,
+ )
+ .operands_in(vec![x, y, b_in])
+ .operands_out(vec![a, b_out]),
+ );
+
+ ig.push(
+ Inst::new(
+ "isub_ifborrow",
+ r#"
+ Subtract integers with borrow in and out.
+
+ Same as `isub` with an additional borrow flag input and output.
+
+ ```text
+ a &= x - (y + b_{in}) \pmod 2^B \\
+ b_{out} &= x < y + b_{in}
+ ```
+
+ Polymorphic over all scalar integer types, but does not support vector
+ types.
+ "#,
+ &formats.ternary,
+ )
+ .operands_in(vec![x, y, b_if_in])
+ .operands_out(vec![a, b_if_out]),
+ );
+
+ let bits = &TypeVar::new(
+ "bits",
+ "Any integer, float, or boolean scalar or vector type",
+ TypeSetBuilder::new()
+ .ints(Interval::All)
+ .floats(Interval::All)
+ .bools(Interval::All)
+ .simd_lanes(Interval::All)
+ .includes_scalars(true)
+ .build(),
+ );
+ let x = &Operand::new("x", bits);
+ let y = &Operand::new("y", bits);
+ let a = &Operand::new("a", bits);
+
+ ig.push(
+ Inst::new(
+ "band",
+ r#"
+ Bitwise and.
+ "#,
+ &formats.binary,
+ )
+ .operands_in(vec![x, y])
+ .operands_out(vec![a]),
+ );
+
+ ig.push(
+ Inst::new(
+ "bor",
+ r#"
+ Bitwise or.
+ "#,
+ &formats.binary,
+ )
+ .operands_in(vec![x, y])
+ .operands_out(vec![a]),
+ );
+
+ ig.push(
+ Inst::new(
+ "bxor",
+ r#"
+ Bitwise xor.
+ "#,
+ &formats.binary,
+ )
+ .operands_in(vec![x, y])
+ .operands_out(vec![a]),
+ );
+
+ ig.push(
+ Inst::new(
+ "bnot",
+ r#"
+ Bitwise not.
+ "#,
+ &formats.unary,
+ )
+ .operands_in(vec![x])
+ .operands_out(vec![a]),
+ );
+
+ ig.push(
+ Inst::new(
+ "band_not",
+ r#"
+ Bitwise and not.
+
+ Computes `x & ~y`.
+ "#,
+ &formats.binary,
+ )
+ .operands_in(vec![x, y])
+ .operands_out(vec![a]),
+ );
+
+ ig.push(
+ Inst::new(
+ "bor_not",
+ r#"
+ Bitwise or not.
+
+ Computes `x | ~y`.
+ "#,
+ &formats.binary,
+ )
+ .operands_in(vec![x, y])
+ .operands_out(vec![a]),
+ );
+
+ ig.push(
+ Inst::new(
+ "bxor_not",
+ r#"
+ Bitwise xor not.
+
+ Computes `x ^ ~y`.
+ "#,
+ &formats.binary,
+ )
+ .operands_in(vec![x, y])
+ .operands_out(vec![a]),
+ );
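+
+ // For intuition, looking only at the low four bits: with x = 0b1100 and
+ // y = 0b1010, `band_not` gives 0b0100, `bor_not` gives 0b1101 and `bxor_not`
+ // gives 0b1001, i.e. `x op !y` in each case.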
+
+ let x = &Operand::new("x", iB);
+ let Y = &Operand::new("Y", &imm.imm64);
+ let a = &Operand::new("a", iB);
+
+ ig.push(
+ Inst::new(
+ "band_imm",
+ r#"
+ Bitwise and with immediate.
+
+ Same as `band`, but one operand is an immediate constant.
+
+ Polymorphic over all scalar integer types, but does not support vector
+ types.
+ "#,
+ &formats.binary_imm64,
+ )
+ .operands_in(vec![x, Y])
+ .operands_out(vec![a]),
+ );
+
+ ig.push(
+ Inst::new(
+ "bor_imm",
+ r#"
+ Bitwise or with immediate.
+
+ Same as `bor`, but one operand is an immediate constant.
+
+ Polymorphic over all scalar integer types, but does not support vector
+ types.
+ "#,
+ &formats.binary_imm64,
+ )
+ .operands_in(vec![x, Y])
+ .operands_out(vec![a]),
+ );
+
+ ig.push(
+ Inst::new(
+ "bxor_imm",
+ r#"
+ Bitwise xor with immediate.
+
+ Same as `bxor`, but one operand is an immediate constant.
+
+ Polymorphic over all scalar integer types, but does not support vector
+ types.
+ "#,
+ &formats.binary_imm64,
+ )
+ .operands_in(vec![x, Y])
+ .operands_out(vec![a]),
+ );
+
+ let x = &Operand::new("x", Int).with_doc("Scalar or vector value to shift");
+ let y = &Operand::new("y", iB).with_doc("Number of bits to shift");
+ let Y = &Operand::new("Y", &imm.imm64);
+ let a = &Operand::new("a", Int);
+
+ ig.push(
+ Inst::new(
+ "rotl",
+ r#"
+ Rotate left.
+
+ Rotate the bits in ``x`` by ``y`` places.
+ "#,
+ &formats.binary,
+ )
+ .operands_in(vec![x, y])
+ .operands_out(vec![a]),
+ );
+
+ ig.push(
+ Inst::new(
+ "rotr",
+ r#"
+ Rotate right.
+
+ Rotate the bits in ``x`` by ``y`` places.
+ "#,
+ &formats.binary,
+ )
+ .operands_in(vec![x, y])
+ .operands_out(vec![a]),
+ );
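+
+ // Worked example: rotating the i8 value 0b1001_0000 left by one gives
+ // 0b0010_0001 (the MSB wraps around to the LSB), the same result as Rust's
+ // `rotate_left`; `rotr` is the mirror image.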
+
+ ig.push(
+ Inst::new(
+ "rotl_imm",
+ r#"
+ Rotate left by immediate.
+ "#,
+ &formats.binary_imm64,
+ )
+ .operands_in(vec![x, Y])
+ .operands_out(vec![a]),
+ );
+
+ ig.push(
+ Inst::new(
+ "rotr_imm",
+ r#"
+ Rotate right by immediate.
+ "#,
+ &formats.binary_imm64,
+ )
+ .operands_in(vec![x, Y])
+ .operands_out(vec![a]),
+ );
+
+ ig.push(
+ Inst::new(
+ "ishl",
+ r#"
+ Integer shift left. Shift the bits in ``x`` towards the MSB by ``y``
+ places. Shift in zero bits to the LSB.
+
+ The shift amount is masked to the size of ``x``.
+
+ When shifting a B-bits integer type, this instruction computes:
+
+ ```text
+ s &:= y \pmod B,
+ a &:= x \cdot 2^s \pmod{2^B}.
+ ```
+ "#,
+ &formats.binary,
+ )
+ .operands_in(vec![x, y])
+ .operands_out(vec![a]),
+ );
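+
+ // Note on the masking rule above: for an i32 value B = 32, so a shift amount of
+ // 40 is reduced to 40 mod 32 = 8 and `ishl.i32 x, 40` shifts by 8 bits. This is
+ // analogous to Rust's `wrapping_shl`, which also masks the shift amount.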
+
+ ig.push(
+ Inst::new(
+ "ushr",
+ r#"
+ Unsigned shift right. Shift bits in ``x`` towards the LSB by ``y``
+ places, shifting in zero bits to the MSB. Also called a *logical
+ shift*.
+
+ The shift amount is masked to the size of the register.
+
+ When shifting a B-bits integer type, this instruction computes:
+
+ ```text
+ s &:= y \pmod B,
+ a &:= \lfloor x \cdot 2^{-s} \rfloor.
+ ```
+ "#,
+ &formats.binary,
+ )
+ .operands_in(vec![x, y])
+ .operands_out(vec![a]),
+ );
+
+ ig.push(
+ Inst::new(
+ "sshr",
+ r#"
+ Signed shift right. Shift bits in ``x`` towards the LSB by ``y``
+ places, shifting in sign bits to the MSB. Also called an *arithmetic
+ shift*.
+
+ The shift amount is masked to the size of the register.
+ "#,
+ &formats.binary,
+ )
+ .operands_in(vec![x, y])
+ .operands_out(vec![a]),
+ );
+
+ ig.push(
+ Inst::new(
+ "ishl_imm",
+ r#"
+ Integer shift left by immediate.
+
+ The shift amount is masked to the size of ``x``.
+ "#,
+ &formats.binary_imm64,
+ )
+ .operands_in(vec![x, Y])
+ .operands_out(vec![a]),
+ );
+
+ ig.push(
+ Inst::new(
+ "ushr_imm",
+ r#"
+ Unsigned shift right by immediate.
+
+ The shift amount is masked to the size of the register.
+ "#,
+ &formats.binary_imm64,
+ )
+ .operands_in(vec![x, Y])
+ .operands_out(vec![a]),
+ );
+
+ ig.push(
+ Inst::new(
+ "sshr_imm",
+ r#"
+ Signed shift right by immediate.
+
+ The shift amount is masked to the size of the register.
+ "#,
+ &formats.binary_imm64,
+ )
+ .operands_in(vec![x, Y])
+ .operands_out(vec![a]),
+ );
+
+ let x = &Operand::new("x", iB);
+ let a = &Operand::new("a", iB);
+
+ ig.push(
+ Inst::new(
+ "bitrev",
+ r#"
+ Reverse the bits of an integer.
+
+ Reverses the bits in ``x``.
+ "#,
+ &formats.unary,
+ )
+ .operands_in(vec![x])
+ .operands_out(vec![a]),
+ );
+
+ ig.push(
+ Inst::new(
+ "clz",
+ r#"
+ Count leading zero bits.
+
+ Starting from the MSB in ``x``, count the number of zero bits before
+ reaching the first one bit. When ``x`` is zero, returns the size of x
+ in bits.
+ "#,
+ &formats.unary,
+ )
+ .operands_in(vec![x])
+ .operands_out(vec![a]),
+ );
+
+ ig.push(
+ Inst::new(
+ "cls",
+ r#"
+ Count leading sign bits.
+
+ Starting from the MSB after the sign bit in ``x``, count the number of
+ consecutive bits identical to the sign bit. When ``x`` is 0 or -1,
+ returns one less than the size of x in bits.
+ "#,
+ &formats.unary,
+ )
+ .operands_in(vec![x])
+ .operands_out(vec![a]),
+ );
+
+ ig.push(
+ Inst::new(
+ "ctz",
+ r#"
+ Count trailing zeros.
+
+ Starting from the LSB in ``x``, count the number of zero bits before
+ reaching the first one bit. When ``x`` is zero, returns the size of x
+ in bits.
+ "#,
+ &formats.unary,
+ )
+ .operands_in(vec![x])
+ .operands_out(vec![a]),
+ );
+
+ ig.push(
+ Inst::new(
+ "popcnt",
+ r#"
+ Population count.
+
+ Count the number of one bits in ``x``.
+ "#,
+ &formats.unary,
+ )
+ .operands_in(vec![x])
+ .operands_out(vec![a]),
+ );
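+
+ // Worked example for the bit-counting group: for the i32 value 0x0000_00F0,
+ // `clz` returns 24, `ctz` returns 4 and `popcnt` returns 4 (cf. Rust's
+ // `leading_zeros`, `trailing_zeros` and `count_ones` on u32).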
+
+ let Float = &TypeVar::new(
+ "Float",
+ "A scalar or vector floating point number",
+ TypeSetBuilder::new()
+ .floats(Interval::All)
+ .simd_lanes(Interval::All)
+ .build(),
+ );
+ let Cond = &Operand::new("Cond", &imm.floatcc);
+ let x = &Operand::new("x", Float);
+ let y = &Operand::new("y", Float);
+ let a = &Operand::new("a", &Float.as_bool());
+
+ ig.push(
+ Inst::new(
+ "fcmp",
+ r#"
+ Floating point comparison.
+
+ Two IEEE 754-2008 floating point numbers, `x` and `y`, relate to each
+ other in exactly one of four ways:
+
+ == ==========================================
+ UN Unordered when one or both numbers is NaN.
+ EQ When `x = y`. (And `0.0 = -0.0`).
+ LT When `x < y`.
+ GT When `x > y`.
+ == ==========================================
+
+ The 14 `floatcc` condition codes each correspond to a subset of
+ the four relations, except for the empty set which would always be
+ false, and the full set which would always be true.
+
+ The condition codes are divided into 7 'ordered' conditions which don't
+ include UN, and 7 unordered conditions which all include UN.
+
+ +-------+------------+---------+------------+-------------------------+
+ |Ordered             |Unordered             |Condition                |
+ +=======+============+=========+============+=========================+
+ |ord    |EQ | LT | GT|uno      |UN          |NaNs absent / present.   |
+ +-------+------------+---------+------------+-------------------------+
+ |eq     |EQ          |ueq      |UN | EQ     |Equal                    |
+ +-------+------------+---------+------------+-------------------------+
+ |one    |LT | GT     |ne       |UN | LT | GT|Not equal                |
+ +-------+------------+---------+------------+-------------------------+
+ |lt     |LT          |ult      |UN | LT     |Less than                |
+ +-------+------------+---------+------------+-------------------------+
+ |le     |LT | EQ     |ule      |UN | LT | EQ|Less than or equal       |
+ +-------+------------+---------+------------+-------------------------+
+ |gt     |GT          |ugt      |UN | GT     |Greater than             |
+ +-------+------------+---------+------------+-------------------------+
+ |ge     |GT | EQ     |uge      |UN | GT | EQ|Greater than or equal    |
+ +-------+------------+---------+------------+-------------------------+
+
+ The standard C comparison operators, `<, <=, >, >=`, are all ordered,
+ so they are false if either operand is NaN. The C equality operator,
+ `==`, is ordered, and since inequality is defined as the logical
+ inverse it is *unordered*. They map to the `floatcc` condition
+ codes as follows:
+
+ ==== ====== ============
+ C `Cond` Subset
+ ==== ====== ============
+ `==` eq EQ
+ `!=` ne UN | LT | GT
+ `<` lt LT
+ `<=` le LT | EQ
+ `>` gt GT
+ `>=` ge GT | EQ
+ ==== ====== ============
+
+ This subset of condition codes also corresponds to the WebAssembly
+ floating point comparisons of the same name.
+
+ When this instruction compares floating point vectors, it returns a
+ boolean vector with the results of lane-wise comparisons.
+ "#,
+ &formats.float_compare,
+ )
+ .operands_in(vec![Cond, x, y])
+ .operands_out(vec![a]),
+ );
+
+ let f = &Operand::new("f", fflags);
+
+ ig.push(
+ Inst::new(
+ "ffcmp",
+ r#"
+ Floating point comparison returning flags.
+
+ Compares two numbers like `fcmp`, but returns floating point CPU
+ flags instead of testing a specific condition.
+ "#,
+ &formats.binary,
+ )
+ .operands_in(vec![x, y])
+ .operands_out(vec![f]),
+ );
+
+ let x = &Operand::new("x", Float);
+ let y = &Operand::new("y", Float);
+ let z = &Operand::new("z", Float);
+ let a = &Operand::new("a", Float).with_doc("Result of applying operator to each lane");
+
+ ig.push(
+ Inst::new(
+ "fadd",
+ r#"
+ Floating point addition.
+ "#,
+ &formats.binary,
+ )
+ .operands_in(vec![x, y])
+ .operands_out(vec![a]),
+ );
+
+ ig.push(
+ Inst::new(
+ "fsub",
+ r#"
+ Floating point subtraction.
+ "#,
+ &formats.binary,
+ )
+ .operands_in(vec![x, y])
+ .operands_out(vec![a]),
+ );
+
+ ig.push(
+ Inst::new(
+ "fmul",
+ r#"
+ Floating point multiplication.
+ "#,
+ &formats.binary,
+ )
+ .operands_in(vec![x, y])
+ .operands_out(vec![a]),
+ );
+
+ ig.push(
+ Inst::new(
+ "fdiv",
+ r#"
+ Floating point division.
+
+ Unlike the integer division instructions `sdiv` and
+ `udiv`, this can't trap. Division by zero is infinity or
+ NaN, depending on the dividend.
+ "#,
+ &formats.binary,
+ )
+ .operands_in(vec![x, y])
+ .operands_out(vec![a]),
+ );
+
+ ig.push(
+ Inst::new(
+ "sqrt",
+ r#"
+ Floating point square root.
+ "#,
+ &formats.unary,
+ )
+ .operands_in(vec![x])
+ .operands_out(vec![a]),
+ );
+
+ ig.push(
+ Inst::new(
+ "fma",
+ r#"
+ Floating point fused multiply-and-add.
+
+ Computes `a := xy+z` without any intermediate rounding of the
+ product.
+ "#,
+ &formats.ternary,
+ )
+ .operands_in(vec![x, y, z])
+ .operands_out(vec![a]),
+ );
+
+ let a = &Operand::new("a", Float).with_doc("``x`` with its sign bit inverted");
+
+ ig.push(
+ Inst::new(
+ "fneg",
+ r#"
+ Floating point negation.
+
+ Note that this is a pure bitwise operation.
+ "#,
+ &formats.unary,
+ )
+ .operands_in(vec![x])
+ .operands_out(vec![a]),
+ );
+
+ let a = &Operand::new("a", Float).with_doc("``x`` with its sign bit cleared");
+
+ ig.push(
+ Inst::new(
+ "fabs",
+ r#"
+ Floating point absolute value.
+
+ Note that this is a pure bitwise operation.
+ "#,
+ &formats.unary,
+ )
+ .operands_in(vec![x])
+ .operands_out(vec![a]),
+ );
+
+ let a = &Operand::new("a", Float).with_doc("``x`` with its sign bit changed to that of ``y``");
+
+ ig.push(
+ Inst::new(
+ "fcopysign",
+ r#"
+ Floating point copy sign.
+
+ Note that this is a pure bitwise operation. The sign bit from ``y`` is
+ copied to the sign bit of ``x``.
+ "#,
+ &formats.binary,
+ )
+ .operands_in(vec![x, y])
+ .operands_out(vec![a]),
+ );
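+
+ // For intuition: `fcopysign` of x = -1.5, y = 2.0 yields 1.5, and of x = 1.5,
+ // y = -0.0 yields -1.5; only the sign bit of ``y`` matters, as with Rust's
+ // `f64::copysign`.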
+
+ let a = &Operand::new("a", Float).with_doc("The smaller of ``x`` and ``y``");
+
+ ig.push(
+ Inst::new(
+ "fmin",
+ r#"
+ Floating point minimum, propagating NaNs.
+
+ If either operand is NaN, this returns a NaN.
+ "#,
+ &formats.binary,
+ )
+ .operands_in(vec![x, y])
+ .operands_out(vec![a]),
+ );
+
+ ig.push(
+ Inst::new(
+ "fmin_pseudo",
+ r#"
+ Floating point pseudo-minimum, propagating NaNs. This behaves differently from ``fmin``.
+ See https://github.com/WebAssembly/simd/pull/122 for background.
+
+ The behaviour is defined as ``fmin_pseudo(a, b) = (b < a) ? b : a``, and the behaviour
+ for zero or NaN inputs follows from the behaviour of ``<`` with such inputs.
+ "#,
+ &formats.binary,
+ )
+ .operands_in(vec![x, y])
+ .operands_out(vec![a]),
+ );
+
+ let a = &Operand::new("a", Float).with_doc("The larger of ``x`` and ``y``");
+
+ ig.push(
+ Inst::new(
+ "fmax",
+ r#"
+ Floating point maximum, propagating NaNs.
+
+ If either operand is NaN, this returns a NaN.
+ "#,
+ &formats.binary,
+ )
+ .operands_in(vec![x, y])
+ .operands_out(vec![a]),
+ );
+
+ ig.push(
+ Inst::new(
+ "fmax_pseudo",
+ r#"
+ Floating point pseudo-maximum, propagating NaNs. This behaves differently from ``fmax``.
+ See https://github.com/WebAssembly/simd/pull/122 for background.
+
+ The behaviour is defined as ``fmax_pseudo(a, b) = (a < b) ? b : a``, and the behaviour
+ for zero or NaN inputs follows from the behaviour of ``<`` with such inputs.
+ "#,
+ &formats.binary,
+ )
+ .operands_in(vec![x, y])
+ .operands_out(vec![a]),
+ );
+
+ let a = &Operand::new("a", Float).with_doc("``x`` rounded to integral value");
+
+ ig.push(
+ Inst::new(
+ "ceil",
+ r#"
+ Round a floating point value to an integral value, towards positive infinity.
+ "#,
+ &formats.unary,
+ )
+ .operands_in(vec![x])
+ .operands_out(vec![a]),
+ );
+
+ ig.push(
+ Inst::new(
+ "floor",
+ r#"
+ Round a floating point value to an integral value, towards negative infinity.
+ "#,
+ &formats.unary,
+ )
+ .operands_in(vec![x])
+ .operands_out(vec![a]),
+ );
+
+ ig.push(
+ Inst::new(
+ "trunc",
+ r#"
+ Round a floating point value to an integral value, towards zero.
+ "#,
+ &formats.unary,
+ )
+ .operands_in(vec![x])
+ .operands_out(vec![a]),
+ );
+
+ ig.push(
+ Inst::new(
+ "nearest",
+ r#"
+ Round a floating point value to the nearest integral value, with ties
+ to even.
+ "#,
+ &formats.unary,
+ )
+ .operands_in(vec![x])
+ .operands_out(vec![a]),
+ );
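+
+ // Worked example for the rounding group: for the input -1.5, `ceil` gives -1.0,
+ // `floor` gives -2.0, `trunc` gives -1.0 and `nearest` gives -2.0 (a tie between
+ // -1.0 and -2.0, resolved towards the even value).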
+
+ let a = &Operand::new("a", b1);
+ let x = &Operand::new("x", Ref);
+
+ ig.push(
+ Inst::new(
+ "is_null",
+ r#"
+ Reference verification.
+
+ The condition code determines if the reference type in question is
+ null or not.
+ "#,
+ &formats.unary,
+ )
+ .operands_in(vec![x])
+ .operands_out(vec![a]),
+ );
+
+ let a = &Operand::new("a", b1);
+ let x = &Operand::new("x", Ref);
+
+ ig.push(
+ Inst::new(
+ "is_invalid",
+ r#"
+ Reference verification.
+
+ The condition code determines if the reference type in question is
+ invalid or not.
+ "#,
+ &formats.unary,
+ )
+ .operands_in(vec![x])
+ .operands_out(vec![a]),
+ );
+
+ let Cond = &Operand::new("Cond", &imm.intcc);
+ let f = &Operand::new("f", iflags);
+ let a = &Operand::new("a", b1);
+
+ ig.push(
+ Inst::new(
+ "trueif",
+ r#"
+ Test integer CPU flags for a specific condition.
+
+ Check the CPU flags in ``f`` against the ``Cond`` condition code and
+ return true when the condition code is satisfied.
+ "#,
+ &formats.int_cond,
+ )
+ .operands_in(vec![Cond, f])
+ .operands_out(vec![a]),
+ );
+
+ let Cond = &Operand::new("Cond", &imm.floatcc);
+ let f = &Operand::new("f", fflags);
+
+ ig.push(
+ Inst::new(
+ "trueff",
+ r#"
+ Test floating point CPU flags for a specific condition.
+
+ Check the CPU flags in ``f`` against the ``Cond`` condition code and
+ return true when the condition code is satisfied.
+ "#,
+ &formats.float_cond,
+ )
+ .operands_in(vec![Cond, f])
+ .operands_out(vec![a]),
+ );
+
+ let x = &Operand::new("x", Mem);
+ let a = &Operand::new("a", MemTo).with_doc("Bits of `x` reinterpreted");
+
+ ig.push(
+ Inst::new(
+ "bitcast",
+ r#"
+ Reinterpret the bits in `x` as a different type.
+
+ The input and output types must be storable to memory and of the same
+ size. A bitcast is equivalent to storing one type and loading the other
+ type from the same address.
+ "#,
+ &formats.unary,
+ )
+ .operands_in(vec![x])
+ .operands_out(vec![a]),
+ );
+
+ let x = &Operand::new("x", Any);
+ let a = &Operand::new("a", AnyTo).with_doc("Bits of `x` reinterpreted");
+
+ ig.push(
+ Inst::new(
+ "raw_bitcast",
+ r#"
+ Cast the bits in `x` as a different type of the same bit width.
+
+ This instruction does not change the data's representation but allows
+ data in registers to be used as different types, e.g. an i32x4 as a
+ b8x16. The only constraint on the result `a` is that it can be
+ `raw_bitcast` back to the original type. Also, in a raw_bitcast between
+ vector types with the same number of lanes, the value of each result
+ lane is a raw_bitcast of the corresponding operand lane. TODO there is
+ currently no mechanism for enforcing the bit width constraint.
+ "#,
+ &formats.unary,
+ )
+ .operands_in(vec![x])
+ .operands_out(vec![a]),
+ );
+
+ let a = &Operand::new("a", TxN).with_doc("A vector value");
+ let s = &Operand::new("s", &TxN.lane_of()).with_doc("A scalar value");
+
+ ig.push(
+ Inst::new(
+ "scalar_to_vector",
+ r#"
+ Copies a scalar value to a vector value. The scalar is copied into the
+ least significant lane of the vector, and all other lanes will be zero.
+ "#,
+ &formats.unary,
+ )
+ .operands_in(vec![s])
+ .operands_out(vec![a]),
+ );
+
+ let Bool = &TypeVar::new(
+ "Bool",
+ "A scalar or vector boolean type",
+ TypeSetBuilder::new()
+ .bools(Interval::All)
+ .simd_lanes(Interval::All)
+ .build(),
+ );
+
+ let BoolTo = &TypeVar::new(
+ "BoolTo",
+ "A smaller boolean type with the same number of lanes",
+ TypeSetBuilder::new()
+ .bools(Interval::All)
+ .simd_lanes(Interval::All)
+ .build(),
+ );
+
+ let x = &Operand::new("x", Bool);
+ let a = &Operand::new("a", BoolTo);
+
+ ig.push(
+ Inst::new(
+ "breduce",
+ r#"
+ Convert `x` to a smaller boolean type in the platform-defined way.
+
+ The result type must have the same number of vector lanes as the input,
+ and each lane must not have more bits than the input lanes. If the
+ input and output types are the same, this is a no-op.
+ "#,
+ &formats.unary,
+ )
+ .operands_in(vec![x])
+ .operands_out(vec![a])
+ .constraints(vec![WiderOrEq(Bool.clone(), BoolTo.clone())]),
+ );
+
+ let BoolTo = &TypeVar::new(
+ "BoolTo",
+ "A larger boolean type with the same number of lanes",
+ TypeSetBuilder::new()
+ .bools(Interval::All)
+ .simd_lanes(Interval::All)
+ .build(),
+ );
+ let x = &Operand::new("x", Bool);
+ let a = &Operand::new("a", BoolTo);
+
+ ig.push(
+ Inst::new(
+ "bextend",
+ r#"
+ Convert `x` to a larger boolean type in the platform-defined way.
+
+ The result type must have the same number of vector lanes as the input,
+ and each lane must not have fewer bits than the input lanes. If the
+ input and output types are the same, this is a no-op.
+ "#,
+ &formats.unary,
+ )
+ .operands_in(vec![x])
+ .operands_out(vec![a])
+ .constraints(vec![WiderOrEq(BoolTo.clone(), Bool.clone())]),
+ );
+
+ let IntTo = &TypeVar::new(
+ "IntTo",
+ "An integer type with the same number of lanes",
+ TypeSetBuilder::new()
+ .ints(Interval::All)
+ .simd_lanes(Interval::All)
+ .build(),
+ );
+ let x = &Operand::new("x", Bool);
+ let a = &Operand::new("a", IntTo);
+
+ ig.push(
+ Inst::new(
+ "bint",
+ r#"
+ Convert `x` to an integer.
+
+ True maps to 1 and false maps to 0. The result type must have the same
+ number of vector lanes as the input.
+ "#,
+ &formats.unary,
+ )
+ .operands_in(vec![x])
+ .operands_out(vec![a]),
+ );
+
+ ig.push(
+ Inst::new(
+ "bmask",
+ r#"
+ Convert `x` to an integer mask.
+
+ True maps to all 1s and false maps to all 0s. The result type must have
+ the same number of vector lanes as the input.
+ "#,
+ &formats.unary,
+ )
+ .operands_in(vec![x])
+ .operands_out(vec![a]),
+ );
+
+ let Int = &TypeVar::new(
+ "Int",
+ "A scalar or vector integer type",
+ TypeSetBuilder::new()
+ .ints(Interval::All)
+ .simd_lanes(Interval::All)
+ .build(),
+ );
+
+ let IntTo = &TypeVar::new(
+ "IntTo",
+ "A smaller integer type with the same number of lanes",
+ TypeSetBuilder::new()
+ .ints(Interval::All)
+ .simd_lanes(Interval::All)
+ .build(),
+ );
+ let x = &Operand::new("x", Int);
+ let a = &Operand::new("a", IntTo);
+
+ ig.push(
+ Inst::new(
+ "ireduce",
+ r#"
+ Convert `x` to a smaller integer type by dropping high bits.
+
+ Each lane in `x` is converted to a smaller integer type by discarding
+ the most significant bits. This is the same as reducing modulo
+ `2^n`.
+
+ The result type must have the same number of vector lanes as the input,
+ and each lane must not have more bits than the input lanes. If the
+ input and output types are the same, this is a no-op.
+ "#,
+ &formats.unary,
+ )
+ .operands_in(vec![x])
+ .operands_out(vec![a])
+ .constraints(vec![WiderOrEq(Int.clone(), IntTo.clone())]),
+ );
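+
+ // Worked example: `ireduce.i8` of the i32 value 0x1234_5678 keeps only the low
+ // eight bits, giving 0x78, i.e. reduction modulo 2^8, like a truncating
+ // `as u8` cast in Rust.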
+
+ let I16or32xN = &TypeVar::new(
+ "I16or32xN",
+ "A SIMD vector type containing integer lanes 16 or 32 bits wide",
+ TypeSetBuilder::new()
+ .ints(16..32)
+ .simd_lanes(4..8)
+ .includes_scalars(false)
+ .build(),
+ );
+
+ let x = &Operand::new("x", I16or32xN);
+ let y = &Operand::new("y", I16or32xN);
+ let a = &Operand::new("a", &I16or32xN.split_lanes());
+
+ ig.push(
+ Inst::new(
+ "snarrow",
+ r#"
+ Combine `x` and `y` into a vector with twice the lanes but half the integer width while
+ saturating overflowing values to the signed maximum and minimum.
+
+ The lanes will be concatenated after narrowing. For example, when `x` and `y` are `i32x4`
+ and `x = [x3, x2, x1, x0]` and `y = [y3, y2, y1, y0]`, then after narrowing the value
+ returned is an `i16x8`: `a = [y3', y2', y1', y0', x3', x2', x1', x0']`.
+ "#,
+ &formats.binary,
+ )
+ .operands_in(vec![x, y])
+ .operands_out(vec![a]),
+ );
+
+ ig.push(
+ Inst::new(
+ "unarrow",
+ r#"
+ Combine `x` and `y` into a vector with twice the lanes but half the integer width while
+ saturating overflowing values to the unsigned maximum and minimum.
+
+ Note that all input lanes are considered signed: any negative lanes will overflow and be
+ replaced with the unsigned minimum, `0x00`.
+
+ The lanes will be concatenated after narrowing. For example, when `x` and `y` are `i32x4`
+ and `x = [x3, x2, x1, x0]` and `y = [y3, y2, y1, y0]`, then after narrowing the value
+ returned is an `i16x8`: `a = [y3', y2', y1', y0', x3', x2', x1', x0']`.
+ "#,
+ &formats.binary,
+ )
+ .operands_in(vec![x, y])
+ .operands_out(vec![a]),
+ );
+
+ let I8or16xN = &TypeVar::new(
+ "I8or16xN",
+ "A SIMD vector type containing integer lanes 8 or 16 bits wide.",
+ TypeSetBuilder::new()
+ .ints(8..16)
+ .simd_lanes(8..16)
+ .includes_scalars(false)
+ .build(),
+ );
+
+ let x = &Operand::new("x", I8or16xN);
+ let a = &Operand::new("a", &I8or16xN.merge_lanes());
+
+ ig.push(
+ Inst::new(
+ "swiden_low",
+ r#"
+ Widen the low lanes of `x` using signed extension.
+
+ This will double the lane width and halve the number of lanes.
+ "#,
+ &formats.unary,
+ )
+ .operands_in(vec![x])
+ .operands_out(vec![a]),
+ );
+
+ ig.push(
+ Inst::new(
+ "swiden_high",
+ r#"
+ Widen the high lanes of `x` using signed extension.
+
+ This will double the lane width and halve the number of lanes.
+ "#,
+ &formats.unary,
+ )
+ .operands_in(vec![x])
+ .operands_out(vec![a]),
+ );
+
+ ig.push(
+ Inst::new(
+ "uwiden_low",
+ r#"
+ Widen the low lanes of `x` using unsigned extension.
+
+ This will double the lane width and halve the number of lanes.
+ "#,
+ &formats.unary,
+ )
+ .operands_in(vec![x])
+ .operands_out(vec![a]),
+ );
+
+ ig.push(
+ Inst::new(
+ "uwiden_high",
+ r#"
+ Widen the high lanes of `x` using unsigned extension.
+
+ This will double the lane width and halve the number of lanes.
+ "#,
+ &formats.unary,
+ )
+ .operands_in(vec![x])
+ .operands_out(vec![a]),
+ );
+
+ let I16x8 = &TypeVar::new(
+ "I16x8",
+ "A SIMD vector type containing 8 integer lanes each 16 bits wide.",
+ TypeSetBuilder::new()
+ .ints(16..16)
+ .simd_lanes(8..8)
+ .includes_scalars(false)
+ .build(),
+ );
+
+ let x = &Operand::new("x", I16x8);
+ let y = &Operand::new("y", I16x8);
+ let a = &Operand::new("a", &I16x8.merge_lanes());
+
+ ig.push(
+ Inst::new(
+ "widening_pairwise_dot_product_s",
+ r#"
+ Takes corresponding elements in `x` and `y`, performs a sign-extending length-doubling
+ multiplication on them, then adds adjacent pairs of elements to form the result. For
+ example, if the input vectors are `[x3, x2, x1, x0]` and `[y3, y2, y1, y0]`, it produces
+ the vector `[r1, r0]`, where `r1 = sx(x3) * sx(y3) + sx(x2) * sx(y2)` and
+ `r0 = sx(x1) * sx(y1) + sx(x0) * sx(y0)`, and `sx(n)` sign-extends `n` to twice its width.
+
+ This will double the lane width and halve the number of lanes. So the resulting
+ vector has the same number of bits as `x` and `y` do (individually).
+
+ See https://github.com/WebAssembly/simd/pull/127 for background info.
+ "#,
+ &formats.binary,
+ )
+ .operands_in(vec![x, y])
+ .operands_out(vec![a]),
+ );
+
+ let IntTo = &TypeVar::new(
+ "IntTo",
+ "A larger integer type with the same number of lanes",
+ TypeSetBuilder::new()
+ .ints(Interval::All)
+ .simd_lanes(Interval::All)
+ .build(),
+ );
+ let x = &Operand::new("x", Int);
+ let a = &Operand::new("a", IntTo);
+
+ ig.push(
+ Inst::new(
+ "uextend",
+ r#"
+ Convert `x` to a larger integer type by zero-extending.
+
+ Each lane in `x` is converted to a larger integer type by adding
+ zeroes. The result has the same numerical value as `x` when both are
+ interpreted as unsigned integers.
+
+ The result type must have the same number of vector lanes as the input,
+ and each lane must not have fewer bits than the input lanes. If the
+ input and output types are the same, this is a no-op.
+ "#,
+ &formats.unary,
+ )
+ .operands_in(vec![x])
+ .operands_out(vec![a])
+ .constraints(vec![WiderOrEq(IntTo.clone(), Int.clone())]),
+ );
+
+ ig.push(
+ Inst::new(
+ "sextend",
+ r#"
+ Convert `x` to a larger integer type by sign-extending.
+
+ Each lane in `x` is converted to a larger integer type by replicating
+ the sign bit. The result has the same numerical value as `x` when both
+ are interpreted as signed integers.
+
+ The result type must have the same number of vector lanes as the input,
+ and each lane must not have fewer bits than the input lanes. If the
+ input and output types are the same, this is a no-op.
+ "#,
+ &formats.unary,
+ )
+ .operands_in(vec![x])
+ .operands_out(vec![a])
+ .constraints(vec![WiderOrEq(IntTo.clone(), Int.clone())]),
+ );
+
+ let FloatTo = &TypeVar::new(
+ "FloatTo",
+ "A scalar or vector floating point number",
+ TypeSetBuilder::new()
+ .floats(Interval::All)
+ .simd_lanes(Interval::All)
+ .build(),
+ );
+ let x = &Operand::new("x", Float);
+ let a = &Operand::new("a", FloatTo);
+
+ ig.push(
+ Inst::new(
+ "fpromote",
+ r#"
+ Convert `x` to a larger floating point format.
+
+ Each lane in `x` is converted to the destination floating point format.
+ This is an exact operation.
+
+ Cranelift currently only supports two floating point formats
+ - `f32` and `f64`. This may change in the future.
+
+ The result type must have the same number of vector lanes as the input,
+ and the result lanes must not have fewer bits than the input lanes. If
+ the input and output types are the same, this is a no-op.
+ "#,
+ &formats.unary,
+ )
+ .operands_in(vec![x])
+ .operands_out(vec![a])
+ .constraints(vec![WiderOrEq(FloatTo.clone(), Float.clone())]),
+ );
+
+ ig.push(
+ Inst::new(
+ "fdemote",
+ r#"
+ Convert `x` to a smaller floating point format.
+
+ Each lane in `x` is converted to the destination floating point format
+ by rounding to nearest, ties to even.
+
+ Cranelift currently only supports two floating point formats
+ - `f32` and `f64`. This may change in the future.
+
+ The result type must have the same number of vector lanes as the input,
+ and the result lanes must not have more bits than the input lanes. If
+ the input and output types are the same, this is a no-op.
+ "#,
+ &formats.unary,
+ )
+ .operands_in(vec![x])
+ .operands_out(vec![a])
+ .constraints(vec![WiderOrEq(Float.clone(), FloatTo.clone())]),
+ );
+
+ let x = &Operand::new("x", Float);
+ let a = &Operand::new("a", IntTo);
+
+ ig.push(
+ Inst::new(
+ "fcvt_to_uint",
+ r#"
+ Convert floating point to unsigned integer.
+
+ Each lane in `x` is converted to an unsigned integer by rounding
+ towards zero. If `x` is NaN or if the unsigned integral value cannot be
+ represented in the result type, this instruction traps.
+
+ The result type must have the same number of vector lanes as the input.
+ "#,
+ &formats.unary,
+ )
+ .operands_in(vec![x])
+ .operands_out(vec![a])
+ .can_trap(true),
+ );
+
+ ig.push(
+ Inst::new(
+ "fcvt_to_uint_sat",
+ r#"
+ Convert floating point to unsigned integer as fcvt_to_uint does, but
+ saturates the input instead of trapping. NaN and negative values are
+ converted to 0.
+ "#,
+ &formats.unary,
+ )
+ .operands_in(vec![x])
+ .operands_out(vec![a]),
+ );
+
+ ig.push(
+ Inst::new(
+ "fcvt_to_sint",
+ r#"
+ Convert floating point to signed integer.
+
+ Each lane in `x` is converted to a signed integer by rounding towards
+ zero. If `x` is NaN or if the signed integral value cannot be
+ represented in the result type, this instruction traps.
+
+ The result type must have the same number of vector lanes as the input.
+ "#,
+ &formats.unary,
+ )
+ .operands_in(vec![x])
+ .operands_out(vec![a])
+ .can_trap(true),
+ );
+
+ ig.push(
+ Inst::new(
+ "fcvt_to_sint_sat",
+ r#"
+ Convert floating point to signed integer as fcvt_to_sint does, but
+ saturates the input instead of trapping. NaN values are converted to 0.
+ "#,
+ &formats.unary,
+ )
+ .operands_in(vec![x])
+ .operands_out(vec![a]),
+ );
+
+ let x = &Operand::new("x", Int);
+ let a = &Operand::new("a", FloatTo);
+
+ ig.push(
+ Inst::new(
+ "fcvt_from_uint",
+ r#"
+ Convert unsigned integer to floating point.
+
+ Each lane in `x` is interpreted as an unsigned integer and converted to
+ floating point using round to nearest, ties to even.
+
+ The result type must have the same number of vector lanes as the input.
+ "#,
+ &formats.unary,
+ )
+ .operands_in(vec![x])
+ .operands_out(vec![a]),
+ );
+
+ ig.push(
+ Inst::new(
+ "fcvt_from_sint",
+ r#"
+ Convert signed integer to floating point.
+
+ Each lane in `x` is interpreted as a signed integer and converted to
+ floating point using round to nearest, ties to even.
+
+ The result type must have the same number of vector lanes as the input.
+ "#,
+ &formats.unary,
+ )
+ .operands_in(vec![x])
+ .operands_out(vec![a]),
+ );
+
+ let WideInt = &TypeVar::new(
+ "WideInt",
+ "An integer type with lanes from `i16` upwards",
+ TypeSetBuilder::new()
+ .ints(16..128)
+ .simd_lanes(Interval::All)
+ .build(),
+ );
+ let x = &Operand::new("x", WideInt);
+ let lo = &Operand::new("lo", &WideInt.half_width()).with_doc("The low bits of `x`");
+ let hi = &Operand::new("hi", &WideInt.half_width()).with_doc("The high bits of `x`");
+
+ ig.push(
+ Inst::new(
+ "isplit",
+ r#"
+ Split an integer into low and high parts.
+
+ Vectors of integers are split lane-wise, so the results have the same
+ number of lanes as the input, but the lanes are half the size.
+
+ Returns the low half of `x` and the high half of `x` as two independent
+ values.
+ "#,
+ &formats.unary,
+ )
+ .operands_in(vec![x])
+ .operands_out(vec![lo, hi])
+ .is_ghost(true),
+ );
+
+ let NarrowInt = &TypeVar::new(
+ "NarrowInt",
+ "An integer type with lanes from `i8` to `i64`",
+ TypeSetBuilder::new()
+ .ints(8..64)
+ .simd_lanes(Interval::All)
+ .build(),
+ );
+
+ let lo = &Operand::new("lo", NarrowInt);
+ let hi = &Operand::new("hi", NarrowInt);
+ let a = &Operand::new("a", &NarrowInt.double_width())
+ .with_doc("The concatenation of `lo` and `hi`");
+
+ ig.push(
+ Inst::new(
+ "iconcat",
+ r#"
+ Concatenate low and high bits to form a larger integer type.
+
+ Vectors of integers are concatenated lane-wise such that the result has
+ the same number of lanes as the inputs, but the lanes are twice the
+ size.
+ "#,
+ &formats.binary,
+ )
+ .operands_in(vec![lo, hi])
+ .operands_out(vec![a])
+ .is_ghost(true),
+ );
+
+ // Instructions relating to atomic memory accesses and fences
+ let AtomicMem = &TypeVar::new(
+ "AtomicMem",
+ "Any type that can be stored in memory and used in an atomic operation",
+ TypeSetBuilder::new().ints(8..64).build(),
+ );
+ let x = &Operand::new("x", AtomicMem).with_doc("Value to be atomically stored");
+ let a = &Operand::new("a", AtomicMem).with_doc("Value atomically loaded");
+ let e = &Operand::new("e", AtomicMem).with_doc("Expected value in CAS");
+ let p = &Operand::new("p", iAddr);
+ let MemFlags = &Operand::new("MemFlags", &imm.memflags);
+ let AtomicRmwOp = &Operand::new("AtomicRmwOp", &imm.atomic_rmw_op);
+
+ ig.push(
+ Inst::new(
+ "atomic_rmw",
+ r#"
+ Atomically read-modify-write memory at `p`, with second operand `x`. The old value is
+ returned. `p` has the type of the target word size, and `x` may be an integer type of
+ 8, 16, 32 or 64 bits, even on a 32-bit target. The type of the returned value is the
+ same as the type of `x`. This operation is sequentially consistent and creates
+ happens-before edges that order normal (non-atomic) loads and stores.
+ "#,
+ &formats.atomic_rmw,
+ )
+ .operands_in(vec![MemFlags, AtomicRmwOp, p, x])
+ .operands_out(vec![a])
+ .can_load(true)
+ .can_store(true)
+ .other_side_effects(true),
+ );
+
+ ig.push(
+ Inst::new(
+ "atomic_cas",
+ r#"
+ Perform an atomic compare-and-swap operation on memory at `p`, with expected value `e`,
+ storing `x` if the value at `p` equals `e`. The old value at `p` is returned,
+ regardless of whether the operation succeeds or fails. `p` has the type of the target
+ word size, and `x` and `e` must have the same type and the same size, which may be an
+ integer type of 8, 16, 32 or 64 bits, even on a 32-bit target. The type of the returned
+ value is the same as the type of `x` and `e`. This operation is sequentially
+ consistent and creates happens-before edges that order normal (non-atomic) loads and
+ stores.
+ "#,
+ &formats.atomic_cas,
+ )
+ .operands_in(vec![MemFlags, p, e, x])
+ .operands_out(vec![a])
+ .can_load(true)
+ .can_store(true)
+ .other_side_effects(true),
+ );
+
+ ig.push(
+ Inst::new(
+ "atomic_load",
+ r#"
+ Atomically load from memory at `p`.
+
+ This is a polymorphic instruction that can load any value type which has a memory
+ representation. It should only be used for integer types with 8, 16, 32 or 64 bits.
+ This operation is sequentially consistent and creates happens-before edges that order
+ normal (non-atomic) loads and stores.
+ "#,
+ &formats.load_no_offset,
+ )
+ .operands_in(vec![MemFlags, p])
+ .operands_out(vec![a])
+ .can_load(true)
+ .other_side_effects(true),
+ );
+
+ ig.push(
+ Inst::new(
+ "atomic_store",
+ r#"
+ Atomically store `x` to memory at `p`.
+
+ This is a polymorphic instruction that can store any value type with a memory
+ representation. It should only be used for integer types with 8, 16, 32 or 64 bits.
+ This operation is sequentially consistent and creates happens-before edges that order
+ normal (non-atomic) loads and stores.
+ "#,
+ &formats.store_no_offset,
+ )
+ .operands_in(vec![MemFlags, x, p])
+ .can_store(true)
+ .other_side_effects(true),
+ );
+
+ ig.push(
+ Inst::new(
+ "fence",
+ r#"
+ A memory fence. This must provide ordering to ensure that, at a minimum, neither loads
+ nor stores of any kind may move forwards or backwards across the fence. This operation
+ is sequentially consistent.
+ "#,
+ &formats.nullary,
+ )
+ .other_side_effects(true),
+ );
+
+ let Offset = &Operand::new("Offset", &imm.offset32).with_doc("Byte offset from base address");
+ let a = &Operand::new("a", TxN);
+
+ ig.push(
+ Inst::new(
+ "load_splat",
+ r#"
+ Load an element from memory at ``p + Offset`` and return a vector
+ whose lanes are all set to that element.
+
+ This is equivalent to ``load`` followed by ``splat``.
+ "#,
+ &formats.load,
+ )
+ .operands_in(vec![MemFlags, p, Offset])
+ .operands_out(vec![a])
+ .can_load(true),
+ );
+
+ ig.build()
+}
diff --git a/third_party/rust/cranelift-codegen-meta/src/shared/legalize.rs b/third_party/rust/cranelift-codegen-meta/src/shared/legalize.rs
new file mode 100644
index 0000000000..9a0d6cffde
--- /dev/null
+++ b/third_party/rust/cranelift-codegen-meta/src/shared/legalize.rs
@@ -0,0 +1,1087 @@
+use crate::cdsl::ast::{var, ExprBuilder, Literal};
+use crate::cdsl::instructions::{Bindable, Instruction, InstructionGroup};
+use crate::cdsl::xform::{TransformGroupBuilder, TransformGroups};
+
+use crate::shared::immediates::Immediates;
+use crate::shared::types::Float::{F32, F64};
+use crate::shared::types::Int::{I128, I16, I32, I64, I8};
+use cranelift_codegen_shared::condcodes::{CondCode, IntCC};
+
+#[allow(clippy::many_single_char_names, clippy::cognitive_complexity)]
+pub(crate) fn define(insts: &InstructionGroup, imm: &Immediates) -> TransformGroups {
+ let mut narrow = TransformGroupBuilder::new(
+ "narrow",
+ r#"
+ Legalize instructions by narrowing.
+
+ The transformations in the 'narrow' group work by expressing
+ instructions in terms of smaller types. Operations on vector types are
+ expressed in terms of vector types with fewer lanes, and integer
+ operations are expressed in terms of smaller integer types.
+ "#,
+ );
+
+ let mut widen = TransformGroupBuilder::new(
+ "widen",
+ r#"
+ Legalize instructions by widening.
+
+ The transformations in the 'widen' group work by expressing
+ instructions in terms of larger types.
+ "#,
+ );
+
+ let mut expand = TransformGroupBuilder::new(
+ "expand",
+ r#"
+ Legalize instructions by expansion.
+
+ Rewrite instructions in terms of other instructions, generally
+ operating on the same types as the original instructions.
+ "#,
+ );
+
+ // List of instructions.
+ let band = insts.by_name("band");
+ let band_imm = insts.by_name("band_imm");
+ let band_not = insts.by_name("band_not");
+ let bint = insts.by_name("bint");
+ let bitrev = insts.by_name("bitrev");
+ let bnot = insts.by_name("bnot");
+ let bor = insts.by_name("bor");
+ let bor_imm = insts.by_name("bor_imm");
+ let bor_not = insts.by_name("bor_not");
+ let brnz = insts.by_name("brnz");
+ let brz = insts.by_name("brz");
+ let br_icmp = insts.by_name("br_icmp");
+ let br_table = insts.by_name("br_table");
+ let bxor = insts.by_name("bxor");
+ let bxor_imm = insts.by_name("bxor_imm");
+ let bxor_not = insts.by_name("bxor_not");
+ let cls = insts.by_name("cls");
+ let clz = insts.by_name("clz");
+ let ctz = insts.by_name("ctz");
+ let copy = insts.by_name("copy");
+ let fabs = insts.by_name("fabs");
+ let f32const = insts.by_name("f32const");
+ let f64const = insts.by_name("f64const");
+ let fcopysign = insts.by_name("fcopysign");
+ let fcvt_from_sint = insts.by_name("fcvt_from_sint");
+ let fneg = insts.by_name("fneg");
+ let iadd = insts.by_name("iadd");
+ let iadd_cin = insts.by_name("iadd_cin");
+ let iadd_cout = insts.by_name("iadd_cout");
+ let iadd_carry = insts.by_name("iadd_carry");
+ let iadd_ifcin = insts.by_name("iadd_ifcin");
+ let iadd_ifcout = insts.by_name("iadd_ifcout");
+ let iadd_imm = insts.by_name("iadd_imm");
+ let icmp = insts.by_name("icmp");
+ let icmp_imm = insts.by_name("icmp_imm");
+ let iconcat = insts.by_name("iconcat");
+ let iconst = insts.by_name("iconst");
+ let ifcmp = insts.by_name("ifcmp");
+ let ifcmp_imm = insts.by_name("ifcmp_imm");
+ let imul = insts.by_name("imul");
+ let imul_imm = insts.by_name("imul_imm");
+ let ireduce = insts.by_name("ireduce");
+ let irsub_imm = insts.by_name("irsub_imm");
+ let ishl = insts.by_name("ishl");
+ let ishl_imm = insts.by_name("ishl_imm");
+ let isplit = insts.by_name("isplit");
+ let istore8 = insts.by_name("istore8");
+ let istore16 = insts.by_name("istore16");
+ let isub = insts.by_name("isub");
+ let isub_bin = insts.by_name("isub_bin");
+ let isub_bout = insts.by_name("isub_bout");
+ let isub_borrow = insts.by_name("isub_borrow");
+ let isub_ifbin = insts.by_name("isub_ifbin");
+ let isub_ifbout = insts.by_name("isub_ifbout");
+ let jump = insts.by_name("jump");
+ let load = insts.by_name("load");
+ let popcnt = insts.by_name("popcnt");
+ let resumable_trapnz = insts.by_name("resumable_trapnz");
+ let rotl = insts.by_name("rotl");
+ let rotl_imm = insts.by_name("rotl_imm");
+ let rotr = insts.by_name("rotr");
+ let rotr_imm = insts.by_name("rotr_imm");
+ let sdiv = insts.by_name("sdiv");
+ let sdiv_imm = insts.by_name("sdiv_imm");
+ let select = insts.by_name("select");
+ let sextend = insts.by_name("sextend");
+ let sshr = insts.by_name("sshr");
+ let sshr_imm = insts.by_name("sshr_imm");
+ let srem = insts.by_name("srem");
+ let srem_imm = insts.by_name("srem_imm");
+ let store = insts.by_name("store");
+ let udiv = insts.by_name("udiv");
+ let udiv_imm = insts.by_name("udiv_imm");
+ let uextend = insts.by_name("uextend");
+ let uload8 = insts.by_name("uload8");
+ let uload16 = insts.by_name("uload16");
+ let umulhi = insts.by_name("umulhi");
+ let ushr = insts.by_name("ushr");
+ let ushr_imm = insts.by_name("ushr_imm");
+ let urem = insts.by_name("urem");
+ let urem_imm = insts.by_name("urem_imm");
+ let trapif = insts.by_name("trapif");
+ let trapnz = insts.by_name("trapnz");
+ let trapz = insts.by_name("trapz");
+
+ // Custom expansions for memory objects.
+ expand.custom_legalize(insts.by_name("global_value"), "expand_global_value");
+ expand.custom_legalize(insts.by_name("heap_addr"), "expand_heap_addr");
+ expand.custom_legalize(insts.by_name("table_addr"), "expand_table_addr");
+
+ // Custom expansions for calls.
+ expand.custom_legalize(insts.by_name("call"), "expand_call");
+
+ // Custom expansions that need to change the CFG.
+ // TODO: Add sufficient XForm syntax that we don't need to hand-code these.
+ expand.custom_legalize(trapz, "expand_cond_trap");
+ expand.custom_legalize(trapnz, "expand_cond_trap");
+ expand.custom_legalize(resumable_trapnz, "expand_cond_trap");
+ expand.custom_legalize(br_table, "expand_br_table");
+ expand.custom_legalize(select, "expand_select");
+ widen.custom_legalize(select, "expand_select"); // small ints
+
+ // Custom expansions for floating point constants.
+ // These expansions require bit-casting or creating constant pool entries.
+ expand.custom_legalize(f32const, "expand_fconst");
+ expand.custom_legalize(f64const, "expand_fconst");
+
+ // Custom expansions for stack memory accesses.
+ expand.custom_legalize(insts.by_name("stack_load"), "expand_stack_load");
+ expand.custom_legalize(insts.by_name("stack_store"), "expand_stack_store");
+
+ // Custom expansions for small stack memory accesses.
+ widen.custom_legalize(insts.by_name("stack_load"), "expand_stack_load");
+ widen.custom_legalize(insts.by_name("stack_store"), "expand_stack_store");
+
+ // List of variables to reuse in patterns.
+ let x = var("x");
+ let y = var("y");
+ let z = var("z");
+ let a = var("a");
+ let a1 = var("a1");
+ let a2 = var("a2");
+ let a3 = var("a3");
+ let a4 = var("a4");
+ let b = var("b");
+ let b1 = var("b1");
+ let b2 = var("b2");
+ let b3 = var("b3");
+ let b4 = var("b4");
+ let b_in = var("b_in");
+ let b_int = var("b_int");
+ let c = var("c");
+ let c1 = var("c1");
+ let c2 = var("c2");
+ let c3 = var("c3");
+ let c4 = var("c4");
+ let c_in = var("c_in");
+ let c_int = var("c_int");
+ let d = var("d");
+ let d1 = var("d1");
+ let d2 = var("d2");
+ let d3 = var("d3");
+ let d4 = var("d4");
+ let e = var("e");
+ let e1 = var("e1");
+ let e2 = var("e2");
+ let e3 = var("e3");
+ let e4 = var("e4");
+ let f = var("f");
+ let f1 = var("f1");
+ let f2 = var("f2");
+ let xl = var("xl");
+ let xh = var("xh");
+ let yl = var("yl");
+ let yh = var("yh");
+ let al = var("al");
+ let ah = var("ah");
+ let cc = var("cc");
+ let block = var("block");
+ let ptr = var("ptr");
+ let flags = var("flags");
+ let offset = var("off");
+ let vararg = var("vararg");
+
+ narrow.custom_legalize(load, "narrow_load");
+ narrow.custom_legalize(store, "narrow_store");
+
+ // iconst.i64 can't be legalized in the meta language (because integer literals can't be
+ // embedded as part of arguments), so use a custom legalization for now.
+ narrow.custom_legalize(iconst, "narrow_iconst");
+
+ for &(ty, ty_half) in &[(I128, I64), (I64, I32)] {
+ let inst = uextend.bind(ty).bind(ty_half);
+ narrow.legalize(
+ def!(a = inst(x)),
+ vec![
+ def!(ah = iconst(Literal::constant(&imm.imm64, 0))),
+ def!(a = iconcat(x, ah)),
+ ],
+ );
+ }
+
+ for &(ty, ty_half, shift) in &[(I128, I64, 63), (I64, I32, 31)] {
+ let inst = sextend.bind(ty).bind(ty_half);
+ narrow.legalize(
+ def!(a = inst(x)),
+ vec![
+ def!(ah = sshr_imm(x, Literal::constant(&imm.imm64, shift))), // splat sign bit to whole number
+ def!(a = iconcat(x, ah)),
+ ],
+ );
+ }
+
+ for &bin_op in &[band, bor, bxor, band_not, bor_not, bxor_not] {
+ narrow.legalize(
+ def!(a = bin_op(x, y)),
+ vec![
+ def!((xl, xh) = isplit(x)),
+ def!((yl, yh) = isplit(y)),
+ def!(al = bin_op(xl, yl)),
+ def!(ah = bin_op(xh, yh)),
+ def!(a = iconcat(al, ah)),
+ ],
+ );
+ }
+
+ narrow.legalize(
+ def!(a = bnot(x)),
+ vec![
+ def!((xl, xh) = isplit(x)),
+ def!(al = bnot(xl)),
+ def!(ah = bnot(xh)),
+ def!(a = iconcat(al, ah)),
+ ],
+ );
+
+ narrow.legalize(
+ def!(a = select(c, x, y)),
+ vec![
+ def!((xl, xh) = isplit(x)),
+ def!((yl, yh) = isplit(y)),
+ def!(al = select(c, xl, yl)),
+ def!(ah = select(c, xh, yh)),
+ def!(a = iconcat(al, ah)),
+ ],
+ );
+
+ for &ty in &[I128, I64] {
+ let block = var("block");
+ let block1 = var("block1");
+ let block2 = var("block2");
+
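+ // A wide `brz` takes the branch only when both halves are zero, so both
+ // halves are compared against zero and the results are and-ed before a
+ // single `brnz`. A wide `brnz` branches when either half is non-zero: it
+ // tests the low half first and falls through to a new block that tests
+ // the high half.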
+ narrow.legalize(
+ def!(brz.ty(x, block, vararg)),
+ vec![
+ def!((xl, xh) = isplit(x)),
+ def!(
+ a = icmp_imm(
+ Literal::enumerator_for(&imm.intcc, "eq"),
+ xl,
+ Literal::constant(&imm.imm64, 0)
+ )
+ ),
+ def!(
+ b = icmp_imm(
+ Literal::enumerator_for(&imm.intcc, "eq"),
+ xh,
+ Literal::constant(&imm.imm64, 0)
+ )
+ ),
+ def!(c = band(a, b)),
+ def!(brnz(c, block, vararg)),
+ ],
+ );
+
+ narrow.legalize(
+ def!(brnz.ty(x, block1, vararg)),
+ vec![
+ def!((xl, xh) = isplit(x)),
+ def!(brnz(xl, block1, vararg)),
+ def!(jump(block2, Literal::empty_vararg())),
+ block!(block2),
+ def!(brnz(xh, block1, vararg)),
+ ],
+ );
+ }
+
+ narrow.legalize(
+ def!(a = popcnt.I128(x)),
+ vec![
+ def!((xl, xh) = isplit(x)),
+ def!(e1 = popcnt(xl)),
+ def!(e2 = popcnt(xh)),
+ def!(e3 = iadd(e1, e2)),
+ def!(a = uextend(e3)),
+ ],
+ );
+
+ // TODO(ryzokuken): benchmark this and decide if branching is a faster
+ // approach than evaluating boolean expressions.
+
+ narrow.custom_legalize(icmp_imm, "narrow_icmp_imm");
+
+ let intcc_eq = Literal::enumerator_for(&imm.intcc, "eq");
+ let intcc_ne = Literal::enumerator_for(&imm.intcc, "ne");
+ for &(int_ty, int_ty_half) in &[(I64, I32), (I128, I64)] {
+ narrow.legalize(
+ def!(b = icmp.int_ty(intcc_eq, x, y)),
+ vec![
+ def!((xl, xh) = isplit(x)),
+ def!((yl, yh) = isplit(y)),
+ def!(b1 = icmp.int_ty_half(intcc_eq, xl, yl)),
+ def!(b2 = icmp.int_ty_half(intcc_eq, xh, yh)),
+ def!(b = band(b1, b2)),
+ ],
+ );
+
+ narrow.legalize(
+ def!(b = icmp.int_ty(intcc_ne, x, y)),
+ vec![
+ def!((xl, xh) = isplit(x)),
+ def!((yl, yh) = isplit(y)),
+ def!(b1 = icmp.int_ty_half(intcc_ne, xl, yl)),
+ def!(b2 = icmp.int_ty_half(intcc_ne, xh, yh)),
+ def!(b = bor(b1, b2)),
+ ],
+ );
+
+ use IntCC::*;
+ for cc in &[
+ SignedGreaterThan,
+ SignedGreaterThanOrEqual,
+ SignedLessThan,
+ SignedLessThanOrEqual,
+ UnsignedGreaterThan,
+ UnsignedGreaterThanOrEqual,
+ UnsignedLessThan,
+ UnsignedLessThanOrEqual,
+ ] {
+ let intcc_cc = Literal::enumerator_for(&imm.intcc, cc.to_static_str());
+ let cc1 = Literal::enumerator_for(&imm.intcc, cc.without_equal().to_static_str());
+ let cc2 =
+ Literal::enumerator_for(&imm.intcc, cc.inverse().without_equal().to_static_str());
+ let cc3 = Literal::enumerator_for(&imm.intcc, cc.unsigned().to_static_str());
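+ // The wide comparison is decided by the high halves when they differ:
+ // cc1 (the strict form of cc) makes it true, cc2 (the strict inverse)
+ // makes it false. When the high halves are equal, neither holds and the
+ // unsigned comparison cc3 of the low halves decides the result, giving
+ // the formula used below.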
+ narrow.legalize(
+ def!(b = icmp.int_ty(intcc_cc, x, y)),
+ vec![
+ def!((xl, xh) = isplit(x)),
+ def!((yl, yh) = isplit(y)),
+ // X = cc1 || (!cc2 && cc3)
+ def!(b1 = icmp.int_ty_half(cc1, xh, yh)),
+ def!(b2 = icmp.int_ty_half(cc2, xh, yh)),
+ def!(b3 = icmp.int_ty_half(cc3, xl, yl)),
+ def!(c1 = bnot(b2)),
+ def!(c2 = band(c1, b3)),
+ def!(b = bor(b1, c2)),
+ ],
+ );
+ }
+ }
+
+ // TODO(ryzokuken): explore the perf diff w/ x86_umulx and consider having a
+ // separate legalization for x86.
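+ // Schoolbook multiplication on the two halves: with x = xh*2^n + xl and
+ // y = yh*2^n + yl (n = half width), the low 2n bits of x*y are
+ //   low half:  xl*yl (wrapping)
+ //   high half: xh*yl + xl*yh + carry, where the carry is umulhi(xl, yl).
+ // The xh*yh term only affects bits above 2n and is therefore dropped.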
+ for &ty in &[I64, I128] {
+ narrow.legalize(
+ def!(a = imul.ty(x, y)),
+ vec![
+ def!((xl, xh) = isplit(x)),
+ def!((yl, yh) = isplit(y)),
+ def!(a1 = imul(xh, yl)),
+ def!(a2 = imul(xl, yh)),
+ def!(a3 = iadd(a1, a2)),
+ def!(a4 = umulhi(xl, yl)),
+ def!(ah = iadd(a3, a4)),
+ def!(al = imul(xl, yl)),
+ def!(a = iconcat(al, ah)),
+ ],
+ );
+ }
+
+ let zero = Literal::constant(&imm.imm64, 0);
+ narrow.legalize(
+ def!(a = iadd_imm.I128(x, c)),
+ vec![
+ def!(yh = iconst.I64(zero)),
+ def!(yl = iconst.I64(c)),
+ def!(y = iconcat.I64(yh, yl)),
+ def!(a = iadd(x, y)),
+ ],
+ );
+
+ // Widen instructions with one input operand.
+ for &op in &[bnot, popcnt] {
+ for &int_ty in &[I8, I16] {
+ widen.legalize(
+ def!(a = op.int_ty(b)),
+ vec![
+ def!(x = uextend.I32(b)),
+ def!(z = op.I32(x)),
+ def!(a = ireduce.int_ty(z)),
+ ],
+ );
+ }
+ }
+
+ // Widen instructions with two input operands.
+ let mut widen_two_arg = |signed: bool, op: &Instruction| {
+ for &int_ty in &[I8, I16] {
+ let sign_ext_op = if signed { sextend } else { uextend };
+ widen.legalize(
+ def!(a = op.int_ty(b, c)),
+ vec![
+ def!(x = sign_ext_op.I32(b)),
+ def!(y = sign_ext_op.I32(c)),
+ def!(z = op.I32(x, y)),
+ def!(a = ireduce.int_ty(z)),
+ ],
+ );
+ }
+ };
+
+ for bin_op in &[
+ iadd, isub, imul, udiv, urem, band, bor, bxor, band_not, bor_not, bxor_not,
+ ] {
+ widen_two_arg(false, bin_op);
+ }
+ for bin_op in &[sdiv, srem] {
+ widen_two_arg(true, bin_op);
+ }
+
+ // Widen instructions using immediate operands.
+ let mut widen_imm = |signed: bool, op: &Instruction| {
+ for &int_ty in &[I8, I16] {
+ let sign_ext_op = if signed { sextend } else { uextend };
+ widen.legalize(
+ def!(a = op.int_ty(b, c)),
+ vec![
+ def!(x = sign_ext_op.I32(b)),
+ def!(z = op.I32(x, c)),
+ def!(a = ireduce.int_ty(z)),
+ ],
+ );
+ }
+ };
+
+ for bin_op in &[
+ iadd_imm, imul_imm, udiv_imm, urem_imm, band_imm, bor_imm, bxor_imm, irsub_imm,
+ ] {
+ widen_imm(false, bin_op);
+ }
+ for bin_op in &[sdiv_imm, srem_imm] {
+ widen_imm(true, bin_op);
+ }
+
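+ // Extending to i32 introduces 24 (for i8) or 16 (for i16) extra leading
+ // zero/sign bits, so the 32-bit clz/cls result is corrected by subtracting
+ // that amount before reducing back to the original type.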
+ for &(int_ty, num) in &[(I8, 24), (I16, 16)] {
+ let imm = Literal::constant(&imm.imm64, -num);
+
+ widen.legalize(
+ def!(a = clz.int_ty(b)),
+ vec![
+ def!(c = uextend.I32(b)),
+ def!(d = clz.I32(c)),
+ def!(e = iadd_imm(d, imm)),
+ def!(a = ireduce.int_ty(e)),
+ ],
+ );
+
+ widen.legalize(
+ def!(a = cls.int_ty(b)),
+ vec![
+ def!(c = sextend.I32(b)),
+ def!(d = cls.I32(c)),
+ def!(e = iadd_imm(d, imm)),
+ def!(a = ireduce.int_ty(e)),
+ ],
+ );
+ }
+
+ for &(int_ty, num) in &[(I8, 1 << 8), (I16, 1 << 16)] {
+ let num = Literal::constant(&imm.imm64, num);
+ widen.legalize(
+ def!(a = ctz.int_ty(b)),
+ vec![
+ def!(c = uextend.I32(b)),
+ // Set the bit one past the original type's top bit so that, when `b` is
+ // zero, `ctz` returns the original bit width instead of 32.
+ def!(d = bor_imm(c, num)),
+ def!(e = ctz.I32(d)),
+ def!(a = ireduce.int_ty(e)),
+ ],
+ );
+ }
+
+ // iconst
+ for &int_ty in &[I8, I16] {
+ widen.legalize(
+ def!(a = iconst.int_ty(b)),
+ vec![def!(c = iconst.I32(b)), def!(a = ireduce.int_ty(c))],
+ );
+ }
+
+ for &extend_op in &[uextend, sextend] {
+ // The sign extension operators have two typevars: the result has one and controls the
+ // instruction, then the input has one.
+ let bound = extend_op.bind(I16).bind(I8);
+ widen.legalize(
+ def!(a = bound(b)),
+ vec![def!(c = extend_op.I32(b)), def!(a = ireduce(c))],
+ );
+ }
+
+ widen.legalize(
+ def!(store.I8(flags, a, ptr, offset)),
+ vec![
+ def!(b = uextend.I32(a)),
+ def!(istore8(flags, b, ptr, offset)),
+ ],
+ );
+
+ widen.legalize(
+ def!(store.I16(flags, a, ptr, offset)),
+ vec![
+ def!(b = uextend.I32(a)),
+ def!(istore16(flags, b, ptr, offset)),
+ ],
+ );
+
+ widen.legalize(
+ def!(a = load.I8(flags, ptr, offset)),
+ vec![
+ def!(b = uload8.I32(flags, ptr, offset)),
+ def!(a = ireduce(b)),
+ ],
+ );
+
+ widen.legalize(
+ def!(a = load.I16(flags, ptr, offset)),
+ vec![
+ def!(b = uload16.I32(flags, ptr, offset)),
+ def!(a = ireduce(b)),
+ ],
+ );
+
+ for &int_ty in &[I8, I16] {
+ widen.legalize(
+ def!(br_table.int_ty(x, y, z)),
+ vec![def!(b = uextend.I32(x)), def!(br_table(b, y, z))],
+ );
+ }
+
+ for &int_ty in &[I8, I16] {
+ widen.legalize(
+ def!(a = bint.int_ty(b)),
+ vec![def!(x = bint.I32(b)), def!(a = ireduce.int_ty(x))],
+ );
+ }
+
+ for &int_ty in &[I8, I16] {
+ for &op in &[ishl, ishl_imm, ushr, ushr_imm] {
+ widen.legalize(
+ def!(a = op.int_ty(b, c)),
+ vec![
+ def!(x = uextend.I32(b)),
+ def!(z = op.I32(x, c)),
+ def!(a = ireduce.int_ty(z)),
+ ],
+ );
+ }
+
+ for &op in &[sshr, sshr_imm] {
+ widen.legalize(
+ def!(a = op.int_ty(b, c)),
+ vec![
+ def!(x = sextend.I32(b)),
+ def!(z = op.I32(x, c)),
+ def!(a = ireduce.int_ty(z)),
+ ],
+ );
+ }
+
+ for cc in &["eq", "ne", "ugt", "ult", "uge", "ule"] {
+ let w_cc = Literal::enumerator_for(&imm.intcc, cc);
+ widen.legalize(
+ def!(a = icmp_imm.int_ty(w_cc, b, c)),
+ vec![def!(x = uextend.I32(b)), def!(a = icmp_imm(w_cc, x, c))],
+ );
+ widen.legalize(
+ def!(a = icmp.int_ty(w_cc, b, c)),
+ vec![
+ def!(x = uextend.I32(b)),
+ def!(y = uextend.I32(c)),
+ def!(a = icmp.I32(w_cc, x, y)),
+ ],
+ );
+ }
+
+ for cc in &["sgt", "slt", "sge", "sle"] {
+ let w_cc = Literal::enumerator_for(&imm.intcc, cc);
+ widen.legalize(
+ def!(a = icmp_imm.int_ty(w_cc, b, c)),
+ vec![def!(x = sextend.I32(b)), def!(a = icmp_imm(w_cc, x, c))],
+ );
+
+ widen.legalize(
+ def!(a = icmp.int_ty(w_cc, b, c)),
+ vec![
+ def!(x = sextend.I32(b)),
+ def!(y = sextend.I32(c)),
+ def!(a = icmp(w_cc, x, y)),
+ ],
+ );
+ }
+ }
+
+ for &ty in &[I8, I16] {
+ widen.legalize(
+ def!(brz.ty(x, block, vararg)),
+ vec![def!(a = uextend.I32(x)), def!(brz(a, block, vararg))],
+ );
+
+ widen.legalize(
+ def!(brnz.ty(x, block, vararg)),
+ vec![def!(a = uextend.I32(x)), def!(brnz(a, block, vararg))],
+ );
+ }
+
+ for &(ty_half, ty) in &[(I64, I128), (I32, I64)] {
+ let inst = ireduce.bind(ty_half).bind(ty);
+ expand.legalize(
+ def!(a = inst(x)),
+ vec![def!((b, c) = isplit(x)), def!(a = copy(b))],
+ );
+ }
+
+ // Expand integer operations with carry for RISC architectures that don't have
+ // the flags.
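+ // An unsigned addition overflows exactly when the result wraps around and
+ // becomes smaller than one of the operands; likewise a subtraction borrows
+ // exactly when the result becomes larger than the minuend. This lets the
+ // carry/borrow bit be recovered with a plain unsigned compare.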
+ let intcc_ult = Literal::enumerator_for(&imm.intcc, "ult");
+ expand.legalize(
+ def!((a, c) = iadd_cout(x, y)),
+ vec![def!(a = iadd(x, y)), def!(c = icmp(intcc_ult, a, x))],
+ );
+
+ let intcc_ugt = Literal::enumerator_for(&imm.intcc, "ugt");
+ expand.legalize(
+ def!((a, b) = isub_bout(x, y)),
+ vec![def!(a = isub(x, y)), def!(b = icmp(intcc_ugt, a, x))],
+ );
+
+ expand.legalize(
+ def!(a = iadd_cin(x, y, c)),
+ vec![
+ def!(a1 = iadd(x, y)),
+ def!(c_int = bint(c)),
+ def!(a = iadd(a1, c_int)),
+ ],
+ );
+
+ expand.legalize(
+ def!(a = isub_bin(x, y, b)),
+ vec![
+ def!(a1 = isub(x, y)),
+ def!(b_int = bint(b)),
+ def!(a = isub(a1, b_int)),
+ ],
+ );
+
+ expand.legalize(
+ def!((a, c) = iadd_carry(x, y, c_in)),
+ vec![
+ def!((a1, c1) = iadd_cout(x, y)),
+ def!(c_int = bint(c_in)),
+ def!((a, c2) = iadd_cout(a1, c_int)),
+ def!(c = bor(c1, c2)),
+ ],
+ );
+
+ expand.legalize(
+ def!((a, b) = isub_borrow(x, y, b_in)),
+ vec![
+ def!((a1, b1) = isub_bout(x, y)),
+ def!(b_int = bint(b_in)),
+ def!((a, b2) = isub_bout(a1, b_int)),
+ def!(b = bor(b1, b2)),
+ ],
+ );
+
+ // Expansion for fcvt_from_sint for smaller integer types.
+ // This uses expand and not widen because the controlling type variable for
+ // this instruction is f32/f64, which is legalized as part of the expand
+ // group.
+ for &dest_ty in &[F32, F64] {
+ for &src_ty in &[I8, I16] {
+ let bound_inst = fcvt_from_sint.bind(dest_ty).bind(src_ty);
+ expand.legalize(
+ def!(a = bound_inst(b)),
+ vec![
+ def!(x = sextend.I32(b)),
+ def!(a = fcvt_from_sint.dest_ty(x)),
+ ],
+ );
+ }
+ }
+
+ // Expansions for immediate operands that are out of range.
+ for &(inst_imm, inst) in &[
+ (iadd_imm, iadd),
+ (imul_imm, imul),
+ (sdiv_imm, sdiv),
+ (udiv_imm, udiv),
+ (srem_imm, srem),
+ (urem_imm, urem),
+ (band_imm, band),
+ (bor_imm, bor),
+ (bxor_imm, bxor),
+ (ifcmp_imm, ifcmp),
+ ] {
+ expand.legalize(
+ def!(a = inst_imm(x, y)),
+ vec![def!(a1 = iconst(y)), def!(a = inst(x, a1))],
+ );
+ }
+
+ expand.legalize(
+ def!(a = irsub_imm(y, x)),
+ vec![def!(a1 = iconst(x)), def!(a = isub(a1, y))],
+ );
+
+ // Rotates and shifts.
+ for &(inst_imm, inst) in &[
+ (rotl_imm, rotl),
+ (rotr_imm, rotr),
+ (ishl_imm, ishl),
+ (sshr_imm, sshr),
+ (ushr_imm, ushr),
+ ] {
+ expand.legalize(
+ def!(a = inst_imm(x, y)),
+ vec![def!(a1 = iconst.I32(y)), def!(a = inst(x, a1))],
+ );
+ }
+
+ expand.legalize(
+ def!(a = icmp_imm(cc, x, y)),
+ vec![def!(a1 = iconst(y)), def!(a = icmp(cc, x, a1))],
+ );
+
+ //# Expansions for *_not variants of bitwise ops.
+ for &(inst_not, inst) in &[(band_not, band), (bor_not, bor), (bxor_not, bxor)] {
+ expand.legalize(
+ def!(a = inst_not(x, y)),
+ vec![def!(a1 = bnot(y)), def!(a = inst(x, a1))],
+ );
+ }
+
+ //# Expand bnot using xor.
+ let minus_one = Literal::constant(&imm.imm64, -1);
+ expand.legalize(
+ def!(a = bnot(x)),
+ vec![def!(y = iconst(minus_one)), def!(a = bxor(x, y))],
+ );
+
+ //# Expand bitrev
+ //# Adapted from Stack Overflow.
+ //# https://stackoverflow.com/questions/746171/most-efficient-algorithm-for-bit-reversal-from-msb-lsb-to-lsb-msb-in-c
+ let imm64_1 = Literal::constant(&imm.imm64, 1);
+ let imm64_2 = Literal::constant(&imm.imm64, 2);
+ let imm64_4 = Literal::constant(&imm.imm64, 4);
+
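+ // Each step below swaps adjacent groups of bits: single bits, then 2-bit
+ // pairs, then nibbles (and bytes, half-words and words for the wider
+ // types), so the whole bit string is reversed after log2(width) steps.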
+ widen.legalize(
+ def!(a = bitrev.I8(x)),
+ vec![
+ def!(a1 = band_imm(x, Literal::constant(&imm.imm64, 0xaa))),
+ def!(a2 = ushr_imm(a1, imm64_1)),
+ def!(a3 = band_imm(x, Literal::constant(&imm.imm64, 0x55))),
+ def!(a4 = ishl_imm(a3, imm64_1)),
+ def!(b = bor(a2, a4)),
+ def!(b1 = band_imm(b, Literal::constant(&imm.imm64, 0xcc))),
+ def!(b2 = ushr_imm(b1, imm64_2)),
+ def!(b3 = band_imm(b, Literal::constant(&imm.imm64, 0x33))),
+ def!(b4 = ishl_imm(b3, imm64_2)),
+ def!(c = bor(b2, b4)),
+ def!(c1 = band_imm(c, Literal::constant(&imm.imm64, 0xf0))),
+ def!(c2 = ushr_imm(c1, imm64_4)),
+ def!(c3 = band_imm(c, Literal::constant(&imm.imm64, 0x0f))),
+ def!(c4 = ishl_imm(c3, imm64_4)),
+ def!(a = bor(c2, c4)),
+ ],
+ );
+
+ let imm64_8 = Literal::constant(&imm.imm64, 8);
+
+ widen.legalize(
+ def!(a = bitrev.I16(x)),
+ vec![
+ def!(a1 = band_imm(x, Literal::constant(&imm.imm64, 0xaaaa))),
+ def!(a2 = ushr_imm(a1, imm64_1)),
+ def!(a3 = band_imm(x, Literal::constant(&imm.imm64, 0x5555))),
+ def!(a4 = ishl_imm(a3, imm64_1)),
+ def!(b = bor(a2, a4)),
+ def!(b1 = band_imm(b, Literal::constant(&imm.imm64, 0xcccc))),
+ def!(b2 = ushr_imm(b1, imm64_2)),
+ def!(b3 = band_imm(b, Literal::constant(&imm.imm64, 0x3333))),
+ def!(b4 = ishl_imm(b3, imm64_2)),
+ def!(c = bor(b2, b4)),
+ def!(c1 = band_imm(c, Literal::constant(&imm.imm64, 0xf0f0))),
+ def!(c2 = ushr_imm(c1, imm64_4)),
+ def!(c3 = band_imm(c, Literal::constant(&imm.imm64, 0x0f0f))),
+ def!(c4 = ishl_imm(c3, imm64_4)),
+ def!(d = bor(c2, c4)),
+ def!(d1 = band_imm(d, Literal::constant(&imm.imm64, 0xff00))),
+ def!(d2 = ushr_imm(d1, imm64_8)),
+ def!(d3 = band_imm(d, Literal::constant(&imm.imm64, 0x00ff))),
+ def!(d4 = ishl_imm(d3, imm64_8)),
+ def!(a = bor(d2, d4)),
+ ],
+ );
+
+ let imm64_16 = Literal::constant(&imm.imm64, 16);
+
+ expand.legalize(
+ def!(a = bitrev.I32(x)),
+ vec![
+ def!(a1 = band_imm(x, Literal::constant(&imm.imm64, 0xaaaa_aaaa))),
+ def!(a2 = ushr_imm(a1, imm64_1)),
+ def!(a3 = band_imm(x, Literal::constant(&imm.imm64, 0x5555_5555))),
+ def!(a4 = ishl_imm(a3, imm64_1)),
+ def!(b = bor(a2, a4)),
+ def!(b1 = band_imm(b, Literal::constant(&imm.imm64, 0xcccc_cccc))),
+ def!(b2 = ushr_imm(b1, imm64_2)),
+ def!(b3 = band_imm(b, Literal::constant(&imm.imm64, 0x3333_3333))),
+ def!(b4 = ishl_imm(b3, imm64_2)),
+ def!(c = bor(b2, b4)),
+ def!(c1 = band_imm(c, Literal::constant(&imm.imm64, 0xf0f0_f0f0))),
+ def!(c2 = ushr_imm(c1, imm64_4)),
+ def!(c3 = band_imm(c, Literal::constant(&imm.imm64, 0x0f0f_0f0f))),
+ def!(c4 = ishl_imm(c3, imm64_4)),
+ def!(d = bor(c2, c4)),
+ def!(d1 = band_imm(d, Literal::constant(&imm.imm64, 0xff00_ff00))),
+ def!(d2 = ushr_imm(d1, imm64_8)),
+ def!(d3 = band_imm(d, Literal::constant(&imm.imm64, 0x00ff_00ff))),
+ def!(d4 = ishl_imm(d3, imm64_8)),
+ def!(e = bor(d2, d4)),
+ def!(e1 = ushr_imm(e, imm64_16)),
+ def!(e2 = ishl_imm(e, imm64_16)),
+ def!(a = bor(e1, e2)),
+ ],
+ );
+
+ #[allow(overflowing_literals)]
+ let imm64_0xaaaaaaaaaaaaaaaa = Literal::constant(&imm.imm64, 0xaaaa_aaaa_aaaa_aaaa);
+ let imm64_0x5555555555555555 = Literal::constant(&imm.imm64, 0x5555_5555_5555_5555);
+ #[allow(overflowing_literals)]
+ let imm64_0xcccccccccccccccc = Literal::constant(&imm.imm64, 0xcccc_cccc_cccc_cccc);
+ let imm64_0x3333333333333333 = Literal::constant(&imm.imm64, 0x3333_3333_3333_3333);
+ #[allow(overflowing_literals)]
+ let imm64_0xf0f0f0f0f0f0f0f0 = Literal::constant(&imm.imm64, 0xf0f0_f0f0_f0f0_f0f0);
+ let imm64_0x0f0f0f0f0f0f0f0f = Literal::constant(&imm.imm64, 0x0f0f_0f0f_0f0f_0f0f);
+ #[allow(overflowing_literals)]
+ let imm64_0xff00ff00ff00ff00 = Literal::constant(&imm.imm64, 0xff00_ff00_ff00_ff00);
+ let imm64_0x00ff00ff00ff00ff = Literal::constant(&imm.imm64, 0x00ff_00ff_00ff_00ff);
+ #[allow(overflowing_literals)]
+ let imm64_0xffff0000ffff0000 = Literal::constant(&imm.imm64, 0xffff_0000_ffff_0000);
+ let imm64_0x0000ffff0000ffff = Literal::constant(&imm.imm64, 0x0000_ffff_0000_ffff);
+ let imm64_32 = Literal::constant(&imm.imm64, 32);
+
+ expand.legalize(
+ def!(a = bitrev.I64(x)),
+ vec![
+ def!(a1 = band_imm(x, imm64_0xaaaaaaaaaaaaaaaa)),
+ def!(a2 = ushr_imm(a1, imm64_1)),
+ def!(a3 = band_imm(x, imm64_0x5555555555555555)),
+ def!(a4 = ishl_imm(a3, imm64_1)),
+ def!(b = bor(a2, a4)),
+ def!(b1 = band_imm(b, imm64_0xcccccccccccccccc)),
+ def!(b2 = ushr_imm(b1, imm64_2)),
+ def!(b3 = band_imm(b, imm64_0x3333333333333333)),
+ def!(b4 = ishl_imm(b3, imm64_2)),
+ def!(c = bor(b2, b4)),
+ def!(c1 = band_imm(c, imm64_0xf0f0f0f0f0f0f0f0)),
+ def!(c2 = ushr_imm(c1, imm64_4)),
+ def!(c3 = band_imm(c, imm64_0x0f0f0f0f0f0f0f0f)),
+ def!(c4 = ishl_imm(c3, imm64_4)),
+ def!(d = bor(c2, c4)),
+ def!(d1 = band_imm(d, imm64_0xff00ff00ff00ff00)),
+ def!(d2 = ushr_imm(d1, imm64_8)),
+ def!(d3 = band_imm(d, imm64_0x00ff00ff00ff00ff)),
+ def!(d4 = ishl_imm(d3, imm64_8)),
+ def!(e = bor(d2, d4)),
+ def!(e1 = band_imm(e, imm64_0xffff0000ffff0000)),
+ def!(e2 = ushr_imm(e1, imm64_16)),
+ def!(e3 = band_imm(e, imm64_0x0000ffff0000ffff)),
+ def!(e4 = ishl_imm(e3, imm64_16)),
+ def!(f = bor(e2, e4)),
+ def!(f1 = ushr_imm(f, imm64_32)),
+ def!(f2 = ishl_imm(f, imm64_32)),
+ def!(a = bor(f1, f2)),
+ ],
+ );
+
+ narrow.legalize(
+ def!(a = bitrev.I128(x)),
+ vec![
+ def!((xl, xh) = isplit(x)),
+ def!(yh = bitrev(xl)),
+ def!(yl = bitrev(xh)),
+ def!(a = iconcat(yl, yh)),
+ ],
+ );
+
+ // Floating-point sign manipulations.
+ for &(ty, const_inst, minus_zero) in &[
+ (F32, f32const, &Literal::bits(&imm.ieee32, 0x8000_0000)),
+ (
+ F64,
+ f64const,
+ &Literal::bits(&imm.ieee64, 0x8000_0000_0000_0000),
+ ),
+ ] {
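+ // `minus_zero` has only the sign bit set, so it doubles as a sign mask:
+ // clearing it yields fabs, flipping it yields fneg, and combining the
+ // magnitude of `x` with the sign of `y` yields fcopysign.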
+ expand.legalize(
+ def!(a = fabs.ty(x)),
+ vec![def!(b = const_inst(minus_zero)), def!(a = band_not(x, b))],
+ );
+
+ expand.legalize(
+ def!(a = fneg.ty(x)),
+ vec![def!(b = const_inst(minus_zero)), def!(a = bxor(x, b))],
+ );
+
+ expand.legalize(
+ def!(a = fcopysign.ty(x, y)),
+ vec![
+ def!(b = const_inst(minus_zero)),
+ def!(a1 = band_not(x, b)),
+ def!(a2 = band(y, b)),
+ def!(a = bor(a1, a2)),
+ ],
+ );
+ }
+
+ expand.custom_legalize(br_icmp, "expand_br_icmp");
+
+ let mut groups = TransformGroups::new();
+
+ let narrow_id = narrow.build_and_add_to(&mut groups);
+ let expand_id = expand.build_and_add_to(&mut groups);
+
+ // Expansions using CPU flags.
+ let mut expand_flags = TransformGroupBuilder::new(
+ "expand_flags",
+ r#"
+ Instruction expansions for architectures with flags.
+
+ Expand some instructions using CPU flags, then fall back to the normal
+ expansions. Not all architectures support CPU flags, so these patterns
+ are kept separate.
+ "#,
+ )
+ .chain_with(expand_id);
+
+ let imm64_0 = Literal::constant(&imm.imm64, 0);
+ let intcc_ne = Literal::enumerator_for(&imm.intcc, "ne");
+ let intcc_eq = Literal::enumerator_for(&imm.intcc, "eq");
+
+ expand_flags.legalize(
+ def!(trapnz(x, c)),
+ vec![
+ def!(a = ifcmp_imm(x, imm64_0)),
+ def!(trapif(intcc_ne, a, c)),
+ ],
+ );
+
+ expand_flags.legalize(
+ def!(trapz(x, c)),
+ vec![
+ def!(a = ifcmp_imm(x, imm64_0)),
+ def!(trapif(intcc_eq, a, c)),
+ ],
+ );
+
+ expand_flags.build_and_add_to(&mut groups);
+
+ // Narrow legalizations using CPU flags.
+ let mut narrow_flags = TransformGroupBuilder::new(
+ "narrow_flags",
+ r#"
+ Narrow instructions for architectures with flags.
+
+ Narrow some instructions using CPU flags, then fall back to the normal
+ legalizations. Not all architectures support CPU flags, so these
+ patterns are kept separate.
+ "#,
+ )
+ .chain_with(narrow_id);
+
+ narrow_flags.legalize(
+ def!(a = iadd(x, y)),
+ vec![
+ def!((xl, xh) = isplit(x)),
+ def!((yl, yh) = isplit(y)),
+ def!((al, c) = iadd_ifcout(xl, yl)),
+ def!(ah = iadd_ifcin(xh, yh, c)),
+ def!(a = iconcat(al, ah)),
+ ],
+ );
+
+ narrow_flags.legalize(
+ def!(a = isub(x, y)),
+ vec![
+ def!((xl, xh) = isplit(x)),
+ def!((yl, yh) = isplit(y)),
+ def!((al, b) = isub_ifbout(xl, yl)),
+ def!(ah = isub_ifbin(xh, yh, b)),
+ def!(a = iconcat(al, ah)),
+ ],
+ );
+
+ narrow_flags.build_and_add_to(&mut groups);
+
+ // TODO(ryzokuken): figure out a way to legalize iadd_c* to iadd_ifc* (and
+ // similarly isub_b* to isub_ifb*) on expand_flags so that this isn't required.
+ // Narrow legalizations for ISAs that don't have CPU flags.
+ let mut narrow_no_flags = TransformGroupBuilder::new(
+ "narrow_no_flags",
+ r#"
+ Narrow instructions for architectures without flags.
+
+ Narrow some instructions avoiding the use of CPU flags, then fall back
+ to the normal legalizations. Not all architectures support CPU flags,
+ so these patterns are kept separate.
+ "#,
+ )
+ .chain_with(narrow_id);
+
+ narrow_no_flags.legalize(
+ def!(a = iadd(x, y)),
+ vec![
+ def!((xl, xh) = isplit(x)),
+ def!((yl, yh) = isplit(y)),
+ def!((al, c) = iadd_cout(xl, yl)),
+ def!(ah = iadd_cin(xh, yh, c)),
+ def!(a = iconcat(al, ah)),
+ ],
+ );
+
+ narrow_no_flags.legalize(
+ def!(a = isub(x, y)),
+ vec![
+ def!((xl, xh) = isplit(x)),
+ def!((yl, yh) = isplit(y)),
+ def!((al, b) = isub_bout(xl, yl)),
+ def!(ah = isub_bin(xh, yh, b)),
+ def!(a = iconcat(al, ah)),
+ ],
+ );
+
+ narrow_no_flags.build_and_add_to(&mut groups);
+
+ // TODO The order of declarations unfortunately matters to be compatible with the Python code.
+ // When it's all migrated, we can put this next to the narrow/expand build_and_add_to calls
+ // above.
+ widen.build_and_add_to(&mut groups);
+
+ groups
+}
diff --git a/third_party/rust/cranelift-codegen-meta/src/shared/mod.rs b/third_party/rust/cranelift-codegen-meta/src/shared/mod.rs
new file mode 100644
index 0000000000..b185262ccd
--- /dev/null
+++ b/third_party/rust/cranelift-codegen-meta/src/shared/mod.rs
@@ -0,0 +1,101 @@
+//! Shared definitions for the Cranelift intermediate language.
+
+pub mod entities;
+pub mod formats;
+pub mod immediates;
+pub mod instructions;
+pub mod legalize;
+pub mod settings;
+pub mod types;
+
+use crate::cdsl::formats::{FormatStructure, InstructionFormat};
+use crate::cdsl::instructions::{AllInstructions, InstructionGroup};
+use crate::cdsl::settings::SettingGroup;
+use crate::cdsl::xform::TransformGroups;
+
+use crate::shared::entities::EntityRefs;
+use crate::shared::formats::Formats;
+use crate::shared::immediates::Immediates;
+
+use std::collections::HashMap;
+use std::iter::FromIterator;
+use std::rc::Rc;
+
+pub(crate) struct Definitions {
+ pub settings: SettingGroup,
+ pub all_instructions: AllInstructions,
+ pub instructions: InstructionGroup,
+ pub imm: Immediates,
+ pub formats: Formats,
+ pub transform_groups: TransformGroups,
+ pub entities: EntityRefs,
+}
+
+pub(crate) fn define() -> Definitions {
+ let mut all_instructions = AllInstructions::new();
+
+ let immediates = Immediates::new();
+ let entities = EntityRefs::new();
+ let formats = Formats::new(&immediates, &entities);
+ let instructions =
+ instructions::define(&mut all_instructions, &formats, &immediates, &entities);
+ let transform_groups = legalize::define(&instructions, &immediates);
+
+ Definitions {
+ settings: settings::define(),
+ all_instructions,
+ instructions,
+ imm: immediates,
+ formats,
+ transform_groups,
+ entities,
+ }
+}
+
+impl Definitions {
+ /// Verifies certain properties of formats.
+ ///
+ /// - Formats must be uniquely named: if two formats have the same name, they must refer to the
+ /// same data. Otherwise, two format variants in the codegen crate would have the same name.
+ /// - Formats must be structurally different from each other. Otherwise, this would lead to
+ /// code duplication in the codegen crate.
+ ///
+ /// Returns a list of all the instruction formats effectively used.
+ pub fn verify_instruction_formats(&self) -> Vec<&InstructionFormat> {
+ let mut format_names: HashMap<&'static str, &Rc<InstructionFormat>> = HashMap::new();
+
+ // A structure is: number of input value operands / whether there's varargs or not / names
+ // of immediate fields.
+ let mut format_structures: HashMap<FormatStructure, &InstructionFormat> = HashMap::new();
+
+ for inst in self.all_instructions.values() {
+ // Check name.
+ if let Some(existing_format) = format_names.get(&inst.format.name) {
+ assert!(
+ Rc::ptr_eq(&existing_format, &inst.format),
+ "formats must be uniquely named; there's a \
+ conflict on the name '{}', please make sure it is used only once.",
+ existing_format.name
+ );
+ } else {
+ format_names.insert(inst.format.name, &inst.format);
+ }
+
+ // Check structure.
+ let key = inst.format.structure();
+ if let Some(existing_format) = format_structures.get(&key) {
+ assert_eq!(
+ existing_format.name, inst.format.name,
+ "duplicate instruction formats {} and {}; please remove one.",
+ existing_format.name, inst.format.name
+ );
+ } else {
+ format_structures.insert(key, &inst.format);
+ }
+ }
+
+ let mut result = Vec::from_iter(format_structures.into_iter().map(|(_, v)| v));
+ result.sort_by_key(|format| format.name);
+ result
+ }
+}
diff --git a/third_party/rust/cranelift-codegen-meta/src/shared/settings.rs b/third_party/rust/cranelift-codegen-meta/src/shared/settings.rs
new file mode 100644
index 0000000000..1ddc445927
--- /dev/null
+++ b/third_party/rust/cranelift-codegen-meta/src/shared/settings.rs
@@ -0,0 +1,287 @@
+use crate::cdsl::settings::{SettingGroup, SettingGroupBuilder};
+
+pub(crate) fn define() -> SettingGroup {
+ let mut settings = SettingGroupBuilder::new("shared");
+
+ settings.add_enum(
+ "regalloc",
+ r#"Register allocator to use with the MachInst backend.
+
+ This selects the register allocator as an option among those offered by the `regalloc.rs`
+ crate. Please report register allocation bugs to the maintainers of this crate whenever
+ possible.
+
+ Note: this only applies to targets that use the MachInst backend. As of 2020-04-17, this
+ means the x86_64 backend doesn't use this yet.
+
+ Possible values:
+
+ - `backtracking` is a greedy, backtracking register allocator as implemented in
+ Spidermonkey's optimizing tier IonMonkey. It may take more time to allocate registers, but
+ it should generate better code in general, resulting in better throughput of generated
+ code.
+ - `backtracking_checked` is the backtracking allocator with additional self checks that may
+ take some time to run, and thus these checks are disabled by default.
+ - `experimental_linear_scan` is an experimental linear scan allocator. It may take less
+ time to allocate registers, but generated code's quality may be inferior. As of
+ 2020-04-17, it is still experimental and it should not be used in production settings.
+ - `experimental_linear_scan_checked` is the linear scan allocator with additional self
+ checks that may take some time to run, and thus these checks are disabled by default.
+ "#,
+ vec![
+ "backtracking",
+ "backtracking_checked",
+ "experimental_linear_scan",
+ "experimental_linear_scan_checked",
+ ],
+ );
+
+ settings.add_enum(
+ "opt_level",
+ r#"
+ Optimization level:
+
+ - none: Minimize compile time by disabling most optimizations.
+ - speed: Generate the fastest possible code.
+ - speed_and_size: like "speed", but also perform transformations
+ aimed at reducing code size.
+ "#,
+ vec!["none", "speed", "speed_and_size"],
+ );
+
+ settings.add_bool(
+ "enable_verifier",
+ r#"
+ Run the Cranelift IR verifier at strategic times during compilation.
+
+ This makes compilation slower but catches many bugs. The verifier is always enabled by
+ default, which is useful during development.
+ "#,
+ true,
+ );
+
+ // Note that Cranelift doesn't currently need an is_pie flag, because PIE is
+ // just PIC where symbols can't be pre-empted, which can be expressed with the
+ // `colocated` flag on external functions and global values.
+ settings.add_bool(
+ "is_pic",
+ "Enable Position-Independent Code generation",
+ false,
+ );
+
+ settings.add_bool(
+ "use_colocated_libcalls",
+ r#"
+ Use colocated libcalls.
+
+ Generate code that assumes that libcalls can be declared "colocated",
+ meaning they will be defined along with the current function, such that
+ they can use more efficient addressing.
+ "#,
+ false,
+ );
+
+ settings.add_bool(
+ "avoid_div_traps",
+ r#"
+ Generate explicit checks around native division instructions to avoid
+ their trapping.
+
+ This is primarily used by SpiderMonkey which doesn't install a signal
+ handler for SIGFPE, but expects a SIGILL trap for division by zero.
+
+ On ISAs like ARM where the native division instructions don't trap,
+ this setting has no effect - explicit checks are always inserted.
+ "#,
+ false,
+ );
+
+ settings.add_bool(
+ "enable_float",
+ r#"
+ Enable the use of floating-point instructions
+
+ Disabling use of floating-point instructions is not yet implemented.
+ "#,
+ true,
+ );
+
+ settings.add_bool(
+ "enable_nan_canonicalization",
+ r#"
+ Enable NaN canonicalization
+
+ This replaces NaNs with a single canonical value, for users requiring
+ entirely deterministic WebAssembly computation. This is not required
+ by the WebAssembly spec, so it is not enabled by default.
+ "#,
+ false,
+ );
+
+ settings.add_bool(
+ "enable_pinned_reg",
+ r#"Enable the use of the pinned register.
+
+ This register is excluded from register allocation, and is completely under the control of
+ the end-user. It is possible to read it via the get_pinned_reg instruction, and to set it
+ with the set_pinned_reg instruction.
+ "#,
+ false,
+ );
+
+ settings.add_bool(
+ "use_pinned_reg_as_heap_base",
+ r#"Use the pinned register as the heap base.
+
+ Enabling this requires the enable_pinned_reg setting to be set to true. It enables a custom
+ legalization of the `heap_addr` instruction so it will use the pinned register as the heap
+ base, instead of fetching it from a global value.
+
+ Warning! Enabling this means that the pinned register *must* be maintained to contain the
+ heap base address at all times, during the lifetime of a function. Using the pinned
+ register for other purposes when this is set is very likely to cause crashes.
+ "#,
+ false,
+ );
+
+ settings.add_bool("enable_simd", "Enable the use of SIMD instructions.", false);
+
+ settings.add_bool(
+ "enable_atomics",
+ "Enable the use of atomic instructions",
+ true,
+ );
+
+ settings.add_bool(
+ "enable_safepoints",
+ r#"
+ Enable safepoint instruction insertions.
+
+ This will allow the emit_stack_maps() function to insert the safepoint
+ instruction on top of calls and interrupt traps in order to display the
+ live reference values at that point in the program.
+ "#,
+ false,
+ );
+
+ settings.add_enum(
+ "tls_model",
+ r#"
+ Defines the model used to perform TLS accesses.
+ "#,
+ vec!["none", "elf_gd", "macho", "coff"],
+ );
+
+ // Settings specific to the `baldrdash` calling convention.
+
+ settings.add_enum(
+ "libcall_call_conv",
+ r#"
+ Defines the calling convention to use for LibCalls call expansion,
+ since it may be different from the ISA default calling convention.
+
+ The default value is to use the same calling convention as the ISA
+ default calling convention.
+
+ This list should be kept in sync with the list of calling
+ conventions available in isa/call_conv.rs.
+ "#,
+ vec![
+ "isa_default",
+ "fast",
+ "cold",
+ "system_v",
+ "windows_fastcall",
+ "baldrdash_system_v",
+ "baldrdash_windows",
+ "baldrdash_2020",
+ "probestack",
+ ],
+ );
+
+ settings.add_num(
+ "baldrdash_prologue_words",
+ r#"
+ Number of pointer-sized words pushed by the baldrdash prologue.
+
+ Functions with the `baldrdash` calling convention don't generate their
+ own prologue and epilogue. They depend on externally generated code
+ that pushes a fixed number of words in the prologue and restores them
+ in the epilogue.
+
+ This setting configures the number of pointer-sized words pushed on the
+ stack when the Cranelift-generated code is entered. This includes the
+ pushed return address on x86.
+ "#,
+ 0,
+ );
+
+ // BaldrMonkey requires that not-yet-relocated function addresses be encoded
+ // as all-ones bitpatterns.
+ settings.add_bool(
+ "emit_all_ones_funcaddrs",
+ "Emit not-yet-relocated function addresses as all-ones bit patterns.",
+ false,
+ );
+
+ // Stack probing options.
+
+ settings.add_bool(
+ "enable_probestack",
+ r#"
+ Enable the use of stack probes, for calling conventions which support this
+ functionality.
+ "#,
+ true,
+ );
+
+ settings.add_bool(
+ "probestack_func_adjusts_sp",
+ r#"
+ Set this to true if the stack probe function modifies the stack pointer
+ itself.
+ "#,
+ false,
+ );
+
+ settings.add_num(
+ "probestack_size_log2",
+ r#"
+ The log2 of the size of the stack guard region.
+
+ Stack frames larger than this size will have stack overflow checked
+ by calling the probestack function.
+
+ The default is 12, which translates to a size of 4096.
+ "#,
+ 12,
+ );
+
+ // Jump table options.
+
+ settings.add_bool(
+ "enable_jump_tables",
+ "Enable the use of jump tables in generated machine code.",
+ true,
+ );
+
+ // Spectre options.
+
+ settings.add_bool(
+ "enable_heap_access_spectre_mitigation",
+ r#"
+ Enable Spectre mitigation on heap bounds checks.
+
+ This is a no-op for any heap that needs no bounds checks; e.g.,
+ if the limit is static and the guard region is large enough that
+ the index cannot reach past it.
+
+ This option is enabled by default because it is highly
+ recommended for secure sandboxing. The embedder should consider
+ the security implications carefully before disabling this option.
+ "#,
+ true,
+ );
+
+ settings.build()
+}
diff --git a/third_party/rust/cranelift-codegen-meta/src/shared/types.rs b/third_party/rust/cranelift-codegen-meta/src/shared/types.rs
new file mode 100644
index 0000000000..631e5433e9
--- /dev/null
+++ b/third_party/rust/cranelift-codegen-meta/src/shared/types.rs
@@ -0,0 +1,236 @@
+//! This module predefines all the Cranelift scalar types.
+
+#[derive(Debug, Clone, Copy, Eq, PartialEq, Hash)]
+pub(crate) enum Bool {
+ /// 1-bit bool.
+ B1 = 1,
+ /// 8-bit bool.
+ B8 = 8,
+ /// 16-bit bool.
+ B16 = 16,
+ /// 32-bit bool.
+ B32 = 32,
+ /// 64-bit bool.
+ B64 = 64,
+ /// 128-bit bool.
+ B128 = 128,
+}
+
+/// This provides an iterator through all of the supported bool variants.
+pub(crate) struct BoolIterator {
+ index: u8,
+}
+
+impl BoolIterator {
+ pub fn new() -> Self {
+ Self { index: 0 }
+ }
+}
+
+impl Iterator for BoolIterator {
+ type Item = Bool;
+ fn next(&mut self) -> Option<Self::Item> {
+ let res = match self.index {
+ 0 => Some(Bool::B1),
+ 1 => Some(Bool::B8),
+ 2 => Some(Bool::B16),
+ 3 => Some(Bool::B32),
+ 4 => Some(Bool::B64),
+ 5 => Some(Bool::B128),
+ _ => return None,
+ };
+ self.index += 1;
+ res
+ }
+}
+
+#[derive(Debug, Clone, Copy, Eq, PartialEq, Hash)]
+pub(crate) enum Int {
+ /// 8-bit int.
+ I8 = 8,
+ /// 16-bit int.
+ I16 = 16,
+ /// 32-bit int.
+ I32 = 32,
+ /// 64-bit int.
+ I64 = 64,
+ /// 128-bit int.
+ I128 = 128,
+}
+
+/// This provides an iterator through all of the supported int variants.
+pub(crate) struct IntIterator {
+ index: u8,
+}
+
+impl IntIterator {
+ pub fn new() -> Self {
+ Self { index: 0 }
+ }
+}
+
+impl Iterator for IntIterator {
+ type Item = Int;
+ fn next(&mut self) -> Option<Self::Item> {
+ let res = match self.index {
+ 0 => Some(Int::I8),
+ 1 => Some(Int::I16),
+ 2 => Some(Int::I32),
+ 3 => Some(Int::I64),
+ 4 => Some(Int::I128),
+ _ => return None,
+ };
+ self.index += 1;
+ res
+ }
+}
+
+#[derive(Debug, Clone, Copy, Eq, PartialEq, Hash)]
+pub(crate) enum Float {
+ F32 = 32,
+ F64 = 64,
+}
+
+/// Iterator through the variants of the Float enum.
+pub(crate) struct FloatIterator {
+ index: u8,
+}
+
+impl FloatIterator {
+ pub fn new() -> Self {
+ Self { index: 0 }
+ }
+}
+
+/// This provides an iterator through all of the supported float variants.
+impl Iterator for FloatIterator {
+ type Item = Float;
+ fn next(&mut self) -> Option<Self::Item> {
+ let res = match self.index {
+ 0 => Some(Float::F32),
+ 1 => Some(Float::F64),
+ _ => return None,
+ };
+ self.index += 1;
+ res
+ }
+}
+
+/// A type representing CPU flags.
+///
+/// Flags can't be stored in memory.
+#[derive(Debug, Clone, Copy, Eq, PartialEq, Hash)]
+pub(crate) enum Flag {
+ /// CPU flags from an integer comparison.
+ IFlags,
+ /// CPU flags from a floating point comparison.
+ FFlags,
+}
+
+/// Iterator through the variants of the Flag enum.
+pub(crate) struct FlagIterator {
+ index: u8,
+}
+
+impl FlagIterator {
+ pub fn new() -> Self {
+ Self { index: 0 }
+ }
+}
+
+impl Iterator for FlagIterator {
+ type Item = Flag;
+ fn next(&mut self) -> Option<Self::Item> {
+ let res = match self.index {
+ 0 => Some(Flag::IFlags),
+ 1 => Some(Flag::FFlags),
+ _ => return None,
+ };
+ self.index += 1;
+ res
+ }
+}
+
+#[derive(Debug, Clone, Copy, Eq, PartialEq, Hash)]
+pub(crate) enum Reference {
+ /// 32-bit reference.
+ R32 = 32,
+ /// 64-bit reference.
+ R64 = 64,
+}
+
+/// This provides an iterator through all of the supported reference variants.
+pub(crate) struct ReferenceIterator {
+ index: u8,
+}
+
+impl ReferenceIterator {
+ pub fn new() -> Self {
+ Self { index: 0 }
+ }
+}
+
+impl Iterator for ReferenceIterator {
+ type Item = Reference;
+ fn next(&mut self) -> Option<Self::Item> {
+ let res = match self.index {
+ 0 => Some(Reference::R32),
+ 1 => Some(Reference::R64),
+ _ => return None,
+ };
+ self.index += 1;
+ res
+ }
+}
+
+#[cfg(test)]
+mod iter_tests {
+ use super::*;
+
+ #[test]
+ fn bool_iter_works() {
+ let mut bool_iter = BoolIterator::new();
+ assert_eq!(bool_iter.next(), Some(Bool::B1));
+ assert_eq!(bool_iter.next(), Some(Bool::B8));
+ assert_eq!(bool_iter.next(), Some(Bool::B16));
+ assert_eq!(bool_iter.next(), Some(Bool::B32));
+ assert_eq!(bool_iter.next(), Some(Bool::B64));
+ assert_eq!(bool_iter.next(), Some(Bool::B128));
+ assert_eq!(bool_iter.next(), None);
+ }
+
+ #[test]
+ fn int_iter_works() {
+ let mut int_iter = IntIterator::new();
+ assert_eq!(int_iter.next(), Some(Int::I8));
+ assert_eq!(int_iter.next(), Some(Int::I16));
+ assert_eq!(int_iter.next(), Some(Int::I32));
+ assert_eq!(int_iter.next(), Some(Int::I64));
+ assert_eq!(int_iter.next(), Some(Int::I128));
+ assert_eq!(int_iter.next(), None);
+ }
+
+ #[test]
+ fn float_iter_works() {
+ let mut float_iter = FloatIterator::new();
+ assert_eq!(float_iter.next(), Some(Float::F32));
+ assert_eq!(float_iter.next(), Some(Float::F64));
+ assert_eq!(float_iter.next(), None);
+ }
+
+ #[test]
+ fn flag_iter_works() {
+ let mut flag_iter = FlagIterator::new();
+ assert_eq!(flag_iter.next(), Some(Flag::IFlags));
+ assert_eq!(flag_iter.next(), Some(Flag::FFlags));
+ assert_eq!(flag_iter.next(), None);
+ }
+
+ #[test]
+ fn reference_iter_works() {
+ let mut reference_iter = ReferenceIterator::new();
+ assert_eq!(reference_iter.next(), Some(Reference::R32));
+ assert_eq!(reference_iter.next(), Some(Reference::R64));
+ assert_eq!(reference_iter.next(), None);
+ }
+}
diff --git a/third_party/rust/cranelift-codegen-meta/src/srcgen.rs b/third_party/rust/cranelift-codegen-meta/src/srcgen.rs
new file mode 100644
index 0000000000..ad8db175d7
--- /dev/null
+++ b/third_party/rust/cranelift-codegen-meta/src/srcgen.rs
@@ -0,0 +1,484 @@
+//! Source code generator.
+//!
+//! The `srcgen` module contains generic helper routines and classes for
+//! generating source code.
+
+#![macro_use]
+
+use std::cmp;
+use std::collections::{BTreeMap, BTreeSet};
+use std::fs;
+use std::io::Write;
+use std::path;
+
+use crate::error;
+
+static SHIFTWIDTH: usize = 4;
+
+/// A macro that simplifies the usage of the Formatter by allowing format
+/// strings.
+macro_rules! fmtln {
+ ($fmt:ident, $fmtstring:expr, $($fmtargs:expr),*) => {
+ $fmt.line(format!($fmtstring, $($fmtargs),*));
+ };
+
+ ($fmt:ident, $arg:expr) => {
+ $fmt.line($arg);
+ };
+
+ ($_:tt, $($args:expr),+) => {
+ compile_error!("This macro requires at least two arguments: the Formatter instance and a format string.");
+ };
+
+ ($_:tt) => {
+ compile_error!("This macro requires at least two arguments: the Formatter instance and a format string.");
+ };
+}
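
A minimal usage sketch of the macro; it simply forwards to `Formatter::line` after running `format!`. The `emit_constant` helper is hypothetical:

fn emit_constant(fmt: &mut Formatter, name: &str, value: usize) {
    fmtln!(fmt, "pub const {}: usize = {};", name, value);
    fmtln!(fmt, "// generated constant");
}
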
+
+pub(crate) struct Formatter {
+ indent: usize,
+ lines: Vec<String>,
+}
+
+impl Formatter {
+ /// Source code formatter class. Used to collect source code to be written
+ /// to a file, and keep track of indentation.
+ pub fn new() -> Self {
+ Self {
+ indent: 0,
+ lines: Vec::new(),
+ }
+ }
+
+ /// Increase current indentation level by one.
+ pub fn indent_push(&mut self) {
+ self.indent += 1;
+ }
+
+ /// Decrease indentation by one level.
+ pub fn indent_pop(&mut self) {
+ assert!(self.indent > 0, "Already at top level indentation");
+ self.indent -= 1;
+ }
+
+ pub fn indent<T, F: FnOnce(&mut Formatter) -> T>(&mut self, f: F) -> T {
+ self.indent_push();
+ let ret = f(self);
+ self.indent_pop();
+ ret
+ }
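
A sketch of the closure-based helper: it keeps `indent_push`/`indent_pop` paired while emitting a nested block (`emit_wrapper` is a hypothetical caller):

fn emit_wrapper(fmt: &mut Formatter) {
    fmt.line("fn generated() {");
    fmt.indent(|fmt| {
        // These lines come out one level deeper than the surrounding braces.
        fmt.line("let x = compute();");
        fmt.line("consume(x);");
    });
    fmt.line("}");
}
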
+
+ /// Get the current whitespace indentation in the form of a String.
+ fn get_indent(&self) -> String {
+ if self.indent == 0 {
+ String::new()
+ } else {
+ format!("{:-1$}", " ", self.indent * SHIFTWIDTH)
+ }
+ }
+
+ /// Get a string containing whitespace outdented one level. Used for
+ /// lines of code that are inside a single indented block.
+ fn get_outdent(&mut self) -> String {
+ self.indent_pop();
+ let s = self.get_indent();
+ self.indent_push();
+ s
+ }
+
+ /// Add an indented line.
+ pub fn line(&mut self, contents: impl AsRef<str>) {
+ let indented_line = format!("{}{}\n", self.get_indent(), contents.as_ref());
+ self.lines.push(indented_line);
+ }
+
+ /// Pushes an empty line.
+ pub fn empty_line(&mut self) {
+ self.lines.push("\n".to_string());
+ }
+
+ /// Emit a line outdented one level.
+ pub fn outdented_line(&mut self, s: &str) {
+ let new_line = format!("{}{}\n", self.get_outdent(), s);
+ self.lines.push(new_line);
+ }
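
One place this is useful is emitting a separator such as an `else` at the enclosing indentation level while the surrounding code is generated one level deeper. A sketch with a hypothetical caller:

fn emit_if_else(fmt: &mut Formatter) {
    fmt.line("if cond {");
    fmt.indent(|fmt| {
        fmt.line("a();");
        // Printed one level to the left of the current indentation.
        fmt.outdented_line("} else {");
        fmt.line("b();");
    });
    fmt.line("}");
}
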
+
+ /// Write `self.lines` to a file.
+ pub fn update_file(
+ &self,
+ filename: impl AsRef<str>,
+ directory: &str,
+ ) -> Result<(), error::Error> {
+ #[cfg(target_family = "windows")]
+ let path_str = format!("{}\\{}", directory, filename.as_ref());
+ #[cfg(not(target_family = "windows"))]
+ let path_str = format!("{}/{}", directory, filename.as_ref());
+
+ let path = path::Path::new(&path_str);
+ let mut f = fs::File::create(path)?;
+
+ for l in self.lines.iter().map(|l| l.as_bytes()) {
+ f.write_all(l)?;
+ }
+
+ Ok(())
+ }
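
Design note: the platform-specific string concatenation above could equally be expressed with `std::path::Path::join`, which inserts the right separator on both target families. A sketch (the helper name is invented):

use std::path::{Path, PathBuf};

fn output_path(directory: &str, filename: &str) -> PathBuf {
    // Path::join handles the separator, so no cfg(target_family) split is needed.
    Path::new(directory).join(filename)
}
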
+
+ /// Add one or more lines after stripping common indentation.
+ pub fn multi_line(&mut self, s: &str) {
+ parse_multiline(s).into_iter().for_each(|l| self.line(&l));
+ }
+
+ /// Add a comment line.
+ pub fn comment(&mut self, s: impl AsRef<str>) {
+ fmtln!(self, "// {}", s.as_ref());
+ }
+
+ /// Add a (multi-line) documentation comment.
+ pub fn doc_comment(&mut self, contents: impl AsRef<str>) {
+ parse_multiline(contents.as_ref())
+ .iter()
+ .map(|l| {
+ if l.is_empty() {
+ "///".into()
+ } else {
+ format!("/// {}", l)
+ }
+ })
+ .for_each(|s| self.line(s.as_str()));
+ }
+
+ /// Add a match expression.
+ pub fn add_match(&mut self, m: Match) {
+ fmtln!(self, "match {} {{", m.expr);
+ self.indent(|fmt| {
+ for (&(ref fields, ref body), ref names) in m.arms.iter() {
+ // name { fields } | name { fields } => { body }
+ let conditions = names
+ .iter()
+ .map(|name| {
+ if !fields.is_empty() {
+ format!("{} {{ {} }}", name, fields.join(", "))
+ } else {
+ name.clone()
+ }
+ })
+ .collect::<Vec<_>>()
+ .join(" |\n")
+ + " => {";
+
+ fmt.multi_line(&conditions);
+ fmt.indent(|fmt| {
+ fmt.line(body);
+ });
+ fmt.line("}");
+ }
+
+ // Make sure to include the catch all clause last.
+ if let Some(body) = m.catch_all {
+ fmt.line("_ => {");
+ fmt.indent(|fmt| {
+ fmt.line(body);
+ });
+ fmt.line("}");
+ }
+ });
+ self.line("}");
+ }
+}
+
+/// Compute the indentation of s, or None for an empty line.
+fn _indent(s: &str) -> Option<usize> {
+ if s.is_empty() {
+ None
+ } else {
+ let t = s.trim_start();
+ Some(s.len() - t.len())
+ }
+}
+
+/// Given a multi-line string, split it into a sequence of lines after
+/// stripping a common indentation. This is useful for strings defined with
+/// doc strings.
+fn parse_multiline(s: &str) -> Vec<String> {
+ // Convert tabs into spaces.
+ let expanded_tab = format!("{:-1$}", " ", SHIFTWIDTH);
+ let lines: Vec<String> = s.lines().map(|l| l.replace("\t", &expanded_tab)).collect();
+
+ // Determine minimum indentation, ignoring the first line and empty lines.
+ let indent = lines
+ .iter()
+ .skip(1)
+ .filter(|l| !l.trim().is_empty())
+ .map(|l| l.len() - l.trim_start().len())
+ .min();
+
+ // Strip off leading blank lines.
+ let mut lines_iter = lines.iter().skip_while(|l| l.is_empty());
+ let mut trimmed = Vec::with_capacity(lines.len());
+
+ // Remove indentation (first line is special)
+ if let Some(s) = lines_iter.next().map(|l| l.trim()).map(|l| l.to_string()) {
+ trimmed.push(s);
+ }
+
+ // Strip the common indentation and trailing whitespace from the remaining lines.
+ let mut other_lines = if let Some(indent) = indent {
+ // Note that empty lines may have fewer than `indent` chars.
+ lines_iter
+ .map(|l| &l[cmp::min(indent, l.len())..])
+ .map(|l| l.trim_end())
+ .map(|l| l.to_string())
+ .collect::<Vec<_>>()
+ } else {
+ lines_iter
+ .map(|l| l.trim_end())
+ .map(|l| l.to_string())
+ .collect::<Vec<_>>()
+ };
+
+ trimmed.append(&mut other_lines);
+
+ // Strip off trailing blank lines.
+ while let Some(s) = trimmed.pop() {
+ if s.is_empty() {
+ continue;
+ } else {
+ trimmed.push(s);
+ break;
+ }
+ }
+
+ trimmed
+}
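
A worked example of the stripping rules: the first line is only trimmed, later lines lose the common indentation, and trailing blank lines are dropped (the wrapper function is hypothetical):

fn parse_multiline_example() {
    let doc = "Summary line.\n        Indented detail.\n\n        More detail.\n\n";
    assert_eq!(
        parse_multiline(doc),
        vec!["Summary line.", "Indented detail.", "", "More detail."]
    );
}
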
+
+/// Match formatting class.
+///
+/// Match objects collect all the information needed to emit a Rust `match`
+/// expression, automatically deduplicating overlapping identical arms.
+///
+/// Note that this class is ignorant of Rust types, and considers two fields
+/// with the same name to be equivalent. BTreeMap/BTreeSet are used to
+/// represent the arms in order to make the order deterministic.
+pub(crate) struct Match {
+ expr: String,
+ arms: BTreeMap<(Vec<String>, String), BTreeSet<String>>,
+ /// The clause for the placeholder pattern _.
+ catch_all: Option<String>,
+}
+
+impl Match {
+ /// Create a new match statement on `expr`.
+ pub fn new(expr: impl Into<String>) -> Self {
+ Self {
+ expr: expr.into(),
+ arms: BTreeMap::new(),
+ catch_all: None,
+ }
+ }
+
+ fn set_catch_all(&mut self, clause: String) {
+ assert!(self.catch_all.is_none());
+ self.catch_all = Some(clause);
+ }
+
+ /// Add an arm that reads fields to the Match statement.
+ pub fn arm<T: Into<String>, S: Into<String>>(&mut self, name: T, fields: Vec<S>, body: T) {
+ let name = name.into();
+ assert!(
+ name != "_",
+ "catch all clause can't extract fields, use arm_no_fields instead."
+ );
+
+ let body = body.into();
+ let fields = fields.into_iter().map(|x| x.into()).collect();
+ let match_arm = self
+ .arms
+ .entry((fields, body))
+ .or_insert_with(BTreeSet::new);
+ match_arm.insert(name);
+ }
+
+ /// Adds an arm that doesn't read anything from the fields to the Match statement.
+ pub fn arm_no_fields(&mut self, name: impl Into<String>, body: impl Into<String>) {
+ let body = body.into();
+
+ let name = name.into();
+ if name == "_" {
+ self.set_catch_all(body);
+ return;
+ }
+
+ let match_arm = self
+ .arms
+ .entry((Vec::new(), body))
+ .or_insert_with(BTreeSet::new);
+ match_arm.insert(name);
+ }
+}
+
+#[cfg(test)]
+mod srcgen_tests {
+ use super::parse_multiline;
+ use super::Formatter;
+ use super::Match;
+
+ fn from_raw_string<S: Into<String>>(s: S) -> Vec<String> {
+ s.into()
+ .trim()
+ .split("\n")
+ .into_iter()
+ .map(|x| format!("{}\n", x))
+ .collect()
+ }
+
+ #[test]
+ fn adding_arms_works() {
+ let mut m = Match::new("x");
+ m.arm("Orange", vec!["a", "b"], "some body");
+ m.arm("Yellow", vec!["a", "b"], "some body");
+ m.arm("Green", vec!["a", "b"], "different body");
+ m.arm("Blue", vec!["x", "y"], "some body");
+ assert_eq!(m.arms.len(), 3);
+
+ let mut fmt = Formatter::new();
+ fmt.add_match(m);
+
+ let expected_lines = from_raw_string(
+ r#"
+match x {
+    Green { a, b } => {
+        different body
+    }
+    Orange { a, b } |
+    Yellow { a, b } => {
+        some body
+    }
+    Blue { x, y } => {
+        some body
+    }
+}
+ "#,
+ );
+ assert_eq!(fmt.lines, expected_lines);
+ }
+
+ #[test]
+ fn match_with_catchall_order() {
+ // The catchall placeholder must be placed after other clauses.
+ let mut m = Match::new("x");
+ m.arm("Orange", vec!["a", "b"], "some body");
+ m.arm("Green", vec!["a", "b"], "different body");
+ m.arm_no_fields("_", "unreachable!()");
+ assert_eq!(m.arms.len(), 2); // catchall is not counted
+
+ let mut fmt = Formatter::new();
+ fmt.add_match(m);
+
+ let expected_lines = from_raw_string(
+ r#"
+match x {
+    Green { a, b } => {
+        different body
+    }
+    Orange { a, b } => {
+        some body
+    }
+    _ => {
+        unreachable!()
+    }
+}
+ "#,
+ );
+ assert_eq!(fmt.lines, expected_lines);
+ }
+
+ #[test]
+ fn parse_multiline_works() {
+ let input = "\n hello\n world\n";
+ let expected = vec!["hello", "world"];
+ let output = parse_multiline(input);
+ assert_eq!(output, expected);
+ }
+
+ #[test]
+ fn formatter_basic_example_works() {
+ let mut fmt = Formatter::new();
+ fmt.line("Hello line 1");
+ fmt.indent_push();
+ fmt.comment("Nested comment");
+ fmt.indent_pop();
+ fmt.line("Back home again");
+ let expected_lines = vec![
+ "Hello line 1\n",
+ " // Nested comment\n",
+ "Back home again\n",
+ ];
+ assert_eq!(fmt.lines, expected_lines);
+ }
+
+ #[test]
+ fn get_indent_works() {
+ let mut fmt = Formatter::new();
+ let expected_results = vec!["", "    ", "        ", ""];
+
+ let mut actual_results = Vec::with_capacity(4);
+ (0..3).for_each(|_| {
+ actual_results.push(fmt.get_indent());
+ fmt.indent_push();
+ });
+ (0..3).for_each(|_| fmt.indent_pop());
+ actual_results.push(fmt.get_indent());
+
+ actual_results
+ .into_iter()
+ .zip(expected_results.into_iter())
+ .for_each(|(actual, expected): (String, &str)| assert_eq!(&actual, expected));
+ }
+
+ #[test]
+ fn fmt_can_add_type_to_lines() {
+ let mut fmt = Formatter::new();
+ fmt.line(format!("pub const {}: Type = Type({:#x});", "example", 0,));
+ let expected_lines = vec!["pub const example: Type = Type(0x0);\n"];
+ assert_eq!(fmt.lines, expected_lines);
+ }
+
+ #[test]
+ fn fmt_can_add_indented_line() {
+ let mut fmt = Formatter::new();
+ fmt.line("hello");
+ fmt.indent_push();
+ fmt.line("world");
+ let expected_lines = vec!["hello\n", " world\n"];
+ assert_eq!(fmt.lines, expected_lines);
+ }
+
+ #[test]
+ fn fmt_can_add_doc_comments() {
+ let mut fmt = Formatter::new();
+ fmt.doc_comment("documentation\nis\ngood");
+ let expected_lines = vec!["/// documentation\n", "/// is\n", "/// good\n"];
+ assert_eq!(fmt.lines, expected_lines);
+ }
+
+ #[test]
+ fn fmt_can_add_doc_comments_with_empty_lines() {
+ let mut fmt = Formatter::new();
+ fmt.doc_comment(
+ r#"documentation
+ can be really good.
+
+ If you stick to writing it.
+"#,
+ );
+ let expected_lines = from_raw_string(
+ r#"
+/// documentation
+/// can be really good.
+///
+/// If you stick to writing it."#,
+ );
+ assert_eq!(fmt.lines, expected_lines);
+ }
+}
diff --git a/third_party/rust/cranelift-codegen-meta/src/unique_table.rs b/third_party/rust/cranelift-codegen-meta/src/unique_table.rs
new file mode 100644
index 0000000000..65ef7e8b4a
--- /dev/null
+++ b/third_party/rust/cranelift-codegen-meta/src/unique_table.rs
@@ -0,0 +1,141 @@
+//! An index-accessed table implementation that avoids duplicate entries.
+use std::collections::HashMap;
+use std::hash::Hash;
+use std::slice;
+
+/// Collect items into the `table` list, removing duplicates.
+pub(crate) struct UniqueTable<'entries, T: Eq + Hash> {
+ table: Vec<&'entries T>,
+ map: HashMap<&'entries T, usize>,
+}
+
+impl<'entries, T: Eq + Hash> UniqueTable<'entries, T> {
+ pub fn new() -> Self {
+ Self {
+ table: Vec::new(),
+ map: HashMap::new(),
+ }
+ }
+
+ pub fn add(&mut self, entry: &'entries T) -> usize {
+ match self.map.get(&entry) {
+ None => {
+ let i = self.table.len();
+ self.table.push(entry);
+ self.map.insert(entry, i);
+ i
+ }
+ Some(&i) => i,
+ }
+ }
+
+ pub fn len(&self) -> usize {
+ self.table.len()
+ }
+ pub fn get(&self, index: usize) -> &T {
+ self.table[index]
+ }
+ pub fn iter(&self) -> slice::Iter<&'entries T> {
+ self.table.iter()
+ }
+}
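
A short usage sketch: adding an entry equal to an existing one returns the index of the first occurrence instead of growing the table (the function name is invented for the example):

fn unique_table_example() {
    let (a, b, c) = ("add".to_string(), "sub".to_string(), "add".to_string());
    let mut table = UniqueTable::new();
    assert_eq!(table.add(&a), 0);
    assert_eq!(table.add(&b), 1);
    // Equal entry: deduplicated to the existing index.
    assert_eq!(table.add(&c), 0);
    assert_eq!(table.len(), 2);
}
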
+
+/// A table of sequences which avoids storing duplicate or overlapping subsequences by reusing existing entries.
+pub(crate) struct UniqueSeqTable<T: PartialEq + Clone> {
+ table: Vec<T>,
+}
+
+impl<T: PartialEq + Clone> UniqueSeqTable<T> {
+ pub fn new() -> Self {
+ Self { table: Vec::new() }
+ }
+ pub fn add(&mut self, values: &[T]) -> usize {
+ if values.is_empty() {
+ return 0;
+ }
+ if let Some(offset) = find_subsequence(values, &self.table) {
+ offset
+ } else {
+ let table_len = self.table.len();
+
+ // Try to put in common the last elements of the table if they're a prefix of the new
+ // sequence.
+ //
+ // We know there wasn't a full match, so the best prefix we can hope to find contains
+ // all the values but the last one.
+ let mut start_from = usize::min(table_len, values.len() - 1);
+ while start_from != 0 {
+ // Loop invariant: start_from <= table_len, so table_len - start_from >= 0.
+ if values[0..start_from] == self.table[table_len - start_from..table_len] {
+ break;
+ }
+ start_from -= 1;
+ }
+
+ self.table
+ .extend(values[start_from..values.len()].iter().cloned());
+ table_len - start_from
+ }
+ }
+ pub fn len(&self) -> usize {
+ self.table.len()
+ }
+ pub fn iter(&self) -> slice::Iter<T> {
+ self.table.iter()
+ }
+}
+
+/// Try to find the subsequence `sub` in the `whole` sequence. Returns None if
+/// it's not been found, or Some(index) if it has been. Naive implementation
+/// until proven we need something better.
+fn find_subsequence<T: PartialEq>(sub: &[T], whole: &[T]) -> Option<usize> {
+ assert!(!sub.is_empty());
+ // We want i + sub.len() <= whole.len(), i.e. i < whole.len() + 1 - sub.len().
+ if whole.len() < sub.len() {
+ return None;
+ }
+ let max = whole.len() - sub.len();
+ for i in 0..=max {
+ if whole[i..i + sub.len()] == sub[..] {
+ return Some(i);
+ }
+ }
+ None
+}
+
+#[test]
+fn test_find_subsequence() {
+ assert_eq!(find_subsequence(&vec![1], &vec![4]), None);
+ assert_eq!(find_subsequence(&vec![1], &vec![1]), Some(0));
+ assert_eq!(find_subsequence(&vec![1, 2], &vec![1]), None);
+ assert_eq!(find_subsequence(&vec![1, 2], &vec![1, 2]), Some(0));
+ assert_eq!(find_subsequence(&vec![1, 2], &vec![1, 3]), None);
+ assert_eq!(find_subsequence(&vec![1, 2], &vec![0, 1, 2]), Some(1));
+ assert_eq!(find_subsequence(&vec![1, 2], &vec![0, 1, 3, 1]), None);
+ assert_eq!(find_subsequence(&vec![1, 2], &vec![0, 1, 3, 1, 2]), Some(3));
+ assert_eq!(
+ find_subsequence(&vec![1, 1, 3], &vec![1, 1, 1, 3, 3]),
+ Some(1)
+ );
+}
+
+#[test]
+fn test_optimal_add() {
+ let mut seq_table = UniqueSeqTable::new();
+ // [0, 1, 2, 3]
+ assert_eq!(seq_table.add(&vec![0, 1, 2, 3]), 0);
+ assert_eq!(seq_table.add(&vec![0, 1, 2, 3]), 0);
+ assert_eq!(seq_table.add(&vec![1, 2, 3]), 1);
+ assert_eq!(seq_table.add(&vec![2, 3]), 2);
+ assert_eq!(seq_table.len(), 4);
+ // [0, 1, 2, 3, 4]
+ assert_eq!(seq_table.add(&vec![2, 3, 4]), 2);
+ assert_eq!(seq_table.len(), 5);
+ // [0, 1, 2, 3, 4, 6, 5, 7]
+ assert_eq!(seq_table.add(&vec![4, 6, 5, 7]), 4);
+ assert_eq!(seq_table.len(), 8);
+ // [0, 1, 2, 3, 4, 6, 5, 7, 8, 2, 3, 4]
+ assert_eq!(seq_table.add(&vec![8, 2, 3, 4]), 8);
+ assert_eq!(seq_table.add(&vec![8]), 8);
+ assert_eq!(seq_table.len(), 12);
+}