Diffstat (limited to 'third_party/rust/cranelift-codegen-meta/src/isa/riscv/encodings.rs')
-rw-r--r-- | third_party/rust/cranelift-codegen-meta/src/isa/riscv/encodings.rs | 431
1 file changed, 431 insertions, 0 deletions
diff --git a/third_party/rust/cranelift-codegen-meta/src/isa/riscv/encodings.rs b/third_party/rust/cranelift-codegen-meta/src/isa/riscv/encodings.rs
new file mode 100644
index 0000000000..c255ddb483
--- /dev/null
+++ b/third_party/rust/cranelift-codegen-meta/src/isa/riscv/encodings.rs
@@ -0,0 +1,431 @@
+use crate::cdsl::ast::{Apply, Expr, Literal, VarPool};
+use crate::cdsl::encodings::{Encoding, EncodingBuilder};
+use crate::cdsl::instructions::{
+    Bindable, BoundInstruction, InstSpec, InstructionPredicateNode, InstructionPredicateRegistry,
+};
+use crate::cdsl::recipes::{EncodingRecipeNumber, Recipes};
+use crate::cdsl::settings::SettingGroup;
+
+use crate::shared::types::Bool::B1;
+use crate::shared::types::Float::{F32, F64};
+use crate::shared::types::Int::{I16, I32, I64, I8};
+use crate::shared::types::Reference::{R32, R64};
+use crate::shared::Definitions as SharedDefinitions;
+
+use super::recipes::RecipeGroup;
+
+pub(crate) struct PerCpuModeEncodings<'defs> {
+    pub inst_pred_reg: InstructionPredicateRegistry,
+    pub enc32: Vec<Encoding>,
+    pub enc64: Vec<Encoding>,
+    recipes: &'defs Recipes,
+}
+
+impl<'defs> PerCpuModeEncodings<'defs> {
+    fn new(recipes: &'defs Recipes) -> Self {
+        Self {
+            inst_pred_reg: InstructionPredicateRegistry::new(),
+            enc32: Vec::new(),
+            enc64: Vec::new(),
+            recipes,
+        }
+    }
+    fn enc(
+        &self,
+        inst: impl Into<InstSpec>,
+        recipe: EncodingRecipeNumber,
+        bits: u16,
+    ) -> EncodingBuilder {
+        EncodingBuilder::new(inst.into(), recipe, bits)
+    }
+    fn add32(&mut self, encoding: EncodingBuilder) {
+        self.enc32
+            .push(encoding.build(self.recipes, &mut self.inst_pred_reg));
+    }
+    fn add64(&mut self, encoding: EncodingBuilder) {
+        self.enc64
+            .push(encoding.build(self.recipes, &mut self.inst_pred_reg));
+    }
+}
+
+// The low 7 bits of a RISC-V instruction is the base opcode. All 32-bit instructions have 11 as
+// the two low bits, with bits 6:2 determining the base opcode.
+//
+// Encbits for the 32-bit recipes are opcode[6:2] | (funct3 << 5) | ...
+// The functions below encode the encbits.
+
+fn load_bits(funct3: u16) -> u16 {
+    assert!(funct3 <= 0b111);
+    funct3 << 5
+}
+
+fn store_bits(funct3: u16) -> u16 {
+    assert!(funct3 <= 0b111);
+    0b01000 | (funct3 << 5)
+}
+
+fn branch_bits(funct3: u16) -> u16 {
+    assert!(funct3 <= 0b111);
+    0b11000 | (funct3 << 5)
+}
+
+fn jalr_bits() -> u16 {
+    // This was previously accepting an argument funct3 of 3 bits and used the following formula:
+    //0b11001 | (funct3 << 5)
+    0b11001
+}
+
+fn jal_bits() -> u16 {
+    0b11011
+}
+
+fn opimm_bits(funct3: u16, funct7: u16) -> u16 {
+    assert!(funct3 <= 0b111);
+    0b00100 | (funct3 << 5) | (funct7 << 8)
+}
+
+fn opimm32_bits(funct3: u16, funct7: u16) -> u16 {
+    assert!(funct3 <= 0b111);
+    0b00110 | (funct3 << 5) | (funct7 << 8)
+}
+
+fn op_bits(funct3: u16, funct7: u16) -> u16 {
+    assert!(funct3 <= 0b111);
+    assert!(funct7 <= 0b111_1111);
+    0b01100 | (funct3 << 5) | (funct7 << 8)
+}
+
+fn op32_bits(funct3: u16, funct7: u16) -> u16 {
+    assert!(funct3 <= 0b111);
+    assert!(funct7 <= 0b111_1111);
+    0b01110 | (funct3 << 5) | (funct7 << 8)
+}
+
+fn lui_bits() -> u16 {
+    0b01101
+}
+
+pub(crate) fn define<'defs>(
+    shared_defs: &'defs SharedDefinitions,
+    isa_settings: &SettingGroup,
+    recipes: &'defs RecipeGroup,
+) -> PerCpuModeEncodings<'defs> {
+    // Instructions shorthands.
+    let shared = &shared_defs.instructions;
+
+    let band = shared.by_name("band");
+    let band_imm = shared.by_name("band_imm");
+    let bor = shared.by_name("bor");
+    let bor_imm = shared.by_name("bor_imm");
+    let br_icmp = shared.by_name("br_icmp");
+    let brz = shared.by_name("brz");
+    let brnz = shared.by_name("brnz");
+    let bxor = shared.by_name("bxor");
+    let bxor_imm = shared.by_name("bxor_imm");
+    let call = shared.by_name("call");
+    let call_indirect = shared.by_name("call_indirect");
+    let copy = shared.by_name("copy");
+    let copy_nop = shared.by_name("copy_nop");
+    let copy_to_ssa = shared.by_name("copy_to_ssa");
+    let fill = shared.by_name("fill");
+    let fill_nop = shared.by_name("fill_nop");
+    let iadd = shared.by_name("iadd");
+    let iadd_imm = shared.by_name("iadd_imm");
+    let iconst = shared.by_name("iconst");
+    let icmp = shared.by_name("icmp");
+    let icmp_imm = shared.by_name("icmp_imm");
+    let imul = shared.by_name("imul");
+    let ishl = shared.by_name("ishl");
+    let ishl_imm = shared.by_name("ishl_imm");
+    let isub = shared.by_name("isub");
+    let jump = shared.by_name("jump");
+    let regmove = shared.by_name("regmove");
+    let spill = shared.by_name("spill");
+    let sshr = shared.by_name("sshr");
+    let sshr_imm = shared.by_name("sshr_imm");
+    let ushr = shared.by_name("ushr");
+    let ushr_imm = shared.by_name("ushr_imm");
+    let return_ = shared.by_name("return");
+
+    // Recipes shorthands, prefixed with r_.
+    let r_copytossa = recipes.by_name("copytossa");
+    let r_fillnull = recipes.by_name("fillnull");
+    let r_icall = recipes.by_name("Icall");
+    let r_icopy = recipes.by_name("Icopy");
+    let r_ii = recipes.by_name("Ii");
+    let r_iicmp = recipes.by_name("Iicmp");
+    let r_iret = recipes.by_name("Iret");
+    let r_irmov = recipes.by_name("Irmov");
+    let r_iz = recipes.by_name("Iz");
+    let r_gp_sp = recipes.by_name("GPsp");
+    let r_gp_fi = recipes.by_name("GPfi");
+    let r_r = recipes.by_name("R");
+    let r_ricmp = recipes.by_name("Ricmp");
+    let r_rshamt = recipes.by_name("Rshamt");
+    let r_sb = recipes.by_name("SB");
+    let r_sb_zero = recipes.by_name("SBzero");
+    let r_stacknull = recipes.by_name("stacknull");
+    let r_u = recipes.by_name("U");
+    let r_uj = recipes.by_name("UJ");
+    let r_uj_call = recipes.by_name("UJcall");
+
+    // Predicates shorthands.
+    let use_m = isa_settings.predicate_by_name("use_m");
+
+    // Definitions.
+    let mut e = PerCpuModeEncodings::new(&recipes.recipes);
+
+    // Basic arithmetic binary instructions are encoded in an R-type instruction.
+    for &(inst, inst_imm, f3, f7) in &[
+        (iadd, Some(iadd_imm), 0b000, 0b000_0000),
+        (isub, None, 0b000, 0b010_0000),
+        (bxor, Some(bxor_imm), 0b100, 0b000_0000),
+        (bor, Some(bor_imm), 0b110, 0b000_0000),
+        (band, Some(band_imm), 0b111, 0b000_0000),
+    ] {
+        e.add32(e.enc(inst.bind(I32), r_r, op_bits(f3, f7)));
+        e.add64(e.enc(inst.bind(I64), r_r, op_bits(f3, f7)));
+
+        // Immediate versions for add/xor/or/and.
+        if let Some(inst_imm) = inst_imm {
+            e.add32(e.enc(inst_imm.bind(I32), r_ii, opimm_bits(f3, 0)));
+            e.add64(e.enc(inst_imm.bind(I64), r_ii, opimm_bits(f3, 0)));
+        }
+    }
+
+    // 32-bit ops in RV64.
+    e.add64(e.enc(iadd.bind(I32), r_r, op32_bits(0b000, 0b000_0000)));
+    e.add64(e.enc(isub.bind(I32), r_r, op32_bits(0b000, 0b010_0000)));
+    // There are no andiw/oriw/xoriw variations.
+    e.add64(e.enc(iadd_imm.bind(I32), r_ii, opimm32_bits(0b000, 0)));
+
+    // Use iadd_imm with %x0 to materialize constants.
+    e.add32(e.enc(iconst.bind(I32), r_iz, opimm_bits(0b0, 0)));
+    e.add64(e.enc(iconst.bind(I32), r_iz, opimm_bits(0b0, 0)));
+    e.add64(e.enc(iconst.bind(I64), r_iz, opimm_bits(0b0, 0)));
+
+    // Dynamic shifts have the same masking semantics as the clif base instructions.
+    for &(inst, inst_imm, f3, f7) in &[
+        (ishl, ishl_imm, 0b1, 0b0),
+        (ushr, ushr_imm, 0b101, 0b0),
+        (sshr, sshr_imm, 0b101, 0b10_0000),
+    ] {
+        e.add32(e.enc(inst.bind(I32).bind(I32), r_r, op_bits(f3, f7)));
+        e.add64(e.enc(inst.bind(I64).bind(I64), r_r, op_bits(f3, f7)));
+        e.add64(e.enc(inst.bind(I32).bind(I32), r_r, op32_bits(f3, f7)));
+        // Allow i32 shift amounts in 64-bit shifts.
+        e.add64(e.enc(inst.bind(I64).bind(I32), r_r, op_bits(f3, f7)));
+        e.add64(e.enc(inst.bind(I32).bind(I64), r_r, op32_bits(f3, f7)));
+
+        // Immediate shifts.
+        e.add32(e.enc(inst_imm.bind(I32), r_rshamt, opimm_bits(f3, f7)));
+        e.add64(e.enc(inst_imm.bind(I64), r_rshamt, opimm_bits(f3, f7)));
+        e.add64(e.enc(inst_imm.bind(I32), r_rshamt, opimm32_bits(f3, f7)));
+    }
+
+    // Signed and unsigned integer 'less than'. There are no 'w' variants for comparing 32-bit
+    // numbers in RV64.
+    {
+        let mut var_pool = VarPool::new();
+
+        // Helper that creates an instruction predicate for an instruction in the icmp family.
+        let mut icmp_instp = |bound_inst: &BoundInstruction,
+                              intcc_field: &'static str|
+         -> InstructionPredicateNode {
+            let x = var_pool.create("x");
+            let y = var_pool.create("y");
+            let cc = Literal::enumerator_for(&shared_defs.imm.intcc, intcc_field);
+            Apply::new(
+                bound_inst.clone().into(),
+                vec![Expr::Literal(cc), Expr::Var(x), Expr::Var(y)],
+            )
+            .inst_predicate(&var_pool)
+            .unwrap()
+        };
+
+        let icmp_i32 = icmp.bind(I32);
+        let icmp_i64 = icmp.bind(I64);
+        e.add32(
+            e.enc(icmp_i32.clone(), r_ricmp, op_bits(0b010, 0b000_0000))
+                .inst_predicate(icmp_instp(&icmp_i32, "slt")),
+        );
+        e.add64(
+            e.enc(icmp_i64.clone(), r_ricmp, op_bits(0b010, 0b000_0000))
+                .inst_predicate(icmp_instp(&icmp_i64, "slt")),
+        );
+
+        e.add32(
+            e.enc(icmp_i32.clone(), r_ricmp, op_bits(0b011, 0b000_0000))
+                .inst_predicate(icmp_instp(&icmp_i32, "ult")),
+        );
+        e.add64(
+            e.enc(icmp_i64.clone(), r_ricmp, op_bits(0b011, 0b000_0000))
+                .inst_predicate(icmp_instp(&icmp_i64, "ult")),
+        );
+
+        // Immediate variants.
+        let icmp_i32 = icmp_imm.bind(I32);
+        let icmp_i64 = icmp_imm.bind(I64);
+        e.add32(
+            e.enc(icmp_i32.clone(), r_iicmp, opimm_bits(0b010, 0))
+                .inst_predicate(icmp_instp(&icmp_i32, "slt")),
+        );
+        e.add64(
+            e.enc(icmp_i64.clone(), r_iicmp, opimm_bits(0b010, 0))
+                .inst_predicate(icmp_instp(&icmp_i64, "slt")),
+        );
+
+        e.add32(
+            e.enc(icmp_i32.clone(), r_iicmp, opimm_bits(0b011, 0))
+                .inst_predicate(icmp_instp(&icmp_i32, "ult")),
+        );
+        e.add64(
+            e.enc(icmp_i64.clone(), r_iicmp, opimm_bits(0b011, 0))
+                .inst_predicate(icmp_instp(&icmp_i64, "ult")),
+        );
+    }
+
+    // Integer constants with the low 12 bits clear are materialized by lui.
+    e.add32(e.enc(iconst.bind(I32), r_u, lui_bits()));
+    e.add64(e.enc(iconst.bind(I32), r_u, lui_bits()));
+    e.add64(e.enc(iconst.bind(I64), r_u, lui_bits()));
+
+    // "M" Standard Extension for Integer Multiplication and Division.
+    // Gated by the `use_m` flag.
+    e.add32(
+        e.enc(imul.bind(I32), r_r, op_bits(0b000, 0b0000_0001))
+            .isa_predicate(use_m),
+    );
+    e.add64(
+        e.enc(imul.bind(I64), r_r, op_bits(0b000, 0b0000_0001))
+            .isa_predicate(use_m),
+    );
+    e.add64(
+        e.enc(imul.bind(I32), r_r, op32_bits(0b000, 0b0000_0001))
+            .isa_predicate(use_m),
+    );
+
+    // Control flow.
+
+    // Unconditional branches.
+    e.add32(e.enc(jump, r_uj, jal_bits()));
+    e.add64(e.enc(jump, r_uj, jal_bits()));
+    e.add32(e.enc(call, r_uj_call, jal_bits()));
+    e.add64(e.enc(call, r_uj_call, jal_bits()));
+
+    // Conditional branches.
+    {
+        let mut var_pool = VarPool::new();
+
+        // Helper that creates an instruction predicate for an instruction in the icmp family.
+        let mut br_icmp_instp = |bound_inst: &BoundInstruction,
+                                 intcc_field: &'static str|
+         -> InstructionPredicateNode {
+            let x = var_pool.create("x");
+            let y = var_pool.create("y");
+            let dest = var_pool.create("dest");
+            let args = var_pool.create("args");
+            let cc = Literal::enumerator_for(&shared_defs.imm.intcc, intcc_field);
+            Apply::new(
+                bound_inst.clone().into(),
+                vec![
+                    Expr::Literal(cc),
+                    Expr::Var(x),
+                    Expr::Var(y),
+                    Expr::Var(dest),
+                    Expr::Var(args),
+                ],
+            )
+            .inst_predicate(&var_pool)
+            .unwrap()
+        };
+
+        let br_icmp_i32 = br_icmp.bind(I32);
+        let br_icmp_i64 = br_icmp.bind(I64);
+        for &(cond, f3) in &[
+            ("eq", 0b000),
+            ("ne", 0b001),
+            ("slt", 0b100),
+            ("sge", 0b101),
+            ("ult", 0b110),
+            ("uge", 0b111),
+        ] {
+            e.add32(
+                e.enc(br_icmp_i32.clone(), r_sb, branch_bits(f3))
+                    .inst_predicate(br_icmp_instp(&br_icmp_i32, cond)),
+            );
+            e.add64(
+                e.enc(br_icmp_i64.clone(), r_sb, branch_bits(f3))
+                    .inst_predicate(br_icmp_instp(&br_icmp_i64, cond)),
+            );
+        }
+    }
+
+    for &(inst, f3) in &[(brz, 0b000), (brnz, 0b001)] {
+        e.add32(e.enc(inst.bind(I32), r_sb_zero, branch_bits(f3)));
+        e.add64(e.enc(inst.bind(I64), r_sb_zero, branch_bits(f3)));
+        e.add32(e.enc(inst.bind(B1), r_sb_zero, branch_bits(f3)));
+        e.add64(e.enc(inst.bind(B1), r_sb_zero, branch_bits(f3)));
+    }
+
+    // Returns are a special case of jalr_bits using %x1 to hold the return address.
+    // The return address is provided by a special-purpose `link` return value that
+    // is added by legalize_signature().
+    e.add32(e.enc(return_, r_iret, jalr_bits()));
+    e.add64(e.enc(return_, r_iret, jalr_bits()));
+    e.add32(e.enc(call_indirect.bind(I32), r_icall, jalr_bits()));
+    e.add64(e.enc(call_indirect.bind(I64), r_icall, jalr_bits()));
+
+    // Spill and fill.
+    e.add32(e.enc(spill.bind(I32), r_gp_sp, store_bits(0b010)));
+    e.add64(e.enc(spill.bind(I32), r_gp_sp, store_bits(0b010)));
+    e.add64(e.enc(spill.bind(I64), r_gp_sp, store_bits(0b011)));
+    e.add32(e.enc(fill.bind(I32), r_gp_fi, load_bits(0b010)));
+    e.add64(e.enc(fill.bind(I32), r_gp_fi, load_bits(0b010)));
+    e.add64(e.enc(fill.bind(I64), r_gp_fi, load_bits(0b011)));
+
+    // No-op fills, created by late-stage redundant-fill removal.
+    for &ty in &[I64, I32] {
+        e.add64(e.enc(fill_nop.bind(ty), r_fillnull, 0));
+        e.add32(e.enc(fill_nop.bind(ty), r_fillnull, 0));
+    }
+    e.add64(e.enc(fill_nop.bind(B1), r_fillnull, 0));
+    e.add32(e.enc(fill_nop.bind(B1), r_fillnull, 0));
+
+    // Register copies.
+    e.add32(e.enc(copy.bind(I32), r_icopy, opimm_bits(0b000, 0)));
+    e.add64(e.enc(copy.bind(I64), r_icopy, opimm_bits(0b000, 0)));
+    e.add64(e.enc(copy.bind(I32), r_icopy, opimm32_bits(0b000, 0)));
+
+    e.add32(e.enc(regmove.bind(I32), r_irmov, opimm_bits(0b000, 0)));
+    e.add64(e.enc(regmove.bind(I64), r_irmov, opimm_bits(0b000, 0)));
+    e.add64(e.enc(regmove.bind(I32), r_irmov, opimm32_bits(0b000, 0)));
+
+    e.add32(e.enc(copy.bind(B1), r_icopy, opimm_bits(0b000, 0)));
+    e.add64(e.enc(copy.bind(B1), r_icopy, opimm_bits(0b000, 0)));
+    e.add32(e.enc(regmove.bind(B1), r_irmov, opimm_bits(0b000, 0)));
+    e.add64(e.enc(regmove.bind(B1), r_irmov, opimm_bits(0b000, 0)));
+
+    // Stack-slot-to-the-same-stack-slot copy, which is guaranteed to turn
+    // into a no-op.
+    // The same encoding is generated for both the 64- and 32-bit architectures.
+    for &ty in &[I64, I32, I16, I8] {
+        e.add32(e.enc(copy_nop.bind(ty), r_stacknull, 0));
+        e.add64(e.enc(copy_nop.bind(ty), r_stacknull, 0));
+    }
+    for &ty in &[F64, F32] {
+        e.add32(e.enc(copy_nop.bind(ty), r_stacknull, 0));
+        e.add64(e.enc(copy_nop.bind(ty), r_stacknull, 0));
+    }
+
+    // Copy-to-SSA
+    e.add32(e.enc(copy_to_ssa.bind(I32), r_copytossa, opimm_bits(0b000, 0)));
+    e.add64(e.enc(copy_to_ssa.bind(I64), r_copytossa, opimm_bits(0b000, 0)));
+    e.add64(e.enc(copy_to_ssa.bind(I32), r_copytossa, opimm32_bits(0b000, 0)));
+    e.add32(e.enc(copy_to_ssa.bind(B1), r_copytossa, opimm_bits(0b000, 0)));
+    e.add64(e.enc(copy_to_ssa.bind(B1), r_copytossa, opimm_bits(0b000, 0)));
+    e.add32(e.enc(copy_to_ssa.bind(R32), r_copytossa, opimm_bits(0b000, 0)));
+    e.add64(e.enc(copy_to_ssa.bind(R64), r_copytossa, opimm_bits(0b000, 0)));
+
+    e
+}
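Note on the encoding bits: the "encbits" passed to each recipe above pack the RISC-V base opcode and function fields into a single u16, as described by the comment block in the patch (bits 4:0 hold opcode[6:2], bits 7:5 hold funct3, bits 14:8 hold funct7). The standalone sketch below re-derives a few concrete values by hand so the table-driven definitions are easier to check. It is editorial illustration, not part of the vendored file: the three helpers are restated locally without their debug assertions, and the instruction names in the comments (ADD, MUL, ADDI, BEQ) come from the RISC-V base ISA, matching the funct3/funct7 values used in the encodings above.

// Standalone sketch of the encbits layout used by the 32-bit recipes:
// bits 4:0 = opcode[6:2], bits 7:5 = funct3, bits 14:8 = funct7.

fn op_bits(funct3: u16, funct7: u16) -> u16 {
    0b01100 | (funct3 << 5) | (funct7 << 8) // OP major opcode (R-type)
}

fn opimm_bits(funct3: u16, funct7: u16) -> u16 {
    0b00100 | (funct3 << 5) | (funct7 << 8) // OP-IMM major opcode (I-type)
}

fn branch_bits(funct3: u16) -> u16 {
    0b11000 | (funct3 << 5) // BRANCH major opcode (SB-type)
}

fn main() {
    // `iadd` on I32/I64 uses the R recipe with op_bits(0b000, 0b000_0000),
    // i.e. the ADD instruction: opcode[6:2] = 0b01100, funct3 = 0, funct7 = 0.
    assert_eq!(op_bits(0b000, 0b000_0000), 0b0_0000000_000_01100);

    // `imul` is gated on `use_m` and uses funct7 = 0b000_0001, i.e. MUL.
    assert_eq!(op_bits(0b000, 0b000_0001), 0b0_0000001_000_01100);

    // `iadd_imm` uses the Ii recipe with opimm_bits(0b000, 0), i.e. ADDI.
    assert_eq!(opimm_bits(0b000, 0), 0b0_0000000_000_00100);

    // `br_icmp` with the "eq" condition uses branch_bits(0b000), i.e. BEQ.
    assert_eq!(branch_bits(0b000), 0b0_0000000_000_11000);
}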