Diffstat (limited to 'third_party/rust/cranelift-codegen/src/machinst')
-rw-r--r--  third_party/rust/cranelift-codegen/src/machinst/abi.rs          |  233
-rw-r--r--  third_party/rust/cranelift-codegen/src/machinst/abi_impl.rs     | 1320
-rw-r--r--  third_party/rust/cranelift-codegen/src/machinst/adapter.rs      |  140
-rw-r--r--  third_party/rust/cranelift-codegen/src/machinst/blockorder.rs   |  624
-rw-r--r--  third_party/rust/cranelift-codegen/src/machinst/buffer.rs       | 1813
-rw-r--r--  third_party/rust/cranelift-codegen/src/machinst/compile.rs      |  110
-rw-r--r--  third_party/rust/cranelift-codegen/src/machinst/helpers.rs      |   28
-rw-r--r--  third_party/rust/cranelift-codegen/src/machinst/inst_common.rs  |   59
-rw-r--r--  third_party/rust/cranelift-codegen/src/machinst/lower.rs        | 1084
-rw-r--r--  third_party/rust/cranelift-codegen/src/machinst/mod.rs          |  421
-rw-r--r--  third_party/rust/cranelift-codegen/src/machinst/vcode.rs        |  895
11 files changed, 6727 insertions(+), 0 deletions(-)
diff --git a/third_party/rust/cranelift-codegen/src/machinst/abi.rs b/third_party/rust/cranelift-codegen/src/machinst/abi.rs
new file mode 100644
index 0000000000..c72a81dcc2
--- /dev/null
+++ b/third_party/rust/cranelift-codegen/src/machinst/abi.rs
@@ -0,0 +1,233 @@
+//! ABI definitions.
+
+use crate::binemit::StackMap;
+use crate::ir::StackSlot;
+use crate::isa::CallConv;
+use crate::machinst::*;
+use crate::settings;
+
+use regalloc::{Reg, Set, SpillSlot, Writable};
+
+/// Trait implemented by an object that tracks ABI-related state (e.g., stack
+/// layout) and can generate code while emitting the *body* of a function.
+pub trait ABICallee {
+ /// The instruction type for the ISA associated with this ABI.
+ type I: VCodeInst;
+
+ /// Does the ABI-body code need a temp reg? One will be provided to `init()`
+ /// as the `maybe_tmp` arg if so.
+ fn temp_needed(&self) -> bool;
+
+ /// Initialize. This is called after the ABICallee is constructed because it
+ /// may be provided with a temp vreg, which can only be allocated once the
+ /// lowering context exists.
+ fn init(&mut self, maybe_tmp: Option<Writable<Reg>>);
+
+ /// Accumulate outgoing arguments. This ensures that at least SIZE bytes
+ /// are allocated in the prologue to be available for use in function calls
+ /// to hold arguments and/or return values. If this function is called
+ /// multiple times, the maximum of all SIZE values will be available.
+ fn accumulate_outgoing_args_size(&mut self, size: u32);
+
+ /// Get the settings controlling this function's compilation.
+ fn flags(&self) -> &settings::Flags;
+
+ /// Get the calling convention implemented by this ABI object.
+ fn call_conv(&self) -> CallConv;
+
+ /// Get the liveins of the function.
+ fn liveins(&self) -> Set<RealReg>;
+
+ /// Get the liveouts of the function.
+ fn liveouts(&self) -> Set<RealReg>;
+
+ /// Number of arguments.
+ fn num_args(&self) -> usize;
+
+ /// Number of return values.
+ fn num_retvals(&self) -> usize;
+
+ /// Number of stack slots (not spill slots).
+ fn num_stackslots(&self) -> usize;
+
+ /// Generate an instruction which copies an argument to a destination
+ /// register.
+ fn gen_copy_arg_to_reg(&self, idx: usize, into_reg: Writable<Reg>) -> Self::I;
+
+ /// Is the given argument needed in the body (as opposed to, e.g., serving
+ /// only as a special ABI-specific placeholder)? This controls whether
+ /// lowering will copy it to a virtual reg for use by CLIF instructions.
+ fn arg_is_needed_in_body(&self, idx: usize) -> bool;
+
+ /// Generate any setup instruction needed to save values to the
+ /// return-value area. This is usually used when there are multiple return
+ /// values or an otherwise large return value that must be passed on the
+ /// stack; typically the ABI specifies an extra hidden argument that is a
+ /// pointer to that memory.
+ fn gen_retval_area_setup(&self) -> Option<Self::I>;
+
+ /// Generate an instruction which copies a source register to a return value slot.
+ fn gen_copy_reg_to_retval(&self, idx: usize, from_reg: Writable<Reg>) -> Vec<Self::I>;
+
+ /// Generate a return instruction.
+ fn gen_ret(&self) -> Self::I;
+
+ /// Generate an epilogue placeholder. The returned instruction should return `true` from
+ /// `is_epilogue_placeholder()`; this is used to indicate to the lowering driver when
+ /// the epilogue should be inserted.
+ fn gen_epilogue_placeholder(&self) -> Self::I;
+
+ // -----------------------------------------------------------------
+ // Every function above this line may only be called pre-regalloc.
+ // Every function below this line may only be called post-regalloc.
+ // `spillslots()` must be called before any other post-regalloc
+ // function.
+ // ----------------------------------------------------------------
+
+ /// Update with the number of spillslots, post-regalloc.
+ fn set_num_spillslots(&mut self, slots: usize);
+
+ /// Update with the clobbered registers, post-regalloc.
+ fn set_clobbered(&mut self, clobbered: Set<Writable<RealReg>>);
+
+ /// Get the address of a stackslot.
+ fn stackslot_addr(&self, slot: StackSlot, offset: u32, into_reg: Writable<Reg>) -> Self::I;
+
+ /// Load from a stackslot.
+ fn load_stackslot(
+ &self,
+ slot: StackSlot,
+ offset: u32,
+ ty: Type,
+ into_reg: Writable<Reg>,
+ ) -> Self::I;
+
+ /// Store to a stackslot.
+ fn store_stackslot(&self, slot: StackSlot, offset: u32, ty: Type, from_reg: Reg) -> Self::I;
+
+ /// Load from a spillslot.
+ fn load_spillslot(&self, slot: SpillSlot, ty: Type, into_reg: Writable<Reg>) -> Self::I;
+
+ /// Store to a spillslot.
+ fn store_spillslot(&self, slot: SpillSlot, ty: Type, from_reg: Reg) -> Self::I;
+
+ /// Generate a stack map, given a list of spillslots and the emission state
+ /// at a given program point (prior to emission of the safepointing
+ /// instruction).
+ fn spillslots_to_stack_map(
+ &self,
+ slots: &[SpillSlot],
+ state: &<Self::I as MachInstEmit>::State,
+ ) -> StackMap;
+
+ /// Generate a prologue, post-regalloc. This should include any stack
+ /// frame or other setup necessary to use the other methods (`load_arg`,
+ /// `store_retval`, and spillslot accesses). `self` is mutable so that we
+ /// can store information in it which will be useful when creating the
+ /// epilogue.
+ fn gen_prologue(&mut self) -> Vec<Self::I>;
+
+ /// Generate an epilogue, post-regalloc. Note that this must generate the
+ /// actual return instruction (rather than emitting this in the lowering
+ /// logic), because the epilogue code comes before the return and the two are
+ /// likely closely related.
+ fn gen_epilogue(&self) -> Vec<Self::I>;
+
+ /// Returns the full frame size for the given function, after prologue
+ /// emission has run. This comprises the spill slots and stack-storage slots
+ /// (but not storage for clobbered callee-save registers, arguments pushed
+ /// at callsites within this function, or other ephemeral pushes). This is
+ /// used for ABI variants where the client generates prologue/epilogue code,
+ /// as in Baldrdash (SpiderMonkey integration).
+ fn frame_size(&self) -> u32;
+
+ /// Returns the size of arguments expected on the stack.
+ fn stack_args_size(&self) -> u32;
+
+ /// Get the spill-slot size.
+ fn get_spillslot_size(&self, rc: RegClass, ty: Type) -> u32;
+
+ /// Generate a spill. The type, if known, is given; this can be used to
+ /// generate a store instruction optimized for the particular type rather
+ /// than the RegClass (e.g., only F64 that resides in a V128 register). If
+ /// no type is given, the implementation should spill the whole register.
+ fn gen_spill(&self, to_slot: SpillSlot, from_reg: RealReg, ty: Option<Type>) -> Self::I;
+
+ /// Generate a reload (fill). As for spills, the type may be given to allow
+ /// a more optimized load instruction to be generated.
+ fn gen_reload(
+ &self,
+ to_reg: Writable<RealReg>,
+ from_slot: SpillSlot,
+ ty: Option<Type>,
+ ) -> Self::I;
+
+ /// Desired unwind info type.
+ fn unwind_info_kind(&self) -> UnwindInfoKind;
+}
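// A minimal sketch (hypothetical driver code, with assumed inputs) of the
// ordering that the pre-/post-regalloc divider above implies: init and arg
// copies happen while lowering the body; spillslot and clobber info are set
// after register allocation, and only then are the prologue and epilogue
// generated.
fn drive_abi_callee_sketch<A: ABICallee>(
    abi: &mut A,
    maybe_tmp: Option<Writable<Reg>>,
    arg_vregs: &[Writable<Reg>],
    num_spillslots: usize,
    clobbered: Set<Writable<RealReg>>,
) -> Vec<A::I> {
    // Pre-regalloc: initialize, then copy incoming args into the body's vregs.
    abi.init(maybe_tmp);
    let mut body = Vec::new();
    for idx in 0..abi.num_args() {
        if abi.arg_is_needed_in_body(idx) {
            body.push(abi.gen_copy_arg_to_reg(idx, arg_vregs[idx]));
        }
    }
    // ... the rest of the body is lowered and register allocation runs here ...

    // Post-regalloc: spillslot count first, then clobbers, then prologue/epilogue.
    abi.set_num_spillslots(num_spillslots);
    abi.set_clobbered(clobbered);
    let prologue = abi.gen_prologue();
    let epilogue = abi.gen_epilogue();
    prologue.into_iter().chain(body).chain(epilogue).collect()
}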
+
+/// Trait implemented by an object that tracks ABI-related state and can
+/// generate code while emitting a *call* to a function.
+///
+/// An instance of this trait returns information for a *particular*
+/// callsite. It will usually be computed from the called function's
+/// signature.
+///
+/// Unlike `ABICallee` above, methods on this trait are not invoked directly
+/// by the machine-independent code. Rather, the machine-specific lowering
+/// code will typically create an `ABICaller` when creating machine instructions
+/// for an IR call instruction inside `lower()`, directly emit the arg and
+ /// retval copies, and attach the register use/def info to the call.
+///
+/// This trait is thus provided for convenience to the backends.
+pub trait ABICaller {
+ /// The instruction type for the ISA associated with this ABI.
+ type I: VCodeInst;
+
+ /// Get the number of arguments expected.
+ fn num_args(&self) -> usize;
+
+ /// Emit a copy of an argument value from a source register, prior to the call.
+ fn emit_copy_reg_to_arg<C: LowerCtx<I = Self::I>>(
+ &self,
+ ctx: &mut C,
+ idx: usize,
+ from_reg: Reg,
+ );
+
+ /// Emit a copy of a return value into a destination register, after the call returns.
+ fn emit_copy_retval_to_reg<C: LowerCtx<I = Self::I>>(
+ &self,
+ ctx: &mut C,
+ idx: usize,
+ into_reg: Writable<Reg>,
+ );
+
+ /// Emit code to pre-adjust the stack, prior to argument copies and call.
+ fn emit_stack_pre_adjust<C: LowerCtx<I = Self::I>>(&self, ctx: &mut C);
+
+ /// Emit code to post-adjust the stack, after call return and return-value copies.
+ fn emit_stack_post_adjust<C: LowerCtx<I = Self::I>>(&self, ctx: &mut C);
+
+ /// Accumulate outgoing arguments. This ensures that the caller (as
+ /// identified via the CTX argument) allocates enough space in the
+ /// prologue to hold all arguments and return values for this call.
+ /// No code is emitted at the call site; everything is done in the
+ /// caller's function prologue.
+ fn accumulate_outgoing_args_size<C: LowerCtx<I = Self::I>>(&self, ctx: &mut C);
+
+ /// Emit the call itself.
+ ///
+ /// The returned instruction should have proper use- and def-sets according
+ /// to the argument registers, return-value registers, and clobbered
+ /// registers for this function signature in this ABI.
+ ///
+ /// (Arg registers are uses, and retval registers are defs. Clobbered
+ /// registers are also logically defs, but should never be read; their
+ /// values are "defined" (to the regalloc) but "undefined" in every other
+ /// sense.)
+ ///
+ /// This function should only be called once, as it is allowed to re-use
+ /// parts of the ABICaller object in emitting instructions.
+ fn emit_call<C: LowerCtx<I = Self::I>>(&mut self, ctx: &mut C);
+}
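// A minimal sketch (hypothetical lowering code, with assumed `args`/`rets`
// registers already chosen) of the callsite protocol described above:
// reserve outgoing space, pre-adjust the stack, copy args in, emit the call,
// copy retvals out, and post-adjust the stack.
fn lower_call_sketch<A: ABICaller, C: LowerCtx<I = A::I>>(
    caller: &mut A,
    ctx: &mut C,
    args: &[Reg],
    rets: &[Writable<Reg>],
) {
    caller.accumulate_outgoing_args_size(ctx);
    assert_eq!(args.len(), caller.num_args());
    caller.emit_stack_pre_adjust(ctx);
    for (idx, &arg) in args.iter().enumerate() {
        caller.emit_copy_reg_to_arg(ctx, idx, arg);
    }
    caller.emit_call(ctx);
    for (idx, &ret) in rets.iter().enumerate() {
        caller.emit_copy_retval_to_reg(ctx, idx, ret);
    }
    caller.emit_stack_post_adjust(ctx);
}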
diff --git a/third_party/rust/cranelift-codegen/src/machinst/abi_impl.rs b/third_party/rust/cranelift-codegen/src/machinst/abi_impl.rs
new file mode 100644
index 0000000000..e47379da37
--- /dev/null
+++ b/third_party/rust/cranelift-codegen/src/machinst/abi_impl.rs
@@ -0,0 +1,1320 @@
+//! Implementation of a vanilla ABI, shared between several machines. The
+//! implementation here assumes that arguments will be passed in registers
+//! first, then additional args on the stack; that the stack grows downward,
+//! contains a standard frame (return address and frame pointer), and the
+//! compiler is otherwise free to allocate space below that with its choice of
+//! layout; and that the machine has some notion of caller- and callee-save
+//! registers. Most modern machines, e.g. x86-64 and AArch64, should fit this
+//! mold and thus both of these backends use this shared implementation.
+//!
+//! See the documentation in specific machine backends for the "instantiation"
+//! of this generic ABI, i.e., which registers are caller/callee-save, arguments
+//! and return values, and any other special requirements.
+//!
+//! For now the implementation here assumes a 64-bit machine, but we intend to
+//! make this 32/64-bit-generic shortly.
+//!
+//! # Vanilla ABI
+//!
+//! First, arguments and return values are passed in registers up to a certain
+//! fixed count, after which they overflow onto the stack. Multiple return
+//! values either fit in registers, or are returned in a separate return-value
+//! area on the stack, given by a hidden extra parameter.
+//!
+//! Note that the exact stack layout is up to us. We settled on the
+//! below design based on several requirements. In particular, we need to be
+//! able to generate instructions (or instruction sequences) to access
+//! arguments, stack slots, and spill slots before we know how many spill slots
+//! or clobber-saves there will be, because of our pass structure. We also
+//! prefer positive offsets to negative offsets because of an asymmetry in
+//! some machines' addressing modes (e.g., on AArch64, positive offsets have a
+//! larger possible range without a long-form sequence to synthesize an
+//! arbitrary offset). Finally, it is not allowed to access memory below the
+//! current SP value.
+//!
+//! We assume that a prologue first pushes the frame pointer (and return address
+//! above that, if the machine does not do that in hardware). We set FP to point
+//! to this two-word frame record. We store all other frame slots below this
+//! two-word frame record, with the stack pointer remaining at or below this
+//! fixed frame storage for the rest of the function. We can then access frame
+//! storage slots using positive offsets from SP. In order to allow codegen for
+//! the latter before knowing how many clobber-saves we have, and also allow it
+//! while SP is being adjusted to set up a call, we implement a "nominal SP"
+//! tracking feature by which a fixup (distance between actual SP and a
+//! "nominal" SP) is known at each instruction.
+//!
+//! # Stack Layout
+//!
+//! The stack looks like:
+//!
+//! ```plain
+//! (high address)
+//!
+//! +---------------------------+
+//! | ... |
+//! | stack args |
+//! | (accessed via FP) |
+//! +---------------------------+
+//! SP at function entry -----> | return address |
+//! +---------------------------+
+//! FP after prologue --------> | FP (pushed by prologue) |
+//! +---------------------------+
+//! | ... |
+//! | spill slots |
+//! | (accessed via nominal SP) |
+//! | ... |
+//! | stack slots |
+//! | (accessed via nominal SP) |
+//! nominal SP ---------------> | (alloc'd by prologue) |
+//! +---------------------------+
+//! | ... |
+//! | clobbered callee-saves |
+//! SP at end of prologue ----> | (pushed by prologue) |
+//! +---------------------------+
+//! | [alignment as needed] |
+//! | ... |
+//! | args for call |
+//! SP before making a call --> | (pushed at callsite) |
+//! +---------------------------+
+//!
+//! (low address)
+//! ```
+//!
+//! # Multi-value Returns
+//!
+//! Note that we support multi-value returns in two ways. First, we allow for
+ //! multiple return-value registers. Second, if the appropriate flag is set, we
+//! support the SpiderMonkey Wasm ABI. For details of the multi-value return
+//! ABI, see:
+//!
+//! https://searchfox.org/mozilla-central/rev/bc3600def806859c31b2c7ac06e3d69271052a89/js/src/wasm/WasmStubs.h#134
+//!
+//! In brief:
+//! - Return values are processed in *reverse* order.
+//! - The first return value in this order (so the last return) goes into the
+//! ordinary return register.
+//! - Any further returns go in a struct-return area, allocated upwards (in
+//! address order) during the reverse traversal.
+//! - This struct-return area is provided by the caller, and a pointer to its
+//! start is passed as an invisible last (extra) argument. Normally the caller
+//! will allocate this area on the stack. When we generate calls, we place it
+//! just above the on-stack argument area.
+//! - So, for example, a function returning 4 i64's (v0, v1, v2, v3), with no
+//! formal arguments, would:
+//! - Accept a pointer `P` to the struct return area as a hidden argument in the
+//! first argument register on entry.
+//! - Return v3 in the one and only return-value register.
+//! - Return v2 in memory at `[P]`.
+//! - Return v1 in memory at `[P+8]`.
+//! - Return v0 in memory at `[P+16]`.
+
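// A small sketch of the "nominal SP" arithmetic described above (assumed
// helper, not part of this file): while the real SP sits `virtual_sp_offset`
// bytes below nominal SP (e.g. after pushing clobber-saves or outgoing call
// args), an access expressed as `nominal SP + off` must be materialized as
// `real SP + off + virtual_sp_offset`.
fn nominal_to_real_sp_offset(nominal_off: i64, virtual_sp_offset: i64) -> i64 {
    debug_assert!(virtual_sp_offset >= 0);
    nominal_off + virtual_sp_offset
}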
+use super::abi::*;
+use crate::binemit::StackMap;
+use crate::ir::types::*;
+use crate::ir::{ArgumentExtension, StackSlot};
+use crate::machinst::*;
+use crate::settings;
+use crate::CodegenResult;
+use crate::{ir, isa};
+use alloc::vec::Vec;
+use log::{debug, trace};
+use regalloc::{RealReg, Reg, RegClass, Set, SpillSlot, Writable};
+use std::convert::TryFrom;
+use std::marker::PhantomData;
+use std::mem;
+
+/// A location for an argument or return value.
+#[derive(Clone, Copy, Debug)]
+pub enum ABIArg {
+ /// In a real register.
+ Reg(
+ RealReg,
+ ir::Type,
+ ir::ArgumentExtension,
+ ir::ArgumentPurpose,
+ ),
+ /// Arguments only: on stack, at given offset from SP at entry.
+ Stack(i64, ir::Type, ir::ArgumentExtension, ir::ArgumentPurpose),
+}
+
+impl ABIArg {
+ /// Get the purpose of this arg.
+ fn get_purpose(self) -> ir::ArgumentPurpose {
+ match self {
+ ABIArg::Reg(_, _, _, purpose) => purpose,
+ ABIArg::Stack(_, _, _, purpose) => purpose,
+ }
+ }
+}
+
+/// Are we computing information about arguments or return values? Much of the
+/// handling is factored out into common routines; this enum allows us to
+/// distinguish which case we're handling.
+#[derive(Clone, Copy, Debug, PartialEq, Eq)]
+pub enum ArgsOrRets {
+ /// Arguments.
+ Args,
+ /// Return values.
+ Rets,
+}
+
+/// Is an instruction returned by an ABI machine-specific backend a safepoint,
+/// or not?
+#[derive(Clone, Copy, Debug, PartialEq, Eq)]
+pub enum InstIsSafepoint {
+ /// The instruction is a safepoint.
+ Yes,
+ /// The instruction is not a safepoint.
+ No,
+}
+
+/// Abstract location for a machine-specific ABI impl to translate into the
+/// appropriate addressing mode.
+#[derive(Clone, Copy, Debug)]
+pub enum StackAMode {
+ /// Offset from the frame pointer, possibly making use of a specific type
+ /// for a scaled indexing operation.
+ FPOffset(i64, ir::Type),
+ /// Offset from the nominal stack pointer, possibly making use of a specific
+ /// type for a scaled indexing operation.
+ NominalSPOffset(i64, ir::Type),
+ /// Offset from the real stack pointer, possibly making use of a specific
+ /// type for a scaled indexing operation.
+ SPOffset(i64, ir::Type),
+}
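// Illustrative constructions of the three bases above (offsets are made up;
// the actual values come from the layout computed elsewhere in this file):
// incoming stack args are FP-relative, stackslots/spillslots are relative to
// nominal SP, and outgoing call args / return areas are relative to real SP.
fn stack_amode_examples() -> (StackAMode, StackAMode, StackAMode) {
    (
        // An incoming stack argument just above the two-word frame record.
        StackAMode::FPOffset(16, I64),
        // A stackslot or spillslot in the fixed frame area.
        StackAMode::NominalSPOffset(8, I64),
        // An outgoing call argument at the bottom of the frame, at a callsite.
        StackAMode::SPOffset(0, I64),
    )
}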
+
+/// Trait implemented by machine-specific backend to provide information about
+/// register assignments and to allow generating the specific instructions for
+/// stack loads/saves, prologues/epilogues, etc.
+pub trait ABIMachineSpec {
+ /// The instruction type.
+ type I: VCodeInst;
+
+ /// Returns the number of bits in a word: 32 or 64, for a 32- or 64-bit architecture.
+ fn word_bits() -> u32;
+
+ /// Returns the number of bytes in a word.
+ fn word_bytes() -> u32 {
+ return Self::word_bits() / 8;
+ }
+
+ /// Returns word-size integer type.
+ fn word_type() -> Type {
+ match Self::word_bits() {
+ 32 => I32,
+ 64 => I64,
+ _ => unreachable!(),
+ }
+ }
+
+ /// Returns word register class.
+ fn word_reg_class() -> RegClass {
+ match Self::word_bits() {
+ 32 => RegClass::I32,
+ 64 => RegClass::I64,
+ _ => unreachable!(),
+ }
+ }
+
+ /// Returns required stack alignment in bytes.
+ fn stack_align(call_conv: isa::CallConv) -> u32;
+
+ /// Process a list of parameters or return values and allocate them to registers
+ /// and stack slots.
+ ///
+ /// Returns the list of argument locations, the stack-space used (rounded up
+ /// as alignment requires), and if `add_ret_area_ptr` was passed, the
+ /// index of the extra synthetic arg that was added.
+ fn compute_arg_locs(
+ call_conv: isa::CallConv,
+ params: &[ir::AbiParam],
+ args_or_rets: ArgsOrRets,
+ add_ret_area_ptr: bool,
+ ) -> CodegenResult<(Vec<ABIArg>, i64, Option<usize>)>;
+
+ /// Returns the offset from FP to the argument area, i.e., jumping over the saved FP, return
+ /// address, and maybe other standard elements depending on ABI (e.g. Wasm TLS reg).
+ fn fp_to_arg_offset(call_conv: isa::CallConv, flags: &settings::Flags) -> i64;
+
+ /// Generate a load from the stack.
+ fn gen_load_stack(mem: StackAMode, into_reg: Writable<Reg>, ty: Type) -> Self::I;
+
+ /// Generate a store to the stack.
+ fn gen_store_stack(mem: StackAMode, from_reg: Reg, ty: Type) -> Self::I;
+
+ /// Generate a move.
+ fn gen_move(to_reg: Writable<Reg>, from_reg: Reg, ty: Type) -> Self::I;
+
+ /// Generate an integer-extend operation.
+ fn gen_extend(
+ to_reg: Writable<Reg>,
+ from_reg: Reg,
+ is_signed: bool,
+ from_bits: u8,
+ to_bits: u8,
+ ) -> Self::I;
+
+ /// Generate a return instruction.
+ fn gen_ret() -> Self::I;
+
+ /// Generate an "epilogue placeholder" instruction, recognized by lowering
+ /// when using the Baldrdash ABI.
+ fn gen_epilogue_placeholder() -> Self::I;
+
+ /// Generate an add-with-immediate. Note that even if this uses a scratch
+ /// register, it must satisfy two requirements:
+ ///
+ /// - The add-imm sequence must only clobber caller-save registers, because
+ /// it will be placed in the prologue before the clobbered callee-save
+ /// registers are saved.
+ ///
+ /// - The add-imm sequence must work correctly when `from_reg` and/or
+ /// `into_reg` are the register returned by `get_stacklimit_reg()`.
+ fn gen_add_imm(into_reg: Writable<Reg>, from_reg: Reg, imm: u32) -> SmallVec<[Self::I; 4]>;
+
+ /// Generate a sequence that traps with a `TrapCode::StackOverflow` code if
+ /// the stack pointer is less than the given limit register (assuming the
+ /// stack grows downward).
+ fn gen_stack_lower_bound_trap(limit_reg: Reg) -> SmallVec<[Self::I; 2]>;
+
+ /// Generate an instruction to compute an address of a stack slot (FP- or
+ /// SP-based offset).
+ fn gen_get_stack_addr(mem: StackAMode, into_reg: Writable<Reg>, ty: Type) -> Self::I;
+
+ /// Get a fixed register to use to compute a stack limit. This is needed for
+ /// certain sequences generated after the register allocator has already
+ /// run. This must satisfy two requirements:
+ ///
+ /// - It must be a caller-save register, because it will be clobbered in the
+ /// prologue before the clobbered callee-save registers are saved.
+ ///
+ /// - It must be safe to pass as an argument and/or destination to
+ /// `gen_add_imm()`. This is relevant when an addition with a large
+ /// immediate needs its own temporary; it cannot use the same fixed
+ /// temporary as this one.
+ fn get_stacklimit_reg() -> Reg;
+
+ /// Generate a load from the given [base+offset] address.
+ fn gen_load_base_offset(into_reg: Writable<Reg>, base: Reg, offset: i32, ty: Type) -> Self::I;
+
+ /// Generate a store to the given [base+offset] address.
+ fn gen_store_base_offset(base: Reg, offset: i32, from_reg: Reg, ty: Type) -> Self::I;
+
+ /// Adjust the stack pointer up or down.
+ fn gen_sp_reg_adjust(amount: i32) -> SmallVec<[Self::I; 2]>;
+
+ /// Generate a meta-instruction that adjusts the nominal SP offset.
+ fn gen_nominal_sp_adj(amount: i32) -> Self::I;
+
+ /// Generate the usual frame-setup sequence for this architecture: e.g.,
+ /// `push rbp / mov rbp, rsp` on x86-64, or `stp fp, lr, [sp, #-16]!` on
+ /// AArch64.
+ fn gen_prologue_frame_setup() -> SmallVec<[Self::I; 2]>;
+
+ /// Generate the usual frame-restore sequence for this architecture.
+ fn gen_epilogue_frame_restore() -> SmallVec<[Self::I; 2]>;
+
+ /// Generate a clobber-save sequence. This takes the list of *all* registers
+ /// written/modified by the function body. The implementation here is
+ /// responsible for determining which of these are callee-saved according to
+ /// the ABI. It should return a sequence of instructions that "push" or
+ /// otherwise save these values to the stack. The sequence of instructions
+ /// should adjust the stack pointer downward, and should align as necessary
+ /// according to ABI requirements.
+ ///
+ /// Returns stack bytes used as well as instructions. Does not adjust
+ /// nominal SP offset; caller will do that.
+ fn gen_clobber_save(
+ call_conv: isa::CallConv,
+ flags: &settings::Flags,
+ clobbers: &Set<Writable<RealReg>>,
+ fixed_frame_storage_size: u32,
+ outgoing_args_size: u32,
+ ) -> (u64, SmallVec<[Self::I; 16]>);
+
+ /// Generate a clobber-restore sequence. This sequence should perform the
+ /// opposite of the clobber-save sequence generated above, assuming that SP
+ /// going into the sequence is at the same point that it was left when the
+ /// clobber-save sequence finished.
+ fn gen_clobber_restore(
+ call_conv: isa::CallConv,
+ flags: &settings::Flags,
+ clobbers: &Set<Writable<RealReg>>,
+ fixed_frame_storage_size: u32,
+ outgoing_args_size: u32,
+ ) -> SmallVec<[Self::I; 16]>;
+
+ /// Generate a call instruction/sequence. This method is provided one
+ /// temporary register to use to synthesize the called address, if needed.
+ fn gen_call(
+ dest: &CallDest,
+ uses: Vec<Reg>,
+ defs: Vec<Writable<Reg>>,
+ opcode: ir::Opcode,
+ tmp: Writable<Reg>,
+ callee_conv: isa::CallConv,
+ caller_conv: isa::CallConv,
+ ) -> SmallVec<[(InstIsSafepoint, Self::I); 2]>;
+
+ /// Get the number of spillslots required for the given register-class and
+ /// type.
+ fn get_number_of_spillslots_for_value(rc: RegClass, ty: Type) -> u32;
+
+ /// Get the current virtual-SP offset from an instruction-emission state.
+ fn get_virtual_sp_offset_from_state(s: &<Self::I as MachInstEmit>::State) -> i64;
+
+ /// Get the "nominal SP to FP" offset from an instruction-emission state.
+ fn get_nominal_sp_to_fp(s: &<Self::I as MachInstEmit>::State) -> i64;
+
+ /// Get all caller-save registers, that is, registers that we expect
+ /// not to be saved across a call to a callee with the given ABI.
+ fn get_regs_clobbered_by_call(call_conv_of_callee: isa::CallConv) -> Vec<Writable<Reg>>;
+}
+
+/// ABI information shared between body (callee) and caller.
+struct ABISig {
+ /// Argument locations (regs or stack slots). Stack offsets are relative to
+ /// SP on entry to function.
+ args: Vec<ABIArg>,
+ /// Return-value locations. Stack offsets are relative to the return-area
+ /// pointer.
+ rets: Vec<ABIArg>,
+ /// Space on stack used to store arguments.
+ stack_arg_space: i64,
+ /// Space on stack used to store return values.
+ stack_ret_space: i64,
+ /// Index in `args` of the stack-return-value-area argument.
+ stack_ret_arg: Option<usize>,
+ /// Calling convention used.
+ call_conv: isa::CallConv,
+}
+
+impl ABISig {
+ fn from_func_sig<M: ABIMachineSpec>(sig: &ir::Signature) -> CodegenResult<ABISig> {
+ // Compute args and retvals from signature. Handle retvals first,
+ // because we may need to add a return-area arg to the args.
+ let (rets, stack_ret_space, _) = M::compute_arg_locs(
+ sig.call_conv,
+ &sig.returns,
+ ArgsOrRets::Rets,
+ /* extra ret-area ptr = */ false,
+ )?;
+ let need_stack_return_area = stack_ret_space > 0;
+ let (args, stack_arg_space, stack_ret_arg) = M::compute_arg_locs(
+ sig.call_conv,
+ &sig.params,
+ ArgsOrRets::Args,
+ need_stack_return_area,
+ )?;
+
+ trace!(
+ "ABISig: sig {:?} => args = {:?} rets = {:?} arg stack = {} ret stack = {} stack_ret_arg = {:?}",
+ sig,
+ args,
+ rets,
+ stack_arg_space,
+ stack_ret_space,
+ stack_ret_arg
+ );
+
+ Ok(ABISig {
+ args,
+ rets,
+ stack_arg_space,
+ stack_ret_space,
+ stack_ret_arg,
+ call_conv: sig.call_conv,
+ })
+ }
+}
+
+/// ABI object for a function body.
+pub struct ABICalleeImpl<M: ABIMachineSpec> {
+ /// Signature: arg and retval regs.
+ sig: ABISig,
+ /// Offsets to each stackslot.
+ stackslots: Vec<u32>,
+ /// Total stack size of all stackslots.
+ stackslots_size: u32,
+ /// Stack size to be reserved for outgoing arguments.
+ outgoing_args_size: u32,
+ /// Clobbered registers, from regalloc.
+ clobbered: Set<Writable<RealReg>>,
+ /// Total number of spillslots, from regalloc.
+ spillslots: Option<usize>,
+ /// Storage allocated for the fixed part of the stack frame. This is
+ /// usually the same as the total frame size below, except in the case
+ /// of the baldrdash calling convention.
+ fixed_frame_storage_size: u32,
+ /// "Total frame size", as defined by "distance between FP and nominal SP".
+ /// Some items are pushed below nominal SP, so the function may actually use
+ /// more stack than this would otherwise imply. It is simply the initial
+ /// frame/allocation size needed for stackslots and spillslots.
+ total_frame_size: Option<u32>,
+ /// The register holding the return-area pointer, if needed.
+ ret_area_ptr: Option<Writable<Reg>>,
+ /// Calling convention this function expects.
+ call_conv: isa::CallConv,
+ /// The settings controlling this function's compilation.
+ flags: settings::Flags,
+ /// Whether or not this function is a "leaf", meaning it calls no other
+ /// functions
+ is_leaf: bool,
+ /// If this function has a stack limit specified, then `Reg` is where the
+ /// stack limit will be located after the instructions specified have been
+ /// executed.
+ ///
+ /// Note that this is intended for insertion into the prologue, if
+ /// present. Also note that because the instructions here execute in the
+ /// prologue this happens after legalization/register allocation/etc so we
+ /// need to be extremely careful with each instruction. The instructions are
+ /// manually register-allocated and carefully only use caller-saved
+ /// registers and keep nothing live after this sequence of instructions.
+ stack_limit: Option<(Reg, Vec<M::I>)>,
+
+ _mach: PhantomData<M>,
+}
+
+fn get_special_purpose_param_register(
+ f: &ir::Function,
+ abi: &ABISig,
+ purpose: ir::ArgumentPurpose,
+) -> Option<Reg> {
+ let idx = f.signature.special_param_index(purpose)?;
+ match abi.args[idx] {
+ ABIArg::Reg(reg, ..) => Some(reg.to_reg()),
+ ABIArg::Stack(..) => None,
+ }
+}
+
+impl<M: ABIMachineSpec> ABICalleeImpl<M> {
+ /// Create a new body ABI instance.
+ pub fn new(f: &ir::Function, flags: settings::Flags) -> CodegenResult<Self> {
+ debug!("ABI: func signature {:?}", f.signature);
+
+ let sig = ABISig::from_func_sig::<M>(&f.signature)?;
+
+ let call_conv = f.signature.call_conv;
+ // Only these calling conventions are supported.
+ debug_assert!(
+ call_conv == isa::CallConv::SystemV
+ || call_conv == isa::CallConv::Fast
+ || call_conv == isa::CallConv::Cold
+ || call_conv.extends_baldrdash(),
+ "Unsupported calling convention: {:?}",
+ call_conv
+ );
+
+ // Compute stackslot locations and total stackslot size.
+ let mut stack_offset: u32 = 0;
+ let mut stackslots = vec![];
+ for (stackslot, data) in f.stack_slots.iter() {
+ let off = stack_offset;
+ stack_offset += data.size;
+ let mask = M::word_bytes() - 1;
+ stack_offset = (stack_offset + mask) & !mask;
+ debug_assert_eq!(stackslot.as_u32() as usize, stackslots.len());
+ stackslots.push(off);
+ }
+
+ // Figure out what instructions, if any, will be needed to check the
+ // stack limit. This can either be specified as a special-purpose
+ // argument or as a global value which often calculates the stack limit
+ // from the arguments.
+ let stack_limit =
+ get_special_purpose_param_register(f, &sig, ir::ArgumentPurpose::StackLimit)
+ .map(|reg| (reg, Vec::new()))
+ .or_else(|| f.stack_limit.map(|gv| gen_stack_limit::<M>(f, &sig, gv)));
+
+ Ok(Self {
+ sig,
+ stackslots,
+ stackslots_size: stack_offset,
+ outgoing_args_size: 0,
+ clobbered: Set::empty(),
+ spillslots: None,
+ fixed_frame_storage_size: 0,
+ total_frame_size: None,
+ ret_area_ptr: None,
+ call_conv,
+ flags,
+ is_leaf: f.is_leaf(),
+ stack_limit,
+ _mach: PhantomData,
+ })
+ }
+
+ /// Inserts instructions necessary for checking the stack limit into the
+ /// prologue.
+ ///
+ /// This function will generate the instructions necessary to perform a stack
+ /// check at the header of a function. The stack check is intended to trap
+ /// if the stack pointer goes below a particular threshold, preventing stack
+ /// overflow in wasm or other code. The `stack_limit` argument here is the
+ /// register which holds the threshold below which we're supposed to trap.
+ /// This function is known to allocate `stack_size` bytes and we'll push
+ /// instructions onto `insts`.
+ ///
+ /// Note that the instructions generated here are special because this is
+ /// happening so late in the pipeline (e.g. after register allocation). This
+ /// means that we need to do manual register allocation here and also be
+ /// careful to not clobber any callee-saved or argument registers. For now
+ /// this routine makes do with `spilltmp_reg` as one temporary register,
+ /// plus a second, caller-saved register, `tmp2`. This
+ /// should be fine for us since no spills should happen in this sequence of
+ /// instructions, so our register won't get accidentally clobbered.
+ ///
+ /// No values can be live after the prologue, but in this case that's ok
+ /// because we just need to perform a stack check before progressing with
+ /// the rest of the function.
+ fn insert_stack_check(&self, stack_limit: Reg, stack_size: u32, insts: &mut Vec<M::I>) {
+ // With no explicit stack allocated we can just emit the simple check of
+ // the stack registers against the stack limit register, and trap if
+ // it's out of bounds.
+ if stack_size == 0 {
+ insts.extend(M::gen_stack_lower_bound_trap(stack_limit));
+ return;
+ }
+
+ // Note that the 32k stack size here is pretty special. See the
+ // documentation in x86/abi.rs for why this is here. The general idea is
+ // that we're protecting against overflow in the addition that happens
+ // below.
+ if stack_size >= 32 * 1024 {
+ insts.extend(M::gen_stack_lower_bound_trap(stack_limit));
+ }
+
+ // Add the `stack_size` to `stack_limit`, placing the result in
+ // `scratch`.
+ //
+ // Note though that `stack_limit`'s register may be the same as
+ // `scratch`. If our stack size doesn't fit into an immediate this
+ // means we need a second scratch register for loading the stack size
+ // into a register.
+ let scratch = Writable::from_reg(M::get_stacklimit_reg());
+ insts.extend(M::gen_add_imm(scratch, stack_limit, stack_size).into_iter());
+ insts.extend(M::gen_stack_lower_bound_trap(scratch.to_reg()));
+ }
+}
+
+/// Generates the instructions necessary for the `gv` to be materialized into a
+/// register.
+///
+/// This function will return a register that will contain the result of
+/// evaluating `gv`. It will also return any instructions necessary to calculate
+/// the value of the register.
+///
+/// Note that global values are typically lowered to instructions via the
+/// standard legalization pass. Unfortunately though prologue generation happens
+/// so late in the pipeline that we can't use these legalization passes to
+/// generate the instructions for `gv`. As a result we duplicate some lowering
+/// of `gv` here and support only some global values. This is similar to what
+/// the x86 backend does for now, and hopefully this can be somewhat cleaned up
+/// in the future too!
+///
+/// Also note that this function will make use of `writable_spilltmp_reg()` as a
+ /// temporary register to store values in if necessary. After we write to this
+ /// register there are guaranteed to be no spills between the write and its use,
+ /// because we're not participating in register allocation anyway!
+fn gen_stack_limit<M: ABIMachineSpec>(
+ f: &ir::Function,
+ abi: &ABISig,
+ gv: ir::GlobalValue,
+) -> (Reg, Vec<M::I>) {
+ let mut insts = Vec::new();
+ let reg = generate_gv::<M>(f, abi, gv, &mut insts);
+ return (reg, insts);
+}
+
+fn generate_gv<M: ABIMachineSpec>(
+ f: &ir::Function,
+ abi: &ABISig,
+ gv: ir::GlobalValue,
+ insts: &mut Vec<M::I>,
+) -> Reg {
+ match f.global_values[gv] {
+ // Return the direct register the vmcontext is in
+ ir::GlobalValueData::VMContext => {
+ get_special_purpose_param_register(f, abi, ir::ArgumentPurpose::VMContext)
+ .expect("no vmcontext parameter found")
+ }
+ // Load our base value into a register, then load from that register
+ // into a temporary register.
+ ir::GlobalValueData::Load {
+ base,
+ offset,
+ global_type: _,
+ readonly: _,
+ } => {
+ let base = generate_gv::<M>(f, abi, base, insts);
+ let into_reg = Writable::from_reg(M::get_stacklimit_reg());
+ insts.push(M::gen_load_base_offset(
+ into_reg,
+ base,
+ offset.into(),
+ M::word_type(),
+ ));
+ return into_reg.to_reg();
+ }
+ ref other => panic!("global value for stack limit not supported: {}", other),
+ }
+}
+
+/// Return a type either from an optional type hint, or if not, from the default
+/// type associated with the given register's class. This is used to generate
+/// loads/spills appropriately given the type of value loaded/stored (which may
+/// be narrower than the spillslot). We usually have the type because the
+/// regalloc usually provides the vreg being spilled/reloaded, and we know every
+/// vreg's type. However, the regalloc *can* request a spill/reload without an
+/// associated vreg when needed to satisfy a safepoint (which requires all
+/// ref-typed values, even those in real registers in the original vcode, to be
+/// in spillslots).
+fn ty_from_ty_hint_or_reg_class<M: ABIMachineSpec>(r: Reg, ty: Option<Type>) -> Type {
+ match (ty, r.get_class()) {
+ // If the type is provided, use it as-is.
+ (Some(t), _) => t,
+ // If no type is provided, this should be a register spill for a
+ // safepoint, so we only expect I32/I64 (integer) registers.
+ (None, rc) if rc == M::word_reg_class() => M::word_type(),
+ _ => panic!("Unexpected register class!"),
+ }
+}
+
+impl<M: ABIMachineSpec> ABICallee for ABICalleeImpl<M> {
+ type I = M::I;
+
+ fn temp_needed(&self) -> bool {
+ self.sig.stack_ret_arg.is_some()
+ }
+
+ fn init(&mut self, maybe_tmp: Option<Writable<Reg>>) {
+ if self.sig.stack_ret_arg.is_some() {
+ assert!(maybe_tmp.is_some());
+ self.ret_area_ptr = maybe_tmp;
+ }
+ }
+
+ fn accumulate_outgoing_args_size(&mut self, size: u32) {
+ if size > self.outgoing_args_size {
+ self.outgoing_args_size = size;
+ }
+ }
+
+ fn flags(&self) -> &settings::Flags {
+ &self.flags
+ }
+
+ fn call_conv(&self) -> isa::CallConv {
+ self.sig.call_conv
+ }
+
+ fn liveins(&self) -> Set<RealReg> {
+ let mut set: Set<RealReg> = Set::empty();
+ for &arg in &self.sig.args {
+ if let ABIArg::Reg(r, ..) = arg {
+ set.insert(r);
+ }
+ }
+ set
+ }
+
+ fn liveouts(&self) -> Set<RealReg> {
+ let mut set: Set<RealReg> = Set::empty();
+ for &ret in &self.sig.rets {
+ if let ABIArg::Reg(r, ..) = ret {
+ set.insert(r);
+ }
+ }
+ set
+ }
+
+ fn num_args(&self) -> usize {
+ self.sig.args.len()
+ }
+
+ fn num_retvals(&self) -> usize {
+ self.sig.rets.len()
+ }
+
+ fn num_stackslots(&self) -> usize {
+ self.stackslots.len()
+ }
+
+ fn gen_copy_arg_to_reg(&self, idx: usize, into_reg: Writable<Reg>) -> Self::I {
+ match &self.sig.args[idx] {
+ // Extension mode doesn't matter (we're copying out, not in; we
+ // ignore high bits by convention).
+ &ABIArg::Reg(r, ty, ..) => M::gen_move(into_reg, r.to_reg(), ty),
+ &ABIArg::Stack(off, ty, ..) => M::gen_load_stack(
+ StackAMode::FPOffset(M::fp_to_arg_offset(self.call_conv, &self.flags) + off, ty),
+ into_reg,
+ ty,
+ ),
+ }
+ }
+
+ fn arg_is_needed_in_body(&self, idx: usize) -> bool {
+ match self.sig.args[idx].get_purpose() {
+ // Special Baldrdash-specific pseudo-args that are present only to
+ // fill stack slots. Won't ever be used as ordinary values in the
+ // body.
+ ir::ArgumentPurpose::CalleeTLS | ir::ArgumentPurpose::CallerTLS => false,
+ _ => true,
+ }
+ }
+
+ fn gen_copy_reg_to_retval(&self, idx: usize, from_reg: Writable<Reg>) -> Vec<Self::I> {
+ let mut ret = Vec::new();
+ let word_bits = M::word_bits() as u8;
+ match &self.sig.rets[idx] {
+ &ABIArg::Reg(r, ty, ext, ..) => {
+ let from_bits = ty_bits(ty) as u8;
+ let dest_reg = Writable::from_reg(r.to_reg());
+ match (ext, from_bits) {
+ (ArgumentExtension::Uext, n) | (ArgumentExtension::Sext, n)
+ if n < word_bits =>
+ {
+ let signed = ext == ArgumentExtension::Sext;
+ ret.push(M::gen_extend(
+ dest_reg,
+ from_reg.to_reg(),
+ signed,
+ from_bits,
+ /* to_bits = */ word_bits,
+ ));
+ }
+ _ => ret.push(M::gen_move(dest_reg, from_reg.to_reg(), ty)),
+ };
+ }
+ &ABIArg::Stack(off, mut ty, ext, ..) => {
+ let from_bits = ty_bits(ty) as u8;
+ // A machine ABI implementation should ensure that stack frames
+ // have "reasonable" size. All current ABIs for machinst
+ // backends (aarch64 and x64) enforce a 128MB limit.
+ let off = i32::try_from(off)
+ .expect("Argument stack offset greater than 2GB; should hit impl limit first");
+ // Trash the from_reg; it should be its last use.
+ match (ext, from_bits) {
+ (ArgumentExtension::Uext, n) | (ArgumentExtension::Sext, n)
+ if n < word_bits =>
+ {
+ assert_eq!(M::word_reg_class(), from_reg.to_reg().get_class());
+ let signed = ext == ArgumentExtension::Sext;
+ ret.push(M::gen_extend(
+ from_reg,
+ from_reg.to_reg(),
+ signed,
+ from_bits,
+ /* to_bits = */ word_bits,
+ ));
+ // Store the extended version.
+ ty = M::word_type();
+ }
+ _ => {}
+ };
+ ret.push(M::gen_store_base_offset(
+ self.ret_area_ptr.unwrap().to_reg(),
+ off,
+ from_reg.to_reg(),
+ ty,
+ ));
+ }
+ }
+ ret
+ }
+
+ fn gen_retval_area_setup(&self) -> Option<Self::I> {
+ if let Some(i) = self.sig.stack_ret_arg {
+ let inst = self.gen_copy_arg_to_reg(i, self.ret_area_ptr.unwrap());
+ trace!(
+ "gen_retval_area_setup: inst {:?}; ptr reg is {:?}",
+ inst,
+ self.ret_area_ptr.unwrap().to_reg()
+ );
+ Some(inst)
+ } else {
+ trace!("gen_retval_area_setup: not needed");
+ None
+ }
+ }
+
+ fn gen_ret(&self) -> Self::I {
+ M::gen_ret()
+ }
+
+ fn gen_epilogue_placeholder(&self) -> Self::I {
+ M::gen_epilogue_placeholder()
+ }
+
+ fn set_num_spillslots(&mut self, slots: usize) {
+ self.spillslots = Some(slots);
+ }
+
+ fn set_clobbered(&mut self, clobbered: Set<Writable<RealReg>>) {
+ self.clobbered = clobbered;
+ }
+
+ /// Load from a stackslot.
+ fn load_stackslot(
+ &self,
+ slot: StackSlot,
+ offset: u32,
+ ty: Type,
+ into_reg: Writable<Reg>,
+ ) -> Self::I {
+ // Offset from beginning of stackslot area, which is at nominal SP (see
+ // [StackAMode::NominalSPOffset] for more details on nominal SP tracking).
+ let stack_off = self.stackslots[slot.as_u32() as usize] as i64;
+ let sp_off: i64 = stack_off + (offset as i64);
+ trace!("load_stackslot: slot {} -> sp_off {}", slot, sp_off);
+ M::gen_load_stack(StackAMode::NominalSPOffset(sp_off, ty), into_reg, ty)
+ }
+
+ /// Store to a stackslot.
+ fn store_stackslot(&self, slot: StackSlot, offset: u32, ty: Type, from_reg: Reg) -> Self::I {
+ // Offset from beginning of stackslot area, which is at nominal SP (see
+ // [StackAMode::NominalSPOffset] for more details on nominal SP tracking).
+ let stack_off = self.stackslots[slot.as_u32() as usize] as i64;
+ let sp_off: i64 = stack_off + (offset as i64);
+ trace!("store_stackslot: slot {} -> sp_off {}", slot, sp_off);
+ M::gen_store_stack(StackAMode::NominalSPOffset(sp_off, ty), from_reg, ty)
+ }
+
+ /// Produce an instruction that computes a stackslot address.
+ fn stackslot_addr(&self, slot: StackSlot, offset: u32, into_reg: Writable<Reg>) -> Self::I {
+ // Offset from beginning of stackslot area, which is at nominal SP (see
+ // [StackAMode::NominalSPOffset] for more details on nominal SP tracking).
+ let stack_off = self.stackslots[slot.as_u32() as usize] as i64;
+ let sp_off: i64 = stack_off + (offset as i64);
+ M::gen_get_stack_addr(StackAMode::NominalSPOffset(sp_off, I8), into_reg, I8)
+ }
+
+ /// Load from a spillslot.
+ fn load_spillslot(&self, slot: SpillSlot, ty: Type, into_reg: Writable<Reg>) -> Self::I {
+ // Offset from beginning of spillslot area, which is at nominal SP + stackslots_size.
+ let islot = slot.get() as i64;
+ let spill_off = islot * M::word_bytes() as i64;
+ let sp_off = self.stackslots_size as i64 + spill_off;
+ trace!("load_spillslot: slot {:?} -> sp_off {}", slot, sp_off);
+ M::gen_load_stack(StackAMode::NominalSPOffset(sp_off, ty), into_reg, ty)
+ }
+
+ /// Store to a spillslot.
+ fn store_spillslot(&self, slot: SpillSlot, ty: Type, from_reg: Reg) -> Self::I {
+ // Offset from beginning of spillslot area, which is at nominal SP + stackslots_size.
+ let islot = slot.get() as i64;
+ let spill_off = islot * M::word_bytes() as i64;
+ let sp_off = self.stackslots_size as i64 + spill_off;
+ trace!("store_spillslot: slot {:?} -> sp_off {}", slot, sp_off);
+ M::gen_store_stack(StackAMode::NominalSPOffset(sp_off, ty), from_reg, ty)
+ }
+
+ fn spillslots_to_stack_map(
+ &self,
+ slots: &[SpillSlot],
+ state: &<Self::I as MachInstEmit>::State,
+ ) -> StackMap {
+ let virtual_sp_offset = M::get_virtual_sp_offset_from_state(state);
+ let nominal_sp_to_fp = M::get_nominal_sp_to_fp(state);
+ assert!(virtual_sp_offset >= 0);
+ trace!(
+ "spillslots_to_stackmap: slots = {:?}, state = {:?}",
+ slots,
+ state
+ );
+ let map_size = (virtual_sp_offset + nominal_sp_to_fp) as u32;
+ let bytes = M::word_bytes();
+ let map_words = (map_size + bytes - 1) / bytes;
+ let mut bits = std::iter::repeat(false)
+ .take(map_words as usize)
+ .collect::<Vec<bool>>();
+
+ let first_spillslot_word =
+ ((self.stackslots_size + virtual_sp_offset as u32) / bytes) as usize;
+ for &slot in slots {
+ let slot = slot.get() as usize;
+ bits[first_spillslot_word + slot] = true;
+ }
+
+ StackMap::from_slice(&bits[..])
+ }
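// A worked example of the stack-map arithmetic above (illustrative values,
// not taken from any real function): 8-byte words, a 16-byte stackslot area,
// a virtual SP offset of 32 (outgoing args plus clobber-saves below nominal
// SP), and a nominal-SP-to-FP distance of 48.
fn stack_map_arithmetic_example() {
    let (bytes, stackslots_size, virtual_sp_offset, nominal_sp_to_fp) = (8u32, 16u32, 32u32, 48u32);
    // The map covers everything from the real SP up to FP.
    let map_words = (virtual_sp_offset + nominal_sp_to_fp + bytes - 1) / bytes;
    assert_eq!(map_words, 10);
    // A reference in SpillSlot 1 is marked at word (16 + 32) / 8 + 1 = 7.
    let first_spillslot_word = (stackslots_size + virtual_sp_offset) / bytes;
    assert_eq!(first_spillslot_word + 1, 7);
}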
+
+ fn gen_prologue(&mut self) -> Vec<Self::I> {
+ let mut insts = vec![];
+ if !self.call_conv.extends_baldrdash() {
+ // set up frame
+ insts.extend(M::gen_prologue_frame_setup().into_iter());
+ }
+
+ let bytes = M::word_bytes();
+ let mut total_stacksize = self.stackslots_size + bytes * self.spillslots.unwrap() as u32;
+ if self.call_conv.extends_baldrdash() {
+ debug_assert!(
+ !self.flags.enable_probestack(),
+ "baldrdash does not expect cranelift to emit stack probes"
+ );
+ total_stacksize += self.flags.baldrdash_prologue_words() as u32 * bytes;
+ }
+ let mask = M::stack_align(self.call_conv) - 1;
+ let total_stacksize = (total_stacksize + mask) & !mask; // Align to the ABI-required stack alignment.
+
+ if !self.call_conv.extends_baldrdash() {
+ // Leaf functions with zero stack don't need a stack check even if a stack
+ // limit is specified; all other functions get the check whenever a limit is given.
+ if total_stacksize > 0 || !self.is_leaf {
+ if let Some((reg, stack_limit_load)) = &self.stack_limit {
+ insts.extend_from_slice(stack_limit_load);
+ self.insert_stack_check(*reg, total_stacksize, &mut insts);
+ }
+ }
+ if total_stacksize > 0 {
+ self.fixed_frame_storage_size += total_stacksize;
+ }
+ }
+
+ // N.B.: "nominal SP", which we use to refer to stackslots and
+ // spillslots, is defined to be equal to the stack pointer at this point
+ // in the prologue.
+ //
+ // If we push any clobbers below, we emit a virtual-SP adjustment
+ // meta-instruction so that the nominal SP references behave as if SP
+ // were still at this point. See documentation for
+ // [crate::machinst::abi_impl](this module) for more details on
+ // stackframe layout and nominal SP maintenance.
+
+ // Save clobbered registers.
+ let (clobber_size, clobber_insts) = M::gen_clobber_save(
+ self.call_conv,
+ &self.flags,
+ &self.clobbered,
+ self.fixed_frame_storage_size,
+ self.outgoing_args_size,
+ );
+ insts.extend(clobber_insts);
+
+ let sp_adj = self.outgoing_args_size as i32 + clobber_size as i32;
+ if sp_adj > 0 {
+ insts.push(M::gen_nominal_sp_adj(sp_adj));
+ }
+
+ self.total_frame_size = Some(total_stacksize);
+ insts
+ }
+
+ fn gen_epilogue(&self) -> Vec<M::I> {
+ let mut insts = vec![];
+
+ // Restore clobbered registers.
+ insts.extend(M::gen_clobber_restore(
+ self.call_conv,
+ &self.flags,
+ &self.clobbered,
+ self.fixed_frame_storage_size,
+ self.outgoing_args_size,
+ ));
+
+ // N.B.: we do *not* emit a nominal SP adjustment here, because (i) there will be no
+ // references to nominal SP offsets before the return below, and (ii) the instruction
+ // emission tracks running SP offset linearly (in straight-line order), not according to
+ // the CFG, so early returns in the middle of function bodies would cause an incorrect
+ // offset for the rest of the body.
+
+ if !self.call_conv.extends_baldrdash() {
+ insts.extend(M::gen_epilogue_frame_restore());
+ insts.push(M::gen_ret());
+ }
+
+ debug!("Epilogue: {:?}", insts);
+ insts
+ }
+
+ fn frame_size(&self) -> u32 {
+ self.total_frame_size
+ .expect("frame size not computed before prologue generation")
+ }
+
+ fn stack_args_size(&self) -> u32 {
+ self.sig.stack_arg_space as u32
+ }
+
+ fn get_spillslot_size(&self, rc: RegClass, ty: Type) -> u32 {
+ M::get_number_of_spillslots_for_value(rc, ty)
+ }
+
+ fn gen_spill(&self, to_slot: SpillSlot, from_reg: RealReg, ty: Option<Type>) -> Self::I {
+ let ty = ty_from_ty_hint_or_reg_class::<M>(from_reg.to_reg(), ty);
+ self.store_spillslot(to_slot, ty, from_reg.to_reg())
+ }
+
+ fn gen_reload(
+ &self,
+ to_reg: Writable<RealReg>,
+ from_slot: SpillSlot,
+ ty: Option<Type>,
+ ) -> Self::I {
+ let ty = ty_from_ty_hint_or_reg_class::<M>(to_reg.to_reg().to_reg(), ty);
+ self.load_spillslot(from_slot, ty, to_reg.map(|r| r.to_reg()))
+ }
+
+ fn unwind_info_kind(&self) -> UnwindInfoKind {
+ match self.sig.call_conv {
+ #[cfg(feature = "unwind")]
+ isa::CallConv::Fast | isa::CallConv::Cold | isa::CallConv::SystemV => {
+ UnwindInfoKind::SystemV
+ }
+ #[cfg(feature = "unwind")]
+ isa::CallConv::WindowsFastcall => UnwindInfoKind::Windows,
+ _ => UnwindInfoKind::None,
+ }
+ }
+}
+
+fn abisig_to_uses_and_defs<M: ABIMachineSpec>(sig: &ABISig) -> (Vec<Reg>, Vec<Writable<Reg>>) {
+ // Compute uses: all arg regs.
+ let mut uses = Vec::new();
+ for arg in &sig.args {
+ match arg {
+ &ABIArg::Reg(reg, ..) => uses.push(reg.to_reg()),
+ _ => {}
+ }
+ }
+
+ // Compute defs: all retval regs, and all caller-save (clobbered) regs.
+ let mut defs = M::get_regs_clobbered_by_call(sig.call_conv);
+ for ret in &sig.rets {
+ match ret {
+ &ABIArg::Reg(reg, ..) => defs.push(Writable::from_reg(reg.to_reg())),
+ _ => {}
+ }
+ }
+
+ (uses, defs)
+}
+
+/// ABI object for a callsite.
+pub struct ABICallerImpl<M: ABIMachineSpec> {
+ /// The called function's signature.
+ sig: ABISig,
+ /// All uses for the callsite, i.e., function args.
+ uses: Vec<Reg>,
+ /// All defs for the callsite, i.e., return values and caller-saves.
+ defs: Vec<Writable<Reg>>,
+ /// Call destination.
+ dest: CallDest,
+ /// Actual call opcode; used to distinguish various types of calls.
+ opcode: ir::Opcode,
+ /// Caller's calling convention.
+ caller_conv: isa::CallConv,
+
+ _mach: PhantomData<M>,
+}
+
+/// Destination for a call.
+#[derive(Debug, Clone)]
+pub enum CallDest {
+ /// Call to an ExtName (named function symbol).
+ ExtName(ir::ExternalName, RelocDistance),
+ /// Indirect call to a function pointer in a register.
+ Reg(Reg),
+}
+
+impl<M: ABIMachineSpec> ABICallerImpl<M> {
+ /// Create a callsite ABI object for a call directly to the specified function.
+ pub fn from_func(
+ sig: &ir::Signature,
+ extname: &ir::ExternalName,
+ dist: RelocDistance,
+ caller_conv: isa::CallConv,
+ ) -> CodegenResult<ABICallerImpl<M>> {
+ let sig = ABISig::from_func_sig::<M>(sig)?;
+ let (uses, defs) = abisig_to_uses_and_defs::<M>(&sig);
+ Ok(ABICallerImpl {
+ sig,
+ uses,
+ defs,
+ dest: CallDest::ExtName(extname.clone(), dist),
+ opcode: ir::Opcode::Call,
+ caller_conv,
+ _mach: PhantomData,
+ })
+ }
+
+ /// Create a callsite ABI object for a call to a function pointer with the
+ /// given signature.
+ pub fn from_ptr(
+ sig: &ir::Signature,
+ ptr: Reg,
+ opcode: ir::Opcode,
+ caller_conv: isa::CallConv,
+ ) -> CodegenResult<ABICallerImpl<M>> {
+ let sig = ABISig::from_func_sig::<M>(sig)?;
+ let (uses, defs) = abisig_to_uses_and_defs::<M>(&sig);
+ Ok(ABICallerImpl {
+ sig,
+ uses,
+ defs,
+ dest: CallDest::Reg(ptr),
+ opcode,
+ caller_conv,
+ _mach: PhantomData,
+ })
+ }
+}
+
+fn adjust_stack_and_nominal_sp<M: ABIMachineSpec, C: LowerCtx<I = M::I>>(
+ ctx: &mut C,
+ off: i32,
+ is_sub: bool,
+) {
+ if off == 0 {
+ return;
+ }
+ let amt = if is_sub { -off } else { off };
+ for inst in M::gen_sp_reg_adjust(amt) {
+ ctx.emit(inst);
+ }
+ ctx.emit(M::gen_nominal_sp_adj(-amt));
+}
+
+impl<M: ABIMachineSpec> ABICaller for ABICallerImpl<M> {
+ type I = M::I;
+
+ fn num_args(&self) -> usize {
+ if self.sig.stack_ret_arg.is_some() {
+ self.sig.args.len() - 1
+ } else {
+ self.sig.args.len()
+ }
+ }
+
+ fn accumulate_outgoing_args_size<C: LowerCtx<I = Self::I>>(&self, ctx: &mut C) {
+ let off = self.sig.stack_arg_space + self.sig.stack_ret_space;
+ ctx.abi().accumulate_outgoing_args_size(off as u32);
+ }
+
+ fn emit_stack_pre_adjust<C: LowerCtx<I = Self::I>>(&self, ctx: &mut C) {
+ let off = self.sig.stack_arg_space + self.sig.stack_ret_space;
+ adjust_stack_and_nominal_sp::<M, C>(ctx, off as i32, /* is_sub = */ true)
+ }
+
+ fn emit_stack_post_adjust<C: LowerCtx<I = Self::I>>(&self, ctx: &mut C) {
+ let off = self.sig.stack_arg_space + self.sig.stack_ret_space;
+ adjust_stack_and_nominal_sp::<M, C>(ctx, off as i32, /* is_sub = */ false)
+ }
+
+ fn emit_copy_reg_to_arg<C: LowerCtx<I = Self::I>>(
+ &self,
+ ctx: &mut C,
+ idx: usize,
+ from_reg: Reg,
+ ) {
+ let word_rc = M::word_reg_class();
+ let word_bits = M::word_bits() as usize;
+ match &self.sig.args[idx] {
+ &ABIArg::Reg(reg, ty, ext, _)
+ if ext != ir::ArgumentExtension::None && ty_bits(ty) < word_bits =>
+ {
+ assert_eq!(word_rc, reg.get_class());
+ let signed = match ext {
+ ir::ArgumentExtension::Uext => false,
+ ir::ArgumentExtension::Sext => true,
+ _ => unreachable!(),
+ };
+ ctx.emit(M::gen_extend(
+ Writable::from_reg(reg.to_reg()),
+ from_reg,
+ signed,
+ ty_bits(ty) as u8,
+ word_bits as u8,
+ ));
+ }
+ &ABIArg::Reg(reg, ty, _, _) => {
+ ctx.emit(M::gen_move(Writable::from_reg(reg.to_reg()), from_reg, ty));
+ }
+ &ABIArg::Stack(off, mut ty, ext, _) => {
+ if ext != ir::ArgumentExtension::None && ty_bits(ty) < word_bits {
+ assert_eq!(word_rc, from_reg.get_class());
+ let signed = match ext {
+ ir::ArgumentExtension::Uext => false,
+ ir::ArgumentExtension::Sext => true,
+ _ => unreachable!(),
+ };
+ // Extend in place in the source register. Our convention is to
+ // treat high bits as undefined for values in registers, so this
+ // is safe, even for an argument that is nominally read-only.
+ ctx.emit(M::gen_extend(
+ Writable::from_reg(from_reg),
+ from_reg,
+ signed,
+ ty_bits(ty) as u8,
+ word_bits as u8,
+ ));
+ // Store the extended version.
+ ty = M::word_type();
+ }
+ ctx.emit(M::gen_store_stack(
+ StackAMode::SPOffset(off, ty),
+ from_reg,
+ ty,
+ ));
+ }
+ }
+ }
+
+ fn emit_copy_retval_to_reg<C: LowerCtx<I = Self::I>>(
+ &self,
+ ctx: &mut C,
+ idx: usize,
+ into_reg: Writable<Reg>,
+ ) {
+ match &self.sig.rets[idx] {
+ // Extension mode doesn't matter because we're copying out, not in,
+ // and we ignore high bits in our own registers by convention.
+ &ABIArg::Reg(reg, ty, _, _) => ctx.emit(M::gen_move(into_reg, reg.to_reg(), ty)),
+ &ABIArg::Stack(off, ty, _, _) => {
+ let ret_area_base = self.sig.stack_arg_space;
+ ctx.emit(M::gen_load_stack(
+ StackAMode::SPOffset(off + ret_area_base, ty),
+ into_reg,
+ ty,
+ ));
+ }
+ }
+ }
+
+ fn emit_call<C: LowerCtx<I = Self::I>>(&mut self, ctx: &mut C) {
+ let (uses, defs) = (
+ mem::replace(&mut self.uses, Default::default()),
+ mem::replace(&mut self.defs, Default::default()),
+ );
+ let word_rc = M::word_reg_class();
+ let word_type = M::word_type();
+ if let Some(i) = self.sig.stack_ret_arg {
+ let rd = ctx.alloc_tmp(word_rc, word_type);
+ let ret_area_base = self.sig.stack_arg_space;
+ ctx.emit(M::gen_get_stack_addr(
+ StackAMode::SPOffset(ret_area_base, I8),
+ rd,
+ I8,
+ ));
+ self.emit_copy_reg_to_arg(ctx, i, rd.to_reg());
+ }
+ let tmp = ctx.alloc_tmp(word_rc, word_type);
+ for (is_safepoint, inst) in M::gen_call(
+ &self.dest,
+ uses,
+ defs,
+ self.opcode,
+ tmp,
+ self.sig.call_conv,
+ self.caller_conv,
+ )
+ .into_iter()
+ {
+ match is_safepoint {
+ InstIsSafepoint::Yes => ctx.emit_safepoint(inst),
+ InstIsSafepoint::No => ctx.emit(inst),
+ }
+ }
+ }
+}
diff --git a/third_party/rust/cranelift-codegen/src/machinst/adapter.rs b/third_party/rust/cranelift-codegen/src/machinst/adapter.rs
new file mode 100644
index 0000000000..43ff9e51a4
--- /dev/null
+++ b/third_party/rust/cranelift-codegen/src/machinst/adapter.rs
@@ -0,0 +1,140 @@
+//! Adapter for a `MachBackend` to implement the `TargetIsa` trait.
+
+use crate::binemit;
+use crate::ir;
+use crate::isa::{EncInfo, Encoding, Encodings, Legalize, RegClass, RegInfo, TargetIsa};
+use crate::machinst::*;
+use crate::regalloc::RegisterSet;
+use crate::settings::Flags;
+
+#[cfg(feature = "testing_hooks")]
+use crate::regalloc::RegDiversions;
+
+use core::any::Any;
+use std::borrow::Cow;
+use std::fmt;
+use target_lexicon::Triple;
+
+/// A wrapper around a `MachBackend` that provides a `TargetIsa` impl.
+pub struct TargetIsaAdapter {
+ backend: Box<dyn MachBackend + Send + Sync + 'static>,
+ triple: Triple,
+}
+
+impl TargetIsaAdapter {
+ /// Create a new `TargetIsa` wrapper around a `MachBackend`.
+ pub fn new<B: MachBackend + Send + Sync + 'static>(backend: B) -> TargetIsaAdapter {
+ let triple = backend.triple();
+ TargetIsaAdapter {
+ backend: Box::new(backend),
+ triple,
+ }
+ }
+}
+
+impl fmt::Display for TargetIsaAdapter {
+ fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
+ f.debug_struct("MachBackend")
+ .field("name", &self.backend.name())
+ .field("triple", &self.backend.triple())
+ .field("flags", &format!("{}", self.backend.flags()))
+ .finish()
+ }
+}
+
+impl TargetIsa for TargetIsaAdapter {
+ fn name(&self) -> &'static str {
+ self.backend.name()
+ }
+
+ fn triple(&self) -> &Triple {
+ &self.triple
+ }
+
+ fn flags(&self) -> &Flags {
+ self.backend.flags()
+ }
+
+ fn register_info(&self) -> RegInfo {
+ // Called from the function's `Display` impl, so we need a stub here.
+ RegInfo {
+ banks: &[],
+ classes: &[],
+ }
+ }
+
+ fn legal_encodings<'a>(
+ &'a self,
+ _func: &'a ir::Function,
+ _inst: &'a ir::InstructionData,
+ _ctrl_typevar: ir::Type,
+ ) -> Encodings<'a> {
+ panic!("Should not be called when new-style backend is available!")
+ }
+
+ fn encode(
+ &self,
+ _func: &ir::Function,
+ _inst: &ir::InstructionData,
+ _ctrl_typevar: ir::Type,
+ ) -> Result<Encoding, Legalize> {
+ panic!("Should not be called when new-style backend is available!")
+ }
+
+ fn encoding_info(&self) -> EncInfo {
+ panic!("Should not be called when new-style backend is available!")
+ }
+
+ fn legalize_signature(&self, _sig: &mut Cow<ir::Signature>, _current: bool) {
+ panic!("Should not be called when new-style backend is available!")
+ }
+
+ fn regclass_for_abi_type(&self, _ty: ir::Type) -> RegClass {
+ panic!("Should not be called when new-style backend is available!")
+ }
+
+ fn allocatable_registers(&self, _func: &ir::Function) -> RegisterSet {
+ panic!("Should not be called when new-style backend is available!")
+ }
+
+ fn prologue_epilogue(&self, _func: &mut ir::Function) -> CodegenResult<()> {
+ panic!("Should not be called when new-style backend is available!")
+ }
+
+ #[cfg(feature = "testing_hooks")]
+ fn emit_inst(
+ &self,
+ _func: &ir::Function,
+ _inst: ir::Inst,
+ _divert: &mut RegDiversions,
+ _sink: &mut dyn binemit::CodeSink,
+ ) {
+ panic!("Should not be called when new-style backend is available!")
+ }
+
+ /// Emit a whole function into memory.
+ fn emit_function_to_memory(&self, _func: &ir::Function, _sink: &mut binemit::MemoryCodeSink) {
+ panic!("Should not be called when new-style backend is available!")
+ }
+
+ fn get_mach_backend(&self) -> Option<&dyn MachBackend> {
+ Some(&*self.backend)
+ }
+
+ fn unsigned_add_overflow_condition(&self) -> ir::condcodes::IntCC {
+ self.backend.unsigned_add_overflow_condition()
+ }
+
+ fn unsigned_sub_overflow_condition(&self) -> ir::condcodes::IntCC {
+ self.backend.unsigned_sub_overflow_condition()
+ }
+
+ #[cfg(feature = "unwind")]
+ fn create_systemv_cie(&self) -> Option<gimli::write::CommonInformationEntry> {
+ self.backend.create_systemv_cie()
+ }
+
+ fn as_any(&self) -> &dyn Any {
+ self as &dyn Any
+ }
+}
diff --git a/third_party/rust/cranelift-codegen/src/machinst/blockorder.rs b/third_party/rust/cranelift-codegen/src/machinst/blockorder.rs
new file mode 100644
index 0000000000..1052d83858
--- /dev/null
+++ b/third_party/rust/cranelift-codegen/src/machinst/blockorder.rs
@@ -0,0 +1,624 @@
+//! Computation of basic block order in emitted code.
+//!
+//! This module handles the translation from CLIF BBs to VCode BBs.
+//!
+//! The basic idea is that we compute a sequence of "lowered blocks" that
+//! correspond to one or more blocks in the graph: (CLIF CFG) `union` (implicit
+//! block on *every* edge). Conceptually, the lowering pipeline wants to insert
+//! moves for phi-nodes on every block-to-block transfer; these blocks always
+//! conceptually exist, but may be merged with an "original" CLIF block (and
+//! hence not actually exist; this is equivalent to inserting the blocks only on
+//! critical edges).
+//!
+//! In other words, starting from a CFG like this (where each "CLIF block" and
+//! "(edge N->M)" is a separate basic block):
+//!
+//! ```plain
+//!
+//! CLIF block 0
+//! / \
+//! (edge 0->1) (edge 0->2)
+//! | |
+//! CLIF block 1 CLIF block 2
+//! \ /
+//! (edge 1->3) (edge 2->3)
+//! \ /
+//! CLIF block 3
+//! ```
+//!
+//! We can produce a CFG of lowered blocks like so:
+//!
+//! ```plain
+//! +--------------+
+//! | CLIF block 0 |
+//! +--------------+
+//! / \
+//! +--------------+ +--------------+
+//! | (edge 0->1) | |(edge 0->2) |
+//! | CLIF block 1 | | CLIF block 2 |
+//! +--------------+ +--------------+
+//! \ /
+//! +-----------+ +-----------+
+//! |(edge 1->3)| |(edge 2->3)|
+//! +-----------+ +-----------+
+//! \ /
+//! +------------+
+//! |CLIF block 3|
+//! +------------+
+//! ```
+//!
+//! (note that the edges into CLIF blocks 1 and 2 could be merged with those
+//! blocks' original bodies, but the out-edges could not, because for
+//! simplicity in the successor-function definition we only ever merge an edge
+//! onto one side of an original CLIF block.)
+//!
+//! Each `LoweredBlock` names just an original CLIF block, an original CLIF
+//! block prepended or appended with an edge block (never both, though), or just
+//! an edge block.
+//!
+//! To compute this lowering, we do a DFS over the CLIF-plus-edge-block graph
+//! (never actually materialized, just defined by a "successors" function), and
+//! compute the reverse postorder.
+//!
+//! This algorithm isn't perfect w.r.t. generated code quality: we don't, for
+//! example, consider any information about whether edge blocks will actually
+//! have content, because this computation happens as part of lowering *before*
+//! regalloc, and regalloc may or may not insert moves/spills/reloads on any
+//! particular edge. But it works relatively well and is conceptually simple.
+//! Furthermore, the [MachBuffer] machine-code sink performs final peephole-like
+//! branch editing that in practice elides empty blocks and simplifies some of
+//! the other redundancies that this scheme produces.
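+//!
+//! As a minimal, hypothetical usage sketch (where `func` is assumed to be an
+//! already-built CLIF `Function`), the lowered order can be computed and
+//! inspected like so:
+//!
+//! ```ignore
+//! let order = BlockLoweringOrder::new(&func);
+//! for (i, lb) in order.lowered_order().iter().enumerate() {
+//!     // Each lowered block is an original CLIF block, an edge block, or a
+//!     // merged combination of the two.
+//!     println!("lowered block {}: {:?}", i, lb);
+//! }
+//! ```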
+
+use crate::entity::SecondaryMap;
+use crate::fx::{FxHashMap, FxHashSet};
+use crate::ir::{Block, Function, Inst, Opcode};
+use crate::machinst::lower::visit_block_succs;
+use crate::machinst::*;
+
+use log::debug;
+use smallvec::SmallVec;
+
+/// Mapping from CLIF BBs to VCode BBs.
+#[derive(Debug)]
+pub struct BlockLoweringOrder {
+ /// Lowered blocks, in BlockIndex order. Each block is some combination of
+ /// (i) a CLIF block, and (ii) inserted crit-edge blocks before or after;
+ /// see [LoweredBlock] for details.
+ lowered_order: Vec<LoweredBlock>,
+ /// Successors for all lowered blocks, in one serialized vector. Indexed by
+ /// the ranges in `lowered_succ_ranges`.
+ lowered_succs: Vec<(Inst, LoweredBlock)>,
+ /// BlockIndex values for successors for all lowered blocks, in the same
+ /// order as `lowered_succs`.
+ lowered_succ_indices: Vec<(Inst, BlockIndex)>,
+ /// Ranges in `lowered_succs` giving the successor lists for each lowered
+ /// block. Indexed by lowering-order index (`BlockIndex`).
+ lowered_succ_ranges: Vec<(usize, usize)>,
+ /// Mapping from CLIF BB to BlockIndex (index in lowered order). Note that
+ /// some CLIF BBs may not be lowered; in particular, we skip unreachable
+ /// blocks.
+ orig_map: SecondaryMap<Block, Option<BlockIndex>>,
+}
+
+/// The origin of a block in the lowered block-order: either an original CLIF
+/// block, or an inserted edge-block, or a combination of the two if an edge is
+/// non-critical.
+#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash)]
+pub enum LoweredBlock {
+ /// Block in original CLIF, with no merged edge-blocks.
+ Orig {
+ /// Original CLIF block.
+ block: Block,
+ },
+ /// Block in the original CLIF, plus edge-block to one succ (which is the
+ /// one successor of the original block).
+ OrigAndEdge {
+ /// The original CLIF block contained in this lowered block.
+ block: Block,
+ /// The edge (jump) instruction transitioning from this block
+ /// to the next, i.e., corresponding to the included edge-block. This
+ /// will be an instruction in `block`.
+ edge_inst: Inst,
+ /// The successor CLIF block.
+ succ: Block,
+ },
+ /// Block in the original CLIF, preceded by edge-block from one pred (which
+ /// is the one pred of the original block).
+ EdgeAndOrig {
+ /// The previous CLIF block, i.e., the edge block's predecessor.
+ pred: Block,
+ /// The edge (jump) instruction corresponding to the included
+ /// edge-block. This will be an instruction in `pred`.
+ edge_inst: Inst,
+ /// The original CLIF block included in this lowered block.
+ block: Block,
+ },
+ /// Split critical edge between two CLIF blocks. This lowered block does not
+ /// correspond to any original CLIF blocks; it only serves as an insertion
+ /// point for work to happen on the transition from `pred` to `succ`.
+ Edge {
+ /// The predecessor CLIF block.
+ pred: Block,
+ /// The edge (jump) instruction corresponding to this edge's transition.
+ /// This will be an instruction in `pred`.
+ edge_inst: Inst,
+ /// The successor CLIF block.
+ succ: Block,
+ },
+}
+
+impl LoweredBlock {
+ /// The associated original (CLIF) block included in this lowered block, if
+ /// any.
+ pub fn orig_block(self) -> Option<Block> {
+ match self {
+ LoweredBlock::Orig { block, .. }
+ | LoweredBlock::OrigAndEdge { block, .. }
+ | LoweredBlock::EdgeAndOrig { block, .. } => Some(block),
+ LoweredBlock::Edge { .. } => None,
+ }
+ }
+
+ /// The associated in-edge, if any.
+ pub fn in_edge(self) -> Option<(Block, Inst, Block)> {
+ match self {
+ LoweredBlock::EdgeAndOrig {
+ pred,
+ edge_inst,
+ block,
+ } => Some((pred, edge_inst, block)),
+ _ => None,
+ }
+ }
+
+ /// The associated out-edge, if any. Also defined for edge-only blocks.
+ pub fn out_edge(self) -> Option<(Block, Inst, Block)> {
+ match self {
+ LoweredBlock::OrigAndEdge {
+ block,
+ edge_inst,
+ succ,
+ } => Some((block, edge_inst, succ)),
+ LoweredBlock::Edge {
+ pred,
+ edge_inst,
+ succ,
+ } => Some((pred, edge_inst, succ)),
+ _ => None,
+ }
+ }
+}
+
+impl BlockLoweringOrder {
+ /// Compute and return a lowered block order for `f`.
+ pub fn new(f: &Function) -> BlockLoweringOrder {
+ debug!("BlockLoweringOrder: function body {:?}", f);
+
+ // Step 1: compute the in-edge and out-edge count of every block.
+ let mut block_in_count = SecondaryMap::with_default(0);
+ let mut block_out_count = SecondaryMap::with_default(0);
+
+ // Cache the block successors to avoid re-examining branches below.
+ let mut block_succs: SmallVec<[(Inst, Block); 128]> = SmallVec::new();
+ let mut block_succ_range = SecondaryMap::with_default((0, 0));
+ let mut fallthrough_return_block = None;
+ for block in f.layout.blocks() {
+ let block_succ_start = block_succs.len();
+ visit_block_succs(f, block, |inst, succ| {
+ block_out_count[block] += 1;
+ block_in_count[succ] += 1;
+ block_succs.push((inst, succ));
+ });
+ let block_succ_end = block_succs.len();
+ block_succ_range[block] = (block_succ_start, block_succ_end);
+
+ for inst in f.layout.block_likely_branches(block) {
+ if f.dfg[inst].opcode() == Opcode::Return {
+ // Implicit output edge for any return.
+ block_out_count[block] += 1;
+ }
+ if f.dfg[inst].opcode() == Opcode::FallthroughReturn {
+ // Fallthrough return block must come last.
+ debug_assert!(fallthrough_return_block == None);
+ fallthrough_return_block = Some(block);
+ }
+ }
+ }
+ // Implicit input edge for entry block.
+ if let Some(entry) = f.layout.entry_block() {
+ block_in_count[entry] += 1;
+ }
+
+ // Here we define the implicit CLIF-plus-edges graph. There are
+ // conceptually two such graphs: the original, with every edge explicit,
+ // and the merged one, with blocks (represented by `LoweredBlock`
+ // values) that contain original CLIF blocks, edges, or both. This
+ // function returns a lowered block's successors as per the latter, with
+ // consideration to edge-block merging.
+ //
+ // Note that there is a property of the block-merging rules below
+ // that is very important to ensure we don't miss any lowered blocks:
+ // any block in the implicit CLIF-plus-edges graph will *only* be
+ // included in one block in the merged graph.
+ //
+ // This, combined with the property that every edge block is reachable
+ // only from one predecessor (and hence cannot be reached by a DFS
+ // backedge), means that it is sufficient in our DFS below to track
+ // visited-bits per original CLIF block only, not per edge. This greatly
+ // simplifies the data structures (no need to keep a sparse hash-set of
+ // (block, block) tuples).
+ let compute_lowered_succs = |ret: &mut Vec<(Inst, LoweredBlock)>, block: LoweredBlock| {
+ let start_idx = ret.len();
+ match block {
+ LoweredBlock::Orig { block } | LoweredBlock::EdgeAndOrig { block, .. } => {
+ // At an orig block; successors are always edge blocks,
+ // possibly with orig blocks following.
+ let range = block_succ_range[block];
+ for &(edge_inst, succ) in &block_succs[range.0..range.1] {
+ if block_in_count[succ] == 1 {
+ ret.push((
+ edge_inst,
+ LoweredBlock::EdgeAndOrig {
+ pred: block,
+ edge_inst,
+ block: succ,
+ },
+ ));
+ } else {
+ ret.push((
+ edge_inst,
+ LoweredBlock::Edge {
+ pred: block,
+ edge_inst,
+ succ,
+ },
+ ));
+ }
+ }
+ }
+ LoweredBlock::Edge {
+ succ, edge_inst, ..
+ }
+ | LoweredBlock::OrigAndEdge {
+ succ, edge_inst, ..
+ } => {
+ // At an edge block; successors are always orig blocks,
+ // possibly with edge blocks following.
+ if block_out_count[succ] == 1 {
+ let range = block_succ_range[succ];
+ // check if the one succ is a real CFG edge (vs.
+ // implicit return succ).
+ if range.1 - range.0 > 0 {
+ debug_assert!(range.1 - range.0 == 1);
+ let (succ_edge_inst, succ_succ) = block_succs[range.0];
+ ret.push((
+ edge_inst,
+ LoweredBlock::OrigAndEdge {
+ block: succ,
+ edge_inst: succ_edge_inst,
+ succ: succ_succ,
+ },
+ ));
+ } else {
+ ret.push((edge_inst, LoweredBlock::Orig { block: succ }));
+ }
+ } else {
+ ret.push((edge_inst, LoweredBlock::Orig { block: succ }));
+ }
+ }
+ }
+ let end_idx = ret.len();
+ (start_idx, end_idx)
+ };
+
+ // Build the explicit LoweredBlock-to-LoweredBlock successors list.
+ let mut lowered_succs = vec![];
+ let mut lowered_succ_indices = vec![];
+
+ // Step 2: Compute RPO traversal of the implicit CLIF-plus-edge-block graph. Use an
+ // explicit stack so we don't overflow the real stack with a deep DFS.
+ #[derive(Debug)]
+ struct StackEntry {
+ this: LoweredBlock,
+ succs: (usize, usize), // range in lowered_succs
+ cur_succ: usize, // index in lowered_succs
+ }
+
+ let mut stack: SmallVec<[StackEntry; 16]> = SmallVec::new();
+ let mut visited = FxHashSet::default();
+ let mut postorder = vec![];
+ if let Some(entry) = f.layout.entry_block() {
+ // FIXME(cfallin): we might be able to use OrigAndEdge. Find a way
+ // to not special-case the entry block here.
+ let block = LoweredBlock::Orig { block: entry };
+ visited.insert(block);
+ let range = compute_lowered_succs(&mut lowered_succs, block);
+ lowered_succ_indices.resize(lowered_succs.len(), 0);
+ stack.push(StackEntry {
+ this: block,
+ succs: range,
+ cur_succ: range.1,
+ });
+ }
+
+ let mut deferred_last = None;
+ while !stack.is_empty() {
+ let stack_entry = stack.last_mut().unwrap();
+ let range = stack_entry.succs;
+ if stack_entry.cur_succ == range.0 {
+ let orig_block = stack_entry.this.orig_block();
+ if orig_block.is_some() && orig_block == fallthrough_return_block {
+ deferred_last = Some((stack_entry.this, range));
+ } else {
+ postorder.push((stack_entry.this, range));
+ }
+ stack.pop();
+ } else {
+ // Heuristic: chase the children in reverse. This puts the first
+ // successor block first in RPO, all other things being equal,
+ // which tends to prioritize loop backedges over out-edges,
+ // putting the edge-block closer to the loop body and minimizing
+ // live-ranges in linear instruction space.
+ let next = lowered_succs[stack_entry.cur_succ - 1].1;
+ stack_entry.cur_succ -= 1;
+ if visited.contains(&next) {
+ continue;
+ }
+ visited.insert(next);
+ let range = compute_lowered_succs(&mut lowered_succs, next);
+ lowered_succ_indices.resize(lowered_succs.len(), 0);
+ stack.push(StackEntry {
+ this: next,
+ succs: range,
+ cur_succ: range.1,
+ });
+ }
+ }
+
+ postorder.reverse();
+ let mut rpo = postorder;
+ if let Some(d) = deferred_last {
+ rpo.push(d);
+ }
+
+ // Step 3: now that we have RPO, build the BlockIndex/BB fwd/rev maps.
+ let mut lowered_order = vec![];
+ let mut lowered_succ_ranges = vec![];
+ let mut lb_to_bindex = FxHashMap::default();
+ for (block, succ_range) in rpo.into_iter() {
+ lb_to_bindex.insert(block, lowered_order.len() as BlockIndex);
+ lowered_order.push(block);
+ lowered_succ_ranges.push(succ_range);
+ }
+
+ let lowered_succ_indices = lowered_succs
+ .iter()
+ .map(|&(inst, succ)| (inst, lb_to_bindex.get(&succ).cloned().unwrap()))
+ .collect();
+
+ let mut orig_map = SecondaryMap::with_default(None);
+ for (i, lb) in lowered_order.iter().enumerate() {
+ let i = i as BlockIndex;
+ if let Some(b) = lb.orig_block() {
+ orig_map[b] = Some(i);
+ }
+ }
+
+ let result = BlockLoweringOrder {
+ lowered_order,
+ lowered_succs,
+ lowered_succ_indices,
+ lowered_succ_ranges,
+ orig_map,
+ };
+ debug!("BlockLoweringOrder: {:?}", result);
+ result
+ }
+
+ /// Get the lowered order of blocks.
+ pub fn lowered_order(&self) -> &[LoweredBlock] {
+ &self.lowered_order[..]
+ }
+
+ /// Get the successors for a lowered block, by index in `lowered_order()`'s
+ /// returned slice. Each successor is paired with the edge-instruction
+ /// (branch) corresponding to this edge.
+ pub fn succs(&self, block: BlockIndex) -> &[(Inst, LoweredBlock)] {
+ let range = self.lowered_succ_ranges[block as usize];
+ &self.lowered_succs[range.0..range.1]
+ }
+
+ /// Get the successor indices for a lowered block.
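+ ///
+ /// A minimal, hypothetical sketch of walking every lowered edge (`order` is
+ /// assumed to be a previously computed `BlockLoweringOrder`):
+ ///
+ /// ```ignore
+ /// for bix in 0..order.lowered_order().len() as BlockIndex {
+ ///     for &(branch_inst, succ_ix) in order.succ_indices(bix) {
+ ///         // `branch_inst` is the CLIF branch creating this edge;
+ ///         // `succ_ix` is the successor's index in the lowered order.
+ ///         let _ = (branch_inst, succ_ix);
+ ///     }
+ /// }
+ /// ```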
+ pub fn succ_indices(&self, block: BlockIndex) -> &[(Inst, BlockIndex)] {
+ let range = self.lowered_succ_ranges[block as usize];
+ &self.lowered_succ_indices[range.0..range.1]
+ }
+
+ /// Get the lowered block index containing a CLIF block, if any. (May not be
+ /// present if the original CLIF block was unreachable.)
+ pub fn lowered_block_for_bb(&self, bb: Block) -> Option<BlockIndex> {
+ self.orig_map[bb]
+ }
+}
+
+#[cfg(test)]
+mod test {
+ use super::*;
+ use crate::cursor::{Cursor, FuncCursor};
+ use crate::ir::types::*;
+ use crate::ir::{AbiParam, ExternalName, Function, InstBuilder, Signature};
+ use crate::isa::CallConv;
+
+ fn build_test_func(n_blocks: usize, edges: &[(usize, usize)]) -> Function {
+ assert!(n_blocks > 0);
+
+ let name = ExternalName::testcase("test0");
+ let mut sig = Signature::new(CallConv::SystemV);
+ sig.params.push(AbiParam::new(I32));
+ let mut func = Function::with_name_signature(name, sig);
+ let blocks = (0..n_blocks)
+ .map(|i| {
+ let bb = func.dfg.make_block();
+ assert!(bb.as_u32() == i as u32);
+ bb
+ })
+ .collect::<Vec<_>>();
+
+ let arg0 = func.dfg.append_block_param(blocks[0], I32);
+
+ let mut pos = FuncCursor::new(&mut func);
+
+ let mut edge = 0;
+ for i in 0..n_blocks {
+ pos.insert_block(blocks[i]);
+ let mut succs = vec![];
+ while edge < edges.len() && edges[edge].0 == i {
+ succs.push(edges[edge].1);
+ edge += 1;
+ }
+ if succs.len() == 0 {
+ pos.ins().return_(&[arg0]);
+ } else if succs.len() == 1 {
+ pos.ins().jump(blocks[succs[0]], &[]);
+ } else if succs.len() == 2 {
+ pos.ins().brnz(arg0, blocks[succs[0]], &[]);
+ pos.ins().jump(blocks[succs[1]], &[]);
+ } else {
+ panic!("Too many successors");
+ }
+ }
+
+ func
+ }
+
+ #[test]
+ fn test_blockorder_diamond() {
+ let func = build_test_func(4, &[(0, 1), (0, 2), (1, 3), (2, 3)]);
+ let order = BlockLoweringOrder::new(&func);
+
+ assert_eq!(order.lowered_order.len(), 6);
+
+ assert!(order.lowered_order[0].orig_block().unwrap().as_u32() == 0);
+ assert!(order.lowered_order[0].in_edge().is_none());
+ assert!(order.lowered_order[0].out_edge().is_none());
+
+ assert!(order.lowered_order[1].orig_block().unwrap().as_u32() == 1);
+ assert!(order.lowered_order[1].in_edge().unwrap().0.as_u32() == 0);
+ assert!(order.lowered_order[1].in_edge().unwrap().2.as_u32() == 1);
+
+ assert!(order.lowered_order[2].orig_block().is_none());
+ assert!(order.lowered_order[2].in_edge().is_none());
+ assert!(order.lowered_order[2].out_edge().unwrap().0.as_u32() == 1);
+ assert!(order.lowered_order[2].out_edge().unwrap().2.as_u32() == 3);
+
+ assert!(order.lowered_order[3].orig_block().unwrap().as_u32() == 2);
+ assert!(order.lowered_order[3].in_edge().unwrap().0.as_u32() == 0);
+ assert!(order.lowered_order[3].in_edge().unwrap().2.as_u32() == 2);
+ assert!(order.lowered_order[3].out_edge().is_none());
+
+ assert!(order.lowered_order[4].orig_block().is_none());
+ assert!(order.lowered_order[4].in_edge().is_none());
+ assert!(order.lowered_order[4].out_edge().unwrap().0.as_u32() == 2);
+ assert!(order.lowered_order[4].out_edge().unwrap().2.as_u32() == 3);
+
+ assert!(order.lowered_order[5].orig_block().unwrap().as_u32() == 3);
+ assert!(order.lowered_order[5].in_edge().is_none());
+ assert!(order.lowered_order[5].out_edge().is_none());
+ }
+
+ #[test]
+ fn test_blockorder_critedge() {
+ // 0
+ // / \
+ // 1 2
+ // / \ \
+ // 3 4 |
+ // |\ _|____|
+ // | \/ |
+ // | /\ |
+ // 5 6
+ //
+ // (3 -> 5, 3 -> 6, 4 -> 6 are critical edges and must be split)
+ //
+ let func = build_test_func(
+ 7,
+ &[
+ (0, 1),
+ (0, 2),
+ (1, 3),
+ (1, 4),
+ (2, 5),
+ (3, 5),
+ (3, 6),
+ (4, 6),
+ ],
+ );
+ let order = BlockLoweringOrder::new(&func);
+
+ assert_eq!(order.lowered_order.len(), 11);
+ println!("ordered = {:?}", order.lowered_order);
+
+ // block 0
+ assert!(order.lowered_order[0].orig_block().unwrap().as_u32() == 0);
+ assert!(order.lowered_order[0].in_edge().is_none());
+ assert!(order.lowered_order[0].out_edge().is_none());
+
+ // edge 0->1 + block 1
+ assert!(order.lowered_order[1].orig_block().unwrap().as_u32() == 1);
+ assert!(order.lowered_order[1].in_edge().unwrap().0.as_u32() == 0);
+ assert!(order.lowered_order[1].in_edge().unwrap().2.as_u32() == 1);
+ assert!(order.lowered_order[1].out_edge().is_none());
+
+ // edge 1->3 + block 3
+ assert!(order.lowered_order[2].orig_block().unwrap().as_u32() == 3);
+ assert!(order.lowered_order[2].in_edge().unwrap().0.as_u32() == 1);
+ assert!(order.lowered_order[2].in_edge().unwrap().2.as_u32() == 3);
+ assert!(order.lowered_order[2].out_edge().is_none());
+
+ // edge 3->5
+ assert!(order.lowered_order[3].orig_block().is_none());
+ assert!(order.lowered_order[3].in_edge().is_none());
+ assert!(order.lowered_order[3].out_edge().unwrap().0.as_u32() == 3);
+ assert!(order.lowered_order[3].out_edge().unwrap().2.as_u32() == 5);
+
+ // edge 3->6
+ assert!(order.lowered_order[4].orig_block().is_none());
+ assert!(order.lowered_order[4].in_edge().is_none());
+ assert!(order.lowered_order[4].out_edge().unwrap().0.as_u32() == 3);
+ assert!(order.lowered_order[4].out_edge().unwrap().2.as_u32() == 6);
+
+ // edge 1->4 + block 4
+ assert!(order.lowered_order[5].orig_block().unwrap().as_u32() == 4);
+ assert!(order.lowered_order[5].in_edge().unwrap().0.as_u32() == 1);
+ assert!(order.lowered_order[5].in_edge().unwrap().2.as_u32() == 4);
+ assert!(order.lowered_order[5].out_edge().is_none());
+
+ // edge 4->6
+ assert!(order.lowered_order[6].orig_block().is_none());
+ assert!(order.lowered_order[6].in_edge().is_none());
+ assert!(order.lowered_order[6].out_edge().unwrap().0.as_u32() == 4);
+ assert!(order.lowered_order[6].out_edge().unwrap().2.as_u32() == 6);
+
+ // block 6
+ assert!(order.lowered_order[7].orig_block().unwrap().as_u32() == 6);
+ assert!(order.lowered_order[7].in_edge().is_none());
+ assert!(order.lowered_order[7].out_edge().is_none());
+
+ // edge 0->2 + block 2
+ assert!(order.lowered_order[8].orig_block().unwrap().as_u32() == 2);
+ assert!(order.lowered_order[8].in_edge().unwrap().0.as_u32() == 0);
+ assert!(order.lowered_order[8].in_edge().unwrap().2.as_u32() == 2);
+ assert!(order.lowered_order[8].out_edge().is_none());
+
+ // edge 2->5
+ assert!(order.lowered_order[9].orig_block().is_none());
+ assert!(order.lowered_order[9].in_edge().is_none());
+ assert!(order.lowered_order[9].out_edge().unwrap().0.as_u32() == 2);
+ assert!(order.lowered_order[9].out_edge().unwrap().2.as_u32() == 5);
+
+ // block 5
+ assert!(order.lowered_order[10].orig_block().unwrap().as_u32() == 5);
+ assert!(order.lowered_order[10].in_edge().is_none());
+ assert!(order.lowered_order[10].out_edge().is_none());
+ }
+}
diff --git a/third_party/rust/cranelift-codegen/src/machinst/buffer.rs b/third_party/rust/cranelift-codegen/src/machinst/buffer.rs
new file mode 100644
index 0000000000..b2187a9b68
--- /dev/null
+++ b/third_party/rust/cranelift-codegen/src/machinst/buffer.rs
@@ -0,0 +1,1813 @@
+//! In-memory representation of compiled machine code, with labels and fixups to
+//! refer to those labels. Handles constant-pool island insertion and also
+//! veneer insertion for out-of-range jumps.
+//!
+//! This code exists to solve three problems:
+//!
+//! - Branch targets for forward branches are not known until later, when we
+//! emit code in a single pass through the instruction structs.
+//!
+//! - On many architectures, address references or offsets have limited range.
+//! For example, on AArch64, conditional branches can only target code +/- 1MB
+//! from the branch itself.
+//!
+//! - The lowering of control flow from the CFG-with-edges produced by
+//! [BlockLoweringOrder](super::BlockLoweringOrder), combined with many empty
+//! edge blocks when the register allocator does not need to insert any
+//! spills/reloads/moves in edge blocks, results in many suboptimal branch
+//! patterns. The lowering also pays no attention to block order, and so
+//! two-target conditional forms (cond-br followed by uncond-br) can often by
+//! avoided because one of the targets is the fallthrough. There are several
+//! cases here where we can simplify to use fewer branches.
+//!
+//! This "buffer" implements a single-pass code emission strategy (with a later
+//! "fixup" pass, but only through recorded fixups, not all instructions). The
+//! basic idea is:
+//!
+//! - Emit branches as they are, including two-target (cond/uncond) compound
+//! forms, but with zero offsets and optimistically assuming the target will be
+//! in range. Record the "fixup" for later. Targets are denoted instead by
+//! symbolic "labels" that are then bound to certain offsets in the buffer as
+//! we emit code. (Nominally, there is a label at the start of every basic
+//! block.)
+//!
+//! - As we do this, track the offset in the buffer at which the first label
+//! reference "goes out of range". We call this the "deadline". If we reach the
+//! deadline and we still have not bound the label to which an unresolved branch
+//! refers, we have a problem!
+//!
+//! - To solve this problem, we emit "islands" full of "veneers". An island is
+//! simply a chunk of code inserted in the middle of the code actually produced
+//! by the emitter (e.g., vcode iterating over instruction structs). The emitter
+//! has some awareness of this: it either asks for an island between blocks, so
+//! it is not accidentally executed, or else it emits a branch around the island
+//! when all other options fail (see `Inst::EmitIsland` meta-instruction).
+//!
+//! - A "veneer" is an instruction (or sequence of instructions) in an "island"
+//! that implements a longer-range reference to a label. The idea is that, for
+//! example, a branch with a limited range can branch to a "veneer" instead,
+//! which is simply a branch in a form that can use a longer-range reference. On
+//! AArch64, for example, conditionals have a +/- 1 MB range, but a conditional
+//! can branch to an unconditional branch which has a +/- 128 MB range. Hence, a
+//! conditional branch's label reference can be fixed up with a "veneer" to
+//! achieve a longer range.
+//!
+//! - To implement all of this, we require the backend to provide a `LabelUse`
+//! type that implements a trait. This is nominally an enum that records one of
+//! several kinds of references to an offset in code -- basically, a relocation
+//! type -- and will usually correspond to different instruction formats. The
+//! `LabelUse` implementation specifies the maximum range, how to patch in the
+//! actual label location when known, and how to generate a veneer to extend the
+//! range.
+//!
+//! That satisfies label references, but we still may have suboptimal branch
+//! patterns. To clean up the branches, we do a simple "peephole"-style
+//! optimization on the fly. To do so, the emitter (e.g., `Inst::emit()`)
+//! informs the buffer of branches in the code and, in the case of conditionals,
+//! the code that would have been emitted to invert this branch's condition. We
+//! track the "latest branches": these are branches that are contiguous up to
+//! the current offset. (If any code is emitted after a branch, that branch or
+//! run of contiguous branches is no longer "latest".) The latest branches are
+//! those that we can edit by simply truncating the buffer and doing something
+//! else instead.
+//!
+//! To optimize branches, we implement several simple rules, and try to apply
+//! them to the "latest branches" when possible:
+//!
+//! - A branch with a label target, when that label is bound to the ending
+//! offset of the branch (the fallthrough location), can be removed altogether,
+//! because the branch would have no effect.
+//!
+//! - An unconditional branch that starts at a label location, and branches to
+//! another label, results in a "label alias": all references to the label bound
+//! *to* this branch instruction are instead resolved to the *target* of the
+//! branch instruction. This effectively removes empty blocks that just
+//! unconditionally branch to the next block. We call this "branch threading".
+//!
+//! - A conditional followed by an unconditional, when the conditional branches
+//! to the unconditional's fallthrough, results in (i) the truncation of the
+//! unconditional, (ii) the inversion of the condition's condition, and (iii)
+//! replacement of the conditional's target (using the original target of the
+//! unconditional). This is a fancy way of saying "we can flip a two-target
+//! conditional branch's taken/not-taken targets if it works better with our
+//! fallthrough". To make this work, the emitter actually gives the buffer
+//! *both* forms of every conditional branch: the true form is emitted into the
+//! buffer, and the "inverted" machine-code bytes are provided as part of the
+//! branch-fixup metadata.
+//!
+//! - An unconditional branch B preceded by another unconditional branch P, when
+//! all labels bound to B's start have been redirected to B's target, can be
+//! removed entirely. This is an extension
+//! of the branch-threading optimization, and is valid because if we know there
+//! will be no fallthrough into this branch instruction (the prior instruction
+//! is an unconditional jump), and if we know we have successfully redirected
+//! all labels, then this branch instruction is unreachable. Note that this
+//! works because the redirection happens before the label is ever resolved
+//! (fixups happen at island emission time, at which point latest-branches are
+//! cleared, or at the end of emission), so we are sure to catch and redirect
+//! all possible paths to this instruction.
+//!
+//! # Branch-optimization Correctness
+//!
+//! The branch-optimization mechanism depends on a few data structures with
+//! invariants, which are always held outside the scope of top-level public
+//! methods:
+//!
+//! - The latest-branches list. Each entry describes a span of the buffer
+//! (start/end offsets), the label target, the corresponding fixup-list entry
+//! index, and the bytes (must be the same length) for the inverted form, if
+//! conditional. The list of labels that are bound to the start-offset of this
+//! branch is *complete* (if any label has a resolved offset equal to `start`
+//! and is not an alias, it must appear in this list) and *precise* (no label
+//! in this list can be bound to another offset). No label in this list should
+//! be an alias. No two branch ranges can overlap, and branches are in
+//! ascending-offset order.
+//!
+//! - The labels-at-tail list. This contains all MachLabels that have been bound
+//! to (whose resolved offsets are equal to) the tail offset of the buffer.
+//! No label in this list should be an alias.
+//!
+//! - The label_offsets array, containing the bound offset of a label or
+//! UNKNOWN. No label can be bound at an offset greater than the current
+//! buffer tail.
+//!
+//! - The label_aliases array, containing another label to which a label is
+//! bound or UNKNOWN. A label's resolved offset is the resolved offset
+//! of the label it is aliased to, if this is set.
+//!
+//! We argue below, at each method, how the invariants in these data structures
+//! are maintained (grep for "Post-invariant").
+//!
+//! Given these invariants, we argue why each optimization preserves execution
+//! semantics below (grep for "Preserves execution semantics").
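+//!
+//! As a minimal, hypothetical sketch of driving the buffer directly (real use
+//! goes through VCode emission; `Inst` and `LabelUse::Branch` stand in for a
+//! backend's instruction and label-use types, and finalization is elided):
+//!
+//! ```ignore
+//! let mut buf = MachBuffer::<Inst>::new();
+//! let label = buf.get_label();
+//! // Record a forward reference at the current offset, then emit the
+//! // (optimistically zero-offset) bytes of the referring instruction.
+//! let off = buf.cur_offset();
+//! buf.use_label_at_offset(off, label, LabelUse::Branch);
+//! buf.put4(0);
+//! // ... later, bind the label at the offset the reference should resolve to.
+//! buf.bind_label(label);
+//! ```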
+
+use crate::binemit::{Addend, CodeOffset, CodeSink, Reloc, StackMap};
+use crate::ir::{ExternalName, Opcode, SourceLoc, TrapCode};
+use crate::machinst::{BlockIndex, MachInstLabelUse, VCodeConstant, VCodeConstants, VCodeInst};
+use crate::timing;
+use cranelift_entity::{entity_impl, SecondaryMap};
+
+use log::trace;
+use smallvec::SmallVec;
+use std::mem;
+use std::string::String;
+
+/// A buffer of output to be produced, fixed up, and then emitted to a CodeSink
+/// in bulk.
+///
+/// This struct uses `SmallVec`s to support small-ish function bodies without
+/// any heap allocation. As such, it will be several kilobytes large. This is
+/// likely fine as long as it is stack-allocated for function emission then
+/// thrown away; but beware if many buffer objects are retained persistently.
+pub struct MachBuffer<I: VCodeInst> {
+ /// The buffer contents, as raw bytes.
+ data: SmallVec<[u8; 1024]>,
+ /// Any relocations referring to this code. Note that only *external*
+ /// relocations are tracked here; references to labels within the buffer are
+ /// resolved before emission.
+ relocs: SmallVec<[MachReloc; 16]>,
+ /// Any trap records referring to this code.
+ traps: SmallVec<[MachTrap; 16]>,
+ /// Any call site records referring to this code.
+ call_sites: SmallVec<[MachCallSite; 16]>,
+ /// Any source location mappings referring to this code.
+ srclocs: SmallVec<[MachSrcLoc; 64]>,
+ /// Any stack maps referring to this code.
+ stack_maps: SmallVec<[MachStackMap; 8]>,
+ /// The current source location in progress (after `start_srcloc()` and
+ /// before `end_srcloc()`). This is a (start_offset, src_loc) tuple.
+ cur_srcloc: Option<(CodeOffset, SourceLoc)>,
+ /// Known label offsets; `UNKNOWN_LABEL_OFFSET` if unknown.
+ label_offsets: SmallVec<[CodeOffset; 16]>,
+ /// Label aliases: when one label points to an unconditional jump, and that
+ /// jump points to another label, we can redirect references to the first
+ /// label immediately to the second.
+ ///
+ /// Invariant: we don't have label-alias cycles. We ensure this by,
+ /// before setting label A to alias label B, resolving B's alias
+ /// target (iteratively until a non-aliased label); if B is already
+ /// aliased to A, then we cannot alias A back to B.
+ label_aliases: SmallVec<[MachLabel; 16]>,
+ /// Constants that must be emitted at some point.
+ pending_constants: SmallVec<[MachLabelConstant; 16]>,
+ /// Fixups that must be performed after all code is emitted.
+ fixup_records: SmallVec<[MachLabelFixup<I>; 16]>,
+ /// Current deadline at which all constants are flushed and all code labels
+ /// are extended by emitting long-range jumps in an island. This flush
+ /// should be rare (e.g., on AArch64, the shortest-range PC-rel references
+ /// are +/- 1MB for conditional jumps and load-literal instructions), so
+ /// it's acceptable to track a minimum and flush-all rather than doing more
+ /// detailed "current minimum" / sort-by-deadline trickery.
+ island_deadline: CodeOffset,
+ /// How many bytes are needed in the worst case for an island, given all
+ /// pending constants and fixups.
+ island_worst_case_size: CodeOffset,
+ /// Latest branches, to facilitate in-place editing for better fallthrough
+ /// behavior and empty-block removal.
+ latest_branches: SmallVec<[MachBranch; 4]>,
+ /// All labels at the current offset (emission tail). This is lazily
+ /// cleared: it is actually accurate as long as the current offset is
+ /// `labels_at_tail_off`, but if `cur_offset()` has grown larger, it should
+ /// be considered as empty.
+ ///
+ /// For correctness, this *must* be complete (i.e., the vector must contain
+ /// all labels whose offsets are resolved to the current tail), because we
+ /// rely on it to update labels when we truncate branches.
+ labels_at_tail: SmallVec<[MachLabel; 4]>,
+ /// The last offset at which `labels_at_tail` is valid. It is conceptually
+ /// always describing the tail of the buffer, but we do not clear
+ /// `labels_at_tail` eagerly when the tail grows, rather we lazily clear it
+ /// when the offset has grown past this (`labels_at_tail_off`) point.
+ /// Always <= `cur_offset()`.
+ labels_at_tail_off: CodeOffset,
+ /// Map used constants to their [MachLabel].
+ constant_labels: SecondaryMap<VCodeConstant, MachLabel>,
+}
+
+/// A `MachBuffer` once emission is completed: holds generated code and records,
+/// without fixups. This allows the type to be independent of the backend.
+pub struct MachBufferFinalized {
+ /// The buffer contents, as raw bytes.
+ pub data: SmallVec<[u8; 1024]>,
+ /// Any relocations referring to this code. Note that only *external*
+ /// relocations are tracked here; references to labels within the buffer are
+ /// resolved before emission.
+ relocs: SmallVec<[MachReloc; 16]>,
+ /// Any trap records referring to this code.
+ traps: SmallVec<[MachTrap; 16]>,
+ /// Any call site records referring to this code.
+ call_sites: SmallVec<[MachCallSite; 16]>,
+ /// Any source location mappings referring to this code.
+ srclocs: SmallVec<[MachSrcLoc; 64]>,
+ /// Any stack maps referring to this code.
+ stack_maps: SmallVec<[MachStackMap; 8]>,
+}
+
+static UNKNOWN_LABEL_OFFSET: CodeOffset = 0xffff_ffff;
+static UNKNOWN_LABEL: MachLabel = MachLabel(0xffff_ffff);
+
+/// A label refers to some offset in a `MachBuffer`. It may not be resolved at
+/// the point at which it is used by emitted code; the buffer records "fixups"
+/// for references to the label, and will come back and patch the code
+/// appropriately when the label's location is eventually known.
+#[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)]
+pub struct MachLabel(u32);
+entity_impl!(MachLabel);
+
+impl MachLabel {
+ /// Get a label for a block. (The first N MachLabels are always reserved for
+ /// the N blocks in the vcode.)
+ pub fn from_block(bindex: BlockIndex) -> MachLabel {
+ MachLabel(bindex)
+ }
+
+ /// Get the numeric label index.
+ pub fn get(self) -> u32 {
+ self.0
+ }
+
+ /// Creates a string representing this label, for convenience.
+ pub fn to_string(&self) -> String {
+ format!("label{}", self.0)
+ }
+}
+
+impl Default for MachLabel {
+ fn default() -> Self {
+ UNKNOWN_LABEL
+ }
+}
+
+/// A stack map extent, when creating a stack map.
+pub enum StackMapExtent {
+ /// The stack map starts at this instruction, and ends after the number of upcoming bytes
+ /// (note: this is a code offset diff).
+ UpcomingBytes(CodeOffset),
+
+ /// The stack map started at the given offset and ends at the current one. This helps
+ /// architectures where instructions do not have a fixed length.
+ StartedAtOffset(CodeOffset),
+}
+
+impl<I: VCodeInst> MachBuffer<I> {
+ /// Create a new, empty `MachBuffer`.
+ pub fn new() -> MachBuffer<I> {
+ MachBuffer {
+ data: SmallVec::new(),
+ relocs: SmallVec::new(),
+ traps: SmallVec::new(),
+ call_sites: SmallVec::new(),
+ srclocs: SmallVec::new(),
+ stack_maps: SmallVec::new(),
+ cur_srcloc: None,
+ label_offsets: SmallVec::new(),
+ label_aliases: SmallVec::new(),
+ pending_constants: SmallVec::new(),
+ fixup_records: SmallVec::new(),
+ island_deadline: UNKNOWN_LABEL_OFFSET,
+ island_worst_case_size: 0,
+ latest_branches: SmallVec::new(),
+ labels_at_tail: SmallVec::new(),
+ labels_at_tail_off: 0,
+ constant_labels: SecondaryMap::new(),
+ }
+ }
+
+ /// Debug-only: check invariants of labels and branch-records described
+ /// under "Branch-optimization Correctness" above.
+ #[cfg(debug)]
+ fn check_label_branch_invariants(&self) {
+ let cur_off = self.cur_offset();
+ // Check that every entry in latest_branches has *correct*
+ // labels_at_this_branch lists. We do not check completeness because
+ // that would require building a reverse index, which is too slow even
+ // for a debug invariant check.
+ let mut last_end = 0;
+ for b in &self.latest_branches {
+ debug_assert!(b.start < b.end);
+ debug_assert!(b.end <= cur_off);
+ debug_assert!(b.start >= last_end);
+ last_end = b.end;
+ for &l in &b.labels_at_this_branch {
+ debug_assert_eq!(self.resolve_label_offset(l), b.start);
+ debug_assert_eq!(self.label_aliases[l.0 as usize], UNKNOWN_LABEL);
+ }
+ }
+
+ // Check that every label is unresolved, or resolved at or before
+ // cur_offset. If at cur_offset, must be in `labels_at_tail`.
+ for (i, &off) in self.label_offsets.iter().enumerate() {
+ let label = MachLabel(i as u32);
+ debug_assert!(off == UNKNOWN_LABEL_OFFSET || off <= cur_off);
+ if off == cur_off {
+ debug_assert!(
+ self.labels_at_tail_off == cur_off && self.labels_at_tail.contains(&label)
+ );
+ }
+ }
+
+ // Check that every label in `labels_at_tail_off` is precise, i.e.,
+ // resolves to the cur offset.
+ debug_assert!(self.labels_at_tail_off <= cur_off);
+ if self.labels_at_tail_off == cur_off {
+ for &l in &self.labels_at_tail {
+ debug_assert_eq!(self.resolve_label_offset(l), cur_off);
+ debug_assert_eq!(self.label_aliases[l.0 as usize], UNKNOWN_LABEL);
+ }
+ }
+ }
+
+ #[cfg(not(debug))]
+ fn check_label_branch_invariants(&self) {
+ // Nothing.
+ }
+
+ /// Current offset from start of buffer.
+ pub fn cur_offset(&self) -> CodeOffset {
+ self.data.len() as CodeOffset
+ }
+
+ /// Add a byte.
+ pub fn put1(&mut self, value: u8) {
+ trace!("MachBuffer: put byte @ {}: {:x}", self.cur_offset(), value);
+ self.data.push(value);
+
+ // Post-invariant: conceptual-labels_at_tail contains a complete and
+ // precise list of labels bound at `cur_offset()`. We have advanced
+ // `cur_offset()`, hence if it had been equal to `labels_at_tail_off`
+ // before, it is not anymore (and it cannot become equal, because
+ // `labels_at_tail_off` is always <= `cur_offset()`). Thus the list is
+ // conceptually empty (even though it is only lazily cleared). No labels
+ // can be bound at this new offset (by invariant on `label_offsets`).
+ // Hence the invariant holds.
+ }
+
+ /// Add 2 bytes.
+ pub fn put2(&mut self, value: u16) {
+ trace!(
+ "MachBuffer: put 16-bit word @ {}: {:x}",
+ self.cur_offset(),
+ value
+ );
+ let bytes = value.to_le_bytes();
+ self.data.extend_from_slice(&bytes[..]);
+
+ // Post-invariant: as for `put1()`.
+ }
+
+ /// Add 4 bytes.
+ pub fn put4(&mut self, value: u32) {
+ trace!(
+ "MachBuffer: put 32-bit word @ {}: {:x}",
+ self.cur_offset(),
+ value
+ );
+ let bytes = value.to_le_bytes();
+ self.data.extend_from_slice(&bytes[..]);
+
+ // Post-invariant: as for `put1()`.
+ }
+
+ /// Add 8 bytes.
+ pub fn put8(&mut self, value: u64) {
+ trace!(
+ "MachBuffer: put 64-bit word @ {}: {:x}",
+ self.cur_offset(),
+ value
+ );
+ let bytes = value.to_le_bytes();
+ self.data.extend_from_slice(&bytes[..]);
+
+ // Post-invariant: as for `put1()`.
+ }
+
+ /// Add a slice of bytes.
+ pub fn put_data(&mut self, data: &[u8]) {
+ trace!(
+ "MachBuffer: put data @ {}: len {}",
+ self.cur_offset(),
+ data.len()
+ );
+ self.data.extend_from_slice(data);
+
+ // Post-invariant: as for `put1()`.
+ }
+
+ /// Reserve appended space and return a mutable slice referring to it.
+ pub fn get_appended_space(&mut self, len: usize) -> &mut [u8] {
+ trace!("MachBuffer: put data @ {}: len {}", self.cur_offset(), len);
+ let off = self.data.len();
+ let new_len = self.data.len() + len;
+ self.data.resize(new_len, 0);
+ &mut self.data[off..]
+
+ // Post-invariant: as for `put1()`.
+ }
+
+ /// Align up to the given alignment.
+ pub fn align_to(&mut self, align_to: CodeOffset) {
+ trace!("MachBuffer: align to {}", align_to);
+ assert!(align_to.is_power_of_two());
+ while self.cur_offset() & (align_to - 1) != 0 {
+ self.put1(0);
+ }
+
+ // Post-invariant: as for `put1()`.
+ }
+
+ /// Allocate a `Label` to refer to some offset. May not be bound to a fixed
+ /// offset yet.
+ pub fn get_label(&mut self) -> MachLabel {
+ let l = self.label_offsets.len() as u32;
+ self.label_offsets.push(UNKNOWN_LABEL_OFFSET);
+ self.label_aliases.push(UNKNOWN_LABEL);
+ trace!("MachBuffer: new label -> {:?}", MachLabel(l));
+ MachLabel(l)
+
+ // Post-invariant: the only mutation is to add a new label; it has no
+ // bound offset yet, so it trivially satisfies all invariants.
+ }
+
+ /// Reserve the first N MachLabels for blocks.
+ pub fn reserve_labels_for_blocks(&mut self, blocks: BlockIndex) {
+ trace!("MachBuffer: first {} labels are for blocks", blocks);
+ debug_assert!(self.label_offsets.is_empty());
+ self.label_offsets
+ .resize(blocks as usize, UNKNOWN_LABEL_OFFSET);
+ self.label_aliases.resize(blocks as usize, UNKNOWN_LABEL);
+
+ // Post-invariant: as for `get_label()`.
+ }
+
+ /// Reserve the next N MachLabels for constants.
+ pub fn reserve_labels_for_constants(&mut self, constants: &VCodeConstants) {
+ trace!(
+ "MachBuffer: next {} labels are for constants",
+ constants.len()
+ );
+ for c in constants.keys() {
+ self.constant_labels[c] = self.get_label();
+ }
+
+ // Post-invariant: as for `get_label()`.
+ }
+
+ /// Retrieve the reserved label for a constant.
+ pub fn get_label_for_constant(&self, constant: VCodeConstant) -> MachLabel {
+ self.constant_labels[constant]
+ }
+
+ /// Bind a label to the current offset. A label can only be bound once.
+ pub fn bind_label(&mut self, label: MachLabel) {
+ trace!(
+ "MachBuffer: bind label {:?} at offset {}",
+ label,
+ self.cur_offset()
+ );
+ debug_assert_eq!(self.label_offsets[label.0 as usize], UNKNOWN_LABEL_OFFSET);
+ debug_assert_eq!(self.label_aliases[label.0 as usize], UNKNOWN_LABEL);
+ let offset = self.cur_offset();
+ self.label_offsets[label.0 as usize] = offset;
+ self.lazily_clear_labels_at_tail();
+ self.labels_at_tail.push(label);
+
+ // Invariants hold: bound offset of label is <= cur_offset (in fact it
+ // is equal). If the `labels_at_tail` list was complete and precise
+ // before, it is still, because we have bound this label to the current
+ // offset and added it to the list (which contains all labels at the
+ // current offset).
+
+ self.check_label_branch_invariants();
+ self.optimize_branches();
+
+ // Post-invariant: by `optimize_branches()` (see argument there).
+ self.check_label_branch_invariants();
+ }
+
+ /// Lazily clear `labels_at_tail` if the tail offset has moved beyond the
+ /// offset that it applies to.
+ fn lazily_clear_labels_at_tail(&mut self) {
+ let offset = self.cur_offset();
+ if offset > self.labels_at_tail_off {
+ self.labels_at_tail_off = offset;
+ self.labels_at_tail.clear();
+ }
+
+ // Post-invariant: either labels_at_tail_off was at cur_offset, and
+ // state is untouched, or was less than cur_offset, in which case the
+ // labels_at_tail list was conceptually empty, and is now actually
+ // empty.
+ }
+
+ /// Resolve a label to an offset, if known. May return `UNKNOWN_LABEL_OFFSET`.
+ fn resolve_label_offset(&self, mut label: MachLabel) -> CodeOffset {
+ let mut iters = 0;
+ while self.label_aliases[label.0 as usize] != UNKNOWN_LABEL {
+ label = self.label_aliases[label.0 as usize];
+ // To protect against an infinite loop (despite our assurances to
+ // ourselves that the invariants make this impossible), assert out
+ // after 1M iterations. The number of basic blocks is limited
+ // in most contexts anyway so this should be impossible to hit with
+ // a legitimate input.
+ iters += 1;
+ assert!(iters < 1_000_000, "Unexpected cycle in label aliases");
+ }
+ self.label_offsets[label.0 as usize]
+
+ // Post-invariant: no mutations.
+ }
+
+ /// Emit a reference to the given label with the given reference type (i.e.,
+ /// branch-instruction format) at the current offset. This is like a
+ /// relocation, but handled internally.
+ ///
+ /// This can be called before the branch is actually emitted; fixups will
+ /// not happen until an island is emitted or the buffer is finished.
+ pub fn use_label_at_offset(&mut self, offset: CodeOffset, label: MachLabel, kind: I::LabelUse) {
+ trace!(
+ "MachBuffer: use_label_at_offset: offset {} label {:?} kind {:?}",
+ offset,
+ label,
+ kind
+ );
+
+ // Add the fixup, and update the worst-case island size based on a
+ // veneer for this label use.
+ self.fixup_records.push(MachLabelFixup {
+ label,
+ offset,
+ kind,
+ });
+ if kind.supports_veneer() {
+ self.island_worst_case_size += kind.veneer_size();
+ self.island_worst_case_size &= !(I::LabelUse::ALIGN - 1);
+ }
+ let deadline = offset + kind.max_pos_range();
+ if deadline < self.island_deadline {
+ self.island_deadline = deadline;
+ }
+
+ // Post-invariant: no mutations to branches/labels data structures.
+ self.check_label_branch_invariants();
+ }
+
+ /// Inform the buffer of an unconditional branch at the given offset,
+ /// targeting the given label. May be used to optimize branches.
+ /// The last added label-use must correspond to this branch.
+ /// This must be called when the current offset is equal to `start`; i.e.,
+ /// before actually emitting the branch. This implies that for a branch that
+ /// uses a label and is eligible for optimizations by the MachBuffer, the
+ /// proper sequence is:
+ ///
+ /// - Call `use_label_at_offset()` to emit the fixup record.
+ /// - Call `add_uncond_branch()` to make note of the branch.
+ /// - Emit the bytes for the branch's machine code.
+ ///
+ /// Additional requirement: no labels may be bound between `start` and `end`
+ /// (exclusive on both ends).
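+ ///
+ /// A minimal sketch of that sequence, where `buffer` is this `MachBuffer`,
+ /// `target` is the branch's destination label, `enc` is the (hypothetical)
+ /// 4-byte machine encoding of the branch, and `LabelUse::Branch` is some
+ /// backend-specific label-use kind:
+ ///
+ /// ```ignore
+ /// let start = buffer.cur_offset();
+ /// buffer.use_label_at_offset(start, target, LabelUse::Branch);
+ /// buffer.add_uncond_branch(start, start + 4, target);
+ /// buffer.put4(enc);
+ /// ```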
+ pub fn add_uncond_branch(&mut self, start: CodeOffset, end: CodeOffset, target: MachLabel) {
+ assert!(self.cur_offset() == start);
+ debug_assert!(end > start);
+ assert!(!self.fixup_records.is_empty());
+ let fixup = self.fixup_records.len() - 1;
+ self.lazily_clear_labels_at_tail();
+ self.latest_branches.push(MachBranch {
+ start,
+ end,
+ target,
+ fixup,
+ inverted: None,
+ labels_at_this_branch: self.labels_at_tail.clone(),
+ });
+
+ // Post-invariant: we asserted branch start is current tail; the list of
+ // labels at branch is cloned from list of labels at current tail.
+ self.check_label_branch_invariants();
+ }
+
+ /// Inform the buffer of a conditional branch at the given offset,
+ /// targeting the given label. May be used to optimize branches.
+ /// The last added label-use must correspond to this branch.
+ ///
+ /// Additional requirement: no labels may be bound between `start` and `end`
+ /// (exclusive on both ends).
+ pub fn add_cond_branch(
+ &mut self,
+ start: CodeOffset,
+ end: CodeOffset,
+ target: MachLabel,
+ inverted: &[u8],
+ ) {
+ assert!(self.cur_offset() == start);
+ debug_assert!(end > start);
+ assert!(!self.fixup_records.is_empty());
+ debug_assert!(inverted.len() == (end - start) as usize);
+ let fixup = self.fixup_records.len() - 1;
+ let inverted = Some(SmallVec::from(inverted));
+ self.lazily_clear_labels_at_tail();
+ self.latest_branches.push(MachBranch {
+ start,
+ end,
+ target,
+ fixup,
+ inverted,
+ labels_at_this_branch: self.labels_at_tail.clone(),
+ });
+
+ // Post-invariant: we asserted branch start is current tail; labels at
+ // branch list is cloned from list of labels at current tail.
+ self.check_label_branch_invariants();
+ }
+
+ fn truncate_last_branch(&mut self) {
+ self.lazily_clear_labels_at_tail();
+ // Invariants hold at this point.
+
+ let b = self.latest_branches.pop().unwrap();
+ assert!(b.end == self.cur_offset());
+
+ // State:
+ // [PRE CODE]
+ // Offset b.start, b.labels_at_this_branch:
+ // [BRANCH CODE]
+ // cur_off, self.labels_at_tail -->
+ // (end of buffer)
+ self.data.truncate(b.start as usize);
+ self.fixup_records.truncate(b.fixup);
+ // State:
+ // [PRE CODE]
+ // cur_off, Offset b.start, b.labels_at_this_branch:
+ // (end of buffer)
+ //
+ // self.labels_at_tail --> (past end of buffer)
+ let cur_off = self.cur_offset();
+ self.labels_at_tail_off = cur_off;
+ // State:
+ // [PRE CODE]
+ // cur_off, Offset b.start, b.labels_at_this_branch,
+ // self.labels_at_tail:
+ // (end of buffer)
+ //
+ // resolve_label_offset(l) for l in labels_at_tail:
+ // (past end of buffer)
+
+ trace!(
+ "truncate_last_branch: truncated {:?}; off now {}",
+ b,
+ cur_off
+ );
+
+ // Fix up resolved label offsets for labels at tail.
+ for &l in &self.labels_at_tail {
+ self.label_offsets[l.0 as usize] = cur_off;
+ }
+ // Old labels_at_this_branch are now at cur_off.
+ self.labels_at_tail
+ .extend(b.labels_at_this_branch.into_iter());
+
+ // Post-invariant: this operation is defined to truncate the buffer,
+ // which moves cur_off backward, and to move labels at the end of the
+ // buffer back to the start-of-branch offset.
+ //
+ // latest_branches satisfies all invariants:
+ // - it has no branches past the end of the buffer (branches are in
+ // order, we removed the last one, and we truncated the buffer to just
+ // before the start of that branch)
+ // - no labels were moved to lower offsets than the (new) cur_off, so
+ // the labels_at_this_branch list for any other branch need not change.
+ //
+ // labels_at_tail satisfies all invariants:
+ // - all labels that were at the tail after the truncated branch are
+ // moved backward to just before the branch, which becomes the new tail;
+ // thus every element in the list should remain (ensured by `.extend()`
+ // above).
+ // - all labels that refer to the new tail, which is the start-offset of
+ // the truncated branch, must be present. The `labels_at_this_branch`
+ // list in the truncated branch's record is a complete and precise list
+ // of exactly these labels; we append these to labels_at_tail.
+ // - labels_at_tail_off is at cur_off after truncation occurs, so the
+ // list is valid (not to be lazily cleared).
+ //
+ // The stated operation was performed:
+ // - For each label at the end of the buffer prior to this method, it
+ // now resolves to the new (truncated) end of the buffer: it must have
+ // been in `labels_at_tail` (this list is precise and complete, and
+ // the tail was at the end of the truncated branch on entry), and we
+ // iterate over this list and set `label_offsets` to the new tail.
+ // None of these labels could have been an alias (by invariant), so
+ // `label_offsets` is authoritative for each.
+ // - No other labels will be past the end of the buffer, because of the
+ // requirement that no labels be bound to the middle of branch ranges
+ // (see comments to `add_{cond,uncond}_branch()`).
+ // - The buffer is truncated to just before the last branch, and the
+ // fixup record referring to that last branch is removed.
+ self.check_label_branch_invariants();
+ }
+
+ fn optimize_branches(&mut self) {
+ self.lazily_clear_labels_at_tail();
+ // Invariants valid at this point.
+
+ trace!(
+ "enter optimize_branches:\n b = {:?}\n l = {:?}\n f = {:?}",
+ self.latest_branches,
+ self.labels_at_tail,
+ self.fixup_records
+ );
+
+ // We continue to munch on branches at the tail of the buffer until no
+ // more rules apply. Note that the loop only continues if a branch is
+ // actually truncated (or if labels are redirected away from a branch),
+ // so this always makes progress.
+ while let Some(b) = self.latest_branches.last() {
+ let cur_off = self.cur_offset();
+ trace!("optimize_branches: last branch {:?} at off {}", b, cur_off);
+ // If there has been any code emission since the end of the last branch or
+ // label definition, then there's nothing we can edit (because we
+ // don't move code once placed, only back up and overwrite), so
+ // clear the records and finish.
+ if b.end < cur_off {
+ break;
+ }
+
+ // Invariant: we are looking at a branch that ends at the tail of
+ // the buffer.
+
+ // For any branch, conditional or unconditional:
+ // - If the target is a label at the current offset, then remove
+ // the branch, and reset all labels that targeted
+ // the current offset (end of branch) to the truncated
+ // end-of-code.
+ //
+ // Preserves execution semantics: a branch to its own fallthrough
+ // address is equivalent to a no-op; in both cases, nextPC is the
+ // fallthrough.
+ if self.resolve_label_offset(b.target) == cur_off {
+ trace!("branch with target == cur off; truncating");
+ self.truncate_last_branch();
+ continue;
+ }
+
+ // If latest is an unconditional branch:
+ //
+ // - If the branch's target is not its own start address, then for
+ // each label at the start of branch, make the label an alias of the
+ // branch target, and remove the label from the "labels at this
+ // branch" list.
+ //
+ // - Preserves execution semantics: an unconditional branch's
+ // only effect is to set PC to a new PC; this change simply
+ // collapses one step in the step-semantics.
+ //
+ // - Post-invariant: the labels that were bound to the start of
+ // this branch become aliases, so they must not be present in any
+ // labels-at-this-branch list or the labels-at-tail list. The
+ // labels are removed from the latest-branch record's
+ // labels-at-this-branch list, and are never placed in the
+ // labels-at-tail list. Furthermore, it is correct that they are
+ // not in either list, because they are now aliases, and labels
+ // that are aliases remain aliases forever.
+ //
+ // - If there is a prior unconditional branch that ends just before
+ // this one begins, and this branch has no labels bound to its
+ // start, then we can truncate this branch, because it is entirely
+ // unreachable (we have redirected all labels that make it
+ // reachable otherwise). Do so and continue around the loop. (This
+ // rule is sketched after this comment block.)
+ //
+ // - Preserves execution semantics: the branch is unreachable,
+ // because execution can only flow into an instruction from the
+ // prior instruction's fallthrough or from a branch bound to that
+ // instruction's start offset. Unconditional branches have no
+ // fallthrough, so if the prior instruction is an unconditional
+ // branch, no fallthrough entry can happen. The
+ // labels-at-this-branch list is complete (by invariant), so if it
+ // is empty, then the instruction is entirely unreachable. Thus,
+ // it can be removed.
+ //
+ // - Post-invariant: ensured by truncate_last_branch().
+ //
+ // - If there is a prior conditional branch whose target label
+ // resolves to the current offset (branches around the
+ // unconditional branch), then remove the unconditional branch,
+ // and make the target of the unconditional the target of the
+ // conditional instead.
+ //
+ // - Preserves execution semantics: previously we had:
+ //
+ // L1:
+ // cond_br L2
+ // br L3
+ // L2:
+ // (end of buffer)
+ //
+ // by removing the last branch, we have:
+ //
+ // L1:
+ // cond_br L2
+ // L2:
+ // (end of buffer)
+ //
+ // we then fix up the records for the conditional branch to
+ // have:
+ //
+ // L1:
+ // cond_br.inverted L3
+ // L2:
+ //
+ // In the original code, control flow reaches L2 when the
+ // conditional branch's predicate is true, and L3 otherwise. In
+ // the optimized code, the same is true.
+ //
+ // - Post-invariant: all edits to latest_branches and
+ // labels_at_tail are performed by `truncate_last_branch()`,
+ // which maintains the invariants at each step.
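+ //
+ // An illustrative sketch of the uncond-follows-uncond rule above
+ // (label names arbitrary; no label is bound to the second branch's
+ // start):
+ //
+ //   br L1
+ //   br L2            ; unreachable: prior inst is an uncond branch
+ //                    ; and no label points here
+ //
+ // becomes, after truncation of the second branch:
+ //
+ //   br L1
+ //   (end of buffer)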
+
+ if b.is_uncond() {
+ // Make any label bound at this branch's start an alias of the
+ // branch's target, if the target is not the branch itself
+ // (i.e., an infinite loop).
+ //
+ // We cannot perform this aliasing if the target of this branch
+ // ultimately aliases back here; if so, we need to keep this
+ // branch, so break out of this loop entirely (and clear the
+ // latest-branches list below).
+ //
+ // Note that this check is what prevents cycles from forming in
+ // `self.label_aliases`. To see why, consider an arbitrary start
+ // state:
+ //
+ // label_aliases[L1] = L2, label_aliases[L2] = L3, ..., up to
+ // Ln, which is not aliased.
+ //
+ // We would create a cycle if we assigned label_aliases[Ln]
+ // = L1. Note that the below assignment is the only write
+ // to label_aliases.
+ //
+ // By our other invariants, we have that Ln (`l` below)
+ // resolves to the offset `b.start`, because it is in the
+ // set `b.labels_at_this_branch`.
+ //
+ // If L1 were already aliased, through some arbitrarily deep
+ // chain, to Ln, then it must also resolve to this offset
+ // `b.start`.
+ //
+ // By checking the resolution of `L1` against this offset,
+ // and aborting this branch-simplification if they are
+ // equal, we prevent the below assignment from ever creating
+ // a cycle.
+ if self.resolve_label_offset(b.target) != b.start {
+ let redirected = b.labels_at_this_branch.len();
+ for &l in &b.labels_at_this_branch {
+ trace!(
+ " -> label at start of branch {:?} redirected to target {:?}",
+ l,
+ b.target
+ );
+ self.label_aliases[l.0 as usize] = b.target;
+ // NOTE: we continue to ensure the invariant that labels
+ // pointing to tail of buffer are in `labels_at_tail`
+ // because we already ensured above that the last branch
+ // cannot have a target of `cur_off`; so we never have
+ // to put the label into `labels_at_tail` when moving it
+ // here.
+ }
+ // Maintain invariant: all labels that pointed at the start of
+ // this branch have been redirected and no longer point here.
+ let mut_b = self.latest_branches.last_mut().unwrap();
+ mut_b.labels_at_this_branch.clear();
+
+ if redirected > 0 {
+ trace!(" -> after label redirects, restarting loop");
+ continue;
+ }
+ } else {
+ break;
+ }
+
+ let b = self.latest_branches.last().unwrap();
+
+ // Examine any immediately preceding branch.
+ if self.latest_branches.len() > 1 {
+ let prev_b = &self.latest_branches[self.latest_branches.len() - 2];
+ trace!(" -> more than one branch; prev_b = {:?}", prev_b);
+ // This uncond is immediately after another uncond; any labels
+ // bound to this uncond's start should already have been
+ // redirected away (but check to be sure); so we can truncate
+ // this uncond.
+ if prev_b.is_uncond()
+ && prev_b.end == b.start
+ && b.labels_at_this_branch.is_empty()
+ {
+ trace!(" -> uncond follows another uncond; truncating");
+ self.truncate_last_branch();
+ continue;
+ }
+
+ // This uncond is immediately after a conditional whose target
+ // is the end of this uncond, and any labels bound to this
+ // uncond's start have already been redirected away; so we can
+ // truncate this uncond, flip the sense of the conditional, and
+ // set the conditional's target (in `latest_branches` and in
+ // `fixup_records`) to the uncond's target.
+ if prev_b.is_cond()
+ && prev_b.end == b.start
+ && self.resolve_label_offset(prev_b.target) == cur_off
+ {
+ trace!(" -> uncond follows a conditional, and conditional's target resolves to current offset");
+ // Save the target of the uncond (this becomes the
+ // target of the cond), and truncate the uncond.
+ let target = b.target;
+ let data = prev_b.inverted.clone().unwrap();
+ self.truncate_last_branch();
+
+ // Mutate the code and cond branch.
+ let off_before_edit = self.cur_offset();
+ let prev_b = self.latest_branches.last_mut().unwrap();
+ let not_inverted = SmallVec::from(
+ &self.data[(prev_b.start as usize)..(prev_b.end as usize)],
+ );
+
+ // Low-level edit: replaces bytes of branch with
+ // inverted form. cur_off remains the same afterward, so
+ // we do not need to modify label data structures.
+ self.data.truncate(prev_b.start as usize);
+ self.data.extend_from_slice(&data[..]);
+
+ // Save the original code as the inversion of the
+ // inverted branch, in case we later edit this branch
+ // again.
+ prev_b.inverted = Some(not_inverted);
+ self.fixup_records[prev_b.fixup].label = target;
+ trace!(" -> reassigning target of condbr to {:?}", target);
+ prev_b.target = target;
+ debug_assert_eq!(off_before_edit, self.cur_offset());
+ continue;
+ }
+ }
+ }
+
+ // If we couldn't do anything with the last branch, then break.
+ break;
+ }
+
+ self.purge_latest_branches();
+
+ trace!(
+ "leave optimize_branches:\n b = {:?}\n l = {:?}\n f = {:?}",
+ self.latest_branches,
+ self.labels_at_tail,
+ self.fixup_records
+ );
+ }
+
+ fn purge_latest_branches(&mut self) {
+ // All of our branch simplification rules work only if a branch ends at
+ // the tail of the buffer, with no following code; and branches are in
+ // order in latest_branches; so if the last entry ends prior to
+ // cur_offset, then clear all entries.
+ let cur_off = self.cur_offset();
+ if let Some(l) = self.latest_branches.last() {
+ if l.end < cur_off {
+ trace!("purge_latest_branches: removing branch {:?}", l);
+ self.latest_branches.clear();
+ }
+ }
+
+ // Post-invariant: no invariant requires any branch to appear in
+ // `latest_branches`; it is always optional. The list-clear above thus
+ // preserves all semantics.
+ }
+
+ /// Emit a constant at some point in the future, binding the given label to
+ /// its offset. The constant will be placed at most `max_distance` bytes from
+ /// the current offset.
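+ ///
+ /// An illustrative sketch (the block label here stands in for whatever
+ /// label the caller controls; values are arbitrary):
+ ///
+ /// ```ignore
+ /// buf.reserve_labels_for_blocks(1);
+ /// let lbl = MachLabel::from_block(0);
+ /// // Request an 8-byte, 8-aligned constant within 1 MiB of this point.
+ /// buf.defer_constant(lbl, 8, &42u64.to_le_bytes(), 1 << 20);
+ /// // Later, whenever `island_needed()` reports true, call `emit_island()`
+ /// // to actually place pending constants (and any veneers).
+ /// ```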
+ pub fn defer_constant(
+ &mut self,
+ label: MachLabel,
+ align: CodeOffset,
+ data: &[u8],
+ max_distance: CodeOffset,
+ ) {
+ trace!(
+ "defer_constant: eventually emit {} bytes aligned to {} at label {:?}",
+ data.len(),
+ align,
+ label
+ );
+ let deadline = self.cur_offset().saturating_add(max_distance);
+ self.island_worst_case_size += data.len() as CodeOffset;
+ self.island_worst_case_size =
+ (self.island_worst_case_size + I::LabelUse::ALIGN - 1) & !(I::LabelUse::ALIGN - 1);
+ self.pending_constants.push(MachLabelConstant {
+ label,
+ align,
+ data: SmallVec::from(data),
+ });
+ if deadline < self.island_deadline {
+ self.island_deadline = deadline;
+ }
+ }
+
+ /// Is an island needed within the next `distance` bytes?
+ pub fn island_needed(&self, distance: CodeOffset) -> bool {
+ let worst_case_end_of_island = self.cur_offset() + distance + self.island_worst_case_size;
+ worst_case_end_of_island > self.island_deadline
+ }
+
+ /// Emit all pending constants and veneers. Should only be called if
+ /// `island_needed()` returns true, i.e., if we actually reach a deadline:
+ /// otherwise, unnecessary veneers may be inserted.
+ pub fn emit_island(&mut self) {
+ // We're going to purge fixups, so no latest-branch editing can happen
+ // anymore.
+ self.latest_branches.clear();
+
+ let pending_constants = mem::replace(&mut self.pending_constants, SmallVec::new());
+ for MachLabelConstant { label, align, data } in pending_constants.into_iter() {
+ self.align_to(align);
+ self.bind_label(label);
+ self.put_data(&data[..]);
+ }
+
+ let fixup_records = mem::replace(&mut self.fixup_records, SmallVec::new());
+ let mut new_fixups = SmallVec::new();
+ for MachLabelFixup {
+ label,
+ offset,
+ kind,
+ } in fixup_records.into_iter()
+ {
+ trace!(
+ "emit_island: fixup for label {:?} at offset {} kind {:?}",
+ label,
+ offset,
+ kind
+ );
+ // We eagerly perform fixups whose label targets are known and in
+ // range, to avoid unnecessary veneers.
+ let label_offset = self.resolve_label_offset(label);
+ let known = label_offset != UNKNOWN_LABEL_OFFSET;
+ let in_range = if known {
+ if label_offset >= offset {
+ (label_offset - offset) <= kind.max_pos_range()
+ } else {
+ (offset - label_offset) <= kind.max_neg_range()
+ }
+ } else {
+ false
+ };
+
+ trace!(
+ " -> label_offset = {}, known = {}, in_range = {} (pos {} neg {})",
+ label_offset,
+ known,
+ in_range,
+ kind.max_pos_range(),
+ kind.max_neg_range()
+ );
+
+ let start = offset as usize;
+ let end = (offset + kind.patch_size()) as usize;
+ if in_range {
+ debug_assert!(known); // implied by in_range.
+ let slice = &mut self.data[start..end];
+ trace!("patching in-range!");
+ kind.patch(slice, offset, label_offset);
+ } else if !known && !kind.supports_veneer() {
+ // Nothing for now. Keep it for next round.
+ new_fixups.push(MachLabelFixup {
+ label,
+ offset,
+ kind,
+ });
+ } else if !in_range && kind.supports_veneer() {
+ // Allocate space for a veneer in the island.
+ self.align_to(I::LabelUse::ALIGN);
+ let veneer_offset = self.cur_offset();
+ trace!("making a veneer at {}", veneer_offset);
+ let slice = &mut self.data[start..end];
+ // Patch the original label use to refer to the veneer.
+ trace!(
+ "patching original at offset {} to veneer offset {}",
+ offset,
+ veneer_offset
+ );
+ kind.patch(slice, offset, veneer_offset);
+ // Generate the veneer.
+ let veneer_slice = self.get_appended_space(kind.veneer_size() as usize);
+ let (veneer_fixup_off, veneer_label_use) =
+ kind.generate_veneer(veneer_slice, veneer_offset);
+ trace!(
+ "generated veneer; fixup offset {}, label_use {:?}",
+ veneer_fixup_off,
+ veneer_label_use
+ );
+ // If the label is known (but was just out of range), do the
+ // veneer label-use fixup now too; otherwise, save it for later.
+ if known {
+ let start = veneer_fixup_off as usize;
+ let end = (veneer_fixup_off + veneer_label_use.patch_size()) as usize;
+ let veneer_slice = &mut self.data[start..end];
+ trace!("doing veneer fixup right away too");
+ veneer_label_use.patch(veneer_slice, veneer_fixup_off, label_offset);
+ } else {
+ new_fixups.push(MachLabelFixup {
+ label,
+ offset: veneer_fixup_off,
+ kind: veneer_label_use,
+ });
+ }
+ } else {
+ panic!(
+ "Cannot support label-use {:?} (known = {}, in-range = {})",
+ kind, known, in_range
+ );
+ }
+ }
+
+ self.fixup_records = new_fixups;
+ self.island_deadline = UNKNOWN_LABEL_OFFSET;
+ }
+
+ /// Finish any deferred emissions and/or fixups.
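+ ///
+ /// An illustrative sketch:
+ ///
+ /// ```ignore
+ /// let finalized = buf.finish();          // consumes the MachBuffer
+ /// let n_bytes = finalized.total_size();  // size of the emitted code
+ /// ```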
+ pub fn finish(mut self) -> MachBufferFinalized {
+ let _tt = timing::vcode_emit_finish();
+
+ while !self.pending_constants.is_empty() || !self.fixup_records.is_empty() {
+ // `emit_island()` will emit any pending veneers and constants, and
+ // as a side-effect, will also take care of any fixups with resolved
+ // labels eagerly.
+ self.emit_island();
+ }
+
+ // Ensure that all labels have been fixed up after the last island is emitted. This is a
+ // full (release-mode) assert because an unresolved label means the emitted code is
+ // incorrect.
+ assert!(self.fixup_records.is_empty());
+
+ MachBufferFinalized {
+ data: self.data,
+ relocs: self.relocs,
+ traps: self.traps,
+ call_sites: self.call_sites,
+ srclocs: self.srclocs,
+ stack_maps: self.stack_maps,
+ }
+ }
+
+ /// Add an external relocation at the current offset.
+ pub fn add_reloc(
+ &mut self,
+ srcloc: SourceLoc,
+ kind: Reloc,
+ name: &ExternalName,
+ addend: Addend,
+ ) {
+ let name = name.clone();
+ self.relocs.push(MachReloc {
+ offset: self.data.len() as CodeOffset,
+ srcloc,
+ kind,
+ name,
+ addend,
+ });
+ }
+
+ /// Add a trap record at the current offset.
+ pub fn add_trap(&mut self, srcloc: SourceLoc, code: TrapCode) {
+ self.traps.push(MachTrap {
+ offset: self.data.len() as CodeOffset,
+ srcloc,
+ code,
+ });
+ }
+
+ /// Add a call-site record at the current offset.
+ pub fn add_call_site(&mut self, srcloc: SourceLoc, opcode: Opcode) {
+ self.call_sites.push(MachCallSite {
+ ret_addr: self.data.len() as CodeOffset,
+ srcloc,
+ opcode,
+ });
+ }
+
+ /// Set the `SourceLoc` for code from this offset until the offset at the
+ /// next call to `end_srcloc()`.
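+ ///
+ /// An illustrative sketch (`loc` is some `SourceLoc`; `inst`, `info` and
+ /// `state` are as in the unit tests below):
+ ///
+ /// ```ignore
+ /// buf.start_srcloc(loc);
+ /// inst.emit(&mut buf, &info, &mut state);
+ /// buf.end_srcloc(); // records a MachSrcLoc covering the emitted bytes
+ /// ```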
+ pub fn start_srcloc(&mut self, loc: SourceLoc) {
+ self.cur_srcloc = Some((self.cur_offset(), loc));
+ }
+
+ /// Mark the end of the `SourceLoc` segment started at the last
+ /// `start_srcloc()` call.
+ pub fn end_srcloc(&mut self) {
+ let (start, loc) = self
+ .cur_srcloc
+ .take()
+ .expect("end_srcloc() called without start_srcloc()");
+ let end = self.cur_offset();
+ // Skip zero-length ranges.
+ debug_assert!(end >= start);
+ if end > start {
+ self.srclocs.push(MachSrcLoc { start, end, loc });
+ }
+ }
+
+ /// Add stack map metadata for this program point: a set of stack offsets
+ /// (from SP upward) that contain live references.
+ ///
+ /// The `extent` describes which code range the stack map covers: either the
+ /// next `insn_len` bytes about to be emitted (`StackMapExtent::UpcomingBytes`),
+ /// or the range from a previously recorded start offset up to the current
+ /// offset (`StackMapExtent::StartedAtOffset`).
+ pub fn add_stack_map(&mut self, extent: StackMapExtent, stack_map: StackMap) {
+ let (start, end) = match extent {
+ StackMapExtent::UpcomingBytes(insn_len) => {
+ let start_offset = self.cur_offset();
+ (start_offset, start_offset + insn_len)
+ }
+ StackMapExtent::StartedAtOffset(start_offset) => {
+ let end_offset = self.cur_offset();
+ debug_assert!(end_offset >= start_offset);
+ (start_offset, end_offset)
+ }
+ };
+ self.stack_maps.push(MachStackMap {
+ offset: start,
+ offset_end: end,
+ stack_map,
+ });
+ }
+}
+
+impl MachBufferFinalized {
+ /// Get a list of source location mapping tuples in sorted-by-start-offset order.
+ pub fn get_srclocs_sorted(&self) -> &[MachSrcLoc] {
+ &self.srclocs[..]
+ }
+
+ /// Get the total required size for the code.
+ pub fn total_size(&self) -> CodeOffset {
+ self.data.len() as CodeOffset
+ }
+
+ /// Emit this buffer to the given CodeSink.
+ pub fn emit<CS: CodeSink>(&self, sink: &mut CS) {
+ // N.B.: we emit every section into the .text section as far as
+ // the `CodeSink` is concerned; we do not bother to segregate
+ // the contents into the actual program text, the jumptable and the
+ // rodata (constant pool). This allows us to generate code assuming
+ // that these will not be relocated relative to each other, and avoids
+ // having to designate each section as belonging in one of the three
+ // fixed categories defined by `CodeSink`. If this becomes a problem
+ // later (e.g. because of memory permissions or similar), we can
+ // add this designation and segregate the output; take care, however,
+ // to add the appropriate relocations in this case.
+
+ let mut next_reloc = 0;
+ let mut next_trap = 0;
+ let mut next_call_site = 0;
+ for (idx, byte) in self.data.iter().enumerate() {
+ if next_reloc < self.relocs.len() {
+ let reloc = &self.relocs[next_reloc];
+ if reloc.offset == idx as CodeOffset {
+ sink.reloc_external(reloc.srcloc, reloc.kind, &reloc.name, reloc.addend);
+ next_reloc += 1;
+ }
+ }
+ if next_trap < self.traps.len() {
+ let trap = &self.traps[next_trap];
+ if trap.offset == idx as CodeOffset {
+ sink.trap(trap.code, trap.srcloc);
+ next_trap += 1;
+ }
+ }
+ if next_call_site < self.call_sites.len() {
+ let call_site = &self.call_sites[next_call_site];
+ if call_site.ret_addr == idx as CodeOffset {
+ sink.add_call_site(call_site.opcode, call_site.srcloc);
+ next_call_site += 1;
+ }
+ }
+ sink.put1(*byte);
+ }
+
+ sink.begin_jumptables();
+ sink.begin_rodata();
+ sink.end_codegen();
+ }
+
+ /// Get the stack map metadata for this code.
+ pub fn stack_maps(&self) -> &[MachStackMap] {
+ &self.stack_maps[..]
+ }
+}
+
+/// A constant that is deferred to the next constant-pool opportunity.
+struct MachLabelConstant {
+ /// This label will refer to the constant's offset.
+ label: MachLabel,
+ /// Required alignment.
+ align: CodeOffset,
+ /// This data will be emitted when able.
+ data: SmallVec<[u8; 16]>,
+}
+
+/// A fixup to perform on the buffer once code is emitted. Fixups always refer
+/// to labels and patch the code based on label offsets. Hence, they are like
+/// relocations, but internal to one buffer.
+#[derive(Debug)]
+struct MachLabelFixup<I: VCodeInst> {
+ /// The label whose offset controls this fixup.
+ label: MachLabel,
+ /// The offset to fix up / patch to refer to this label.
+ offset: CodeOffset,
+ /// The kind of fixup. This is architecture-specific; each architecture may have,
+ /// e.g., several types of branch instructions, each with differently-sized
+ /// offset fields and different places within the instruction to place the
+ /// bits.
+ kind: I::LabelUse,
+}
+
+/// A relocation resulting from a compilation.
+struct MachReloc {
+ /// The offset at which the relocation applies, *relative to the
+ /// containing section*.
+ offset: CodeOffset,
+ /// The original source location.
+ srcloc: SourceLoc,
+ /// The kind of relocation.
+ kind: Reloc,
+ /// The external symbol / name to which this relocation refers.
+ name: ExternalName,
+ /// The addend to add to the symbol value.
+ addend: i64,
+}
+
+/// A trap record resulting from a compilation.
+struct MachTrap {
+ /// The offset at which the trap instruction occurs, *relative to the
+ /// containing section*.
+ offset: CodeOffset,
+ /// The original source location.
+ srcloc: SourceLoc,
+ /// The trap code.
+ code: TrapCode,
+}
+
+/// A call site record resulting from a compilation.
+struct MachCallSite {
+ /// The offset of the call's return address, *relative to the containing section*.
+ ret_addr: CodeOffset,
+ /// The original source location.
+ srcloc: SourceLoc,
+ /// The call's opcode.
+ opcode: Opcode,
+}
+
+/// A source-location mapping resulting from a compilation.
+#[derive(Clone, Debug)]
+pub struct MachSrcLoc {
+ /// The start of the region of code corresponding to a source location.
+ /// This is relative to the start of the function, not to the start of the
+ /// section.
+ pub start: CodeOffset,
+ /// The end of the region of code corresponding to a source location.
+ /// This is relative to the start of the function, not to the start of the
+ /// section.
+ pub end: CodeOffset,
+ /// The source location.
+ pub loc: SourceLoc,
+}
+
+/// Record of stack map metadata: stack offsets containing references.
+#[derive(Clone, Debug)]
+pub struct MachStackMap {
+ /// The code offset at which this stack map applies.
+ pub offset: CodeOffset,
+ /// The code offset just past the "end" of the instruction: that is, the
+ /// offset of the first byte of the following instruction, or equivalently,
+ /// the start offset plus the instruction length.
+ pub offset_end: CodeOffset,
+ /// The stack map itself.
+ pub stack_map: StackMap,
+}
+
+/// Record of branch instruction in the buffer, to facilitate editing.
+#[derive(Clone, Debug)]
+struct MachBranch {
+ start: CodeOffset,
+ end: CodeOffset,
+ target: MachLabel,
+ fixup: usize,
+ inverted: Option<SmallVec<[u8; 8]>>,
+ /// All labels pointing to the start of this branch. For correctness, this
+ /// *must* be complete (i.e., must contain all labels whose resolved offsets
+ /// are at the start of this branch): we rely on being able to redirect all
+ /// labels that could jump to this branch before removing it, if it is
+ /// otherwise unreachable.
+ labels_at_this_branch: SmallVec<[MachLabel; 4]>,
+}
+
+impl MachBranch {
+ fn is_cond(&self) -> bool {
+ self.inverted.is_some()
+ }
+ fn is_uncond(&self) -> bool {
+ self.inverted.is_none()
+ }
+}
+
+// We use an actual instruction definition to do tests, so we depend on the `arm64` feature here.
+#[cfg(all(test, feature = "arm64"))]
+mod test {
+ use super::*;
+ use crate::isa::aarch64::inst::xreg;
+ use crate::isa::aarch64::inst::{BranchTarget, CondBrKind, EmitInfo, Inst};
+ use crate::machinst::MachInstEmit;
+ use crate::settings;
+ use std::default::Default;
+
+ fn label(n: u32) -> MachLabel {
+ MachLabel::from_block(n)
+ }
+ fn target(n: u32) -> BranchTarget {
+ BranchTarget::Label(label(n))
+ }
+
+ #[test]
+ fn test_elide_jump_to_next() {
+ let info = EmitInfo::new(settings::Flags::new(settings::builder()));
+ let mut buf = MachBuffer::new();
+ let mut state = Default::default();
+
+ buf.reserve_labels_for_blocks(2);
+ buf.bind_label(label(0));
+ let inst = Inst::Jump { dest: target(1) };
+ inst.emit(&mut buf, &info, &mut state);
+ buf.bind_label(label(1));
+ let buf = buf.finish();
+ assert_eq!(0, buf.total_size());
+ }
+
+ #[test]
+ fn test_elide_trivial_jump_blocks() {
+ let info = EmitInfo::new(settings::Flags::new(settings::builder()));
+ let mut buf = MachBuffer::new();
+ let mut state = Default::default();
+
+ buf.reserve_labels_for_blocks(4);
+
+ buf.bind_label(label(0));
+ let inst = Inst::CondBr {
+ kind: CondBrKind::NotZero(xreg(0)),
+ taken: target(1),
+ not_taken: target(2),
+ };
+ inst.emit(&mut buf, &info, &mut state);
+
+ buf.bind_label(label(1));
+ let inst = Inst::Jump { dest: target(3) };
+ inst.emit(&mut buf, &info, &mut state);
+
+ buf.bind_label(label(2));
+ let inst = Inst::Jump { dest: target(3) };
+ inst.emit(&mut buf, &info, &mut state);
+
+ buf.bind_label(label(3));
+
+ let buf = buf.finish();
+ assert_eq!(0, buf.total_size());
+ }
+
+ #[test]
+ fn test_flip_cond() {
+ let info = EmitInfo::new(settings::Flags::new(settings::builder()));
+ let mut buf = MachBuffer::new();
+ let mut state = Default::default();
+
+ buf.reserve_labels_for_blocks(4);
+
+ buf.bind_label(label(0));
+ let inst = Inst::CondBr {
+ kind: CondBrKind::NotZero(xreg(0)),
+ taken: target(1),
+ not_taken: target(2),
+ };
+ inst.emit(&mut buf, &info, &mut state);
+
+ buf.bind_label(label(1));
+ let inst = Inst::Udf {
+ trap_code: TrapCode::Interrupt,
+ };
+ inst.emit(&mut buf, &info, &mut state);
+
+ buf.bind_label(label(2));
+ let inst = Inst::Nop4;
+ inst.emit(&mut buf, &info, &mut state);
+
+ buf.bind_label(label(3));
+
+ let buf = buf.finish();
+
+ let mut buf2 = MachBuffer::new();
+ let mut state = Default::default();
+ let inst = Inst::TrapIf {
+ kind: CondBrKind::NotZero(xreg(0)),
+ trap_code: TrapCode::Interrupt,
+ };
+ inst.emit(&mut buf2, &info, &mut state);
+ let inst = Inst::Nop4;
+ inst.emit(&mut buf2, &info, &mut state);
+
+ let buf2 = buf2.finish();
+
+ assert_eq!(buf.data, buf2.data);
+ }
+
+ #[test]
+ fn test_island() {
+ let info = EmitInfo::new(settings::Flags::new(settings::builder()));
+ let mut buf = MachBuffer::new();
+ let mut state = Default::default();
+
+ buf.reserve_labels_for_blocks(4);
+
+ buf.bind_label(label(0));
+ let inst = Inst::CondBr {
+ kind: CondBrKind::NotZero(xreg(0)),
+ taken: target(2),
+ not_taken: target(3),
+ };
+ inst.emit(&mut buf, &info, &mut state);
+
+ buf.bind_label(label(1));
+ while buf.cur_offset() < 2000000 {
+ if buf.island_needed(0) {
+ buf.emit_island();
+ }
+ let inst = Inst::Nop4;
+ inst.emit(&mut buf, &info, &mut state);
+ }
+
+ buf.bind_label(label(2));
+ let inst = Inst::Nop4;
+ inst.emit(&mut buf, &info, &mut state);
+
+ buf.bind_label(label(3));
+ let inst = Inst::Nop4;
+ inst.emit(&mut buf, &info, &mut state);
+
+ let buf = buf.finish();
+
+ assert_eq!(2000000 + 8, buf.total_size());
+
+ let mut buf2 = MachBuffer::new();
+ let mut state = Default::default();
+ let inst = Inst::CondBr {
+ kind: CondBrKind::NotZero(xreg(0)),
+ taken: BranchTarget::ResolvedOffset(1048576 - 4),
+ not_taken: BranchTarget::ResolvedOffset(2000000 + 4 - 4),
+ };
+ inst.emit(&mut buf2, &info, &mut state);
+
+ let buf2 = buf2.finish();
+
+ assert_eq!(&buf.data[0..8], &buf2.data[..]);
+ }
+
+ #[test]
+ fn test_island_backward() {
+ let info = EmitInfo::new(settings::Flags::new(settings::builder()));
+ let mut buf = MachBuffer::new();
+ let mut state = Default::default();
+
+ buf.reserve_labels_for_blocks(4);
+
+ buf.bind_label(label(0));
+ let inst = Inst::Nop4;
+ inst.emit(&mut buf, &info, &mut state);
+
+ buf.bind_label(label(1));
+ let inst = Inst::Nop4;
+ inst.emit(&mut buf, &info, &mut state);
+
+ buf.bind_label(label(2));
+ while buf.cur_offset() < 2000000 {
+ let inst = Inst::Nop4;
+ inst.emit(&mut buf, &info, &mut state);
+ }
+
+ buf.bind_label(label(3));
+ let inst = Inst::CondBr {
+ kind: CondBrKind::NotZero(xreg(0)),
+ taken: target(0),
+ not_taken: target(1),
+ };
+ inst.emit(&mut buf, &info, &mut state);
+
+ let buf = buf.finish();
+
+ assert_eq!(2000000 + 12, buf.total_size());
+
+ let mut buf2 = MachBuffer::new();
+ let mut state = Default::default();
+ let inst = Inst::CondBr {
+ kind: CondBrKind::NotZero(xreg(0)),
+ taken: BranchTarget::ResolvedOffset(8),
+ not_taken: BranchTarget::ResolvedOffset(4 - (2000000 + 4)),
+ };
+ inst.emit(&mut buf2, &info, &mut state);
+ let inst = Inst::Jump {
+ dest: BranchTarget::ResolvedOffset(-(2000000 + 8)),
+ };
+ inst.emit(&mut buf2, &info, &mut state);
+
+ let buf2 = buf2.finish();
+
+ assert_eq!(&buf.data[2000000..], &buf2.data[..]);
+ }
+
+ #[test]
+ fn test_multiple_redirect() {
+ // label0:
+ // cbz x0, label1
+ // b label2
+ // label1:
+ // b label3
+ // label2:
+ // nop
+ // nop
+ // b label0
+ // label3:
+ // b label4
+ // label4:
+ // b label5
+ // label5:
+ // b label7
+ // label6:
+ // nop
+ // label7:
+ // ret
+ //
+ // -- should become:
+ //
+ // label0:
+ // cbz x0, label7
+ // label2:
+ // nop
+ // nop
+ // b label0
+ // label6:
+ // nop
+ // label7:
+ // ret
+
+ let info = EmitInfo::new(settings::Flags::new(settings::builder()));
+ let mut buf = MachBuffer::new();
+ let mut state = Default::default();
+
+ buf.reserve_labels_for_blocks(8);
+
+ buf.bind_label(label(0));
+ let inst = Inst::CondBr {
+ kind: CondBrKind::Zero(xreg(0)),
+ taken: target(1),
+ not_taken: target(2),
+ };
+ inst.emit(&mut buf, &info, &mut state);
+
+ buf.bind_label(label(1));
+ let inst = Inst::Jump { dest: target(3) };
+ inst.emit(&mut buf, &info, &mut state);
+
+ buf.bind_label(label(2));
+ let inst = Inst::Nop4;
+ inst.emit(&mut buf, &info, &mut state);
+ inst.emit(&mut buf, &info, &mut state);
+ let inst = Inst::Jump { dest: target(0) };
+ inst.emit(&mut buf, &info, &mut state);
+
+ buf.bind_label(label(3));
+ let inst = Inst::Jump { dest: target(4) };
+ inst.emit(&mut buf, &info, &mut state);
+
+ buf.bind_label(label(4));
+ let inst = Inst::Jump { dest: target(5) };
+ inst.emit(&mut buf, &info, &mut state);
+
+ buf.bind_label(label(5));
+ let inst = Inst::Jump { dest: target(7) };
+ inst.emit(&mut buf, &info, &mut state);
+
+ buf.bind_label(label(6));
+ let inst = Inst::Nop4;
+ inst.emit(&mut buf, &info, &mut state);
+
+ buf.bind_label(label(7));
+ let inst = Inst::Ret;
+ inst.emit(&mut buf, &info, &mut state);
+
+ let buf = buf.finish();
+
+ let golden_data = vec![
+ 0xa0, 0x00, 0x00, 0xb4, // cbz x0, 0x14
+ 0x1f, 0x20, 0x03, 0xd5, // nop
+ 0x1f, 0x20, 0x03, 0xd5, // nop
+ 0xfd, 0xff, 0xff, 0x17, // b 0
+ 0x1f, 0x20, 0x03, 0xd5, // nop
+ 0xc0, 0x03, 0x5f, 0xd6, // ret
+ ];
+
+ assert_eq!(&golden_data[..], &buf.data[..]);
+ }
+
+ #[test]
+ fn test_handle_branch_cycle() {
+ // label0:
+ // b label1
+ // label1:
+ // b label2
+ // label2:
+ // b label3
+ // label3:
+ // b label4
+ // label4:
+ // b label1 // note: not label0 (to make it interesting).
+ //
+ // -- should become:
+ //
+ // label0, label1, ..., label4:
+ // b label0
+ let info = EmitInfo::new(settings::Flags::new(settings::builder()));
+ let mut buf = MachBuffer::new();
+ let mut state = Default::default();
+
+ buf.reserve_labels_for_blocks(5);
+
+ buf.bind_label(label(0));
+ let inst = Inst::Jump { dest: target(1) };
+ inst.emit(&mut buf, &info, &mut state);
+
+ buf.bind_label(label(1));
+ let inst = Inst::Jump { dest: target(2) };
+ inst.emit(&mut buf, &info, &mut state);
+
+ buf.bind_label(label(2));
+ let inst = Inst::Jump { dest: target(3) };
+ inst.emit(&mut buf, &info, &mut state);
+
+ buf.bind_label(label(3));
+ let inst = Inst::Jump { dest: target(4) };
+ inst.emit(&mut buf, &info, &mut state);
+
+ buf.bind_label(label(4));
+ let inst = Inst::Jump { dest: target(1) };
+ inst.emit(&mut buf, &info, &mut state);
+
+ let buf = buf.finish();
+
+ let golden_data = vec![
+ 0x00, 0x00, 0x00, 0x14, // b 0
+ ];
+
+ assert_eq!(&golden_data[..], &buf.data[..]);
+ }
+}
diff --git a/third_party/rust/cranelift-codegen/src/machinst/compile.rs b/third_party/rust/cranelift-codegen/src/machinst/compile.rs
new file mode 100644
index 0000000000..9a00cee805
--- /dev/null
+++ b/third_party/rust/cranelift-codegen/src/machinst/compile.rs
@@ -0,0 +1,110 @@
+//! Compilation backend pipeline: optimized IR to VCode / binemit.
+
+use crate::ir::Function;
+use crate::machinst::*;
+use crate::settings;
+use crate::timing;
+
+use log::debug;
+use regalloc::{allocate_registers_with_opts, Algorithm, Options, PrettyPrint};
+
+/// Compile the given function down to VCode with allocated registers, ready
+/// for binary emission.
+pub fn compile<B: LowerBackend + MachBackend>(
+ f: &Function,
+ b: &B,
+ abi: Box<dyn ABICallee<I = B::MInst>>,
+ emit_info: <B::MInst as MachInstEmit>::Info,
+) -> CodegenResult<VCode<B::MInst>>
+where
+ B::MInst: PrettyPrint,
+{
+ // Compute lowered block order.
+ let block_order = BlockLoweringOrder::new(f);
+ // Build the lowering context.
+ let lower = Lower::new(f, abi, emit_info, block_order)?;
+ // Lower the IR.
+ let (mut vcode, stack_map_request_info) = {
+ let _tt = timing::vcode_lower();
+ lower.lower(b)?
+ };
+
+ debug!(
+ "vcode from lowering: \n{}",
+ vcode.show_rru(Some(b.reg_universe()))
+ );
+
+ // Perform register allocation.
+ let (run_checker, algorithm) = match vcode.flags().regalloc() {
+ settings::Regalloc::Backtracking => (false, Algorithm::Backtracking(Default::default())),
+ settings::Regalloc::BacktrackingChecked => {
+ (true, Algorithm::Backtracking(Default::default()))
+ }
+ settings::Regalloc::ExperimentalLinearScan => {
+ (false, Algorithm::LinearScan(Default::default()))
+ }
+ settings::Regalloc::ExperimentalLinearScanChecked => {
+ (true, Algorithm::LinearScan(Default::default()))
+ }
+ };
+
+ #[cfg(feature = "regalloc-snapshot")]
+ {
+ use std::fs;
+ use std::path::Path;
+ if let Some(path) = std::env::var("SERIALIZE_REGALLOC").ok() {
+ let snapshot = regalloc::IRSnapshot::from_function(&vcode, b.reg_universe());
+ let serialized = bincode::serialize(&snapshot).expect("couldn't serialize snapshot");
+
+ let file_path = Path::new(&path).join(Path::new(&format!("ir{}.bin", f.name)));
+ fs::write(file_path, &serialized).expect("couldn't write IR snapshot file");
+ }
+ }
+
+ // Stack maps are needed only if there is at least one reference-typed
+ // value and at least one safepoint at which it must be recorded;
+ // otherwise, skip stack-map generation entirely.
+ let sri = if stack_map_request_info.reftyped_vregs.len() > 0
+ && stack_map_request_info.safepoint_insns.len() > 0
+ {
+ Some(&stack_map_request_info)
+ } else {
+ None
+ };
+
+ let result = {
+ let _tt = timing::regalloc();
+ allocate_registers_with_opts(
+ &mut vcode,
+ b.reg_universe(),
+ sri,
+ Options {
+ run_checker,
+ algorithm,
+ },
+ )
+ .map_err(|err| {
+ debug!(
+ "Register allocation error for vcode\n{}\nError: {:?}",
+ vcode.show_rru(Some(b.reg_universe())),
+ err
+ );
+ err
+ })
+ .expect("register allocation")
+ };
+
+ // Reorder vcode into final order and copy out final instruction sequence
+ // all at once. This also inserts prologues/epilogues.
+ {
+ let _tt = timing::vcode_post_ra();
+ vcode.replace_insns_from_regalloc(result);
+ }
+
+ debug!(
+ "vcode after regalloc: final version:\n{}",
+ vcode.show_rru(Some(b.reg_universe()))
+ );
+
+ Ok(vcode)
+}
diff --git a/third_party/rust/cranelift-codegen/src/machinst/helpers.rs b/third_party/rust/cranelift-codegen/src/machinst/helpers.rs
new file mode 100644
index 0000000000..0138da7670
--- /dev/null
+++ b/third_party/rust/cranelift-codegen/src/machinst/helpers.rs
@@ -0,0 +1,28 @@
+//! Miscellaneous helpers for machine backends.
+
+use super::{InsnOutput, LowerCtx, VCodeInst};
+use crate::ir::Type;
+use regalloc::{Reg, Writable};
+
+/// Returns the size (in bits) of a given type.
+pub fn ty_bits(ty: Type) -> usize {
+ usize::from(ty.bits())
+}
+
+/// Is the type represented by an integer (not float) at the machine level?
+pub(crate) fn ty_has_int_representation(ty: Type) -> bool {
+ ty.is_int() || ty.is_bool() || ty.is_ref()
+}
+
+/// Is the type represented by a float or vector value at the machine level?
+pub(crate) fn ty_has_float_or_vec_representation(ty: Type) -> bool {
+ ty.is_vector() || ty.is_float()
+}
+
+/// Allocate a register for an instruction output and return it.
+pub(crate) fn get_output_reg<I: VCodeInst, C: LowerCtx<I = I>>(
+ ctx: &mut C,
+ spec: InsnOutput,
+) -> Writable<Reg> {
+ ctx.get_output(spec.insn, spec.output)
+}
diff --git a/third_party/rust/cranelift-codegen/src/machinst/inst_common.rs b/third_party/rust/cranelift-codegen/src/machinst/inst_common.rs
new file mode 100644
index 0000000000..1ff6faedd4
--- /dev/null
+++ b/third_party/rust/cranelift-codegen/src/machinst/inst_common.rs
@@ -0,0 +1,59 @@
+//! A place to park MachInst::Inst fragments which are common across multiple architectures.
+
+use crate::ir::{self, Inst as IRInst};
+
+//============================================================================
+// Instruction input "slots".
+//
+// We use these types to refer to operand numbers, and result numbers, together
+// with the associated instruction, in a type-safe way.
+
+/// Identifier for a particular input of an instruction.
+#[derive(Clone, Copy, Debug, PartialEq, Eq)]
+pub(crate) struct InsnInput {
+ pub(crate) insn: IRInst,
+ pub(crate) input: usize,
+}
+
+/// Identifier for a particular output of an instruction.
+#[derive(Clone, Copy, Debug, PartialEq, Eq)]
+pub(crate) struct InsnOutput {
+ pub(crate) insn: IRInst,
+ pub(crate) output: usize,
+}
+
+//============================================================================
+// Atomic instructions.
+
+/// Atomic memory update operations. As of 21 Aug 2020 these are used for the aarch64 and x64
+/// targets.
+#[derive(Clone, Copy, Debug, PartialEq, Eq)]
+#[repr(u8)]
+pub enum AtomicRmwOp {
+ /// Add
+ Add,
+ /// Sub
+ Sub,
+ /// And
+ And,
+ /// Or
+ Or,
+ /// Exclusive Or
+ Xor,
+ /// Exchange (swap operands)
+ Xchg,
+}
+
+impl AtomicRmwOp {
+ /// Converts an `ir::AtomicRmwOp` to the corresponding `inst_common::AtomicRmwOp`.
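+ ///
+ /// For example, `AtomicRmwOp::from(ir::AtomicRmwOp::Xchg)` yields
+ /// `AtomicRmwOp::Xchg`.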
+ pub fn from(ir_op: ir::AtomicRmwOp) -> Self {
+ match ir_op {
+ ir::AtomicRmwOp::Add => AtomicRmwOp::Add,
+ ir::AtomicRmwOp::Sub => AtomicRmwOp::Sub,
+ ir::AtomicRmwOp::And => AtomicRmwOp::And,
+ ir::AtomicRmwOp::Or => AtomicRmwOp::Or,
+ ir::AtomicRmwOp::Xor => AtomicRmwOp::Xor,
+ ir::AtomicRmwOp::Xchg => AtomicRmwOp::Xchg,
+ }
+ }
+}
diff --git a/third_party/rust/cranelift-codegen/src/machinst/lower.rs b/third_party/rust/cranelift-codegen/src/machinst/lower.rs
new file mode 100644
index 0000000000..531085ae4e
--- /dev/null
+++ b/third_party/rust/cranelift-codegen/src/machinst/lower.rs
@@ -0,0 +1,1084 @@
+//! This module implements lowering (instruction selection) from Cranelift IR
+//! to machine instructions with virtual registers. This is *almost* the final
+//! machine code, except for register allocation.
+
+use crate::entity::SecondaryMap;
+use crate::fx::{FxHashMap, FxHashSet};
+use crate::inst_predicates::{has_lowering_side_effect, is_constant_64bit};
+use crate::ir::instructions::BranchInfo;
+use crate::ir::types::I64;
+use crate::ir::{
+ ArgumentPurpose, Block, Constant, ConstantData, ExternalName, Function, GlobalValueData, Inst,
+ InstructionData, MemFlags, Opcode, Signature, SourceLoc, Type, Value, ValueDef,
+};
+use crate::machinst::{
+ ABICallee, BlockIndex, BlockLoweringOrder, LoweredBlock, MachLabel, VCode, VCodeBuilder,
+ VCodeConstant, VCodeConstantData, VCodeConstants, VCodeInst,
+};
+use crate::CodegenResult;
+
+use regalloc::{Reg, RegClass, StackmapRequestInfo, VirtualReg, Writable};
+
+use crate::data_value::DataValue;
+use alloc::boxed::Box;
+use alloc::vec::Vec;
+use core::convert::TryInto;
+use log::debug;
+use smallvec::SmallVec;
+
+/// An "instruction color" partitions CLIF instructions by side-effecting ops.
+/// All instructions with the same "color" are guaranteed not to be separated by
+/// any side-effecting op (for this purpose, loads are also considered
+/// side-effecting, to avoid subtle questions w.r.t. the memory model), and
+/// furthermore, it is guaranteed that for any two instructions A and B such
+/// that color(A) == color(B), either A dominates B and B postdominates A, or
+/// vice-versa. (For now, in practice, only ops in the same basic block can ever
+/// have the same color, trivially providing the second condition.) Intuitively,
+/// this means that the ops of the same color must always execute "together", as
+/// part of one atomic contiguous section of the dynamic execution trace, and
+/// they can be freely permuted (modulo true dataflow dependencies) without
+/// affecting program behavior.
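+ ///
+ /// An illustrative coloring within a single block (instruction forms are
+ /// pseudocode):
+ ///
+ /// ```text
+ /// v1 = iadd v0, v0    ; color 1
+ /// store v1, p0        ; color 1 (side-effecting: color changes after it)
+ /// v2 = iadd v1, v1    ; color 2
+ /// v3 = load.i64 p0    ; color 2 (loads count as side-effecting here)
+ /// v4 = iadd v3, v3    ; color 3
+ /// ```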
+#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash)]
+pub struct InstColor(u32);
+impl InstColor {
+ fn new(n: u32) -> InstColor {
+ InstColor(n)
+ }
+
+ /// Get an arbitrary index representing this color. The index is unique
+ /// *within a single function compilation*, but indices may be reused across
+ /// functions.
+ pub fn get(self) -> u32 {
+ self.0
+ }
+}
+
+/// A context that machine-specific lowering code can use to emit lowered
+/// instructions. This is the view of the machine-independent per-function
+/// lowering context that is seen by the machine backend.
+pub trait LowerCtx {
+ /// The instruction type for which this lowering framework is instantiated.
+ type I: VCodeInst;
+
+ // Function-level queries:
+
+ /// Get the `ABICallee`.
+ fn abi(&mut self) -> &mut dyn ABICallee<I = Self::I>;
+ /// Get the (virtual) register that receives the return value. A return
+ /// instruction should lower into a sequence that fills this register. (Why
+ /// not allow the backend to specify its own result register for the return?
+ /// Because there may be multiple return points.)
+ fn retval(&self, idx: usize) -> Writable<Reg>;
+ /// Returns the vreg containing the VmContext parameter, if there's one.
+ fn get_vm_context(&self) -> Option<Reg>;
+
+ // General instruction queries:
+
+ /// Get the instdata for a given IR instruction.
+ fn data(&self, ir_inst: Inst) -> &InstructionData;
+ /// Get the controlling type for a polymorphic IR instruction.
+ fn ty(&self, ir_inst: Inst) -> Type;
+ /// Get the target for a call instruction, as an `ExternalName`. Returns a tuple
+ /// providing this name and the "relocation distance", i.e., whether the backend
+ /// can assume the target will be "nearby" (within some small offset) or an
+ /// arbitrary address. (This comes from the `colocated` bit in the CLIF.)
+ fn call_target<'b>(&'b self, ir_inst: Inst) -> Option<(&'b ExternalName, RelocDistance)>;
+ /// Get the signature for a call or call-indirect instruction.
+ fn call_sig<'b>(&'b self, ir_inst: Inst) -> Option<&'b Signature>;
+ /// Get the symbol name, relocation distance estimate, and offset for a
+ /// symbol_value instruction.
+ fn symbol_value<'b>(&'b self, ir_inst: Inst) -> Option<(&'b ExternalName, RelocDistance, i64)>;
+ /// Returns the memory flags of a given memory access.
+ fn memflags(&self, ir_inst: Inst) -> Option<MemFlags>;
+ /// Get the source location for a given instruction.
+ fn srcloc(&self, ir_inst: Inst) -> SourceLoc;
+ /// Get the side-effect color of the given instruction (specifically, at the
+ /// program point just prior to the instruction). The "color" changes at
+ /// every side-effecting op; the backend should not try to merge across
+ /// side-effect colors unless the op being merged is known to be pure.
+ fn inst_color(&self, ir_inst: Inst) -> InstColor;
+
+ // Instruction input/output queries:
+
+ /// Get the number of inputs to the given IR instruction.
+ fn num_inputs(&self, ir_inst: Inst) -> usize;
+ /// Get the number of outputs to the given IR instruction.
+ fn num_outputs(&self, ir_inst: Inst) -> usize;
+ /// Get the type for an instruction's input.
+ fn input_ty(&self, ir_inst: Inst, idx: usize) -> Type;
+ /// Get the type for an instruction's output.
+ fn output_ty(&self, ir_inst: Inst, idx: usize) -> Type;
+ /// Get the value of a constant instruction (`iconst`, etc.) as a 64-bit
+ /// value, if possible.
+ fn get_constant(&self, ir_inst: Inst) -> Option<u64>;
+ /// Get the input in any combination of three forms:
+ ///
+ /// - An instruction, if the same color as this instruction or if the
+ /// producing instruction has no side effects (thus in both cases
+ /// mergeable);
+ /// - A constant, if the value is a constant;
+ /// - A register.
+ ///
+ /// The instruction input may be available in some or all of these
+ /// forms. More than one is possible: e.g., it may be produced by an
+ /// instruction in the same block, but may also have been forced into a
+ /// register already by an earlier op. It will *always* be available
+ /// in a register, at least.
+ ///
+ /// If the backend uses the register, rather than one of the other
+ /// forms (constant or merging of the producing op), it must call
+ /// `use_input_reg()` to ensure the producing inst is actually lowered
+ /// as well. Failing to do so may result in the instruction that generates
+ /// this value never being generated, thus resulting in incorrect execution.
+ /// For correctness, backends should thus wrap `get_input()` and
+ /// `use_input_reg()` with helpers that return a register only after
+ /// ensuring it is marked as used.
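+ ///
+ /// A typical backend pattern, as an illustrative sketch (`emit_imm_form`
+ /// and `emit_reg_form` stand in for whatever the backend would emit):
+ ///
+ /// ```ignore
+ /// let input = ctx.get_input(ir_inst, 0);
+ /// if let Some(c) = input.constant {
+ ///     emit_imm_form(ctx, c);
+ /// } else {
+ ///     // Mark the producer as needed before relying on the register.
+ ///     ctx.use_input_reg(input);
+ ///     emit_reg_form(ctx, input.reg);
+ /// }
+ /// ```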
+ fn get_input(&self, ir_inst: Inst, idx: usize) -> LowerInput;
+ /// Get the `idx`th output register of the given IR instruction. When
+ /// `backend.lower_inst_to_regs(ctx, inst)` is called, it is expected that
+ /// the backend will write results to these output register(s). This
+ /// register will always be "fresh"; it is guaranteed not to overlap with
+ /// any of the inputs, and can be freely used as a scratch register within
+ /// the lowered instruction sequence, as long as its final value is the
+ /// result of the computation.
+ fn get_output(&self, ir_inst: Inst, idx: usize) -> Writable<Reg>;
+
+ // Codegen primitives: allocate temps, emit instructions, set result registers,
+ // ask for an input to be gen'd into a register.
+
+ /// Get a new temp.
+ fn alloc_tmp(&mut self, rc: RegClass, ty: Type) -> Writable<Reg>;
+ /// Emit a machine instruction.
+ fn emit(&mut self, mach_inst: Self::I);
+ /// Emit a machine instruction that is a safepoint.
+ fn emit_safepoint(&mut self, mach_inst: Self::I);
+ /// Indicate that the given input uses the register returned by
+ /// `get_input()`. Without this call, the producing instruction may never
+ /// be codegen'd if it has no side effects and no other uses.
+ fn use_input_reg(&mut self, input: LowerInput);
+ /// Is the given register output needed after the given instruction? Allows
+ /// instructions with multiple outputs to make fine-grained decisions on
+ /// which outputs to actually generate.
+ fn is_reg_needed(&self, ir_inst: Inst, reg: Reg) -> bool;
+ /// Retrieve constant data given a handle.
+ fn get_constant_data(&self, constant_handle: Constant) -> &ConstantData;
+ /// Indicate that a constant should be emitted.
+ fn use_constant(&mut self, constant: VCodeConstantData) -> VCodeConstant;
+ /// Retrieve the value immediate from an instruction. This will perform necessary lookups on the
+ /// `DataFlowGraph` to retrieve even large immediates.
+ fn get_immediate(&self, ir_inst: Inst) -> Option<DataValue>;
+ /// Cause the value in `reg` to be in a virtual reg, by copying it into a new virtual reg
+ /// if `reg` is a real reg. `ty` describes the type of the value in `reg`.
+ fn ensure_in_vreg(&mut self, reg: Reg, ty: Type) -> Reg;
+}
+
+/// A representation of all of the ways in which an instruction input is
+/// available: as a producing instruction (in the same color-partition), as a
+/// constant, and/or in an existing register. See [LowerCtx::get_input] for more
+/// details.
+#[derive(Clone, Copy, Debug)]
+pub struct LowerInput {
+ /// The value is live in a register. This option is always available. Call
+ /// [LowerCtx::use_input_reg()] if the register is used.
+ pub reg: Reg,
+ /// An instruction produces this value; the instruction's result index that
+ /// produces this value is given.
+ pub inst: Option<(Inst, usize)>,
+ /// The value is a known constant.
+ pub constant: Option<u64>,
+}
+
+/// A machine backend.
+pub trait LowerBackend {
+ /// The machine instruction type.
+ type MInst: VCodeInst;
+
+ /// Lower a single instruction.
+ ///
+ /// For a branch, this function should not generate the actual branch
+ /// instruction. However, it must force any values it needs for the branch
+ /// edge (block-param actuals) into registers, because the actual branch
+ /// generation (`lower_branch_group()`) happens *after* any possible merged
+ /// out-edge.
+ fn lower<C: LowerCtx<I = Self::MInst>>(&self, ctx: &mut C, inst: Inst) -> CodegenResult<()>;
+
+ /// Lower a block-terminating group of branches (which together can be seen
+ /// as one N-way branch), given a vcode MachLabel for each target.
+ fn lower_branch_group<C: LowerCtx<I = Self::MInst>>(
+ &self,
+ ctx: &mut C,
+ insts: &[Inst],
+ targets: &[MachLabel],
+ fallthrough: Option<MachLabel>,
+ ) -> CodegenResult<()>;
+
+ /// A bit of a hack: give a fixed register that always holds the result of a
+ /// `get_pinned_reg` instruction, if known. This allows elision of moves
+ /// into the associated vreg, instead using the real reg directly.
+ fn maybe_pinned_reg(&self) -> Option<Reg> {
+ None
+ }
+}
+
+/// A pending instruction to insert and auxiliary information about it: its source location and
+/// whether it is a safepoint.
+struct InstTuple<I: VCodeInst> {
+ loc: SourceLoc,
+ is_safepoint: bool,
+ inst: I,
+}
+
+/// Machine-independent lowering driver / machine-instruction container. Maintains a correspondence
+/// from original Inst to MachInsts.
+pub struct Lower<'func, I: VCodeInst> {
+ /// The function to lower.
+ f: &'func Function,
+
+ /// Lowered machine instructions.
+ vcode: VCodeBuilder<I>,
+
+ /// Mapping from `Value` (SSA value in IR) to virtual register.
+ value_regs: SecondaryMap<Value, Reg>,
+
+ /// Return-value vregs.
+ retval_regs: Vec<Reg>,
+
+ /// Instruction colors.
+ inst_colors: SecondaryMap<Inst, InstColor>,
+
+ /// Instruction constant values, if known.
+ inst_constants: FxHashMap<Inst, u64>,
+
+ /// Instruction has a side-effect and must be codegen'd.
+ inst_needed: SecondaryMap<Inst, bool>,
+
+ /// Value (vreg) is needed and producer must be codegen'd.
+ vreg_needed: Vec<bool>,
+
+ /// Next virtual register number to allocate.
+ next_vreg: u32,
+
+ /// Insts in reverse block order, before final copy to vcode.
+ block_insts: Vec<InstTuple<I>>,
+
+ /// Ranges in `block_insts` constituting BBs.
+ block_ranges: Vec<(usize, usize)>,
+
+ /// Instructions collected for the BB in progress, in reverse order, with
+ /// source-locs attached.
+ bb_insts: Vec<InstTuple<I>>,
+
+ /// Instructions collected for the CLIF inst in progress, in forward order.
+ ir_insts: Vec<InstTuple<I>>,
+
+ /// The register to use for GetPinnedReg, if any, on this architecture.
+ pinned_reg: Option<Reg>,
+
+ /// The vreg containing the special VmContext parameter, if it is present in the current
+ /// function's signature.
+ vm_context: Option<Reg>,
+}
+
+/// Notion of "relocation distance". This gives an estimate of how far away a symbol will be from a
+/// reference.
+#[derive(Clone, Copy, Debug, PartialEq, Eq)]
+pub enum RelocDistance {
+ /// Target of relocation is "nearby". The threshold for this is fuzzy but should be interpreted
+ /// as approximately "within the compiled output of one module"; e.g., within AArch64's +/-
+ /// 128MB offset. If unsure, use `Far` instead.
+ Near,
+ /// Target of relocation could be anywhere in the address space.
+ Far,
+}
+
+fn alloc_vreg(
+ value_regs: &mut SecondaryMap<Value, Reg>,
+ regclass: RegClass,
+ value: Value,
+ next_vreg: &mut u32,
+) -> VirtualReg {
+ if value_regs[value].is_invalid() {
+ // default value in map.
+ let v = *next_vreg;
+ *next_vreg += 1;
+ value_regs[value] = Reg::new_virtual(regclass, v);
+ debug!("value {} gets vreg {:?}", value, v);
+ }
+ value_regs[value].as_virtual_reg().unwrap()
+}
+
+enum GenerateReturn {
+ Yes,
+ No,
+}
+
+impl<'func, I: VCodeInst> Lower<'func, I> {
+ /// Prepare a new lowering context for the given IR function.
+ pub fn new(
+ f: &'func Function,
+ abi: Box<dyn ABICallee<I = I>>,
+ emit_info: I::Info,
+ block_order: BlockLoweringOrder,
+ ) -> CodegenResult<Lower<'func, I>> {
+ let constants = VCodeConstants::with_capacity(f.dfg.constants.len());
+ let mut vcode = VCodeBuilder::new(abi, emit_info, block_order, constants);
+
+ let mut next_vreg: u32 = 0;
+
+ let mut value_regs = SecondaryMap::with_default(Reg::invalid());
+
+ // Assign a vreg to each block param and each inst result.
+ for bb in f.layout.blocks() {
+ for &param in f.dfg.block_params(bb) {
+ let ty = f.dfg.value_type(param);
+ let vreg = alloc_vreg(&mut value_regs, I::rc_for_type(ty)?, param, &mut next_vreg);
+ vcode.set_vreg_type(vreg, ty);
+ debug!("bb {} param {}: vreg {:?}", bb, param, vreg);
+ }
+ for inst in f.layout.block_insts(bb) {
+ for &result in f.dfg.inst_results(inst) {
+ let ty = f.dfg.value_type(result);
+ let vreg =
+ alloc_vreg(&mut value_regs, I::rc_for_type(ty)?, result, &mut next_vreg);
+ vcode.set_vreg_type(vreg, ty);
+ debug!(
+ "bb {} inst {} ({:?}): result vreg {:?}",
+ bb, inst, f.dfg[inst], vreg
+ );
+ }
+ }
+ }
+
+ let vm_context = f
+ .signature
+ .special_param_index(ArgumentPurpose::VMContext)
+ .map(|vm_context_index| {
+ let entry_block = f.layout.entry_block().unwrap();
+ let param = f.dfg.block_params(entry_block)[vm_context_index];
+ value_regs[param]
+ });
+
+ // Assign a vreg to each return value.
+ let mut retval_regs = vec![];
+ for ret in &f.signature.returns {
+ let v = next_vreg;
+ next_vreg += 1;
+ let regclass = I::rc_for_type(ret.value_type)?;
+ let vreg = Reg::new_virtual(regclass, v);
+ retval_regs.push(vreg);
+ vcode.set_vreg_type(vreg.as_virtual_reg().unwrap(), ret.value_type);
+ }
+
+ // Compute instruction colors, find constant instructions, and find instructions with
+ // side-effects, in one combined pass.
+ let mut cur_color = 0;
+ let mut inst_colors = SecondaryMap::with_default(InstColor::new(0));
+ let mut inst_constants = FxHashMap::default();
+ let mut inst_needed = SecondaryMap::with_default(false);
+ for bb in f.layout.blocks() {
+ cur_color += 1;
+ for inst in f.layout.block_insts(bb) {
+ let side_effect = has_lowering_side_effect(f, inst);
+
+ // Assign colors. A new color is chosen *after* any side-effecting instruction.
+ inst_colors[inst] = InstColor::new(cur_color);
+ debug!("bb {} inst {} has color {}", bb, inst, cur_color);
+ if side_effect {
+ debug!(" -> side-effecting");
+ inst_needed[inst] = true;
+ cur_color += 1;
+ }
+
+ // Determine if this is a constant; if so, add to the table.
+ if let Some(c) = is_constant_64bit(f, inst) {
+ debug!(" -> constant: {}", c);
+ inst_constants.insert(inst, c);
+ }
+ }
+ }
+
+ let vreg_needed = std::iter::repeat(false).take(next_vreg as usize).collect();
+
+ Ok(Lower {
+ f,
+ vcode,
+ value_regs,
+ retval_regs,
+ inst_colors,
+ inst_constants,
+ inst_needed,
+ vreg_needed,
+ next_vreg,
+ block_insts: vec![],
+ block_ranges: vec![],
+ bb_insts: vec![],
+ ir_insts: vec![],
+ pinned_reg: None,
+ vm_context,
+ })
+ }
+
+ fn gen_arg_setup(&mut self) {
+ if let Some(entry_bb) = self.f.layout.entry_block() {
+ debug!(
+ "gen_arg_setup: entry BB {} args are:\n{:?}",
+ entry_bb,
+ self.f.dfg.block_params(entry_bb)
+ );
+ for (i, param) in self.f.dfg.block_params(entry_bb).iter().enumerate() {
+ if !self.vcode.abi().arg_is_needed_in_body(i) {
+ continue;
+ }
+ let reg = Writable::from_reg(self.value_regs[*param]);
+ let insn = self.vcode.abi().gen_copy_arg_to_reg(i, reg);
+ self.emit(insn);
+ }
+ if let Some(insn) = self.vcode.abi().gen_retval_area_setup() {
+ self.emit(insn);
+ }
+ }
+ }
+
+ fn gen_retval_setup(&mut self, gen_ret_inst: GenerateReturn) {
+ let retval_regs = self.retval_regs.clone();
+ for (i, reg) in retval_regs.into_iter().enumerate() {
+ let reg = Writable::from_reg(reg);
+ let insns = self.vcode.abi().gen_copy_reg_to_retval(i, reg);
+ for insn in insns {
+ self.emit(insn);
+ }
+ }
+ let inst = match gen_ret_inst {
+ GenerateReturn::Yes => self.vcode.abi().gen_ret(),
+ GenerateReturn::No => self.vcode.abi().gen_epilogue_placeholder(),
+ };
+ self.emit(inst);
+ }
+
+ fn lower_edge(&mut self, pred: Block, inst: Inst, succ: Block) -> CodegenResult<()> {
+ debug!("lower_edge: pred {} succ {}", pred, succ);
+
+ let num_args = self.f.dfg.block_params(succ).len();
+ debug_assert!(num_args == self.f.dfg.inst_variable_args(inst).len());
+
+ // Most blocks have no params, so skip all the hoop-jumping below and make an early exit.
+ if num_args == 0 {
+ return Ok(());
+ }
+
+ // Make up two vectors of info:
+ //
+ // * one for dsts which are to be assigned constants. We'll deal with those second, so
+ // as to minimise live ranges.
+ //
+ // * one for dsts whose sources are non-constants.
+
+ let mut const_bundles = SmallVec::<[(Type, Writable<Reg>, u64); 16]>::new();
+ let mut var_bundles = SmallVec::<[(Type, Writable<Reg>, Reg); 16]>::new();
+
+ let mut i = 0;
+ for (dst_val, src_val) in self
+ .f
+ .dfg
+ .block_params(succ)
+ .iter()
+ .zip(self.f.dfg.inst_variable_args(inst).iter())
+ {
+ let src_val = self.f.dfg.resolve_aliases(*src_val);
+ let ty = self.f.dfg.value_type(src_val);
+
+ debug_assert!(ty == self.f.dfg.value_type(*dst_val));
+ let dst_reg = self.value_regs[*dst_val];
+
+ let input = self.get_input_for_val(inst, src_val);
+ debug!("jump arg {} is {}, reg {:?}", i, src_val, input.reg);
+ i += 1;
+
+ if let Some(c) = input.constant {
+ const_bundles.push((ty, Writable::from_reg(dst_reg), c));
+ } else {
+ self.use_input_reg(input);
+ let src_reg = input.reg;
+ // Skip self-assignments. Not only are they pointless, they falsely trigger the
+ // overlap-check below and hence can cause a lot of unnecessary copying through
+ // temporaries.
+ if dst_reg != src_reg {
+ var_bundles.push((ty, Writable::from_reg(dst_reg), src_reg));
+ }
+ }
+ }
+
+ // Deal first with the moves whose sources are variables.
+
+ // FIXME: use regalloc.rs' SparseSetU here. This would avoid all heap allocation
+ // for cases of up to circa 16 args. Currently not possible because regalloc.rs
+ // does not export it.
+ let mut src_reg_set = FxHashSet::<Reg>::default();
+ for (_, _, src_reg) in &var_bundles {
+ src_reg_set.insert(*src_reg);
+ }
+ let mut overlaps = false;
+ for (_, dst_reg, _) in &var_bundles {
+ if src_reg_set.contains(&dst_reg.to_reg()) {
+ overlaps = true;
+ break;
+ }
+ }
+
+ // If, as is mostly the case, the source and destination register sets are
+ // non-overlapping, then we can copy directly and save the register allocator some work.
+ if !overlaps {
+ for (ty, dst_reg, src_reg) in &var_bundles {
+ self.emit(I::gen_move(*dst_reg, *src_reg, *ty));
+ }
+ } else {
+ // There's some overlap, so play safe and copy via temps.
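+ // (For example, if the successor's block params swap two values, direct copies
+ // dst0 <- src1 and dst1 <- src0 would clobber one source before it is read; going
+ // through temporaries avoids this.)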
+ let mut tmp_regs = SmallVec::<[Writable<Reg>; 16]>::new();
+ for (ty, _, _) in &var_bundles {
+ tmp_regs.push(self.alloc_tmp(I::rc_for_type(*ty)?, *ty));
+ }
+ for ((ty, _, src_reg), tmp_reg) in var_bundles.iter().zip(tmp_regs.iter()) {
+ self.emit(I::gen_move(*tmp_reg, *src_reg, *ty));
+ }
+ for ((ty, dst_reg, _), tmp_reg) in var_bundles.iter().zip(tmp_regs.iter()) {
+ self.emit(I::gen_move(*dst_reg, (*tmp_reg).to_reg(), *ty));
+ }
+ }
+
+ // Now, finally, deal with the moves whose sources are constants.
+ for (ty, dst_reg, const_u64) in &const_bundles {
+ for inst in I::gen_constant(*dst_reg, *const_u64, *ty, |reg_class, ty| {
+ self.alloc_tmp(reg_class, ty)
+ })
+ .into_iter()
+ {
+ self.emit(inst);
+ }
+ }
+
+ Ok(())
+ }
+
+ fn lower_clif_block<B: LowerBackend<MInst = I>>(
+ &mut self,
+ backend: &B,
+ block: Block,
+ ) -> CodegenResult<()> {
+ // Lowering loop:
+ // - For each non-branch instruction, in reverse order:
+ // - If side-effecting (load, store, branch/call/return, possible trap), or if
+ // used outside of this block, or if demanded by another inst, then lower.
+ //
+ // That's it! Lowering of side-effecting ops will force all *needed*
+ // (live) non-side-effecting ops to be lowered at the right places, via
+ // the `use_input_reg()` callback on the `LowerCtx` (that's us). That's
+ // because `use_input_reg()` sets the eager/demand bit for any insts
+ // whose result registers are used.
+ //
+ // We build up the BB in reverse instruction order in `bb_insts`.
+ // Because the machine backend calls `ctx.emit()` in forward order, we
+ // collect per-IR-inst lowered instructions in `ir_insts`, then reverse
+ // these and append to `bb_insts` as we go backward through the block.
+ // `bb_insts` are then reversed again and appended to the VCode at the
+ // end of the BB (in the toplevel driver `lower()`).
+ for inst in self.f.layout.block_insts(block).rev() {
+ let data = &self.f.dfg[inst];
+ let value_needed = self
+ .f
+ .dfg
+ .inst_results(inst)
+ .iter()
+ .any(|&result| self.vreg_needed[self.value_regs[result].get_index()]);
+ debug!(
+ "lower_clif_block: block {} inst {} ({:?}) is_branch {} inst_needed {} value_needed {}",
+ block,
+ inst,
+ data,
+ data.opcode().is_branch(),
+ self.inst_needed[inst],
+ value_needed,
+ );
+ if self.f.dfg[inst].opcode().is_branch() {
+ continue;
+ }
+ // Normal instruction: codegen if the eager bit is set. (Non-eager instructions are
+ // also codegened when another instruction demands their result values.)
+ if self.inst_needed[inst] || value_needed {
+ debug!("lowering: inst {}: {:?}", inst, self.f.dfg[inst]);
+ backend.lower(self, inst)?;
+ }
+ if data.opcode().is_return() {
+ // Return: handle specially, using ABI-appropriate sequence.
+ let gen_ret = if data.opcode() == Opcode::Return {
+ GenerateReturn::Yes
+ } else {
+ debug_assert!(data.opcode() == Opcode::FallthroughReturn);
+ GenerateReturn::No
+ };
+ self.gen_retval_setup(gen_ret);
+ }
+
+ let loc = self.srcloc(inst);
+ self.finish_ir_inst(loc);
+ }
+ Ok(())
+ }
+
+ fn finish_ir_inst(&mut self, loc: SourceLoc) {
+ // `bb_insts` is kept in reverse order, so emit the instructions in
+ // reverse order.
+ for mut tuple in self.ir_insts.drain(..).rev() {
+ tuple.loc = loc;
+ self.bb_insts.push(tuple);
+ }
+ }
+
+ fn finish_bb(&mut self) {
+ let start = self.block_insts.len();
+ for tuple in self.bb_insts.drain(..).rev() {
+ self.block_insts.push(tuple);
+ }
+ let end = self.block_insts.len();
+ self.block_ranges.push((start, end));
+ }
+
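+ // Ordering note: `lower()` visits blocks in reverse, and `finish_bb()` appends each
+ // block's instructions (already restored to forward order) to `block_insts`. Walking
+ // `block_ranges` in reverse below therefore emits blocks into the VCode in forward
+ // order, each with its instructions in forward order.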
+ fn copy_bbs_to_vcode(&mut self) {
+ for &(start, end) in self.block_ranges.iter().rev() {
+ for &InstTuple {
+ loc,
+ is_safepoint,
+ ref inst,
+ } in &self.block_insts[start..end]
+ {
+ self.vcode.set_srcloc(loc);
+ self.vcode.push(inst.clone(), is_safepoint);
+ }
+ self.vcode.end_bb();
+ }
+ }
+
+ fn lower_clif_branches<B: LowerBackend<MInst = I>>(
+ &mut self,
+ backend: &B,
+ block: Block,
+ branches: &SmallVec<[Inst; 2]>,
+ targets: &SmallVec<[MachLabel; 2]>,
+ maybe_fallthrough: Option<MachLabel>,
+ ) -> CodegenResult<()> {
+ debug!(
+ "lower_clif_branches: block {} branches {:?} targets {:?} maybe_fallthrough {:?}",
+ block, branches, targets, maybe_fallthrough
+ );
+ backend.lower_branch_group(self, branches, targets, maybe_fallthrough)?;
+ let loc = self.srcloc(branches[0]);
+ self.finish_ir_inst(loc);
+ Ok(())
+ }
+
+ fn collect_branches_and_targets(
+ &self,
+ bindex: BlockIndex,
+ _bb: Block,
+ branches: &mut SmallVec<[Inst; 2]>,
+ targets: &mut SmallVec<[MachLabel; 2]>,
+ ) {
+ branches.clear();
+ targets.clear();
+ let mut last_inst = None;
+ for &(inst, succ) in self.vcode.block_order().succ_indices(bindex) {
+ // Avoid duplicates: this ensures a br_table is only inserted once.
+ if last_inst != Some(inst) {
+ branches.push(inst);
+ } else {
+ debug_assert!(self.f.dfg[inst].opcode() == Opcode::BrTable);
+ debug_assert!(branches.len() == 1);
+ }
+ last_inst = Some(inst);
+ targets.push(MachLabel::from_block(succ));
+ }
+ }
+
+ /// Lower the function.
+ pub fn lower<B: LowerBackend<MInst = I>>(
+ mut self,
+ backend: &B,
+ ) -> CodegenResult<(VCode<I>, StackmapRequestInfo)> {
+ debug!("about to lower function: {:?}", self.f);
+
+ // Initialize the ABI object, giving it a temp if requested.
+ let maybe_tmp = if self.vcode.abi().temp_needed() {
+ Some(self.alloc_tmp(RegClass::I64, I64))
+ } else {
+ None
+ };
+ self.vcode.abi().init(maybe_tmp);
+
+ // Get the pinned reg here (we only parameterize this function on `B`,
+ // not the whole `Lower` impl).
+ self.pinned_reg = backend.maybe_pinned_reg();
+
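+ // The lowered block order places the entry block first, so lowered block index 0
+ // is the entry block.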
+ self.vcode.set_entry(0);
+
+ // Reused vectors for branch lowering.
+ let mut branches: SmallVec<[Inst; 2]> = SmallVec::new();
+ let mut targets: SmallVec<[MachLabel; 2]> = SmallVec::new();
+
+ // Get a copy of the lowered order; we hold this separately because we
+ // need a mut ref to the vcode to mutate it below.
+ let lowered_order: SmallVec<[LoweredBlock; 64]> = self
+ .vcode
+ .block_order()
+ .lowered_order()
+ .iter()
+ .cloned()
+ .collect();
+
+ // Main lowering loop over lowered blocks.
+ for (bindex, lb) in lowered_order.iter().enumerate().rev() {
+ let bindex = bindex as BlockIndex;
+
+ // Lower the block body in reverse order (see comment in
+ // `lower_clif_block()` for rationale).
+
+ // End branches.
+ if let Some(bb) = lb.orig_block() {
+ self.collect_branches_and_targets(bindex, bb, &mut branches, &mut targets);
+ if branches.len() > 0 {
+ let maybe_fallthrough = if (bindex + 1) < (lowered_order.len() as BlockIndex) {
+ Some(MachLabel::from_block(bindex + 1))
+ } else {
+ None
+ };
+ self.lower_clif_branches(backend, bb, &branches, &targets, maybe_fallthrough)?;
+ self.finish_ir_inst(self.srcloc(branches[0]));
+ }
+ } else {
+ // If no orig block, this must be a pure edge block; get the successor and
+ // emit a jump.
+ let (_, succ) = self.vcode.block_order().succ_indices(bindex)[0];
+ self.emit(I::gen_jump(MachLabel::from_block(succ)));
+ self.finish_ir_inst(SourceLoc::default());
+ }
+
+ // Out-edge phi moves.
+ if let Some((pred, inst, succ)) = lb.out_edge() {
+ self.lower_edge(pred, inst, succ)?;
+ self.finish_ir_inst(SourceLoc::default());
+ }
+ // Original block body.
+ if let Some(bb) = lb.orig_block() {
+ self.lower_clif_block(backend, bb)?;
+ }
+ // In-edge phi moves.
+ if let Some((pred, inst, succ)) = lb.in_edge() {
+ self.lower_edge(pred, inst, succ)?;
+ self.finish_ir_inst(SourceLoc::default());
+ }
+
+ if bindex == 0 {
+ // Set up the function with arg vreg inits.
+ self.gen_arg_setup();
+ self.finish_ir_inst(SourceLoc::default());
+ }
+
+ self.finish_bb();
+ }
+
+ self.copy_bbs_to_vcode();
+
+ // Now that we've emitted all instructions into the VCodeBuilder, let's build the VCode.
+ let (vcode, stack_map_info) = self.vcode.build();
+ debug!("built vcode: {:?}", vcode);
+
+ Ok((vcode, stack_map_info))
+ }
+
+ /// Get the actual inputs for a value. This is the implementation for
+ /// `get_input()` but starting from the SSA value, which is not exposed to
+ /// the backend.
+ fn get_input_for_val(&self, at_inst: Inst, val: Value) -> LowerInput {
+ debug!("get_input_for_val: val {} at inst {}", val, at_inst);
+ let mut reg = self.value_regs[val];
+ debug!(" -> reg {:?}", reg);
+ assert!(reg.is_valid());
+ let mut inst = match self.f.dfg.value_def(val) {
+ // OK to merge source instruction if (i) we have a source
+ // instruction, and either (ii-a) it has no side effects, or (ii-b)
+ // it has the same color as this instruction.
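+ // (Instruction "colors" partition a block at side-effecting instructions: two
+ // instructions have the same color only if no side-effecting instruction lies
+ // between them, so merging within one color cannot reorder side effects.)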
+ ValueDef::Result(src_inst, result_idx) => {
+ debug!(" -> src inst {}", src_inst);
+ debug!(
+ " -> has lowering side effect: {}",
+ has_lowering_side_effect(self.f, src_inst)
+ );
+ debug!(
+ " -> our color is {:?}, src inst is {:?}",
+ self.inst_color(at_inst),
+ self.inst_color(src_inst)
+ );
+ if !has_lowering_side_effect(self.f, src_inst)
+ || self.inst_color(at_inst) == self.inst_color(src_inst)
+ {
+ Some((src_inst, result_idx))
+ } else {
+ None
+ }
+ }
+ _ => None,
+ };
+ let constant = inst.and_then(|(inst, _)| self.get_constant(inst));
+
+ // Pinned-reg hack: if backend specifies a fixed pinned register, use it
+ // directly when we encounter a GetPinnedReg op, rather than lowering
+ // the actual op, and do not return the source inst to the caller; the
+ // value comes "out of the ether" and we will not force generation of
+ // the superfluous move.
+ if let Some((i, _)) = inst {
+ if self.f.dfg[i].opcode() == Opcode::GetPinnedReg {
+ if let Some(pr) = self.pinned_reg {
+ reg = pr;
+ }
+ inst = None;
+ }
+ }
+
+ LowerInput {
+ reg,
+ inst,
+ constant,
+ }
+ }
+}
+
+impl<'func, I: VCodeInst> LowerCtx for Lower<'func, I> {
+ type I = I;
+
+ fn abi(&mut self) -> &mut dyn ABICallee<I = I> {
+ self.vcode.abi()
+ }
+
+ fn retval(&self, idx: usize) -> Writable<Reg> {
+ Writable::from_reg(self.retval_regs[idx])
+ }
+
+ fn get_vm_context(&self) -> Option<Reg> {
+ self.vm_context
+ }
+
+ fn data(&self, ir_inst: Inst) -> &InstructionData {
+ &self.f.dfg[ir_inst]
+ }
+
+ fn ty(&self, ir_inst: Inst) -> Type {
+ self.f.dfg.ctrl_typevar(ir_inst)
+ }
+
+ fn call_target<'b>(&'b self, ir_inst: Inst) -> Option<(&'b ExternalName, RelocDistance)> {
+ match &self.f.dfg[ir_inst] {
+ &InstructionData::Call { func_ref, .. }
+ | &InstructionData::FuncAddr { func_ref, .. } => {
+ let funcdata = &self.f.dfg.ext_funcs[func_ref];
+ let dist = funcdata.reloc_distance();
+ Some((&funcdata.name, dist))
+ }
+ _ => None,
+ }
+ }
+
+ fn call_sig<'b>(&'b self, ir_inst: Inst) -> Option<&'b Signature> {
+ match &self.f.dfg[ir_inst] {
+ &InstructionData::Call { func_ref, .. } => {
+ let funcdata = &self.f.dfg.ext_funcs[func_ref];
+ Some(&self.f.dfg.signatures[funcdata.signature])
+ }
+ &InstructionData::CallIndirect { sig_ref, .. } => Some(&self.f.dfg.signatures[sig_ref]),
+ _ => None,
+ }
+ }
+
+ fn symbol_value<'b>(&'b self, ir_inst: Inst) -> Option<(&'b ExternalName, RelocDistance, i64)> {
+ match &self.f.dfg[ir_inst] {
+ &InstructionData::UnaryGlobalValue { global_value, .. } => {
+ let gvdata = &self.f.global_values[global_value];
+ match gvdata {
+ &GlobalValueData::Symbol {
+ ref name,
+ ref offset,
+ ..
+ } => {
+ let offset = offset.bits();
+ let dist = gvdata.maybe_reloc_distance().unwrap();
+ Some((name, dist, offset))
+ }
+ _ => None,
+ }
+ }
+ _ => None,
+ }
+ }
+
+ fn memflags(&self, ir_inst: Inst) -> Option<MemFlags> {
+ match &self.f.dfg[ir_inst] {
+ &InstructionData::AtomicCas { flags, .. } => Some(flags),
+ &InstructionData::AtomicRmw { flags, .. } => Some(flags),
+ &InstructionData::Load { flags, .. }
+ | &InstructionData::LoadComplex { flags, .. }
+ | &InstructionData::LoadNoOffset { flags, .. }
+ | &InstructionData::Store { flags, .. }
+ | &InstructionData::StoreComplex { flags, .. } => Some(flags),
+ &InstructionData::StoreNoOffset { flags, .. } => Some(flags),
+ _ => None,
+ }
+ }
+
+ fn srcloc(&self, ir_inst: Inst) -> SourceLoc {
+ self.f.srclocs[ir_inst]
+ }
+
+ fn inst_color(&self, ir_inst: Inst) -> InstColor {
+ self.inst_colors[ir_inst]
+ }
+
+ fn num_inputs(&self, ir_inst: Inst) -> usize {
+ self.f.dfg.inst_args(ir_inst).len()
+ }
+
+ fn num_outputs(&self, ir_inst: Inst) -> usize {
+ self.f.dfg.inst_results(ir_inst).len()
+ }
+
+ fn input_ty(&self, ir_inst: Inst, idx: usize) -> Type {
+ let val = self.f.dfg.inst_args(ir_inst)[idx];
+ let val = self.f.dfg.resolve_aliases(val);
+ self.f.dfg.value_type(val)
+ }
+
+ fn output_ty(&self, ir_inst: Inst, idx: usize) -> Type {
+ self.f.dfg.value_type(self.f.dfg.inst_results(ir_inst)[idx])
+ }
+
+ fn get_constant(&self, ir_inst: Inst) -> Option<u64> {
+ self.inst_constants.get(&ir_inst).cloned()
+ }
+
+ fn get_input(&self, ir_inst: Inst, idx: usize) -> LowerInput {
+ let val = self.f.dfg.inst_args(ir_inst)[idx];
+ let val = self.f.dfg.resolve_aliases(val);
+ self.get_input_for_val(ir_inst, val)
+ }
+
+ fn get_output(&self, ir_inst: Inst, idx: usize) -> Writable<Reg> {
+ let val = self.f.dfg.inst_results(ir_inst)[idx];
+ Writable::from_reg(self.value_regs[val])
+ }
+
+ fn alloc_tmp(&mut self, rc: RegClass, ty: Type) -> Writable<Reg> {
+ let v = self.next_vreg;
+ self.next_vreg += 1;
+ let vreg = Reg::new_virtual(rc, v);
+ self.vcode.set_vreg_type(vreg.as_virtual_reg().unwrap(), ty);
+ Writable::from_reg(vreg)
+ }
+
+ fn emit(&mut self, mach_inst: I) {
+ self.ir_insts.push(InstTuple {
+ loc: SourceLoc::default(),
+ is_safepoint: false,
+ inst: mach_inst,
+ });
+ }
+
+ fn emit_safepoint(&mut self, mach_inst: I) {
+ self.ir_insts.push(InstTuple {
+ loc: SourceLoc::default(),
+ is_safepoint: true,
+ inst: mach_inst,
+ });
+ }
+
+ fn use_input_reg(&mut self, input: LowerInput) {
+ debug!("use_input_reg: vreg {:?} is needed", input.reg);
+ // We may directly return a real (machine) register when we know that register holds the
+ // result of an opcode (e.g. GetPinnedReg).
+ if input.reg.is_virtual() {
+ self.vreg_needed[input.reg.get_index()] = true;
+ }
+ }
+
+ fn is_reg_needed(&self, ir_inst: Inst, reg: Reg) -> bool {
+ self.inst_needed[ir_inst] || self.vreg_needed[reg.get_index()]
+ }
+
+ fn get_constant_data(&self, constant_handle: Constant) -> &ConstantData {
+ self.f.dfg.constants.get(constant_handle)
+ }
+
+ fn use_constant(&mut self, constant: VCodeConstantData) -> VCodeConstant {
+ self.vcode.constants().insert(constant)
+ }
+
+ fn get_immediate(&self, ir_inst: Inst) -> Option<DataValue> {
+ let inst_data = self.data(ir_inst);
+ match inst_data {
+ InstructionData::Shuffle { mask, .. } => {
+ let buffer = self.f.dfg.immediates.get(mask.clone()).unwrap().as_slice();
+ let value = DataValue::V128(buffer.try_into().expect("a 16-byte data buffer"));
+ Some(value)
+ }
+ InstructionData::UnaryConst {
+ constant_handle, ..
+ } => {
+ let buffer = self.f.dfg.constants.get(constant_handle.clone()).as_slice();
+ let value = DataValue::V128(buffer.try_into().expect("a 16-byte data buffer"));
+ Some(value)
+ }
+ _ => inst_data.imm_value(),
+ }
+ }
+
+ fn ensure_in_vreg(&mut self, reg: Reg, ty: Type) -> Reg {
+ if reg.is_virtual() {
+ reg
+ } else {
+ let rc = reg.get_class();
+ let new_reg = self.alloc_tmp(rc, ty);
+ self.emit(I::gen_move(new_reg, reg, ty));
+ new_reg.to_reg()
+ }
+ }
+}
+
+/// Visit all successors of a block with a given visitor closure.
+pub(crate) fn visit_block_succs<F: FnMut(Inst, Block)>(f: &Function, block: Block, mut visit: F) {
+ for inst in f.layout.block_likely_branches(block) {
+ if f.dfg[inst].opcode().is_branch() {
+ visit_branch_targets(f, block, inst, &mut visit);
+ }
+ }
+}
+
+fn visit_branch_targets<F: FnMut(Inst, Block)>(
+ f: &Function,
+ block: Block,
+ inst: Inst,
+ visit: &mut F,
+) {
+ if f.dfg[inst].opcode() == Opcode::Fallthrough {
+ visit(inst, f.layout.next_block(block).unwrap());
+ } else {
+ match f.dfg[inst].analyze_branch(&f.dfg.value_lists) {
+ BranchInfo::NotABranch => {}
+ BranchInfo::SingleDest(dest, _) => {
+ visit(inst, dest);
+ }
+ BranchInfo::Table(table, maybe_dest) => {
+ if let Some(dest) = maybe_dest {
+ visit(inst, dest);
+ }
+ for &dest in f.jump_tables[table].as_slice() {
+ visit(inst, dest);
+ }
+ }
+ }
+ }
+}
diff --git a/third_party/rust/cranelift-codegen/src/machinst/mod.rs b/third_party/rust/cranelift-codegen/src/machinst/mod.rs
new file mode 100644
index 0000000000..4b12f2fd1d
--- /dev/null
+++ b/third_party/rust/cranelift-codegen/src/machinst/mod.rs
@@ -0,0 +1,421 @@
+//! This module exposes the machine-specific backend definition pieces.
+//!
+//! The MachInst infrastructure is the compiler backend, from CLIF
+//! (ir::Function) to machine code. The purpose of this infrastructure is, at a
+//! high level, to do instruction selection/lowering (to machine instructions),
+//! register allocation, and then perform all the fixups to branches, constant
+//! data references, etc., needed to actually generate machine code.
+//!
+//! The container for machine instructions, at various stages of construction,
+//! is the `VCode` struct. We refer to a sequence of machine instructions organized
+//! into basic blocks as "vcode". This is short for "virtual-register code", though
+//! it's a bit of a misnomer because near the end of the pipeline, vcode has all
+//! real registers. Nevertheless, the name is catchy and we like it.
+//!
+//! The compilation pipeline, from an `ir::Function` (already optimized as much as
+//! you like by machine-independent optimization passes) onward, is as follows.
+//! (N.B.: though we show the VCode separately at each stage, the passes
+//! mutate the VCode in place; these are not separate copies of the code.)
+//!
+//! ```plain
+//!
+//! ir::Function (SSA IR, machine-independent opcodes)
+//! |
+//! | [lower]
+//! |
+//! VCode<arch_backend::Inst> (machine instructions:
+//! | - mostly virtual registers.
+//! | - cond branches in two-target form.
+//! | - branch targets are block indices.
+//! | - in-memory constants held by insns,
+//! | with unknown offsets.
+//! | - critical edges (actually all edges)
+//! | are split.)
+//! | [regalloc]
+//! |
+//! VCode<arch_backend::Inst> (machine instructions:
+//! | - all real registers.
+//! | - new instruction sequence returned
+//! | out-of-band in RegAllocResult.
+//! | - instruction sequence has spills,
+//! | reloads, and moves inserted.
+//! | - other invariants same as above.)
+//! |
+//! | [preamble/postamble]
+//! |
+//! VCode<arch_backend::Inst> (machine instructions:
+//! | - stack-frame size known.
+//! | - out-of-band instruction sequence
+//! | has preamble prepended to entry
+//! | block, and postamble injected before
+//! | every return instruction.
+//! | - all symbolic stack references to
+//! | stackslots and spillslots are resolved
+//! | to concrete FP-offset mem addresses.)
+//! | [block/insn ordering]
+//! |
+//! VCode<arch_backend::Inst> (machine instructions:
+//! | - vcode.final_block_order is filled in.
+//! | - new insn sequence from regalloc is
+//! | placed back into vcode and block
+//! | boundaries are updated.)
+//! | [redundant branch/block
+//! | removal]
+//! |
+//! VCode<arch_backend::Inst> (machine instructions:
+//! | - all blocks that were just an
+//! | unconditional branch are removed.)
+//! |
+//! | [branch finalization
+//! | (fallthroughs)]
+//! |
+//! VCode<arch_backend::Inst> (machine instructions:
+//! | - all branches are in lowered one-
+//! | target form, but targets are still
+//! | block indices.)
+//! |
+//! | [branch finalization
+//! | (offsets)]
+//! |
+//! VCode<arch_backend::Inst> (machine instructions:
+//! | - all branch offsets from start of
+//! | function are known, and all branches
+//! | have resolved-offset targets.)
+//! |
+//! | [MemArg finalization]
+//! |
+//! VCode<arch_backend::Inst> (machine instructions:
+//! | - all MemArg references to the constant
+//! | pool are replaced with offsets.
+//! | - all constant-pool data is collected
+//! | in the VCode.)
+//! |
+//! | [binary emission]
+//! |
+//! Vec<u8> (machine code!)
+//!
+//! ```
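+//!
+//! At the top level, a `MachBackend` implementation drives this pipeline from
+//! `compile_function()`. A minimal sketch of a caller (the `backend` and `func` values
+//! here are hypothetical; error handling elided):
+//!
+//! ```plain
+//! let result = backend.compile_function(&func, /* want_disasm = */ false)?;
+//! let code_size = result.buffer.total_size(); // size of the finalized machine code
+//! let frame_size = result.frame_size;         // stack-frame size, in bytes
+//! ```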
+
+use crate::binemit::{CodeInfo, CodeOffset, StackMap};
+use crate::ir::condcodes::IntCC;
+use crate::ir::{Function, SourceLoc, Type};
+use crate::isa::unwind::input as unwind_input;
+use crate::result::CodegenResult;
+use crate::settings::Flags;
+
+use alloc::boxed::Box;
+use alloc::vec::Vec;
+use core::fmt::Debug;
+use core::ops::Range;
+use regalloc::RegUsageCollector;
+use regalloc::{
+ RealReg, RealRegUniverse, Reg, RegClass, RegUsageMapper, SpillSlot, VirtualReg, Writable,
+};
+use smallvec::SmallVec;
+use std::string::String;
+use target_lexicon::Triple;
+
+pub mod lower;
+pub use lower::*;
+pub mod vcode;
+pub use vcode::*;
+pub mod compile;
+pub use compile::*;
+pub mod blockorder;
+pub use blockorder::*;
+pub mod abi;
+pub use abi::*;
+pub mod abi_impl;
+pub use abi_impl::*;
+pub mod buffer;
+pub use buffer::*;
+pub mod adapter;
+pub use adapter::*;
+pub mod helpers;
+pub use helpers::*;
+pub mod inst_common;
+pub use inst_common::*;
+
+/// A machine instruction.
+pub trait MachInst: Clone + Debug {
+ /// Return the registers referenced by this machine instruction along with
+ /// the modes of reference (use, def, modify).
+ fn get_regs(&self, collector: &mut RegUsageCollector);
+
+ /// Map virtual registers to physical registers using the given virt->phys
+ /// maps corresponding to the program points prior to, and after, this instruction.
+ fn map_regs<RUM: RegUsageMapper>(&mut self, maps: &RUM);
+
+ /// If this is a simple move, return the (source, destination) tuple of registers.
+ fn is_move(&self) -> Option<(Writable<Reg>, Reg)>;
+
+ /// Is this a terminator (branch or ret)? If so, return its type
+ /// (ret/uncond/cond) and target if applicable.
+ fn is_term<'a>(&'a self) -> MachTerminator<'a>;
+
+ /// Returns true if the instruction is an epilogue placeholder.
+ fn is_epilogue_placeholder(&self) -> bool;
+
+ /// Should this instruction be included in the clobber-set?
+ fn is_included_in_clobbers(&self) -> bool {
+ true
+ }
+
+ /// Generate a move.
+ fn gen_move(to_reg: Writable<Reg>, from_reg: Reg, ty: Type) -> Self;
+
+ /// Generate a constant into a reg.
+ fn gen_constant<F: FnMut(RegClass, Type) -> Writable<Reg>>(
+ to_reg: Writable<Reg>,
+ value: u64,
+ ty: Type,
+ alloc_tmp: F,
+ ) -> SmallVec<[Self; 4]>;
+
+ /// Generate a zero-length no-op.
+ fn gen_zero_len_nop() -> Self;
+
+ /// Possibly operate on a value directly in a spill-slot rather than a
+ /// register. Useful if the machine has register-memory instruction forms
+ /// (e.g., add directly from or directly to memory), like x86.
+ fn maybe_direct_reload(&self, reg: VirtualReg, slot: SpillSlot) -> Option<Self>;
+
+ /// Determine a register class to store the given Cranelift type.
+ /// May return an error if the type isn't supported by this backend.
+ fn rc_for_type(ty: Type) -> CodegenResult<RegClass>;
+
+ /// Generate a jump to another target. Used during lowering of
+ /// control flow.
+ fn gen_jump(target: MachLabel) -> Self;
+
+ /// Generate a NOP. The `preferred_size` parameter allows the caller to
+ /// request a NOP of that size, or as close to it as possible. The machine
+ /// backend may return a NOP whose binary encoding is smaller than the
+ /// preferred size, but must not return a NOP that is larger. However,
+ /// the instruction must have a nonzero size.
+ fn gen_nop(preferred_size: usize) -> Self;
+
+ /// Get the register universe for this backend.
+ fn reg_universe(flags: &Flags) -> RealRegUniverse;
+
+ /// Align a basic block offset (from start of function). By default, no
+ /// alignment occurs.
+ fn align_basic_block(offset: CodeOffset) -> CodeOffset {
+ offset
+ }
+
+ /// What is the worst-case instruction size emitted by this instruction type?
+ fn worst_case_size() -> CodeOffset;
+
+ /// What is the register class used for reference types (GC-observable pointers)? Can
+ /// be dependent on compilation flags.
+ fn ref_type_regclass(_flags: &Flags) -> RegClass;
+
+ /// A label-use kind: a type that describes the types of label references that
+ /// can occur in an instruction.
+ type LabelUse: MachInstLabelUse;
+}
+
+/// A descriptor of a label reference (use) in an instruction set.
+pub trait MachInstLabelUse: Clone + Copy + Debug + Eq {
+ /// Required alignment for any veneer. Usually the required instruction
+ /// alignment (e.g., 4 for a RISC with 32-bit instructions, or 1 for x86).
+ const ALIGN: CodeOffset;
+
+ /// What is the maximum PC-relative range (positive)? E.g., if `1024`, a
+ /// label-reference fixup at offset `x` is valid if the label resolves to `x
+ /// + 1024`.
+ fn max_pos_range(self) -> CodeOffset;
+ /// What is the maximum PC-relative range (negative)? This is the absolute
+ /// value; i.e., if `1024`, then a label-reference fixup at offset `x` is
+ /// valid if the label resolves to `x - 1024`.
+ fn max_neg_range(self) -> CodeOffset;
+ /// What is the size of code-buffer slice this label-use needs to patch in
+ /// the label's value?
+ fn patch_size(self) -> CodeOffset;
+ /// Perform a code-patch, given the offset into the buffer of this label use
+ /// and the offset into the buffer of the label's definition.
+ /// It is guaranteed that the label resolves within this use's supported range,
+ /// i.e., that `label_offset >= use_offset - self.max_neg_range()` and
+ /// `label_offset <= use_offset + self.max_pos_range()`.
+ fn patch(self, buffer: &mut [u8], use_offset: CodeOffset, label_offset: CodeOffset);
+ /// Can the label-use be patched to a veneer that supports a longer range?
+ /// Usually valid for jumps (a short-range jump can jump to a longer-range
+ /// jump), but not for e.g. constant pool references, because the constant
+ /// load would require different code (one more level of indirection).
+ fn supports_veneer(self) -> bool;
+ /// How many bytes are needed for a veneer?
+ fn veneer_size(self) -> CodeOffset;
+ /// Generate a veneer. The given code-buffer slice is `self.veneer_size()`
+ /// bytes long at offset `veneer_offset` in the buffer. The original
+ /// label-use will be patched to refer to this veneer's offset. A new
+ /// (offset, LabelUse) is returned that allows the veneer to use the actual
+ /// label. For veneers to work properly, it is expected that the new veneer
+ /// has a larger range; on most platforms this probably means either a
+ /// "long-range jump" (e.g., on ARM, the 26-bit form), or if already at that
+ /// stage, a jump that supports a full 32-bit range, for example.
+ fn generate_veneer(self, buffer: &mut [u8], veneer_offset: CodeOffset) -> (CodeOffset, Self);
+}
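+
+// As an illustration (roughly how the `MachBuffer` uses these hooks): a label use is
+// resolvable directly only if the label's offset falls within
+// `[use_offset - max_neg_range(), use_offset + max_pos_range()]`; otherwise, if
+// `supports_veneer()` is true, a `veneer_size()`-byte veneer is emitted in an island via
+// `generate_veneer()` and the original use is patched to target the veneer instead.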
+
+/// Describes a block terminator (not call) in the vcode, when its branches
+/// have not yet been finalized (so a branch may have two targets).
+#[derive(Clone, Debug, PartialEq, Eq)]
+pub enum MachTerminator<'a> {
+ /// Not a terminator.
+ None,
+ /// A return instruction.
+ Ret,
+ /// An unconditional branch to another block.
+ Uncond(MachLabel),
+ /// A conditional branch to one of two other blocks.
+ Cond(MachLabel, MachLabel),
+ /// An indirect branch with known possible targets.
+ Indirect(&'a [MachLabel]),
+}
+
+/// A trait describing the ability to encode a MachInst into binary machine code.
+pub trait MachInstEmit: MachInst {
+ /// Persistent state carried across `emit` invocations.
+ type State: MachInstEmitState<Self>;
+ /// Constant information used in `emit` invocations.
+ type Info: MachInstEmitInfo;
+ /// Unwind info generator.
+ type UnwindInfo: UnwindInfoGenerator<Self>;
+ /// Emit the instruction.
+ fn emit(&self, code: &mut MachBuffer<Self>, info: &Self::Info, state: &mut Self::State);
+ /// Pretty-print the instruction.
+ fn pretty_print(&self, mb_rru: Option<&RealRegUniverse>, state: &mut Self::State) -> String;
+}
+
+/// Constant information used to emit an instruction.
+pub trait MachInstEmitInfo {
+ /// Return the target-independent settings used for the compilation of this
+ /// particular function.
+ fn flags(&self) -> &Flags;
+}
+
+/// A trait describing the emission state carried between MachInsts when
+/// emitting a function body.
+pub trait MachInstEmitState<I: MachInst>: Default + Clone + Debug {
+ /// Create a new emission state given the ABI object.
+ fn new(abi: &dyn ABICallee<I = I>) -> Self;
+ /// Update the emission state before emitting an instruction that is a
+ /// safepoint.
+ fn pre_safepoint(&mut self, _stack_map: StackMap) {}
+ /// Update the emission state to indicate instructions are associated with a
+ /// particular SourceLoc.
+ fn pre_sourceloc(&mut self, _srcloc: SourceLoc) {}
+}
+
+/// The result of a `MachBackend::compile_function()` call. Contains machine
+/// code (as bytes) and a disassembly, if requested.
+pub struct MachCompileResult {
+ /// Machine code.
+ pub buffer: MachBufferFinalized,
+ /// Size of stack frame, in bytes.
+ pub frame_size: u32,
+ /// Disassembly, if requested.
+ pub disasm: Option<String>,
+ /// Unwind info.
+ pub unwind_info: Option<unwind_input::UnwindInfo<Reg>>,
+}
+
+impl MachCompileResult {
+ /// Get a `CodeInfo` describing section sizes from this compilation result.
+ pub fn code_info(&self) -> CodeInfo {
+ let code_size = self.buffer.total_size();
+ CodeInfo {
+ code_size,
+ jumptables_size: 0,
+ rodata_size: 0,
+ total_size: code_size,
+ }
+ }
+}
+
+/// Top-level machine backend trait, which wraps all monomorphized code and
+/// allows a virtual call from the machine-independent `Function::compile()`.
+pub trait MachBackend {
+ /// Compile the given function.
+ fn compile_function(
+ &self,
+ func: &Function,
+ want_disasm: bool,
+ ) -> CodegenResult<MachCompileResult>;
+
+ /// Return flags for this backend.
+ fn flags(&self) -> &Flags;
+
+ /// Return triple for this backend.
+ fn triple(&self) -> Triple;
+
+ /// Return name for this backend.
+ fn name(&self) -> &'static str;
+
+ /// Return the register universe for this backend.
+ fn reg_universe(&self) -> &RealRegUniverse;
+
+ /// Machine-specific condcode info needed by TargetIsa.
+ /// Condition that will be true when an IaddIfcout overflows.
+ fn unsigned_add_overflow_condition(&self) -> IntCC;
+
+ /// Machine-specific condcode info needed by TargetIsa.
+ /// Condition that will be true when an IsubIfcout overflows.
+ fn unsigned_sub_overflow_condition(&self) -> IntCC;
+
+ /// Produces unwind info based on backend results.
+ #[cfg(feature = "unwind")]
+ fn emit_unwind_info(
+ &self,
+ _result: &MachCompileResult,
+ _kind: UnwindInfoKind,
+ ) -> CodegenResult<Option<crate::isa::unwind::UnwindInfo>> {
+ // By default, a backend cannot produce unwind info.
+ Ok(None)
+ }
+
+ /// Machine-specific condcode info needed by TargetIsa.
+ /// Creates a new System V Common Information Entry for the ISA.
+ #[cfg(feature = "unwind")]
+ fn create_systemv_cie(&self) -> Option<gimli::write::CommonInformationEntry> {
+ // By default, an ISA cannot create a System V CIE
+ None
+ }
+}
+
+/// Expected unwind info type.
+#[derive(Debug, Clone, Copy, PartialEq, Eq)]
+#[non_exhaustive]
+pub enum UnwindInfoKind {
+ /// No unwind info.
+ None,
+ /// SystemV CIE/FDE unwind info.
+ #[cfg(feature = "unwind")]
+ SystemV,
+ /// Windows x64 unwind info.
+ #[cfg(feature = "unwind")]
+ Windows,
+}
+
+/// Input data for UnwindInfoGenerator.
+pub struct UnwindInfoContext<'a, Inst: MachInstEmit> {
+ /// Function instructions.
+ pub insts: &'a [Inst],
+ /// Instruction layout: end offsets
+ pub insts_layout: &'a [CodeOffset],
+ /// Length of the function.
+ pub len: CodeOffset,
+ /// Prologue range.
+ pub prologue: Range<u32>,
+ /// Epilogue ranges.
+ pub epilogues: &'a [Range<u32>],
+}
+
+/// UnwindInfo generator/helper.
+pub trait UnwindInfoGenerator<I: MachInstEmit> {
+ /// Creates unwind info based on function signature and
+ /// emitted instructions.
+ fn create_unwind_info(
+ context: UnwindInfoContext<I>,
+ ) -> CodegenResult<Option<unwind_input::UnwindInfo<Reg>>>;
+}
diff --git a/third_party/rust/cranelift-codegen/src/machinst/vcode.rs b/third_party/rust/cranelift-codegen/src/machinst/vcode.rs
new file mode 100644
index 0000000000..c57f018e35
--- /dev/null
+++ b/third_party/rust/cranelift-codegen/src/machinst/vcode.rs
@@ -0,0 +1,895 @@
+//! This implements the VCode container: a CFG of Insts that have been lowered.
+//!
+//! VCode is virtual-register code. An instruction in VCode is almost a machine
+//! instruction; however, its register slots can refer to virtual registers in
+//! addition to real machine registers.
+//!
+//! VCode is structured with traditional basic blocks, and
+//! each block must be terminated by an unconditional branch (one target), a
+//! conditional branch (two targets), or a return (no targets). Note that this
+//! slightly differs from the machine code of most ISAs: in most ISAs, a
+//! conditional branch has one target (and the not-taken case falls through).
+//! However, we expect that machine backends will elide branches to the following
+//! block (i.e., zero-offset jumps), and will be able to codegen a branch-cond /
+//! branch-uncond pair if *both* targets are not fallthrough. This allows us to
+//! play with layout prior to final binary emission, as well, if we want.
+//!
+//! See the main module comment in `mod.rs` for more details on the VCode-based
+//! backend pipeline.
+
+use crate::ir::{self, types, Constant, ConstantData, SourceLoc};
+use crate::machinst::*;
+use crate::settings;
+use crate::timing;
+
+use regalloc::Function as RegallocFunction;
+use regalloc::Set as RegallocSet;
+use regalloc::{
+ BlockIx, InstIx, PrettyPrint, Range, RegAllocResult, RegClass, RegUsageCollector,
+ RegUsageMapper, SpillSlot, StackmapRequestInfo,
+};
+
+use alloc::boxed::Box;
+use alloc::{borrow::Cow, vec::Vec};
+use cranelift_entity::{entity_impl, Keys, PrimaryMap};
+use std::cell::RefCell;
+use std::collections::HashMap;
+use std::fmt;
+use std::iter;
+use std::string::String;
+
+/// Index referring to an instruction in VCode.
+pub type InsnIndex = u32;
+/// Index referring to a basic block in VCode.
+pub type BlockIndex = u32;
+/// A range of instructions in VCode.
+pub type InsnRange = core::ops::Range<InsnIndex>;
+
+/// VCodeInst wraps all requirements for a MachInst to be in VCode: it must be
+/// a `MachInst` and it must be able to emit itself (i.e., implement `MachInstEmit`).
+pub trait VCodeInst: MachInst + MachInstEmit {}
+impl<I: MachInst + MachInstEmit> VCodeInst for I {}
+
+/// A function in "VCode" (virtualized-register code) form, after lowering.
+/// This is essentially a standard CFG of basic blocks, where each basic block
+/// consists of lowered instructions produced by the machine-specific backend.
+pub struct VCode<I: VCodeInst> {
+ /// Function liveins.
+ liveins: RegallocSet<RealReg>,
+
+ /// Function liveouts.
+ liveouts: RegallocSet<RealReg>,
+
+ /// VReg IR-level types.
+ vreg_types: Vec<Type>,
+
+ /// Do we have any ref values among our vregs?
+ have_ref_values: bool,
+
+ /// Lowered machine instructions in order corresponding to the original IR.
+ insts: Vec<I>,
+
+ /// Source locations for each instruction. (`SourceLoc` is a `u32`, so it is
+ /// reasonable to keep one of these per instruction.)
+ srclocs: Vec<SourceLoc>,
+
+ /// Entry block.
+ entry: BlockIndex,
+
+ /// Block instruction indices.
+ block_ranges: Vec<(InsnIndex, InsnIndex)>,
+
+ /// Block successors: index range in the successor-list below.
+ block_succ_range: Vec<(usize, usize)>,
+
+ /// Block successor lists, concatenated into one Vec. The `block_succ_range`
+ /// list of tuples above gives (start, end) ranges within this list that
+ /// correspond to each basic block's successors.
+ block_succs: Vec<BlockIx>,
+
+ /// Block-order information.
+ block_order: BlockLoweringOrder,
+
+ /// ABI object.
+ abi: Box<dyn ABICallee<I = I>>,
+
+ /// Constant information used during code emission. This should be
+ /// immutable across function compilations within the same module.
+ emit_info: I::Info,
+
+ /// Safepoint instruction indices. Filled in post-regalloc. (Prior to
+ /// regalloc, the safepoint instructions are listed in the
+ /// `StackmapRequestInfo` held separately from the `VCode`.)
+ safepoint_insns: Vec<InsnIndex>,
+
+ /// For each safepoint entry in `safepoint_insns`, a list of `SpillSlot`s.
+ /// These are used to generate actual stack maps at emission. Filled in
+ /// post-regalloc.
+ safepoint_slots: Vec<Vec<SpillSlot>>,
+
+ /// Ranges for prologue and epilogue instructions.
+ prologue_epilogue_ranges: Option<(InsnRange, Box<[InsnRange]>)>,
+
+ /// Instruction end offsets
+ insts_layout: RefCell<(Vec<u32>, u32)>,
+
+ /// Constants.
+ constants: VCodeConstants,
+}
+
+/// A builder for a VCode function body. This builder is designed for the
+/// lowering approach that we take: we traverse basic blocks in forward
+/// (original IR) order, but within each basic block, we generate code from
+/// bottom to top; and within each IR instruction that we visit in this reverse
+/// order, we emit machine instructions in *forward* order again.
+///
+/// Hence, to produce the final instructions in proper order, we perform two
+/// reversals. First, the machine instructions (`I` instances) are produced in
+/// forward order for an individual IR instruction. Then these are *reversed*
+/// and concatenated to `bb_insns` at the end of the IR instruction lowering.
+/// The `bb_insns` vec will thus contain all machine instructions for a basic
+/// block, in reverse order. Finally, when we're done with a basic block, we
+/// reverse the whole block's vec of instructions again, and concatenate onto
+/// the VCode's insts.
+pub struct VCodeBuilder<I: VCodeInst> {
+ /// In-progress VCode.
+ vcode: VCode<I>,
+
+ /// In-progress stack map-request info.
+ stack_map_info: StackmapRequestInfo,
+
+ /// Index of the last block-start in the vcode.
+ block_start: InsnIndex,
+
+ /// Start of succs for the current block in the concatenated succs list.
+ succ_start: usize,
+
+ /// Current source location.
+ cur_srcloc: SourceLoc,
+}
+
+impl<I: VCodeInst> VCodeBuilder<I> {
+ /// Create a new VCodeBuilder.
+ pub fn new(
+ abi: Box<dyn ABICallee<I = I>>,
+ emit_info: I::Info,
+ block_order: BlockLoweringOrder,
+ constants: VCodeConstants,
+ ) -> VCodeBuilder<I> {
+ let reftype_class = I::ref_type_regclass(abi.flags());
+ let vcode = VCode::new(abi, emit_info, block_order, constants);
+ let stack_map_info = StackmapRequestInfo {
+ reftype_class,
+ reftyped_vregs: vec![],
+ safepoint_insns: vec![],
+ };
+
+ VCodeBuilder {
+ vcode,
+ stack_map_info,
+ block_start: 0,
+ succ_start: 0,
+ cur_srcloc: SourceLoc::default(),
+ }
+ }
+
+ /// Access the ABI object.
+ pub fn abi(&mut self) -> &mut dyn ABICallee<I = I> {
+ &mut *self.vcode.abi
+ }
+
+ /// Access to the BlockLoweringOrder object.
+ pub fn block_order(&self) -> &BlockLoweringOrder {
+ &self.vcode.block_order
+ }
+
+ /// Set the type of a VReg.
+ pub fn set_vreg_type(&mut self, vreg: VirtualReg, ty: Type) {
+ if self.vcode.vreg_types.len() <= vreg.get_index() {
+ self.vcode
+ .vreg_types
+ .resize(vreg.get_index() + 1, ir::types::I8);
+ }
+ self.vcode.vreg_types[vreg.get_index()] = ty;
+ if is_reftype(ty) {
+ self.stack_map_info.reftyped_vregs.push(vreg);
+ self.vcode.have_ref_values = true;
+ }
+ }
+
+ /// Are there any reference-typed values at all among the vregs?
+ pub fn have_ref_values(&self) -> bool {
+ self.vcode.have_ref_values()
+ }
+
+ /// Set the current block as the entry block.
+ pub fn set_entry(&mut self, block: BlockIndex) {
+ self.vcode.entry = block;
+ }
+
+ /// End the current basic block. Must be called after emitting vcode insts
+ /// for IR insts and prior to ending the function (building the VCode).
+ pub fn end_bb(&mut self) {
+ let start_idx = self.block_start;
+ let end_idx = self.vcode.insts.len() as InsnIndex;
+ self.block_start = end_idx;
+ // Add the instruction index range to the list of blocks.
+ self.vcode.block_ranges.push((start_idx, end_idx));
+ // End the successors list.
+ let succ_end = self.vcode.block_succs.len();
+ self.vcode
+ .block_succ_range
+ .push((self.succ_start, succ_end));
+ self.succ_start = succ_end;
+ }
+
+ /// Push an instruction for the current BB and current IR inst within the BB.
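+ /// A terminator pushed here also records the current block's successor edges
+ /// (consumed later through `block_succs()` by the register allocator).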
+ pub fn push(&mut self, insn: I, is_safepoint: bool) {
+ match insn.is_term() {
+ MachTerminator::None | MachTerminator::Ret => {}
+ MachTerminator::Uncond(target) => {
+ self.vcode.block_succs.push(BlockIx::new(target.get()));
+ }
+ MachTerminator::Cond(true_branch, false_branch) => {
+ self.vcode.block_succs.push(BlockIx::new(true_branch.get()));
+ self.vcode
+ .block_succs
+ .push(BlockIx::new(false_branch.get()));
+ }
+ MachTerminator::Indirect(targets) => {
+ for target in targets {
+ self.vcode.block_succs.push(BlockIx::new(target.get()));
+ }
+ }
+ }
+ self.vcode.insts.push(insn);
+ self.vcode.srclocs.push(self.cur_srcloc);
+ if is_safepoint {
+ self.stack_map_info
+ .safepoint_insns
+ .push(InstIx::new((self.vcode.insts.len() - 1) as u32));
+ }
+ }
+
+ /// Get the current source location.
+ pub fn get_srcloc(&self) -> SourceLoc {
+ self.cur_srcloc
+ }
+
+ /// Set the current source location.
+ pub fn set_srcloc(&mut self, srcloc: SourceLoc) {
+ self.cur_srcloc = srcloc;
+ }
+
+ /// Access the constants.
+ pub fn constants(&mut self) -> &mut VCodeConstants {
+ &mut self.vcode.constants
+ }
+
+ /// Build the final VCode, returning the vcode itself as well as auxiliary
+ /// information, such as the stack map request information.
+ pub fn build(self) -> (VCode<I>, StackmapRequestInfo) {
+ // TODO: come up with an abstraction for "vcode and auxiliary data". The
+ // auxiliary data needs to be separate from the vcode so that it can be
+ // referenced as the vcode is mutated (e.g. by the register allocator).
+ (self.vcode, self.stack_map_info)
+ }
+}
+
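+/// Is this instruction a no-op move, i.e., a move whose source and destination are the
+/// same register? Such moves only become detectable after register allocation, when a
+/// vreg-to-vreg copy may end up with both operands assigned to the same real register;
+/// `replace_insns_from_regalloc` elides them.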
+fn is_redundant_move<I: VCodeInst>(insn: &I) -> bool {
+ if let Some((to, from)) = insn.is_move() {
+ to.to_reg() == from
+ } else {
+ false
+ }
+}
+
+/// Is this type a reference type?
+fn is_reftype(ty: Type) -> bool {
+ ty == types::R64 || ty == types::R32
+}
+
+impl<I: VCodeInst> VCode<I> {
+ /// New empty VCode.
+ fn new(
+ abi: Box<dyn ABICallee<I = I>>,
+ emit_info: I::Info,
+ block_order: BlockLoweringOrder,
+ constants: VCodeConstants,
+ ) -> VCode<I> {
+ VCode {
+ liveins: abi.liveins(),
+ liveouts: abi.liveouts(),
+ vreg_types: vec![],
+ have_ref_values: false,
+ insts: vec![],
+ srclocs: vec![],
+ entry: 0,
+ block_ranges: vec![],
+ block_succ_range: vec![],
+ block_succs: vec![],
+ block_order,
+ abi,
+ emit_info,
+ safepoint_insns: vec![],
+ safepoint_slots: vec![],
+ prologue_epilogue_ranges: None,
+ insts_layout: RefCell::new((vec![], 0)),
+ constants,
+ }
+ }
+
+ /// Returns the flags controlling this function's compilation.
+ pub fn flags(&self) -> &settings::Flags {
+ self.abi.flags()
+ }
+
+ /// Get the IR-level type of a VReg.
+ pub fn vreg_type(&self, vreg: VirtualReg) -> Type {
+ self.vreg_types[vreg.get_index()]
+ }
+
+ /// Are there any reference-typed values at all among the vregs?
+ pub fn have_ref_values(&self) -> bool {
+ self.have_ref_values
+ }
+
+ /// Get the entry block.
+ pub fn entry(&self) -> BlockIndex {
+ self.entry
+ }
+
+ /// Get the number of blocks. Block indices will be in the range `0 ..
+ /// (self.num_blocks() - 1)`.
+ pub fn num_blocks(&self) -> usize {
+ self.block_ranges.len()
+ }
+
+ /// Stack frame size for the full function's body.
+ pub fn frame_size(&self) -> u32 {
+ self.abi.frame_size()
+ }
+
+ /// Inbound stack-args size.
+ pub fn stack_args_size(&self) -> u32 {
+ self.abi.stack_args_size()
+ }
+
+ /// Get the successors for a block.
+ pub fn succs(&self, block: BlockIndex) -> &[BlockIx] {
+ let (start, end) = self.block_succ_range[block as usize];
+ &self.block_succs[start..end]
+ }
+
+ /// Take the results of register allocation, with a sequence of
+ /// instructions including spliced fill/reload/move instructions, and replace
+ /// the VCode with them.
+ pub fn replace_insns_from_regalloc(&mut self, result: RegAllocResult<Self>) {
+ // Record the spillslot count and clobbered registers for the ABI/stack
+ // setup code.
+ self.abi.set_num_spillslots(result.num_spill_slots as usize);
+ self.abi
+ .set_clobbered(result.clobbered_registers.map(|r| Writable::from_reg(*r)));
+
+ let mut final_insns = vec![];
+ let mut final_block_ranges = vec![(0, 0); self.num_blocks()];
+ let mut final_srclocs = vec![];
+ let mut final_safepoint_insns = vec![];
+ let mut safept_idx = 0;
+
+ let mut prologue_start = None;
+ let mut prologue_end = None;
+ let mut epilogue_islands = vec![];
+
+ assert!(result.target_map.elems().len() == self.num_blocks());
+ for block in 0..self.num_blocks() {
+ let start = result.target_map.elems()[block].get() as usize;
+ let end = if block == self.num_blocks() - 1 {
+ result.insns.len()
+ } else {
+ result.target_map.elems()[block + 1].get() as usize
+ };
+ let block = block as BlockIndex;
+ let final_start = final_insns.len() as InsnIndex;
+
+ if block == self.entry {
+ prologue_start = Some(final_insns.len() as InsnIndex);
+ // Start with the prologue.
+ let prologue = self.abi.gen_prologue();
+ let len = prologue.len();
+ final_insns.extend(prologue.into_iter());
+ final_srclocs.extend(iter::repeat(SourceLoc::default()).take(len));
+ prologue_end = Some(final_insns.len() as InsnIndex);
+ }
+
+ for i in start..end {
+ let insn = &result.insns[i];
+
+ // Elide redundant moves at this point (we only know what is
+ // redundant once registers are allocated).
+ if is_redundant_move(insn) {
+ continue;
+ }
+
+ // Is there a srcloc associated with this insn? Look it up based on original
+ // instruction index (if new insn corresponds to some original insn, i.e., is not
+ // an inserted load/spill/move).
+ let orig_iix = result.orig_insn_map[InstIx::new(i as u32)];
+ let srcloc = if orig_iix.is_invalid() {
+ SourceLoc::default()
+ } else {
+ self.srclocs[orig_iix.get() as usize]
+ };
+
+ // Whenever encountering a return instruction, replace it
+ // with the epilogue.
+ let is_ret = insn.is_term() == MachTerminator::Ret;
+ if is_ret {
+ let epilogue_start = final_insns.len() as InsnIndex;
+ let epilogue = self.abi.gen_epilogue();
+ let len = epilogue.len();
+ final_insns.extend(epilogue.into_iter());
+ final_srclocs.extend(iter::repeat(srcloc).take(len));
+ epilogue_islands.push(epilogue_start..final_insns.len() as InsnIndex);
+ } else {
+ final_insns.push(insn.clone());
+ final_srclocs.push(srcloc);
+ }
+
+ // Was this instruction a safepoint instruction? Add its final
+ // index to the safepoint insn-index list if so.
+ if safept_idx < result.new_safepoint_insns.len()
+ && (result.new_safepoint_insns[safept_idx].get() as usize) == i
+ {
+ let idx = final_insns.len() - 1;
+ final_safepoint_insns.push(idx as InsnIndex);
+ safept_idx += 1;
+ }
+ }
+
+ let final_end = final_insns.len() as InsnIndex;
+ final_block_ranges[block as usize] = (final_start, final_end);
+ }
+
+ debug_assert!(final_insns.len() == final_srclocs.len());
+
+ self.insts = final_insns;
+ self.srclocs = final_srclocs;
+ self.block_ranges = final_block_ranges;
+ self.safepoint_insns = final_safepoint_insns;
+
+ // Save safepoint slot-lists. These will be passed to the `EmitState`
+ // for the machine backend during emission so that it can do
+ // target-specific translations of slot numbers to stack offsets.
+ self.safepoint_slots = result.stackmaps;
+
+ self.prologue_epilogue_ranges = Some((
+ prologue_start.unwrap()..prologue_end.unwrap(),
+ epilogue_islands.into_boxed_slice(),
+ ));
+ }
+
+ /// Emit the instructions to a `MachBuffer`, containing fixed-up code and external
+ /// reloc/trap/etc. records ready for use.
+ pub fn emit(&self) -> MachBuffer<I>
+ where
+ I: MachInstEmit,
+ {
+ let _tt = timing::vcode_emit();
+ let mut buffer = MachBuffer::new();
+ let mut state = I::State::new(&*self.abi);
+
+ // The first M MachLabels are reserved for block indices, the next N MachLabels for
+ // constants.
+ buffer.reserve_labels_for_blocks(self.num_blocks() as BlockIndex);
+ buffer.reserve_labels_for_constants(&self.constants);
+
+ let mut insts_layout = vec![0; self.insts.len()];
+
+ let mut safepoint_idx = 0;
+ let mut cur_srcloc = None;
+ for block in 0..self.num_blocks() {
+ let block = block as BlockIndex;
+ let new_offset = I::align_basic_block(buffer.cur_offset());
+ while new_offset > buffer.cur_offset() {
+ // Pad with NOPs up to the aligned block offset.
+ let nop = I::gen_nop((new_offset - buffer.cur_offset()) as usize);
+ nop.emit(&mut buffer, &self.emit_info, &mut Default::default());
+ }
+ assert_eq!(buffer.cur_offset(), new_offset);
+
+ let (start, end) = self.block_ranges[block as usize];
+ buffer.bind_label(MachLabel::from_block(block));
+ for iix in start..end {
+ let srcloc = self.srclocs[iix as usize];
+ if cur_srcloc != Some(srcloc) {
+ if cur_srcloc.is_some() {
+ buffer.end_srcloc();
+ }
+ buffer.start_srcloc(srcloc);
+ cur_srcloc = Some(srcloc);
+ }
+ state.pre_sourceloc(cur_srcloc.unwrap_or(SourceLoc::default()));
+
+ if safepoint_idx < self.safepoint_insns.len()
+ && self.safepoint_insns[safepoint_idx] == iix
+ {
+ if self.safepoint_slots[safepoint_idx].len() > 0 {
+ let stack_map = self.abi.spillslots_to_stack_map(
+ &self.safepoint_slots[safepoint_idx][..],
+ &state,
+ );
+ state.pre_safepoint(stack_map);
+ }
+ safepoint_idx += 1;
+ }
+
+ self.insts[iix as usize].emit(&mut buffer, &self.emit_info, &mut state);
+
+ insts_layout[iix as usize] = buffer.cur_offset();
+ }
+
+ if cur_srcloc.is_some() {
+ buffer.end_srcloc();
+ cur_srcloc = None;
+ }
+
+ // Do we need an island? Get the worst-case size of the next BB and see if, having
+ // emitted that many bytes, we will be beyond the deadline.
+ if block < (self.num_blocks() - 1) as BlockIndex {
+ let next_block = block + 1;
+ let next_block_range = self.block_ranges[next_block as usize];
+ let next_block_size = next_block_range.1 - next_block_range.0;
+ let worst_case_next_bb = I::worst_case_size() * next_block_size;
+ if buffer.island_needed(worst_case_next_bb) {
+ buffer.emit_island();
+ }
+ }
+ }
+
+ // Emit the constants used by the function.
+ for (constant, data) in self.constants.iter() {
+ let label = buffer.get_label_for_constant(constant);
+ buffer.defer_constant(label, data.alignment(), data.as_slice(), u32::max_value());
+ }
+
+ *self.insts_layout.borrow_mut() = (insts_layout, buffer.cur_offset());
+
+ buffer
+ }
+
+ /// Generates unwind info.
+ pub fn unwind_info(
+ &self,
+ ) -> crate::result::CodegenResult<Option<crate::isa::unwind::input::UnwindInfo<Reg>>> {
+ let layout = &self.insts_layout.borrow();
+ let (prologue, epilogues) = self.prologue_epilogue_ranges.as_ref().unwrap();
+ let context = UnwindInfoContext {
+ insts: &self.insts,
+ insts_layout: &layout.0,
+ len: layout.1,
+ prologue: prologue.clone(),
+ epilogues,
+ };
+ I::UnwindInfo::create_unwind_info(context)
+ }
+
+ /// Get the IR block for a BlockIndex, if one exists.
+ pub fn bindex_to_bb(&self, block: BlockIndex) -> Option<ir::Block> {
+ self.block_order.lowered_order()[block as usize].orig_block()
+ }
+}
+
+impl<I: VCodeInst> RegallocFunction for VCode<I> {
+ type Inst = I;
+
+ fn insns(&self) -> &[I] {
+ &self.insts[..]
+ }
+
+ fn insns_mut(&mut self) -> &mut [I] {
+ &mut self.insts[..]
+ }
+
+ fn get_insn(&self, insn: InstIx) -> &I {
+ &self.insts[insn.get() as usize]
+ }
+
+ fn get_insn_mut(&mut self, insn: InstIx) -> &mut I {
+ &mut self.insts[insn.get() as usize]
+ }
+
+ fn blocks(&self) -> Range<BlockIx> {
+ Range::new(BlockIx::new(0), self.block_ranges.len())
+ }
+
+ fn entry_block(&self) -> BlockIx {
+ BlockIx::new(self.entry)
+ }
+
+ fn block_insns(&self, block: BlockIx) -> Range<InstIx> {
+ let (start, end) = self.block_ranges[block.get() as usize];
+ Range::new(InstIx::new(start), (end - start) as usize)
+ }
+
+ fn block_succs(&self, block: BlockIx) -> Cow<[BlockIx]> {
+ let (start, end) = self.block_succ_range[block.get() as usize];
+ Cow::Borrowed(&self.block_succs[start..end])
+ }
+
+ fn is_ret(&self, insn: InstIx) -> bool {
+ match self.insts[insn.get() as usize].is_term() {
+ MachTerminator::Ret => true,
+ _ => false,
+ }
+ }
+
+ fn is_included_in_clobbers(&self, insn: &I) -> bool {
+ insn.is_included_in_clobbers()
+ }
+
+ fn get_regs(insn: &I, collector: &mut RegUsageCollector) {
+ insn.get_regs(collector)
+ }
+
+ fn map_regs<RUM: RegUsageMapper>(insn: &mut I, mapper: &RUM) {
+ insn.map_regs(mapper);
+ }
+
+ fn is_move(&self, insn: &I) -> Option<(Writable<Reg>, Reg)> {
+ insn.is_move()
+ }
+
+ fn get_num_vregs(&self) -> usize {
+ self.vreg_types.len()
+ }
+
+ fn get_spillslot_size(&self, regclass: RegClass, vreg: VirtualReg) -> u32 {
+ let ty = self.vreg_type(vreg);
+ self.abi.get_spillslot_size(regclass, ty)
+ }
+
+ fn gen_spill(&self, to_slot: SpillSlot, from_reg: RealReg, vreg: Option<VirtualReg>) -> I {
+ let ty = vreg.map(|v| self.vreg_type(v));
+ self.abi.gen_spill(to_slot, from_reg, ty)
+ }
+
+ fn gen_reload(
+ &self,
+ to_reg: Writable<RealReg>,
+ from_slot: SpillSlot,
+ vreg: Option<VirtualReg>,
+ ) -> I {
+ let ty = vreg.map(|v| self.vreg_type(v));
+ self.abi.gen_reload(to_reg, from_slot, ty)
+ }
+
+ fn gen_move(&self, to_reg: Writable<RealReg>, from_reg: RealReg, vreg: VirtualReg) -> I {
+ let ty = self.vreg_type(vreg);
+ I::gen_move(to_reg.map(|r| r.to_reg()), from_reg.to_reg(), ty)
+ }
+
+ fn gen_zero_len_nop(&self) -> I {
+ I::gen_zero_len_nop()
+ }
+
+ fn maybe_direct_reload(&self, insn: &I, reg: VirtualReg, slot: SpillSlot) -> Option<I> {
+ insn.maybe_direct_reload(reg, slot)
+ }
+
+ fn func_liveins(&self) -> RegallocSet<RealReg> {
+ self.liveins.clone()
+ }
+
+ fn func_liveouts(&self) -> RegallocSet<RealReg> {
+ self.liveouts.clone()
+ }
+}
+
+impl<I: VCodeInst> fmt::Debug for VCode<I> {
+ fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
+ writeln!(f, "VCode_Debug {{")?;
+ writeln!(f, " Entry block: {}", self.entry)?;
+
+ for block in 0..self.num_blocks() {
+ writeln!(f, "Block {}:", block,)?;
+ for succ in self.succs(block as BlockIndex) {
+ writeln!(f, " (successor: Block {})", succ.get())?;
+ }
+ let (start, end) = self.block_ranges[block];
+ writeln!(f, " (instruction range: {} .. {})", start, end)?;
+ for inst in start..end {
+ writeln!(f, " Inst {}: {:?}", inst, self.insts[inst as usize])?;
+ }
+ }
+
+ writeln!(f, "}}")?;
+ Ok(())
+ }
+}
+
+/// Pretty-printing with `RealRegUniverse` context.
+impl<I: VCodeInst> PrettyPrint for VCode<I> {
+ fn show_rru(&self, mb_rru: Option<&RealRegUniverse>) -> String {
+ use std::fmt::Write;
+
+ let mut s = String::new();
+ write!(&mut s, "VCode_ShowWithRRU {{{{\n").unwrap();
+ write!(&mut s, " Entry block: {}\n", self.entry).unwrap();
+
+ let mut state = Default::default();
+ let mut safepoint_idx = 0;
+ for i in 0..self.num_blocks() {
+ let block = i as BlockIndex;
+
+ write!(&mut s, "Block {}:\n", block).unwrap();
+ if let Some(bb) = self.bindex_to_bb(block) {
+ write!(&mut s, " (original IR block: {})\n", bb).unwrap();
+ }
+ for succ in self.succs(block) {
+ write!(&mut s, " (successor: Block {})\n", succ.get()).unwrap();
+ }
+ let (start, end) = self.block_ranges[block as usize];
+ write!(&mut s, " (instruction range: {} .. {})\n", start, end).unwrap();
+ for inst in start..end {
+ if safepoint_idx < self.safepoint_insns.len()
+ && self.safepoint_insns[safepoint_idx] == inst
+ {
+ write!(
+ &mut s,
+ " (safepoint: slots {:?} with EmitState {:?})\n",
+ self.safepoint_slots[safepoint_idx], state,
+ )
+ .unwrap();
+ safepoint_idx += 1;
+ }
+ write!(
+ &mut s,
+ " Inst {}: {}\n",
+ inst,
+ self.insts[inst as usize].pretty_print(mb_rru, &mut state)
+ )
+ .unwrap();
+ }
+ }
+
+ write!(&mut s, "}}}}\n").unwrap();
+
+ s
+ }
+}
+
+/// This structure tracks the large constants used in VCode that will be emitted separately by the
+/// [MachBuffer].
+///
+/// First, during the lowering phase, constants are inserted using
+/// [VCodeConstants.insert]; an intermediate handle, [VCodeConstant], tracks what constants are
+/// used in this phase. Some deduplication is performed, when possible, as constant
+/// values are inserted.
+///
+/// Secondly, during the emission phase, the [MachBuffer] assigns [MachLabel]s for each of the
+/// constants so that instructions can refer to the value's memory location. The [MachBuffer]
+/// then writes the constant values to the buffer.
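+///
+/// A typical flow, as a sketch: lowering calls `insert()` and embeds the returned
+/// [VCodeConstant] in an instruction's memory operand; at emission time, `VCode::emit`
+/// looks up the constant's label with `get_label_for_constant()` and hands the bytes to
+/// the buffer with `defer_constant()`, so the operand resolves to a label-relative
+/// address once the constant data is placed.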
+#[derive(Default)]
+pub struct VCodeConstants {
+ constants: PrimaryMap<VCodeConstant, VCodeConstantData>,
+ pool_uses: HashMap<Constant, VCodeConstant>,
+ well_known_uses: HashMap<*const [u8], VCodeConstant>,
+}
+impl VCodeConstants {
+ /// Initialize the structure with the expected number of constants.
+ pub fn with_capacity(expected_num_constants: usize) -> Self {
+ Self {
+ constants: PrimaryMap::with_capacity(expected_num_constants),
+ pool_uses: HashMap::with_capacity(expected_num_constants),
+ well_known_uses: HashMap::new(),
+ }
+ }
+
+ /// Insert a constant; calling this method indicates that the constant's value will be used
+ /// and must therefore be emitted by the [MachBuffer]. The current implementation deduplicates
+ /// constants that are [VCodeConstantData::Pool] or [VCodeConstantData::WellKnown], but not
+ /// [VCodeConstantData::Generated] ones.
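+ ///
+ /// A short illustration of these rules (marked `ignore` since it is illustrative only; it
+ /// assumes `ConstantData` implements `From<Vec<u8>>`):
+ ///
+ /// ```ignore
+ /// static MASK: [u8; 16] = [0x80; 16];
+ /// let mut constants = VCodeConstants::with_capacity(2);
+ /// // The same well-known slice always maps back to the same handle...
+ /// let a = constants.insert(VCodeConstantData::WellKnown(&MASK));
+ /// let b = constants.insert(VCodeConstantData::WellKnown(&MASK));
+ /// assert_eq!(a, b);
+ /// // ...whereas generated data always receives a fresh handle.
+ /// let c = constants.insert(VCodeConstantData::Generated(ConstantData::from([1u8, 2].to_vec())));
+ /// let d = constants.insert(VCodeConstantData::Generated(ConstantData::from([1u8, 2].to_vec())));
+ /// assert_ne!(c, d);
+ /// assert_eq!(constants.len(), 3);
+ /// ```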
+ pub fn insert(&mut self, data: VCodeConstantData) -> VCodeConstant {
+ match data {
+ VCodeConstantData::Generated(_) => self.constants.push(data),
+ VCodeConstantData::Pool(constant, _) => match self.pool_uses.get(&constant) {
+ None => {
+ let vcode_constant = self.constants.push(data);
+ self.pool_uses.insert(constant, vcode_constant);
+ vcode_constant
+ }
+ Some(&vcode_constant) => vcode_constant,
+ },
+ VCodeConstantData::WellKnown(data_ref) => {
+ match self.well_known_uses.get(&(data_ref as *const [u8])) {
+ None => {
+ let vcode_constant = self.constants.push(data);
+ self.well_known_uses
+ .insert(data_ref as *const [u8], vcode_constant);
+ vcode_constant
+ }
+ Some(&vcode_constant) => vcode_constant,
+ }
+ }
+ }
+ }
+
+ /// Retrieve a byte slice for the given [VCodeConstant], if available.
+ pub fn get(&self, constant: VCodeConstant) -> Option<&[u8]> {
+ self.constants.get(constant).map(|d| d.as_slice())
+ }
+
+ /// Return the number of constants inserted.
+ pub fn len(&self) -> usize {
+ self.constants.len()
+ }
+
+ /// Iterate over the [VCodeConstant] keys inserted in this structure.
+ pub fn keys(&self) -> Keys<VCodeConstant> {
+ self.constants.keys()
+ }
+
+ /// Iterate over the [VCodeConstant] keys and their [VCodeConstantData] values inserted in
+ /// this structure.
+ pub fn iter(&self) -> impl Iterator<Item = (VCodeConstant, &VCodeConstantData)> {
+ self.constants.iter()
+ }
+}
+
+/// A handle to a constant inserted into [VCodeConstants] and referenced by one or more VCode
+/// instructions.
+#[derive(Clone, Copy, Debug, PartialEq, Eq)]
+pub struct VCodeConstant(u32);
+entity_impl!(VCodeConstant);
+
+/// The kinds of constant that can be inserted into [VCodeConstants]. Tracking these
+/// separately, rather than as raw byte buffers, allows some duplication to be avoided.
+pub enum VCodeConstantData {
+ /// A constant already present in the Cranelift IR
+ /// [ConstantPool](crate::ir::constant::ConstantPool).
+ Pool(Constant, ConstantData),
+ /// A reference to a well-known constant value that is statically encoded within the compiler.
+ WellKnown(&'static [u8]),
+ /// A constant value generated during lowering; the value may depend on the instruction
+ /// context, which makes it difficult to deduplicate. Prefer the other variants where possible.
+ Generated(ConstantData),
+}
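+
+// For orientation, a hedged sketch of how each variant is typically constructed (`constant` and
+// `data` stand for a hypothetical IR `Constant` handle and its `ConstantData`, and
+// `SOME_STATIC_BYTES` for a static byte array):
+//
+// VCodeConstantData::Pool(constant, data) // deduplicated by `Constant` key
+// VCodeConstantData::WellKnown(&SOME_STATIC_BYTES) // deduplicated by pointer identity
+// VCodeConstantData::Generated(data) // never deduplicated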
+impl VCodeConstantData {
+ /// Retrieve the constant data as a byte slice.
+ pub fn as_slice(&self) -> &[u8] {
+ match self {
+ VCodeConstantData::Pool(_, d) | VCodeConstantData::Generated(d) => d.as_slice(),
+ VCodeConstantData::WellKnown(d) => d,
+ }
+ }
+
+ /// Calculate the alignment of the constant data: values of eight bytes or fewer are aligned
+ /// to 8 bytes; larger values are aligned to 16 bytes.
+ pub fn alignment(&self) -> u32 {
+ if self.as_slice().len() <= 8 {
+ 8
+ } else {
+ 16
+ }
+ }
+}
+
+#[cfg(test)]
+mod test {
+ use super::*;
+ use std::mem::size_of;
+
+ #[test]
+ fn size_of_constant_structs() {
+ assert_eq!(size_of::<Constant>(), 4);
+ assert_eq!(size_of::<VCodeConstant>(), 4);
+ assert_eq!(size_of::<ConstantData>(), 24);
+ assert_eq!(size_of::<VCodeConstantData>(), 32);
+ assert_eq!(
+ size_of::<PrimaryMap<VCodeConstant, VCodeConstantData>>(),
+ 24
+ );
+ // TODO The VCodeConstants structure's memory size could be further optimized.
+ // With certain versions of Rust, each `HashMap` in `VCodeConstants` occupied at
+ // least 48 bytes, making an empty `VCodeConstants` cost 120 bytes.
+ }
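+
+ // An illustrative sketch of the alignment rule implemented by `VCodeConstantData::alignment`:
+ // data of eight bytes or fewer is 8-byte aligned, anything larger is 16-byte aligned. The
+ // test assumes `ConstantData` implements `From<Vec<u8>>`.
+ #[test]
+ fn alignment_of_constant_data() {
+ let small = VCodeConstantData::Generated(ConstantData::from([0u8; 8].to_vec()));
+ let large = VCodeConstantData::Generated(ConstantData::from([0u8; 9].to_vec()));
+ assert_eq!(small.alignment(), 8);
+ assert_eq!(large.alignment(), 16);
+ }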
+}