author | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-04-28 14:29:10 +0000
committer | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-04-28 14:29:10 +0000
commit | 2aa4a82499d4becd2284cdb482213d541b8804dd (patch)
tree | b80bf8bf13c3766139fbacc530efd0dd9d54394c /third_party/rust/cranelift-codegen/src/binemit
parent | Initial commit. (diff)
Adding upstream version 86.0.1.
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'third_party/rust/cranelift-codegen/src/binemit')
5 files changed, 1129 insertions, 0 deletions
diff --git a/third_party/rust/cranelift-codegen/src/binemit/memorysink.rs b/third_party/rust/cranelift-codegen/src/binemit/memorysink.rs new file mode 100644 index 0000000000..d50d1c10eb --- /dev/null +++ b/third_party/rust/cranelift-codegen/src/binemit/memorysink.rs @@ -0,0 +1,232 @@ +//! Code sink that writes binary machine code into contiguous memory. +//! +//! The `CodeSink` trait is the most general way of extracting binary machine code from Cranelift, +//! and it is implemented by things like the `test binemit` file test driver to generate +//! hexadecimal machine code. The `CodeSink` has some undesirable performance properties because of +//! the dual abstraction: `TargetIsa` is a trait object implemented by each supported ISA, so it +//! can't have any generic functions that could be specialized for each `CodeSink` implementation. +//! This results in many virtual function callbacks (one per `put*` call) when +//! `TargetIsa::emit_inst()` is used. +//! +//! The `MemoryCodeSink` type fixes the performance problem because it is a type known to +//! `TargetIsa` so it can specialize its machine code generation for the type. The trade-off is +//! that a `MemoryCodeSink` will always write binary machine code to raw memory. It forwards any +//! relocations to a `RelocSink` trait object. Relocations are less frequent than the +//! `CodeSink::put*` methods, so the performance impact of the virtual callbacks is less severe. +use super::{Addend, CodeInfo, CodeOffset, CodeSink, Reloc}; +use crate::binemit::stack_map::StackMap; +use crate::ir::entities::Value; +use crate::ir::{ConstantOffset, ExternalName, Function, JumpTable, Opcode, SourceLoc, TrapCode}; +use crate::isa::TargetIsa; +use core::ptr::write_unaligned; + +/// A `CodeSink` that writes binary machine code directly into memory. +/// +/// A `MemoryCodeSink` object should be used when emitting a Cranelift IR function into executable +/// memory. It writes machine code directly to a raw pointer without any bounds checking, so make +/// sure to allocate enough memory for the whole function. The number of bytes required is returned +/// by the `Context::compile()` function. +/// +/// Any relocations in the function are forwarded to the `RelocSink` trait object. +/// +/// Note that `MemoryCodeSink` writes multi-byte values in the native byte order of the host. This +/// is not the right thing to do for cross compilation. +pub struct MemoryCodeSink<'a> { + /// Pointer to start of sink's preallocated memory. + data: *mut u8, + /// Offset is isize because its major consumer needs it in that form. + offset: isize, + relocs: &'a mut dyn RelocSink, + traps: &'a mut dyn TrapSink, + stack_maps: &'a mut dyn StackMapSink, + /// Information about the generated code and read-only data. + pub info: CodeInfo, +} + +impl<'a> MemoryCodeSink<'a> { + /// Create a new memory code sink that writes a function to the memory pointed to by `data`. + /// + /// # Safety + /// + /// This function is unsafe since `MemoryCodeSink` does not perform bounds checking on the + /// memory buffer, and it can't guarantee that the `data` pointer is valid. + pub unsafe fn new( + data: *mut u8, + relocs: &'a mut dyn RelocSink, + traps: &'a mut dyn TrapSink, + stack_maps: &'a mut dyn StackMapSink, + ) -> Self { + Self { + data, + offset: 0, + info: CodeInfo { + code_size: 0, + jumptables_size: 0, + rodata_size: 0, + total_size: 0, + }, + relocs, + traps, + stack_maps, + } + } +} + +/// A trait for receiving relocations for code that is emitted directly into memory. 
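Before the `RelocSink` trait that follows, here is a minimal sketch (not part of the diff) of wiring a `MemoryCodeSink` to a caller-provided buffer using the null sinks this module re-exports; the helper name and the assumption that the buffer was sized by an earlier compile step are mine.

```rust
use cranelift_codegen::binemit::{
    MemoryCodeSink, NullRelocSink, NullStackMapSink, NullTrapSink,
};

/// Hypothetical helper: wrap a preallocated buffer in a `MemoryCodeSink`.
/// The caller must make `buf` at least as large as the function being
/// emitted (e.g. the size reported by an earlier compile step).
fn sink_for<'a>(
    buf: &'a mut [u8],
    relocs: &'a mut NullRelocSink,
    traps: &'a mut NullTrapSink,
    stack_maps: &'a mut NullStackMapSink,
) -> MemoryCodeSink<'a> {
    // Safety: `MemoryCodeSink` performs no bounds checking, so `buf` must
    // cover the whole function, as the doc comment above requires.
    unsafe { MemoryCodeSink::new(buf.as_mut_ptr(), relocs, traps, stack_maps) }
}
```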
+pub trait RelocSink { + /// Add a relocation referencing an external symbol at the current offset. + fn reloc_external( + &mut self, + _: CodeOffset, + _: SourceLoc, + _: Reloc, + _: &ExternalName, + _: Addend, + ); + + /// Add a relocation referencing a constant. + fn reloc_constant(&mut self, _: CodeOffset, _: Reloc, _: ConstantOffset); + + /// Add a relocation referencing a jump table. + fn reloc_jt(&mut self, _: CodeOffset, _: Reloc, _: JumpTable); + + /// Track a call site whose return address is the given CodeOffset, for the given opcode. Does + /// nothing in general, only useful for certain embedders (SpiderMonkey). + fn add_call_site(&mut self, _: Opcode, _: CodeOffset, _: SourceLoc) {} +} + +/// A trait for receiving trap codes and offsets. +/// +/// If you don't need information about possible traps, you can use the +/// [`NullTrapSink`](NullTrapSink) implementation. +pub trait TrapSink { + /// Add trap information for a specific offset. + fn trap(&mut self, _: CodeOffset, _: SourceLoc, _: TrapCode); +} + +impl<'a> MemoryCodeSink<'a> { + fn write<T>(&mut self, x: T) { + unsafe { + #[cfg_attr(feature = "cargo-clippy", allow(clippy::cast_ptr_alignment))] + write_unaligned(self.data.offset(self.offset) as *mut T, x); + self.offset += core::mem::size_of::<T>() as isize; + } + } +} + +impl<'a> CodeSink for MemoryCodeSink<'a> { + fn offset(&self) -> CodeOffset { + self.offset as CodeOffset + } + + fn put1(&mut self, x: u8) { + self.write(x); + } + + fn put2(&mut self, x: u16) { + self.write(x); + } + + fn put4(&mut self, x: u32) { + self.write(x); + } + + fn put8(&mut self, x: u64) { + self.write(x); + } + + fn reloc_external( + &mut self, + srcloc: SourceLoc, + rel: Reloc, + name: &ExternalName, + addend: Addend, + ) { + let ofs = self.offset(); + self.relocs.reloc_external(ofs, srcloc, rel, name, addend); + } + + fn reloc_constant(&mut self, rel: Reloc, constant_offset: ConstantOffset) { + let ofs = self.offset(); + self.relocs.reloc_constant(ofs, rel, constant_offset); + } + + fn reloc_jt(&mut self, rel: Reloc, jt: JumpTable) { + let ofs = self.offset(); + self.relocs.reloc_jt(ofs, rel, jt); + } + + fn trap(&mut self, code: TrapCode, srcloc: SourceLoc) { + let ofs = self.offset(); + self.traps.trap(ofs, srcloc, code); + } + + fn begin_jumptables(&mut self) { + self.info.code_size = self.offset(); + } + + fn begin_rodata(&mut self) { + self.info.jumptables_size = self.offset() - self.info.code_size; + } + + fn end_codegen(&mut self) { + self.info.rodata_size = self.offset() - (self.info.jumptables_size + self.info.code_size); + self.info.total_size = self.offset(); + } + + fn add_stack_map(&mut self, val_list: &[Value], func: &Function, isa: &dyn TargetIsa) { + let ofs = self.offset(); + let stack_map = StackMap::from_values(&val_list, func, isa); + self.stack_maps.add_stack_map(ofs, stack_map); + } + + fn add_call_site(&mut self, opcode: Opcode, loc: SourceLoc) { + debug_assert!( + opcode.is_call(), + "adding call site info for a non-call instruction." + ); + let ret_addr = self.offset(); + self.relocs.add_call_site(opcode, ret_addr, loc); + } +} + +/// A `RelocSink` implementation that does nothing, which is convenient when +/// compiling code that does not relocate anything. 
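The `NullRelocSink` defined below is the simplest possible implementation. As a contrast, here is a hypothetical sketch (not from the diff) of sinks that record relocations and traps for a later link step, using only the trait methods declared above; the struct names are illustrative.

```rust
use cranelift_codegen::binemit::{Addend, CodeOffset, Reloc, RelocSink, TrapSink};
use cranelift_codegen::ir::{ConstantOffset, ExternalName, JumpTable, SourceLoc, TrapCode};

/// Hypothetical sink that remembers external relocations for later linking.
#[derive(Default)]
struct RecordingRelocSink {
    externals: Vec<(CodeOffset, Reloc, ExternalName, Addend)>,
}

impl RelocSink for RecordingRelocSink {
    fn reloc_external(
        &mut self,
        offset: CodeOffset,
        _srcloc: SourceLoc,
        reloc: Reloc,
        name: &ExternalName,
        addend: Addend,
    ) {
        self.externals.push((offset, reloc, name.clone(), addend));
    }
    fn reloc_constant(&mut self, _: CodeOffset, _: Reloc, _: ConstantOffset) {}
    fn reloc_jt(&mut self, _: CodeOffset, _: Reloc, _: JumpTable) {}
}

/// Hypothetical sink that remembers trap locations and their codes.
#[derive(Default)]
struct RecordingTrapSink {
    traps: Vec<(CodeOffset, TrapCode)>,
}

impl TrapSink for RecordingTrapSink {
    fn trap(&mut self, offset: CodeOffset, _srcloc: SourceLoc, code: TrapCode) {
        self.traps.push((offset, code));
    }
}
```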
+#[derive(Default)] +pub struct NullRelocSink {} + +impl RelocSink for NullRelocSink { + fn reloc_external( + &mut self, + _: CodeOffset, + _: SourceLoc, + _: Reloc, + _: &ExternalName, + _: Addend, + ) { + } + fn reloc_constant(&mut self, _: CodeOffset, _: Reloc, _: ConstantOffset) {} + fn reloc_jt(&mut self, _: CodeOffset, _: Reloc, _: JumpTable) {} +} + +/// A `TrapSink` implementation that does nothing, which is convenient when +/// compiling code that does not rely on trapping semantics. +#[derive(Default)] +pub struct NullTrapSink {} + +impl TrapSink for NullTrapSink { + fn trap(&mut self, _offset: CodeOffset, _srcloc: SourceLoc, _code: TrapCode) {} +} + +/// A trait for emitting stack maps. +pub trait StackMapSink { + /// Output a bitmap of the stack representing the live reference variables at this code offset. + fn add_stack_map(&mut self, _: CodeOffset, _: StackMap); +} + +/// Placeholder StackMapSink that does nothing. +pub struct NullStackMapSink {} + +impl StackMapSink for NullStackMapSink { + fn add_stack_map(&mut self, _: CodeOffset, _: StackMap) {} +} diff --git a/third_party/rust/cranelift-codegen/src/binemit/mod.rs b/third_party/rust/cranelift-codegen/src/binemit/mod.rs new file mode 100644 index 0000000000..b534ec9765 --- /dev/null +++ b/third_party/rust/cranelift-codegen/src/binemit/mod.rs @@ -0,0 +1,222 @@ +//! Binary machine code emission. +//! +//! The `binemit` module contains code for translating Cranelift's intermediate representation into +//! binary machine code. + +mod memorysink; +mod relaxation; +mod shrink; +mod stack_map; + +pub use self::memorysink::{ + MemoryCodeSink, NullRelocSink, NullStackMapSink, NullTrapSink, RelocSink, StackMapSink, + TrapSink, +}; +pub use self::relaxation::relax_branches; +pub use self::shrink::shrink_instructions; +pub use self::stack_map::StackMap; +use crate::ir::entities::Value; +use crate::ir::{ + ConstantOffset, ExternalName, Function, Inst, JumpTable, Opcode, SourceLoc, TrapCode, +}; +use crate::isa::TargetIsa; +pub use crate::regalloc::RegDiversions; +use core::fmt; +#[cfg(feature = "enable-serde")] +use serde::{Deserialize, Serialize}; + +/// Offset in bytes from the beginning of the function. +/// +/// Cranelift can be used as a cross compiler, so we don't want to use a type like `usize` which +/// depends on the *host* platform, not the *target* platform. +pub type CodeOffset = u32; + +/// Addend to add to the symbol value. +pub type Addend = i64; + +/// Relocation kinds for every ISA +#[derive(Copy, Clone, Debug, PartialEq, Eq)] +#[cfg_attr(feature = "enable-serde", derive(Serialize, Deserialize))] +pub enum Reloc { + /// absolute 4-byte + Abs4, + /// absolute 8-byte + Abs8, + /// x86 PC-relative 4-byte + X86PCRel4, + /// x86 PC-relative 4-byte offset to trailing rodata + X86PCRelRodata4, + /// x86 call to PC-relative 4-byte + X86CallPCRel4, + /// x86 call to PLT-relative 4-byte + X86CallPLTRel4, + /// x86 GOT PC-relative 4-byte + X86GOTPCRel4, + /// Arm32 call target + Arm32Call, + /// Arm64 call target. Encoded as bottom 26 bits of instruction. This + /// value is sign-extended, multiplied by 4, and added to the PC of + /// the call instruction to form the destination address. + Arm64Call, + /// RISC-V call target + RiscvCall, + + /// Elf x86_64 32 bit signed PC relative offset to two GOT entries for GD symbol. + ElfX86_64TlsGd, + + /// Mach-O x86_64 32 bit signed PC relative offset to a `__thread_vars` entry. 
+ MachOX86_64Tlv, +} + +impl fmt::Display for Reloc { + /// Display trait implementation drops the arch, since its used in contexts where the arch is + /// already unambiguous, e.g. clif syntax with isa specified. In other contexts, use Debug. + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + match *self { + Self::Abs4 => write!(f, "Abs4"), + Self::Abs8 => write!(f, "Abs8"), + Self::X86PCRel4 => write!(f, "PCRel4"), + Self::X86PCRelRodata4 => write!(f, "PCRelRodata4"), + Self::X86CallPCRel4 => write!(f, "CallPCRel4"), + Self::X86CallPLTRel4 => write!(f, "CallPLTRel4"), + Self::X86GOTPCRel4 => write!(f, "GOTPCRel4"), + Self::Arm32Call | Self::Arm64Call | Self::RiscvCall => write!(f, "Call"), + + Self::ElfX86_64TlsGd => write!(f, "ElfX86_64TlsGd"), + Self::MachOX86_64Tlv => write!(f, "MachOX86_64Tlv"), + } + } +} + +/// Container for information about a vector of compiled code and its supporting read-only data. +/// +/// The code starts at offset 0 and is followed optionally by relocatable jump tables and copyable +/// (raw binary) read-only data. Any padding between sections is always part of the section that +/// precedes the boundary between the sections. +#[derive(PartialEq)] +pub struct CodeInfo { + /// Number of bytes of machine code (the code starts at offset 0). + pub code_size: CodeOffset, + + /// Number of bytes of jumptables. + pub jumptables_size: CodeOffset, + + /// Number of bytes of rodata. + pub rodata_size: CodeOffset, + + /// Number of bytes in total. + pub total_size: CodeOffset, +} + +impl CodeInfo { + /// Offset of any relocatable jump tables, or equal to rodata if there are no jump tables. + pub fn jumptables(&self) -> CodeOffset { + self.code_size + } + + /// Offset of any copyable read-only data, or equal to total_size if there are no rodata. + pub fn rodata(&self) -> CodeOffset { + self.code_size + self.jumptables_size + } +} + +/// Abstract interface for adding bytes to the code segment. +/// +/// A `CodeSink` will receive all of the machine code for a function. It also accepts relocations +/// which are locations in the code section that need to be fixed up when linking. +pub trait CodeSink { + /// Get the current position. + fn offset(&self) -> CodeOffset; + + /// Add 1 byte to the code section. + fn put1(&mut self, _: u8); + + /// Add 2 bytes to the code section. + fn put2(&mut self, _: u16); + + /// Add 4 bytes to the code section. + fn put4(&mut self, _: u32); + + /// Add 8 bytes to the code section. + fn put8(&mut self, _: u64); + + /// Add a relocation referencing an external symbol plus the addend at the current offset. + fn reloc_external(&mut self, _: SourceLoc, _: Reloc, _: &ExternalName, _: Addend); + + /// Add a relocation referencing a constant. + fn reloc_constant(&mut self, _: Reloc, _: ConstantOffset); + + /// Add a relocation referencing a jump table. + fn reloc_jt(&mut self, _: Reloc, _: JumpTable); + + /// Add trap information for the current offset. + fn trap(&mut self, _: TrapCode, _: SourceLoc); + + /// Machine code output is complete, jump table data may follow. + fn begin_jumptables(&mut self); + + /// Jump table output is complete, raw read-only data may follow. + fn begin_rodata(&mut self); + + /// Read-only data output is complete, we're done. + fn end_codegen(&mut self); + + /// Add a stack map at the current code offset. + fn add_stack_map(&mut self, _: &[Value], _: &Function, _: &dyn TargetIsa); + + /// Add a call site for a call with the given opcode, returning at the current offset. 
+ fn add_call_site(&mut self, _: Opcode, _: SourceLoc) { + // Default implementation doesn't need to do anything. + } +} + +/// Report a bad encoding error. +#[cold] +pub fn bad_encoding(func: &Function, inst: Inst) -> ! { + panic!( + "Bad encoding {} for {}", + func.encodings[inst], + func.dfg.display_inst(inst, None) + ); +} + +/// Emit a function to `sink`, given an instruction emitter function. +/// +/// This function is called from the `TargetIsa::emit_function()` implementations with the +/// appropriate instruction emitter. +pub fn emit_function<CS, EI>(func: &Function, emit_inst: EI, sink: &mut CS, isa: &dyn TargetIsa) +where + CS: CodeSink, + EI: Fn(&Function, Inst, &mut RegDiversions, &mut CS, &dyn TargetIsa), +{ + let mut divert = RegDiversions::new(); + for block in func.layout.blocks() { + divert.at_block(&func.entry_diversions, block); + debug_assert_eq!(func.offsets[block], sink.offset()); + for inst in func.layout.block_insts(block) { + emit_inst(func, inst, &mut divert, sink, isa); + } + } + + sink.begin_jumptables(); + + // Output jump tables. + for (jt, jt_data) in func.jump_tables.iter() { + let jt_offset = func.jt_offsets[jt]; + for block in jt_data.iter() { + let rel_offset: i32 = func.offsets[*block] as i32 - jt_offset as i32; + sink.put4(rel_offset as u32) + } + } + + sink.begin_rodata(); + + // Output constants. + for (_, constant_data) in func.dfg.constants.iter() { + for byte in constant_data.iter() { + sink.put1(*byte) + } + } + + sink.end_codegen(); +} diff --git a/third_party/rust/cranelift-codegen/src/binemit/relaxation.rs b/third_party/rust/cranelift-codegen/src/binemit/relaxation.rs new file mode 100644 index 0000000000..142e400985 --- /dev/null +++ b/third_party/rust/cranelift-codegen/src/binemit/relaxation.rs @@ -0,0 +1,397 @@ +//! Branch relaxation and offset computation. +//! +//! # block header offsets +//! +//! Before we can generate binary machine code for branch instructions, we need to know the final +//! offsets of all the block headers in the function. This information is encoded in the +//! `func.offsets` table. +//! +//! # Branch relaxation +//! +//! Branch relaxation is the process of ensuring that all branches in the function have enough +//! range to encode their destination. It is common to have multiple branch encodings in an ISA. +//! For example, x86 branches can have either an 8-bit or a 32-bit displacement. +//! +//! On RISC architectures, it can happen that conditional branches have a shorter range than +//! unconditional branches: +//! +//! ```clif +//! brz v1, block17 +//! ``` +//! +//! can be transformed into: +//! +//! ```clif +//! brnz v1, block23 +//! jump block17 +//! block23: +//! ``` + +use crate::binemit::{CodeInfo, CodeOffset}; +use crate::cursor::{Cursor, FuncCursor}; +use crate::dominator_tree::DominatorTree; +use crate::flowgraph::ControlFlowGraph; +use crate::ir::{Block, Function, Inst, InstructionData, Opcode, Value, ValueList}; +use crate::isa::{EncInfo, TargetIsa}; +use crate::iterators::IteratorExtras; +use crate::regalloc::RegDiversions; +use crate::timing; +use crate::CodegenResult; +use core::convert::TryFrom; +use log::debug; + +/// Relax branches and compute the final layout of block headers in `func`. +/// +/// Fill in the `func.offsets` table so the function is ready for binary emission. 
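`relax_branches` below returns a `CodeInfo` describing how the emitted bytes are partitioned. A small sketch (my helper, not from the diff) of how an embedder might use its accessors to slice an output buffer into code, jump tables, and read-only data:

```rust
use cranelift_codegen::binemit::CodeInfo;

/// Split an emitted buffer into its three sections using the offsets that
/// `CodeInfo` exposes: code starts at 0, jump tables at `jumptables()`,
/// and read-only data at `rodata()`.
fn split_sections<'a>(buf: &'a [u8], info: &CodeInfo) -> (&'a [u8], &'a [u8], &'a [u8]) {
    let code = &buf[..info.code_size as usize];
    let jumptables = &buf[info.jumptables() as usize..info.rodata() as usize];
    let rodata = &buf[info.rodata() as usize..info.total_size as usize];
    (code, jumptables, rodata)
}
```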
+pub fn relax_branches( + func: &mut Function, + _cfg: &mut ControlFlowGraph, + _domtree: &mut DominatorTree, + isa: &dyn TargetIsa, +) -> CodegenResult<CodeInfo> { + let _tt = timing::relax_branches(); + + let encinfo = isa.encoding_info(); + + // Clear all offsets so we can recognize blocks that haven't been visited yet. + func.offsets.clear(); + func.offsets.resize(func.dfg.num_blocks()); + + // Start by removing redundant jumps. + fold_redundant_jumps(func, _cfg, _domtree); + + // Convert jumps to fallthrough instructions where possible. + fallthroughs(func); + + let mut offset = 0; + let mut divert = RegDiversions::new(); + + // First, compute initial offsets for every block. + { + let mut cur = FuncCursor::new(func); + while let Some(block) = cur.next_block() { + divert.at_block(&cur.func.entry_diversions, block); + cur.func.offsets[block] = offset; + while let Some(inst) = cur.next_inst() { + divert.apply(&cur.func.dfg[inst]); + let enc = cur.func.encodings[inst]; + offset += encinfo.byte_size(enc, inst, &divert, &cur.func); + } + } + } + + // Then, run the relaxation algorithm until it converges. + let mut go_again = true; + while go_again { + go_again = false; + offset = 0; + + // Visit all instructions in layout order. + let mut cur = FuncCursor::new(func); + while let Some(block) = cur.next_block() { + divert.at_block(&cur.func.entry_diversions, block); + + // Record the offset for `block` and make sure we iterate until offsets are stable. + if cur.func.offsets[block] != offset { + cur.func.offsets[block] = offset; + go_again = true; + } + + while let Some(inst) = cur.next_inst() { + divert.apply(&cur.func.dfg[inst]); + + let enc = cur.func.encodings[inst]; + + // See if this is a branch has a range and a destination, and if the target is in + // range. + if let Some(range) = encinfo.branch_range(enc) { + if let Some(dest) = cur.func.dfg[inst].branch_destination() { + let dest_offset = cur.func.offsets[dest]; + if !range.contains(offset, dest_offset) { + offset += + relax_branch(&mut cur, &divert, offset, dest_offset, &encinfo, isa); + continue; + } + } + } + + offset += encinfo.byte_size(enc, inst, &divert, &cur.func); + } + } + } + + let code_size = offset; + let jumptables = offset; + + for (jt, jt_data) in func.jump_tables.iter() { + func.jt_offsets[jt] = offset; + // TODO: this should be computed based on the min size needed to hold the furthest branch. + offset += jt_data.len() as u32 * 4; + } + + let jumptables_size = offset - jumptables; + let rodata = offset; + + for constant in func.dfg.constants.entries_mut() { + constant.set_offset(offset); + offset += + u32::try_from(constant.len()).expect("Constants must have a length that fits in a u32") + } + + let rodata_size = offset - rodata; + + Ok(CodeInfo { + code_size, + jumptables_size, + rodata_size, + total_size: offset, + }) +} + +/// Folds an instruction if it is a redundant jump. +/// Returns whether folding was performed (which invalidates the CFG). +fn try_fold_redundant_jump( + func: &mut Function, + cfg: &mut ControlFlowGraph, + block: Block, + first_inst: Inst, +) -> bool { + let first_dest = match func.dfg[first_inst].branch_destination() { + Some(block) => block, // The instruction was a single-target branch. + None => { + return false; // The instruction was either multi-target or not a branch. + } + }; + + // For the moment, only attempt to fold a branch to a block that is parameterless. + // These blocks are mainly produced by critical edge splitting. 
+ // + // TODO: Allow folding blocks that define SSA values and function as phi nodes. + if func.dfg.num_block_params(first_dest) != 0 { + return false; + } + + // Look at the first instruction of the first branch's destination. + // If it is an unconditional branch, maybe the second jump can be bypassed. + let second_inst = func.layout.first_inst(first_dest).expect("Instructions"); + if func.dfg[second_inst].opcode() != Opcode::Jump { + return false; + } + + // Now we need to fix up first_inst's block parameters to match second_inst's, + // without changing the branch-specific arguments. + // + // The intermediary block is allowed to reference any SSA value that dominates it, + // but that SSA value may not necessarily also dominate the instruction that's + // being patched. + + // Get the arguments and parameters passed by the first branch. + let num_fixed = func.dfg[first_inst] + .opcode() + .constraints() + .num_fixed_value_arguments(); + let (first_args, first_params) = func.dfg[first_inst] + .arguments(&func.dfg.value_lists) + .split_at(num_fixed); + + // Get the parameters passed by the second jump. + let num_fixed = func.dfg[second_inst] + .opcode() + .constraints() + .num_fixed_value_arguments(); + let (_, second_params) = func.dfg[second_inst] + .arguments(&func.dfg.value_lists) + .split_at(num_fixed); + let mut second_params = second_params.to_vec(); // Clone for rewriting below. + + // For each parameter passed by the second jump, if any of those parameters + // was a block parameter, rewrite it to refer to the value that the first jump + // passed in its parameters. Otherwise, make sure it dominates first_inst. + // + // For example: if we `block0: jump block1(v1)` to `block1(v2): jump block2(v2)`, + // we want to rewrite the original jump to `jump block2(v1)`. + let block_params: &[Value] = func.dfg.block_params(first_dest); + debug_assert!(block_params.len() == first_params.len()); + + for value in second_params.iter_mut() { + if let Some((n, _)) = block_params.iter().enumerate().find(|(_, &p)| p == *value) { + // This value was the Nth parameter passed to the second_inst's block. + // Rewrite it as the Nth parameter passed by first_inst. + *value = first_params[n]; + } + } + + // Build a value list of first_args (unchanged) followed by second_params (rewritten). + let arguments_vec: alloc::vec::Vec<_> = first_args + .iter() + .chain(second_params.iter()) + .copied() + .collect(); + let value_list = ValueList::from_slice(&arguments_vec, &mut func.dfg.value_lists); + + func.dfg[first_inst].take_value_list(); // Drop the current list. + func.dfg[first_inst].put_value_list(value_list); // Put the new list. + + // Bypass the second jump. + // This can disconnect the Block containing `second_inst`, to be cleaned up later. + let second_dest = func.dfg[second_inst].branch_destination().expect("Dest"); + func.change_branch_destination(first_inst, second_dest); + cfg.recompute_block(func, block); + + // The previously-intermediary Block may now be unreachable. Update CFG. + if cfg.pred_iter(first_dest).count() == 0 { + // Remove all instructions from that block. + while let Some(inst) = func.layout.first_inst(first_dest) { + func.layout.remove_inst(inst); + } + + // Remove the block... + cfg.recompute_block(func, first_dest); // ...from predecessor lists. + func.layout.remove_block(first_dest); // ...from the layout. + } + + true +} + +/// Redirects `jump` instructions that point to other `jump` instructions to the final destination. +/// This transformation may orphan some blocks. 
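A standalone sketch of the parameter-rewriting rule that `try_fold_redundant_jump` above applies, with plain integers standing in for SSA values; the helper and the concrete numbers are illustrative only, not the cranelift API.

```rust
/// `block_params`: parameters of the intermediate block targeted by the first jump.
/// `first_params`: values the first jump passes for those parameters.
/// `second_params`: values the second jump passes on to the final destination.
/// Any entry of `second_params` that is one of the block parameters is replaced
/// by the value the first jump supplied for it; everything else is untouched.
fn rewrite_params(block_params: &[u32], first_params: &[u32], second_params: &mut [u32]) {
    assert_eq!(block_params.len(), first_params.len());
    for value in second_params.iter_mut() {
        if let Some(n) = block_params.iter().position(|&p| p == *value) {
            *value = first_params[n];
        }
    }
}

// Mirroring the comment above: `block0: jump block1(v1)` into
// `block1(v2): jump block2(v2)` folds to `block0: jump block2(v1)`.
// With v1 = 1 and v2 = 2: rewrite_params(&[2], &[1], &mut [2]) yields [1].
```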
+fn fold_redundant_jumps( + func: &mut Function, + cfg: &mut ControlFlowGraph, + domtree: &mut DominatorTree, +) { + let mut folded = false; + + // Postorder iteration guarantees that a chain of jumps is visited from + // the end of the chain to the start of the chain. + for &block in domtree.cfg_postorder() { + // Only proceed if the first terminator instruction is a single-target branch. + let first_inst = func + .layout + .last_inst(block) + .expect("Block has no terminator"); + folded |= try_fold_redundant_jump(func, cfg, block, first_inst); + + // Also try the previous instruction. + if let Some(prev_inst) = func.layout.prev_inst(first_inst) { + folded |= try_fold_redundant_jump(func, cfg, block, prev_inst); + } + } + + // Folding jumps invalidates the dominator tree. + if folded { + domtree.compute(func, cfg); + } +} + +/// Convert `jump` instructions to `fallthrough` instructions where possible and verify that any +/// existing `fallthrough` instructions are correct. +fn fallthroughs(func: &mut Function) { + for (block, succ) in func.layout.blocks().adjacent_pairs() { + let term = func + .layout + .last_inst(block) + .expect("block has no terminator."); + if let InstructionData::Jump { + ref mut opcode, + destination, + .. + } = func.dfg[term] + { + match *opcode { + Opcode::Fallthrough => { + // Somebody used a fall-through instruction before the branch relaxation pass. + // Make sure it is correct, i.e. the destination is the layout successor. + debug_assert_eq!( + destination, succ, + "Illegal fallthrough from {} to {}, but {}'s successor is {}", + block, destination, block, succ + ) + } + Opcode::Jump => { + // If this is a jump to the successor block, change it to a fall-through. + if destination == succ { + *opcode = Opcode::Fallthrough; + func.encodings[term] = Default::default(); + } + } + _ => {} + } + } + } +} + +/// Relax the branch instruction at `cur` so it can cover the range `offset - dest_offset`. +/// +/// Return the size of the replacement instructions up to and including the location where `cur` is +/// left. +fn relax_branch( + cur: &mut FuncCursor, + divert: &RegDiversions, + offset: CodeOffset, + dest_offset: CodeOffset, + encinfo: &EncInfo, + isa: &dyn TargetIsa, +) -> CodeOffset { + let inst = cur.current_inst().unwrap(); + debug!( + "Relaxing [{}] {} for {:#x}-{:#x} range", + encinfo.display(cur.func.encodings[inst]), + cur.func.dfg.display_inst(inst, isa), + offset, + dest_offset + ); + + // Pick the smallest encoding that can handle the branch range. + let dfg = &cur.func.dfg; + let ctrl_type = dfg.ctrl_typevar(inst); + if let Some(enc) = isa + .legal_encodings(cur.func, &dfg[inst], ctrl_type) + .filter(|&enc| { + let range = encinfo.branch_range(enc).expect("Branch with no range"); + if !range.contains(offset, dest_offset) { + debug!(" trying [{}]: out of range", encinfo.display(enc)); + false + } else if encinfo.operand_constraints(enc) + != encinfo.operand_constraints(cur.func.encodings[inst]) + { + // Conservatively give up if the encoding has different constraints + // than the original, so that we don't risk picking a new encoding + // which the existing operands don't satisfy. We can't check for + // validity directly because we don't have a RegDiversions active so + // we don't know which registers are actually in use. 
+ debug!(" trying [{}]: constraints differ", encinfo.display(enc)); + false + } else { + debug!(" trying [{}]: OK", encinfo.display(enc)); + true + } + }) + .min_by_key(|&enc| encinfo.byte_size(enc, inst, &divert, &cur.func)) + { + debug_assert!(enc != cur.func.encodings[inst]); + cur.func.encodings[inst] = enc; + return encinfo.byte_size(enc, inst, &divert, &cur.func); + } + + // Note: On some RISC ISAs, conditional branches have shorter range than unconditional + // branches, so one way of extending the range of a conditional branch is to invert its + // condition and make it branch over an unconditional jump which has the larger range. + // + // Splitting the block is problematic this late because there may be register diversions in + // effect across the conditional branch, and they can't survive the control flow edge to a new + // block. We have two options for handling that: + // + // 1. Set a flag on the new block that indicates it wants the preserve the register diversions of + // its layout predecessor, or + // 2. Use an encoding macro for the branch-over-jump pattern so we don't need to split the block. + // + // It seems that 1. would allow us to share code among RISC ISAs that need this. + // + // We can't allow register diversions to survive from the layout predecessor because the layout + // predecessor could contain kill points for some values that are live in this block, and + // diversions are not automatically cancelled when the live range of a value ends. + + // This assumes solution 2. above: + panic!("No branch in range for {:#x}-{:#x}", offset, dest_offset); +} diff --git a/third_party/rust/cranelift-codegen/src/binemit/shrink.rs b/third_party/rust/cranelift-codegen/src/binemit/shrink.rs new file mode 100644 index 0000000000..f6fa43e062 --- /dev/null +++ b/third_party/rust/cranelift-codegen/src/binemit/shrink.rs @@ -0,0 +1,73 @@ +//! Instruction shrinking. +//! +//! Sometimes there are multiple valid encodings for a given instruction. Cranelift often initially +//! chooses the largest one, because this typically provides the register allocator the most +//! flexibility. However, once register allocation is done, this is no longer important, and we +//! can switch to smaller encodings when possible. + +use crate::ir::instructions::InstructionData; +use crate::ir::Function; +use crate::isa::TargetIsa; +use crate::regalloc::RegDiversions; +use crate::timing; +use log::debug; + +/// Pick the smallest valid encodings for instructions. +pub fn shrink_instructions(func: &mut Function, isa: &dyn TargetIsa) { + let _tt = timing::shrink_instructions(); + + let encinfo = isa.encoding_info(); + let mut divert = RegDiversions::new(); + + for block in func.layout.blocks() { + // Load diversions from predecessors. + divert.at_block(&func.entry_diversions, block); + + for inst in func.layout.block_insts(block) { + let enc = func.encodings[inst]; + if enc.is_legal() { + // regmove/regfill/regspill are special instructions with register immediates + // that represented as normal operands, so the normal predicates below don't + // handle them correctly. + // + // Also, they need to be presented to the `RegDiversions` to update the + // location tracking. + // + // TODO: Eventually, we want the register allocator to avoid leaving these special + // instructions behind, but for now, just temporarily avoid trying to shrink them. + let inst_data = &func.dfg[inst]; + match inst_data { + InstructionData::RegMove { .. } + | InstructionData::RegFill { .. } + | InstructionData::RegSpill { .. 
} => { + divert.apply(inst_data); + continue; + } + _ => (), + } + + let ctrl_type = func.dfg.ctrl_typevar(inst); + + // Pick the last encoding with constraints that are satisfied. + let best_enc = isa + .legal_encodings(func, &func.dfg[inst], ctrl_type) + .filter(|e| encinfo.constraints[e.recipe()].satisfied(inst, &divert, &func)) + .min_by_key(|e| encinfo.byte_size(*e, inst, &divert, &func)) + .unwrap(); + + if best_enc != enc { + func.encodings[inst] = best_enc; + + debug!( + "Shrunk [{}] to [{}] in {}, reducing the size from {} to {}", + encinfo.display(enc), + encinfo.display(best_enc), + func.dfg.display_inst(inst, isa), + encinfo.byte_size(enc, inst, &divert, &func), + encinfo.byte_size(best_enc, inst, &divert, &func) + ); + } + } + } + } +} diff --git a/third_party/rust/cranelift-codegen/src/binemit/stack_map.rs b/third_party/rust/cranelift-codegen/src/binemit/stack_map.rs new file mode 100644 index 0000000000..c3055a0154 --- /dev/null +++ b/third_party/rust/cranelift-codegen/src/binemit/stack_map.rs @@ -0,0 +1,205 @@ +use crate::bitset::BitSet; +use crate::ir; +use crate::isa::TargetIsa; +use alloc::vec::Vec; + +type Num = u32; +const NUM_BITS: usize = core::mem::size_of::<Num>() * 8; + +/// Stack maps record which words in a stack frame contain live GC references at +/// a given instruction pointer. +/// +/// Logically, a set of stack maps for a function record a table of the form: +/// +/// ```text +/// +---------------------+-------------------------------------------+ +/// | Instruction Pointer | SP-Relative Offsets of Live GC References | +/// +---------------------+-------------------------------------------+ +/// | 0x12345678 | 2, 6, 12 | +/// | 0x1234abcd | 2, 6 | +/// | ... | ... | +/// +---------------------+-------------------------------------------+ +/// ``` +/// +/// Where "instruction pointer" is an instruction pointer within the function, +/// and "offsets of live GC references" contains the offsets (in units of words) +/// from the frame's stack pointer where live GC references are stored on the +/// stack. Instruction pointers within the function that do not have an entry in +/// this table are not GC safepoints. +/// +/// Because +/// +/// * offsets of live GC references are relative from the stack pointer, and +/// * stack frames grow down from higher addresses to lower addresses, +/// +/// to get a pointer to a live reference at offset `x` within a stack frame, you +/// add `x` from the frame's stack pointer. +/// +/// For example, to calculate the pointer to the live GC reference inside "frame +/// 1" below, you would do `frame_1_sp + x`: +/// +/// ```text +/// Stack +/// +-------------------+ +/// | Frame 0 | +/// | | +/// | | | +/// | +-------------------+ <--- Frame 0's SP +/// | | Frame 1 | +/// Grows | | +/// down | | +/// | | Live GC reference | --+-- +/// | | | | +/// | | | | +/// V | | x = offset of live GC reference +/// | | | +/// | | | +/// +-------------------+ --+-- <--- Frame 1's SP +/// | Frame 2 | +/// | ... | +/// ``` +/// +/// An individual `StackMap` is associated with just one instruction pointer +/// within the function, contains the size of the stack frame, and represents +/// the stack frame as a bitmap. There is one bit per word in the stack frame, +/// and if the bit is set, then the word contains a live GC reference. +/// +/// Note that a caller's `OutgoingArg` stack slots and callee's `IncomingArg` +/// stack slots overlap, so we must choose which function's stack maps record +/// live GC references in these slots. 
We record the `IncomingArg`s in the +/// callee's stack map. +#[derive(Clone, Debug, PartialEq, Eq)] +#[cfg_attr(feature = "enable-serde", derive(serde::Deserialize, serde::Serialize))] +pub struct StackMap { + bitmap: Vec<BitSet<Num>>, + mapped_words: u32, +} + +impl StackMap { + /// Create a `StackMap` based on where references are located on a + /// function's stack. + pub fn from_values( + args: &[ir::entities::Value], + func: &ir::Function, + isa: &dyn TargetIsa, + ) -> Self { + let loc = &func.locations; + let mut live_ref_in_stack_slot = crate::HashSet::new(); + // References can be in registers, and live registers values are pushed onto the stack before calls and traps. + // TODO: Implement register maps. If a register containing a reference is spilled and reused after a safepoint, + // it could contain a stale reference value if the garbage collector relocated the value. + for val in args { + if let Some(value_loc) = loc.get(*val) { + match *value_loc { + ir::ValueLoc::Stack(stack_slot) => { + live_ref_in_stack_slot.insert(stack_slot); + } + _ => {} + } + } + } + + let stack = &func.stack_slots; + let info = func.stack_slots.layout_info.unwrap(); + + // Refer to the doc comment for `StackMap` above to understand the + // bitmap representation used here. + let map_size = (info.frame_size + info.inbound_args_size) as usize; + let word_size = isa.pointer_bytes() as usize; + let num_words = map_size / word_size; + + let mut vec = alloc::vec::Vec::with_capacity(num_words); + vec.resize(num_words, false); + + for (ss, ssd) in stack.iter() { + if !live_ref_in_stack_slot.contains(&ss) + || ssd.kind == ir::stackslot::StackSlotKind::OutgoingArg + { + continue; + } + + debug_assert!(ssd.size as usize == word_size); + let bytes_from_bottom = info.frame_size as i32 + ssd.offset.unwrap(); + let words_from_bottom = (bytes_from_bottom as usize) / word_size; + vec[words_from_bottom] = true; + } + + Self::from_slice(&vec) + } + + /// Create a vec of Bitsets from a slice of bools. + pub fn from_slice(vec: &[bool]) -> Self { + let len = vec.len(); + let num_word = len / NUM_BITS + (len % NUM_BITS != 0) as usize; + let mut bitmap = Vec::with_capacity(num_word); + + for segment in vec.chunks(NUM_BITS) { + let mut curr_word = 0; + for (i, set) in segment.iter().enumerate() { + if *set { + curr_word |= 1 << i; + } + } + bitmap.push(BitSet(curr_word)); + } + Self { + mapped_words: len as u32, + bitmap, + } + } + + /// Returns a specified bit. + pub fn get_bit(&self, bit_index: usize) -> bool { + assert!(bit_index < NUM_BITS * self.bitmap.len()); + let word_index = bit_index / NUM_BITS; + let word_offset = (bit_index % NUM_BITS) as u8; + self.bitmap[word_index].contains(word_offset) + } + + /// Returns the raw bitmap that represents this stack map. + pub fn as_slice(&self) -> &[BitSet<u32>] { + &self.bitmap + } + + /// Returns the number of words represented by this stack map. 
+ pub fn mapped_words(&self) -> u32 { + self.mapped_words + } +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn stack_maps() { + let vec: Vec<bool> = Vec::new(); + assert!(StackMap::from_slice(&vec).bitmap.is_empty()); + + let mut vec: [bool; NUM_BITS] = Default::default(); + let set_true_idx = [5, 7, 24, 31]; + + for &idx in &set_true_idx { + vec[idx] = true; + } + + let mut vec = vec.to_vec(); + assert_eq!( + vec![BitSet::<Num>(2164261024)], + StackMap::from_slice(&vec).bitmap + ); + + vec.push(false); + vec.push(true); + let res = StackMap::from_slice(&vec); + assert_eq!( + vec![BitSet::<Num>(2164261024), BitSet::<Num>(2)], + res.bitmap + ); + + assert!(res.get_bit(5)); + assert!(res.get_bit(31)); + assert!(res.get_bit(33)); + assert!(!res.get_bit(1)); + } +} |
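Finally, a hypothetical GC-side helper (not part of the diff) showing how the `StackMap` accessors above might be consumed to recover the SP-relative word offsets of live references at a safepoint; the function name is mine.

```rust
use cranelift_codegen::binemit::StackMap;

/// List the word offsets (from the frame's stack pointer) that hold live GC
/// references, by walking the bitmap through `mapped_words` and `get_bit`.
fn live_word_offsets(map: &StackMap) -> Vec<u32> {
    (0..map.mapped_words())
        .filter(|&i| map.get_bit(i as usize))
        .collect()
}
```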