#![allow(non_snake_case)]
#![allow(non_camel_case_types)]

//! Core implementation of the backtracking allocator.

use log::{debug, info, log_enabled, Level};
use smallvec::SmallVec;
use std::default;
use std::fmt;

use crate::analysis_data_flow::{add_raw_reg_vecs_for_insn, does_inst_use_def_or_mod_reg};
use crate::analysis_main::{run_analysis, AnalysisInfo};
use crate::avl_tree::{AVLTree, AVL_NULL};
use crate::bt_coalescing_analysis::{do_coalescing_analysis, Hint};
use crate::bt_commitment_map::{CommitmentMap, RangeFragAndRangeId};
use crate::bt_spillslot_allocator::SpillSlotAllocator;
use crate::bt_vlr_priority_queue::VirtualRangePrioQ;
use crate::data_structures::{
    BlockIx, InstIx, InstPoint, Map, Point, RangeFrag, RangeFragIx, RangeId, RealRange,
    RealRangeIx, RealReg, RealRegUniverse, Reg, RegClass, RegVecBounds, RegVecs, RegVecsAndBounds,
    Set, SortedRangeFrags, SpillCost, SpillSlot, TypedIxVec, VirtualRange, VirtualRangeIx,
    VirtualReg, Writable,
};
use crate::inst_stream::{
    edit_inst_stream, ExtPoint, InstExtPoint, InstToInsert, InstToInsertAndExtPoint,
};
use crate::sparse_set::SparseSetU;
use crate::union_find::UnionFindEquivClasses;
use crate::{AlgorithmWithDefaults, Function, RegAllocError, RegAllocResult, StackmapRequestInfo};

#[derive(Clone)]
pub struct BacktrackingOptions {
    /// Should the register allocator generate block annotations?
    pub request_block_annotations: bool,
}

impl default::Default for BacktrackingOptions {
    fn default() -> Self {
        Self {
            request_block_annotations: false,
        }
    }
}

impl fmt::Debug for BacktrackingOptions {
    fn fmt(&self, fmt: &mut fmt::Formatter) -> fmt::Result {
        write!(
            fmt,
            "backtracking (block annotations: {})",
            self.request_block_annotations
        )
    }
}

//=============================================================================
// The per-real-register state
//
// Relevant methods are expected to be parameterised by the same VirtualRange
// env as used in calls to `VirtualRangePrioQ`.

struct PerRealReg {
    // The current committed fragments for this RealReg.
    committed: CommitmentMap,

    // The set of VirtualRanges which have been assigned to this RealReg. The
    // union of their frags will be equal to `committed` only if this RealReg
    // has no RealRanges. If this RealReg does have RealRanges the
    // aforementioned union will be exactly the subset of `committed` not used
    // by the RealRanges.
    vlrixs_assigned: Set<VirtualRangeIx>,
}

impl PerRealReg {
    fn new() -> Self {
        Self {
            committed: CommitmentMap::new(),
            vlrixs_assigned: Set::<VirtualRangeIx>::empty(),
        }
    }

    #[inline(never)]
    fn add_RealRange(
        &mut self,
        to_add_rlrix: RealRangeIx,
        rlr_env: &TypedIxVec<RealRangeIx, RealRange>,
        frag_env: &TypedIxVec<RangeFragIx, RangeFrag>,
    ) {
        // Commit this register to `to_add`, irrevocably. Don't add it to
        // `vlrixs_assigned` since we will never want to later evict the
        // assignment. (Also, from a types point of view that would be
        // impossible.)
        let to_add_rlr = &rlr_env[to_add_rlrix];
        self.committed.add_indirect(
            &to_add_rlr.sorted_frags,
            RangeId::new_real(to_add_rlrix),
            frag_env,
        );
    }

    #[inline(never)]
    fn add_VirtualRange(
        &mut self,
        to_add_vlrix: VirtualRangeIx,
        vlr_env: &TypedIxVec<VirtualRangeIx, VirtualRange>,
    ) {
        let to_add_vlr = &vlr_env[to_add_vlrix];
        self.committed
            .add(&to_add_vlr.sorted_frags, RangeId::new_virtual(to_add_vlrix));
        assert!(!self.vlrixs_assigned.contains(to_add_vlrix));
        self.vlrixs_assigned.insert(to_add_vlrix);
    }

    #[inline(never)]
    fn del_VirtualRange(
        &mut self,
        to_del_vlrix: VirtualRangeIx,
        vlr_env: &TypedIxVec<VirtualRangeIx, VirtualRange>,
    ) {
        // Remove it from `vlrixs_assigned`.
        // FIXME 2020June18: we could do this more efficiently by inspecting
        // the return value from `delete`.
        if self.vlrixs_assigned.contains(to_del_vlrix) {
            self.vlrixs_assigned.delete(to_del_vlrix);
        } else {
            panic!("PerRealReg: del_VirtualRange on VR not in vlrixs_assigned");
        }
        // Remove it from `committed`
        let to_del_vlr = &vlr_env[to_del_vlrix];
        self.committed.del(&to_del_vlr.sorted_frags);
    }
}

// HELPER FUNCTION
// For a given `RangeFrag`, traverse the commitment tree rooted at `root`,
// adding to `running_set` the set of VLRIxs that the frag intersects, and
// adding their spill costs to `running_cost`. Return false if, for one of
// the 4 reasons documented below, the traversal has been abandoned, and true
// if the search completed successfully.
fn search_commitment_tree<IsAllowedToEvict>(
    // The running state, threaded through the tree traversal. These
    // accumulate ranges and costs as we traverse the tree. These are mutable
    // because our caller (`find_evict_set`) will want to try and allocate
    // multiple `RangeFrag`s in this tree, not just a single one, and so it
    // needs a way to accumulate the total evict-cost and evict-set for all
    // the `RangeFrag`s it iterates over.
    running_set: &mut SparseSetU<[VirtualRangeIx; 4]>,
    running_cost: &mut SpillCost,
    // The tree to search.
    tree: &AVLTree<RangeFragAndRangeId>,
    // The RangeFrag we want to accommodate.
    pair_frag: &RangeFrag,
    spill_cost_budget: &SpillCost,
    allowed_to_evict: &IsAllowedToEvict,
    vlr_env: &TypedIxVec<VirtualRangeIx, VirtualRange>,
) -> bool
where
    IsAllowedToEvict: Fn(VirtualRangeIx) -> bool,
{
    let mut stack = SmallVec::<[u32; 32]>::new();
    assert!(tree.root != AVL_NULL);
    stack.push(tree.root);

    while let Some(curr) = stack.pop() {
        let curr_node = &tree.pool[curr as usize];
        let curr_node_item = &curr_node.item;
        let curr_frag = &curr_node_item.frag;

        // Figure out whether `pair_frag` overlaps the root of the current
        // subtree.
        let overlaps_curr = pair_frag.last >= curr_frag.first && pair_frag.first <= curr_frag.last;

        // Let's first consider the current node. If we need it but it's not
        // evictable, we might as well stop now.
        if overlaps_curr {
            // This frag is committed to a real range, not a virtual one, and hence is not
            // evictable.
            if curr_node_item.id.is_real() {
                return false;
            }

            // Maybe this one is a spill range, in which case, it can't be
            // evicted.
            let vlrix_to_evict = curr_node_item.id.to_virtual();
            let vlr_to_evict = &vlr_env[vlrix_to_evict];
            if vlr_to_evict.spill_cost.is_infinite() {
                return false;
            }

            // Check that this range alone doesn't exceed our total spill
            // cost. NB: given the check XXX below, this isn't actually
            // necessary; however it means that we avoid doing two
            // SparseSet::contains operations before exiting. This saves
            // around 0.3% instruction count for large inputs.
            if !vlr_to_evict.spill_cost.is_less_than(spill_cost_budget) {
                return false;
            }

            // Maybe our caller doesn't want us to evict this one.
            if !allowed_to_evict(vlrix_to_evict) {
                return false;
            }

            // Ok! We can evict the current node. Update the running state
            // accordingly. Note that we may be presented with the same VLRIx
            // to evict multiple times, so we must be careful to add the cost
            // of it only once.
            if !running_set.contains(vlrix_to_evict) {
                let mut tmp_cost = *running_cost;
                tmp_cost.add(&vlr_to_evict.spill_cost);
                // See above XXX
                if !tmp_cost.is_less_than(spill_cost_budget) {
                    return false;
                }
                *running_cost = tmp_cost;
                running_set.insert(vlrix_to_evict);
            }
        }

        // Now figure out if we need to visit the subtrees, and if so push the
        // relevant nodes. Whether we visit the left or right subtree first
        // is unimportant, at least from a correctness perspective.
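        //
        // A worked example of the conditions just below (illustrative numbers
        // only): suppose `pair_frag` covers [10 .. 20] and `curr_frag` covers
        // [15 .. 30]. Then `pair_frag.first` (10) is less than
        // `curr_frag.first` (15), so some part of `pair_frag` lies strictly to
        // the left of this node, and the left subtree must be searched. But
        // `pair_frag.last` (20) is not greater than `curr_frag.last` (30), so
        // no part of `pair_frag` lies to the right of this node, and the right
        // subtree can be pruned away.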
        let must_check_left = pair_frag.first < curr_frag.first;
        if must_check_left {
            let left_of_curr = tree.pool[curr as usize].left;
            if left_of_curr != AVL_NULL {
                stack.push(left_of_curr);
            }
        }

        let must_check_right = pair_frag.last > curr_frag.last;
        if must_check_right {
            let right_of_curr = tree.pool[curr as usize].right;
            if right_of_curr != AVL_NULL {
                stack.push(right_of_curr);
            }
        }
    }

    // If we get here, it means that `pair_frag` can be accommodated if we
    // evict all the frags it overlaps in `tree`.
    //
    // Stay sane ..
    assert!(running_cost.is_finite());
    assert!(running_cost.is_less_than(spill_cost_budget));
    true
}

impl PerRealReg {
    // Find the set of VirtualRangeIxs that would need to be evicted in order to
    // allocate `would_like_to_add` to this register. Virtual ranges mentioned
    // in `do_not_evict` must not be considered as candidates for eviction.
    // Also returns the total associated spill cost. That spill cost cannot be
    // infinite.
    //
    // This can fail (return None) for four different reasons:
    //
    // - `would_like_to_add` interferes with a real-register-live-range
    //   commitment, so the register would be unavailable even if we evicted
    //   *all* virtual ranges assigned to it.
    //
    // - `would_like_to_add` interferes with a virtual range which is a spill
    //   range (has infinite cost). We cannot evict those without risking
    //   non-termination of the overall allocation algorithm.
    //
    // - `would_like_to_add` interferes with a virtual range listed in
    //   `do_not_evict`. Our caller uses this mechanism when trying to do
    //   coalescing, to avoid the nonsensicality of evicting some part of a
    //   virtual live range group in order to allocate a member of the same
    //   group.
    //
    // - The total spill cost of the candidate set exceeds the spill cost of
    //   `would_like_to_add`. This means that spilling them would be a net loss
    //   per our cost model. Note that `would_like_to_add` may have an infinite
    //   spill cost, in which case it will "win" over all other
    //   non-infinite-cost eviction candidates. This is by design (so as to
    //   guarantee that we can always allocate spill/reload bridges).
    #[inline(never)]
    fn find_evict_set<IsAllowedToEvict>(
        &self,
        would_like_to_add: VirtualRangeIx,
        allowed_to_evict: &IsAllowedToEvict,
        vlr_env: &TypedIxVec<VirtualRangeIx, VirtualRange>,
    ) -> Option<(SparseSetU<[VirtualRangeIx; 4]>, SpillCost)>
    where
        IsAllowedToEvict: Fn(VirtualRangeIx) -> bool,
    {
        // Firstly, if the commitment tree for this reg is empty, we can
        // declare success immediately.
        if self.committed.tree.root == AVL_NULL {
            let evict_set = SparseSetU::<[VirtualRangeIx; 4]>::empty();
            let evict_cost = SpillCost::zero();
            return Some((evict_set, evict_cost));
        }

        // The tree isn't empty, so we will have to do this the hard way: iterate
        // over all fragments in `would_like_to_add` and check them against the
        // tree.

        // Useful constants for the main loop
        let would_like_to_add_vlr = &vlr_env[would_like_to_add];
        let evict_cost_budget = would_like_to_add_vlr.spill_cost;
        // Note that `evict_cost_budget` can be infinite because
        // `would_like_to_add` might be a spill/reload range.

        // The overall evict set and cost so far. These are updated as we iterate
        // over the fragments that make up `would_like_to_add`.
        let mut running_set = SparseSetU::<[VirtualRangeIx; 4]>::empty();
        let mut running_cost = SpillCost::zero();

        // "wlta" = would like to add
        for wlta_frag in &would_like_to_add_vlr.sorted_frags.frags {
            let wlta_frag_ok = search_commitment_tree(
                &mut running_set,
                &mut running_cost,
                &self.committed.tree,
                &wlta_frag,
                &evict_cost_budget,
                allowed_to_evict,
                vlr_env,
            );
            if !wlta_frag_ok {
                // This fragment won't fit, for one of the four reasons listed
                // above. So give up now.
                return None;
            }
            // And move on to the next fragment.
        }

        // If we got here, it means that `would_like_to_add` can be accommodated \o/
        assert!(running_cost.is_finite());
        assert!(running_cost.is_less_than(&evict_cost_budget));
        Some((running_set, running_cost))
    }

    #[allow(dead_code)]
    #[inline(never)]
    fn show1_with_envs(&self, _frag_env: &TypedIxVec<RangeFragIx, RangeFrag>) -> String {
        //"in_use: ".to_string() + &self.committed.show_with_frag_env(&frag_env)
        "(show1_with_envs:FIXME)".to_string()
    }
    #[allow(dead_code)]
    #[inline(never)]
    fn show2_with_envs(&self, _frag_env: &TypedIxVec<RangeFragIx, RangeFrag>) -> String {
        //"assigned: ".to_string() + &format!("{:?}", &self.vlrixs_assigned)
        "(show2_with_envs:FIXME)".to_string()
    }
}

//=============================================================================
// Printing the allocator's top level state

#[inline(never)]
fn print_RA_state(
    who: &str,
    _universe: &RealRegUniverse,
    // State components
    prioQ: &VirtualRangePrioQ,
    _perRealReg: &Vec<PerRealReg>,
    edit_list_move: &Vec<EditListItem>,
    edit_list_other: &Vec<EditListItem>,
    // The context (environment)
    vlr_env: &TypedIxVec<VirtualRangeIx, VirtualRange>,
    _frag_env: &TypedIxVec<RangeFragIx, RangeFrag>,
) {
    debug!("<<<<====---- RA state at '{}' ----====", who);
    //for ix in 0..perRealReg.len() {
    //    if !&perRealReg[ix].committed.pairs.is_empty() {
    //        debug!(
    //            "{:<5} {}",
    //            universe.regs[ix].1,
    //            &perRealReg[ix].show1_with_envs(&frag_env)
    //        );
    //        debug!(" {}", &perRealReg[ix].show2_with_envs(&frag_env));
    //        debug!("");
    //    }
    //}
    if !prioQ.is_empty() {
        for s in prioQ.show_with_envs(vlr_env) {
            debug!("{}", s);
        }
    }
    for eli in edit_list_move {
        debug!("ELI MOVE: {:?}", eli);
    }
    for eli in edit_list_other {
        debug!("ELI other: {:?}", eli);
    }
    debug!(">>>>");
}

//=============================================================================
// Reftype/stackmap support

// This creates the artefacts for a safepoint/stackmap at some insn `iix`: the set of reftyped
// spill slots, the spills to be placed at `iix.r` (yes, you read that right) and the reloads to
// be placed at `iix.s`.
//
// This consults:
//
// * the commitment maps, to figure out which real registers are live and reftyped at `iix.u`.
//
// * the spillslot allocator, to figure out which spill slots are live and reftyped at `iix.u`.
//
// This may fail, meaning the request is in some way nonsensical; failure is propagated upwards.

fn get_stackmap_artefacts_at(
    spill_slot_allocator: &mut SpillSlotAllocator,
    univ: &RealRegUniverse,
    reftype_class: RegClass,
    reg_vecs_and_bounds: &RegVecsAndBounds,
    per_real_reg: &Vec<PerRealReg>,
    rlr_env: &TypedIxVec<RealRangeIx, RealRange>,
    vlr_env: &TypedIxVec<VirtualRangeIx, VirtualRange>,
    iix: InstIx,
) -> Result<(Vec<InstToInsert>, Vec<InstToInsert>, Vec<SpillSlot>), RegAllocError> {
    // From a code generation perspective, what we need to compute is:
    //
    // * Sbefore: real regs that are live at `iix.u`, that are reftypes
    //
    // * Safter: Sbefore - real regs written by `iix`
    //
    // Then:
    //
    // * for r in Sbefore . add "spill r" at `iix.r` *after* all the reloads that are already
    //   there
    //
    // * for r in Safter .
    //   add "reload r" at `iix.s` *before* all the spills that are already
    //   there
    //
    // Once those spills have been "recorded" by the `spill_slot_allocator`, we can then ask it
    // to tell us all the reftyped spill slots at `iix.u`, and that's our stackmap! This routine
    // only computes the stackmap and the vectors of spills and reloads. It doesn't deal with
    // interleaving them into the final code sequence.
    //
    // Note that this scheme isn't as runtime-inefficient as it sounds, at least in the
    // SpiderMonkey use case and where `iix` is a call insn. That's because SM's calling
    // convention has no callee saved registers. Hence "real regs written by `iix`" will be
    // "all real regs" and so Safter will be empty. And Sbefore is in any case pretty small.
    //
    // (/me thinks ..) hmm, if Safter is empty, then what is the point of dumping Sbefore on the
    // stack before the GC? For r in Sbefore, either r is the only reference to some object, in
    // which case there's no point in presenting that ref to the GC since r is dead after call,
    // or r isn't the only ref to the object, in which case some other ref to it must exist
    // elsewhere in the stack, and that will keep the object alive. Maybe this needs a rethink.
    // Maybe the spills before the call should be only for the set Safter?

    let pt = InstPoint::new_use(iix);

    // Compute Sbefore.

    // FIXME change this to SparseSet
    let mut s_before = Set::<RealReg>::empty();

    let rci = univ.allocable_by_class[reftype_class.rc_to_usize()];
    if rci.is_none() {
        return Err(RegAllocError::Other(
            "stackmap request: no regs in specified reftype class".to_string(),
        ));
    }
    let rci = rci.unwrap();

    debug!("computing stackmap info at {:?}", pt);

    for rreg_no in rci.first..rci.last + 1 {
        // Get the RangeId, if any, assigned for `rreg_no` at `iix.u`. From that we can figure
        // out if it is reftyped.
        let mb_range_id = per_real_reg[rreg_no].committed.lookup_inst_point(pt);
        if let Some(range_id) = mb_range_id {
            // `rreg_no` is live at `iix.u`.
            let is_ref = if range_id.is_real() {
                debug!(
                    " real reg {:?} is real-range {:?}",
                    rreg_no,
                    rlr_env[range_id.to_real()]
                );
                rlr_env[range_id.to_real()].is_ref
            } else {
                debug!(
                    " real reg {:?} is virtual-range {:?}",
                    rreg_no,
                    vlr_env[range_id.to_virtual()]
                );
                vlr_env[range_id.to_virtual()].is_ref
            };
            if is_ref {
                // Finally .. we know that `rreg_no` is reftyped and live at `iix.u`.
                let rreg = univ.regs[rreg_no].0;
                s_before.insert(rreg);
            }
        }
    }
    debug!("Sbefore = {:?}", s_before);

    // Compute Safter.

    let mut s_after = s_before.clone();
    let bounds = &reg_vecs_and_bounds.bounds[iix];
    if bounds.mods_len != 0 {
        // Only the GC is allowed to modify reftyped regs at this insn!
        return Err(RegAllocError::Other(
            "stackmap request: safepoint insn modifies a reftyped reg".to_string(),
        ));
    }

    for i in bounds.defs_start..bounds.defs_start + bounds.defs_len as u32 {
        let r_defd = reg_vecs_and_bounds.vecs.defs[i as usize];
        if r_defd.is_real() && r_defd.get_class() == reftype_class {
            s_after.delete(r_defd.to_real_reg());
        }
    }
    debug!("Safter = {:?}", s_after);

    // Create the spill insns, as defined by Sbefore. This has the side effect of recording the
    // spill in `spill_slot_allocator`, so we can later ask it to tell us all the reftyped spill
    // slots.
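    //
    // Note that the frag below spans from the reload point to the spill point
    // of `iix`, i.e. the maximal extent of the instruction. Hence any slot
    // handed out by `alloc_reftyped_spillslot_for_frag` is treated as live
    // across `iix.u` itself, which is what makes it show up in the
    // `get_reftyped_spillslots_at_inst_point` query further down.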
    let frag = RangeFrag::new(InstPoint::new_reload(iix), InstPoint::new_spill(iix));

    let mut spill_insns = Vec::<InstToInsert>::new();
    let mut where_reg_got_spilled_to = Map::<RealReg, SpillSlot>::default();

    for from_reg in s_before.iter() {
        let to_slot = spill_slot_allocator.alloc_reftyped_spillslot_for_frag(frag.clone());
        let spill = InstToInsert::Spill {
            to_slot,
            from_reg: *from_reg,
            for_vreg: None, // spill isn't associated with any virtual reg
        };
        spill_insns.push(spill);
        // We also need to remember where we stashed it, so we can reload it, if it is in Safter.
        if s_after.contains(*from_reg) {
            where_reg_got_spilled_to.insert(*from_reg, to_slot);
        }
    }

    // Create the reload insns, as defined by Safter. Except, we might as well use the map we
    // just made, since its domain is the same as Safter.
    let mut reload_insns = Vec::<InstToInsert>::new();

    for (to_reg, from_slot) in where_reg_got_spilled_to.iter() {
        let reload = InstToInsert::Reload {
            to_reg: Writable::from_reg(*to_reg),
            from_slot: *from_slot,
            for_vreg: None, // reload isn't associated with any virtual reg
        };
        reload_insns.push(reload);
    }

    // And finally .. round up all the reftyped spill slots. That includes both "normal" spill
    // slots that happen to hold reftyped values, as well as the "extras" we created here, to
    // hold values of reftyped regs that are live over this instruction.
    let reftyped_spillslots = spill_slot_allocator.get_reftyped_spillslots_at_inst_point(pt);
    debug!("reftyped_spillslots = {:?}", reftyped_spillslots);

    // And we're done!

    Ok((spill_insns, reload_insns, reftyped_spillslots))
}

//=============================================================================
// Allocator top level

/* (const) For each virtual live range
   - its sorted RangeFrags
   - its total size
   - its spill cost
   - (eventually) its assigned real register
   New VirtualRanges are created as we go, but existing ones are unchanged,
   apart from being tagged with its assigned real register.

   (mut) For each real register
   - the sorted RangeFrags assigned to it (todo: rm the M)
   - the virtual LR indices assigned to it. This is so we can eject a
     VirtualRange from the commitment, as needed

   (mut) the set of VirtualRanges not yet allocated, prioritised by total size

   (mut) the edit list that is produced
*/
/*
fn show_commit_tab(commit_tab: &Vec::<SortedRangeFragIxs>,
                   who: &str,
                   context: &TypedIxVec::<RangeFragIx, RangeFrag>) -> String {
    let mut res = "Commit Table at '".to_string()
                  + &who.to_string() + &"'\n".to_string();
    let mut rregNo = 0;
    for smf in commit_tab.iter() {
        res += &" ".to_string();
        res += &mkRealReg(rregNo).show();
        res += &" ".to_string();
        res += &smf.show_with_fenv(&context);
        res += &"\n".to_string();
        rregNo += 1;
    }
    res
}
*/

// VirtualRanges created by spilling all pertain to a single InstIx. But
// within that InstIx, there are three kinds of "bridges":
#[derive(Clone, Copy, PartialEq)]
pub(crate) enum BridgeKind {
    RtoU, // A bridge for a USE. This connects the reload to the use.
    RtoS, // A bridge for a MOD. This connects the reload, the use/def
    //       and the spill, since the value must first be reloaded, then
    //       operated on, and finally re-spilled.
    DtoS, // A bridge for a DEF. This connects the def to the spill.
}

impl fmt::Debug for BridgeKind {
    fn fmt(&self, fmt: &mut fmt::Formatter) -> fmt::Result {
        match self {
            BridgeKind::RtoU => write!(fmt, "R->U"),
            BridgeKind::RtoS => write!(fmt, "R->S"),
            BridgeKind::DtoS => write!(fmt, "D->S"),
        }
    }
}

#[derive(Clone, Copy)]
struct EditListItem {
    // This holds enough information to create a spill or reload instruction,
    // or both, and also specifies where in the instruction stream it/they
    // should be added.
    // Note that if the edit list as a whole specifies
    // multiple items for the same location, then it is assumed that the order
    // in which they execute isn't important.
    //
    // Some of the relevant info can be found via the VirtualRangeIx link:
    // (1) the real reg involved
    // (2) the place where the insn should go, since the VirtualRange should
    //     only have one RangeFrag, and we can deduce the correct location
    //     from that.
    // Despite (2) we also carry here the InstIx of the affected instruction
    // (there should be only one) since computing it via (2) is expensive.
    // This however gives a redundancy in representation against (2). Beware!
    slot: SpillSlot,
    vlrix: VirtualRangeIx,
    kind: BridgeKind,
    iix: InstIx,
}

impl fmt::Debug for EditListItem {
    fn fmt(&self, fmt: &mut fmt::Formatter) -> fmt::Result {
        write!(
            fmt,
            "(ELI: at {:?} for {:?} add {:?}, slot={:?})",
            self.iix, self.vlrix, self.kind, self.slot
        )
    }
}

// Allocator top level. This function returns a result struct that contains
// the final sequence of instructions, possibly with fills/spills/moves
// spliced in and redundant moves elided, and with all virtual registers
// replaced with real registers. Allocation can fail if there are insufficient
// registers to even generate spill/reload code, or if the function appears to
// have any undefined VirtualReg/RealReg uses.
#[inline(never)]
pub fn alloc_main<F: Function>(
    func: &mut F,
    reg_universe: &RealRegUniverse,
    stackmap_request: Option<&StackmapRequestInfo>,
    use_checker: bool,
    opts: &BacktrackingOptions,
) -> Result<RegAllocResult<F>, RegAllocError> {
    // -------- Initial arrangements for stackmaps --------
    let empty_vec_vregs = vec![];
    let empty_vec_iixs = vec![];
    let (client_wants_stackmaps, reftype_class, reftyped_vregs, safepoint_insns) =
        match stackmap_request {
            Some(&StackmapRequestInfo {
                reftype_class,
                ref reftyped_vregs,
                ref safepoint_insns,
            }) => (true, reftype_class, reftyped_vregs, safepoint_insns),
            None => (false, RegClass::INVALID, &empty_vec_vregs, &empty_vec_iixs),
        };

    // -------- Perform initial liveness analysis --------
    // Note that the analysis phase can fail; hence we propagate any error.
    let AnalysisInfo {
        reg_vecs_and_bounds,
        real_ranges: rlr_env,
        virtual_ranges: mut vlr_env,
        range_frags: frag_env,
        range_metrics: frag_metrics_env,
        estimated_frequencies: est_freqs,
        inst_to_block_map,
        reg_to_ranges_maps: mb_reg_to_ranges_maps,
        move_info: mb_move_info,
    } = run_analysis(
        func,
        reg_universe,
        AlgorithmWithDefaults::Backtracking,
        client_wants_stackmaps,
        reftype_class,
        reftyped_vregs,
    )
    .map_err(|err| RegAllocError::Analysis(err))?;

    assert!(reg_vecs_and_bounds.is_sanitized());
    assert!(frag_env.len() == frag_metrics_env.len());
    assert!(mb_reg_to_ranges_maps.is_some()); // ensured by `run_analysis`
    assert!(mb_move_info.is_some()); // ensured by `run_analysis`
    let reg_to_ranges_maps = mb_reg_to_ranges_maps.unwrap();
    let move_info = mb_move_info.unwrap();

    // Also perform analysis that finds all coalescing opportunities.
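    // The three components unpacked just below are: the per-VLR register
    // hints, the union-find equivalence classes of VLRs connected by moves,
    // and a per-instruction flag identifying virtual-to-virtual boundary
    // moves. All three are consumed by the coalescing logic in the main
    // allocation loop.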
    let coalescing_info = do_coalescing_analysis(
        func,
        &reg_universe,
        &rlr_env,
        &mut vlr_env,
        &frag_env,
        &reg_to_ranges_maps,
        &move_info,
    );
    let mut hints: TypedIxVec<VirtualRangeIx, SmallVec<[Hint; 8]>> = coalescing_info.0;
    let vlrEquivClasses: UnionFindEquivClasses<VirtualRangeIx> = coalescing_info.1;
    let is_vv_boundary_move: TypedIxVec<InstIx, bool> = coalescing_info.2;
    assert!(hints.len() == vlr_env.len());

    // -------- Alloc main --------

    // Create initial state
    info!("alloc_main: begin");
    info!(
        "alloc_main: in: {} insns in {} blocks",
        func.insns().len(),
        func.blocks().len()
    );
    let num_vlrs_initial = vlr_env.len();
    info!(
        "alloc_main: in: {} VLRs, {} RLRs",
        num_vlrs_initial,
        rlr_env.len()
    );

    // This is fully populated by the ::new call.
    let mut prioQ = VirtualRangePrioQ::new(&vlr_env);

    // Whereas this is empty. We have to populate it "by hand", by
    // effectively cloning the allocatable part (prefix) of the universe.
    let mut per_real_reg = Vec::<PerRealReg>::new();
    for _ in 0..reg_universe.allocable {
        // Doing this instead of simply .resize avoids needing Clone for
        // PerRealReg
        per_real_reg.push(PerRealReg::new());
    }
    for (rlrix_no, rlr) in rlr_env.iter().enumerate() {
        let rlrix = RealRangeIx::new(rlrix_no as u32);
        let rregIndex = rlr.rreg.get_index();
        // Ignore RealRanges for RealRegs that are not part of the allocatable
        // set. As far as the allocator is concerned, such RealRegs simply
        // don't exist.
        if rregIndex >= reg_universe.allocable {
            continue;
        }
        per_real_reg[rregIndex].add_RealRange(rlrix, &rlr_env, &frag_env);
    }

    let mut edit_list_move = Vec::<EditListItem>::new();
    let mut edit_list_other = Vec::<EditListItem>::new();
    if log_enabled!(Level::Debug) {
        debug!("");
        print_RA_state(
            "Initial",
            &reg_universe,
            &prioQ,
            &per_real_reg,
            &edit_list_move,
            &edit_list_other,
            &vlr_env,
            &frag_env,
        );
    }

    // This is also part of the running state. `vlr_slot_env` tells us the
    // assigned spill slot for each VirtualRange, if any.
    // `spill_slot_allocator` decides on the assignments and writes them into
    // `vlr_slot_env`.
    let mut vlr_slot_env = TypedIxVec::<VirtualRangeIx, Option<SpillSlot>>::new();
    vlr_slot_env.resize(num_vlrs_initial, None);
    let mut spill_slot_allocator = SpillSlotAllocator::new();

    // Main allocation loop. Each time round, pull out the longest
    // unallocated VirtualRange, and do one of three things:
    //
    // * allocate it to a RealReg, possibly by ejecting some existing
    //   allocation, but only one with a lower spill cost than this one, or
    //
    // * spill it. This causes the VirtualRange to disappear. It is replaced
    //   by a set of very short VirtualRanges to carry the spill and reload
    //   values. Or,
    //
    // * split it. This causes it to disappear but be replaced by two
    //   VirtualRanges which together constitute the original.
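    //
    // To make the cost model concrete (illustrative numbers only): if the
    // pulled-out VirtualRange has spill cost 10, it may evict already-assigned
    // ranges whose spill costs sum to strictly less than 10; the evictees are
    // returned to the priority queue for reconsideration. A range with
    // infinite spill cost (a spill/reload bridge) can never be evicted, which
    // is part of what guarantees that the loop terminates.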
    debug!("");
    debug!("-- MAIN ALLOCATION LOOP (DI means 'direct', CO means 'coalesced'):");

    info!("alloc_main: main allocation loop: begin");

    // ======== BEGIN Main allocation loop ========
    let mut num_vlrs_processed = 0; // stats only
    let mut num_vlrs_spilled = 0; // stats only
    let mut num_vlrs_evicted = 0; // stats only

    'main_allocation_loop: loop {
        debug!("-- still TODO {}", prioQ.len());
        if false {
            if log_enabled!(Level::Debug) {
                debug!("");
                print_RA_state(
                    "Loop Top",
                    &reg_universe,
                    &prioQ,
                    &per_real_reg,
                    &edit_list_move,
                    &edit_list_other,
                    &vlr_env,
                    &frag_env,
                );
                debug!("");
            }
        }

        let mb_curr_vlrix = prioQ.get_longest_VirtualRange();
        if mb_curr_vlrix.is_none() {
            break 'main_allocation_loop;
        }

        num_vlrs_processed += 1;
        let curr_vlrix = mb_curr_vlrix.unwrap();
        let curr_vlr = &vlr_env[curr_vlrix];

        debug!("-- considering {:?}: {:?}", curr_vlrix, curr_vlr);

        assert!(curr_vlr.vreg.to_reg().is_virtual());
        assert!(curr_vlr.rreg.is_none());
        let curr_vlr_regclass = curr_vlr.vreg.get_class();
        let curr_vlr_rc = curr_vlr_regclass.rc_to_usize();

        // ====== BEGIN Try to do coalescing ======
        //
        // First, look through the hints for `curr_vlr`, collecting up candidate
        // real regs, in decreasing order of preference, in `hinted_regs`. Note
        // that we don't have to consider the weights here, because the coalescing
        // analysis phase has already sorted the hints for the VLR so as to
        // present the most favoured (weighty) first, so we merely need to retain
        // that ordering when copying into `hinted_regs`.
        assert!(hints.len() == vlr_env.len());
        let mut hinted_regs = SmallVec::<[RealReg; 8]>::new();

        // === BEGIN collect all hints for `curr_vlr` ===
        // `hints` has one entry per VLR, but only for VLRs which existed
        // initially (viz, not for any created by spilling/splitting/whatever).
        // Similarly, `vlrEquivClasses` can only map VLRs that existed initially,
        // and will panic otherwise. Hence the following check:
        if curr_vlrix.get() < hints.len() {
            for hint in &hints[curr_vlrix] {
                // BEGIN for each hint
                let mb_cand = match hint {
                    Hint::SameAs(other_vlrix, _weight) => {
                        // It wants the same reg as some other VLR, but we can only honour
                        // that if the other VLR actually *has* a reg at this point. Its
                        // `rreg` field will tell us exactly that.
                        vlr_env[*other_vlrix].rreg
                    }
                    Hint::Exactly(rreg, _weight) => Some(*rreg),
                };
                // So now `mb_cand` might have a preferred real reg. If so, add it to
                // the list of cands. De-dup as we go, since that is way cheaper than
                // effectively doing the same via repeated lookups in the
                // CommitmentMaps.
                if let Some(rreg) = mb_cand {
                    if !hinted_regs.iter().any(|r| *r == rreg) {
                        hinted_regs.push(rreg);
                    }
                }
                // END for each hint
            }

            // At this point, we have in `hinted_regs`, the hint candidates that
            // arise from copies between `curr_vlr` and its immediate neighbouring
            // VLRs or RLRs, in order of declining preference. And that is a good
            // start.
            //
            // However, it may be the case that there is some other VLR which
            // is in the same equivalence class as `curr_vlr`, but is not a
            // direct neighbour, and which has already been assigned a
            // register. We really ought to take those into account too, as
            // the least-preferred candidates. Hence we need to iterate over
            // the equivalence class and "round up the secondary candidates."
            //
            // Note that the equivalence class might contain VirtualRanges
            // that are mutually overlapping. That is handled correctly,
            // since we always consult the relevant CommitmentMaps (in the
            // PerRealRegs) to detect interference.
            // To more fully understand this, see the big block comment at the
            // top of bt_coalescing_analysis.rs.
            let n_primary_cands = hinted_regs.len();

            // Work the equivalence class set for `curr_vlrix` to pick up any
            // rreg hints. Equivalence class info exists only for "initial" VLRs.
            if curr_vlrix.get() < num_vlrs_initial {
                // `curr_vlrix` is an "initial" VLR.
                for vlrix in vlrEquivClasses.equiv_class_elems_iter(curr_vlrix) {
                    if vlrix != curr_vlrix {
                        if let Some(rreg) = vlr_env[vlrix].rreg {
                            // Add `rreg` as a cand, if we don't already have it.
                            if !hinted_regs.iter().any(|r| *r == rreg) {
                                hinted_regs.push(rreg);
                            }
                        }
                    }
                }

                // Sort the secondary cands, so as to try and impose more consistency
                // across the group. I don't know if this is worthwhile, but it seems
                // sensible.
                hinted_regs[n_primary_cands..].sort_by(|rreg1, rreg2| {
                    rreg1.get_index().partial_cmp(&rreg2.get_index()).unwrap()
                });
            }

            if log_enabled!(Level::Debug) {
                if !hinted_regs.is_empty() {
                    let mut candStr = "pri {".to_string();
                    for (rreg, n) in hinted_regs.iter().zip(0..) {
                        if n == n_primary_cands {
                            candStr = candStr + &" } sec {".to_string();
                        }
                        candStr =
                            candStr + &" ".to_string() + &reg_universe.regs[rreg.get_index()].1;
                    }
                    candStr = candStr + &" }";
                    debug!("-- CO candidates {}", candStr);
                }
            }
        }
        // === END collect all hints for `curr_vlr` ===

        // === BEGIN try to use the hints for `curr_vlr` ===
        // Now work through the list of preferences, to see if we can honour any
        // of them.
        for rreg in &hinted_regs {
            let rregNo = rreg.get_index();

            // Find the set of ranges which we'd have to evict in order to honour
            // this hint. In the best case the set will be empty. In the worst
            // case, we will get None either because (1) it would require evicting a
            // spill range, which is disallowed so as to guarantee termination of
            // the algorithm, or (2) because it would require evicting a real reg
            // live range, which we can't do.
            //
            // We take care not to evict any range which is in the same equivalence
            // class as `curr_vlr` since those are (by definition) connected to
            // `curr_vlr` via V-V copies, and so evicting any of them would be
            // counterproductive from the point of view of removing copies.
            let mb_evict_info: Option<(SparseSetU<[VirtualRangeIx; 4]>, SpillCost)> =
                per_real_reg[rregNo].find_evict_set(
                    curr_vlrix,
                    &|vlrix_to_evict| {
                        // What this means is: don't evict `vlrix_to_evict` if
                        // it is in the same equivalence class as `curr_vlrix`
                        // (the VLR which we're trying to allocate) AND we
                        // actually know the equivalence classes for both
                        // (hence the `Some`). Spill/reload ("non-original")
                        // VLRs don't have entries in `vlrEquivClasses`.
                        vlrEquivClasses.in_same_equivalence_class(vlrix_to_evict, curr_vlrix)
                            != Some(true)
                    },
                    &vlr_env,
                );
            if let Some((vlrixs_to_evict, total_evict_cost)) = mb_evict_info {
                // Stay sane #1
                assert!(curr_vlr.rreg.is_none());
                // Stay sane #2
                assert!(vlrixs_to_evict.is_empty() == total_evict_cost.is_zero());
                // Can't evict if any in the set are spill ranges
                assert!(total_evict_cost.is_finite());
                // Ensure forward progress
                assert!(total_evict_cost.is_less_than(&curr_vlr.spill_cost));
                // Evict all evictees in the set
                for vlrix_to_evict in vlrixs_to_evict.iter() {
                    // Ensure we're not evicting anything in `curr_vlrix`'s eclass.
                    // This should be guaranteed us by find_evict_set.
                    assert!(
                        vlrEquivClasses.in_same_equivalence_class(*vlrix_to_evict, curr_vlrix)
                            != Some(true)
                    );
                    // Evict ..
                    debug!(
                        "-- CO evict {:?}: {:?}",
                        *vlrix_to_evict, &vlr_env[*vlrix_to_evict]
                    );
                    per_real_reg[rregNo].del_VirtualRange(*vlrix_to_evict, &vlr_env);
                    prioQ.add_VirtualRange(&vlr_env, *vlrix_to_evict);
                    // Directly modify bits of vlr_env. This means we have to abandon
                    // the immutable borrow for curr_vlr, but that's OK -- we won't need
                    // it again (in this loop iteration).
                    debug_assert!(vlr_env[*vlrix_to_evict].rreg.is_some());
                    vlr_env[*vlrix_to_evict].rreg = None;
                    num_vlrs_evicted += 1;
                }
                // .. and reassign.
                debug!("-- CO alloc to {}", reg_universe.regs[rregNo].1);
                per_real_reg[rregNo].add_VirtualRange(curr_vlrix, &vlr_env);
                vlr_env[curr_vlrix].rreg = Some(*rreg);
                // We're done!
                continue 'main_allocation_loop;
            }
        } /* for rreg in hinted_regs */
        // === END try to use the hints for `curr_vlr` ===

        // ====== END Try to do coalescing ======

        // We get here if we failed to find a viable assignment by the process of
        // looking at the coalescing hints.
        //
        // So: do almost exactly as we did in the "try for coalescing" stage
        // above. Except, instead of trying each coalescing candidate
        // individually, iterate over all the registers in the class, to find the
        // one (if any) that has the lowest total evict cost. If we find one that
        // has zero cost -- that is, we can make the assignment without evicting
        // anything -- then stop the search at that point, since searching further
        // is pointless.

        let (first_in_rc, last_in_rc) = match &reg_universe.allocable_by_class[curr_vlr_rc] {
            &None => {
                return Err(RegAllocError::OutOfRegisters(curr_vlr_regclass));
            }
            &Some(ref info) => (info.first, info.last),
        };

        let mut best_so_far: Option<(
            /*rreg index*/ usize,
            SparseSetU<[VirtualRangeIx; 4]>,
            SpillCost,
        )> = None;

        'search_through_cand_rregs_loop: for rregNo in first_in_rc..last_in_rc + 1 {
            //debug!("-- Cand {} ...",
            //       reg_universe.regs[rregNo].1);

            let mb_evict_info: Option<(SparseSetU<[VirtualRangeIx; 4]>, SpillCost)> =
                per_real_reg[rregNo].find_evict_set(
                    curr_vlrix,
                    // We pass a closure that ignores its arg and returns `true`.
                    // Meaning, "we are not specifying any particular
                    // can't-be-evicted VLRs in this call."
                    &|_vlrix_to_evict| true,
                    &vlr_env,
                );
            //
            //match mb_evict_info {
            //    None => debug!("-- Cand {}: Unavail",
            //                   reg_universe.regs[rregNo].1),
            //    Some((ref evict_set, ref evict_cost)) =>
            //        debug!("-- Cand {}: Avail, evict cost {:?}, set {:?}",
            //               reg_universe.regs[rregNo].1, evict_cost, evict_set)
            //}
            //
            if let Some((cand_vlrixs_to_evict, cand_total_evict_cost)) = mb_evict_info {
                // Stay sane ..
                assert!(cand_vlrixs_to_evict.is_empty() == cand_total_evict_cost.is_zero());
                // We can't evict if any in the set are spill ranges, and
                // find_evict_set should not offer us that possibility.
                assert!(cand_total_evict_cost.is_finite());
                // Ensure forward progress
                assert!(cand_total_evict_cost.is_less_than(&curr_vlr.spill_cost));
                // Update the "best so far". First, if the evict set is empty, then
                // the candidate is by definition better than all others, and we will
                // quit searching just below.
                let mut cand_is_better = cand_vlrixs_to_evict.is_empty();
                if !cand_is_better {
                    if let Some((_, _, best_spill_cost)) = best_so_far {
                        // If we've already got a candidate, this one is better if it has
                        // a lower total spill cost.
                        if cand_total_evict_cost.is_less_than(&best_spill_cost) {
                            cand_is_better = true;
                        }
                    } else {
                        // We don't have any candidate so far, so what we just got is
                        // better (than nothing).
                        cand_is_better = true;
                    }
                }
                // Remember the candidate if required.
                let cand_vlrixs_to_evict_is_empty = cand_vlrixs_to_evict.is_empty();
                if cand_is_better {
                    best_so_far = Some((rregNo, cand_vlrixs_to_evict, cand_total_evict_cost));
                }
                // If we've found a no-evictions-necessary candidate, quit searching
                // immediately. We won't find anything better.
                if cand_vlrixs_to_evict_is_empty {
                    break 'search_through_cand_rregs_loop;
                }
            }
        } // for rregNo in first_in_rc..last_in_rc + 1 {

        // Examine the results of the search. Did we find any usable candidate?
        if let Some((rregNo, vlrixs_to_evict, total_spill_cost)) = best_so_far {
            // We are still Totally Paranoid (tm)
            // Stay sane #1
            debug_assert!(curr_vlr.rreg.is_none());
            // Can't spill a spill range
            assert!(total_spill_cost.is_finite());
            // Ensure forward progress
            assert!(total_spill_cost.is_less_than(&curr_vlr.spill_cost));
            // Now the same evict-reassign section as with the coalescing logic above.
            // Evict all evictees in the set
            for vlrix_to_evict in vlrixs_to_evict.iter() {
                // Evict ..
                debug!(
                    "-- DI evict {:?}: {:?}",
                    *vlrix_to_evict, &vlr_env[*vlrix_to_evict]
                );
                per_real_reg[rregNo].del_VirtualRange(*vlrix_to_evict, &vlr_env);
                prioQ.add_VirtualRange(&vlr_env, *vlrix_to_evict);
                debug_assert!(vlr_env[*vlrix_to_evict].rreg.is_some());
                vlr_env[*vlrix_to_evict].rreg = None;
                num_vlrs_evicted += 1;
            }
            // .. and reassign.
            debug!("-- DI alloc to {}", reg_universe.regs[rregNo].1);
            per_real_reg[rregNo].add_VirtualRange(curr_vlrix, &vlr_env);
            let rreg = reg_universe.regs[rregNo].0;
            vlr_env[curr_vlrix].rreg = Some(rreg);
            // We're done!
            continue 'main_allocation_loop;
        }

        // Still no luck. We can't find a register to put it in, so we'll
        // have to spill it, since splitting it isn't yet implemented.
        debug!("-- spill");

        // If the live range already pertains to a spill or restore, then
        // it's Game Over. The constraints (availability of RealRegs vs
        // requirement for them) are impossible to fulfill, and so we cannot
        // generate any valid allocation for this function.
        if curr_vlr.spill_cost.is_infinite() {
            return Err(RegAllocError::OutOfRegisters(curr_vlr_regclass));
        }

        // Generate a new spill slot number, S
        /* Spilling vreg V with virtual live range VirtualRange to slot S:
              for each frag F in VirtualRange {
                 for each insn I in F.first.iix .. F.last.iix {
                    if I does not mention V
                       continue
                    if I mentions V in a Read role {
                       // invar: I cannot mention V in a Mod role
                       add new VirtualRange I.reload -> I.use, vreg V, spillcost Inf
                       add eli @ I.reload V SpillSlot
                    }
                    if I mentions V in a Mod role {
                       // invar: I cannot mention V in a Read or Write Role
                       add new VirtualRange I.reload -> I.spill, Vreg V, spillcost Inf
                       add eli @ I.reload V SpillSlot
                       add eli @ I.spill SpillSlot V
                    }
                    if I mentions V in a Write role {
                       // invar: I cannot mention V in a Mod role
                       add new VirtualRange I.def -> I.spill, vreg V, spillcost Inf
                       add eli @ I.spill V SpillSlot
                    }
                 }
              }
        */
        // We will be spilling vreg `curr_vlr.reg` with VirtualRange `curr_vlr` to
        // a spill slot that the spill slot allocator will choose for us, just a
        // bit further down this function.

        // This holds enough info to create reload or spill (or both)
        // instructions around an instruction that references a VirtualReg
        // that has been spilled.
        struct SpillAndOrReloadInfo {
            bix: BlockIx,     // that `iix` is in
            iix: InstIx,      // this is the Inst we are spilling/reloading for
            kind: BridgeKind, // says whether to create a spill or reload or both
        }
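        // As a concrete instance of the scheme sketched above (illustrative):
        // if insn I both reads and writes (mods) the spilled vreg V, a single
        // R->S bridge covering I.reload .. I.spill is created, forcing the
        // reloaded and re-spilled value into the same RealReg. A plain read
        // gets an R->U bridge (I.reload .. I.use), and a plain write gets a
        // D->S bridge (I.def .. I.spill).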
        // Most spills won't require anywhere near 32 entries, so this avoids
        // almost all heap allocation.
        let mut sri_vec = SmallVec::<[SpillAndOrReloadInfo; 32]>::new();

        let curr_vlr_vreg = curr_vlr.vreg;
        let curr_vlr_reg = curr_vlr_vreg.to_reg();
        let curr_vlr_is_ref = curr_vlr.is_ref;

        for frag in &curr_vlr.sorted_frags.frags {
            for iix in frag.first.iix().dotdot(frag.last.iix().plus(1)) {
                let (iix_uses_curr_vlr_reg, iix_defs_curr_vlr_reg, iix_mods_curr_vlr_reg) =
                    does_inst_use_def_or_mod_reg(&reg_vecs_and_bounds, iix, curr_vlr_reg);
                // If this insn doesn't mention the vreg we're spilling for,
                // move on.
                if !iix_defs_curr_vlr_reg && !iix_mods_curr_vlr_reg && !iix_uses_curr_vlr_reg {
                    continue;
                }
                // USES: Do we need to create a reload-to-use bridge
                // (VirtualRange) ?
                if iix_uses_curr_vlr_reg && frag.contains(&InstPoint::new_use(iix)) {
                    debug_assert!(!iix_mods_curr_vlr_reg);
                    // Stash enough info that we can create a new VirtualRange
                    // and a new edit list entry for the reload.
                    let bix = inst_to_block_map.map(iix);
                    let sri = SpillAndOrReloadInfo {
                        bix,
                        iix,
                        kind: BridgeKind::RtoU,
                    };
                    sri_vec.push(sri);
                }
                // MODS: Do we need to create a reload-to-spill bridge? This
                // can only happen for instructions which modify a register.
                // Note this has to be a single VirtualRange, since if it were
                // two (one for the reload, one for the spill) they could
                // later end up being assigned to different RealRegs, which is
                // obviously nonsensical.
                if iix_mods_curr_vlr_reg
                    && frag.contains(&InstPoint::new_use(iix))
                    && frag.contains(&InstPoint::new_def(iix))
                {
                    debug_assert!(!iix_uses_curr_vlr_reg);
                    debug_assert!(!iix_defs_curr_vlr_reg);
                    let bix = inst_to_block_map.map(iix);
                    let sri = SpillAndOrReloadInfo {
                        bix,
                        iix,
                        kind: BridgeKind::RtoS,
                    };
                    sri_vec.push(sri);
                }
                // DEFS: Do we need to create a def-to-spill bridge?
                if iix_defs_curr_vlr_reg && frag.contains(&InstPoint::new_def(iix)) {
                    debug_assert!(!iix_mods_curr_vlr_reg);
                    let bix = inst_to_block_map.map(iix);
                    let sri = SpillAndOrReloadInfo {
                        bix,
                        iix,
                        kind: BridgeKind::DtoS,
                    };
                    sri_vec.push(sri);
                }
            }
        }

        // Now that we no longer need to access `frag_env` or `vlr_env` for
        // the remainder of this iteration of the main allocation loop, we can
        // actually generate the required spill/reload artefacts.

        // First off, poke the spill slot allocator to get an intelligent choice
        // of slot. Note that this will fail for "non-initial" VirtualRanges; but
        // the only non-initial ones will have been created by spilling anyway,
        // and we definitely shouldn't be trying to spill them again. Hence we
        // can assert.
        assert!(vlr_slot_env.len() == num_vlrs_initial);
        assert!(curr_vlrix < VirtualRangeIx::new(num_vlrs_initial));
        if vlr_slot_env[curr_vlrix].is_none() {
            // It hasn't been decided yet. Cause it to be so by asking for an
            // allocation for the entire eclass that `curr_vlrix` belongs to.
            spill_slot_allocator.alloc_spill_slots(
                &mut vlr_slot_env,
                func,
                &vlr_env,
                &vlrEquivClasses,
                curr_vlrix,
            );
            assert!(vlr_slot_env[curr_vlrix].is_some());
        }
        let spill_slot_to_use = vlr_slot_env[curr_vlrix].unwrap();

        // If we're spilling a reffy VLR, we'll need to tell the spillslot allocator that. The
        // VLR will already have been allocated to some spill slot, and relevant RangeFrags in
        // the slot should have already been reserved for it, by the above call to
        // `alloc_spill_slots` (although possibly relating to a prior VLR in the same
        // equivalence class, and not this one).
        // However, those RangeFrags will have all been
        // marked non-reffy, because we don't know, in general, at spillslot-allocation-time,
        // whether a VLR will actually be spilled, and we don't want the resulting stack maps to
        // mention stack entries which are dead at the point of the safepoint insn. Hence the
        // need to update those RangeFrags pertaining to just this VLR -- now that we *know*
        // it's going to be spilled.
        if curr_vlr.is_ref {
            spill_slot_allocator
                .notify_spillage_of_reftyped_vlr(spill_slot_to_use, &curr_vlr.sorted_frags);
        }

        for sri in sri_vec {
            let (new_vlr_first_pt, new_vlr_last_pt) = match sri.kind {
                BridgeKind::RtoU => (Point::Reload, Point::Use),
                BridgeKind::RtoS => (Point::Reload, Point::Spill),
                BridgeKind::DtoS => (Point::Def, Point::Spill),
            };
            let new_vlr_frag = RangeFrag {
                first: InstPoint::new(sri.iix, new_vlr_first_pt),
                last: InstPoint::new(sri.iix, new_vlr_last_pt),
            };
            debug!("-- new RangeFrag {:?}", &new_vlr_frag);
            let new_vlr_sfrags = SortedRangeFrags::unit(new_vlr_frag);
            let new_vlr = VirtualRange {
                vreg: curr_vlr_vreg,
                rreg: None,
                sorted_frags: new_vlr_sfrags,
                is_ref: curr_vlr_is_ref, // "inherit" refness
                size: 1,
                // Effectively infinite. We'll never look at this again anyway.
                total_cost: 0xFFFF_FFFFu32,
                spill_cost: SpillCost::infinite(),
            };
            let new_vlrix = VirtualRangeIx::new(vlr_env.len() as u32);
            debug!("-- new VirtRange {:?} := {:?}", new_vlrix, &new_vlr);
            vlr_env.push(new_vlr);
            prioQ.add_VirtualRange(&vlr_env, new_vlrix);

            // BEGIN (optimisation only) see if we can create any coalescing hints
            // for this new VLR.
            let mut new_vlr_hint = SmallVec::<[Hint; 8]>::new();
            if is_vv_boundary_move[sri.iix] {
                // Collect the src and dst regs for the move. It must be a
                // move because `is_vv_boundary_move` claims that to be true.
                let im = func.is_move(&func.get_insn(sri.iix));
                assert!(im.is_some());
                let (wdst_reg, src_reg): (Writable<Reg>, Reg) = im.unwrap();
                let dst_reg: Reg = wdst_reg.to_reg();
                assert!(src_reg.is_virtual() && dst_reg.is_virtual());
                let dst_vreg: VirtualReg = dst_reg.to_virtual_reg();
                let src_vreg: VirtualReg = src_reg.to_virtual_reg();
                let bridge_eef = est_freqs[sri.bix];
                match sri.kind {
                    BridgeKind::RtoU => {
                        // Reload-to-Use bridge. Hint that we want to be
                        // allocated to the same reg as the destination of the
                        // move. That means we have to find the VLR that owns
                        // the destination vreg.
                        for vlrix in &reg_to_ranges_maps.vreg_to_vlrs_map[dst_vreg.get_index()] {
                            if vlr_env[*vlrix].vreg == dst_vreg {
                                new_vlr_hint.push(Hint::SameAs(*vlrix, bridge_eef));
                                break;
                            }
                        }
                    }
                    BridgeKind::DtoS => {
                        // Def-to-Spill bridge. Hint that we want to be
                        // allocated to the same reg as the source of the
                        // move.
                        for vlrix in &reg_to_ranges_maps.vreg_to_vlrs_map[src_vreg.get_index()] {
                            if vlr_env[*vlrix].vreg == src_vreg {
                                new_vlr_hint.push(Hint::SameAs(*vlrix, bridge_eef));
                                break;
                            }
                        }
                    }
                    BridgeKind::RtoS => {
                        // A Reload-to-Spill bridge. This can't happen. It
                        // implies that the instruction modifies (both reads
                        // and writes) one of its operands, but the client's
                        // `is_move` function claims this instruction is a
                        // plain "move" (reads source, writes dest). These
                        // claims are mutually contradictory.
                        panic!("RtoS bridge for v-v boundary move");
                    }
                }
            }
            hints.push(new_vlr_hint);
            // END see if we can create any coalescing hints for this new VLR.

            // Finally, create a new EditListItem. This holds enough
            // information that a suitable spill or reload instruction can
            // later be created.
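            //
            // For instance (illustrative): an R->U bridge at insn 42, spilled
            // to slot 3, records (slot 3, the bridge's VirtualRangeIx, kind
            // R->U, insn 42) -- enough to later emit a single reload once the
            // bridge VLR has been given a RealReg.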
            let new_eli = EditListItem {
                slot: spill_slot_to_use,
                vlrix: new_vlrix,
                kind: sri.kind,
                iix: sri.iix,
            };
            if is_vv_boundary_move[sri.iix] {
                debug!("-- new ELI MOVE {:?}", &new_eli);
                edit_list_move.push(new_eli);
            } else {
                debug!("-- new ELI other {:?}", &new_eli);
                edit_list_other.push(new_eli);
            }
        }

        num_vlrs_spilled += 1;
        // And implicitly "continue 'main_allocation_loop"
    }
    // ======== END Main allocation loop ========

    info!("alloc_main: main allocation loop: end");

    if log_enabled!(Level::Debug) {
        debug!("");
        print_RA_state(
            "Final",
            &reg_universe,
            &prioQ,
            &per_real_reg,
            &edit_list_move,
            &edit_list_other,
            &vlr_env,
            &frag_env,
        );
    }

    // ======== BEGIN Do spill slot coalescing ========

    debug!("");
    info!("alloc_main: create spills_n_reloads for MOVE insns");

    // Sort `edit_list_move` by the insn with which each item is associated.
    edit_list_move.sort_unstable_by(|eli1, eli2| eli1.iix.cmp(&eli2.iix));

    // Now go through `edit_list_move` and find pairs which constitute a
    // spillslot-to-the-same-spillslot move. What we have in `edit_list_move` is
    // heavily constrained, as follows:
    //
    // * each entry should reference an InstIx which the coalescing analysis
    //   identified as a virtual-to-virtual copy which exists at the boundary
    //   between two VirtualRanges. The "boundary" aspect is important; we
    //   can't coalesce out moves in which the source vreg is not the "last use"
    //   or for which the destination vreg is not the "first def". (The same is
    //   true for coalescing of unspilled moves.)
    //
    // * each entry must reference a VirtualRange which has only a single
    //   RangeFrag, and that frag must exist entirely "within" the referenced
    //   instruction. Specifically, it may only be a R->U frag (bridge) or a
    //   D->S frag.
    //
    // * For a referenced instruction, there may be at most two entries in this
    //   list: one that references the R->U frag and one that references the
    //   D->S frag. Furthermore, the two entries must carry values of the same
    //   RegClass; if that isn't true, the client's `is_move` function is
    //   defective.
    //
    // For any such pair identified, if both frags mention the same spill slot,
    // we skip generating both the reload and the spill instruction. We also
    // note that the instruction itself is to be deleted (converted to a
    // zero-len nop). In a sense we have "cancelled out" a reload/spill pair.
    // Entries that can't be cancelled out are handled the same way as for
    // entries in `edit_list_other`, by simply copying them there.
    //
    // Since `edit_list_move` is sorted by insn ix, we can scan linearly over
    // it, looking for adjacent pairs. We'll have to accept them in either
    // order though (first R->U then D->S, or the other way round). There's no
    // fixed ordering since there is no particular ordering in the way
    // VirtualRanges are allocated.

    // As a result of spill slot coalescing, we'll need to delete the move
    // instructions leading to a mergable spill slot move. The insn stream
    // editor can't delete instructions, so instead it'll replace them with zero
    // len nops obtained from the client. `iixs_to_nop_out` records the insns
    // that this has to happen to. It is in increasing order of InstIx (because
    // `edit_list_move` is), and the indices in it refer to the original
    // virtual-registerised code.
    let mut iixs_to_nop_out = Vec::<InstIx>::new();
    let mut ghost_moves = vec![];

    let n_edit_list_move = edit_list_move.len();
    let mut n_edit_list_move_processed = 0; // for assertions only
    let mut i_min = 0;
    loop {
        if i_min >= n_edit_list_move {
            break;
        }
        // Find the bounds of the current group.
        debug!("editlist entry (MOVE): min: {:?}", &edit_list_move[i_min]);
        let i_min_iix = edit_list_move[i_min].iix;
        let mut i_max = i_min;
        while i_max + 1 < n_edit_list_move && edit_list_move[i_max + 1].iix == i_min_iix {
            i_max += 1;
            debug!("editlist entry (MOVE): max: {:?}", &edit_list_move[i_max]);
        }
        // Current group is from i_min to i_max inclusive. At most 2 entries are
        // allowed per group.
        assert!(i_max - i_min <= 1);
        // Check for a mergeable pair.
        if i_max - i_min == 1 {
            assert!(is_vv_boundary_move[i_min_iix]);
            let vlrix1 = edit_list_move[i_min].vlrix;
            let vlrix2 = edit_list_move[i_max].vlrix;
            assert!(vlrix1 != vlrix2);
            let vlr1 = &vlr_env[vlrix1];
            let vlr2 = &vlr_env[vlrix2];
            let frags1 = &vlr1.sorted_frags;
            let frags2 = &vlr2.sorted_frags;
            assert!(frags1.frags.len() == 1);
            assert!(frags2.frags.len() == 1);
            let frag1 = &frags1.frags[0];
            let frag2 = &frags2.frags[0];
            assert!(frag1.first.iix() == i_min_iix);
            assert!(frag1.last.iix() == i_min_iix);
            assert!(frag2.first.iix() == i_min_iix);
            assert!(frag2.last.iix() == i_min_iix);
            // frag1 must be R->U and frag2 must be D->S, or vice versa
            match (
                frag1.first.pt(),
                frag1.last.pt(),
                frag2.first.pt(),
                frag2.last.pt(),
            ) {
                (Point::Reload, Point::Use, Point::Def, Point::Spill)
                | (Point::Def, Point::Spill, Point::Reload, Point::Use) => {
                    let slot1 = edit_list_move[i_min].slot;
                    let slot2 = edit_list_move[i_max].slot;
                    if slot1 == slot2 {
                        // Yay. We've found a coalescable pair. We can just ignore the
                        // two entries and move on. Except we have to mark the insn
                        // itself for deletion.
                        debug!("editlist entry (MOVE): delete {:?}", i_min_iix);
                        iixs_to_nop_out.push(i_min_iix);
                        i_min = i_max + 1;
                        n_edit_list_move_processed += 2;
                        if use_checker {
                            let (from_reg, to_reg) = if frag1.last.pt() == Point::Use {
                                (vlr1.vreg.to_reg(), vlr2.vreg.to_reg())
                            } else {
                                (vlr2.vreg.to_reg(), vlr1.vreg.to_reg())
                            };
                            ghost_moves.push(InstToInsertAndExtPoint::new(
                                InstToInsert::ChangeSpillSlotOwnership {
                                    inst_ix: i_min_iix,
                                    slot: slot1,
                                    from_reg,
                                    to_reg,
                                },
                                InstExtPoint::new(i_min_iix, ExtPoint::Reload),
                            ));
                        }
                        continue;
                    }
                }
                (_, _, _, _) => {
                    panic!("spill slot coalescing, edit_list_move: unexpected frags");
                }
            }
        }
        // If we get here for whatever reason, this group is uninteresting. Copy
        // it into `edit_list_other` so that it gets processed in the normal way.
        for i in i_min..=i_max {
            edit_list_other.push(edit_list_move[i]);
            n_edit_list_move_processed += 1;
        }
        i_min = i_max + 1;
    }
    assert!(n_edit_list_move_processed == n_edit_list_move);

    // ======== END Do spill slot coalescing ========

    // ======== BEGIN Create all other spills and reloads ========

    debug!("");
    info!("alloc_main: create spills_n_reloads for other insns");

    // Reload and spill instructions are missing. To generate them, go through
    // the "edit list", which contains info on both how to generate the
    // instructions, and where to insert them.
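    //
    // Concretely (illustrative): an entry of kind R->U whose VLR was assigned
    // RealReg R and slot S expands to one "reload R <- S" at the instruction's
    // reload point; an R->S entry expands to both that reload and a
    // "spill S <- R" at the spill point; a D->S entry expands to just the
    // spill.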
    let mut spills_n_reloads = Vec::<InstToInsertAndExtPoint>::new();
    let mut num_spills = 0; // stats only
    let mut num_reloads = 0; // stats only
    for eli in &edit_list_other {
        debug!("editlist entry (other): {:?}", eli);
        let vlr = &vlr_env[eli.vlrix];
        let vlr_sfrags = &vlr.sorted_frags;
        assert!(vlr_sfrags.frags.len() == 1);
        let vlr_frag = &vlr_sfrags.frags[0];
        let rreg = vlr.rreg.expect("Gen of spill/reload: reg not assigned?!");
        let vreg = vlr.vreg;
        match eli.kind {
            BridgeKind::RtoU => {
                debug_assert!(vlr_frag.first.pt().is_reload());
                debug_assert!(vlr_frag.last.pt().is_use());
                debug_assert!(vlr_frag.first.iix() == vlr_frag.last.iix());
                let insnR = InstToInsert::Reload {
                    to_reg: Writable::from_reg(rreg),
                    from_slot: eli.slot,
                    for_vreg: Some(vreg),
                };
                let whereToR = InstExtPoint::from_inst_point(vlr_frag.first);
                spills_n_reloads.push(InstToInsertAndExtPoint::new(insnR, whereToR));
                num_reloads += 1;
            }
            BridgeKind::RtoS => {
                debug_assert!(vlr_frag.first.pt().is_reload());
                debug_assert!(vlr_frag.last.pt().is_spill());
                debug_assert!(vlr_frag.first.iix() == vlr_frag.last.iix());
                let insnR = InstToInsert::Reload {
                    to_reg: Writable::from_reg(rreg),
                    from_slot: eli.slot,
                    for_vreg: Some(vreg),
                };
                let whereToR = InstExtPoint::from_inst_point(vlr_frag.first);
                let insnS = InstToInsert::Spill {
                    to_slot: eli.slot,
                    from_reg: rreg,
                    for_vreg: Some(vreg),
                };
                let whereToS = InstExtPoint::from_inst_point(vlr_frag.last);
                spills_n_reloads.push(InstToInsertAndExtPoint::new(insnR, whereToR));
                spills_n_reloads.push(InstToInsertAndExtPoint::new(insnS, whereToS));
                num_reloads += 1;
                num_spills += 1;
            }
            BridgeKind::DtoS => {
                debug_assert!(vlr_frag.first.pt().is_def());
                debug_assert!(vlr_frag.last.pt().is_spill());
                debug_assert!(vlr_frag.first.iix() == vlr_frag.last.iix());
                let insnS = InstToInsert::Spill {
                    to_slot: eli.slot,
                    from_reg: rreg,
                    for_vreg: Some(vreg),
                };
                let whereToS = InstExtPoint::from_inst_point(vlr_frag.last);
                spills_n_reloads.push(InstToInsertAndExtPoint::new(insnS, whereToS));
                num_spills += 1;
            }
        }
    }

    // Append all ghost moves.
    if use_checker {
        spills_n_reloads.extend(ghost_moves.into_iter());
        spills_n_reloads.sort_by_key(|inst_and_point| inst_and_point.iep.clone());
    }

    // ======== END Create all other spills and reloads ========

    // ======== BEGIN Create final instruction stream ========

    // Gather up a vector of (RangeFrag, VirtualReg, RealReg) resulting from
    // the previous phase. This fundamentally is the result of the allocation
    // and tells us how the instruction stream must be edited. Note it does
    // not take account of spill or reload instructions. Dealing with those
    // is relatively simple and happens later.

    info!("alloc_main: create frag_map");
    let mut frag_map = Vec::<(RangeFrag, VirtualReg, RealReg)>::new();
    // For each real register under our control ..
    for i in 0..reg_universe.allocable {
        let rreg = reg_universe.regs[i].0;
        // .. look at all the VirtualRanges assigned to it. And for each such
        // VirtualRange ..
        for vlrix_assigned in per_real_reg[i].vlrixs_assigned.iter() {
            let VirtualRange {
                vreg, sorted_frags, ..
            } = &vlr_env[*vlrix_assigned];
            // All the RangeFrags in `vlr_assigned` require `vlr_assigned.reg`
            // to be mapped to the real reg `i`
            // .. collect up all its constituent RangeFrags.
            for frag in &sorted_frags.frags {
                frag_map.push((frag.clone(), *vreg, rreg));
            }
        }
    }

    // There is one of these for every entry in `safepoint_insns`.
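    // That is, `stackmaps[i]` will hold the reftyped spill slots live across
    // `safepoint_insns[i]`; that correspondence is asserted near the end of
    // this function.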
    let mut stackmaps = Vec::<Vec<SpillSlot>>::new();

    if !safepoint_insns.is_empty() {
        info!("alloc_main: create safepoints and stackmaps");
        for safepoint_iix in safepoint_insns {
            // Create the stackmap artefacts for `safepoint_iix`. Save the stackmap (the
            // reftyped spillslots); we'll have to return it to the client as part of the
            // overall allocation result. The extra spill and reload instructions can simply
            // be added to `spills_n_reloads` though, and `edit_inst_stream` will correctly
            // merge them in.
            //
            // Note: this modifies `spill_slot_allocator`, since at this point we have to
            // allocate spill slots to hold reftyped real regs across the safepoint insn.
            //
            // Because the SB (spill-before) and RA (reload-after) `ExtPoint`s are "closer" to
            // the "core" of an instruction than the R (reload) and S (spill) `ExtPoint`s, any
            // "normal" reload or spill ranges that are reftyped will be handled correctly.
            // From `get_stackmap_artefacts_at`'s point of view, such spill/reload ranges are
            // just like any other real-reg live range that it will have to spill around the
            // safepoint. The fact that they are for spills or reloads doesn't make any
            // difference.
            //
            // Note also: this call can fail; failure is propagated upwards.
            //
            // FIXME Passing these 3 small vectors around is inefficient. Use SmallVec or
            // (better) owned-by-this-function vectors instead.
            let (spills_before, reloads_after, reftyped_spillslots) = get_stackmap_artefacts_at(
                &mut spill_slot_allocator,
                &reg_universe,
                reftype_class,
                &reg_vecs_and_bounds,
                &per_real_reg,
                &rlr_env,
                &vlr_env,
                *safepoint_iix,
            )?;
            stackmaps.push(reftyped_spillslots);
            for spill_before in spills_before {
                spills_n_reloads.push(InstToInsertAndExtPoint::new(
                    spill_before,
                    InstExtPoint::new(*safepoint_iix, ExtPoint::SpillBefore),
                ));
            }
            for reload_after in reloads_after {
                spills_n_reloads.push(InstToInsertAndExtPoint::new(
                    reload_after,
                    InstExtPoint::new(*safepoint_iix, ExtPoint::ReloadAfter),
                ));
            }
        }
    }

    info!("alloc_main: edit_inst_stream");
    let final_insns_and_targetmap_and_new_safepoints__or_err = edit_inst_stream(
        func,
        &safepoint_insns,
        spills_n_reloads,
        &iixs_to_nop_out,
        frag_map,
        &reg_universe,
        use_checker,
        &stackmaps[..],
        &reftyped_vregs[..],
    );

    // ======== END Create final instruction stream ========

    // ======== BEGIN Create the RegAllocResult ========

    match final_insns_and_targetmap_and_new_safepoints__or_err {
        Ok((ref final_insns, ..)) => {
            info!(
                "alloc_main: out: VLRs: {} initially, {} processed",
                num_vlrs_initial, num_vlrs_processed
            );
            info!(
                "alloc_main: out: VLRs: {} evicted, {} spilled",
                num_vlrs_evicted, num_vlrs_spilled
            );
            info!(
                "alloc_main: out: insns: {} total, {} spills, {} reloads, {} nopzs",
                final_insns.len(),
                num_spills,
                num_reloads,
                iixs_to_nop_out.len()
            );
            info!(
                "alloc_main: out: spill slots: {} used",
                spill_slot_allocator.num_slots_in_use()
            );
        }
        Err(_) => {
            info!("alloc_main: allocation failed!");
        }
    }

    let (final_insns, target_map, new_to_old_insn_map, new_safepoint_insns) =
        match final_insns_and_targetmap_and_new_safepoints__or_err {
            Err(e) => {
                info!("alloc_main: fail");
                return Err(e);
            }
            Ok(quad) => {
                info!("alloc_main: creating RegAllocResult");
                quad
            }
        };

    // Compute clobbered registers with one final, quick pass.
    //
    // FIXME: derive this information directly from the allocation data
    // structures used above.
    //
    // NB at this point, the `san_reg_uses` that was computed in the analysis
    // phase is no longer valid, because we've added and removed instructions to
    // the function relative to the one that `san_reg_uses` was computed from,
    // so we have to re-visit all insns with `add_raw_reg_vecs_for_insn`.
    // That's inefficient, but we don't care .. this should only be a temporary
    // fix.
    let mut clobbered_registers: Set<RealReg> = Set::empty();

    // We'll dump all the reg uses in here. We don't care about the bounds, so just
    // pass a dummy one in the loop.
    let mut reg_vecs = RegVecs::new(/*sanitized=*/ false);
    let mut dummy_bounds = RegVecBounds::new();
    for insn in &final_insns {
        if func.is_included_in_clobbers(insn) {
            add_raw_reg_vecs_for_insn::<F>(insn, &mut reg_vecs, &mut dummy_bounds);
        }
    }
    for reg in reg_vecs.defs.iter().chain(reg_vecs.mods.iter()) {
        assert!(reg.is_real());
        clobbered_registers.insert(reg.to_real_reg());
    }

    // And now remove from the set, all those not available to the allocator.
    // But not removing the reserved regs, since we might have modified those.
    clobbered_registers.filter_map(|&reg| {
        if reg.get_index() >= reg_universe.allocable {
            None
        } else {
            Some(reg)
        }
    });

    assert!(est_freqs.len() as usize == func.blocks().len());
    let mut block_annotations = None;
    if opts.request_block_annotations {
        let mut anns = TypedIxVec::<BlockIx, Vec<String>>::new();
        for (estFreq, i) in est_freqs.iter().zip(0..) {
            let bix = BlockIx::new(i);
            let ef_str = format!("RA: bix {:?}, estFreq {}", bix, estFreq);
            anns.push(vec![ef_str]);
        }
        block_annotations = Some(anns);
    }

    assert!(stackmaps.len() == safepoint_insns.len());
    assert!(new_safepoint_insns.len() == safepoint_insns.len());
    let ra_res = RegAllocResult {
        insns: final_insns,
        target_map,
        orig_insn_map: new_to_old_insn_map,
        clobbered_registers,
        num_spill_slots: spill_slot_allocator.num_slots_in_use() as u32,
        block_annotations,
        stackmaps,
        new_safepoint_insns,
    };

    info!("alloc_main: end");

    // ======== END Create the RegAllocResult ========

    Ok(ra_res)
}