Diffstat (limited to 'compiler/rustc_data_structures/src')
38 files changed, 2077 insertions, 532 deletions
diff --git a/compiler/rustc_data_structures/src/aligned.rs b/compiler/rustc_data_structures/src/aligned.rs new file mode 100644 index 000000000..0e5ecfd9b --- /dev/null +++ b/compiler/rustc_data_structures/src/aligned.rs @@ -0,0 +1,33 @@ +use std::ptr::Alignment; + +/// Returns the ABI-required minimum alignment of a type in bytes. +/// +/// This is equivalent to [`mem::align_of`], but also works for some unsized +/// types (e.g. slices or rustc's `List`s). +/// +/// [`mem::align_of`]: std::mem::align_of +pub const fn align_of<T: ?Sized + Aligned>() -> Alignment { + T::ALIGN +} + +/// A type with a statically known alignment. +/// +/// # Safety +/// +/// `Self::ALIGN` must be equal to the alignment of `Self`. For sized types it +/// is [`mem::align_of<Self>()`], for unsized types it depends on the type, for +/// example `[T]` has alignment of `T`. +/// +/// [`mem::align_of<Self>()`]: std::mem::align_of +pub unsafe trait Aligned { + /// Alignment of `Self`. + const ALIGN: Alignment; +} + +unsafe impl<T> Aligned for T { + const ALIGN: Alignment = Alignment::of::<Self>(); +} + +unsafe impl<T> Aligned for [T] { + const ALIGN: Alignment = Alignment::of::<T>(); +} diff --git a/compiler/rustc_data_structures/src/fingerprint.rs b/compiler/rustc_data_structures/src/fingerprint.rs index b6e866f15..9995c0834 100644 --- a/compiler/rustc_data_structures/src/fingerprint.rs +++ b/compiler/rustc_data_structures/src/fingerprint.rs @@ -1,4 +1,4 @@ -use crate::stable_hasher; +use crate::stable_hasher::{Hash64, StableHasher, StableHasherResult}; use rustc_serialize::{Decodable, Decoder, Encodable, Encoder}; use std::hash::{Hash, Hasher}; @@ -9,32 +9,49 @@ mod tests; #[repr(C)] pub struct Fingerprint(u64, u64); -impl Fingerprint { - pub const ZERO: Fingerprint = Fingerprint(0, 0); +pub trait FingerprintComponent { + fn as_u64(&self) -> u64; +} +impl FingerprintComponent for Hash64 { #[inline] - pub fn new(_0: u64, _1: u64) -> Fingerprint { - Fingerprint(_0, _1) + fn as_u64(&self) -> u64 { + Hash64::as_u64(*self) } +} +impl FingerprintComponent for u64 { #[inline] - pub fn from_smaller_hash(hash: u64) -> Fingerprint { - Fingerprint(hash, hash) + fn as_u64(&self) -> u64 { + *self + } +} + +impl Fingerprint { + pub const ZERO: Fingerprint = Fingerprint(0, 0); + + #[inline] + pub fn new<A, B>(_0: A, _1: B) -> Fingerprint + where + A: FingerprintComponent, + B: FingerprintComponent, + { + Fingerprint(_0.as_u64(), _1.as_u64()) } #[inline] - pub fn to_smaller_hash(&self) -> u64 { + pub fn to_smaller_hash(&self) -> Hash64 { // Even though both halves of the fingerprint are expected to be good // quality hash values, let's still combine the two values because the // Fingerprints in DefPathHash have the StableCrateId portion which is // the same for all DefPathHashes from the same crate. Combining the // two halves makes sure we get a good quality hash in such cases too. - self.0.wrapping_mul(3).wrapping_add(self.1) + Hash64::new(self.0.wrapping_mul(3).wrapping_add(self.1)) } #[inline] - pub fn as_value(&self) -> (u64, u64) { - (self.0, self.1) + pub fn split(&self) -> (Hash64, Hash64) { + (Hash64::new(self.0), Hash64::new(self.1)) } #[inline] @@ -47,6 +64,11 @@ impl Fingerprint { ) } + #[inline] + pub(crate) fn as_u128(self) -> u128 { + u128::from(self.1) << 64 | u128::from(self.0) + } + // Combines two hashes in an order independent way. Make sure this is what // you want. 
#[inline] @@ -131,9 +153,9 @@ impl FingerprintHasher for crate::unhash::Unhasher { } } -impl stable_hasher::StableHasherResult for Fingerprint { +impl StableHasherResult for Fingerprint { #[inline] - fn finish(hasher: stable_hasher::StableHasher) -> Self { + fn finish(hasher: StableHasher) -> Self { let (_0, _1) = hasher.finalize(); Fingerprint(_0, _1) } diff --git a/compiler/rustc_data_structures/src/fingerprint/tests.rs b/compiler/rustc_data_structures/src/fingerprint/tests.rs index 9b0783e33..09ec2622a 100644 --- a/compiler/rustc_data_structures/src/fingerprint/tests.rs +++ b/compiler/rustc_data_structures/src/fingerprint/tests.rs @@ -1,11 +1,12 @@ use super::*; +use crate::stable_hasher::Hash64; // Check that `combine_commutative` is order independent. #[test] fn combine_commutative_is_order_independent() { - let a = Fingerprint::new(0xf6622fb349898b06, 0x70be9377b2f9c610); - let b = Fingerprint::new(0xa9562bf5a2a5303c, 0x67d9b6c82034f13d); - let c = Fingerprint::new(0x0d013a27811dbbc3, 0x9a3f7b3d9142ec43); + let a = Fingerprint::new(Hash64::new(0xf6622fb349898b06), Hash64::new(0x70be9377b2f9c610)); + let b = Fingerprint::new(Hash64::new(0xa9562bf5a2a5303c), Hash64::new(0x67d9b6c82034f13d)); + let c = Fingerprint::new(Hash64::new(0x0d013a27811dbbc3), Hash64::new(0x9a3f7b3d9142ec43)); let permutations = [(a, b, c), (a, c, b), (b, a, c), (b, c, a), (c, a, b), (c, b, a)]; let f = a.combine_commutative(b).combine_commutative(c); for p in &permutations { diff --git a/compiler/rustc_data_structures/src/functor.rs b/compiler/rustc_data_structures/src/functor.rs index 28fcf80b3..e3fcaccb1 100644 --- a/compiler/rustc_data_structures/src/functor.rs +++ b/compiler/rustc_data_structures/src/functor.rs @@ -1,4 +1,4 @@ -use rustc_index::vec::{Idx, IndexVec}; +use rustc_index::{Idx, IndexVec}; use std::{mem, rc::Rc, sync::Arc}; pub trait IdFunctor: Sized { diff --git a/compiler/rustc_data_structures/src/graph/dominators/mod.rs b/compiler/rustc_data_structures/src/graph/dominators/mod.rs index 0df9dc112..a5db14d91 100644 --- a/compiler/rustc_data_structures/src/graph/dominators/mod.rs +++ b/compiler/rustc_data_structures/src/graph/dominators/mod.rs @@ -10,7 +10,8 @@ //! <https://www.cs.princeton.edu/courses/archive/spr03/cs423/download/dominators.pdf> use super::ControlFlowGraph; -use rustc_index::vec::{Idx, IndexSlice, IndexVec}; +use rustc_index::{Idx, IndexSlice, IndexVec}; + use std::cmp::Ordering; #[cfg(test)] @@ -25,7 +26,7 @@ rustc_index::newtype_index! { struct PreorderIndex {} } -pub fn dominators<G: ControlFlowGraph>(graph: G) -> Dominators<G::Node> { +pub fn dominators<G: ControlFlowGraph>(graph: &G) -> Dominators<G::Node> { // compute the post order index (rank) for each node let mut post_order_rank = IndexVec::from_elem_n(0, graph.num_nodes()); @@ -108,28 +109,27 @@ pub fn dominators<G: ControlFlowGraph>(graph: G) -> Dominators<G::Node> { // they have been placed in the bucket. // // We compute a partial set of immediate dominators here. - let z = parent[w]; - for &v in bucket[z].iter() { + for &v in bucket[w].iter() { // This uses the result of Lemma 5 from section 2 from the original // 1979 paper, to compute either the immediate or relative dominator // for a given vertex v. // // eval returns a vertex y, for which semi[y] is minimum among - // vertices semi[v] +> y *> v. Note that semi[v] = z as we're in the - // z bucket. + // vertices semi[v] +> y *> v. Note that semi[v] = w as we're in the + // w bucket. // // Given such a vertex y, semi[y] <= semi[v] and idom[y] = idom[v]. 
// If semi[y] = semi[v], though, idom[v] = semi[v]. // // Using this, we can either set idom[v] to be: - // * semi[v] (i.e. z), if semi[y] is z + // * semi[v] (i.e. w), if semi[y] is w // * idom[y], otherwise // // We don't directly set to idom[y] though as it's not necessarily // known yet. The second preorder traversal will cleanup by updating // the idom for any that were missed in this pass. let y = eval(&mut parent, lastlinked, &semi, &mut label, v); - idom[v] = if semi[y] < z { y } else { z }; + idom[v] = if semi[y] < w { y } else { w }; } // This loop computes the semi[w] for w. @@ -212,10 +212,11 @@ pub fn dominators<G: ControlFlowGraph>(graph: G) -> Dominators<G::Node> { // If we don't yet know the idom directly, then push this vertex into // our semidominator's bucket, where it will get processed at a later // stage to compute its immediate dominator. - if parent[w] != semi[w] { + let z = parent[w]; + if z != semi[w] { bucket[semi[w]].push(w); } else { - idom[w] = parent[w]; + idom[w] = z; } // Optimization: We share the parent array between processed and not @@ -241,7 +242,12 @@ pub fn dominators<G: ControlFlowGraph>(graph: G) -> Dominators<G::Node> { immediate_dominators[*node] = Some(pre_order_to_real[idom[idx]]); } - Dominators { post_order_rank, immediate_dominators } + let start_node = graph.start_node(); + immediate_dominators[start_node] = None; + + let time = compute_access_time(start_node, &immediate_dominators); + + Dominators { start_node, post_order_rank, immediate_dominators, time } } /// Evaluate the link-eval virtual forest, providing the currently minimum semi @@ -307,34 +313,31 @@ fn compress( /// Tracks the list of dominators for each node. #[derive(Clone, Debug)] pub struct Dominators<N: Idx> { + start_node: N, post_order_rank: IndexVec<N, usize>, // Even though we track only the immediate dominator of each node, it's // possible to get its full list of dominators by looking up the dominator // of each dominator. (See the `impl Iterator for Iter` definition). immediate_dominators: IndexVec<N, Option<N>>, + time: IndexVec<N, Time>, } impl<Node: Idx> Dominators<Node> { - /// Whether the given Node has an immediate dominator. + /// Returns true if node is reachable from the start node. pub fn is_reachable(&self, node: Node) -> bool { - self.immediate_dominators[node].is_some() + node == self.start_node || self.immediate_dominators[node].is_some() } - pub fn immediate_dominator(&self, node: Node) -> Node { - assert!(self.is_reachable(node), "node {node:?} is not reachable"); - self.immediate_dominators[node].unwrap() + /// Returns the immediate dominator of node, if any. + pub fn immediate_dominator(&self, node: Node) -> Option<Node> { + self.immediate_dominators[node] } /// Provides an iterator over each dominator up the CFG, for the given Node. /// See the `impl Iterator for Iter` definition to understand how this works. 
pub fn dominators(&self, node: Node) -> Iter<'_, Node> { assert!(self.is_reachable(node), "node {node:?} is not reachable"); - Iter { dominators: self, node: Some(node) } - } - - pub fn dominates(&self, dom: Node, node: Node) -> bool { - // FIXME -- could be optimized by using post-order-rank - self.dominators(node).any(|n| n == dom) + Iter { dom_tree: self, node: Some(node) } } /// Provide deterministic ordering of nodes such that, if any two nodes have a dominator @@ -344,10 +347,22 @@ impl<Node: Idx> Dominators<Node> { pub fn rank_partial_cmp(&self, lhs: Node, rhs: Node) -> Option<Ordering> { self.post_order_rank[rhs].partial_cmp(&self.post_order_rank[lhs]) } + + /// Returns true if `a` dominates `b`. + /// + /// # Panics + /// + /// Panics if `b` is unreachable. + pub fn dominates(&self, a: Node, b: Node) -> bool { + let a = self.time[a]; + let b = self.time[b]; + assert!(b.start != 0, "node {b:?} is not reachable"); + a.start <= b.start && b.finish <= a.finish + } } pub struct Iter<'dom, Node: Idx> { - dominators: &'dom Dominators<Node>, + dom_tree: &'dom Dominators<Node>, node: Option<Node>, } @@ -356,15 +371,74 @@ impl<'dom, Node: Idx> Iterator for Iter<'dom, Node> { fn next(&mut self) -> Option<Self::Item> { if let Some(node) = self.node { - let dom = self.dominators.immediate_dominator(node); - if dom == node { - self.node = None; // reached the root - } else { - self.node = Some(dom); - } + self.node = self.dom_tree.immediate_dominator(node); Some(node) } else { None } } } + +/// Describes the number of vertices discovered at the time when processing of a particular vertex +/// started and when it finished. Both values are zero for unreachable vertices. +#[derive(Copy, Clone, Default, Debug)] +struct Time { + start: u32, + finish: u32, +} + +fn compute_access_time<N: Idx>( + start_node: N, + immediate_dominators: &IndexSlice<N, Option<N>>, +) -> IndexVec<N, Time> { + // Transpose the dominator tree edges, so that child nodes of vertex v are stored in + // node[edges[v].start..edges[v].end]. + let mut edges: IndexVec<N, std::ops::Range<u32>> = + IndexVec::from_elem(0..0, immediate_dominators); + for &idom in immediate_dominators.iter() { + if let Some(idom) = idom { + edges[idom].end += 1; + } + } + let mut m = 0; + for e in edges.iter_mut() { + m += e.end; + e.start = m; + e.end = m; + } + let mut node = IndexVec::from_elem_n(Idx::new(0), m.try_into().unwrap()); + for (i, &idom) in immediate_dominators.iter_enumerated() { + if let Some(idom) = idom { + edges[idom].start -= 1; + node[edges[idom].start] = i; + } + } + + // Perform a depth-first search of the dominator tree. Record the number of vertices discovered + // when vertex v is discovered first as time[v].start, and when its processing is finished as + // time[v].finish. + let mut time: IndexVec<N, Time> = IndexVec::from_elem(Time::default(), immediate_dominators); + let mut stack = Vec::new(); + + let mut discovered = 1; + stack.push(start_node); + time[start_node].start = discovered; + + while let Some(&i) = stack.last() { + let e = &mut edges[i]; + if e.start == e.end { + // Finish processing vertex i. + time[i].finish = discovered; + stack.pop(); + } else { + let j = node[e.start]; + e.start += 1; + // Start processing vertex j. 
+ discovered += 1; + time[j].start = discovered; + stack.push(j); + } + } + + time +} diff --git a/compiler/rustc_data_structures/src/graph/dominators/tests.rs b/compiler/rustc_data_structures/src/graph/dominators/tests.rs index ff31d8f7f..5472bb808 100644 --- a/compiler/rustc_data_structures/src/graph/dominators/tests.rs +++ b/compiler/rustc_data_structures/src/graph/dominators/tests.rs @@ -8,7 +8,7 @@ fn diamond() { let dominators = dominators(&graph); let immediate_dominators = &dominators.immediate_dominators; - assert_eq!(immediate_dominators[0], Some(0)); + assert_eq!(immediate_dominators[0], None); assert_eq!(immediate_dominators[1], Some(0)); assert_eq!(immediate_dominators[2], Some(0)); assert_eq!(immediate_dominators[3], Some(0)); @@ -30,7 +30,7 @@ fn paper() { assert_eq!(immediate_dominators[3], Some(6)); assert_eq!(immediate_dominators[4], Some(6)); assert_eq!(immediate_dominators[5], Some(6)); - assert_eq!(immediate_dominators[6], Some(6)); + assert_eq!(immediate_dominators[6], None); } #[test] @@ -43,3 +43,40 @@ fn paper_slt() { dominators(&graph); } + +#[test] +fn immediate_dominator() { + let graph = TestGraph::new(1, &[(1, 2), (2, 3)]); + let dominators = dominators(&graph); + assert_eq!(dominators.immediate_dominator(0), None); + assert_eq!(dominators.immediate_dominator(1), None); + assert_eq!(dominators.immediate_dominator(2), Some(1)); + assert_eq!(dominators.immediate_dominator(3), Some(2)); +} + +#[test] +fn transitive_dominator() { + let graph = TestGraph::new( + 0, + &[ + // First tree branch. + (0, 1), + (1, 2), + (2, 3), + (3, 4), + // Second tree branch. + (1, 5), + (5, 6), + // Third tree branch. + (0, 7), + // These links make 0 the dominator for 2 and 3. + (7, 2), + (5, 3), + ], + ); + + let dom_tree = dominators(&graph); + let immediate_dominators = &dom_tree.immediate_dominators; + assert_eq!(immediate_dominators[2], Some(0)); + assert_eq!(immediate_dominators[3], Some(0)); // This used to return Some(1). 
+} diff --git a/compiler/rustc_data_structures/src/graph/iterate/mod.rs b/compiler/rustc_data_structures/src/graph/iterate/mod.rs index 01a83b40a..9eb4b5278 100644 --- a/compiler/rustc_data_structures/src/graph/iterate/mod.rs +++ b/compiler/rustc_data_structures/src/graph/iterate/mod.rs @@ -1,6 +1,6 @@ use super::{DirectedGraph, WithNumNodes, WithStartNode, WithSuccessors}; use rustc_index::bit_set::BitSet; -use rustc_index::vec::{IndexSlice, IndexVec}; +use rustc_index::{IndexSlice, IndexVec}; use std::ops::ControlFlow; #[cfg(test)] diff --git a/compiler/rustc_data_structures/src/graph/mod.rs b/compiler/rustc_data_structures/src/graph/mod.rs index 3560df6e5..e06ab2fe3 100644 --- a/compiler/rustc_data_structures/src/graph/mod.rs +++ b/compiler/rustc_data_structures/src/graph/mod.rs @@ -1,4 +1,4 @@ -use rustc_index::vec::Idx; +use rustc_index::Idx; pub mod dominators; pub mod implementation; diff --git a/compiler/rustc_data_structures/src/graph/scc/mod.rs b/compiler/rustc_data_structures/src/graph/scc/mod.rs index 28c357e54..cf9312ea8 100644 --- a/compiler/rustc_data_structures/src/graph/scc/mod.rs +++ b/compiler/rustc_data_structures/src/graph/scc/mod.rs @@ -8,7 +8,7 @@ use crate::fx::FxHashSet; use crate::graph::vec_graph::VecGraph; use crate::graph::{DirectedGraph, GraphSuccessors, WithNumEdges, WithNumNodes, WithSuccessors}; -use rustc_index::vec::{Idx, IndexSlice, IndexVec}; +use rustc_index::{Idx, IndexSlice, IndexVec}; use std::ops::Range; #[cfg(test)] diff --git a/compiler/rustc_data_structures/src/graph/vec_graph/mod.rs b/compiler/rustc_data_structures/src/graph/vec_graph/mod.rs index 94232bb76..00f6266ce 100644 --- a/compiler/rustc_data_structures/src/graph/vec_graph/mod.rs +++ b/compiler/rustc_data_structures/src/graph/vec_graph/mod.rs @@ -1,5 +1,5 @@ use crate::graph::{DirectedGraph, GraphSuccessors, WithNumEdges, WithNumNodes, WithSuccessors}; -use rustc_index::vec::{Idx, IndexVec}; +use rustc_index::{Idx, IndexVec}; #[cfg(test)] mod tests; diff --git a/compiler/rustc_data_structures/src/hashes.rs b/compiler/rustc_data_structures/src/hashes.rs new file mode 100644 index 000000000..ad068cdbc --- /dev/null +++ b/compiler/rustc_data_structures/src/hashes.rs @@ -0,0 +1,132 @@ +//! rustc encodes a lot of hashes. If hashes are stored as `u64` or `u128`, a `derive(Encodable)` +//! will apply varint encoding to the hashes, which is less efficient than directly encoding the 8 +//! or 16 bytes of the hash. +//! +//! The types in this module represent 64-bit or 128-bit hashes produced by a `StableHasher`. +//! `Hash64` and `Hash128` expose some utility functions to encourage users to not extract the inner +//! hash value as an integer type and accidentally apply varint encoding to it. +//! +//! In contrast with `Fingerprint`, users of these types cannot and should not attempt to construct +//! and decompose these types into constituent pieces. The point of these types is only to +//! connect the fact that they can only be produced by a `StableHasher` to their +//! `Encode`/`Decode` impls.
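To make the size tradeoff described in the module comment above concrete, here is a small standalone sketch (not part of the patch; it uses a hand-rolled LEB128-style varint as a stand-in for what a derived `Encodable` would do to a plain integer, not `rustc_serialize`'s exact scheme). A full-entropy 64-bit hash grows under varint encoding, while the raw little-endian form used by the `Encodable` impls below stays at exactly 8 bytes:

// Sketch only: LEB128-style varint length, standing in for derived integer encoding.
fn varint_len(mut n: u64) -> usize {
    let mut len = 1;
    while n >= 0x80 {
        n >>= 7;
        len += 1;
    }
    len
}

fn main() {
    // Hash values have high entropy, so their top bits are rarely zero.
    let hash: u64 = 0xf662_2fb3_4989_8b06;
    // Fixed-width encoding, as in `Hash64::encode` below: always 8 bytes.
    assert_eq!(hash.to_le_bytes().len(), 8);
    // A varint needs 10 bytes once the top bit of a u64 is set.
    assert_eq!(varint_len(hash), 10);
}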
+ +use crate::stable_hasher::{StableHasher, StableHasherResult}; +use rustc_serialize::{Decodable, Decoder, Encodable, Encoder}; +use std::fmt; +use std::ops::BitXorAssign; + +#[derive(Clone, Copy, PartialEq, Eq, Hash, PartialOrd, Ord, Default)] +pub struct Hash64 { + inner: u64, +} + +impl Hash64 { + pub const ZERO: Hash64 = Hash64 { inner: 0 }; + + #[inline] + pub(crate) fn new(n: u64) -> Self { + Self { inner: n } + } + + #[inline] + pub fn as_u64(self) -> u64 { + self.inner + } +} + +impl BitXorAssign<u64> for Hash64 { + #[inline] + fn bitxor_assign(&mut self, rhs: u64) { + self.inner ^= rhs; + } +} + +impl<S: Encoder> Encodable<S> for Hash64 { + #[inline] + fn encode(&self, s: &mut S) { + s.emit_raw_bytes(&self.inner.to_le_bytes()); + } +} + +impl<D: Decoder> Decodable<D> for Hash64 { + #[inline] + fn decode(d: &mut D) -> Self { + Self { inner: u64::from_le_bytes(d.read_raw_bytes(8).try_into().unwrap()) } + } +} + +impl StableHasherResult for Hash64 { + #[inline] + fn finish(hasher: StableHasher) -> Self { + Self { inner: hasher.finalize().0 } + } +} + +impl fmt::Debug for Hash64 { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + self.inner.fmt(f) + } +} + +impl fmt::LowerHex for Hash64 { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + fmt::LowerHex::fmt(&self.inner, f) + } +} + +#[derive(Clone, Copy, PartialEq, Eq, Hash, PartialOrd, Ord, Default)] +pub struct Hash128 { + inner: u128, +} + +impl Hash128 { + #[inline] + pub fn truncate(self) -> Hash64 { + Hash64 { inner: self.inner as u64 } + } + + #[inline] + pub fn wrapping_add(self, other: Self) -> Self { + Self { inner: self.inner.wrapping_add(other.inner) } + } + + #[inline] + pub fn as_u128(self) -> u128 { + self.inner + } +} + +impl<S: Encoder> Encodable<S> for Hash128 { + #[inline] + fn encode(&self, s: &mut S) { + s.emit_raw_bytes(&self.inner.to_le_bytes()); + } +} + +impl<D: Decoder> Decodable<D> for Hash128 { + #[inline] + fn decode(d: &mut D) -> Self { + Self { inner: u128::from_le_bytes(d.read_raw_bytes(16).try_into().unwrap()) } + } +} + +impl StableHasherResult for Hash128 { + #[inline] + fn finish(hasher: StableHasher) -> Self { + let (_0, _1) = hasher.finalize(); + Self { inner: u128::from(_0) | (u128::from(_1) << 64) } + } +} + +impl fmt::Debug for Hash128 { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + self.inner.fmt(f) + } +} + +impl fmt::LowerHex for Hash128 { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + fmt::LowerHex::fmt(&self.inner, f) + } +} diff --git a/compiler/rustc_data_structures/src/lib.rs b/compiler/rustc_data_structures/src/lib.rs index e373bd184..859e384d8 100644 --- a/compiler/rustc_data_structures/src/lib.rs +++ b/compiler/rustc_data_structures/src/lib.rs @@ -26,13 +26,18 @@ #![feature(test)] #![feature(thread_id_value)] #![feature(vec_into_raw_parts)] +#![feature(allocator_api)] #![feature(get_mut_unchecked)] #![feature(lint_reasons)] #![feature(unwrap_infallible)] +#![feature(strict_provenance)] +#![feature(ptr_alignment_type)] +#![feature(macro_metavar_expr)] #![allow(rustc::default_hash_types)] #![allow(rustc::potential_query_instability)] #![deny(rustc::untranslatable_diagnostic)] #![deny(rustc::diagnostic_outside_of_impl)] +#![deny(unsafe_op_in_unsafe_fn)] #[macro_use] extern crate tracing; @@ -73,6 +78,7 @@ pub mod sorted_map; pub mod stable_hasher; mod atomic_ref; pub mod fingerprint; +pub mod marker; pub mod profiling; pub mod sharded; pub mod stack; @@ -82,7 +88,9 @@ pub mod transitive_relation; pub mod vec_linked_list; pub mod 
work_queue; pub use atomic_ref::AtomicRef; +pub mod aligned; pub mod frozen; +mod hashes; pub mod owned_slice; pub mod sso; pub mod steal; @@ -94,21 +102,27 @@ pub mod unord; pub use ena::undo_log; pub use ena::unify; -pub struct OnDrop<F: Fn()>(pub F); +/// Returns a structure that calls `f` when dropped. +pub fn defer<F: FnOnce()>(f: F) -> OnDrop<F> { + OnDrop(Some(f)) +} + +pub struct OnDrop<F: FnOnce()>(Option<F>); -impl<F: Fn()> OnDrop<F> { - /// Forgets the function which prevents it from running. - /// Ensure that the function owns no memory, otherwise it will be leaked. +impl<F: FnOnce()> OnDrop<F> { + /// Disables on-drop call. #[inline] - pub fn disable(self) { - std::mem::forget(self); + pub fn disable(mut self) { + self.0.take(); } } -impl<F: Fn()> Drop for OnDrop<F> { +impl<F: FnOnce()> Drop for OnDrop<F> { #[inline] fn drop(&mut self) { - (self.0)(); + if let Some(f) = self.0.take() { + f(); + } } } diff --git a/compiler/rustc_data_structures/src/marker.rs b/compiler/rustc_data_structures/src/marker.rs new file mode 100644 index 000000000..f8c06f9a8 --- /dev/null +++ b/compiler/rustc_data_structures/src/marker.rs @@ -0,0 +1,257 @@ +cfg_if!( + if #[cfg(not(parallel_compiler))] { + pub auto trait DynSend {} + pub auto trait DynSync {} + + impl<T> DynSend for T {} + impl<T> DynSync for T {} + } else { + #[rustc_on_unimplemented( + message = "`{Self}` doesn't implement `DynSend`. \ + Add it to `rustc_data_structures::marker` or use `IntoDynSyncSend` if it's already `Send`" + )] + // This is an auto trait for types which can be sent across threads if `sync::is_dyn_thread_safe()` + // is true. These types can be wrapped in a `FromDyn` to get a `Send` type. Wrapping a + // `Send` type in `IntoDynSyncSend` will create a `DynSend` type. + pub unsafe auto trait DynSend {} + + #[rustc_on_unimplemented( + message = "`{Self}` doesn't implement `DynSync`. \ + Add it to `rustc_data_structures::marker` or use `IntoDynSyncSend` if it's already `Sync`" + )] + // This is an auto trait for types which can be shared across threads if `sync::is_dyn_thread_safe()` + // is true. These types can be wrapped in a `FromDyn` to get a `Sync` type. Wrapping a + // `Sync` type in `IntoDynSyncSend` will create a `DynSync` type. + pub unsafe auto trait DynSync {} + + // Same with `Sync` and `Send`. + unsafe impl<T: DynSync + ?Sized> DynSend for &T {} + + macro_rules! impls_dyn_send_neg { + ($([$t1: ty $(where $($generics1: tt)*)?])*) => { + $(impl$(<$($generics1)*>)? !DynSend for $t1 {})* + }; + } + + // Consistent with `std` + impls_dyn_send_neg!( + [std::env::Args] + [std::env::ArgsOs] + [*const T where T: ?Sized] + [*mut T where T: ?Sized] + [std::ptr::NonNull<T> where T: ?Sized] + [std::rc::Rc<T> where T: ?Sized] + [std::rc::Weak<T> where T: ?Sized] + [std::sync::MutexGuard<'_, T> where T: ?Sized] + [std::sync::RwLockReadGuard<'_, T> where T: ?Sized] + [std::sync::RwLockWriteGuard<'_, T> where T: ?Sized] + [std::io::StdoutLock<'_>] + [std::io::StderrLock<'_>] + ); + cfg_if!( + // Consistent with `std` + // `os_imp::Env` is `!Send` in these platforms + if #[cfg(any(unix, target_os = "hermit", target_os = "wasi", target_os = "solid_asp3"))] { + impl !DynSend for std::env::VarsOs {} + } + ); + + macro_rules! already_send { + ($([$ty: ty])*) => { + $(unsafe impl DynSend for $ty where $ty: Send {})* + }; + } + + // These structures are already `Send`. 
+ already_send!( + [std::backtrace::Backtrace] + [std::io::Stdout] + [std::io::Stderr] + [std::io::Error] + [std::fs::File] + [rustc_arena::DroplessArena] + [crate::memmap::Mmap] + [crate::profiling::SelfProfiler] + [crate::owned_slice::OwnedSlice] + ); + + macro_rules! impl_dyn_send { + ($($($attr: meta)* [$ty: ty where $($generics2: tt)*])*) => { + $(unsafe impl<$($generics2)*> DynSend for $ty {})* + }; + } + + impl_dyn_send!( + [std::sync::atomic::AtomicPtr<T> where T] + [std::sync::Mutex<T> where T: ?Sized+ DynSend] + [std::sync::mpsc::Sender<T> where T: DynSend] + [std::sync::Arc<T> where T: ?Sized + DynSync + DynSend] + [std::sync::LazyLock<T, F> where T: DynSend, F: DynSend] + [std::collections::HashSet<K, S> where K: DynSend, S: DynSend] + [std::collections::HashMap<K, V, S> where K: DynSend, V: DynSend, S: DynSend] + [std::collections::BTreeMap<K, V, A> where K: DynSend, V: DynSend, A: std::alloc::Allocator + Clone + DynSend] + [Vec<T, A> where T: DynSend, A: std::alloc::Allocator + DynSend] + [Box<T, A> where T: ?Sized + DynSend, A: std::alloc::Allocator + DynSend] + [crate::sync::Lock<T> where T: DynSend] + [crate::sync::RwLock<T> where T: DynSend] + [crate::tagged_ptr::CopyTaggedPtr<P, T, CP> where P: Send + crate::tagged_ptr::Pointer, T: Send + crate::tagged_ptr::Tag, const CP: bool] + [rustc_arena::TypedArena<T> where T: DynSend] + [indexmap::IndexSet<V, S> where V: DynSend, S: DynSend] + [indexmap::IndexMap<K, V, S> where K: DynSend, V: DynSend, S: DynSend] + [thin_vec::ThinVec<T> where T: DynSend] + [smallvec::SmallVec<A> where A: smallvec::Array + DynSend] + ); + + macro_rules! impls_dyn_sync_neg { + ($([$t1: ty $(where $($generics1: tt)*)?])*) => { + $(impl$(<$($generics1)*>)? !DynSync for $t1 {})* + }; + } + + // Consistent with `std` + impls_dyn_sync_neg!( + [std::env::Args] + [std::env::ArgsOs] + [*const T where T: ?Sized] + [*mut T where T: ?Sized] + [std::cell::Cell<T> where T: ?Sized] + [std::cell::RefCell<T> where T: ?Sized] + [std::cell::UnsafeCell<T> where T: ?Sized] + [std::ptr::NonNull<T> where T: ?Sized] + [std::rc::Rc<T> where T: ?Sized] + [std::rc::Weak<T> where T: ?Sized] + [std::cell::OnceCell<T> where T] + [std::sync::mpsc::Receiver<T> where T] + [std::sync::mpsc::Sender<T> where T] + ); + cfg_if!( + // Consistent with `std` + // `os_imp::Env` is `!Sync` in these platforms + if #[cfg(any(unix, target_os = "hermit", target_os = "wasi", target_os = "solid_asp3"))] { + impl !DynSync for std::env::VarsOs {} + } + ); + + macro_rules! already_sync { + ($([$ty: ty])*) => { + $(unsafe impl DynSync for $ty where $ty: Sync {})* + }; + } + + // These structures are already `Sync`. + already_sync!( + [std::sync::atomic::AtomicBool] + [std::sync::atomic::AtomicUsize] + [std::sync::atomic::AtomicU8] + [std::sync::atomic::AtomicU32] + [std::sync::atomic::AtomicU64] + [std::backtrace::Backtrace] + [std::io::Error] + [std::fs::File] + [jobserver_crate::Client] + [crate::memmap::Mmap] + [crate::profiling::SelfProfiler] + [crate::owned_slice::OwnedSlice] + ); + + macro_rules! 
impl_dyn_sync { + ($($($attr: meta)* [$ty: ty where $($generics2: tt)*])*) => { + $(unsafe impl<$($generics2)*> DynSync for $ty {})* + }; + } + + impl_dyn_sync!( + [std::sync::atomic::AtomicPtr<T> where T] + [std::sync::OnceLock<T> where T: DynSend + DynSync] + [std::sync::Mutex<T> where T: ?Sized + DynSend] + [std::sync::Arc<T> where T: ?Sized + DynSync + DynSend] + [std::sync::LazyLock<T, F> where T: DynSend + DynSync, F: DynSend] + [std::collections::HashSet<K, S> where K: DynSync, S: DynSync] + [std::collections::HashMap<K, V, S> where K: DynSync, V: DynSync, S: DynSync] + [std::collections::BTreeMap<K, V, A> where K: DynSync, V: DynSync, A: std::alloc::Allocator + Clone + DynSync] + [Vec<T, A> where T: DynSync, A: std::alloc::Allocator + DynSync] + [Box<T, A> where T: ?Sized + DynSync, A: std::alloc::Allocator + DynSync] + [crate::sync::Lock<T> where T: DynSend] + [crate::sync::RwLock<T> where T: DynSend + DynSync] + [crate::sync::OneThread<T> where T] + [crate::sync::WorkerLocal<T> where T: DynSend] + [crate::intern::Interned<'a, T> where 'a, T: DynSync] + [crate::tagged_ptr::CopyTaggedPtr<P, T, CP> where P: Sync + crate::tagged_ptr::Pointer, T: Sync + crate::tagged_ptr::Tag, const CP: bool] + [parking_lot::lock_api::Mutex<R, T> where R: DynSync, T: ?Sized + DynSend] + [parking_lot::lock_api::RwLock<R, T> where R: DynSync, T: ?Sized + DynSend + DynSync] + [indexmap::IndexSet<V, S> where V: DynSync, S: DynSync] + [indexmap::IndexMap<K, V, S> where K: DynSync, V: DynSync, S: DynSync] + [smallvec::SmallVec<A> where A: smallvec::Array + DynSync] + [thin_vec::ThinVec<T> where T: DynSync] + ); + } +); + +pub fn assert_dyn_sync<T: ?Sized + DynSync>() {} +pub fn assert_dyn_send<T: ?Sized + DynSend>() {} +pub fn assert_dyn_send_val<T: ?Sized + DynSend>(_t: &T) {} +pub fn assert_dyn_send_sync_val<T: ?Sized + DynSync + DynSend>(_t: &T) {} + +#[derive(Copy, Clone)] +pub struct FromDyn<T>(T); + +impl<T> FromDyn<T> { + #[inline(always)] + pub fn from(val: T) -> Self { + // Check that `sync::is_dyn_thread_safe()` is true on creation so we can + // implement `Send` and `Sync` for this structure when `T` + // implements `DynSend` and `DynSync` respectively. + #[cfg(parallel_compiler)] + assert!(crate::sync::is_dyn_thread_safe()); + FromDyn(val) + } + + #[inline(always)] + pub fn into_inner(self) -> T { + self.0 + } +} + +// `FromDyn` is `Send` if `T` is `DynSend`, since it ensures that sync::is_dyn_thread_safe() is true. +#[cfg(parallel_compiler)] +unsafe impl<T: DynSend> Send for FromDyn<T> {} + +// `FromDyn` is `Sync` if `T` is `DynSync`, since it ensures that sync::is_dyn_thread_safe() is true. +#[cfg(parallel_compiler)] +unsafe impl<T: DynSync> Sync for FromDyn<T> {} + +impl<T> std::ops::Deref for FromDyn<T> { + type Target = T; + + #[inline(always)] + fn deref(&self) -> &Self::Target { + &self.0 + } +} + +// A wrapper to convert a struct that is already a `Send` or `Sync` into +// an instance of `DynSend` and `DynSync`, since the compiler cannot infer +// it automatically in some cases. (e.g. 
Box<dyn Send / Sync>) +#[derive(Copy, Clone)] +pub struct IntoDynSyncSend<T: ?Sized>(pub T); + +#[cfg(parallel_compiler)] +unsafe impl<T: ?Sized + Send> DynSend for IntoDynSyncSend<T> {} +#[cfg(parallel_compiler)] +unsafe impl<T: ?Sized + Sync> DynSync for IntoDynSyncSend<T> {} + +impl<T> std::ops::Deref for IntoDynSyncSend<T> { + type Target = T; + + #[inline(always)] + fn deref(&self) -> &T { + &self.0 + } +} + +impl<T> std::ops::DerefMut for IntoDynSyncSend<T> { + #[inline(always)] + fn deref_mut(&mut self) -> &mut T { + &mut self.0 + } +} diff --git a/compiler/rustc_data_structures/src/memmap.rs b/compiler/rustc_data_structures/src/memmap.rs index ef37a606f..ca908671a 100644 --- a/compiler/rustc_data_structures/src/memmap.rs +++ b/compiler/rustc_data_structures/src/memmap.rs @@ -13,7 +13,8 @@ pub struct Mmap(Vec<u8>); impl Mmap { #[inline] pub unsafe fn map(file: File) -> io::Result<Self> { - memmap2::Mmap::map(&file).map(Mmap) + // Safety: this is in fact not safe. + unsafe { memmap2::Mmap::map(&file).map(Mmap) } } } diff --git a/compiler/rustc_data_structures/src/obligation_forest/mod.rs b/compiler/rustc_data_structures/src/obligation_forest/mod.rs index 27a869eb7..a47908648 100644 --- a/compiler/rustc_data_structures/src/obligation_forest/mod.rs +++ b/compiler/rustc_data_structures/src/obligation_forest/mod.rs @@ -366,7 +366,7 @@ impl<O: ForestObligation> ObligationForest<O> { && self .error_cache .get(&obligation_tree_id) - .map_or(false, |errors| errors.contains(v.key())); + .is_some_and(|errors| errors.contains(v.key())); if already_failed { Err(()) diff --git a/compiler/rustc_data_structures/src/owned_slice.rs b/compiler/rustc_data_structures/src/owned_slice.rs index 048401f66..cbb3047d8 100644 --- a/compiler/rustc_data_structures/src/owned_slice.rs +++ b/compiler/rustc_data_structures/src/owned_slice.rs @@ -1,5 +1,6 @@ use std::{borrow::Borrow, ops::Deref}; +use crate::sync::Lrc; // Use our fake Send/Sync traits when on not parallel compiler, // so that `OwnedSlice` only implements/requires Send/Sync // for parallel compiler builds. @@ -7,7 +8,7 @@ use crate::sync::{Send, Sync}; /// An owned slice. /// -/// This is similar to `Box<[u8]>` but allows slicing and using anything as the +/// This is similar to `Lrc<[u8]>` but allows slicing and using anything as the /// backing buffer. /// /// See [`slice_owned`] for `OwnedSlice` construction and examples. @@ -16,6 +17,7 @@ use crate::sync::{Send, Sync}; /// /// This is essentially a replacement for `owning_ref` which is a lot simpler /// and even sound! 🌸 +#[derive(Clone)] pub struct OwnedSlice { /// This is conceptually a `&'self.owner [u8]`. bytes: *const [u8], @@ -31,7 +33,7 @@ pub struct OwnedSlice { // \/ // ⊂(´・◡・⊂ )∘˚˳° (I am the phantom remnant of #97770) #[expect(dead_code)] - owner: Box<dyn Send + Sync>, + owner: Lrc<dyn Send + Sync>, } /// Makes an [`OwnedSlice`] out of an `owner` and a `slicer` function. @@ -72,10 +74,10 @@ where O: Send + Sync + 'static, F: FnOnce(&O) -> Result<&[u8], E>, { - // We box the owner of the bytes, so it doesn't move. + // We wrap the owner of the bytes in, so it doesn't move. // // Since the owner does not move and we don't access it in any way - // before drop, there is nothing that can invalidate the bytes pointer. + // before dropping, there is nothing that can invalidate the bytes pointer. // // Thus, "extending" the lifetime of the reference returned from `F` is fine. // We pretend that we pass it a reference that lives as long as the returned slice. @@ -83,12 +85,39 @@ where // N.B. 
the HRTB on the `slicer` is important — without it the caller could provide // a short lived slice, unrelated to the owner. - let owner = Box::new(owner); + let owner = Lrc::new(owner); let bytes = slicer(&*owner)?; Ok(OwnedSlice { bytes, owner }) } +impl OwnedSlice { + /// Slice this slice by `slicer`. + /// + /// # Examples + /// + /// ```rust + /// # use rustc_data_structures::owned_slice::{OwnedSlice, slice_owned}; + /// let vec = vec![1, 2, 3, 4]; + /// + /// // Identical to slicing via `&v[1..3]` but produces an owned slice + /// let slice: OwnedSlice = slice_owned(vec, |v| &v[..]); + /// assert_eq!(&*slice, [1, 2, 3, 4]); + /// + /// let slice = slice.slice(|slice| &slice[1..][..2]); + /// assert_eq!(&*slice, [2, 3]); + /// ``` + /// + pub fn slice(self, slicer: impl FnOnce(&[u8]) -> &[u8]) -> OwnedSlice { + // This is basically identical to `try_slice_owned`, + // `slicer` can only return slices of its argument or some static data, + // both of which are valid while `owner` is alive. + + let bytes = slicer(&self); + OwnedSlice { bytes, ..self } + } +} + impl Deref for OwnedSlice { type Target = [u8]; @@ -108,10 +137,12 @@ impl Borrow<[u8]> for OwnedSlice { } } -// Safety: `OwnedSlice` is conceptually `(&'self.1 [u8], Box<dyn Send + Sync>)`, which is `Send` +// Safety: `OwnedSlice` is conceptually `(&'self.1 [u8], Arc<dyn Send + Sync>)`, which is `Send` +#[cfg(parallel_compiler)] unsafe impl Send for OwnedSlice {} -// Safety: `OwnedSlice` is conceptually `(&'self.1 [u8], Box<dyn Send + Sync>)`, which is `Sync` +// Safety: `OwnedSlice` is conceptually `(&'self.1 [u8], Arc<dyn Send + Sync>)`, which is `Sync` +#[cfg(parallel_compiler)] unsafe impl Sync for OwnedSlice {} #[cfg(test)] diff --git a/compiler/rustc_data_structures/src/owned_slice/tests.rs b/compiler/rustc_data_structures/src/owned_slice/tests.rs index e715fb553..520871a12 100644 --- a/compiler/rustc_data_structures/src/owned_slice/tests.rs +++ b/compiler/rustc_data_structures/src/owned_slice/tests.rs @@ -7,8 +7,8 @@ use std::{ }; use crate::{ + defer, owned_slice::{slice_owned, try_slice_owned, OwnedSlice}, - OnDrop, }; #[test] @@ -26,7 +26,7 @@ fn static_storage() { } #[test] -fn slice_the_slice() { +fn slice_owned_the_slice() { let slice = slice_owned(vec![1, 2, 3, 4, 5, 6], Vec::as_slice); let slice = slice_owned(slice, |s| &s[1..][..4]); let slice = slice_owned(slice, |s| s); @@ -36,6 +36,16 @@ fn slice_the_slice() { } #[test] +fn slice_the_slice() { + let slice = slice_owned(vec![1, 2, 3, 4, 5, 6], Vec::as_slice) + .slice(|s| &s[1..][..4]) + .slice(|s| s) + .slice(|s| &s[1..]); + + assert_eq!(&*slice, &[1, 2, 3, 4, 5, 6][1..][..4][1..]); +} + +#[test] fn try_and_fail() { let res = try_slice_owned(vec![0], |v| v.get(12..).ok_or(())); @@ -56,7 +66,7 @@ fn boxed() { fn drop_drops() { let flag = Arc::new(AtomicBool::new(false)); let flag_prime = Arc::clone(&flag); - let d = OnDrop(move || flag_prime.store(true, atomic::Ordering::Relaxed)); + let d = defer(move || flag_prime.store(true, atomic::Ordering::Relaxed)); let slice = slice_owned(d, |_| &[]); @@ -69,6 +79,6 @@ fn drop_drops() { #[test] fn send_sync() { - crate::sync::assert_send::<OwnedSlice>(); - crate::sync::assert_sync::<OwnedSlice>(); + crate::sync::assert_dyn_send::<OwnedSlice>(); + crate::sync::assert_dyn_sync::<OwnedSlice>(); } diff --git a/compiler/rustc_data_structures/src/profiling.rs b/compiler/rustc_data_structures/src/profiling.rs index 1ed584eaf..3c76c2b79 100644 --- a/compiler/rustc_data_structures/src/profiling.rs +++ 
b/compiler/rustc_data_structures/src/profiling.rs @@ -87,6 +87,7 @@ use crate::fx::FxHashMap; use std::borrow::Borrow; use std::collections::hash_map::Entry; use std::error::Error; +use std::fmt::Display; use std::fs; use std::intrinsics::unlikely; use std::path::Path; @@ -97,11 +98,10 @@ use std::time::{Duration, Instant}; pub use measureme::EventId; use measureme::{EventIdBuilder, Profiler, SerializableString, StringId}; use parking_lot::RwLock; -use serde_json::json; use smallvec::SmallVec; bitflags::bitflags! { - struct EventFilter: u32 { + struct EventFilter: u16 { const GENERIC_ACTIVITIES = 1 << 0; const QUERY_PROVIDERS = 1 << 1; const QUERY_CACHE_HITS = 1 << 2; @@ -557,7 +557,7 @@ impl SelfProfiler { let crate_name = crate_name.unwrap_or("unknown-crate"); // HACK(eddyb) we need to pad the PID, strange as it may seem, as its // length can behave as a source of entropy for heap addresses, when - // ASLR is disabled and the heap is otherwise determinic. + // ASLR is disabled and the heap is otherwise deterministic. let pid: u32 = process::id(); let filename = format!("{crate_name}-{pid:07}.rustc_profile"); let path = output_directory.join(&filename); @@ -763,6 +763,31 @@ impl Drop for VerboseTimingGuard<'_> { } } +struct JsonTimePassesEntry<'a> { + pass: &'a str, + time: f64, + start_rss: Option<usize>, + end_rss: Option<usize>, +} + +impl Display for JsonTimePassesEntry<'_> { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + let Self { pass: what, time, start_rss, end_rss } = self; + write!(f, r#"{{"pass":"{what}","time":{time},"rss_start":"#).unwrap(); + match start_rss { + Some(rss) => write!(f, "{rss}")?, + None => write!(f, "null")?, + } + write!(f, r#","rss_end":"#)?; + match end_rss { + Some(rss) => write!(f, "{rss}")?, + None => write!(f, "null")?, + } + write!(f, "}}")?; + Ok(()) + } +} + pub fn print_time_passes_entry( what: &str, dur: Duration, @@ -772,13 +797,10 @@ pub fn print_time_passes_entry( ) { match format { TimePassesFormat::Json => { - let json = json!({ - "pass": what, - "time": dur.as_secs_f64(), - "rss_start": start_rss, - "rss_end": end_rss, - }); - eprintln!("time: {json}"); + let entry = + JsonTimePassesEntry { pass: what, time: dur.as_secs_f64(), start_rss, end_rss }; + + eprintln!(r#"time: {entry}"#); return; } TimePassesFormat::Text => (), @@ -843,14 +865,16 @@ cfg_if! { use std::mem; use windows::{ - Win32::System::ProcessStatus::{K32GetProcessMemoryInfo, PROCESS_MEMORY_COUNTERS}, + // FIXME: change back to K32GetProcessMemoryInfo when windows crate + // updated to 0.49.0+ to drop dependency on psapi.dll + Win32::System::ProcessStatus::{GetProcessMemoryInfo, PROCESS_MEMORY_COUNTERS}, Win32::System::Threading::GetCurrentProcess, }; let mut pmc = PROCESS_MEMORY_COUNTERS::default(); let pmc_size = mem::size_of_val(&pmc); unsafe { - K32GetProcessMemoryInfo( + GetProcessMemoryInfo( GetCurrentProcess(), &mut pmc, pmc_size as u32, @@ -894,3 +918,6 @@ cfg_if! 
{ } } } + +#[cfg(test)] +mod tests; diff --git a/compiler/rustc_data_structures/src/profiling/tests.rs b/compiler/rustc_data_structures/src/profiling/tests.rs new file mode 100644 index 000000000..2b09de085 --- /dev/null +++ b/compiler/rustc_data_structures/src/profiling/tests.rs @@ -0,0 +1,19 @@ +use super::JsonTimePassesEntry; + +#[test] +fn with_rss() { + let entry = + JsonTimePassesEntry { pass: "typeck", time: 56.1, start_rss: Some(10), end_rss: Some(20) }; + + assert_eq!(entry.to_string(), r#"{"pass":"typeck","time":56.1,"rss_start":10,"rss_end":20}"#) +} + +#[test] +fn no_rss() { + let entry = JsonTimePassesEntry { pass: "typeck", time: 56.1, start_rss: None, end_rss: None }; + + assert_eq!( + entry.to_string(), + r#"{"pass":"typeck","time":56.1,"rss_start":null,"rss_end":null}"# + ) +} diff --git a/compiler/rustc_data_structures/src/sharded.rs b/compiler/rustc_data_structures/src/sharded.rs index bd7a86f67..7ed70ba1e 100644 --- a/compiler/rustc_data_structures/src/sharded.rs +++ b/compiler/rustc_data_structures/src/sharded.rs @@ -1,14 +1,10 @@ use crate::fx::{FxHashMap, FxHasher}; -use crate::sync::{Lock, LockGuard}; +use crate::sync::{CacheAligned, Lock, LockGuard}; use std::borrow::Borrow; use std::collections::hash_map::RawEntryMut; use std::hash::{Hash, Hasher}; use std::mem; -#[derive(Default)] -#[cfg_attr(parallel_compiler, repr(align(64)))] -struct CacheAligned<T>(T); - #[cfg(parallel_compiler)] // 32 shards is sufficient to reduce contention on an 8-core Ryzen 7 1700, // but this should be tested on higher core count CPUs. How the `Sharded` type gets used diff --git a/compiler/rustc_data_structures/src/sip128.rs b/compiler/rustc_data_structures/src/sip128.rs index d849fe037..4a0ed87f7 100644 --- a/compiler/rustc_data_structures/src/sip128.rs +++ b/compiler/rustc_data_structures/src/sip128.rs @@ -96,28 +96,30 @@ macro_rules! compress { unsafe fn copy_nonoverlapping_small(src: *const u8, dst: *mut u8, count: usize) { debug_assert!(count <= 8); - if count == 8 { - ptr::copy_nonoverlapping(src, dst, 8); - return; - } + unsafe { + if count == 8 { + ptr::copy_nonoverlapping(src, dst, 8); + return; + } - let mut i = 0; - if i + 3 < count { - ptr::copy_nonoverlapping(src.add(i), dst.add(i), 4); - i += 4; - } + let mut i = 0; + if i + 3 < count { + ptr::copy_nonoverlapping(src.add(i), dst.add(i), 4); + i += 4; + } - if i + 1 < count { - ptr::copy_nonoverlapping(src.add(i), dst.add(i), 2); - i += 2 - } + if i + 1 < count { + ptr::copy_nonoverlapping(src.add(i), dst.add(i), 2); + i += 2 + } - if i < count { - *dst.add(i) = *src.add(i); - i += 1; - } + if i < count { + *dst.add(i) = *src.add(i); + i += 1; + } - debug_assert_eq!(i, count); + debug_assert_eq!(i, count); + } } // # Implementation @@ -232,38 +234,40 @@ impl SipHasher128 { // overflow) if it wasn't already. #[inline(never)] unsafe fn short_write_process_buffer<const LEN: usize>(&mut self, bytes: [u8; LEN]) { - let nbuf = self.nbuf; - debug_assert!(LEN <= 8); - debug_assert!(nbuf < BUFFER_SIZE); - debug_assert!(nbuf + LEN >= BUFFER_SIZE); - debug_assert!(nbuf + LEN < BUFFER_WITH_SPILL_SIZE); + unsafe { + let nbuf = self.nbuf; + debug_assert!(LEN <= 8); + debug_assert!(nbuf < BUFFER_SIZE); + debug_assert!(nbuf + LEN >= BUFFER_SIZE); + debug_assert!(nbuf + LEN < BUFFER_WITH_SPILL_SIZE); + + // Copy first part of input into end of buffer, possibly into spill + // element. The memcpy call is optimized away because the size is known. 
+ let dst = (self.buf.as_mut_ptr() as *mut u8).add(nbuf); + ptr::copy_nonoverlapping(bytes.as_ptr(), dst, LEN); + + // Process buffer. + for i in 0..BUFFER_CAPACITY { + let elem = self.buf.get_unchecked(i).assume_init().to_le(); + self.state.v3 ^= elem; + Sip13Rounds::c_rounds(&mut self.state); + self.state.v0 ^= elem; + } - // Copy first part of input into end of buffer, possibly into spill - // element. The memcpy call is optimized away because the size is known. - let dst = (self.buf.as_mut_ptr() as *mut u8).add(nbuf); - ptr::copy_nonoverlapping(bytes.as_ptr(), dst, LEN); - - // Process buffer. - for i in 0..BUFFER_CAPACITY { - let elem = self.buf.get_unchecked(i).assume_init().to_le(); - self.state.v3 ^= elem; - Sip13Rounds::c_rounds(&mut self.state); - self.state.v0 ^= elem; + // Copy remaining input into start of buffer by copying LEN - 1 + // elements from spill (at most LEN - 1 bytes could have overflowed + // into the spill). The memcpy call is optimized away because the size + // is known. And the whole copy is optimized away for LEN == 1. + let dst = self.buf.as_mut_ptr() as *mut u8; + let src = self.buf.get_unchecked(BUFFER_SPILL_INDEX) as *const _ as *const u8; + ptr::copy_nonoverlapping(src, dst, LEN - 1); + + // This function should only be called when the write fills the buffer. + // Therefore, when LEN == 1, the new `self.nbuf` must be zero. + // LEN is statically known, so the branch is optimized away. + self.nbuf = if LEN == 1 { 0 } else { nbuf + LEN - BUFFER_SIZE }; + self.processed += BUFFER_SIZE; } - - // Copy remaining input into start of buffer by copying LEN - 1 - // elements from spill (at most LEN - 1 bytes could have overflowed - // into the spill). The memcpy call is optimized away because the size - // is known. And the whole copy is optimized away for LEN == 1. - let dst = self.buf.as_mut_ptr() as *mut u8; - let src = self.buf.get_unchecked(BUFFER_SPILL_INDEX) as *const _ as *const u8; - ptr::copy_nonoverlapping(src, dst, LEN - 1); - - // This function should only be called when the write fills the buffer. - // Therefore, when LEN == 1, the new `self.nbuf` must be zero. - // LEN is statically known, so the branch is optimized away. - self.nbuf = if LEN == 1 { 0 } else { nbuf + LEN - BUFFER_SIZE }; - self.processed += BUFFER_SIZE; } // A write function for byte slices. @@ -301,57 +305,59 @@ impl SipHasher128 { // containing the byte offset `self.nbuf`. #[inline(never)] unsafe fn slice_write_process_buffer(&mut self, msg: &[u8]) { - let length = msg.len(); - let nbuf = self.nbuf; - debug_assert!(nbuf < BUFFER_SIZE); - debug_assert!(nbuf + length >= BUFFER_SIZE); - - // Always copy first part of input into current element of buffer. - // This function should only be called when the write fills the buffer, - // so we know that there is enough input to fill the current element. - let valid_in_elem = nbuf % ELEM_SIZE; - let needed_in_elem = ELEM_SIZE - valid_in_elem; - - let src = msg.as_ptr(); - let dst = (self.buf.as_mut_ptr() as *mut u8).add(nbuf); - copy_nonoverlapping_small(src, dst, needed_in_elem); - - // Process buffer. + unsafe { + let length = msg.len(); + let nbuf = self.nbuf; + debug_assert!(nbuf < BUFFER_SIZE); + debug_assert!(nbuf + length >= BUFFER_SIZE); + + // Always copy first part of input into current element of buffer. + // This function should only be called when the write fills the buffer, + // so we know that there is enough input to fill the current element. 
+ let valid_in_elem = nbuf % ELEM_SIZE; + let needed_in_elem = ELEM_SIZE - valid_in_elem; + + let src = msg.as_ptr(); + let dst = (self.buf.as_mut_ptr() as *mut u8).add(nbuf); + copy_nonoverlapping_small(src, dst, needed_in_elem); + + // Process buffer. + + // Using `nbuf / ELEM_SIZE + 1` rather than `(nbuf + needed_in_elem) / + // ELEM_SIZE` to show the compiler that this loop's upper bound is > 0. + // We know that is true, because last step ensured we have a full + // element in the buffer. + let last = nbuf / ELEM_SIZE + 1; + + for i in 0..last { + let elem = self.buf.get_unchecked(i).assume_init().to_le(); + self.state.v3 ^= elem; + Sip13Rounds::c_rounds(&mut self.state); + self.state.v0 ^= elem; + } - // Using `nbuf / ELEM_SIZE + 1` rather than `(nbuf + needed_in_elem) / - // ELEM_SIZE` to show the compiler that this loop's upper bound is > 0. - // We know that is true, because last step ensured we have a full - // element in the buffer. - let last = nbuf / ELEM_SIZE + 1; + // Process the remaining element-sized chunks of input. + let mut processed = needed_in_elem; + let input_left = length - processed; + let elems_left = input_left / ELEM_SIZE; + let extra_bytes_left = input_left % ELEM_SIZE; + + for _ in 0..elems_left { + let elem = (msg.as_ptr().add(processed) as *const u64).read_unaligned().to_le(); + self.state.v3 ^= elem; + Sip13Rounds::c_rounds(&mut self.state); + self.state.v0 ^= elem; + processed += ELEM_SIZE; + } - for i in 0..last { - let elem = self.buf.get_unchecked(i).assume_init().to_le(); - self.state.v3 ^= elem; - Sip13Rounds::c_rounds(&mut self.state); - self.state.v0 ^= elem; - } + // Copy remaining input into start of buffer. + let src = msg.as_ptr().add(processed); + let dst = self.buf.as_mut_ptr() as *mut u8; + copy_nonoverlapping_small(src, dst, extra_bytes_left); - // Process the remaining element-sized chunks of input. - let mut processed = needed_in_elem; - let input_left = length - processed; - let elems_left = input_left / ELEM_SIZE; - let extra_bytes_left = input_left % ELEM_SIZE; - - for _ in 0..elems_left { - let elem = (msg.as_ptr().add(processed) as *const u64).read_unaligned().to_le(); - self.state.v3 ^= elem; - Sip13Rounds::c_rounds(&mut self.state); - self.state.v0 ^= elem; - processed += ELEM_SIZE; + self.nbuf = extra_bytes_left; + self.processed += nbuf + processed; } - - // Copy remaining input into start of buffer. - let src = msg.as_ptr().add(processed); - let dst = self.buf.as_mut_ptr() as *mut u8; - copy_nonoverlapping_small(src, dst, extra_bytes_left); - - self.nbuf = extra_bytes_left; - self.processed += nbuf + processed; } #[inline] diff --git a/compiler/rustc_data_structures/src/sorted_map/index_map.rs b/compiler/rustc_data_structures/src/sorted_map/index_map.rs index 7d23ff519..c172ee1c9 100644 --- a/compiler/rustc_data_structures/src/sorted_map/index_map.rs +++ b/compiler/rustc_data_structures/src/sorted_map/index_map.rs @@ -3,7 +3,7 @@ use std::hash::{Hash, Hasher}; use crate::stable_hasher::{HashStable, StableHasher}; -use rustc_index::vec::{Idx, IndexVec}; +use rustc_index::{Idx, IndexVec}; /// An indexed multi-map that preserves insertion order while permitting both *O*(log *n*) lookup of /// an item by key and *O*(1) lookup by index. 
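The import-only change just above touches `sorted_map/index_map.rs`, whose doc comment spells out the shape of the structure: insertion order is preserved, lookup by key is O(log n), and lookup by index is O(1). A rough standalone sketch of that idea follows (illustrative names only, not the actual `SortedIndexMultiMap` implementation):

// Sketch only: entries stay in insertion order; a side index sorted by key
// gives O(log n) key lookup, while positions index the entries directly.
struct TinyIndexedMultiMap<K: Ord, V> {
    entries: Vec<(K, V)>, // insertion order; O(1) access by index
    by_key: Vec<usize>,   // indices into `entries`, kept sorted by key
}

impl<K: Ord, V> TinyIndexedMultiMap<K, V> {
    fn new() -> Self {
        Self { entries: Vec::new(), by_key: Vec::new() }
    }

    fn insert(&mut self, key: K, value: V) -> usize {
        let idx = self.entries.len();
        self.entries.push((key, value));
        // Binary search keeps `by_key` sorted; `<=` keeps equal keys in insertion order.
        let pos = self.by_key.partition_point(|&i| self.entries[i].0 <= self.entries[idx].0);
        self.by_key.insert(pos, idx);
        idx
    }

    fn get_by_index(&self, idx: usize) -> Option<&V> {
        self.entries.get(idx).map(|(_, v)| v) // O(1)
    }

    fn get_by_key<'a>(&'a self, key: &'a K) -> impl Iterator<Item = &'a V> + 'a {
        // O(log n) to find the first match, then walk the run of equal keys.
        let start = self.by_key.partition_point(|&i| self.entries[i].0 < *key);
        self.by_key[start..]
            .iter()
            .take_while(move |&&i| self.entries[i].0 == *key)
            .map(move |&i| &self.entries[i].1)
    }
}

fn main() {
    let mut map = TinyIndexedMultiMap::new();
    let first = map.insert("b", 1);
    map.insert("a", 2);
    map.insert("b", 3);
    assert_eq!(map.get_by_index(first), Some(&1));
    assert_eq!(map.get_by_key(&"b").copied().collect::<Vec<_>>(), vec![1, 3]);
}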
diff --git a/compiler/rustc_data_structures/src/sso/map.rs b/compiler/rustc_data_structures/src/sso/map.rs index 89b8c8526..99581ed23 100644 --- a/compiler/rustc_data_structures/src/sso/map.rs +++ b/compiler/rustc_data_structures/src/sso/map.rs @@ -256,12 +256,9 @@ impl<K: Eq + Hash, V> SsoHashMap<K, V> { pub fn remove(&mut self, key: &K) -> Option<V> { match self { SsoHashMap::Array(array) => { - if let Some(index) = array.iter().position(|(k, _v)| k == key) { - Some(array.swap_remove(index).1) - } else { - None - } + array.iter().position(|(k, _v)| k == key).map(|index| array.swap_remove(index).1) } + SsoHashMap::Map(map) => map.remove(key), } } diff --git a/compiler/rustc_data_structures/src/stable_hasher.rs b/compiler/rustc_data_structures/src/stable_hasher.rs index 3ed1de1bc..6d57d81c5 100644 --- a/compiler/rustc_data_structures/src/stable_hasher.rs +++ b/compiler/rustc_data_structures/src/stable_hasher.rs @@ -1,7 +1,8 @@ use crate::sip128::SipHasher128; -use rustc_index::bit_set; -use rustc_index::vec; +use rustc_index::bit_set::{self, BitSet}; +use rustc_index::{Idx, IndexVec}; use smallvec::SmallVec; +use std::fmt; use std::hash::{BuildHasher, Hash, Hasher}; use std::marker::PhantomData; use std::mem; @@ -9,6 +10,8 @@ use std::mem; #[cfg(test)] mod tests; +pub use crate::hashes::{Hash128, Hash64}; + /// When hashing something that ends up affecting properties like symbol names, /// we want these symbol names to be calculated independently of other factors /// like what architecture you're compiling *from*. @@ -20,8 +23,8 @@ pub struct StableHasher { state: SipHasher128, } -impl ::std::fmt::Debug for StableHasher { - fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { +impl fmt::Debug for StableHasher { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { write!(f, "{:?}", self.state) } } @@ -42,21 +45,6 @@ impl StableHasher { } } -impl StableHasherResult for u128 { - #[inline] - fn finish(hasher: StableHasher) -> Self { - let (_0, _1) = hasher.finalize(); - u128::from(_0) | (u128::from(_1) << 64) - } -} - -impl StableHasherResult for u64 { - #[inline] - fn finish(hasher: StableHasher) -> Self { - hasher.finalize().0 - } -} - impl StableHasher { #[inline] pub fn finalize(self) -> (u64, u64) { @@ -107,7 +95,8 @@ impl Hasher for StableHasher { #[inline] fn write_u128(&mut self, i: u128) { - self.state.write(&i.to_le_bytes()); + self.write_u64(i as u64); + self.write_u64((i >> 64) as u64); } #[inline] @@ -286,6 +275,9 @@ impl_stable_traits_for_trivial_type!(i128); impl_stable_traits_for_trivial_type!(char); impl_stable_traits_for_trivial_type!(()); +impl_stable_traits_for_trivial_type!(Hash64); +impl_stable_traits_for_trivial_type!(Hash128); + impl<CTX> HashStable<CTX> for ! 
{ fn hash_stable(&self, _ctx: &mut CTX, _hasher: &mut StableHasher) { unreachable!() @@ -565,7 +557,7 @@ where } } -impl<I: vec::Idx, T, CTX> HashStable<CTX> for vec::IndexVec<I, T> +impl<I: Idx, T, CTX> HashStable<CTX> for IndexVec<I, T> where T: HashStable<CTX>, { @@ -577,13 +569,13 @@ where } } -impl<I: vec::Idx, CTX> HashStable<CTX> for bit_set::BitSet<I> { +impl<I: Idx, CTX> HashStable<CTX> for BitSet<I> { fn hash_stable(&self, _ctx: &mut CTX, hasher: &mut StableHasher) { ::std::hash::Hash::hash(self, hasher); } } -impl<R: vec::Idx, C: vec::Idx, CTX> HashStable<CTX> for bit_set::BitMatrix<R, C> { +impl<R: Idx, C: Idx, CTX> HashStable<CTX> for bit_set::BitMatrix<R, C> { fn hash_stable(&self, _ctx: &mut CTX, hasher: &mut StableHasher) { ::std::hash::Hash::hash(self, hasher); } @@ -668,7 +660,7 @@ fn stable_hash_reduce<HCX, I, C, F>( .map(|value| { let mut hasher = StableHasher::new(); hash_function(&mut hasher, hcx, value); - hasher.finish::<u128>() + hasher.finish::<Hash128>() }) .reduce(|accum, value| accum.wrapping_add(value)); hash.hash_stable(hcx, hasher); diff --git a/compiler/rustc_data_structures/src/stable_hasher/tests.rs b/compiler/rustc_data_structures/src/stable_hasher/tests.rs index a98b1bc36..c8921f6a7 100644 --- a/compiler/rustc_data_structures/src/stable_hasher/tests.rs +++ b/compiler/rustc_data_structures/src/stable_hasher/tests.rs @@ -72,7 +72,7 @@ fn test_hash_isize() { assert_eq!(h.finalize(), expected); } -fn hash<T: HashStable<()>>(t: &T) -> u128 { +fn hash<T: HashStable<()>>(t: &T) -> Hash128 { let mut h = StableHasher::new(); let ctx = &mut (); t.hash_stable(ctx, &mut h); diff --git a/compiler/rustc_data_structures/src/svh.rs b/compiler/rustc_data_structures/src/svh.rs index 61654b9e8..71679086f 100644 --- a/compiler/rustc_data_structures/src/svh.rs +++ b/compiler/rustc_data_structures/src/svh.rs @@ -5,58 +5,36 @@ //! mismatches where we have two versions of the same crate that were //! compiled from distinct sources. -use rustc_serialize::{Decodable, Decoder, Encodable, Encoder}; +use crate::fingerprint::Fingerprint; use std::fmt; -use std::hash::{Hash, Hasher}; use crate::stable_hasher; -#[derive(Copy, Clone, PartialEq, Eq, Debug)] +#[derive(Copy, Clone, PartialEq, Eq, Debug, Encodable, Decodable, Hash)] pub struct Svh { - hash: u64, + hash: Fingerprint, } impl Svh { /// Creates a new `Svh` given the hash. If you actually want to /// compute the SVH from some HIR, you want the `calculate_svh` /// function found in `rustc_incremental`. 
- pub fn new(hash: u64) -> Svh { + pub fn new(hash: Fingerprint) -> Svh { Svh { hash } } - pub fn as_u64(&self) -> u64 { - self.hash + pub fn as_u128(self) -> u128 { + self.hash.as_u128() } - pub fn to_string(&self) -> String { - format!("{:016x}", self.hash) - } -} - -impl Hash for Svh { - fn hash<H>(&self, state: &mut H) - where - H: Hasher, - { - self.hash.to_le().hash(state); + pub fn to_hex(self) -> String { + format!("{:032x}", self.hash.as_u128()) } } impl fmt::Display for Svh { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - f.pad(&self.to_string()) - } -} - -impl<S: Encoder> Encodable<S> for Svh { - fn encode(&self, s: &mut S) { - s.emit_u64(self.as_u64().to_le()); - } -} - -impl<D: Decoder> Decodable<D> for Svh { - fn decode(d: &mut D) -> Svh { - Svh::new(u64::from_le(d.read_u64())) + f.pad(&self.to_hex()) } } diff --git a/compiler/rustc_data_structures/src/sync.rs b/compiler/rustc_data_structures/src/sync.rs index ef1da8519..6c3197d8e 100644 --- a/compiler/rustc_data_structures/src/sync.rs +++ b/compiler/rustc_data_structures/src/sync.rs @@ -39,12 +39,15 @@ //! //! [^2] `MTLockRef` is a typedef. -use crate::owned_slice::OwnedSlice; +pub use crate::marker::*; use std::collections::HashMap; use std::hash::{BuildHasher, Hash}; use std::ops::{Deref, DerefMut}; use std::panic::{catch_unwind, resume_unwind, AssertUnwindSafe}; +mod worker_local; +pub use worker_local::{Registry, WorkerLocal}; + pub use std::sync::atomic::Ordering; pub use std::sync::atomic::Ordering::SeqCst; @@ -52,6 +55,43 @@ pub use vec::{AppendOnlyIndexVec, AppendOnlyVec}; mod vec; +mod mode { + use super::Ordering; + use std::sync::atomic::AtomicU8; + + const UNINITIALIZED: u8 = 0; + const DYN_NOT_THREAD_SAFE: u8 = 1; + const DYN_THREAD_SAFE: u8 = 2; + + static DYN_THREAD_SAFE_MODE: AtomicU8 = AtomicU8::new(UNINITIALIZED); + + // Whether thread safety is enabled (due to running under multiple threads). + #[inline] + pub fn is_dyn_thread_safe() -> bool { + match DYN_THREAD_SAFE_MODE.load(Ordering::Relaxed) { + DYN_NOT_THREAD_SAFE => false, + DYN_THREAD_SAFE => true, + _ => panic!("uninitialized dyn_thread_safe mode!"), + } + } + + // Only set by the `-Z threads` compile option + pub fn set_dyn_thread_safe_mode(mode: bool) { + let set: u8 = if mode { DYN_THREAD_SAFE } else { DYN_NOT_THREAD_SAFE }; + let previous = DYN_THREAD_SAFE_MODE.compare_exchange( + UNINITIALIZED, + set, + Ordering::Relaxed, + Ordering::Relaxed, + ); + + // Check that the mode was either uninitialized or was already set to the requested mode. + assert!(previous.is_ok() || previous == Err(set)); + } +} + +pub use mode::{is_dyn_thread_safe, set_dyn_thread_safe_mode}; + cfg_if! { if #[cfg(not(parallel_compiler))] { pub unsafe auto trait Send {} @@ -146,7 +186,7 @@ cfg_if! { #[macro_export] macro_rules! parallel { - ($($blocks:tt),*) => { + ($($blocks:block),*) => { // We catch panics here ensuring that all the blocks execute. // This makes behavior consistent with the parallel compiler. let mut panic = None; @@ -165,12 +205,6 @@ cfg_if! { } } - pub use Iterator as ParallelIterator; - - pub fn par_iter<T: IntoIterator>(t: T) -> T::IntoIter { - t.into_iter() - } - pub fn par_for_each_in<T: IntoIterator>(t: T, mut for_each: impl FnMut(T::Item) + Sync + Send) { // We catch panics here ensuring that all the loop iterations execute. // This makes behavior consistent with the parallel compiler. @@ -187,7 +221,28 @@ cfg_if! 
{ } } - pub type MetadataRef = OwnedSlice; + pub fn par_map<T: IntoIterator, R, C: FromIterator<R>>( + t: T, + mut map: impl FnMut(<<T as IntoIterator>::IntoIter as Iterator>::Item) -> R, + ) -> C { + // We catch panics here ensuring that all the loop iterations execute. + let mut panic = None; + let r = t.into_iter().filter_map(|i| { + match catch_unwind(AssertUnwindSafe(|| map(i))) { + Ok(r) => Some(r), + Err(p) => { + if panic.is_none() { + panic = Some(p); + } + None + } + } + }).collect(); + if let Some(panic) = panic { + resume_unwind(panic); + } + r + } pub use std::rc::Rc as Lrc; pub use std::rc::Weak as Weak; @@ -205,33 +260,6 @@ cfg_if! { use std::cell::Cell; - #[derive(Debug)] - pub struct WorkerLocal<T>(OneThread<T>); - - impl<T> WorkerLocal<T> { - /// Creates a new worker local where the `initial` closure computes the - /// value this worker local should take for each thread in the thread pool. - #[inline] - pub fn new<F: FnMut(usize) -> T>(mut f: F) -> WorkerLocal<T> { - WorkerLocal(OneThread::new(f(0))) - } - - /// Returns the worker-local value for each thread - #[inline] - pub fn into_inner(self) -> Vec<T> { - vec![OneThread::into_inner(self.0)] - } - } - - impl<T> Deref for WorkerLocal<T> { - type Target = T; - - #[inline(always)] - fn deref(&self) -> &T { - &self.0 - } - } - pub type MTLockRef<'a, T> = &'a mut MTLock<T>; #[derive(Debug, Default)] @@ -326,51 +354,166 @@ cfg_if! { use parking_lot::RwLock as InnerRwLock; use std::thread; - pub use rayon::{join, scope}; + + #[inline] + pub fn join<A, B, RA: DynSend, RB: DynSend>(oper_a: A, oper_b: B) -> (RA, RB) + where + A: FnOnce() -> RA + DynSend, + B: FnOnce() -> RB + DynSend, + { + if mode::is_dyn_thread_safe() { + let oper_a = FromDyn::from(oper_a); + let oper_b = FromDyn::from(oper_b); + let (a, b) = rayon::join(move || FromDyn::from(oper_a.into_inner()()), move || FromDyn::from(oper_b.into_inner()())); + (a.into_inner(), b.into_inner()) + } else { + (oper_a(), oper_b()) + } + } + + // This function only works when `mode::is_dyn_thread_safe()`. + pub fn scope<'scope, OP, R>(op: OP) -> R + where + OP: FnOnce(&rayon::Scope<'scope>) -> R + DynSend, + R: DynSend, + { + let op = FromDyn::from(op); + rayon::scope(|s| FromDyn::from(op.into_inner()(s))).into_inner() + } /// Runs a list of blocks in parallel. The first block is executed immediately on /// the current thread. Use that for the longest running block. #[macro_export] macro_rules! parallel { - (impl $fblock:tt [$($c:tt,)*] [$block:tt $(, $rest:tt)*]) => { + (impl $fblock:block [$($c:expr,)*] [$block:expr $(, $rest:expr)*]) => { parallel!(impl $fblock [$block, $($c,)*] [$($rest),*]) }; - (impl $fblock:tt [$($blocks:tt,)*] []) => { + (impl $fblock:block [$($blocks:expr,)*] []) => { ::rustc_data_structures::sync::scope(|s| { + $(let block = rustc_data_structures::sync::FromDyn::from(|| $blocks); + s.spawn(move |_| block.into_inner()());)* + (|| $fblock)(); + }); + }; + ($fblock:block, $($blocks:block),*) => { + if rustc_data_structures::sync::is_dyn_thread_safe() { + // Reverse the order of the later blocks since Rayon executes them in reverse order + // when using a single thread. This ensures the execution order matches that + // of a single threaded rustc. + parallel!(impl $fblock [] [$($blocks),*]); + } else { + // We catch panics here ensuring that all the blocks execute. + // This makes behavior consistent with the parallel compiler. 
+ let mut panic = None; + if let Err(p) = ::std::panic::catch_unwind( + ::std::panic::AssertUnwindSafe(|| $fblock) + ) { + if panic.is_none() { + panic = Some(p); + } + } $( - s.spawn(|_| $blocks); + if let Err(p) = ::std::panic::catch_unwind( + ::std::panic::AssertUnwindSafe(|| $blocks) + ) { + if panic.is_none() { + panic = Some(p); + } + } )* - $fblock; - }) - }; - ($fblock:tt, $($blocks:tt),*) => { - // Reverse the order of the later blocks since Rayon executes them in reverse order - // when using a single thread. This ensures the execution order matches that - // of a single threaded rustc - parallel!(impl $fblock [] [$($blocks),*]); + if let Some(panic) = panic { + ::std::panic::resume_unwind(panic); + } + } }; } - pub use rayon_core::WorkerLocal; + use rayon::iter::{FromParallelIterator, IntoParallelIterator, ParallelIterator}; - pub use rayon::iter::ParallelIterator; - use rayon::iter::IntoParallelIterator; + pub fn par_for_each_in<I, T: IntoIterator<Item = I> + IntoParallelIterator<Item = I>>( + t: T, + for_each: impl Fn(I) + DynSync + DynSend + ) { + if mode::is_dyn_thread_safe() { + let for_each = FromDyn::from(for_each); + let panic: Lock<Option<_>> = Lock::new(None); + t.into_par_iter().for_each(|i| if let Err(p) = catch_unwind(AssertUnwindSafe(|| for_each(i))) { + let mut l = panic.lock(); + if l.is_none() { + *l = Some(p) + } + }); - pub fn par_iter<T: IntoParallelIterator>(t: T) -> T::Iter { - t.into_par_iter() + if let Some(panic) = panic.into_inner() { + resume_unwind(panic); + } + } else { + // We catch panics here ensuring that all the loop iterations execute. + // This makes behavior consistent with the parallel compiler. + let mut panic = None; + t.into_iter().for_each(|i| { + if let Err(p) = catch_unwind(AssertUnwindSafe(|| for_each(i))) { + if panic.is_none() { + panic = Some(p); + } + } + }); + if let Some(panic) = panic { + resume_unwind(panic); + } + } } - pub fn par_for_each_in<T: IntoParallelIterator>( + pub fn par_map< + I, + T: IntoIterator<Item = I> + IntoParallelIterator<Item = I>, + R: std::marker::Send, + C: FromIterator<R> + FromParallelIterator<R> + >( t: T, - for_each: impl Fn(T::Item) + Sync + Send, - ) { - let ps: Vec<_> = t.into_par_iter().map(|i| catch_unwind(AssertUnwindSafe(|| for_each(i)))).collect(); - ps.into_iter().for_each(|p| if let Err(panic) = p { - resume_unwind(panic) - }); - } + map: impl Fn(I) -> R + DynSync + DynSend + ) -> C { + if mode::is_dyn_thread_safe() { + let panic: Lock<Option<_>> = Lock::new(None); + let map = FromDyn::from(map); + // We catch panics here ensuring that all the loop iterations execute. + let r = t.into_par_iter().filter_map(|i| { + match catch_unwind(AssertUnwindSafe(|| map(i))) { + Ok(r) => Some(r), + Err(p) => { + let mut l = panic.lock(); + if l.is_none() { + *l = Some(p); + } + None + }, + } + }).collect(); - pub type MetadataRef = OwnedSlice; + if let Some(panic) = panic.into_inner() { + resume_unwind(panic); + } + r + } else { + // We catch panics here ensuring that all the loop iterations execute. + let mut panic = None; + let r = t.into_iter().filter_map(|i| { + match catch_unwind(AssertUnwindSafe(|| map(i))) { + Ok(r) => Some(r), + Err(p) => { + if panic.is_none() { + panic = Some(p); + } + None + } + } + }).collect(); + if let Some(panic) = panic { + resume_unwind(panic); + } + r + } + } /// This makes locks panic if they are already held. /// It is only useful when you are running in a single thread @@ -378,10 +521,9 @@ cfg_if! 
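The serial fallbacks and the rayon-backed versions of `par_for_each_in` and `par_map` above follow one convention: run every block or iteration even if an earlier one panics, then resume only the first captured panic. A reduced sketch of that convention (the helper name is made up for illustration):

```rust
use std::panic::{catch_unwind, resume_unwind, AssertUnwindSafe};

// Map every item, remembering only the first panic and re-raising it once all
// items have been visited, like the `par_map`/`par_for_each_in` fallbacks above.
fn map_defer_panic<T, R>(items: Vec<T>, f: impl Fn(T) -> R) -> Vec<R> {
    let mut first_panic = None;
    let results = items
        .into_iter()
        .filter_map(|item| match catch_unwind(AssertUnwindSafe(|| f(item))) {
            Ok(r) => Some(r),
            Err(p) => {
                first_panic.get_or_insert(p);
                None
            }
        })
        .collect();
    if let Some(p) = first_panic {
        resume_unwind(p);
    }
    results
}

fn main() {
    let doubled = map_defer_panic(vec![1, 2, 3], |x| x * 2);
    assert_eq!(doubled, vec![2, 4, 6]);
}
```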
{ } }
-pub fn assert_sync<T: ?Sized + Sync>() {}
-pub fn assert_send<T: ?Sized + Send>() {}
-pub fn assert_send_val<T: ?Sized + Send>(_t: &T) {}
-pub fn assert_send_sync_val<T: ?Sized + Sync + Send>(_t: &T) {}
+#[derive(Default)]
+#[cfg_attr(parallel_compiler, repr(align(64)))]
+pub struct CacheAligned<T>(pub T);
 pub trait HashMapExt<K, V> {
     /// Same as HashMap::insert, but it may panic if there's already an
diff --git a/compiler/rustc_data_structures/src/sync/vec.rs b/compiler/rustc_data_structures/src/sync/vec.rs
index 1783b4b35..e36dded9e 100644
--- a/compiler/rustc_data_structures/src/sync/vec.rs
+++ b/compiler/rustc_data_structures/src/sync/vec.rs
@@ -1,6 +1,6 @@
 use std::marker::PhantomData;
-use rustc_index::vec::Idx;
+use rustc_index::Idx;
 #[derive(Default)]
 pub struct AppendOnlyIndexVec<I: Idx, T: Copy> {
diff --git a/compiler/rustc_data_structures/src/sync/worker_local.rs b/compiler/rustc_data_structures/src/sync/worker_local.rs
new file mode 100644
index 000000000..d61bb55be
--- /dev/null
+++ b/compiler/rustc_data_structures/src/sync/worker_local.rs
@@ -0,0 +1,173 @@
+use crate::sync::Lock;
+use std::cell::Cell;
+use std::cell::OnceCell;
+use std::ops::Deref;
+use std::ptr;
+use std::sync::Arc;
+
+#[cfg(parallel_compiler)]
+use {crate::cold_path, crate::sync::CacheAligned};
+
+/// A pointer to the `RegistryData` which uniquely identifies a registry.
+/// This identifier can be reused if the registry gets freed.
+#[derive(Clone, Copy, PartialEq)]
+struct RegistryId(*const RegistryData);
+
+impl RegistryId {
+    #[inline(always)]
+    /// Verifies that the current thread is associated with the registry and returns its unique
+    /// index within the registry. This panics if the current thread is not associated with this
+    /// registry.
+    ///
+    /// Note that there is a possible race where the identifier in `THREAD_DATA` could be reused,
+    /// so this can succeed from a different registry.
+    #[cfg(parallel_compiler)]
+    fn verify(self) -> usize {
+        let (id, index) = THREAD_DATA.with(|data| (data.registry_id.get(), data.index.get()));
+
+        if id == self {
+            index
+        } else {
+            cold_path(|| panic!("Unable to verify registry association"))
+        }
+    }
+}
+
+struct RegistryData {
+    thread_limit: usize,
+    threads: Lock<usize>,
+}
+
+/// Represents a list of threads which can access worker locals.
+#[derive(Clone)]
+pub struct Registry(Arc<RegistryData>);
+
+thread_local! {
+    /// The registry associated with the thread.
+    /// This allows the `WorkerLocal` type to clone the registry in its constructor.
+    static REGISTRY: OnceCell<Registry> = OnceCell::new();
+}
+
+struct ThreadData {
+    registry_id: Cell<RegistryId>,
+    index: Cell<usize>,
+}
+
+thread_local! {
+    /// A thread local which contains the identifier of `REGISTRY` but allows for faster access.
+    /// It also holds the index of the current thread.
+    static THREAD_DATA: ThreadData = const { ThreadData {
+        registry_id: Cell::new(RegistryId(ptr::null())),
+        index: Cell::new(0),
+    }};
+}
+
+impl Registry {
+    /// Creates a registry which can hold up to `thread_limit` threads.
+    pub fn new(thread_limit: usize) -> Self {
+        Registry(Arc::new(RegistryData { thread_limit, threads: Lock::new(0) }))
+    }
+
+    /// Gets the registry associated with the current thread. Panics if there's no such registry.
+    pub fn current() -> Self {
+        REGISTRY.with(|registry| registry.get().cloned().expect("No associated registry"))
+    }
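`WorkerLocal`, defined a little further down, combines two ideas that the sketch below reproduces in miniature: the address of the shared `Arc` allocation acts as a cheap registry identity (the `RegistryId` trick above), and each per-thread slot is padded to its own cache line (as `CacheAligned` does) so neighboring slots do not cause false sharing. All names in the sketch are illustrative stand-ins, not the crate's API:

```rust
use std::sync::Arc;

/// Cache-line-padded slot, in the spirit of `CacheAligned<T>` (64-byte aligned
/// so per-thread values do not share a cache line).
#[repr(align(64))]
struct Slot<T>(T);

/// Shared registry data; its heap address doubles as a cheap identity token,
/// the same trick as `RegistryId(*const RegistryData)`.
struct RegistryData {
    thread_limit: usize,
}

#[derive(Clone)]
struct Registry(Arc<RegistryData>);

#[derive(Clone, Copy, PartialEq, Eq)]
struct RegistryId(*const RegistryData);

impl Registry {
    fn new(thread_limit: usize) -> Self {
        Registry(Arc::new(RegistryData { thread_limit }))
    }

    fn id(&self) -> RegistryId {
        RegistryId(Arc::as_ptr(&self.0))
    }
}

/// One padded value per possible thread, indexed by the thread's registry index.
struct PerThread<T> {
    locals: Box<[Slot<T>]>,
    registry: Registry,
}

impl<T> PerThread<T> {
    fn new(registry: Registry, mut init: impl FnMut(usize) -> T) -> Self {
        let locals = (0..registry.0.thread_limit).map(|i| Slot(init(i))).collect();
        PerThread { locals, registry }
    }

    /// Access the slot for `index`, but only for callers that really belong to
    /// this registry (compare identity tokens, like `RegistryId::verify`).
    fn get(&self, claimed: RegistryId, index: usize) -> &T {
        assert!(claimed == self.registry.id(), "wrong registry");
        &self.locals[index].0
    }
}

fn main() {
    let registry = Registry::new(4);
    let per_thread = PerThread::new(registry.clone(), |i| i * 10);
    assert_eq!(*per_thread.get(registry.id(), 2), 20);
}
```

Holding the `Arc` inside the per-thread structure is what keeps the address comparison sound: while the value is alive the allocation cannot be freed, so its address cannot be recycled, the same argument the `WorkerLocal` safety comment makes about keeping a `Registry`.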
+
+    /// Registers the current thread with the registry so worker locals can be used on it.
+    /// Panics if the thread limit is hit or if the thread already has an associated registry.
+    pub fn register(&self) {
+        let mut threads = self.0.threads.lock();
+        if *threads < self.0.thread_limit {
+            REGISTRY.with(|registry| {
+                if registry.get().is_some() {
+                    drop(threads);
+                    panic!("Thread already has a registry");
+                }
+                registry.set(self.clone()).ok();
+                THREAD_DATA.with(|data| {
+                    data.registry_id.set(self.id());
+                    data.index.set(*threads);
+                });
+                *threads += 1;
+            });
+        } else {
+            drop(threads);
+            panic!("Thread limit reached");
+        }
+    }
+
+    /// Gets the identifier of this registry.
+    fn id(&self) -> RegistryId {
+        RegistryId(&*self.0)
+    }
+}
+
+/// Holds worker local values for each possible thread in a registry. You can only access the
+/// worker local value through the `Deref` impl on the registry associated with the thread it was
+/// created on. It will panic otherwise.
+pub struct WorkerLocal<T> {
+    #[cfg(not(parallel_compiler))]
+    local: T,
+    #[cfg(parallel_compiler)]
+    locals: Box<[CacheAligned<T>]>,
+    #[cfg(parallel_compiler)]
+    registry: Registry,
+}
+
+// This is safe because the `deref` call will return a reference to a `T` unique to each thread
+// or it will panic for threads without an associated local. So there isn't a need for `T` to do
+// its own synchronization. The `verify` method on `RegistryId` has an issue where the id
+// can be reused, but `WorkerLocal` has a reference to `Registry` which will prevent any reuse.
+#[cfg(parallel_compiler)]
+unsafe impl<T: Send> Sync for WorkerLocal<T> {}
+
+impl<T> WorkerLocal<T> {
+    /// Creates a new worker local where the `initial` closure computes the
+    /// value this worker local should take for each thread in the registry.
+    #[inline]
+    pub fn new<F: FnMut(usize) -> T>(mut initial: F) -> WorkerLocal<T> {
+        #[cfg(parallel_compiler)]
+        {
+            let registry = Registry::current();
+            WorkerLocal {
+                locals: (0..registry.0.thread_limit).map(|i| CacheAligned(initial(i))).collect(),
+                registry,
+            }
+        }
+        #[cfg(not(parallel_compiler))]
+        {
+            WorkerLocal { local: initial(0) }
+        }
+    }
+
+    /// Returns the worker-local values for each thread.
+    #[inline]
+    pub fn into_inner(self) -> impl Iterator<Item = T> {
+        #[cfg(parallel_compiler)]
+        {
+            self.locals.into_vec().into_iter().map(|local| local.0)
+        }
+        #[cfg(not(parallel_compiler))]
+        {
+            std::iter::once(self.local)
+        }
+    }
+}
+
+impl<T> Deref for WorkerLocal<T> {
+    type Target = T;
+
+    #[inline(always)]
+    #[cfg(not(parallel_compiler))]
+    fn deref(&self) -> &T {
+        &self.local
+    }
+
+    #[inline(always)]
+    #[cfg(parallel_compiler)]
+    fn deref(&self) -> &T {
+        // This is safe because `verify` will only return values less than
+        // `self.registry.thread_limit` which is the size of the `self.locals` array.
+        unsafe { &self.locals.get_unchecked(self.registry.id().verify()).0 }
+    }
+}
diff --git a/compiler/rustc_data_structures/src/tagged_ptr.rs b/compiler/rustc_data_structures/src/tagged_ptr.rs
index 651bc556c..2914eece6 100644
--- a/compiler/rustc_data_structures/src/tagged_ptr.rs
+++ b/compiler/rustc_data_structures/src/tagged_ptr.rs
@@ -3,166 +3,281 @@
 //! In order to utilize the pointer packing, you must have two types: a pointer,
 //! and a tag.
 //!
-//! The pointer must implement the `Pointer` trait, with the primary requirement
-//! being conversion to and from a usize. Note that the pointer must be
-//! dereferenceable, so raw pointers generally cannot implement the `Pointer`
-//! trait. This implies that the pointer must also be nonzero.
+//!
The pointer must implement the [`Pointer`] trait, with the primary +//! requirement being convertible to and from a raw pointer. Note that the +//! pointer must be dereferenceable, so raw pointers generally cannot implement +//! the [`Pointer`] trait. This implies that the pointer must also be non-null. //! -//! Many common pointer types already implement the `Pointer` trait. +//! Many common pointer types already implement the [`Pointer`] trait. //! -//! The tag must implement the `Tag` trait. We assert that the tag and `Pointer` -//! are compatible at compile time. +//! The tag must implement the [`Tag`] trait. +//! +//! We assert that the tag and the [`Pointer`] types are compatible at compile +//! time. -use std::mem::ManuallyDrop; use std::ops::Deref; +use std::ptr::NonNull; use std::rc::Rc; use std::sync::Arc; +use crate::aligned::Aligned; + mod copy; mod drop; +mod impl_tag; pub use copy::CopyTaggedPtr; pub use drop::TaggedPtr; -/// This describes the pointer type encapsulated by TaggedPtr. +/// This describes the pointer type encapsulated by [`TaggedPtr`] and +/// [`CopyTaggedPtr`]. /// /// # Safety /// -/// The usize returned from `into_usize` must be a valid, dereferenceable, -/// pointer to `<Self as Deref>::Target`. Note that pointers to `Pointee` must -/// be thin, even though `Pointee` may not be sized. +/// The pointer returned from [`into_ptr`] must be a [valid], pointer to +/// [`<Self as Deref>::Target`]. /// -/// Note that the returned pointer from `into_usize` should be castable to `&mut -/// <Self as Deref>::Target` if `Pointer: DerefMut`. +/// Note that if `Self` implements [`DerefMut`] the pointer returned from +/// [`into_ptr`] must be valid for writes (and thus calling [`NonNull::as_mut`] +/// on it must be safe). /// -/// The BITS constant must be correct. At least `BITS` bits, least-significant, -/// must be zero on all returned pointers from `into_usize`. +/// The [`BITS`] constant must be correct. [`BITS`] least-significant bits, +/// must be zero on all pointers returned from [`into_ptr`]. /// -/// For example, if the alignment of `Pointee` is 2, then `BITS` should be 1. +/// For example, if the alignment of [`Self::Target`] is 2, then `BITS` should be 1. +/// +/// [`BITS`]: Pointer::BITS +/// [`into_ptr`]: Pointer::into_ptr +/// [valid]: std::ptr#safety +/// [`<Self as Deref>::Target`]: Deref::Target +/// [`Self::Target`]: Deref::Target +/// [`DerefMut`]: std::ops::DerefMut pub unsafe trait Pointer: Deref { + /// Number of unused (always zero) **least-significant bits** in this + /// pointer, usually related to the pointees alignment. + /// + /// For example if [`BITS`] = `2`, then given `ptr = Self::into_ptr(..)`, + /// `ptr.addr() & 0b11 == 0` must be true. + /// /// Most likely the value you want to use here is the following, unless - /// your Pointee type is unsized (e.g., `ty::List<T>` in rustc) in which - /// case you'll need to manually figure out what the right type to pass to - /// align_of is. + /// your [`Self::Target`] type is unsized (e.g., `ty::List<T>` in rustc) + /// or your pointer is over/under aligned, in which case you'll need to + /// manually figure out what the right type to pass to [`bits_for`] is, or + /// what the value to set here. 
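How many low bits a pointer can spare for a tag falls directly out of the pointee's alignment, which is what `bits_for` (added further down in this file) computes through the `Aligned` trait. A quick illustration using `std::mem::align_of`; the `spare_bits` helper is a stand-in, not the crate's `bits_for`:

```rust
use std::mem::align_of;

// Number of always-zero least-significant bits in a well-aligned pointer to `T`,
// i.e. how many tag bits it can spare.
fn spare_bits<T>() -> u32 {
    align_of::<T>().trailing_zeros()
}

fn main() {
    assert_eq!(spare_bits::<u8>(), 0);
    assert_eq!(spare_bits::<u16>(), 1);
    assert_eq!(spare_bits::<u32>(), 2);
    // u64 usually gives 3, but its alignment is target-dependent.
    println!("u64 spare bits: {}", spare_bits::<u64>());
}
```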
/// - /// ```ignore UNSOLVED (what to do about the Self) + /// ```rust /// # use std::ops::Deref; - /// std::mem::align_of::<<Self as Deref>::Target>().trailing_zeros() as usize; + /// # use rustc_data_structures::tagged_ptr::bits_for; + /// # struct T; + /// # impl Deref for T { type Target = u8; fn deref(&self) -> &u8 { &0 } } + /// # impl T { + /// const BITS: u32 = bits_for::<<Self as Deref>::Target>(); + /// # } /// ``` - const BITS: usize; - fn into_usize(self) -> usize; + /// + /// [`BITS`]: Pointer::BITS + /// [`Self::Target`]: Deref::Target + const BITS: u32; - /// # Safety + /// Turns this pointer into a raw, non-null pointer. + /// + /// The inverse of this function is [`from_ptr`]. /// - /// The passed `ptr` must be returned from `into_usize`. + /// This function guarantees that the least-significant [`Self::BITS`] bits + /// are zero. /// - /// This acts as `ptr::read` semantically, it should not be called more than - /// once on non-`Copy` `Pointer`s. - unsafe fn from_usize(ptr: usize) -> Self; + /// [`from_ptr`]: Pointer::from_ptr + /// [`Self::BITS`]: Pointer::BITS + fn into_ptr(self) -> NonNull<Self::Target>; - /// This provides a reference to the `Pointer` itself, rather than the - /// `Deref::Target`. It is used for cases where we want to call methods that - /// may be implement differently for the Pointer than the Pointee (e.g., - /// `Rc::clone` vs cloning the inner value). + /// Re-creates the original pointer, from a raw pointer returned by [`into_ptr`]. /// /// # Safety /// - /// The passed `ptr` must be returned from `into_usize`. - unsafe fn with_ref<R, F: FnOnce(&Self) -> R>(ptr: usize, f: F) -> R; + /// The passed `ptr` must be returned from [`into_ptr`]. + /// + /// This acts as [`ptr::read::<Self>()`] semantically, it should not be called more than + /// once on non-[`Copy`] `Pointer`s. + /// + /// [`into_ptr`]: Pointer::into_ptr + /// [`ptr::read::<Self>()`]: std::ptr::read + unsafe fn from_ptr(ptr: NonNull<Self::Target>) -> Self; } -/// This describes tags that the `TaggedPtr` struct can hold. +/// This describes tags that the [`TaggedPtr`] struct can hold. /// /// # Safety /// -/// The BITS constant must be correct. +/// The [`BITS`] constant must be correct. /// -/// No more than `BITS` least significant bits may be set in the returned usize. +/// No more than [`BITS`] least-significant bits may be set in the returned usize. +/// +/// [`BITS`]: Tag::BITS pub unsafe trait Tag: Copy { - const BITS: usize; + /// Number of least-significant bits in the return value of [`into_usize`] + /// which may be non-zero. In other words this is the bit width of the + /// value. + /// + /// [`into_usize`]: Tag::into_usize + const BITS: u32; + /// Turns this tag into an integer. + /// + /// The inverse of this function is [`from_usize`]. + /// + /// This function guarantees that only the least-significant [`Self::BITS`] + /// bits can be non-zero. + /// + /// [`from_usize`]: Tag::from_usize + /// [`Self::BITS`]: Tag::BITS fn into_usize(self) -> usize; + /// Re-creates the tag from the integer returned by [`into_usize`]. + /// /// # Safety /// - /// The passed `tag` must be returned from `into_usize`. + /// The passed `tag` must be returned from [`into_usize`]. 
+ /// + /// [`into_usize`]: Tag::into_usize unsafe fn from_usize(tag: usize) -> Self; } -unsafe impl<T> Pointer for Box<T> { - const BITS: usize = std::mem::align_of::<T>().trailing_zeros() as usize; - #[inline] - fn into_usize(self) -> usize { - Box::into_raw(self) as usize +/// Returns the number of bits available for use for tags in a pointer to `T` +/// (this is based on `T`'s alignment). +pub const fn bits_for<T: ?Sized + Aligned>() -> u32 { + crate::aligned::align_of::<T>().as_nonzero().trailing_zeros() +} + +/// Returns the correct [`Tag::BITS`] constant for a set of tag values. +pub const fn bits_for_tags(mut tags: &[usize]) -> u32 { + let mut bits = 0; + + while let &[tag, ref rest @ ..] = tags { + tags = rest; + + // bits required to represent `tag`, + // position of the most significant 1 + let b = usize::BITS - tag.leading_zeros(); + if b > bits { + bits = b; + } } + + bits +} + +unsafe impl<T: ?Sized + Aligned> Pointer for Box<T> { + const BITS: u32 = bits_for::<Self::Target>(); + #[inline] - unsafe fn from_usize(ptr: usize) -> Self { - Box::from_raw(ptr as *mut T) + fn into_ptr(self) -> NonNull<T> { + // Safety: pointers from `Box::into_raw` are valid & non-null + unsafe { NonNull::new_unchecked(Box::into_raw(self)) } } - unsafe fn with_ref<R, F: FnOnce(&Self) -> R>(ptr: usize, f: F) -> R { - let raw = ManuallyDrop::new(Self::from_usize(ptr)); - f(&raw) + + #[inline] + unsafe fn from_ptr(ptr: NonNull<T>) -> Self { + // Safety: `ptr` comes from `into_ptr` which calls `Box::into_raw` + unsafe { Box::from_raw(ptr.as_ptr()) } } } -unsafe impl<T> Pointer for Rc<T> { - const BITS: usize = std::mem::align_of::<T>().trailing_zeros() as usize; +unsafe impl<T: ?Sized + Aligned> Pointer for Rc<T> { + const BITS: u32 = bits_for::<Self::Target>(); + #[inline] - fn into_usize(self) -> usize { - Rc::into_raw(self) as usize + fn into_ptr(self) -> NonNull<T> { + // Safety: pointers from `Rc::into_raw` are valid & non-null + unsafe { NonNull::new_unchecked(Rc::into_raw(self).cast_mut()) } } + #[inline] - unsafe fn from_usize(ptr: usize) -> Self { - Rc::from_raw(ptr as *const T) - } - unsafe fn with_ref<R, F: FnOnce(&Self) -> R>(ptr: usize, f: F) -> R { - let raw = ManuallyDrop::new(Self::from_usize(ptr)); - f(&raw) + unsafe fn from_ptr(ptr: NonNull<T>) -> Self { + // Safety: `ptr` comes from `into_ptr` which calls `Rc::into_raw` + unsafe { Rc::from_raw(ptr.as_ptr()) } } } -unsafe impl<T> Pointer for Arc<T> { - const BITS: usize = std::mem::align_of::<T>().trailing_zeros() as usize; +unsafe impl<T: ?Sized + Aligned> Pointer for Arc<T> { + const BITS: u32 = bits_for::<Self::Target>(); + #[inline] - fn into_usize(self) -> usize { - Arc::into_raw(self) as usize + fn into_ptr(self) -> NonNull<T> { + // Safety: pointers from `Arc::into_raw` are valid & non-null + unsafe { NonNull::new_unchecked(Arc::into_raw(self).cast_mut()) } } + #[inline] - unsafe fn from_usize(ptr: usize) -> Self { - Arc::from_raw(ptr as *const T) - } - unsafe fn with_ref<R, F: FnOnce(&Self) -> R>(ptr: usize, f: F) -> R { - let raw = ManuallyDrop::new(Self::from_usize(ptr)); - f(&raw) + unsafe fn from_ptr(ptr: NonNull<T>) -> Self { + // Safety: `ptr` comes from `into_ptr` which calls `Arc::into_raw` + unsafe { Arc::from_raw(ptr.as_ptr()) } } } -unsafe impl<'a, T: 'a> Pointer for &'a T { - const BITS: usize = std::mem::align_of::<T>().trailing_zeros() as usize; +unsafe impl<'a, T: 'a + ?Sized + Aligned> Pointer for &'a T { + const BITS: u32 = bits_for::<Self::Target>(); + #[inline] - fn into_usize(self) -> usize { - self as 
*const T as usize + fn into_ptr(self) -> NonNull<T> { + NonNull::from(self) } + #[inline] - unsafe fn from_usize(ptr: usize) -> Self { - &*(ptr as *const T) - } - unsafe fn with_ref<R, F: FnOnce(&Self) -> R>(ptr: usize, f: F) -> R { - f(&*(&ptr as *const usize as *const Self)) + unsafe fn from_ptr(ptr: NonNull<T>) -> Self { + // Safety: + // `ptr` comes from `into_ptr` which gets the pointer from a reference + unsafe { ptr.as_ref() } } } -unsafe impl<'a, T: 'a> Pointer for &'a mut T { - const BITS: usize = std::mem::align_of::<T>().trailing_zeros() as usize; +unsafe impl<'a, T: 'a + ?Sized + Aligned> Pointer for &'a mut T { + const BITS: u32 = bits_for::<Self::Target>(); + #[inline] - fn into_usize(self) -> usize { - self as *mut T as usize + fn into_ptr(self) -> NonNull<T> { + NonNull::from(self) } + #[inline] - unsafe fn from_usize(ptr: usize) -> Self { - &mut *(ptr as *mut T) + unsafe fn from_ptr(mut ptr: NonNull<T>) -> Self { + // Safety: + // `ptr` comes from `into_ptr` which gets the pointer from a reference + unsafe { ptr.as_mut() } + } +} + +/// A tag type used in [`CopyTaggedPtr`] and [`TaggedPtr`] tests. +#[derive(Copy, Clone, Debug, PartialEq, Eq)] +#[cfg(test)] +enum Tag2 { + B00 = 0b00, + B01 = 0b01, + B10 = 0b10, + B11 = 0b11, +} + +#[cfg(test)] +unsafe impl Tag for Tag2 { + const BITS: u32 = 2; + + fn into_usize(self) -> usize { + self as _ } - unsafe fn with_ref<R, F: FnOnce(&Self) -> R>(ptr: usize, f: F) -> R { - f(&*(&ptr as *const usize as *const Self)) + + unsafe fn from_usize(tag: usize) -> Self { + match tag { + 0b00 => Tag2::B00, + 0b01 => Tag2::B01, + 0b10 => Tag2::B10, + 0b11 => Tag2::B11, + _ => unreachable!(), + } + } +} + +#[cfg(test)] +impl<HCX> crate::stable_hasher::HashStable<HCX> for Tag2 { + fn hash_stable(&self, hcx: &mut HCX, hasher: &mut crate::stable_hasher::StableHasher) { + (*self as u8).hash_stable(hcx, hasher); } } diff --git a/compiler/rustc_data_structures/src/tagged_ptr/copy.rs b/compiler/rustc_data_structures/src/tagged_ptr/copy.rs index e1d3e0bd3..e893a2c78 100644 --- a/compiler/rustc_data_structures/src/tagged_ptr/copy.rs +++ b/compiler/rustc_data_structures/src/tagged_ptr/copy.rs @@ -1,78 +1,94 @@ use super::{Pointer, Tag}; use crate::stable_hasher::{HashStable, StableHasher}; use std::fmt; +use std::hash::{Hash, Hasher}; use std::marker::PhantomData; +use std::mem::ManuallyDrop; use std::num::NonZeroUsize; +use std::ops::{Deref, DerefMut}; +use std::ptr::NonNull; -/// A `Copy` TaggedPtr. +/// A [`Copy`] tagged pointer. /// -/// You should use this instead of the `TaggedPtr` type in all cases where -/// `P: Copy`. +/// This is essentially `{ pointer: P, tag: T }` packed in a single pointer. +/// +/// You should use this instead of the [`TaggedPtr`] type in all cases where +/// `P` implements [`Copy`]. /// /// If `COMPARE_PACKED` is true, then the pointers will be compared and hashed without -/// unpacking. Otherwise we don't implement PartialEq/Eq/Hash; if you want that, -/// wrap the TaggedPtr. +/// unpacking. Otherwise we don't implement [`PartialEq`], [`Eq`] and [`Hash`]; +/// if you want that, wrap the [`CopyTaggedPtr`]. 
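For plain references, the `Pointer` impls above boil down to a `NonNull` round-trip: `into_ptr` is `NonNull::from` and `from_ptr` is `NonNull::as_ref`/`as_mut`. A small demonstration of that round-trip and of the alignment guarantee the trait's `BITS` contract relies on:

```rust
use std::ptr::NonNull;

fn main() {
    let value = 42u32;

    // `into_ptr` for `&T` is just `NonNull::from(self)`...
    let ptr: NonNull<u32> = NonNull::from(&value);

    // ...and a well-aligned pointer has its low bits free for a tag.
    assert_eq!(ptr.as_ptr() as usize % std::mem::align_of::<u32>(), 0);

    // `from_ptr` turns it back into a reference; this is only sound because the
    // pointer was created from a live shared reference a moment ago.
    let back: &u32 = unsafe { ptr.as_ref() };
    assert_eq!(*back, 42);
}
```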
+/// +/// [`TaggedPtr`]: crate::tagged_ptr::TaggedPtr pub struct CopyTaggedPtr<P, T, const COMPARE_PACKED: bool> where P: Pointer, T: Tag, { - packed: NonZeroUsize, - data: PhantomData<(P, T)>, -} - -impl<P, T, const COMPARE_PACKED: bool> Copy for CopyTaggedPtr<P, T, COMPARE_PACKED> -where - P: Pointer, - T: Tag, - P: Copy, -{ -} - -impl<P, T, const COMPARE_PACKED: bool> Clone for CopyTaggedPtr<P, T, COMPARE_PACKED> -where - P: Pointer, - T: Tag, - P: Copy, -{ - fn clone(&self) -> Self { - *self - } + /// This is semantically a pair of `pointer: P` and `tag: T` fields, + /// however we pack them in a single pointer, to save space. + /// + /// We pack the tag into the **most**-significant bits of the pointer to + /// ease retrieval of the value. A left shift is a multiplication and + /// those are embeddable in instruction encoding, for example: + /// + /// ```asm + /// // (<https://godbolt.org/z/jqcYPWEr3>) + /// example::shift_read3: + /// mov eax, dword ptr [8*rdi] + /// ret + /// + /// example::mask_read3: + /// and rdi, -8 + /// mov eax, dword ptr [rdi] + /// ret + /// ``` + /// + /// This is ASM outputted by rustc for reads of values behind tagged + /// pointers for different approaches of tagging: + /// - `shift_read3` uses `<< 3` (the tag is in the most-significant bits) + /// - `mask_read3` uses `& !0b111` (the tag is in the least-significant bits) + /// + /// The shift approach thus produces less instructions and is likely faster + /// (see <https://godbolt.org/z/Y913sMdWb>). + /// + /// Encoding diagram: + /// ```text + /// [ packed.addr ] + /// [ tag ] [ pointer.addr >> T::BITS ] <-- usize::BITS - T::BITS bits + /// ^ + /// | + /// T::BITS bits + /// ``` + /// + /// The tag can be retrieved by `packed.addr() >> T::BITS` and the pointer + /// can be retrieved by `packed.map_addr(|addr| addr << T::BITS)`. + packed: NonNull<P::Target>, + tag_ghost: PhantomData<T>, } -// We pack the tag into the *upper* bits of the pointer to ease retrieval of the -// value; a left shift is a multiplication and those are embeddable in -// instruction encoding. -impl<P, T, const COMPARE_PACKED: bool> CopyTaggedPtr<P, T, COMPARE_PACKED> +// Note that even though `CopyTaggedPtr` is only really expected to work with +// `P: Copy`, can't add `P: Copy` bound, because `CopyTaggedPtr` is used in the +// `TaggedPtr`'s implementation. +impl<P, T, const CP: bool> CopyTaggedPtr<P, T, CP> where P: Pointer, T: Tag, { - const TAG_BIT_SHIFT: usize = usize::BITS as usize - T::BITS; - const ASSERTION: () = { - assert!(T::BITS <= P::BITS); - // Used for the transmute_copy's below - assert!(std::mem::size_of::<&P::Target>() == std::mem::size_of::<usize>()); - }; - + /// Tags `pointer` with `tag`. + /// + /// Note that this leaks `pointer`: it won't be dropped when + /// `CopyTaggedPtr` is dropped. If you have a pointer with a significant + /// drop, use [`TaggedPtr`] instead. + /// + /// [`TaggedPtr`]: crate::tagged_ptr::TaggedPtr + #[inline] pub fn new(pointer: P, tag: T) -> Self { - // Trigger assert! - let () = Self::ASSERTION; - let packed_tag = tag.into_usize() << Self::TAG_BIT_SHIFT; - - Self { - // SAFETY: We know that the pointer is non-null, as it must be - // dereferenceable per `Pointer` safety contract. 
- packed: unsafe { - NonZeroUsize::new_unchecked((P::into_usize(pointer) >> T::BITS) | packed_tag) - }, - data: PhantomData, - } + Self { packed: Self::pack(P::into_ptr(pointer), tag), tag_ghost: PhantomData } } - pub(super) fn pointer_raw(&self) -> usize { - self.packed.get() << T::BITS - } + /// Retrieves the pointer. + #[inline] pub fn pointer(self) -> P where P: Copy, @@ -81,66 +97,143 @@ where // // Note that this isn't going to double-drop or anything because we have // P: Copy - unsafe { P::from_usize(self.pointer_raw()) } - } - pub fn pointer_ref(&self) -> &P::Target { - // SAFETY: pointer_raw returns the original pointer - unsafe { std::mem::transmute_copy(&self.pointer_raw()) } - } - pub fn pointer_mut(&mut self) -> &mut P::Target - where - P: std::ops::DerefMut, - { - // SAFETY: pointer_raw returns the original pointer - unsafe { std::mem::transmute_copy(&self.pointer_raw()) } + unsafe { P::from_ptr(self.pointer_raw()) } } + + /// Retrieves the tag. #[inline] pub fn tag(&self) -> T { - unsafe { T::from_usize(self.packed.get() >> Self::TAG_BIT_SHIFT) } + // Unpack the tag, according to the `self.packed` encoding scheme + let tag = self.packed.addr().get() >> Self::TAG_BIT_SHIFT; + + // Safety: + // The shift retrieves the original value from `T::into_usize`, + // satisfying `T::from_usize`'s preconditions. + unsafe { T::from_usize(tag) } } + + /// Sets the tag to a new value. #[inline] pub fn set_tag(&mut self, tag: T) { - let mut packed = self.packed.get(); - let new_tag = T::into_usize(tag) << Self::TAG_BIT_SHIFT; - let tag_mask = (1 << T::BITS) - 1; - packed &= !(tag_mask << Self::TAG_BIT_SHIFT); - packed |= new_tag; - self.packed = unsafe { NonZeroUsize::new_unchecked(packed) }; + self.packed = Self::pack(self.pointer_raw(), tag); + } + + const TAG_BIT_SHIFT: u32 = usize::BITS - T::BITS; + const ASSERTION: () = { assert!(T::BITS <= P::BITS) }; + + /// Pack pointer `ptr` that comes from [`P::into_ptr`] with a `tag`, + /// according to `self.packed` encoding scheme. + /// + /// [`P::into_ptr`]: Pointer::into_ptr + #[inline] + fn pack(ptr: NonNull<P::Target>, tag: T) -> NonNull<P::Target> { + // Trigger assert! + let () = Self::ASSERTION; + + let packed_tag = tag.into_usize() << Self::TAG_BIT_SHIFT; + + ptr.map_addr(|addr| { + // Safety: + // - The pointer is `NonNull` => it's address is `NonZeroUsize` + // - `P::BITS` least significant bits are always zero (`Pointer` contract) + // - `T::BITS <= P::BITS` (from `Self::ASSERTION`) + // + // Thus `addr >> T::BITS` is guaranteed to be non-zero. + // + // `{non_zero} | packed_tag` can't make the value zero. + + let packed = (addr.get() >> T::BITS) | packed_tag; + unsafe { NonZeroUsize::new_unchecked(packed) } + }) } + + /// Retrieves the original raw pointer from `self.packed`. + #[inline] + pub(super) fn pointer_raw(&self) -> NonNull<P::Target> { + self.packed.map_addr(|addr| unsafe { NonZeroUsize::new_unchecked(addr.get() << T::BITS) }) + } + + /// This provides a reference to the `P` pointer itself, rather than the + /// `Deref::Target`. It is used for cases where we want to call methods + /// that may be implement differently for the Pointer than the Pointee + /// (e.g., `Rc::clone` vs cloning the inner value). + pub(super) fn with_pointer_ref<R>(&self, f: impl FnOnce(&P) -> R) -> R { + // Safety: + // - `self.raw.pointer_raw()` is originally returned from `P::into_ptr` + // and as such is valid for `P::from_ptr`. + // - This also allows us to not care whatever `f` panics or not. 
+ // - Even though we create a copy of the pointer, we store it inside + // `ManuallyDrop` and only access it by-ref, so we don't double-drop. + // + // Semantically this is just `f(&self.pointer)` (where `self.pointer` + // is non-packed original pointer). + // + // Note that even though `CopyTaggedPtr` is only really expected to + // work with `P: Copy`, we have to assume `P: ?Copy`, because + // `CopyTaggedPtr` is used in the `TaggedPtr`'s implementation. + let ptr = unsafe { ManuallyDrop::new(P::from_ptr(self.pointer_raw())) }; + f(&ptr) + } +} + +impl<P, T, const CP: bool> Copy for CopyTaggedPtr<P, T, CP> +where + P: Pointer + Copy, + T: Tag, +{ } -impl<P, T, const COMPARE_PACKED: bool> std::ops::Deref for CopyTaggedPtr<P, T, COMPARE_PACKED> +impl<P, T, const CP: bool> Clone for CopyTaggedPtr<P, T, CP> +where + P: Pointer + Copy, + T: Tag, +{ + #[inline] + fn clone(&self) -> Self { + *self + } +} + +impl<P, T, const CP: bool> Deref for CopyTaggedPtr<P, T, CP> where P: Pointer, T: Tag, { type Target = P::Target; + + #[inline] fn deref(&self) -> &Self::Target { - self.pointer_ref() + // Safety: + // `pointer_raw` returns the original pointer from `P::into_ptr` which, + // by the `Pointer`'s contract, must be valid. + unsafe { self.pointer_raw().as_ref() } } } -impl<P, T, const COMPARE_PACKED: bool> std::ops::DerefMut for CopyTaggedPtr<P, T, COMPARE_PACKED> +impl<P, T, const CP: bool> DerefMut for CopyTaggedPtr<P, T, CP> where - P: Pointer + std::ops::DerefMut, + P: Pointer + DerefMut, T: Tag, { + #[inline] fn deref_mut(&mut self) -> &mut Self::Target { - self.pointer_mut() + // Safety: + // `pointer_raw` returns the original pointer from `P::into_ptr` which, + // by the `Pointer`'s contract, must be valid for writes if + // `P: DerefMut`. + unsafe { self.pointer_raw().as_mut() } } } -impl<P, T, const COMPARE_PACKED: bool> fmt::Debug for CopyTaggedPtr<P, T, COMPARE_PACKED> +impl<P, T, const CP: bool> fmt::Debug for CopyTaggedPtr<P, T, CP> where - P: Pointer, - P::Target: fmt::Debug, + P: Pointer + fmt::Debug, T: Tag + fmt::Debug, { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - f.debug_struct("CopyTaggedPtr") - .field("pointer", &self.pointer_ref()) - .field("tag", &self.tag()) - .finish() + self.with_pointer_ref(|ptr| { + f.debug_struct("CopyTaggedPtr").field("pointer", ptr).field("tag", &self.tag()).finish() + }) } } @@ -149,6 +242,7 @@ where P: Pointer, T: Tag, { + #[inline] fn eq(&self, other: &Self) -> bool { self.packed == other.packed } @@ -161,25 +255,74 @@ where { } -impl<P, T> std::hash::Hash for CopyTaggedPtr<P, T, true> +impl<P, T> Hash for CopyTaggedPtr<P, T, true> where P: Pointer, T: Tag, { - fn hash<H: std::hash::Hasher>(&self, state: &mut H) { + #[inline] + fn hash<H: Hasher>(&self, state: &mut H) { self.packed.hash(state); } } -impl<P, T, HCX, const COMPARE_PACKED: bool> HashStable<HCX> for CopyTaggedPtr<P, T, COMPARE_PACKED> +impl<P, T, HCX, const CP: bool> HashStable<HCX> for CopyTaggedPtr<P, T, CP> where P: Pointer + HashStable<HCX>, T: Tag + HashStable<HCX>, { fn hash_stable(&self, hcx: &mut HCX, hasher: &mut StableHasher) { - unsafe { - Pointer::with_ref(self.pointer_raw(), |p: &P| p.hash_stable(hcx, hasher)); - } + self.with_pointer_ref(|ptr| ptr.hash_stable(hcx, hasher)); self.tag().hash_stable(hcx, hasher); } } + +// Safety: +// `CopyTaggedPtr<P, T, ..>` is semantically just `{ ptr: P, tag: T }`, as such +// it's ok to implement `Sync` as long as `P: Sync, T: Sync` +unsafe impl<P, T, const CP: bool> Sync for CopyTaggedPtr<P, T, CP> +where + P: Sync 
+ Pointer, + T: Sync + Tag, +{ +} + +// Safety: +// `CopyTaggedPtr<P, T, ..>` is semantically just `{ ptr: P, tag: T }`, as such +// it's ok to implement `Send` as long as `P: Send, T: Send` +unsafe impl<P, T, const CP: bool> Send for CopyTaggedPtr<P, T, CP> +where + P: Send + Pointer, + T: Send + Tag, +{ +} + +/// Test that `new` does not compile if there is not enough alignment for the +/// tag in the pointer. +/// +/// ```compile_fail,E0080 +/// use rustc_data_structures::tagged_ptr::{CopyTaggedPtr, Tag}; +/// +/// #[derive(Copy, Clone, Debug, PartialEq, Eq)] +/// enum Tag2 { B00 = 0b00, B01 = 0b01, B10 = 0b10, B11 = 0b11 }; +/// +/// unsafe impl Tag for Tag2 { +/// const BITS: u32 = 2; +/// +/// fn into_usize(self) -> usize { todo!() } +/// unsafe fn from_usize(tag: usize) -> Self { todo!() } +/// } +/// +/// let value = 12u16; +/// let reference = &value; +/// let tag = Tag2::B01; +/// +/// let _ptr = CopyTaggedPtr::<_, _, true>::new(reference, tag); +/// ``` +// For some reason miri does not get the compile error +// probably it `check`s instead of `build`ing? +#[cfg(not(miri))] +const _: () = (); + +#[cfg(test)] +mod tests; diff --git a/compiler/rustc_data_structures/src/tagged_ptr/copy/tests.rs b/compiler/rustc_data_structures/src/tagged_ptr/copy/tests.rs new file mode 100644 index 000000000..bfcc2e603 --- /dev/null +++ b/compiler/rustc_data_structures/src/tagged_ptr/copy/tests.rs @@ -0,0 +1,50 @@ +use std::ptr; + +use crate::stable_hasher::{HashStable, StableHasher}; +use crate::tagged_ptr::{CopyTaggedPtr, Pointer, Tag, Tag2}; + +#[test] +fn smoke() { + let value = 12u32; + let reference = &value; + let tag = Tag2::B01; + + let ptr = tag_ptr(reference, tag); + + assert_eq!(ptr.tag(), tag); + assert_eq!(*ptr, 12); + assert!(ptr::eq(ptr.pointer(), reference)); + + let copy = ptr; + + let mut ptr = ptr; + ptr.set_tag(Tag2::B00); + assert_eq!(ptr.tag(), Tag2::B00); + + assert_eq!(copy.tag(), tag); + assert_eq!(*copy, 12); + assert!(ptr::eq(copy.pointer(), reference)); +} + +#[test] +fn stable_hash_hashes_as_tuple() { + let hash_packed = { + let mut hasher = StableHasher::new(); + tag_ptr(&12, Tag2::B11).hash_stable(&mut (), &mut hasher); + + hasher.finalize() + }; + + let hash_tupled = { + let mut hasher = StableHasher::new(); + (&12, Tag2::B11).hash_stable(&mut (), &mut hasher); + hasher.finalize() + }; + + assert_eq!(hash_packed, hash_tupled); +} + +/// Helper to create tagged pointers without specifying `COMPARE_PACKED` if it does not matter. +fn tag_ptr<P: Pointer, T: Tag>(ptr: P, tag: T) -> CopyTaggedPtr<P, T, true> { + CopyTaggedPtr::new(ptr, tag) +} diff --git a/compiler/rustc_data_structures/src/tagged_ptr/drop.rs b/compiler/rustc_data_structures/src/tagged_ptr/drop.rs index b0315c93d..4e42b5b4a 100644 --- a/compiler/rustc_data_structures/src/tagged_ptr/drop.rs +++ b/compiler/rustc_data_structures/src/tagged_ptr/drop.rs @@ -1,14 +1,21 @@ -use super::{Pointer, Tag}; -use crate::stable_hasher::{HashStable, StableHasher}; use std::fmt; +use std::hash::{Hash, Hasher}; +use std::ops::{Deref, DerefMut}; use super::CopyTaggedPtr; +use super::{Pointer, Tag}; +use crate::stable_hasher::{HashStable, StableHasher}; -/// A TaggedPtr implementing `Drop`. +/// A tagged pointer that supports pointers that implement [`Drop`]. +/// +/// This is essentially `{ pointer: P, tag: T }` packed in a single pointer. +/// +/// You should use [`CopyTaggedPtr`] instead of the this type in all cases +/// where `P` implements [`Copy`]. 
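The encoding documented for `CopyTaggedPtr` above, tag in the most-significant bits and address shifted down by `T::BITS`, can be checked with plain integer arithmetic. The sketch below derives the tag width from the tag values (in the spirit of `bits_for_tags`) and then packs and unpacks by hand; it is illustrative arithmetic, not the crate's implementation:

```rust
// Tag values we want to store (say, a three-variant enum).
const TAGS: [usize; 3] = [0, 1, 2];

// Bits required for the largest tag value (what `bits_for_tags` computes).
const fn bits_for_tags(tags: &[usize]) -> u32 {
    let mut bits = 0;
    let mut i = 0;
    while i < tags.len() {
        let b = usize::BITS - tags[i].leading_zeros();
        if b > bits {
            bits = b;
        }
        i += 1;
    }
    bits
}

const TAG_BITS: u32 = bits_for_tags(&TAGS); // 2
const SHIFT: u32 = usize::BITS - TAG_BITS;

fn pack(addr: usize, tag: usize) -> usize {
    // The pointee's alignment must leave room for the tag in the low bits.
    assert!(addr.trailing_zeros() >= TAG_BITS);
    (addr >> TAG_BITS) | (tag << SHIFT)
}

fn unpack(packed: usize) -> (usize, usize) {
    // Shifting left drops the tag and restores the (aligned) address;
    // shifting right by SHIFT isolates the tag.
    (packed << TAG_BITS, packed >> SHIFT)
}

fn main() {
    let addr = 0x1234_5670_usize; // 16-byte aligned, so the low bits are zero
    let packed = pack(addr, 2);
    assert_eq!(unpack(packed), (addr, 2));
}
```

Packing into the high bits rather than the low bits is what lets the address be recovered with a single left shift, which is the codegen advantage the `packed` field's comment demonstrates.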
/// /// If `COMPARE_PACKED` is true, then the pointers will be compared and hashed without -/// unpacking. Otherwise we don't implement PartialEq/Eq/Hash; if you want that, -/// wrap the TaggedPtr. +/// unpacking. Otherwise we don't implement [`PartialEq`], [`Eq`] and [`Hash`]; +/// if you want that, wrap the [`TaggedPtr`]. pub struct TaggedPtr<P, T, const COMPARE_PACKED: bool> where P: Pointer, @@ -17,58 +24,67 @@ where raw: CopyTaggedPtr<P, T, COMPARE_PACKED>, } -impl<P, T, const COMPARE_PACKED: bool> Clone for TaggedPtr<P, T, COMPARE_PACKED> -where - P: Pointer + Clone, - T: Tag, -{ - fn clone(&self) -> Self { - unsafe { Self::new(P::with_ref(self.raw.pointer_raw(), |p| p.clone()), self.raw.tag()) } - } -} - -// We pack the tag into the *upper* bits of the pointer to ease retrieval of the -// value; a right shift is a multiplication and those are embeddable in -// instruction encoding. -impl<P, T, const COMPARE_PACKED: bool> TaggedPtr<P, T, COMPARE_PACKED> +impl<P, T, const CP: bool> TaggedPtr<P, T, CP> where P: Pointer, T: Tag, { + /// Tags `pointer` with `tag`. + #[inline] pub fn new(pointer: P, tag: T) -> Self { TaggedPtr { raw: CopyTaggedPtr::new(pointer, tag) } } - pub fn pointer_ref(&self) -> &P::Target { - self.raw.pointer_ref() - } + /// Retrieves the tag. + #[inline] pub fn tag(&self) -> T { self.raw.tag() } + + /// Sets the tag to a new value. + #[inline] + pub fn set_tag(&mut self, tag: T) { + self.raw.set_tag(tag) + } +} + +impl<P, T, const CP: bool> Clone for TaggedPtr<P, T, CP> +where + P: Pointer + Clone, + T: Tag, +{ + fn clone(&self) -> Self { + let ptr = self.raw.with_pointer_ref(P::clone); + + Self::new(ptr, self.tag()) + } } -impl<P, T, const COMPARE_PACKED: bool> std::ops::Deref for TaggedPtr<P, T, COMPARE_PACKED> +impl<P, T, const CP: bool> Deref for TaggedPtr<P, T, CP> where P: Pointer, T: Tag, { type Target = P::Target; + + #[inline] fn deref(&self) -> &Self::Target { - self.raw.pointer_ref() + self.raw.deref() } } -impl<P, T, const COMPARE_PACKED: bool> std::ops::DerefMut for TaggedPtr<P, T, COMPARE_PACKED> +impl<P, T, const CP: bool> DerefMut for TaggedPtr<P, T, CP> where - P: Pointer + std::ops::DerefMut, + P: Pointer + DerefMut, T: Tag, { + #[inline] fn deref_mut(&mut self) -> &mut Self::Target { - self.raw.pointer_mut() + self.raw.deref_mut() } } -impl<P, T, const COMPARE_PACKED: bool> Drop for TaggedPtr<P, T, COMPARE_PACKED> +impl<P, T, const CP: bool> Drop for TaggedPtr<P, T, CP> where P: Pointer, T: Tag, @@ -76,22 +92,20 @@ where fn drop(&mut self) { // No need to drop the tag, as it's Copy unsafe { - drop(P::from_usize(self.raw.pointer_raw())); + drop(P::from_ptr(self.raw.pointer_raw())); } } } -impl<P, T, const COMPARE_PACKED: bool> fmt::Debug for TaggedPtr<P, T, COMPARE_PACKED> +impl<P, T, const CP: bool> fmt::Debug for TaggedPtr<P, T, CP> where - P: Pointer, - P::Target: fmt::Debug, + P: Pointer + fmt::Debug, T: Tag + fmt::Debug, { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - f.debug_struct("TaggedPtr") - .field("pointer", &self.pointer_ref()) - .field("tag", &self.tag()) - .finish() + self.raw.with_pointer_ref(|ptr| { + f.debug_struct("TaggedPtr").field("pointer", ptr).field("tag", &self.tag()).finish() + }) } } @@ -100,6 +114,7 @@ where P: Pointer, T: Tag, { + #[inline] fn eq(&self, other: &Self) -> bool { self.raw.eq(&other.raw) } @@ -112,17 +127,18 @@ where { } -impl<P, T> std::hash::Hash for TaggedPtr<P, T, true> +impl<P, T> Hash for TaggedPtr<P, T, true> where P: Pointer, T: Tag, { - fn hash<H: std::hash::Hasher>(&self, state: 
&mut H) { + #[inline] + fn hash<H: Hasher>(&self, state: &mut H) { self.raw.hash(state); } } -impl<P, T, HCX, const COMPARE_PACKED: bool> HashStable<HCX> for TaggedPtr<P, T, COMPARE_PACKED> +impl<P, T, HCX, const CP: bool> HashStable<HCX> for TaggedPtr<P, T, CP> where P: Pointer + HashStable<HCX>, T: Tag + HashStable<HCX>, @@ -131,3 +147,33 @@ where self.raw.hash_stable(hcx, hasher); } } + +/// Test that `new` does not compile if there is not enough alignment for the +/// tag in the pointer. +/// +/// ```compile_fail,E0080 +/// use rustc_data_structures::tagged_ptr::{TaggedPtr, Tag}; +/// +/// #[derive(Copy, Clone, Debug, PartialEq, Eq)] +/// enum Tag2 { B00 = 0b00, B01 = 0b01, B10 = 0b10, B11 = 0b11 }; +/// +/// unsafe impl Tag for Tag2 { +/// const BITS: u32 = 2; +/// +/// fn into_usize(self) -> usize { todo!() } +/// unsafe fn from_usize(tag: usize) -> Self { todo!() } +/// } +/// +/// let value = 12u16; +/// let reference = &value; +/// let tag = Tag2::B01; +/// +/// let _ptr = TaggedPtr::<_, _, true>::new(reference, tag); +/// ``` +// For some reason miri does not get the compile error +// probably it `check`s instead of `build`ing? +#[cfg(not(miri))] +const _: () = (); + +#[cfg(test)] +mod tests; diff --git a/compiler/rustc_data_structures/src/tagged_ptr/drop/tests.rs b/compiler/rustc_data_structures/src/tagged_ptr/drop/tests.rs new file mode 100644 index 000000000..2c17d678d --- /dev/null +++ b/compiler/rustc_data_structures/src/tagged_ptr/drop/tests.rs @@ -0,0 +1,71 @@ +use std::{ptr, sync::Arc}; + +use crate::tagged_ptr::{Pointer, Tag, Tag2, TaggedPtr}; + +#[test] +fn smoke() { + let value = 12u32; + let reference = &value; + let tag = Tag2::B01; + + let ptr = tag_ptr(reference, tag); + + assert_eq!(ptr.tag(), tag); + assert_eq!(*ptr, 12); + + let clone = ptr.clone(); + assert_eq!(clone.tag(), tag); + assert_eq!(*clone, 12); + + let mut ptr = ptr; + ptr.set_tag(Tag2::B00); + assert_eq!(ptr.tag(), Tag2::B00); + + assert_eq!(clone.tag(), tag); + assert_eq!(*clone, 12); + assert!(ptr::eq(&*ptr, &*clone)) +} + +#[test] +fn boxed() { + let value = 12u32; + let boxed = Box::new(value); + let tag = Tag2::B01; + + let ptr = tag_ptr(boxed, tag); + + assert_eq!(ptr.tag(), tag); + assert_eq!(*ptr, 12); + + let clone = ptr.clone(); + assert_eq!(clone.tag(), tag); + assert_eq!(*clone, 12); + + let mut ptr = ptr; + ptr.set_tag(Tag2::B00); + assert_eq!(ptr.tag(), Tag2::B00); + + assert_eq!(clone.tag(), tag); + assert_eq!(*clone, 12); + assert!(!ptr::eq(&*ptr, &*clone)) +} + +#[test] +fn arclones() { + let value = 12u32; + let arc = Arc::new(value); + let tag = Tag2::B01; + + let ptr = tag_ptr(arc, tag); + + assert_eq!(ptr.tag(), tag); + assert_eq!(*ptr, 12); + + let clone = ptr.clone(); + assert!(ptr::eq(&*ptr, &*clone)) +} + +/// Helper to create tagged pointers without specifying `COMPARE_PACKED` if it does not matter. +fn tag_ptr<P: Pointer, T: Tag>(ptr: P, tag: T) -> TaggedPtr<P, T, true> { + TaggedPtr::new(ptr, tag) +} diff --git a/compiler/rustc_data_structures/src/tagged_ptr/impl_tag.rs b/compiler/rustc_data_structures/src/tagged_ptr/impl_tag.rs new file mode 100644 index 000000000..cb7f7d318 --- /dev/null +++ b/compiler/rustc_data_structures/src/tagged_ptr/impl_tag.rs @@ -0,0 +1,144 @@ +/// Implements [`Tag`] for a given type. +/// +/// You can use `impl_tag` on structs and enums. +/// You need to specify the type and all its possible values, +/// which can only be paths with optional fields. 
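Where `CopyTaggedPtr` hands out copies, `TaggedPtr` above owns its pointer, so its `Drop` impl rebuilds the owning pointer from the raw `NonNull` exactly once to free the pointee. The same shape, minus the tag, in a self-contained sketch (the `Owned` type is made up for illustration):

```rust
use std::ptr::NonNull;

// Minimal owning wrapper in the spirit of `TaggedPtr`'s `Drop` impl: the raw
// pointer is turned back into the owning `Box` exactly once, when dropped.
struct Owned<T> {
    raw: NonNull<T>,
}

impl<T> Owned<T> {
    fn new(value: T) -> Self {
        // Safety: `Box::into_raw` never returns null.
        Self { raw: unsafe { NonNull::new_unchecked(Box::into_raw(Box::new(value))) } }
    }
}

impl<T> std::ops::Deref for Owned<T> {
    type Target = T;

    fn deref(&self) -> &T {
        // Safety: `raw` points to a live, box-allocated `T` owned by `self`.
        unsafe { self.raw.as_ref() }
    }
}

impl<T> Drop for Owned<T> {
    fn drop(&mut self) {
        // Safety: `raw` came from `Box::into_raw` and is dropped exactly once here,
        // like `drop(P::from_ptr(...))` in `TaggedPtr::drop`.
        unsafe { drop(Box::from_raw(self.raw.as_ptr())) }
    }
}

fn main() {
    let x = Owned::new(String::from("hello"));
    assert_eq!(&*x, "hello");
} // `x` is freed here.
```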
+/// +/// [`Tag`]: crate::tagged_ptr::Tag +/// +/// # Examples +/// +/// Basic usage: +/// +/// ``` +/// #![feature(macro_metavar_expr)] +/// use rustc_data_structures::{impl_tag, tagged_ptr::Tag}; +/// +/// #[derive(Copy, Clone, PartialEq, Debug)] +/// enum SomeTag { +/// A, +/// B, +/// X { v: bool }, +/// Y(bool, bool), +/// } +/// +/// impl_tag! { +/// // The type for which the `Tag` will be implemented +/// impl Tag for SomeTag; +/// // You need to specify all possible tag values: +/// SomeTag::A, // 0 +/// SomeTag::B, // 1 +/// // For variants with fields, you need to specify the fields: +/// SomeTag::X { v: true }, // 2 +/// SomeTag::X { v: false }, // 3 +/// // For tuple variants use named syntax: +/// SomeTag::Y { 0: true, 1: true }, // 4 +/// SomeTag::Y { 0: false, 1: true }, // 5 +/// SomeTag::Y { 0: true, 1: false }, // 6 +/// SomeTag::Y { 0: false, 1: false }, // 7 +/// } +/// +/// // Tag values are assigned in order: +/// assert_eq!(SomeTag::A.into_usize(), 0); +/// assert_eq!(SomeTag::X { v: false }.into_usize(), 3); +/// assert_eq!(SomeTag::Y(false, true).into_usize(), 5); +/// +/// assert_eq!(unsafe { SomeTag::from_usize(1) }, SomeTag::B); +/// assert_eq!(unsafe { SomeTag::from_usize(2) }, SomeTag::X { v: true }); +/// assert_eq!(unsafe { SomeTag::from_usize(7) }, SomeTag::Y(false, false)); +/// ``` +/// +/// Structs are supported: +/// +/// ``` +/// #![feature(macro_metavar_expr)] +/// # use rustc_data_structures::impl_tag; +/// #[derive(Copy, Clone)] +/// struct Flags { a: bool, b: bool } +/// +/// impl_tag! { +/// impl Tag for Flags; +/// Flags { a: true, b: true }, +/// Flags { a: false, b: true }, +/// Flags { a: true, b: false }, +/// Flags { a: false, b: false }, +/// } +/// ``` +/// +/// Not specifying all values results in a compile error: +/// +/// ```compile_fail,E0004 +/// #![feature(macro_metavar_expr)] +/// # use rustc_data_structures::impl_tag; +/// #[derive(Copy, Clone)] +/// enum E { +/// A, +/// B, +/// } +/// +/// impl_tag! { +/// impl Tag for E; +/// E::A, +/// } +/// ``` +#[macro_export] +macro_rules! impl_tag { + ( + impl Tag for $Self:ty; + $( + $($path:ident)::* $( { $( $fields:tt )* })?, + )* + ) => { + // Safety: + // `bits_for_tags` is called on the same `${index()}`-es as + // `into_usize` returns, thus `BITS` constant is correct. + unsafe impl $crate::tagged_ptr::Tag for $Self { + const BITS: u32 = $crate::tagged_ptr::bits_for_tags(&[ + $( + ${index()}, + $( ${ignore(path)} )* + )* + ]); + + #[inline] + fn into_usize(self) -> usize { + // This forbids use of repeating patterns (`Enum::V`&`Enum::V`, etc) + // (or at least it should, see <https://github.com/rust-lang/rust/issues/110613>) + #[forbid(unreachable_patterns)] + match self { + // `match` is doing heavy lifting here, by requiring exhaustiveness + $( + $($path)::* $( { $( $fields )* } )? => ${index()}, + )* + } + } + + #[inline] + unsafe fn from_usize(tag: usize) -> Self { + match tag { + $( + ${index()} => $($path)::* $( { $( $fields )* } )?, + )* + + // Safety: + // `into_usize` only returns `${index()}` of the same + // repetition as we are filtering above, thus if this is + // reached, the safety contract of this function was + // already breached. 
+ _ => unsafe { + debug_assert!( + false, + "invalid tag: {tag}\ + (this is a bug in the caller of `from_usize`)" + ); + std::hint::unreachable_unchecked() + }, + } + } + + } + }; +} + +#[cfg(test)] +mod tests; diff --git a/compiler/rustc_data_structures/src/tagged_ptr/impl_tag/tests.rs b/compiler/rustc_data_structures/src/tagged_ptr/impl_tag/tests.rs new file mode 100644 index 000000000..62c926153 --- /dev/null +++ b/compiler/rustc_data_structures/src/tagged_ptr/impl_tag/tests.rs @@ -0,0 +1,34 @@ +#[test] +fn bits_constant() { + use crate::tagged_ptr::Tag; + + #[derive(Copy, Clone)] + struct Unit; + impl_tag! { impl Tag for Unit; Unit, } + assert_eq!(Unit::BITS, 0); + + #[derive(Copy, Clone)] + enum Enum3 { + A, + B, + C, + } + impl_tag! { impl Tag for Enum3; Enum3::A, Enum3::B, Enum3::C, } + assert_eq!(Enum3::BITS, 2); + + #[derive(Copy, Clone)] + struct Eight(bool, bool, bool); + impl_tag! { + impl Tag for Eight; + Eight { 0: true, 1: true, 2: true }, + Eight { 0: true, 1: true, 2: false }, + Eight { 0: true, 1: false, 2: true }, + Eight { 0: true, 1: false, 2: false }, + Eight { 0: false, 1: true, 2: true }, + Eight { 0: false, 1: true, 2: false }, + Eight { 0: false, 1: false, 2: true }, + Eight { 0: false, 1: false, 2: false }, + } + + assert_eq!(Eight::BITS, 3); +} diff --git a/compiler/rustc_data_structures/src/vec_linked_list.rs b/compiler/rustc_data_structures/src/vec_linked_list.rs index ce60d40b2..fda72c9a3 100644 --- a/compiler/rustc_data_structures/src/vec_linked_list.rs +++ b/compiler/rustc_data_structures/src/vec_linked_list.rs @@ -1,4 +1,4 @@ -use rustc_index::vec::{Idx, IndexVec}; +use rustc_index::{Idx, IndexVec}; pub fn iter<Ls>( first: Option<Ls::LinkIndex>, diff --git a/compiler/rustc_data_structures/src/work_queue.rs b/compiler/rustc_data_structures/src/work_queue.rs index 10317f1af..9db6b6f20 100644 --- a/compiler/rustc_data_structures/src/work_queue.rs +++ b/compiler/rustc_data_structures/src/work_queue.rs @@ -1,5 +1,5 @@ use rustc_index::bit_set::BitSet; -use rustc_index::vec::Idx; +use rustc_index::Idx; use std::collections::VecDeque; /// A work queue is a handy data structure for tracking work left to |