From 698f8c2f01ea549d77d7dc3338a12e04c11057b9 Mon Sep 17 00:00:00 2001 From: Daniel Baumann Date: Wed, 17 Apr 2024 14:02:58 +0200 Subject: Adding upstream version 1.64.0+dfsg1. Signed-off-by: Daniel Baumann --- compiler/rustc_data_structures/Cargo.toml | 41 + compiler/rustc_data_structures/src/atomic_ref.rs | 26 + compiler/rustc_data_structures/src/base_n.rs | 42 + compiler/rustc_data_structures/src/base_n/tests.rs | 24 + .../src/binary_search_util/mod.rs | 68 ++ .../src/binary_search_util/tests.rs | 23 + compiler/rustc_data_structures/src/captures.rs | 8 + compiler/rustc_data_structures/src/fingerprint.rs | 215 ++++ .../rustc_data_structures/src/fingerprint/tests.rs | 14 + compiler/rustc_data_structures/src/flock.rs | 26 + compiler/rustc_data_structures/src/flock/linux.rs | 40 + compiler/rustc_data_structures/src/flock/unix.rs | 51 + .../rustc_data_structures/src/flock/unsupported.rs | 16 + .../rustc_data_structures/src/flock/windows.rs | 77 ++ compiler/rustc_data_structures/src/frozen.rs | 64 ++ compiler/rustc_data_structures/src/functor.rs | 99 ++ compiler/rustc_data_structures/src/fx.rs | 14 + .../src/graph/dominators/mod.rs | 324 ++++++ .../src/graph/dominators/tests.rs | 45 + .../src/graph/implementation/mod.rs | 366 ++++++ .../src/graph/implementation/tests.rs | 131 +++ .../rustc_data_structures/src/graph/iterate/mod.rs | 353 ++++++ .../src/graph/iterate/tests.rs | 38 + compiler/rustc_data_structures/src/graph/mod.rs | 81 ++ .../rustc_data_structures/src/graph/reference.rs | 39 + .../rustc_data_structures/src/graph/scc/mod.rs | 567 +++++++++ .../rustc_data_structures/src/graph/scc/tests.rs | 216 ++++ compiler/rustc_data_structures/src/graph/tests.rs | 73 ++ .../src/graph/vec_graph/mod.rs | 109 ++ .../src/graph/vec_graph/tests.rs | 42 + compiler/rustc_data_structures/src/intern.rs | 196 ++++ compiler/rustc_data_structures/src/intern/tests.rs | 59 + compiler/rustc_data_structures/src/jobserver.rs | 40 + compiler/rustc_data_structures/src/lib.rs | 113 ++ compiler/rustc_data_structures/src/macros.rs | 37 + compiler/rustc_data_structures/src/map_in_place.rs | 108 ++ compiler/rustc_data_structures/src/memmap.rs | 108 ++ .../src/obligation_forest/graphviz.rs | 90 ++ .../src/obligation_forest/mod.rs | 698 +++++++++++ .../src/obligation_forest/tests.rs | 479 ++++++++ .../rustc_data_structures/src/owning_ref/LICENSE | 21 + .../rustc_data_structures/src/owning_ref/mod.rs | 1214 ++++++++++++++++++++ .../rustc_data_structures/src/owning_ref/tests.rs | 711 ++++++++++++ compiler/rustc_data_structures/src/profiling.rs | 861 ++++++++++++++ compiler/rustc_data_structures/src/sharded.rs | 150 +++ compiler/rustc_data_structures/src/sip128.rs | 496 ++++++++ compiler/rustc_data_structures/src/sip128/tests.rs | 497 ++++++++ compiler/rustc_data_structures/src/small_c_str.rs | 81 ++ .../rustc_data_structures/src/small_c_str/tests.rs | 45 + compiler/rustc_data_structures/src/small_str.rs | 68 ++ .../rustc_data_structures/src/small_str/tests.rs | 20 + .../rustc_data_structures/src/snapshot_map/mod.rs | 143 +++ .../src/snapshot_map/tests.rs | 43 + compiler/rustc_data_structures/src/sorted_map.rs | 302 +++++ .../src/sorted_map/index_map.rs | 154 +++ .../rustc_data_structures/src/sorted_map/tests.rs | 222 ++++ .../rustc_data_structures/src/sso/either_iter.rs | 75 ++ compiler/rustc_data_structures/src/sso/map.rs | 557 +++++++++ compiler/rustc_data_structures/src/sso/mod.rs | 6 + compiler/rustc_data_structures/src/sso/set.rs | 238 ++++ .../rustc_data_structures/src/stable_hasher.rs | 650 +++++++++++ 
.../src/stable_hasher/tests.rs | 163 +++ compiler/rustc_data_structures/src/stack.rs | 18 + compiler/rustc_data_structures/src/steal.rs | 55 + compiler/rustc_data_structures/src/svh.rs | 69 ++ compiler/rustc_data_structures/src/sync.rs | 630 ++++++++++ compiler/rustc_data_structures/src/tagged_ptr.rs | 168 +++ .../rustc_data_structures/src/tagged_ptr/copy.rs | 185 +++ .../rustc_data_structures/src/tagged_ptr/drop.rs | 133 +++ compiler/rustc_data_structures/src/temp_dir.rs | 34 + compiler/rustc_data_structures/src/thin_vec.rs | 135 +++ .../rustc_data_structures/src/thin_vec/tests.rs | 42 + compiler/rustc_data_structures/src/tiny_list.rs | 81 ++ .../rustc_data_structures/src/tiny_list/tests.rs | 155 +++ .../src/transitive_relation.rs | 392 +++++++ .../src/transitive_relation/tests.rs | 362 ++++++ compiler/rustc_data_structures/src/unhash.rs | 29 + .../rustc_data_structures/src/vec_linked_list.rs | 70 ++ compiler/rustc_data_structures/src/vec_map.rs | 194 ++++ .../rustc_data_structures/src/vec_map/tests.rs | 48 + compiler/rustc_data_structures/src/work_queue.rs | 44 + 81 files changed, 14721 insertions(+) create mode 100644 compiler/rustc_data_structures/Cargo.toml create mode 100644 compiler/rustc_data_structures/src/atomic_ref.rs create mode 100644 compiler/rustc_data_structures/src/base_n.rs create mode 100644 compiler/rustc_data_structures/src/base_n/tests.rs create mode 100644 compiler/rustc_data_structures/src/binary_search_util/mod.rs create mode 100644 compiler/rustc_data_structures/src/binary_search_util/tests.rs create mode 100644 compiler/rustc_data_structures/src/captures.rs create mode 100644 compiler/rustc_data_structures/src/fingerprint.rs create mode 100644 compiler/rustc_data_structures/src/fingerprint/tests.rs create mode 100644 compiler/rustc_data_structures/src/flock.rs create mode 100644 compiler/rustc_data_structures/src/flock/linux.rs create mode 100644 compiler/rustc_data_structures/src/flock/unix.rs create mode 100644 compiler/rustc_data_structures/src/flock/unsupported.rs create mode 100644 compiler/rustc_data_structures/src/flock/windows.rs create mode 100644 compiler/rustc_data_structures/src/frozen.rs create mode 100644 compiler/rustc_data_structures/src/functor.rs create mode 100644 compiler/rustc_data_structures/src/fx.rs create mode 100644 compiler/rustc_data_structures/src/graph/dominators/mod.rs create mode 100644 compiler/rustc_data_structures/src/graph/dominators/tests.rs create mode 100644 compiler/rustc_data_structures/src/graph/implementation/mod.rs create mode 100644 compiler/rustc_data_structures/src/graph/implementation/tests.rs create mode 100644 compiler/rustc_data_structures/src/graph/iterate/mod.rs create mode 100644 compiler/rustc_data_structures/src/graph/iterate/tests.rs create mode 100644 compiler/rustc_data_structures/src/graph/mod.rs create mode 100644 compiler/rustc_data_structures/src/graph/reference.rs create mode 100644 compiler/rustc_data_structures/src/graph/scc/mod.rs create mode 100644 compiler/rustc_data_structures/src/graph/scc/tests.rs create mode 100644 compiler/rustc_data_structures/src/graph/tests.rs create mode 100644 compiler/rustc_data_structures/src/graph/vec_graph/mod.rs create mode 100644 compiler/rustc_data_structures/src/graph/vec_graph/tests.rs create mode 100644 compiler/rustc_data_structures/src/intern.rs create mode 100644 compiler/rustc_data_structures/src/intern/tests.rs create mode 100644 compiler/rustc_data_structures/src/jobserver.rs create mode 100644 compiler/rustc_data_structures/src/lib.rs create mode 
100644 compiler/rustc_data_structures/src/macros.rs create mode 100644 compiler/rustc_data_structures/src/map_in_place.rs create mode 100644 compiler/rustc_data_structures/src/memmap.rs create mode 100644 compiler/rustc_data_structures/src/obligation_forest/graphviz.rs create mode 100644 compiler/rustc_data_structures/src/obligation_forest/mod.rs create mode 100644 compiler/rustc_data_structures/src/obligation_forest/tests.rs create mode 100644 compiler/rustc_data_structures/src/owning_ref/LICENSE create mode 100644 compiler/rustc_data_structures/src/owning_ref/mod.rs create mode 100644 compiler/rustc_data_structures/src/owning_ref/tests.rs create mode 100644 compiler/rustc_data_structures/src/profiling.rs create mode 100644 compiler/rustc_data_structures/src/sharded.rs create mode 100644 compiler/rustc_data_structures/src/sip128.rs create mode 100644 compiler/rustc_data_structures/src/sip128/tests.rs create mode 100644 compiler/rustc_data_structures/src/small_c_str.rs create mode 100644 compiler/rustc_data_structures/src/small_c_str/tests.rs create mode 100644 compiler/rustc_data_structures/src/small_str.rs create mode 100644 compiler/rustc_data_structures/src/small_str/tests.rs create mode 100644 compiler/rustc_data_structures/src/snapshot_map/mod.rs create mode 100644 compiler/rustc_data_structures/src/snapshot_map/tests.rs create mode 100644 compiler/rustc_data_structures/src/sorted_map.rs create mode 100644 compiler/rustc_data_structures/src/sorted_map/index_map.rs create mode 100644 compiler/rustc_data_structures/src/sorted_map/tests.rs create mode 100644 compiler/rustc_data_structures/src/sso/either_iter.rs create mode 100644 compiler/rustc_data_structures/src/sso/map.rs create mode 100644 compiler/rustc_data_structures/src/sso/mod.rs create mode 100644 compiler/rustc_data_structures/src/sso/set.rs create mode 100644 compiler/rustc_data_structures/src/stable_hasher.rs create mode 100644 compiler/rustc_data_structures/src/stable_hasher/tests.rs create mode 100644 compiler/rustc_data_structures/src/stack.rs create mode 100644 compiler/rustc_data_structures/src/steal.rs create mode 100644 compiler/rustc_data_structures/src/svh.rs create mode 100644 compiler/rustc_data_structures/src/sync.rs create mode 100644 compiler/rustc_data_structures/src/tagged_ptr.rs create mode 100644 compiler/rustc_data_structures/src/tagged_ptr/copy.rs create mode 100644 compiler/rustc_data_structures/src/tagged_ptr/drop.rs create mode 100644 compiler/rustc_data_structures/src/temp_dir.rs create mode 100644 compiler/rustc_data_structures/src/thin_vec.rs create mode 100644 compiler/rustc_data_structures/src/thin_vec/tests.rs create mode 100644 compiler/rustc_data_structures/src/tiny_list.rs create mode 100644 compiler/rustc_data_structures/src/tiny_list/tests.rs create mode 100644 compiler/rustc_data_structures/src/transitive_relation.rs create mode 100644 compiler/rustc_data_structures/src/transitive_relation/tests.rs create mode 100644 compiler/rustc_data_structures/src/unhash.rs create mode 100644 compiler/rustc_data_structures/src/vec_linked_list.rs create mode 100644 compiler/rustc_data_structures/src/vec_map.rs create mode 100644 compiler/rustc_data_structures/src/vec_map/tests.rs create mode 100644 compiler/rustc_data_structures/src/work_queue.rs (limited to 'compiler/rustc_data_structures') diff --git a/compiler/rustc_data_structures/Cargo.toml b/compiler/rustc_data_structures/Cargo.toml new file mode 100644 index 000000000..5c641f54f --- /dev/null +++ b/compiler/rustc_data_structures/Cargo.toml @@ 
-0,0 +1,41 @@ +[package] +name = "rustc_data_structures" +version = "0.0.0" +edition = "2021" + +[lib] +doctest = false + +[dependencies] +arrayvec = { version = "0.7", default-features = false } +ena = "0.14" +indexmap = { version = "1.9.1" } +tracing = "0.1" +jobserver_crate = { version = "0.1.13", package = "jobserver" } +rustc_serialize = { path = "../rustc_serialize" } +rustc_macros = { path = "../rustc_macros" } +rustc_graphviz = { path = "../rustc_graphviz" } +cfg-if = "0.1.2" +stable_deref_trait = "1.0.0" +rayon = { version = "0.4.0", package = "rustc-rayon", optional = true } +rayon-core = { version = "0.4.0", package = "rustc-rayon-core", optional = true } +rustc-hash = "1.1.0" +smallvec = { version = "1.8.1", features = ["const_generics", "union", "may_dangle"] } +rustc_index = { path = "../rustc_index", package = "rustc_index" } +bitflags = "1.2.1" +measureme = "10.0.0" +libc = "0.2" +stacker = "0.1.14" +tempfile = "3.2" + +[dependencies.parking_lot] +version = "0.11" + +[target.'cfg(windows)'.dependencies] +winapi = { version = "0.3", features = ["fileapi", "psapi", "winerror"] } + +[target.'cfg(not(target_arch = "wasm32"))'.dependencies] +memmap2 = "0.2.1" + +[features] +rustc_use_parallel_compiler = ["indexmap/rustc-rayon", "rayon", "rayon-core"] diff --git a/compiler/rustc_data_structures/src/atomic_ref.rs b/compiler/rustc_data_structures/src/atomic_ref.rs new file mode 100644 index 000000000..eeb1b3092 --- /dev/null +++ b/compiler/rustc_data_structures/src/atomic_ref.rs @@ -0,0 +1,26 @@ +use std::marker::PhantomData; +use std::sync::atomic::{AtomicPtr, Ordering}; + +/// This is essentially an `AtomicPtr` but is guaranteed to always be valid +pub struct AtomicRef(AtomicPtr, PhantomData<&'static T>); + +impl AtomicRef { + pub const fn new(initial: &'static T) -> AtomicRef { + AtomicRef(AtomicPtr::new(initial as *const T as *mut T), PhantomData) + } + + pub fn swap(&self, new: &'static T) -> &'static T { + // We never allow storing anything but a `'static` reference so it's safe to + // return it for the same. + unsafe { &*self.0.swap(new as *const T as *mut T, Ordering::SeqCst) } + } +} + +impl std::ops::Deref for AtomicRef { + type Target = T; + fn deref(&self) -> &Self::Target { + // We never allow storing anything but a `'static` reference so it's safe to lend + // it out for any amount of time. + unsafe { &*self.0.load(Ordering::SeqCst) } + } +} diff --git a/compiler/rustc_data_structures/src/base_n.rs b/compiler/rustc_data_structures/src/base_n.rs new file mode 100644 index 000000000..3c7bea271 --- /dev/null +++ b/compiler/rustc_data_structures/src/base_n.rs @@ -0,0 +1,42 @@ +/// Converts unsigned integers into a string representation with some base. +/// Bases up to and including 36 can be used for case-insensitive things. 
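A minimal usage sketch of the `base_n` helpers defined in this file (illustrative only, not part of the upstream patch; it assumes the crate is importable as `rustc_data_structures` with `base_n` public, and mirrors the round-trip check in `base_n/tests.rs` below):

```rust
// Illustrative usage of the base_n module: encode with a chosen base and
// round-trip through the standard library's radix parser.
use rustc_data_structures::base_n::{encode, push_str, CASE_INSENSITIVE};

fn base_n_demo() {
    // Digit 15 maps to 'f' in the lowercase alphabet, so 255 renders as "ff".
    assert_eq!(encode(255, 16), "ff");

    // Base 36 stays alphanumeric and case-insensitive: 255 = 7 * 36 + 3.
    assert_eq!(encode(255, CASE_INSENSITIVE), "73");

    // `push_str` appends to an existing buffer instead of allocating a new one.
    let mut out = String::from("symbol-");
    push_str(255, CASE_INSENSITIVE, &mut out);
    assert_eq!(out, "symbol-73");

    // For bases the standard library supports, the output parses back.
    assert_eq!(u128::from_str_radix(&encode(1_000_000, 36), 36), Ok(1_000_000));
}
```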
+use std::str; + +#[cfg(test)] +mod tests; + +pub const MAX_BASE: usize = 64; +pub const ALPHANUMERIC_ONLY: usize = 62; +pub const CASE_INSENSITIVE: usize = 36; + +const BASE_64: &[u8; MAX_BASE as usize] = + b"0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ@$"; + +#[inline] +pub fn push_str(mut n: u128, base: usize, output: &mut String) { + debug_assert!(base >= 2 && base <= MAX_BASE); + let mut s = [0u8; 128]; + let mut index = 0; + + let base = base as u128; + + loop { + s[index] = BASE_64[(n % base) as usize]; + index += 1; + n /= base; + + if n == 0 { + break; + } + } + s[0..index].reverse(); + + output.push_str(str::from_utf8(&s[0..index]).unwrap()); +} + +#[inline] +pub fn encode(n: u128, base: usize) -> String { + let mut s = String::new(); + push_str(n, base, &mut s); + s +} diff --git a/compiler/rustc_data_structures/src/base_n/tests.rs b/compiler/rustc_data_structures/src/base_n/tests.rs new file mode 100644 index 000000000..2be2f0532 --- /dev/null +++ b/compiler/rustc_data_structures/src/base_n/tests.rs @@ -0,0 +1,24 @@ +use super::*; + +#[test] +fn test_encode() { + fn test(n: u128, base: usize) { + assert_eq!(Ok(n), u128::from_str_radix(&encode(n, base), base as u32)); + } + + for base in 2..37 { + test(0, base); + test(1, base); + test(35, base); + test(36, base); + test(37, base); + test(u64::MAX as u128, base); + test(u128::MAX, base); + + const N: u128 = if cfg!(miri) { 10 } else { 1000 }; + + for i in 0..N { + test(i * 983, base); + } + } +} diff --git a/compiler/rustc_data_structures/src/binary_search_util/mod.rs b/compiler/rustc_data_structures/src/binary_search_util/mod.rs new file mode 100644 index 000000000..d40172a2e --- /dev/null +++ b/compiler/rustc_data_structures/src/binary_search_util/mod.rs @@ -0,0 +1,68 @@ +#[cfg(test)] +mod tests; + +/// Uses a sorted slice `data: &[E]` as a kind of "multi-map". The +/// `key_fn` extracts a key of type `K` from the data, and this +/// function finds the range of elements that match the key. `data` +/// must have been sorted as if by a call to `sort_by_key` for this to +/// work. +pub fn binary_search_slice<'d, E, K>(data: &'d [E], key_fn: impl Fn(&E) -> K, key: &K) -> &'d [E] +where + K: Ord, +{ + let Ok(mid) = data.binary_search_by_key(key, &key_fn) else { + return &[]; + }; + let size = data.len(); + + // We get back *some* element with the given key -- so do + // a galloping search backwards to find the *first* one. + let mut start = mid; + let mut previous = mid; + let mut step = 1; + loop { + start = start.saturating_sub(step); + if start == 0 || key_fn(&data[start]) != *key { + break; + } + previous = start; + step *= 2; + } + step = previous - start; + while step > 1 { + let half = step / 2; + let mid = start + half; + if key_fn(&data[mid]) != *key { + start = mid; + } + step -= half; + } + // adjust by one if we have overshot + if start < size && key_fn(&data[start]) != *key { + start += 1; + } + + // Now search forward to find the *last* one. 
+ let mut end = mid; + let mut previous = mid; + let mut step = 1; + loop { + end = end.saturating_add(step).min(size); + if end == size || key_fn(&data[end]) != *key { + break; + } + previous = end; + step *= 2; + } + step = end - previous; + while step > 1 { + let half = step / 2; + let mid = end - half; + if key_fn(&data[mid]) != *key { + end = mid; + } + step -= half; + } + + &data[start..end] +} diff --git a/compiler/rustc_data_structures/src/binary_search_util/tests.rs b/compiler/rustc_data_structures/src/binary_search_util/tests.rs new file mode 100644 index 000000000..d74febb5c --- /dev/null +++ b/compiler/rustc_data_structures/src/binary_search_util/tests.rs @@ -0,0 +1,23 @@ +use super::*; + +type Element = (usize, &'static str); + +fn test_map() -> Vec { + let mut data = vec![(3, "three-a"), (0, "zero"), (3, "three-b"), (22, "twenty-two")]; + data.sort_by_key(get_key); + data +} + +fn get_key(data: &Element) -> usize { + data.0 +} + +#[test] +fn binary_search_slice_test() { + let map = test_map(); + assert_eq!(binary_search_slice(&map, get_key, &0), &[(0, "zero")]); + assert_eq!(binary_search_slice(&map, get_key, &1), &[]); + assert_eq!(binary_search_slice(&map, get_key, &3), &[(3, "three-a"), (3, "three-b")]); + assert_eq!(binary_search_slice(&map, get_key, &22), &[(22, "twenty-two")]); + assert_eq!(binary_search_slice(&map, get_key, &23), &[]); +} diff --git a/compiler/rustc_data_structures/src/captures.rs b/compiler/rustc_data_structures/src/captures.rs new file mode 100644 index 000000000..677ccb314 --- /dev/null +++ b/compiler/rustc_data_structures/src/captures.rs @@ -0,0 +1,8 @@ +/// "Signaling" trait used in impl trait to tag lifetimes that you may +/// need to capture but don't really need for other reasons. +/// Basically a workaround; see [this comment] for details. +/// +/// [this comment]: https://github.com/rust-lang/rust/issues/34511#issuecomment-373423999 +pub trait Captures<'a> {} + +impl<'a, T: ?Sized> Captures<'a> for T {} diff --git a/compiler/rustc_data_structures/src/fingerprint.rs b/compiler/rustc_data_structures/src/fingerprint.rs new file mode 100644 index 000000000..5ff2d18dd --- /dev/null +++ b/compiler/rustc_data_structures/src/fingerprint.rs @@ -0,0 +1,215 @@ +use crate::stable_hasher; +use rustc_serialize::{Decodable, Decoder, Encodable, Encoder}; +use std::convert::TryInto; +use std::hash::{Hash, Hasher}; + +#[cfg(test)] +mod tests; + +#[derive(Eq, PartialEq, Ord, PartialOrd, Debug, Clone, Copy)] +#[repr(C)] +pub struct Fingerprint(u64, u64); + +impl Fingerprint { + pub const ZERO: Fingerprint = Fingerprint(0, 0); + + #[inline] + pub fn new(_0: u64, _1: u64) -> Fingerprint { + Fingerprint(_0, _1) + } + + #[inline] + pub fn from_smaller_hash(hash: u64) -> Fingerprint { + Fingerprint(hash, hash) + } + + #[inline] + pub fn to_smaller_hash(&self) -> u64 { + // Even though both halves of the fingerprint are expected to be good + // quality hash values, let's still combine the two values because the + // Fingerprints in DefPathHash have the StableCrateId portion which is + // the same for all DefPathHashes from the same crate. Combining the + // two halfs makes sure we get a good quality hash in such cases too. + self.0.wrapping_mul(3).wrapping_add(self.1) + } + + #[inline] + pub fn as_value(&self) -> (u64, u64) { + (self.0, self.1) + } + + #[inline] + pub fn combine(self, other: Fingerprint) -> Fingerprint { + // See https://stackoverflow.com/a/27952689 on why this function is + // implemented this way. 
+ Fingerprint( + self.0.wrapping_mul(3).wrapping_add(other.0), + self.1.wrapping_mul(3).wrapping_add(other.1), + ) + } + + // Combines two hashes in an order independent way. Make sure this is what + // you want. + #[inline] + pub fn combine_commutative(self, other: Fingerprint) -> Fingerprint { + let a = u128::from(self.1) << 64 | u128::from(self.0); + let b = u128::from(other.1) << 64 | u128::from(other.0); + + let c = a.wrapping_add(b); + + Fingerprint(c as u64, (c >> 64) as u64) + } + + pub fn to_hex(&self) -> String { + format!("{:x}{:x}", self.0, self.1) + } + + #[inline] + pub fn to_le_bytes(&self) -> [u8; 16] { + // This seems to optimize to the same machine code as + // `unsafe { mem::transmute(*k) }`. Well done, LLVM! :) + let mut result = [0u8; 16]; + + let first_half: &mut [u8; 8] = (&mut result[0..8]).try_into().unwrap(); + *first_half = self.0.to_le_bytes(); + + let second_half: &mut [u8; 8] = (&mut result[8..16]).try_into().unwrap(); + *second_half = self.1.to_le_bytes(); + + result + } + + #[inline] + pub fn from_le_bytes(bytes: [u8; 16]) -> Fingerprint { + Fingerprint( + u64::from_le_bytes(bytes[0..8].try_into().unwrap()), + u64::from_le_bytes(bytes[8..16].try_into().unwrap()), + ) + } +} + +impl std::fmt::Display for Fingerprint { + fn fmt(&self, formatter: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + write!(formatter, "{:x}-{:x}", self.0, self.1) + } +} + +impl Hash for Fingerprint { + #[inline] + fn hash(&self, state: &mut H) { + state.write_fingerprint(self); + } +} + +trait FingerprintHasher { + fn write_fingerprint(&mut self, fingerprint: &Fingerprint); +} + +impl FingerprintHasher for H { + #[inline] + default fn write_fingerprint(&mut self, fingerprint: &Fingerprint) { + self.write_u64(fingerprint.0); + self.write_u64(fingerprint.1); + } +} + +impl FingerprintHasher for crate::unhash::Unhasher { + #[inline] + fn write_fingerprint(&mut self, fingerprint: &Fingerprint) { + // Even though both halves of the fingerprint are expected to be good + // quality hash values, let's still combine the two values because the + // Fingerprints in DefPathHash have the StableCrateId portion which is + // the same for all DefPathHashes from the same crate. Combining the + // two halfs makes sure we get a good quality hash in such cases too. + // + // Since `Unhasher` is used only in the context of HashMaps, it is OK + // to combine the two components in an order-independent way (which is + // cheaper than the more robust Fingerprint::to_smaller_hash()). For + // HashMaps we don't really care if Fingerprint(x,y) and + // Fingerprint(y, x) result in the same hash value. Collision + // probability will still be much better than with FxHash. + self.write_u64(fingerprint.0.wrapping_add(fingerprint.1)); + } +} + +impl stable_hasher::StableHasherResult for Fingerprint { + #[inline] + fn finish(hasher: stable_hasher::StableHasher) -> Self { + let (_0, _1) = hasher.finalize(); + Fingerprint(_0, _1) + } +} + +impl_stable_hash_via_hash!(Fingerprint); + +impl Encodable for Fingerprint { + #[inline] + fn encode(&self, s: &mut E) { + s.emit_raw_bytes(&self.to_le_bytes()); + } +} + +impl Decodable for Fingerprint { + #[inline] + fn decode(d: &mut D) -> Self { + Fingerprint::from_le_bytes(d.read_raw_bytes(16).try_into().unwrap()) + } +} + +// `PackedFingerprint` wraps a `Fingerprint`. Its purpose is to, on certain +// architectures, behave like a `Fingerprint` without alignment requirements. 
+// This behavior is only enabled on x86 and x86_64, where the impact of +// unaligned accesses is tolerable in small doses. +// +// This may be preferable to use in large collections of structs containing +// fingerprints, as it can reduce memory consumption by preventing the padding +// that the more strictly-aligned `Fingerprint` can introduce. An application of +// this is in the query dependency graph, which contains a large collection of +// `DepNode`s. As of this writing, the size of a `DepNode` decreases by ~30% +// (from 24 bytes to 17) by using the packed representation here, which +// noticeably decreases total memory usage when compiling large crates. +// +// The wrapped `Fingerprint` is private to reduce the chance of a client +// invoking undefined behavior by taking a reference to the packed field. +#[cfg_attr(any(target_arch = "x86", target_arch = "x86_64"), repr(packed))] +#[derive(Eq, PartialEq, Ord, PartialOrd, Debug, Clone, Copy, Hash)] +pub struct PackedFingerprint(Fingerprint); + +impl std::fmt::Display for PackedFingerprint { + #[inline] + fn fmt(&self, formatter: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + // Copy to avoid taking reference to packed field. + let copy = self.0; + copy.fmt(formatter) + } +} + +impl Encodable for PackedFingerprint { + #[inline] + fn encode(&self, s: &mut E) { + // Copy to avoid taking reference to packed field. + let copy = self.0; + copy.encode(s); + } +} + +impl Decodable for PackedFingerprint { + #[inline] + fn decode(d: &mut D) -> Self { + Self(Fingerprint::decode(d)) + } +} + +impl From for PackedFingerprint { + #[inline] + fn from(f: Fingerprint) -> PackedFingerprint { + PackedFingerprint(f) + } +} + +impl From for Fingerprint { + #[inline] + fn from(f: PackedFingerprint) -> Fingerprint { + f.0 + } +} diff --git a/compiler/rustc_data_structures/src/fingerprint/tests.rs b/compiler/rustc_data_structures/src/fingerprint/tests.rs new file mode 100644 index 000000000..9b0783e33 --- /dev/null +++ b/compiler/rustc_data_structures/src/fingerprint/tests.rs @@ -0,0 +1,14 @@ +use super::*; + +// Check that `combine_commutative` is order independent. +#[test] +fn combine_commutative_is_order_independent() { + let a = Fingerprint::new(0xf6622fb349898b06, 0x70be9377b2f9c610); + let b = Fingerprint::new(0xa9562bf5a2a5303c, 0x67d9b6c82034f13d); + let c = Fingerprint::new(0x0d013a27811dbbc3, 0x9a3f7b3d9142ec43); + let permutations = [(a, b, c), (a, c, b), (b, a, c), (b, c, a), (c, a, b), (c, b, a)]; + let f = a.combine_commutative(b).combine_commutative(c); + for p in &permutations { + assert_eq!(f, p.0.combine_commutative(p.1).combine_commutative(p.2)); + } +} diff --git a/compiler/rustc_data_structures/src/flock.rs b/compiler/rustc_data_structures/src/flock.rs new file mode 100644 index 000000000..e395d8dbb --- /dev/null +++ b/compiler/rustc_data_structures/src/flock.rs @@ -0,0 +1,26 @@ +//! Simple file-locking apis for each OS. +//! +//! This is not meant to be in the standard library, it does nothing with +//! green/native threading. This is just a bare-bones enough solution for +//! librustdoc, it is not production quality at all. + +#![allow(non_camel_case_types)] +#![allow(nonstandard_style)] + +cfg_if! 
{ + if #[cfg(target_os = "linux")] { + mod linux; + use linux as imp; + } else if #[cfg(unix)] { + mod unix; + use unix as imp; + } else if #[cfg(windows)] { + mod windows; + use windows as imp; + } else { + mod unsupported; + use unsupported as imp; + } +} + +pub use imp::Lock; diff --git a/compiler/rustc_data_structures/src/flock/linux.rs b/compiler/rustc_data_structures/src/flock/linux.rs new file mode 100644 index 000000000..bb3ecfbc3 --- /dev/null +++ b/compiler/rustc_data_structures/src/flock/linux.rs @@ -0,0 +1,40 @@ +//! We use `flock` rather than `fcntl` on Linux, because WSL1 does not support +//! `fcntl`-style advisory locks properly (rust-lang/rust#72157). For other Unix +//! targets we still use `fcntl` because it's more portable than `flock`. + +use std::fs::{File, OpenOptions}; +use std::io; +use std::os::unix::prelude::*; +use std::path::Path; + +#[derive(Debug)] +pub struct Lock { + _file: File, +} + +impl Lock { + pub fn new(p: &Path, wait: bool, create: bool, exclusive: bool) -> io::Result { + let file = OpenOptions::new() + .read(true) + .write(true) + .create(create) + .mode(libc::S_IRWXU as u32) + .open(p)?; + + let mut operation = if exclusive { libc::LOCK_EX } else { libc::LOCK_SH }; + if !wait { + operation |= libc::LOCK_NB + } + + let ret = unsafe { libc::flock(file.as_raw_fd(), operation) }; + if ret == -1 { Err(io::Error::last_os_error()) } else { Ok(Lock { _file: file }) } + } + + pub fn error_unsupported(err: &io::Error) -> bool { + matches!(err.raw_os_error(), Some(libc::ENOTSUP) | Some(libc::ENOSYS)) + } +} + +// Note that we don't need a Drop impl to execute `flock(fd, LOCK_UN)`. A lock acquired by +// `flock` is associated with the file descriptor and closing the file releases it +// automatically. diff --git a/compiler/rustc_data_structures/src/flock/unix.rs b/compiler/rustc_data_structures/src/flock/unix.rs new file mode 100644 index 000000000..4e5297d58 --- /dev/null +++ b/compiler/rustc_data_structures/src/flock/unix.rs @@ -0,0 +1,51 @@ +use std::fs::{File, OpenOptions}; +use std::io; +use std::mem; +use std::os::unix::prelude::*; +use std::path::Path; + +#[derive(Debug)] +pub struct Lock { + file: File, +} + +impl Lock { + pub fn new(p: &Path, wait: bool, create: bool, exclusive: bool) -> io::Result { + let file = OpenOptions::new() + .read(true) + .write(true) + .create(create) + .mode(libc::S_IRWXU as u32) + .open(p)?; + + let lock_type = if exclusive { libc::F_WRLCK } else { libc::F_RDLCK }; + + let mut flock: libc::flock = unsafe { mem::zeroed() }; + flock.l_type = lock_type as libc::c_short; + flock.l_whence = libc::SEEK_SET as libc::c_short; + flock.l_start = 0; + flock.l_len = 0; + + let cmd = if wait { libc::F_SETLKW } else { libc::F_SETLK }; + let ret = unsafe { libc::fcntl(file.as_raw_fd(), cmd, &flock) }; + if ret == -1 { Err(io::Error::last_os_error()) } else { Ok(Lock { file }) } + } + + pub fn error_unsupported(err: &io::Error) -> bool { + matches!(err.raw_os_error(), Some(libc::ENOTSUP) | Some(libc::ENOSYS)) + } +} + +impl Drop for Lock { + fn drop(&mut self) { + let mut flock: libc::flock = unsafe { mem::zeroed() }; + flock.l_type = libc::F_UNLCK as libc::c_short; + flock.l_whence = libc::SEEK_SET as libc::c_short; + flock.l_start = 0; + flock.l_len = 0; + + unsafe { + libc::fcntl(self.file.as_raw_fd(), libc::F_SETLK, &flock); + } + } +} diff --git a/compiler/rustc_data_structures/src/flock/unsupported.rs b/compiler/rustc_data_structures/src/flock/unsupported.rs new file mode 100644 index 000000000..9245fca37 --- /dev/null +++ 
b/compiler/rustc_data_structures/src/flock/unsupported.rs @@ -0,0 +1,16 @@ +use std::io; +use std::path::Path; + +#[derive(Debug)] +pub struct Lock(()); + +impl Lock { + pub fn new(_p: &Path, _wait: bool, _create: bool, _exclusive: bool) -> io::Result { + let msg = "file locks not supported on this platform"; + Err(io::Error::new(io::ErrorKind::Other, msg)) + } + + pub fn error_unsupported(_err: &io::Error) -> bool { + true + } +} diff --git a/compiler/rustc_data_structures/src/flock/windows.rs b/compiler/rustc_data_structures/src/flock/windows.rs new file mode 100644 index 000000000..43e6caaa1 --- /dev/null +++ b/compiler/rustc_data_structures/src/flock/windows.rs @@ -0,0 +1,77 @@ +use std::fs::{File, OpenOptions}; +use std::io; +use std::mem; +use std::os::windows::prelude::*; +use std::path::Path; + +use winapi::shared::winerror::ERROR_INVALID_FUNCTION; +use winapi::um::fileapi::LockFileEx; +use winapi::um::minwinbase::{LOCKFILE_EXCLUSIVE_LOCK, LOCKFILE_FAIL_IMMEDIATELY, OVERLAPPED}; +use winapi::um::winnt::{FILE_SHARE_DELETE, FILE_SHARE_READ, FILE_SHARE_WRITE}; + +#[derive(Debug)] +pub struct Lock { + _file: File, +} + +impl Lock { + pub fn new(p: &Path, wait: bool, create: bool, exclusive: bool) -> io::Result { + assert!( + p.parent().unwrap().exists(), + "Parent directory of lock-file must exist: {}", + p.display() + ); + + let share_mode = FILE_SHARE_DELETE | FILE_SHARE_READ | FILE_SHARE_WRITE; + + let mut open_options = OpenOptions::new(); + open_options.read(true).share_mode(share_mode); + + if create { + open_options.create(true).write(true); + } + + debug!("attempting to open lock file `{}`", p.display()); + let file = match open_options.open(p) { + Ok(file) => { + debug!("lock file opened successfully"); + file + } + Err(err) => { + debug!("error opening lock file: {}", err); + return Err(err); + } + }; + + let ret = unsafe { + let mut overlapped: OVERLAPPED = mem::zeroed(); + + let mut dwFlags = 0; + if !wait { + dwFlags |= LOCKFILE_FAIL_IMMEDIATELY; + } + + if exclusive { + dwFlags |= LOCKFILE_EXCLUSIVE_LOCK; + } + + debug!("attempting to acquire lock on lock file `{}`", p.display()); + LockFileEx(file.as_raw_handle(), dwFlags, 0, 0xFFFF_FFFF, 0xFFFF_FFFF, &mut overlapped) + }; + if ret == 0 { + let err = io::Error::last_os_error(); + debug!("failed acquiring file lock: {}", err); + Err(err) + } else { + debug!("successfully acquired lock"); + Ok(Lock { _file: file }) + } + } + + pub fn error_unsupported(err: &io::Error) -> bool { + err.raw_os_error() == Some(ERROR_INVALID_FUNCTION as i32) + } +} + +// Note that we don't need a Drop impl on Windows: The file is unlocked +// automatically when it's closed. diff --git a/compiler/rustc_data_structures/src/frozen.rs b/compiler/rustc_data_structures/src/frozen.rs new file mode 100644 index 000000000..c81e1b124 --- /dev/null +++ b/compiler/rustc_data_structures/src/frozen.rs @@ -0,0 +1,64 @@ +//! An immutable, owned value (except for interior mutability). +//! +//! The purpose of `Frozen` is to make a value immutable for the sake of defensive programming. For example, +//! suppose we have the following: +//! +//! ```rust +//! struct Bar { /* some data */ } +//! +//! struct Foo { +//! /// Some computed data that should never change after construction. +//! pub computed: Bar, +//! +//! /* some other fields */ +//! } +//! +//! impl Bar { +//! /// Mutate the `Bar`. +//! pub fn mutate(&mut self) { } +//! } +//! ``` +//! +//! Now suppose we want to pass around a mutable `Foo` instance but, we want to make sure that +//! 
`computed` does not change accidentally (e.g. somebody might accidentally call +//! `foo.computed.mutate()`). This is what `Frozen` is for. We can do the following: +//! +//! ``` +//! # struct Bar {} +//! use rustc_data_structures::frozen::Frozen; +//! +//! struct Foo { +//! /// Some computed data that should never change after construction. +//! pub computed: Frozen, +//! +//! /* some other fields */ +//! } +//! ``` +//! +//! `Frozen` impls `Deref`, so we can ergonomically call methods on `Bar`, but it doesn't `impl +//! DerefMut`. Now calling `foo.compute.mutate()` will result in a compile-time error stating that +//! `mutate` requires a mutable reference but we don't have one. +//! +//! # Caveats +//! +//! - `Frozen` doesn't try to defend against interior mutability (e.g. `Frozen>`). +//! - `Frozen` doesn't pin it's contents (e.g. one could still do `foo.computed = +//! Frozen::freeze(new_bar)`). + +/// An owned immutable value. +#[derive(Debug)] +pub struct Frozen(T); + +impl Frozen { + pub fn freeze(val: T) -> Self { + Frozen(val) + } +} + +impl std::ops::Deref for Frozen { + type Target = T; + + fn deref(&self) -> &T { + &self.0 + } +} diff --git a/compiler/rustc_data_structures/src/functor.rs b/compiler/rustc_data_structures/src/functor.rs new file mode 100644 index 000000000..a3d3f9883 --- /dev/null +++ b/compiler/rustc_data_structures/src/functor.rs @@ -0,0 +1,99 @@ +use rustc_index::vec::{Idx, IndexVec}; +use std::mem; + +pub trait IdFunctor: Sized { + type Inner; + + fn try_map_id(self, f: F) -> Result + where + F: FnMut(Self::Inner) -> Result; +} + +impl IdFunctor for Box { + type Inner = T; + + #[inline] + fn try_map_id(self, mut f: F) -> Result + where + F: FnMut(Self::Inner) -> Result, + { + let raw = Box::into_raw(self); + Ok(unsafe { + // SAFETY: The raw pointer points to a valid value of type `T`. + let value = raw.read(); + // SAFETY: Converts `Box` to `Box>` which is the + // inverse of `Box::assume_init()` and should be safe. + let raw: Box> = Box::from_raw(raw.cast()); + // SAFETY: Write the mapped value back into the `Box`. + Box::write(raw, f(value)?) 
+ }) + } +} + +impl IdFunctor for Vec { + type Inner = T; + + #[inline] + fn try_map_id(self, mut f: F) -> Result + where + F: FnMut(Self::Inner) -> Result, + { + struct HoleVec { + vec: Vec>, + hole: Option, + } + + impl Drop for HoleVec { + fn drop(&mut self) { + unsafe { + for (index, slot) in self.vec.iter_mut().enumerate() { + if self.hole != Some(index) { + mem::ManuallyDrop::drop(slot); + } + } + } + } + } + + unsafe { + let (ptr, length, capacity) = self.into_raw_parts(); + let vec = Vec::from_raw_parts(ptr.cast(), length, capacity); + let mut hole_vec = HoleVec { vec, hole: None }; + + for (index, slot) in hole_vec.vec.iter_mut().enumerate() { + hole_vec.hole = Some(index); + let original = mem::ManuallyDrop::take(slot); + let mapped = f(original)?; + *slot = mem::ManuallyDrop::new(mapped); + hole_vec.hole = None; + } + + mem::forget(hole_vec); + Ok(Vec::from_raw_parts(ptr, length, capacity)) + } + } +} + +impl IdFunctor for Box<[T]> { + type Inner = T; + + #[inline] + fn try_map_id(self, f: F) -> Result + where + F: FnMut(Self::Inner) -> Result, + { + Vec::from(self).try_map_id(f).map(Into::into) + } +} + +impl IdFunctor for IndexVec { + type Inner = T; + + #[inline] + fn try_map_id(self, f: F) -> Result + where + F: FnMut(Self::Inner) -> Result, + { + self.raw.try_map_id(f).map(IndexVec::from_raw) + } +} diff --git a/compiler/rustc_data_structures/src/fx.rs b/compiler/rustc_data_structures/src/fx.rs new file mode 100644 index 000000000..bbeb193db --- /dev/null +++ b/compiler/rustc_data_structures/src/fx.rs @@ -0,0 +1,14 @@ +use std::hash::BuildHasherDefault; + +pub use rustc_hash::{FxHashMap, FxHashSet, FxHasher}; + +pub type FxIndexMap = indexmap::IndexMap>; +pub type FxIndexSet = indexmap::IndexSet>; + +#[macro_export] +macro_rules! define_id_collections { + ($map_name:ident, $set_name:ident, $key:ty) => { + pub type $map_name = $crate::fx::FxHashMap<$key, T>; + pub type $set_name = $crate::fx::FxHashSet<$key>; + }; +} diff --git a/compiler/rustc_data_structures/src/graph/dominators/mod.rs b/compiler/rustc_data_structures/src/graph/dominators/mod.rs new file mode 100644 index 000000000..00913a483 --- /dev/null +++ b/compiler/rustc_data_structures/src/graph/dominators/mod.rs @@ -0,0 +1,324 @@ +//! Finding the dominators in a control-flow graph. +//! +//! Algorithm based on Loukas Georgiadis, +//! "Linear-Time Algorithms for Dominators and Related Problems", +//! +//! +//! Additionally useful is the original Lengauer-Tarjan paper on this subject, +//! "A Fast Algorithm for Finding Dominators in a Flowgraph" +//! Thomas Lengauer and Robert Endre Tarjan. +//! + +use super::ControlFlowGraph; +use rustc_index::vec::{Idx, IndexVec}; +use std::cmp::Ordering; + +#[cfg(test)] +mod tests; + +struct PreOrderFrame { + pre_order_idx: PreorderIndex, + iter: Iter, +} + +rustc_index::newtype_index! { + struct PreorderIndex { .. } +} + +pub fn dominators(graph: G) -> Dominators { + // compute the post order index (rank) for each node + let mut post_order_rank = IndexVec::from_elem_n(0, graph.num_nodes()); + + // We allocate capacity for the full set of nodes, because most of the time + // most of the nodes *are* reachable. 
+ let mut parent: IndexVec = + IndexVec::with_capacity(graph.num_nodes()); + + let mut stack = vec![PreOrderFrame { + pre_order_idx: PreorderIndex::new(0), + iter: graph.successors(graph.start_node()), + }]; + let mut pre_order_to_real: IndexVec = + IndexVec::with_capacity(graph.num_nodes()); + let mut real_to_pre_order: IndexVec> = + IndexVec::from_elem_n(None, graph.num_nodes()); + pre_order_to_real.push(graph.start_node()); + parent.push(PreorderIndex::new(0)); // the parent of the root node is the root for now. + real_to_pre_order[graph.start_node()] = Some(PreorderIndex::new(0)); + let mut post_order_idx = 0; + + // Traverse the graph, collecting a number of things: + // + // * Preorder mapping (to it, and back to the actual ordering) + // * Postorder mapping (used exclusively for rank_partial_cmp on the final product) + // * Parents for each vertex in the preorder tree + // + // These are all done here rather than through one of the 'standard' + // graph traversals to help make this fast. + 'recurse: while let Some(frame) = stack.last_mut() { + while let Some(successor) = frame.iter.next() { + if real_to_pre_order[successor].is_none() { + let pre_order_idx = pre_order_to_real.push(successor); + real_to_pre_order[successor] = Some(pre_order_idx); + parent.push(frame.pre_order_idx); + stack.push(PreOrderFrame { pre_order_idx, iter: graph.successors(successor) }); + + continue 'recurse; + } + } + post_order_rank[pre_order_to_real[frame.pre_order_idx]] = post_order_idx; + post_order_idx += 1; + + stack.pop(); + } + + let reachable_vertices = pre_order_to_real.len(); + + let mut idom = IndexVec::from_elem_n(PreorderIndex::new(0), reachable_vertices); + let mut semi = IndexVec::from_fn_n(std::convert::identity, reachable_vertices); + let mut label = semi.clone(); + let mut bucket = IndexVec::from_elem_n(vec![], reachable_vertices); + let mut lastlinked = None; + + // We loop over vertices in reverse preorder. This implements the pseudocode + // of the simple Lengauer-Tarjan algorithm. A few key facts are noted here + // which are helpful for understanding the code (full proofs and such are + // found in various papers, including one cited at the top of this file). + // + // For each vertex w (which is not the root), + // * semi[w] is a proper ancestor of the vertex w (i.e., semi[w] != w) + // * idom[w] is an ancestor of semi[w] (i.e., idom[w] may equal semi[w]) + // + // An immediate dominator of w (idom[w]) is a vertex v where v dominates w + // and every other dominator of w dominates v. (Every vertex except the root has + // a unique immediate dominator.) + // + // A semidominator for a given vertex w (semi[w]) is the vertex v with minimum + // preorder number such that there exists a path from v to w in which all elements (other than w) have + // preorder numbers greater than w (i.e., this path is not the tree path to + // w). + for w in (PreorderIndex::new(1)..PreorderIndex::new(reachable_vertices)).rev() { + // Optimization: process buckets just once, at the start of the + // iteration. Do not explicitly empty the bucket (even though it will + // not be used again), to save some instructions. + // + // The bucket here contains the vertices whose semidominator is the + // vertex w, which we are guaranteed to have found: all vertices who can + // be semidominated by w must have a preorder number exceeding w, so + // they have been placed in the bucket. + // + // We compute a partial set of immediate dominators here. 
+ let z = parent[w]; + for &v in bucket[z].iter() { + // This uses the result of Lemma 5 from section 2 from the original + // 1979 paper, to compute either the immediate or relative dominator + // for a given vertex v. + // + // eval returns a vertex y, for which semi[y] is minimum among + // vertices semi[v] +> y *> v. Note that semi[v] = z as we're in the + // z bucket. + // + // Given such a vertex y, semi[y] <= semi[v] and idom[y] = idom[v]. + // If semi[y] = semi[v], though, idom[v] = semi[v]. + // + // Using this, we can either set idom[v] to be: + // * semi[v] (i.e. z), if semi[y] is z + // * idom[y], otherwise + // + // We don't directly set to idom[y] though as it's not necessarily + // known yet. The second preorder traversal will cleanup by updating + // the idom for any that were missed in this pass. + let y = eval(&mut parent, lastlinked, &semi, &mut label, v); + idom[v] = if semi[y] < z { y } else { z }; + } + + // This loop computes the semi[w] for w. + semi[w] = w; + for v in graph.predecessors(pre_order_to_real[w]) { + let v = real_to_pre_order[v].unwrap(); + + // eval returns a vertex x from which semi[x] is minimum among + // vertices semi[v] +> x *> v. + // + // From Lemma 4 from section 2, we know that the semidominator of a + // vertex w is the minimum (by preorder number) vertex of the + // following: + // + // * direct predecessors of w with preorder number less than w + // * semidominators of u such that u > w and there exists (v, w) + // such that u *> v + // + // This loop therefore identifies such a minima. Note that any + // semidominator path to w must have all but the first vertex go + // through vertices numbered greater than w, so the reverse preorder + // traversal we are using guarantees that all of the information we + // might need is available at this point. + // + // The eval call will give us semi[x], which is either: + // + // * v itself, if v has not yet been processed + // * A possible 'best' semidominator for w. + let x = eval(&mut parent, lastlinked, &semi, &mut label, v); + semi[w] = std::cmp::min(semi[w], semi[x]); + } + // semi[w] is now semidominator(w) and won't change any more. + + // Optimization: Do not insert into buckets if parent[w] = semi[w], as + // we then immediately know the idom. + // + // If we don't yet know the idom directly, then push this vertex into + // our semidominator's bucket, where it will get processed at a later + // stage to compute its immediate dominator. + if parent[w] != semi[w] { + bucket[semi[w]].push(w); + } else { + idom[w] = parent[w]; + } + + // Optimization: We share the parent array between processed and not + // processed elements; lastlinked represents the divider. + lastlinked = Some(w); + } + + // Finalize the idoms for any that were not fully settable during initial + // traversal. + // + // If idom[w] != semi[w] then we know that we've stored vertex y from above + // into idom[w]. It is known to be our 'relative dominator', which means + // that it's one of w's ancestors and has the same immediate dominator as w, + // so use that idom. 
+ for w in PreorderIndex::new(1)..PreorderIndex::new(reachable_vertices) { + if idom[w] != semi[w] { + idom[w] = idom[idom[w]]; + } + } + + let mut immediate_dominators = IndexVec::from_elem_n(None, graph.num_nodes()); + for (idx, node) in pre_order_to_real.iter_enumerated() { + immediate_dominators[*node] = Some(pre_order_to_real[idom[idx]]); + } + + Dominators { post_order_rank, immediate_dominators } +} + +/// Evaluate the link-eval virtual forest, providing the currently minimum semi +/// value for the passed `node` (which may be itself). +/// +/// This maintains that for every vertex v, `label[v]` is such that: +/// +/// ```text +/// semi[eval(v)] = min { semi[label[u]] | root_in_forest(v) +> u *> v } +/// ``` +/// +/// where `+>` is a proper ancestor and `*>` is just an ancestor. +#[inline] +fn eval( + ancestor: &mut IndexVec, + lastlinked: Option, + semi: &IndexVec, + label: &mut IndexVec, + node: PreorderIndex, +) -> PreorderIndex { + if is_processed(node, lastlinked) { + compress(ancestor, lastlinked, semi, label, node); + label[node] + } else { + node + } +} + +#[inline] +fn is_processed(v: PreorderIndex, lastlinked: Option) -> bool { + if let Some(ll) = lastlinked { v >= ll } else { false } +} + +#[inline] +fn compress( + ancestor: &mut IndexVec, + lastlinked: Option, + semi: &IndexVec, + label: &mut IndexVec, + v: PreorderIndex, +) { + assert!(is_processed(v, lastlinked)); + // Compute the processed list of ancestors + // + // We use a heap stack here to avoid recursing too deeply, exhausting the + // stack space. + let mut stack: smallvec::SmallVec<[_; 8]> = smallvec::smallvec![v]; + let mut u = ancestor[v]; + while is_processed(u, lastlinked) { + stack.push(u); + u = ancestor[u]; + } + + // Then in reverse order, popping the stack + for &[v, u] in stack.array_windows().rev() { + if semi[label[u]] < semi[label[v]] { + label[v] = label[u]; + } + ancestor[v] = ancestor[u]; + } +} + +#[derive(Clone, Debug)] +pub struct Dominators { + post_order_rank: IndexVec, + immediate_dominators: IndexVec>, +} + +impl Dominators { + pub fn dummy() -> Self { + Self { post_order_rank: IndexVec::new(), immediate_dominators: IndexVec::new() } + } + + pub fn is_reachable(&self, node: Node) -> bool { + self.immediate_dominators[node].is_some() + } + + pub fn immediate_dominator(&self, node: Node) -> Node { + assert!(self.is_reachable(node), "node {:?} is not reachable", node); + self.immediate_dominators[node].unwrap() + } + + pub fn dominators(&self, node: Node) -> Iter<'_, Node> { + assert!(self.is_reachable(node), "node {:?} is not reachable", node); + Iter { dominators: self, node: Some(node) } + } + + pub fn is_dominated_by(&self, node: Node, dom: Node) -> bool { + // FIXME -- could be optimized by using post-order-rank + self.dominators(node).any(|n| n == dom) + } + + /// Provide deterministic ordering of nodes such that, if any two nodes have a dominator + /// relationship, the dominator will always precede the dominated. (The relative ordering + /// of two unrelated nodes will also be consistent, but otherwise the order has no + /// meaning.) This method cannot be used to determine if either Node dominates the other. 
+ pub fn rank_partial_cmp(&self, lhs: Node, rhs: Node) -> Option { + self.post_order_rank[lhs].partial_cmp(&self.post_order_rank[rhs]) + } +} + +pub struct Iter<'dom, Node: Idx> { + dominators: &'dom Dominators, + node: Option, +} + +impl<'dom, Node: Idx> Iterator for Iter<'dom, Node> { + type Item = Node; + + fn next(&mut self) -> Option { + if let Some(node) = self.node { + let dom = self.dominators.immediate_dominator(node); + if dom == node { + self.node = None; // reached the root + } else { + self.node = Some(dom); + } + Some(node) + } else { + None + } + } +} diff --git a/compiler/rustc_data_structures/src/graph/dominators/tests.rs b/compiler/rustc_data_structures/src/graph/dominators/tests.rs new file mode 100644 index 000000000..ff31d8f7f --- /dev/null +++ b/compiler/rustc_data_structures/src/graph/dominators/tests.rs @@ -0,0 +1,45 @@ +use super::*; + +use super::super::tests::TestGraph; + +#[test] +fn diamond() { + let graph = TestGraph::new(0, &[(0, 1), (0, 2), (1, 3), (2, 3)]); + + let dominators = dominators(&graph); + let immediate_dominators = &dominators.immediate_dominators; + assert_eq!(immediate_dominators[0], Some(0)); + assert_eq!(immediate_dominators[1], Some(0)); + assert_eq!(immediate_dominators[2], Some(0)); + assert_eq!(immediate_dominators[3], Some(0)); +} + +#[test] +fn paper() { + // example from the paper: + let graph = TestGraph::new( + 6, + &[(6, 5), (6, 4), (5, 1), (4, 2), (4, 3), (1, 2), (2, 3), (3, 2), (2, 1)], + ); + + let dominators = dominators(&graph); + let immediate_dominators = &dominators.immediate_dominators; + assert_eq!(immediate_dominators[0], None); // <-- note that 0 is not in graph + assert_eq!(immediate_dominators[1], Some(6)); + assert_eq!(immediate_dominators[2], Some(6)); + assert_eq!(immediate_dominators[3], Some(6)); + assert_eq!(immediate_dominators[4], Some(6)); + assert_eq!(immediate_dominators[5], Some(6)); + assert_eq!(immediate_dominators[6], Some(6)); +} + +#[test] +fn paper_slt() { + // example from the paper: + let graph = TestGraph::new( + 1, + &[(1, 2), (1, 3), (2, 3), (2, 7), (3, 4), (3, 6), (4, 5), (5, 4), (6, 7), (7, 8), (8, 5)], + ); + + dominators(&graph); +} diff --git a/compiler/rustc_data_structures/src/graph/implementation/mod.rs b/compiler/rustc_data_structures/src/graph/implementation/mod.rs new file mode 100644 index 000000000..1aa7ac024 --- /dev/null +++ b/compiler/rustc_data_structures/src/graph/implementation/mod.rs @@ -0,0 +1,366 @@ +//! A graph module for use in dataflow, region resolution, and elsewhere. +//! +//! # Interface details +//! +//! You customize the graph by specifying a "node data" type `N` and an +//! "edge data" type `E`. You can then later gain access (mutable or +//! immutable) to these "user-data" bits. Currently, you can only add +//! nodes or edges to the graph. You cannot remove or modify them once +//! added. This could be changed if we have a need. +//! +//! # Implementation details +//! +//! The main tricky thing about this code is the way that edges are +//! stored. The edges are stored in a central array, but they are also +//! threaded onto two linked lists for each node, one for incoming edges +//! and one for outgoing edges. Note that every edge is a member of some +//! incoming list and some outgoing list. Basically you can load the +//! first index of the linked list from the node data structures (the +//! field `first_edge`) and then, for each edge, load the next index from +//! the field `next_edge`). Each of those fields is an array that should +//! 
be indexed by the direction (see the type `Direction`). + +use crate::snapshot_vec::{SnapshotVec, SnapshotVecDelegate}; +use rustc_index::bit_set::BitSet; +use std::fmt::Debug; + +#[cfg(test)] +mod tests; + +pub struct Graph { + nodes: SnapshotVec>, + edges: SnapshotVec>, +} + +pub struct Node { + first_edge: [EdgeIndex; 2], // see module comment + pub data: N, +} + +#[derive(Debug)] +pub struct Edge { + next_edge: [EdgeIndex; 2], // see module comment + source: NodeIndex, + target: NodeIndex, + pub data: E, +} + +impl SnapshotVecDelegate for Node { + type Value = Node; + type Undo = (); + + fn reverse(_: &mut Vec>, _: ()) {} +} + +impl SnapshotVecDelegate for Edge { + type Value = Edge; + type Undo = (); + + fn reverse(_: &mut Vec>, _: ()) {} +} + +#[derive(Copy, Clone, PartialEq, Debug)] +pub struct NodeIndex(pub usize); + +#[derive(Copy, Clone, PartialEq, Debug)] +pub struct EdgeIndex(pub usize); + +pub const INVALID_EDGE_INDEX: EdgeIndex = EdgeIndex(usize::MAX); + +// Use a private field here to guarantee no more instances are created: +#[derive(Copy, Clone, Debug, PartialEq)] +pub struct Direction { + repr: usize, +} + +pub const OUTGOING: Direction = Direction { repr: 0 }; + +pub const INCOMING: Direction = Direction { repr: 1 }; + +impl NodeIndex { + /// Returns unique ID (unique with respect to the graph holding associated node). + pub fn node_id(self) -> usize { + self.0 + } +} + +impl Graph { + pub fn new() -> Graph { + Graph { nodes: SnapshotVec::new(), edges: SnapshotVec::new() } + } + + pub fn with_capacity(nodes: usize, edges: usize) -> Graph { + Graph { nodes: SnapshotVec::with_capacity(nodes), edges: SnapshotVec::with_capacity(edges) } + } + + // # Simple accessors + + #[inline] + pub fn all_nodes(&self) -> &[Node] { + &self.nodes + } + + #[inline] + pub fn len_nodes(&self) -> usize { + self.nodes.len() + } + + #[inline] + pub fn all_edges(&self) -> &[Edge] { + &self.edges + } + + #[inline] + pub fn len_edges(&self) -> usize { + self.edges.len() + } + + // # Node construction + + pub fn next_node_index(&self) -> NodeIndex { + NodeIndex(self.nodes.len()) + } + + pub fn add_node(&mut self, data: N) -> NodeIndex { + let idx = self.next_node_index(); + self.nodes.push(Node { first_edge: [INVALID_EDGE_INDEX, INVALID_EDGE_INDEX], data }); + idx + } + + pub fn mut_node_data(&mut self, idx: NodeIndex) -> &mut N { + &mut self.nodes[idx.0].data + } + + pub fn node_data(&self, idx: NodeIndex) -> &N { + &self.nodes[idx.0].data + } + + pub fn node(&self, idx: NodeIndex) -> &Node { + &self.nodes[idx.0] + } + + // # Edge construction and queries + + pub fn next_edge_index(&self) -> EdgeIndex { + EdgeIndex(self.edges.len()) + } + + pub fn add_edge(&mut self, source: NodeIndex, target: NodeIndex, data: E) -> EdgeIndex { + debug!("graph: add_edge({:?}, {:?}, {:?})", source, target, data); + + let idx = self.next_edge_index(); + + // read current first of the list of edges from each node + let source_first = self.nodes[source.0].first_edge[OUTGOING.repr]; + let target_first = self.nodes[target.0].first_edge[INCOMING.repr]; + + // create the new edge, with the previous firsts from each node + // as the next pointers + self.edges.push(Edge { next_edge: [source_first, target_first], source, target, data }); + + // adjust the firsts for each node target be the next object. 
+ self.nodes[source.0].first_edge[OUTGOING.repr] = idx; + self.nodes[target.0].first_edge[INCOMING.repr] = idx; + + idx + } + + pub fn edge(&self, idx: EdgeIndex) -> &Edge { + &self.edges[idx.0] + } + + // # Iterating over nodes, edges + + pub fn enumerated_nodes(&self) -> impl Iterator)> { + self.nodes.iter().enumerate().map(|(idx, n)| (NodeIndex(idx), n)) + } + + pub fn enumerated_edges(&self) -> impl Iterator)> { + self.edges.iter().enumerate().map(|(idx, e)| (EdgeIndex(idx), e)) + } + + pub fn each_node<'a>(&'a self, mut f: impl FnMut(NodeIndex, &'a Node) -> bool) -> bool { + //! Iterates over all edges defined in the graph. + self.enumerated_nodes().all(|(node_idx, node)| f(node_idx, node)) + } + + pub fn each_edge<'a>(&'a self, mut f: impl FnMut(EdgeIndex, &'a Edge) -> bool) -> bool { + //! Iterates over all edges defined in the graph + self.enumerated_edges().all(|(edge_idx, edge)| f(edge_idx, edge)) + } + + pub fn outgoing_edges(&self, source: NodeIndex) -> AdjacentEdges<'_, N, E> { + self.adjacent_edges(source, OUTGOING) + } + + pub fn incoming_edges(&self, source: NodeIndex) -> AdjacentEdges<'_, N, E> { + self.adjacent_edges(source, INCOMING) + } + + pub fn adjacent_edges( + &self, + source: NodeIndex, + direction: Direction, + ) -> AdjacentEdges<'_, N, E> { + let first_edge = self.node(source).first_edge[direction.repr]; + AdjacentEdges { graph: self, direction, next: first_edge } + } + + pub fn successor_nodes<'a>( + &'a self, + source: NodeIndex, + ) -> impl Iterator + 'a { + self.outgoing_edges(source).targets() + } + + pub fn predecessor_nodes<'a>( + &'a self, + target: NodeIndex, + ) -> impl Iterator + 'a { + self.incoming_edges(target).sources() + } + + pub fn depth_traverse( + &self, + start: NodeIndex, + direction: Direction, + ) -> DepthFirstTraversal<'_, N, E> { + DepthFirstTraversal::with_start_node(self, start, direction) + } + + pub fn nodes_in_postorder( + &self, + direction: Direction, + entry_node: NodeIndex, + ) -> Vec { + let mut visited = BitSet::new_empty(self.len_nodes()); + let mut stack = vec![]; + let mut result = Vec::with_capacity(self.len_nodes()); + let mut push_node = |stack: &mut Vec<_>, node: NodeIndex| { + if visited.insert(node.0) { + stack.push((node, self.adjacent_edges(node, direction))); + } + }; + + for node in + Some(entry_node).into_iter().chain(self.enumerated_nodes().map(|(node, _)| node)) + { + push_node(&mut stack, node); + while let Some((node, mut iter)) = stack.pop() { + if let Some((_, child)) = iter.next() { + let target = child.source_or_target(direction); + // the current node needs more processing, so + // add it back to the stack + stack.push((node, iter)); + // and then push the new node + push_node(&mut stack, target); + } else { + result.push(node); + } + } + } + + assert_eq!(result.len(), self.len_nodes()); + result + } +} + +// # Iterators + +pub struct AdjacentEdges<'g, N, E> { + graph: &'g Graph, + direction: Direction, + next: EdgeIndex, +} + +impl<'g, N: Debug, E: Debug> AdjacentEdges<'g, N, E> { + fn targets(self) -> impl Iterator + 'g { + self.map(|(_, edge)| edge.target) + } + + fn sources(self) -> impl Iterator + 'g { + self.map(|(_, edge)| edge.source) + } +} + +impl<'g, N: Debug, E: Debug> Iterator for AdjacentEdges<'g, N, E> { + type Item = (EdgeIndex, &'g Edge); + + fn next(&mut self) -> Option<(EdgeIndex, &'g Edge)> { + let edge_index = self.next; + if edge_index == INVALID_EDGE_INDEX { + return None; + } + + let edge = self.graph.edge(edge_index); + self.next = edge.next_edge[self.direction.repr]; + 
Some((edge_index, edge)) + } + + fn size_hint(&self) -> (usize, Option) { + // At most, all the edges in the graph. + (0, Some(self.graph.len_edges())) + } +} + +pub struct DepthFirstTraversal<'g, N, E> { + graph: &'g Graph, + stack: Vec, + visited: BitSet, + direction: Direction, +} + +impl<'g, N: Debug, E: Debug> DepthFirstTraversal<'g, N, E> { + pub fn with_start_node( + graph: &'g Graph, + start_node: NodeIndex, + direction: Direction, + ) -> Self { + let mut visited = BitSet::new_empty(graph.len_nodes()); + visited.insert(start_node.node_id()); + DepthFirstTraversal { graph, stack: vec![start_node], visited, direction } + } + + fn visit(&mut self, node: NodeIndex) { + if self.visited.insert(node.node_id()) { + self.stack.push(node); + } + } +} + +impl<'g, N: Debug, E: Debug> Iterator for DepthFirstTraversal<'g, N, E> { + type Item = NodeIndex; + + fn next(&mut self) -> Option { + let next = self.stack.pop(); + if let Some(idx) = next { + for (_, edge) in self.graph.adjacent_edges(idx, self.direction) { + let target = edge.source_or_target(self.direction); + self.visit(target); + } + } + next + } + + fn size_hint(&self) -> (usize, Option) { + // We will visit every node in the graph exactly once. + let remaining = self.graph.len_nodes() - self.visited.count(); + (remaining, Some(remaining)) + } +} + +impl<'g, N: Debug, E: Debug> ExactSizeIterator for DepthFirstTraversal<'g, N, E> {} + +impl Edge { + pub fn source(&self) -> NodeIndex { + self.source + } + + pub fn target(&self) -> NodeIndex { + self.target + } + + pub fn source_or_target(&self, direction: Direction) -> NodeIndex { + if direction == OUTGOING { self.target } else { self.source } + } +} diff --git a/compiler/rustc_data_structures/src/graph/implementation/tests.rs b/compiler/rustc_data_structures/src/graph/implementation/tests.rs new file mode 100644 index 000000000..e4e4d0d44 --- /dev/null +++ b/compiler/rustc_data_structures/src/graph/implementation/tests.rs @@ -0,0 +1,131 @@ +use crate::graph::implementation::*; +use std::fmt::Debug; + +type TestGraph = Graph<&'static str, &'static str>; + +fn create_graph() -> TestGraph { + let mut graph = Graph::new(); + + // Create a simple graph + // + // F + // | + // V + // A --> B --> C + // | ^ + // v | + // D --> E + + let a = graph.add_node("A"); + let b = graph.add_node("B"); + let c = graph.add_node("C"); + let d = graph.add_node("D"); + let e = graph.add_node("E"); + let f = graph.add_node("F"); + + graph.add_edge(a, b, "AB"); + graph.add_edge(b, c, "BC"); + graph.add_edge(b, d, "BD"); + graph.add_edge(d, e, "DE"); + graph.add_edge(e, c, "EC"); + graph.add_edge(f, b, "FB"); + + return graph; +} + +#[test] +fn each_node() { + let graph = create_graph(); + let expected = ["A", "B", "C", "D", "E", "F"]; + graph.each_node(|idx, node| { + assert_eq!(&expected[idx.0], graph.node_data(idx)); + assert_eq!(expected[idx.0], node.data); + true + }); +} + +#[test] +fn each_edge() { + let graph = create_graph(); + let expected = ["AB", "BC", "BD", "DE", "EC", "FB"]; + graph.each_edge(|idx, edge| { + assert_eq!(expected[idx.0], edge.data); + true + }); +} + +fn test_adjacent_edges( + graph: &Graph, + start_index: NodeIndex, + start_data: N, + expected_incoming: &[(E, N)], + expected_outgoing: &[(E, N)], +) { + assert!(graph.node_data(start_index) == &start_data); + + let mut counter = 0; + for (edge_index, edge) in graph.incoming_edges(start_index) { + assert!(counter < expected_incoming.len()); + debug!( + "counter={:?} expected={:?} edge_index={:?} edge={:?}", + counter, 
expected_incoming[counter], edge_index, edge + ); + match expected_incoming[counter] { + (ref e, ref n) => { + assert!(e == &edge.data); + assert!(n == graph.node_data(edge.source())); + assert!(start_index == edge.target); + } + } + counter += 1; + } + assert_eq!(counter, expected_incoming.len()); + + let mut counter = 0; + for (edge_index, edge) in graph.outgoing_edges(start_index) { + assert!(counter < expected_outgoing.len()); + debug!( + "counter={:?} expected={:?} edge_index={:?} edge={:?}", + counter, expected_outgoing[counter], edge_index, edge + ); + match expected_outgoing[counter] { + (ref e, ref n) => { + assert!(e == &edge.data); + assert!(start_index == edge.source); + assert!(n == graph.node_data(edge.target)); + } + } + counter += 1; + } + assert_eq!(counter, expected_outgoing.len()); +} + +#[test] +fn each_adjacent_from_a() { + let graph = create_graph(); + test_adjacent_edges(&graph, NodeIndex(0), "A", &[], &[("AB", "B")]); +} + +#[test] +fn each_adjacent_from_b() { + let graph = create_graph(); + test_adjacent_edges( + &graph, + NodeIndex(1), + "B", + &[("FB", "F"), ("AB", "A")], + &[("BD", "D"), ("BC", "C")], + ); +} + +#[test] +fn each_adjacent_from_c() { + let graph = create_graph(); + test_adjacent_edges(&graph, NodeIndex(2), "C", &[("EC", "E"), ("BC", "B")], &[]); +} + +#[test] +fn each_adjacent_from_d() { + let graph = create_graph(); + test_adjacent_edges(&graph, NodeIndex(3), "D", &[("BD", "B")], &[("DE", "E")]); +} diff --git a/compiler/rustc_data_structures/src/graph/iterate/mod.rs b/compiler/rustc_data_structures/src/graph/iterate/mod.rs new file mode 100644 index 000000000..57007611a --- /dev/null +++ b/compiler/rustc_data_structures/src/graph/iterate/mod.rs @@ -0,0 +1,353 @@ +use super::{DirectedGraph, WithNumNodes, WithStartNode, WithSuccessors}; +use rustc_index::bit_set::BitSet; +use rustc_index::vec::IndexVec; +use std::ops::ControlFlow; + +#[cfg(test)] +mod tests; + +pub fn post_order_from( + graph: &G, + start_node: G::Node, +) -> Vec { + post_order_from_to(graph, start_node, None) +} + +pub fn post_order_from_to( + graph: &G, + start_node: G::Node, + end_node: Option, +) -> Vec { + let mut visited: IndexVec = IndexVec::from_elem_n(false, graph.num_nodes()); + let mut result: Vec = Vec::with_capacity(graph.num_nodes()); + if let Some(end_node) = end_node { + visited[end_node] = true; + } + post_order_walk(graph, start_node, &mut result, &mut visited); + result +} + +fn post_order_walk( + graph: &G, + node: G::Node, + result: &mut Vec, + visited: &mut IndexVec, +) { + struct PostOrderFrame { + node: Node, + iter: Iter, + } + + if visited[node] { + return; + } + + let mut stack = vec![PostOrderFrame { node, iter: graph.successors(node) }]; + + 'recurse: while let Some(frame) = stack.last_mut() { + let node = frame.node; + visited[node] = true; + + while let Some(successor) = frame.iter.next() { + if !visited[successor] { + stack.push(PostOrderFrame { node: successor, iter: graph.successors(successor) }); + continue 'recurse; + } + } + + let _ = stack.pop(); + result.push(node); + } +} + +pub fn reverse_post_order( + graph: &G, + start_node: G::Node, +) -> Vec { + let mut vec = post_order_from(graph, start_node); + vec.reverse(); + vec +} + +/// A "depth-first search" iterator for a directed graph. 
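+///
+/// A minimal usage sketch (assuming a graph value `graph` of some type that
+/// implements `DirectedGraph + WithNumNodes + WithSuccessors`, and a node
+/// `start` in it): the iterator yields every node reachable from the pushed
+/// start nodes exactly once, in depth-first order.
+///
+/// ```ignore (illustrative only)
+/// let mut dfs = DepthFirstSearch::new(&graph).with_start_node(start);
+/// let reachable: Vec<_> = dfs.by_ref().collect();
+/// assert!(dfs.visited(start));
+/// ```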
+pub struct DepthFirstSearch<'graph, G> +where + G: ?Sized + DirectedGraph + WithNumNodes + WithSuccessors, +{ + graph: &'graph G, + stack: Vec, + visited: BitSet, +} + +impl<'graph, G> DepthFirstSearch<'graph, G> +where + G: ?Sized + DirectedGraph + WithNumNodes + WithSuccessors, +{ + pub fn new(graph: &'graph G) -> Self { + Self { graph, stack: vec![], visited: BitSet::new_empty(graph.num_nodes()) } + } + + /// Version of `push_start_node` that is convenient for chained + /// use. + pub fn with_start_node(mut self, start_node: G::Node) -> Self { + self.push_start_node(start_node); + self + } + + /// Pushes another start node onto the stack. If the node + /// has not already been visited, then you will be able to + /// walk its successors (and so forth) after the current + /// contents of the stack are drained. If multiple start nodes + /// are added into the walk, then their mutual successors + /// will all be walked. You can use this method once the + /// iterator has been completely drained to add additional + /// start nodes. + pub fn push_start_node(&mut self, start_node: G::Node) { + if self.visited.insert(start_node) { + self.stack.push(start_node); + } + } + + /// Searches all nodes reachable from the current start nodes. + /// This is equivalent to just invoke `next` repeatedly until + /// you get a `None` result. + pub fn complete_search(&mut self) { + while let Some(_) = self.next() {} + } + + /// Returns true if node has been visited thus far. + /// A node is considered "visited" once it is pushed + /// onto the internal stack; it may not yet have been yielded + /// from the iterator. This method is best used after + /// the iterator is completely drained. + pub fn visited(&self, node: G::Node) -> bool { + self.visited.contains(node) + } +} + +impl std::fmt::Debug for DepthFirstSearch<'_, G> +where + G: ?Sized + DirectedGraph + WithNumNodes + WithSuccessors, +{ + fn fmt(&self, fmt: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + let mut f = fmt.debug_set(); + for n in self.visited.iter() { + f.entry(&n); + } + f.finish() + } +} + +impl Iterator for DepthFirstSearch<'_, G> +where + G: ?Sized + DirectedGraph + WithNumNodes + WithSuccessors, +{ + type Item = G::Node; + + fn next(&mut self) -> Option { + let DepthFirstSearch { stack, visited, graph } = self; + let n = stack.pop()?; + stack.extend(graph.successors(n).filter(|&m| visited.insert(m))); + Some(n) + } +} + +/// The status of a node in the depth-first search. +/// +/// See the documentation of `TriColorDepthFirstSearch` to see how a node's status is updated +/// during DFS. +#[derive(Clone, Copy, Debug, PartialEq, Eq)] +pub enum NodeStatus { + /// This node has been examined by the depth-first search but is not yet `Settled`. + /// + /// Also referred to as "gray" or "discovered" nodes in [CLR]. + /// + /// [CLR]: https://en.wikipedia.org/wiki/Introduction_to_Algorithms + Visited, + + /// This node and all nodes reachable from it have been examined by the depth-first search. + /// + /// Also referred to as "black" or "finished" nodes in [CLR]. + /// + /// [CLR]: https://en.wikipedia.org/wiki/Introduction_to_Algorithms + Settled, +} + +struct Event { + node: N, + becomes: NodeStatus, +} + +/// A depth-first search that also tracks when all successors of a node have been examined. +/// +/// This is based on the DFS described in [Introduction to Algorithms (1st ed.)][CLR], hereby +/// referred to as **CLR**. 
However, we use the terminology in [`NodeStatus`] above instead of +/// "discovered"/"finished" or "white"/"grey"/"black". Each node begins the search with no status, +/// becomes `Visited` when it is first examined by the DFS and is `Settled` when all nodes +/// reachable from it have been examined. This allows us to differentiate between "tree", "back" +/// and "forward" edges (see [`TriColorVisitor::node_examined`]). +/// +/// Unlike the pseudocode in [CLR], this implementation is iterative and does not use timestamps. +/// We accomplish this by storing `Event`s on the stack that result in a (possible) state change +/// for each node. A `Visited` event signifies that we should examine this node if it has not yet +/// been `Visited` or `Settled`. When a node is examined for the first time, we mark it as +/// `Visited` and push a `Settled` event for it on stack followed by `Visited` events for all of +/// its predecessors, scheduling them for examination. Multiple `Visited` events for a single node +/// may exist on the stack simultaneously if a node has multiple predecessors, but only one +/// `Settled` event will ever be created for each node. After all `Visited` events for a node's +/// successors have been popped off the stack (as well as any new events triggered by visiting +/// those successors), we will pop off that node's `Settled` event. +/// +/// [CLR]: https://en.wikipedia.org/wiki/Introduction_to_Algorithms +pub struct TriColorDepthFirstSearch<'graph, G> +where + G: ?Sized + DirectedGraph + WithNumNodes + WithSuccessors, +{ + graph: &'graph G, + stack: Vec>, + visited: BitSet, + settled: BitSet, +} + +impl<'graph, G> TriColorDepthFirstSearch<'graph, G> +where + G: ?Sized + DirectedGraph + WithNumNodes + WithSuccessors, +{ + pub fn new(graph: &'graph G) -> Self { + TriColorDepthFirstSearch { + graph, + stack: vec![], + visited: BitSet::new_empty(graph.num_nodes()), + settled: BitSet::new_empty(graph.num_nodes()), + } + } + + /// Performs a depth-first search, starting from the given `root`. + /// + /// This won't visit nodes that are not reachable from `root`. + pub fn run_from(mut self, root: G::Node, visitor: &mut V) -> Option + where + V: TriColorVisitor, + { + use NodeStatus::{Settled, Visited}; + + self.stack.push(Event { node: root, becomes: Visited }); + + loop { + match self.stack.pop()? { + Event { node, becomes: Settled } => { + let not_previously_settled = self.settled.insert(node); + assert!(not_previously_settled, "A node should be settled exactly once"); + if let ControlFlow::Break(val) = visitor.node_settled(node) { + return Some(val); + } + } + + Event { node, becomes: Visited } => { + let not_previously_visited = self.visited.insert(node); + let prior_status = if not_previously_visited { + None + } else if self.settled.contains(node) { + Some(Settled) + } else { + Some(Visited) + }; + + if let ControlFlow::Break(val) = visitor.node_examined(node, prior_status) { + return Some(val); + } + + // If this node has already been examined, we are done. + if prior_status.is_some() { + continue; + } + + // Otherwise, push a `Settled` event for this node onto the stack, then + // schedule its successors for examination. 
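+                    // (Edges for which `visitor.ignore_edge(node, succ)`
+                    // returns true are skipped below, so their targets are not
+                    // scheduled from this edge.)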
+ self.stack.push(Event { node, becomes: Settled }); + for succ in self.graph.successors(node) { + if !visitor.ignore_edge(node, succ) { + self.stack.push(Event { node: succ, becomes: Visited }); + } + } + } + } + } + } +} + +impl TriColorDepthFirstSearch<'_, G> +where + G: ?Sized + DirectedGraph + WithNumNodes + WithSuccessors + WithStartNode, +{ + /// Performs a depth-first search, starting from `G::start_node()`. + /// + /// This won't visit nodes that are not reachable from the start node. + pub fn run_from_start(self, visitor: &mut V) -> Option + where + V: TriColorVisitor, + { + let root = self.graph.start_node(); + self.run_from(root, visitor) + } +} + +/// What to do when a node is examined or becomes `Settled` during DFS. +pub trait TriColorVisitor +where + G: ?Sized + DirectedGraph, +{ + /// The value returned by this search. + type BreakVal; + + /// Called when a node is examined by the depth-first search. + /// + /// By checking the value of `prior_status`, this visitor can determine whether the edge + /// leading to this node was a tree edge (`None`), forward edge (`Some(Settled)`) or back edge + /// (`Some(Visited)`). For a full explanation of each edge type, see the "Depth-first Search" + /// chapter in [CLR] or [wikipedia]. + /// + /// If you want to know *both* nodes linked by each edge, you'll need to modify + /// `TriColorDepthFirstSearch` to store a `source` node for each `Visited` event. + /// + /// [wikipedia]: https://en.wikipedia.org/wiki/Depth-first_search#Output_of_a_depth-first_search + /// [CLR]: https://en.wikipedia.org/wiki/Introduction_to_Algorithms + fn node_examined( + &mut self, + _node: G::Node, + _prior_status: Option, + ) -> ControlFlow { + ControlFlow::CONTINUE + } + + /// Called after all nodes reachable from this one have been examined. + fn node_settled(&mut self, _node: G::Node) -> ControlFlow { + ControlFlow::CONTINUE + } + + /// Behave as if no edges exist from `source` to `target`. + fn ignore_edge(&mut self, _source: G::Node, _target: G::Node) -> bool { + false + } +} + +/// This `TriColorVisitor` looks for back edges in a graph, which indicate that a cycle exists. 
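+///
+/// A minimal usage sketch (assuming a graph value `g` whose type implements
+/// `DirectedGraph + WithNumNodes + WithSuccessors + WithStartNode`): the
+/// search breaks with `Some(())` exactly when a back edge is reachable from
+/// the start node.
+///
+/// ```ignore (illustrative only)
+/// let has_cycle = TriColorDepthFirstSearch::new(&g)
+///     .run_from_start(&mut CycleDetector)
+///     .is_some();
+/// ```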
+pub struct CycleDetector; + +impl TriColorVisitor for CycleDetector +where + G: ?Sized + DirectedGraph, +{ + type BreakVal = (); + + fn node_examined( + &mut self, + _node: G::Node, + prior_status: Option, + ) -> ControlFlow { + match prior_status { + Some(NodeStatus::Visited) => ControlFlow::BREAK, + _ => ControlFlow::CONTINUE, + } + } +} diff --git a/compiler/rustc_data_structures/src/graph/iterate/tests.rs b/compiler/rustc_data_structures/src/graph/iterate/tests.rs new file mode 100644 index 000000000..c498c2893 --- /dev/null +++ b/compiler/rustc_data_structures/src/graph/iterate/tests.rs @@ -0,0 +1,38 @@ +use super::super::tests::TestGraph; + +use super::*; + +#[test] +fn diamond_post_order() { + let graph = TestGraph::new(0, &[(0, 1), (0, 2), (1, 3), (2, 3)]); + + let result = post_order_from(&graph, 0); + assert_eq!(result, vec![3, 1, 2, 0]); +} + +#[test] +fn is_cyclic() { + use super::super::is_cyclic; + + let diamond_acyclic = TestGraph::new(0, &[(0, 1), (0, 2), (1, 3), (2, 3)]); + let diamond_cyclic = TestGraph::new(0, &[(0, 1), (1, 2), (2, 3), (3, 0)]); + + assert!(!is_cyclic(&diamond_acyclic)); + assert!(is_cyclic(&diamond_cyclic)); +} + +#[test] +fn dfs() { + let graph = TestGraph::new(0, &[(0, 1), (0, 2), (1, 3), (2, 3), (3, 0)]); + + let result: Vec = DepthFirstSearch::new(&graph).with_start_node(0).collect(); + assert_eq!(result, vec![0, 2, 3, 1]); +} + +#[test] +fn dfs_debug() { + let graph = TestGraph::new(0, &[(0, 1), (0, 2), (1, 3), (2, 3), (3, 0)]); + let mut dfs = DepthFirstSearch::new(&graph).with_start_node(0); + dfs.complete_search(); + assert_eq!(format!("{{0, 1, 2, 3}}"), format!("{:?}", dfs)); +} diff --git a/compiler/rustc_data_structures/src/graph/mod.rs b/compiler/rustc_data_structures/src/graph/mod.rs new file mode 100644 index 000000000..3560df6e5 --- /dev/null +++ b/compiler/rustc_data_structures/src/graph/mod.rs @@ -0,0 +1,81 @@ +use rustc_index::vec::Idx; + +pub mod dominators; +pub mod implementation; +pub mod iterate; +mod reference; +pub mod scc; +pub mod vec_graph; + +#[cfg(test)] +mod tests; + +pub trait DirectedGraph { + type Node: Idx; +} + +pub trait WithNumNodes: DirectedGraph { + fn num_nodes(&self) -> usize; +} + +pub trait WithNumEdges: DirectedGraph { + fn num_edges(&self) -> usize; +} + +pub trait WithSuccessors: DirectedGraph +where + Self: for<'graph> GraphSuccessors<'graph, Item = ::Node>, +{ + fn successors(&self, node: Self::Node) -> >::Iter; + + fn depth_first_search(&self, from: Self::Node) -> iterate::DepthFirstSearch<'_, Self> + where + Self: WithNumNodes, + { + iterate::DepthFirstSearch::new(self).with_start_node(from) + } +} + +#[allow(unused_lifetimes)] +pub trait GraphSuccessors<'graph> { + type Item; + type Iter: Iterator; +} + +pub trait WithPredecessors: DirectedGraph +where + Self: for<'graph> GraphPredecessors<'graph, Item = ::Node>, +{ + fn predecessors(&self, node: Self::Node) -> >::Iter; +} + +#[allow(unused_lifetimes)] +pub trait GraphPredecessors<'graph> { + type Item; + type Iter: Iterator; +} + +pub trait WithStartNode: DirectedGraph { + fn start_node(&self) -> Self::Node; +} + +pub trait ControlFlowGraph: + DirectedGraph + WithStartNode + WithPredecessors + WithSuccessors + WithNumNodes +{ + // convenient trait +} + +impl ControlFlowGraph for T where + T: DirectedGraph + WithStartNode + WithPredecessors + WithSuccessors + WithNumNodes +{ +} + +/// Returns `true` if the graph has a cycle that is reachable from the start node. 
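+///
+/// A minimal sketch mirroring the unit tests (which use the test-only
+/// `TestGraph` helper): an acyclic diamond reports `false`, while a four-node
+/// loop reports `true`.
+///
+/// ```ignore (uses the crate's test-only TestGraph)
+/// let diamond_acyclic = TestGraph::new(0, &[(0, 1), (0, 2), (1, 3), (2, 3)]);
+/// let diamond_cyclic = TestGraph::new(0, &[(0, 1), (1, 2), (2, 3), (3, 0)]);
+/// assert!(!is_cyclic(&diamond_acyclic));
+/// assert!(is_cyclic(&diamond_cyclic));
+/// ```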
+pub fn is_cyclic(graph: &G) -> bool +where + G: ?Sized + DirectedGraph + WithStartNode + WithSuccessors + WithNumNodes, +{ + iterate::TriColorDepthFirstSearch::new(graph) + .run_from_start(&mut iterate::CycleDetector) + .is_some() +} diff --git a/compiler/rustc_data_structures/src/graph/reference.rs b/compiler/rustc_data_structures/src/graph/reference.rs new file mode 100644 index 000000000..c259fe56c --- /dev/null +++ b/compiler/rustc_data_structures/src/graph/reference.rs @@ -0,0 +1,39 @@ +use super::*; + +impl<'graph, G: DirectedGraph> DirectedGraph for &'graph G { + type Node = G::Node; +} + +impl<'graph, G: WithNumNodes> WithNumNodes for &'graph G { + fn num_nodes(&self) -> usize { + (**self).num_nodes() + } +} + +impl<'graph, G: WithStartNode> WithStartNode for &'graph G { + fn start_node(&self) -> Self::Node { + (**self).start_node() + } +} + +impl<'graph, G: WithSuccessors> WithSuccessors for &'graph G { + fn successors(&self, node: Self::Node) -> >::Iter { + (**self).successors(node) + } +} + +impl<'graph, G: WithPredecessors> WithPredecessors for &'graph G { + fn predecessors(&self, node: Self::Node) -> >::Iter { + (**self).predecessors(node) + } +} + +impl<'iter, 'graph, G: WithPredecessors> GraphPredecessors<'iter> for &'graph G { + type Item = G::Node; + type Iter = >::Iter; +} + +impl<'iter, 'graph, G: WithSuccessors> GraphSuccessors<'iter> for &'graph G { + type Item = G::Node; + type Iter = >::Iter; +} diff --git a/compiler/rustc_data_structures/src/graph/scc/mod.rs b/compiler/rustc_data_structures/src/graph/scc/mod.rs new file mode 100644 index 000000000..7099ca7eb --- /dev/null +++ b/compiler/rustc_data_structures/src/graph/scc/mod.rs @@ -0,0 +1,567 @@ +//! Routine to compute the strongly connected components (SCCs) of a graph. +//! +//! Also computes as the resulting DAG if each SCC is replaced with a +//! node in the graph. This uses [Tarjan's algorithm]( +//! https://en.wikipedia.org/wiki/Tarjan%27s_strongly_connected_components_algorithm) +//! that completes in *O*(*n*) time. + +use crate::fx::FxHashSet; +use crate::graph::vec_graph::VecGraph; +use crate::graph::{DirectedGraph, GraphSuccessors, WithNumEdges, WithNumNodes, WithSuccessors}; +use rustc_index::vec::{Idx, IndexVec}; +use std::cmp::Ord; +use std::ops::Range; + +#[cfg(test)] +mod tests; + +/// Strongly connected components (SCC) of a graph. The type `N` is +/// the index type for the graph nodes and `S` is the index type for +/// the SCCs. We can map from each node to the SCC that it +/// participates in, and we also have the successors of each SCC. +pub struct Sccs { + /// For each node, what is the SCC index of the SCC to which it + /// belongs. + scc_indices: IndexVec, + + /// Data about each SCC. + scc_data: SccData, +} + +struct SccData { + /// For each SCC, the range of `all_successors` where its + /// successors can be found. + ranges: IndexVec>, + + /// Contains the successors for all the Sccs, concatenated. The + /// range of indices corresponding to a given SCC is found in its + /// SccData. + all_successors: Vec, +} + +impl Sccs { + pub fn new(graph: &(impl DirectedGraph + WithNumNodes + WithSuccessors)) -> Self { + SccsConstruction::construct(graph) + } + + /// Returns the number of SCCs in the graph. + pub fn num_sccs(&self) -> usize { + self.scc_data.len() + } + + /// Returns an iterator over the SCCs in the graph. + /// + /// The SCCs will be iterated in **dependency order** (or **post order**), + /// meaning that if `S1 -> S2`, we will visit `S2` first and `S1` after. 
+ /// This is convenient when the edges represent dependencies: when you visit + /// `S1`, the value for `S2` will already have been computed. + pub fn all_sccs(&self) -> impl Iterator { + (0..self.scc_data.len()).map(S::new) + } + + /// Returns the SCC to which a node `r` belongs. + pub fn scc(&self, r: N) -> S { + self.scc_indices[r] + } + + /// Returns the successors of the given SCC. + pub fn successors(&self, scc: S) -> &[S] { + self.scc_data.successors(scc) + } + + /// Construct the reverse graph of the SCC graph. + pub fn reverse(&self) -> VecGraph { + VecGraph::new( + self.num_sccs(), + self.all_sccs() + .flat_map(|source| { + self.successors(source).iter().map(move |&target| (target, source)) + }) + .collect(), + ) + } +} + +impl DirectedGraph for Sccs { + type Node = S; +} + +impl WithNumNodes for Sccs { + fn num_nodes(&self) -> usize { + self.num_sccs() + } +} + +impl WithNumEdges for Sccs { + fn num_edges(&self) -> usize { + self.scc_data.all_successors.len() + } +} + +impl<'graph, N: Idx, S: Idx> GraphSuccessors<'graph> for Sccs { + type Item = S; + + type Iter = std::iter::Cloned>; +} + +impl WithSuccessors for Sccs { + fn successors(&self, node: S) -> >::Iter { + self.successors(node).iter().cloned() + } +} + +impl SccData { + /// Number of SCCs, + fn len(&self) -> usize { + self.ranges.len() + } + + /// Returns the successors of the given SCC. + fn successors(&self, scc: S) -> &[S] { + // Annoyingly, `range` does not implement `Copy`, so we have + // to do `range.start..range.end`: + let range = &self.ranges[scc]; + &self.all_successors[range.start..range.end] + } + + /// Creates a new SCC with `successors` as its successors and + /// returns the resulting index. + fn create_scc(&mut self, successors: impl IntoIterator) -> S { + // Store the successors on `scc_successors_vec`, remembering + // the range of indices. + let all_successors_start = self.all_successors.len(); + self.all_successors.extend(successors); + let all_successors_end = self.all_successors.len(); + + debug!( + "create_scc({:?}) successors={:?}", + self.ranges.len(), + &self.all_successors[all_successors_start..all_successors_end], + ); + + self.ranges.push(all_successors_start..all_successors_end) + } +} + +struct SccsConstruction<'c, G: DirectedGraph + WithNumNodes + WithSuccessors, S: Idx> { + graph: &'c G, + + /// The state of each node; used during walk to record the stack + /// and after walk to record what cycle each node ended up being + /// in. + node_states: IndexVec>, + + /// The stack of nodes that we are visiting as part of the DFS. + node_stack: Vec, + + /// The stack of successors: as we visit a node, we mark our + /// position in this stack, and when we encounter a successor SCC, + /// we push it on the stack. When we complete an SCC, we can pop + /// everything off the stack that was found along the way. + successors_stack: Vec, + + /// A set used to strip duplicates. As we accumulate successors + /// into the successors_stack, we sometimes get duplicate entries. + /// We use this set to remove those -- we also keep its storage + /// around between successors to amortize memory allocation costs. + duplicate_set: FxHashSet, + + scc_data: SccData, +} + +#[derive(Copy, Clone, Debug)] +enum NodeState { + /// This node has not yet been visited as part of the DFS. + /// + /// After SCC construction is complete, this state ought to be + /// impossible. + NotVisited, + + /// This node is currently being walk as part of our DFS. It is on + /// the stack at the depth `depth`. 
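+    /// (`depth` is also this node's index in `node_stack`; `find_state` relies
+    /// on that to map a `BeingVisited { depth }` parent back to its node.)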
+ /// + /// After SCC construction is complete, this state ought to be + /// impossible. + BeingVisited { depth: usize }, + + /// Indicates that this node is a member of the given cycle. + InCycle { scc_index: S }, + + /// Indicates that this node is a member of whatever cycle + /// `parent` is a member of. This state is transient: whenever we + /// see it, we try to overwrite it with the current state of + /// `parent` (this is the "path compression" step of a union-find + /// algorithm). + InCycleWith { parent: N }, +} + +#[derive(Copy, Clone, Debug)] +enum WalkReturn { + Cycle { min_depth: usize }, + Complete { scc_index: S }, +} + +impl<'c, G, S> SccsConstruction<'c, G, S> +where + G: DirectedGraph + WithNumNodes + WithSuccessors, + S: Idx, +{ + /// Identifies SCCs in the graph `G` and computes the resulting + /// DAG. This uses a variant of [Tarjan's + /// algorithm][wikipedia]. The high-level summary of the algorithm + /// is that we do a depth-first search. Along the way, we keep a + /// stack of each node whose successors are being visited. We + /// track the depth of each node on this stack (there is no depth + /// if the node is not on the stack). When we find that some node + /// N with depth D can reach some other node N' with lower depth + /// D' (i.e., D' < D), we know that N, N', and all nodes in + /// between them on the stack are part of an SCC. + /// + /// [wikipedia]: https://bit.ly/2EZIx84 + fn construct(graph: &'c G) -> Sccs { + let num_nodes = graph.num_nodes(); + + let mut this = Self { + graph, + node_states: IndexVec::from_elem_n(NodeState::NotVisited, num_nodes), + node_stack: Vec::with_capacity(num_nodes), + successors_stack: Vec::new(), + scc_data: SccData { ranges: IndexVec::new(), all_successors: Vec::new() }, + duplicate_set: FxHashSet::default(), + }; + + let scc_indices = (0..num_nodes) + .map(G::Node::new) + .map(|node| match this.start_walk_from(node) { + WalkReturn::Complete { scc_index } => scc_index, + WalkReturn::Cycle { min_depth } => panic!( + "`start_walk_node({:?})` returned cycle with depth {:?}", + node, min_depth + ), + }) + .collect(); + + Sccs { scc_indices, scc_data: this.scc_data } + } + + fn start_walk_from(&mut self, node: G::Node) -> WalkReturn { + if let Some(result) = self.inspect_node(node) { + result + } else { + self.walk_unvisited_node(node) + } + } + + /// Inspect a node during the DFS. We first examine its current + /// state -- if it is not yet visited (`NotVisited`), return `None` so + /// that the caller might push it onto the stack and start walking its + /// successors. + /// + /// If it is already on the DFS stack it will be in the state + /// `BeingVisited`. In that case, we have found a cycle and we + /// return the depth from the stack. + /// + /// Otherwise, we are looking at a node that has already been + /// completely visited. We therefore return `WalkReturn::Complete` + /// with its associated SCC index. + fn inspect_node(&mut self, node: G::Node) -> Option> { + Some(match self.find_state(node) { + NodeState::InCycle { scc_index } => WalkReturn::Complete { scc_index }, + + NodeState::BeingVisited { depth: min_depth } => WalkReturn::Cycle { min_depth }, + + NodeState::NotVisited => return None, + + NodeState::InCycleWith { parent } => panic!( + "`find_state` returned `InCycleWith({:?})`, which ought to be impossible", + parent + ), + }) + } + + /// Fetches the state of the node `r`. 
If `r` is recorded as being + /// in a cycle with some other node `r2`, then fetches the state + /// of `r2` (and updates `r` to reflect current result). This is + /// basically the "find" part of a standard union-find algorithm + /// (with path compression). + fn find_state(&mut self, mut node: G::Node) -> NodeState { + // To avoid recursion we temporarily reuse the `parent` of each + // InCycleWith link to encode a downwards link while compressing + // the path. After we have found the root or deepest node being + // visited, we traverse the reverse links and correct the node + // states on the way. + // + // **Note**: This mutation requires that this is a leaf function + // or at least that none of the called functions inspects the + // current node states. Luckily, we are a leaf. + + // Remember one previous link. The termination condition when + // following links downwards is then simply as soon as we have + // found the initial self-loop. + let mut previous_node = node; + + // Ultimately assigned by the parent when following + // `InCycleWith` upwards. + let node_state = loop { + debug!("find_state(r = {:?} in state {:?})", node, self.node_states[node]); + match self.node_states[node] { + NodeState::InCycle { scc_index } => break NodeState::InCycle { scc_index }, + NodeState::BeingVisited { depth } => break NodeState::BeingVisited { depth }, + NodeState::NotVisited => break NodeState::NotVisited, + NodeState::InCycleWith { parent } => { + // We test this, to be extremely sure that we never + // ever break our termination condition for the + // reverse iteration loop. + assert!(node != parent, "Node can not be in cycle with itself"); + // Store the previous node as an inverted list link + self.node_states[node] = NodeState::InCycleWith { parent: previous_node }; + // Update to parent node. + previous_node = node; + node = parent; + } + } + }; + + // The states form a graph where up to one outgoing link is stored at + // each node. Initially in general, + // + // E + // ^ + // | + // InCycleWith/BeingVisited/NotVisited + // | + // A-InCycleWith->B-InCycleWith…>C-InCycleWith->D-+ + // | + // = node, previous_node + // + // After the first loop, this will look like + // E + // ^ + // | + // InCycleWith/BeingVisited/NotVisited + // | + // +>A<-InCycleWith-B<…InCycleWith-C<-InCycleWith-D-+ + // | | | | + // | InCycleWith | = node + // +-+ =previous_node + // + // Note in particular that A will be linked to itself in a self-cycle + // and no other self-cycles occur due to how InCycleWith is assigned in + // the find phase implemented by `walk_unvisited_node`. + // + // We now want to compress the path, that is assign the state of the + // link D-E to all other links. + // + // We can then walk backwards, starting from `previous_node`, and assign + // each node in the list with the updated state. The loop terminates + // when we reach the self-cycle. + + // Move backwards until we found the node where we started. We + // will know when we hit the state where previous_node == node. + loop { + // Back at the beginning, we can return. + if previous_node == node { + return node_state; + } + // Update to previous node in the link. + match self.node_states[previous_node] { + NodeState::InCycleWith { parent: previous } => { + node = previous_node; + previous_node = previous; + } + // Only InCycleWith nodes were added to the reverse linked list. 
+ other => panic!("Invalid previous link while compressing cycle: {:?}", other), + } + + debug!("find_state: parent_state = {:?}", node_state); + + // Update the node state from the parent state. The assigned + // state is actually a loop invariant but it will only be + // evaluated if there is at least one backlink to follow. + // Fully trusting llvm here to find this loop optimization. + match node_state { + // Path compression, make current node point to the same root. + NodeState::InCycle { .. } => { + self.node_states[node] = node_state; + } + // Still visiting nodes, compress to cycle to the node + // at that depth. + NodeState::BeingVisited { depth } => { + self.node_states[node] = + NodeState::InCycleWith { parent: self.node_stack[depth] }; + } + // These are never allowed as parent nodes. InCycleWith + // should have been followed to a real parent and + // NotVisited can not be part of a cycle since it should + // have instead gotten explored. + NodeState::NotVisited | NodeState::InCycleWith { .. } => { + panic!("invalid parent state: {:?}", node_state) + } + } + } + } + + /// Walks a node that has never been visited before. + /// + /// Call this method when `inspect_node` has returned `None`. Having the + /// caller decide avoids mutual recursion between the two methods and allows + /// us to maintain an allocated stack for nodes on the path between calls. + #[instrument(skip(self, initial), level = "debug")] + fn walk_unvisited_node(&mut self, initial: G::Node) -> WalkReturn { + struct VisitingNodeFrame { + node: G::Node, + iter: Option, + depth: usize, + min_depth: usize, + successors_len: usize, + min_cycle_root: G::Node, + successor_node: G::Node, + } + + // Move the stack to a local variable. We want to utilize the existing allocation and + // mutably borrow it without borrowing self at the same time. + let mut successors_stack = core::mem::take(&mut self.successors_stack); + debug_assert_eq!(successors_stack.len(), 0); + + let mut stack: Vec> = vec![VisitingNodeFrame { + node: initial, + depth: 0, + min_depth: 0, + iter: None, + successors_len: 0, + min_cycle_root: initial, + successor_node: initial, + }]; + + let mut return_value = None; + + 'recurse: while let Some(frame) = stack.last_mut() { + let VisitingNodeFrame { + node, + depth, + iter, + successors_len, + min_depth, + min_cycle_root, + successor_node, + } = frame; + + let node = *node; + let depth = *depth; + + let successors = match iter { + Some(iter) => iter, + None => { + // This None marks that we still have the initialize this node's frame. + debug!(?depth, ?node); + + debug_assert!(matches!(self.node_states[node], NodeState::NotVisited)); + + // Push `node` onto the stack. + self.node_states[node] = NodeState::BeingVisited { depth }; + self.node_stack.push(node); + + // Walk each successor of the node, looking to see if any of + // them can reach a node that is presently on the stack. If + // so, that means they can also reach us. + *successors_len = successors_stack.len(); + // Set and return a reference, this is currently empty. + iter.get_or_insert(self.graph.successors(node)) + } + }; + + // Now that iter is initialized, this is a constant for this frame. + let successors_len = *successors_len; + + // Construct iterators for the nodes and walk results. There are two cases: + // * The walk of a successor node returned. + // * The remaining successor nodes. 
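+            // Chaining the two lets the `for` loop below treat a frame that
+            // was just resumed (with a pending `return_value`) and a frame
+            // that is still draining its successor iterator uniformly.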
+ let returned_walk = + return_value.take().into_iter().map(|walk| (*successor_node, Some(walk))); + + let successor_walk = successors.by_ref().map(|successor_node| { + debug!(?node, ?successor_node); + (successor_node, self.inspect_node(successor_node)) + }); + + for (successor_node, walk) in returned_walk.chain(successor_walk) { + match walk { + Some(WalkReturn::Cycle { min_depth: successor_min_depth }) => { + // Track the minimum depth we can reach. + assert!(successor_min_depth <= depth); + if successor_min_depth < *min_depth { + debug!(?node, ?successor_min_depth); + *min_depth = successor_min_depth; + *min_cycle_root = successor_node; + } + } + + Some(WalkReturn::Complete { scc_index: successor_scc_index }) => { + // Push the completed SCC indices onto + // the `successors_stack` for later. + debug!(?node, ?successor_scc_index); + successors_stack.push(successor_scc_index); + } + + None => { + let depth = depth + 1; + debug!(?depth, ?successor_node); + // Remember which node the return value will come from. + frame.successor_node = successor_node; + // Start a new stack frame the step into it. + stack.push(VisitingNodeFrame { + node: successor_node, + depth, + iter: None, + successors_len: 0, + min_depth: depth, + min_cycle_root: successor_node, + successor_node, + }); + continue 'recurse; + } + } + } + + // Completed walk, remove `node` from the stack. + let r = self.node_stack.pop(); + debug_assert_eq!(r, Some(node)); + + // Remove the frame, it's done. + let frame = stack.pop().unwrap(); + + // If `min_depth == depth`, then we are the root of the + // cycle: we can't reach anyone further down the stack. + + // Pass the 'return value' down the stack. + // We return one frame at a time so there can't be another return value. + debug_assert!(return_value.is_none()); + return_value = Some(if frame.min_depth == depth { + // Note that successor stack may have duplicates, so we + // want to remove those: + let deduplicated_successors = { + let duplicate_set = &mut self.duplicate_set; + duplicate_set.clear(); + successors_stack + .drain(successors_len..) + .filter(move |&i| duplicate_set.insert(i)) + }; + let scc_index = self.scc_data.create_scc(deduplicated_successors); + self.node_states[node] = NodeState::InCycle { scc_index }; + WalkReturn::Complete { scc_index } + } else { + // We are not the head of the cycle. Return back to our + // caller. They will take ownership of the + // `self.successors` data that we pushed. + self.node_states[node] = NodeState::InCycleWith { parent: frame.min_cycle_root }; + WalkReturn::Cycle { min_depth: frame.min_depth } + }); + } + + // Keep the allocation we used for successors_stack. + self.successors_stack = successors_stack; + debug_assert_eq!(self.successors_stack.len(), 0); + + return_value.unwrap() + } +} diff --git a/compiler/rustc_data_structures/src/graph/scc/tests.rs b/compiler/rustc_data_structures/src/graph/scc/tests.rs new file mode 100644 index 000000000..9940fee60 --- /dev/null +++ b/compiler/rustc_data_structures/src/graph/scc/tests.rs @@ -0,0 +1,216 @@ +extern crate test; + +use super::*; +use crate::graph::tests::TestGraph; + +#[test] +fn diamond() { + let graph = TestGraph::new(0, &[(0, 1), (0, 2), (1, 3), (2, 3)]); + let sccs: Sccs<_, usize> = Sccs::new(&graph); + assert_eq!(sccs.num_sccs(), 4); + assert_eq!(sccs.num_sccs(), 4); +} + +#[test] +fn test_big_scc() { + // The order in which things will be visited is important to this + // test. 
+ // + // We will visit: + // + // 0 -> 1 -> 2 -> 0 + // + // and at this point detect a cycle. 2 will return back to 1 which + // will visit 3. 3 will visit 2 before the cycle is complete, and + // hence it too will return a cycle. + + /* + +-> 0 + | | + | v + | 1 -> 3 + | | | + | v | + +-- 2 <--+ + */ + let graph = TestGraph::new(0, &[(0, 1), (1, 2), (1, 3), (2, 0), (3, 2)]); + let sccs: Sccs<_, usize> = Sccs::new(&graph); + assert_eq!(sccs.num_sccs(), 1); +} + +#[test] +fn test_three_sccs() { + /* + 0 + | + v + +-> 1 3 + | | | + | v | + +-- 2 <--+ + */ + let graph = TestGraph::new(0, &[(0, 1), (1, 2), (2, 1), (3, 2)]); + let sccs: Sccs<_, usize> = Sccs::new(&graph); + assert_eq!(sccs.num_sccs(), 3); + assert_eq!(sccs.scc(0), 1); + assert_eq!(sccs.scc(1), 0); + assert_eq!(sccs.scc(2), 0); + assert_eq!(sccs.scc(3), 2); + assert_eq!(sccs.successors(0), &[]); + assert_eq!(sccs.successors(1), &[0]); + assert_eq!(sccs.successors(2), &[0]); +} + +#[test] +fn test_find_state_2() { + // The order in which things will be visited is important to this + // test. It tests part of the `find_state` behavior. Here is the + // graph: + // + // + // /----+ + // 0 <--+ | + // | | | + // v | | + // +-> 1 -> 3 4 + // | | | + // | v | + // +-- 2 <----+ + + let graph = TestGraph::new(0, &[(0, 1), (0, 4), (1, 2), (1, 3), (2, 1), (3, 0), (4, 2)]); + + // For this graph, we will start in our DFS by visiting: + // + // 0 -> 1 -> 2 -> 1 + // + // and at this point detect a cycle. The state of 2 will thus be + // `InCycleWith { 1 }`. We will then visit the 1 -> 3 edge, which + // will attempt to visit 0 as well, thus going to the state + // `InCycleWith { 0 }`. Finally, node 1 will complete; the lowest + // depth of any successor was 3 which had depth 0, and thus it + // will be in the state `InCycleWith { 3 }`. + // + // When we finally traverse the `0 -> 4` edge and then visit node 2, + // the states of the nodes are: + // + // 0 BeingVisited { 0 } + // 1 InCycleWith { 3 } + // 2 InCycleWith { 1 } + // 3 InCycleWith { 0 } + // + // and hence 4 will traverse the links, finding an ultimate depth of 0. 
+ // If will also collapse the states to the following: + // + // 0 BeingVisited { 0 } + // 1 InCycleWith { 3 } + // 2 InCycleWith { 1 } + // 3 InCycleWith { 0 } + + let sccs: Sccs<_, usize> = Sccs::new(&graph); + assert_eq!(sccs.num_sccs(), 1); + assert_eq!(sccs.scc(0), 0); + assert_eq!(sccs.scc(1), 0); + assert_eq!(sccs.scc(2), 0); + assert_eq!(sccs.scc(3), 0); + assert_eq!(sccs.scc(4), 0); + assert_eq!(sccs.successors(0), &[]); +} + +#[test] +fn test_find_state_3() { + /* + /----+ + 0 <--+ | + | | | + v | | + +-> 1 -> 3 4 5 + | | | | + | v | | + +-- 2 <----+-+ + */ + let graph = + TestGraph::new(0, &[(0, 1), (0, 4), (1, 2), (1, 3), (2, 1), (3, 0), (4, 2), (5, 2)]); + let sccs: Sccs<_, usize> = Sccs::new(&graph); + assert_eq!(sccs.num_sccs(), 2); + assert_eq!(sccs.scc(0), 0); + assert_eq!(sccs.scc(1), 0); + assert_eq!(sccs.scc(2), 0); + assert_eq!(sccs.scc(3), 0); + assert_eq!(sccs.scc(4), 0); + assert_eq!(sccs.scc(5), 1); + assert_eq!(sccs.successors(0), &[]); + assert_eq!(sccs.successors(1), &[0]); +} + +#[test] +fn test_deep_linear() { + /* + 0 + | + v + 1 + | + v + 2 + | + v + … + */ + #[cfg(not(miri))] + const NR_NODES: usize = 1 << 14; + #[cfg(miri)] + const NR_NODES: usize = 1 << 3; + let mut nodes = vec![]; + for i in 1..NR_NODES { + nodes.push((i - 1, i)); + } + let graph = TestGraph::new(0, nodes.as_slice()); + let sccs: Sccs<_, usize> = Sccs::new(&graph); + assert_eq!(sccs.num_sccs(), NR_NODES); + assert_eq!(sccs.scc(0), NR_NODES - 1); + assert_eq!(sccs.scc(NR_NODES - 1), 0); +} + +#[bench] +fn bench_sccc(b: &mut test::Bencher) { + // Like `test_three_sccs` but each state is replaced by a group of + // three or four to have some amount of test data. + /* + 0-3 + | + v + +->4-6 11-14 + | | | + | v | + +--7-10<-+ + */ + fn make_3_clique(slice: &mut [(usize, usize)], base: usize) { + slice[0] = (base + 0, base + 1); + slice[1] = (base + 1, base + 2); + slice[2] = (base + 2, base + 0); + } + // Not actually a clique but strongly connected. 
+ fn make_4_clique(slice: &mut [(usize, usize)], base: usize) { + slice[0] = (base + 0, base + 1); + slice[1] = (base + 1, base + 2); + slice[2] = (base + 2, base + 3); + slice[3] = (base + 3, base + 0); + slice[4] = (base + 1, base + 3); + slice[5] = (base + 2, base + 1); + } + + let mut graph = [(0, 0); 6 + 3 + 6 + 3 + 4]; + make_4_clique(&mut graph[0..6], 0); + make_3_clique(&mut graph[6..9], 4); + make_4_clique(&mut graph[9..15], 7); + make_3_clique(&mut graph[15..18], 11); + graph[18] = (0, 4); + graph[19] = (5, 7); + graph[20] = (11, 10); + graph[21] = (7, 4); + let graph = TestGraph::new(0, &graph[..]); + b.iter(|| { + let sccs: Sccs<_, usize> = Sccs::new(&graph); + assert_eq!(sccs.num_sccs(), 3); + }); +} diff --git a/compiler/rustc_data_structures/src/graph/tests.rs b/compiler/rustc_data_structures/src/graph/tests.rs new file mode 100644 index 000000000..7f4ef906b --- /dev/null +++ b/compiler/rustc_data_structures/src/graph/tests.rs @@ -0,0 +1,73 @@ +use crate::fx::FxHashMap; +use std::cmp::max; +use std::iter; +use std::slice; + +use super::*; + +pub struct TestGraph { + num_nodes: usize, + start_node: usize, + successors: FxHashMap>, + predecessors: FxHashMap>, +} + +impl TestGraph { + pub fn new(start_node: usize, edges: &[(usize, usize)]) -> Self { + let mut graph = TestGraph { + num_nodes: start_node + 1, + start_node, + successors: FxHashMap::default(), + predecessors: FxHashMap::default(), + }; + for &(source, target) in edges { + graph.num_nodes = max(graph.num_nodes, source + 1); + graph.num_nodes = max(graph.num_nodes, target + 1); + graph.successors.entry(source).or_default().push(target); + graph.predecessors.entry(target).or_default().push(source); + } + for node in 0..graph.num_nodes { + graph.successors.entry(node).or_default(); + graph.predecessors.entry(node).or_default(); + } + graph + } +} + +impl DirectedGraph for TestGraph { + type Node = usize; +} + +impl WithStartNode for TestGraph { + fn start_node(&self) -> usize { + self.start_node + } +} + +impl WithNumNodes for TestGraph { + fn num_nodes(&self) -> usize { + self.num_nodes + } +} + +impl WithPredecessors for TestGraph { + fn predecessors(&self, node: usize) -> >::Iter { + self.predecessors[&node].iter().cloned() + } +} + +impl WithSuccessors for TestGraph { + fn successors(&self, node: usize) -> >::Iter { + self.successors[&node].iter().cloned() + } +} + +impl<'graph> GraphPredecessors<'graph> for TestGraph { + type Item = usize; + type Iter = iter::Cloned>; +} + +impl<'graph> GraphSuccessors<'graph> for TestGraph { + type Item = usize; + type Iter = iter::Cloned>; +} diff --git a/compiler/rustc_data_structures/src/graph/vec_graph/mod.rs b/compiler/rustc_data_structures/src/graph/vec_graph/mod.rs new file mode 100644 index 000000000..3d91bcade --- /dev/null +++ b/compiler/rustc_data_structures/src/graph/vec_graph/mod.rs @@ -0,0 +1,109 @@ +use std::cmp::Ord; + +use crate::graph::{DirectedGraph, GraphSuccessors, WithNumEdges, WithNumNodes, WithSuccessors}; +use rustc_index::vec::{Idx, IndexVec}; + +#[cfg(test)] +mod tests; + +pub struct VecGraph { + /// Maps from a given node to an index where the set of successors + /// for that node starts. The index indexes into the `edges` + /// vector. To find the range for a given node, we look up the + /// start for that node and then the start for the next node + /// (i.e., with an index 1 higher) and get the range between the + /// two. This vector always has an extra entry so that this works + /// even for the max element. 
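+    ///
+    /// A worked example (illustration only): with 7 nodes and the edge list
+    /// `[(0, 1), (1, 2), (1, 3), (3, 4), (5, 1)]`, sorted by source,
+    /// `edge_targets` is `[1, 2, 3, 4, 1]` and `node_starts` is
+    /// `[0, 1, 3, 3, 4, 4, 5, 5]`, so the successors of node 1 are
+    /// `edge_targets[1..3] == [2, 3]`.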
+ node_starts: IndexVec, + + edge_targets: Vec, +} + +impl VecGraph { + pub fn new(num_nodes: usize, mut edge_pairs: Vec<(N, N)>) -> Self { + // Sort the edges by the source -- this is important. + edge_pairs.sort(); + + let num_edges = edge_pairs.len(); + + // Store the *target* of each edge into `edge_targets`. + let edge_targets: Vec = edge_pairs.iter().map(|&(_, target)| target).collect(); + + // Create the *edge starts* array. We are iterating over over + // the (sorted) edge pairs. We maintain the invariant that the + // length of the `node_starts` array is enough to store the + // current source node -- so when we see that the source node + // for an edge is greater than the current length, we grow the + // edge-starts array by just enough. + let mut node_starts = IndexVec::with_capacity(num_edges); + for (index, &(source, _)) in edge_pairs.iter().enumerate() { + // If we have a list like `[(0, x), (2, y)]`: + // + // - Start out with `node_starts` of `[]` + // - Iterate to `(0, x)` at index 0: + // - Push one entry because `node_starts.len()` (0) is <= the source (0) + // - Leaving us with `node_starts` of `[0]` + // - Iterate to `(2, y)` at index 1: + // - Push one entry because `node_starts.len()` (1) is <= the source (2) + // - Push one entry because `node_starts.len()` (2) is <= the source (2) + // - Leaving us with `node_starts` of `[0, 1, 1]` + // - Loop terminates + while node_starts.len() <= source.index() { + node_starts.push(index); + } + } + + // Pad out the `node_starts` array so that it has `num_nodes + + // 1` entries. Continuing our example above, if `num_nodes` is + // be `3`, we would push one more index: `[0, 1, 1, 2]`. + // + // Interpretation of that vector: + // + // [0, 1, 1, 2] + // ---- range for N=2 + // ---- range for N=1 + // ---- range for N=0 + while node_starts.len() <= num_nodes { + node_starts.push(edge_targets.len()); + } + + assert_eq!(node_starts.len(), num_nodes + 1); + + Self { node_starts, edge_targets } + } + + /// Gets the successors for `source` as a slice. 
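+    ///
+    /// A minimal sketch mirroring the unit tests:
+    ///
+    /// ```ignore (illustrative only)
+    /// let graph: VecGraph<usize> =
+    ///     VecGraph::new(7, vec![(0, 1), (1, 2), (1, 3), (3, 4), (5, 1)]);
+    /// assert_eq!(graph.successors(1), &[2, 3]);
+    /// assert_eq!(graph.successors(2), &[]);
+    /// ```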
+ pub fn successors(&self, source: N) -> &[N] { + let start_index = self.node_starts[source]; + let end_index = self.node_starts[source.plus(1)]; + &self.edge_targets[start_index..end_index] + } +} + +impl DirectedGraph for VecGraph { + type Node = N; +} + +impl WithNumNodes for VecGraph { + fn num_nodes(&self) -> usize { + self.node_starts.len() - 1 + } +} + +impl WithNumEdges for VecGraph { + fn num_edges(&self) -> usize { + self.edge_targets.len() + } +} + +impl<'graph, N: Idx> GraphSuccessors<'graph> for VecGraph { + type Item = N; + + type Iter = std::iter::Cloned>; +} + +impl WithSuccessors for VecGraph { + fn successors(&self, node: N) -> >::Iter { + self.successors(node).iter().cloned() + } +} diff --git a/compiler/rustc_data_structures/src/graph/vec_graph/tests.rs b/compiler/rustc_data_structures/src/graph/vec_graph/tests.rs new file mode 100644 index 000000000..c8f979267 --- /dev/null +++ b/compiler/rustc_data_structures/src/graph/vec_graph/tests.rs @@ -0,0 +1,42 @@ +use super::*; + +fn create_graph() -> VecGraph { + // Create a simple graph + // + // 5 + // | + // V + // 0 --> 1 --> 2 + // | + // v + // 3 --> 4 + // + // 6 + + VecGraph::new(7, vec![(0, 1), (1, 2), (1, 3), (3, 4), (5, 1)]) +} + +#[test] +fn num_nodes() { + let graph = create_graph(); + assert_eq!(graph.num_nodes(), 7); +} + +#[test] +fn successors() { + let graph = create_graph(); + assert_eq!(graph.successors(0), &[1]); + assert_eq!(graph.successors(1), &[2, 3]); + assert_eq!(graph.successors(2), &[]); + assert_eq!(graph.successors(3), &[4]); + assert_eq!(graph.successors(4), &[]); + assert_eq!(graph.successors(5), &[1]); + assert_eq!(graph.successors(6), &[]); +} + +#[test] +fn dfs() { + let graph = create_graph(); + let dfs: Vec<_> = graph.depth_first_search(0).collect(); + assert_eq!(dfs, vec![0, 1, 3, 4, 2]); +} diff --git a/compiler/rustc_data_structures/src/intern.rs b/compiler/rustc_data_structures/src/intern.rs new file mode 100644 index 000000000..009b5d534 --- /dev/null +++ b/compiler/rustc_data_structures/src/intern.rs @@ -0,0 +1,196 @@ +use crate::stable_hasher::{HashStable, StableHasher}; +use std::cmp::Ordering; +use std::hash::{Hash, Hasher}; +use std::ops::Deref; +use std::ptr; + +use crate::fingerprint::Fingerprint; + +mod private { + #[derive(Clone, Copy, Debug)] + pub struct PrivateZst; +} + +/// A reference to a value that is interned, and is known to be unique. +/// +/// Note that it is possible to have a `T` and a `Interned` that are (or +/// refer to) equal but different values. But if you have two different +/// `Interned`s, they both refer to the same value, at a single location in +/// memory. This means that equality and hashing can be done on the value's +/// address rather than the value's contents, which can improve performance. +/// +/// The `PrivateZst` field means you can pattern match with `Interned(v, _)` +/// but you can only construct a `Interned` with `new_unchecked`, and not +/// directly. +#[derive(Debug)] +#[rustc_pass_by_value] +pub struct Interned<'a, T>(pub &'a T, pub private::PrivateZst); + +impl<'a, T> Interned<'a, T> { + /// Create a new `Interned` value. The value referred to *must* be interned + /// and thus be unique, and it *must* remain unique in the future. This + /// function has `_unchecked` in the name but is not `unsafe`, because if + /// the uniqueness condition is violated condition it will cause incorrect + /// behaviour but will not affect memory safety. 
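+    ///
+    /// A minimal sketch (illustration only, using a plain local value just to
+    /// show the pointer-based comparison): two `Interned`s built from the same
+    /// allocation compare equal by address alone, without calling
+    /// `T: PartialEq`.
+    ///
+    /// ```ignore (illustrative only)
+    /// let value = String::from("interned");
+    /// let a = Interned::new_unchecked(&value);
+    /// let b = Interned::new_unchecked(&value);
+    /// assert_eq!(a, b); // compares addresses, not contents
+    /// assert_eq!(*a, "interned"); // `Deref` reaches the underlying value
+    /// ```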
+ #[inline] + pub const fn new_unchecked(t: &'a T) -> Self { + Interned(t, private::PrivateZst) + } +} + +impl<'a, T> Clone for Interned<'a, T> { + fn clone(&self) -> Self { + *self + } +} + +impl<'a, T> Copy for Interned<'a, T> {} + +impl<'a, T> Deref for Interned<'a, T> { + type Target = T; + + #[inline] + fn deref(&self) -> &T { + self.0 + } +} + +impl<'a, T> PartialEq for Interned<'a, T> { + #[inline] + fn eq(&self, other: &Self) -> bool { + // Pointer equality implies equality, due to the uniqueness constraint. + ptr::eq(self.0, other.0) + } +} + +impl<'a, T> Eq for Interned<'a, T> {} + +impl<'a, T: PartialOrd> PartialOrd for Interned<'a, T> { + fn partial_cmp(&self, other: &Interned<'a, T>) -> Option { + // Pointer equality implies equality, due to the uniqueness constraint, + // but the contents must be compared otherwise. + if ptr::eq(self.0, other.0) { + Some(Ordering::Equal) + } else { + let res = self.0.partial_cmp(&other.0); + debug_assert_ne!(res, Some(Ordering::Equal)); + res + } + } +} + +impl<'a, T: Ord> Ord for Interned<'a, T> { + fn cmp(&self, other: &Interned<'a, T>) -> Ordering { + // Pointer equality implies equality, due to the uniqueness constraint, + // but the contents must be compared otherwise. + if ptr::eq(self.0, other.0) { + Ordering::Equal + } else { + let res = self.0.cmp(&other.0); + debug_assert_ne!(res, Ordering::Equal); + res + } + } +} + +impl<'a, T> Hash for Interned<'a, T> { + #[inline] + fn hash(&self, s: &mut H) { + // Pointer hashing is sufficient, due to the uniqueness constraint. + ptr::hash(self.0, s) + } +} + +impl HashStable for Interned<'_, T> +where + T: HashStable, +{ + fn hash_stable(&self, hcx: &mut CTX, hasher: &mut StableHasher) { + self.0.hash_stable(hcx, hasher); + } +} + +/// A helper trait so that `Interned` things can cache stable hashes reproducibly. +pub trait InternedHashingContext { + fn with_def_path_and_no_spans(&mut self, f: impl FnOnce(&mut Self)); +} + +/// A helper type that you can wrap round your own type in order to automatically +/// cache the stable hash on creation and not recompute it whenever the stable hash +/// of the type is computed. +/// This is only done in incremental mode. You can also opt out of caching by using +/// StableHash::ZERO for the hash, in which case the hash gets computed each time. +/// This is useful if you have values that you intern but never (can?) use for stable +/// hashing. +#[derive(Copy, Clone)] +pub struct WithStableHash { + pub internee: T, + pub stable_hash: Fingerprint, +} + +impl PartialEq for WithStableHash { + #[inline] + fn eq(&self, other: &Self) -> bool { + self.internee.eq(&other.internee) + } +} + +impl Eq for WithStableHash {} + +impl PartialOrd for WithStableHash { + fn partial_cmp(&self, other: &WithStableHash) -> Option { + Some(self.internee.cmp(&other.internee)) + } +} + +impl Ord for WithStableHash { + fn cmp(&self, other: &WithStableHash) -> Ordering { + self.internee.cmp(&other.internee) + } +} + +impl Deref for WithStableHash { + type Target = T; + + #[inline] + fn deref(&self) -> &T { + &self.internee + } +} + +impl Hash for WithStableHash { + #[inline] + fn hash(&self, s: &mut H) { + self.internee.hash(s) + } +} + +impl, CTX: InternedHashingContext> HashStable for WithStableHash { + fn hash_stable(&self, hcx: &mut CTX, hasher: &mut StableHasher) { + if self.stable_hash == Fingerprint::ZERO || cfg!(debug_assertions) { + // No cached hash available. This can only mean that incremental is disabled. 
+ // We don't cache stable hashes in non-incremental mode, because they are used + // so rarely that the performance actually suffers. + + // We need to build the hash as if we cached it and then hash that hash, as + // otherwise the hashes will differ between cached and non-cached mode. + let stable_hash: Fingerprint = { + let mut hasher = StableHasher::new(); + hcx.with_def_path_and_no_spans(|hcx| self.internee.hash_stable(hcx, &mut hasher)); + hasher.finish() + }; + if cfg!(debug_assertions) && self.stable_hash != Fingerprint::ZERO { + assert_eq!( + stable_hash, self.stable_hash, + "cached stable hash does not match freshly computed stable hash" + ); + } + stable_hash.hash_stable(hcx, hasher); + } else { + self.stable_hash.hash_stable(hcx, hasher); + } + } +} + +#[cfg(test)] +mod tests; diff --git a/compiler/rustc_data_structures/src/intern/tests.rs b/compiler/rustc_data_structures/src/intern/tests.rs new file mode 100644 index 000000000..09810a085 --- /dev/null +++ b/compiler/rustc_data_structures/src/intern/tests.rs @@ -0,0 +1,59 @@ +use super::*; +use std::cmp::Ordering; + +#[derive(Debug)] +struct S(u32); + +impl PartialEq for S { + fn eq(&self, _other: &Self) -> bool { + panic!("shouldn't be called"); + } +} + +impl Eq for S {} + +impl PartialOrd for S { + fn partial_cmp(&self, other: &S) -> Option { + // The `==` case should be handled by `Interned`. + assert_ne!(self.0, other.0); + self.0.partial_cmp(&other.0) + } +} + +impl Ord for S { + fn cmp(&self, other: &S) -> Ordering { + // The `==` case should be handled by `Interned`. + assert_ne!(self.0, other.0); + self.0.cmp(&other.0) + } +} + +#[test] +fn test_uniq() { + let s1 = S(1); + let s2 = S(2); + let s3 = S(3); + let s4 = S(1); // violates uniqueness + + let v1 = Interned::new_unchecked(&s1); + let v2 = Interned::new_unchecked(&s2); + let v3a = Interned::new_unchecked(&s3); + let v3b = Interned::new_unchecked(&s3); + let v4 = Interned::new_unchecked(&s4); // violates uniqueness + + assert_ne!(v1, v2); + assert_ne!(v2, v3a); + assert_eq!(v1, v1); + assert_eq!(v3a, v3b); + assert_ne!(v1, v4); // same content but different addresses: not equal + + assert_eq!(v1.cmp(&v2), Ordering::Less); + assert_eq!(v3a.cmp(&v2), Ordering::Greater); + assert_eq!(v1.cmp(&v1), Ordering::Equal); // only uses Interned::eq, not S::cmp + assert_eq!(v3a.cmp(&v3b), Ordering::Equal); // only uses Interned::eq, not S::cmp + + assert_eq!(v1.partial_cmp(&v2), Some(Ordering::Less)); + assert_eq!(v3a.partial_cmp(&v2), Some(Ordering::Greater)); + assert_eq!(v1.partial_cmp(&v1), Some(Ordering::Equal)); // only uses Interned::eq, not S::cmp + assert_eq!(v3a.partial_cmp(&v3b), Some(Ordering::Equal)); // only uses Interned::eq, not S::cmp +} diff --git a/compiler/rustc_data_structures/src/jobserver.rs b/compiler/rustc_data_structures/src/jobserver.rs new file mode 100644 index 000000000..09baa3095 --- /dev/null +++ b/compiler/rustc_data_structures/src/jobserver.rs @@ -0,0 +1,40 @@ +pub use jobserver_crate::Client; +use std::sync::LazyLock; + +// We can only call `from_env` once per process + +// Note that this is unsafe because it may misinterpret file descriptors +// on Unix as jobserver file descriptors. We hopefully execute this near +// the beginning of the process though to ensure we don't get false +// positives, or in other words we try to execute this before we open +// any file descriptors ourselves. 
+// +// Pick a "reasonable maximum" if we don't otherwise have +// a jobserver in our environment, capping out at 32 so we +// don't take everything down by hogging the process run queue. +// The fixed number is used to have deterministic compilation +// across machines. +// +// Also note that we stick this in a global because there could be +// multiple rustc instances in this process, and the jobserver is +// per-process. +static GLOBAL_CLIENT: LazyLock = LazyLock::new(|| unsafe { + Client::from_env().unwrap_or_else(|| { + let client = Client::new(32).expect("failed to create jobserver"); + // Acquire a token for the main thread which we can release later + client.acquire_raw().ok(); + client + }) +}); + +pub fn client() -> Client { + GLOBAL_CLIENT.clone() +} + +pub fn acquire_thread() { + GLOBAL_CLIENT.acquire_raw().ok(); +} + +pub fn release_thread() { + GLOBAL_CLIENT.release_raw().ok(); +} diff --git a/compiler/rustc_data_structures/src/lib.rs b/compiler/rustc_data_structures/src/lib.rs new file mode 100644 index 000000000..265f45b72 --- /dev/null +++ b/compiler/rustc_data_structures/src/lib.rs @@ -0,0 +1,113 @@ +//! Various data structures used by the Rust compiler. The intention +//! is that code in here should be not be *specific* to rustc, so that +//! it can be easily unit tested and so forth. +//! +//! # Note +//! +//! This API is completely unstable and subject to change. + +#![doc(html_root_url = "https://doc.rust-lang.org/nightly/nightly-rustc/")] +#![feature(array_windows)] +#![feature(associated_type_bounds)] +#![feature(auto_traits)] +#![feature(cell_leak)] +#![feature(control_flow_enum)] +#![feature(extend_one)] +#![feature(let_else)] +#![feature(hash_raw_entry)] +#![feature(hasher_prefixfree_extras)] +#![feature(maybe_uninit_uninit_array)] +#![feature(min_specialization)] +#![feature(never_type)] +#![feature(type_alias_impl_trait)] +#![feature(new_uninit)] +#![feature(once_cell)] +#![feature(rustc_attrs)] +#![feature(test)] +#![feature(thread_id_value)] +#![feature(vec_into_raw_parts)] +#![allow(rustc::default_hash_types)] +#![allow(rustc::potential_query_instability)] + +#[macro_use] +extern crate tracing; +#[macro_use] +extern crate cfg_if; +#[macro_use] +extern crate rustc_macros; + +pub use rustc_index::static_assert_size; + +#[inline(never)] +#[cold] +pub fn cold_path R, R>(f: F) -> R { + f() +} + +pub mod base_n; +pub mod binary_search_util; +pub mod captures; +pub mod flock; +pub mod functor; +pub mod fx; +pub mod graph; +pub mod intern; +pub mod jobserver; +pub mod macros; +pub mod map_in_place; +pub mod obligation_forest; +pub mod owning_ref; +pub mod sip128; +pub mod small_c_str; +pub mod small_str; +pub mod snapshot_map; +pub mod svh; +pub use ena::snapshot_vec; +pub mod memmap; +pub mod sorted_map; +#[macro_use] +pub mod stable_hasher; +mod atomic_ref; +pub mod fingerprint; +pub mod profiling; +pub mod sharded; +pub mod stack; +pub mod sync; +pub mod thin_vec; +pub mod tiny_list; +pub mod transitive_relation; +pub mod vec_linked_list; +pub mod vec_map; +pub mod work_queue; +pub use atomic_ref::AtomicRef; +pub mod frozen; +pub mod sso; +pub mod steal; +pub mod tagged_ptr; +pub mod temp_dir; +pub mod unhash; + +pub use ena::undo_log; +pub use ena::unify; + +pub struct OnDrop(pub F); + +impl OnDrop { + /// Forgets the function which prevents it from running. + /// Ensure that the function owns no memory, otherwise it will be leaked. 
+ #[inline] + pub fn disable(self) { + std::mem::forget(self); + } +} + +impl Drop for OnDrop { + #[inline] + fn drop(&mut self) { + (self.0)(); + } +} + +// See comments in src/librustc_middle/lib.rs +#[doc(hidden)] +pub fn __noop_fix_for_27438() {} diff --git a/compiler/rustc_data_structures/src/macros.rs b/compiler/rustc_data_structures/src/macros.rs new file mode 100644 index 000000000..e05491f6f --- /dev/null +++ b/compiler/rustc_data_structures/src/macros.rs @@ -0,0 +1,37 @@ +#[macro_export] +macro_rules! enum_from_u32 { + ($(#[$attr:meta])* pub enum $name:ident { + $($(#[$var_attr:meta])* $variant:ident = $e:expr,)* + }) => { + $(#[$attr])* + pub enum $name { + $($(#[$var_attr])* $variant = $e),* + } + + impl $name { + pub fn from_u32(u: u32) -> Option<$name> { + $(if u == $name::$variant as u32 { + return Some($name::$variant) + })* + None + } + } + }; + ($(#[$attr:meta])* pub enum $name:ident { + $($(#[$var_attr:meta])* $variant:ident,)* + }) => { + $(#[$attr])* + pub enum $name { + $($(#[$var_attr])* $variant,)* + } + + impl $name { + pub fn from_u32(u: u32) -> Option<$name> { + $(if u == $name::$variant as u32 { + return Some($name::$variant) + })* + None + } + } + } +} diff --git a/compiler/rustc_data_structures/src/map_in_place.rs b/compiler/rustc_data_structures/src/map_in_place.rs new file mode 100644 index 000000000..874de03d3 --- /dev/null +++ b/compiler/rustc_data_structures/src/map_in_place.rs @@ -0,0 +1,108 @@ +use smallvec::{Array, SmallVec}; +use std::ptr; + +pub trait MapInPlace: Sized { + fn map_in_place(&mut self, mut f: F) + where + F: FnMut(T) -> T, + { + self.flat_map_in_place(|e| Some(f(e))) + } + + fn flat_map_in_place(&mut self, f: F) + where + F: FnMut(T) -> I, + I: IntoIterator; +} + +impl MapInPlace for Vec { + fn flat_map_in_place(&mut self, mut f: F) + where + F: FnMut(T) -> I, + I: IntoIterator, + { + let mut read_i = 0; + let mut write_i = 0; + unsafe { + let mut old_len = self.len(); + self.set_len(0); // make sure we just leak elements in case of panic + + while read_i < old_len { + // move the read_i'th item out of the vector and map it + // to an iterator + let e = ptr::read(self.as_ptr().add(read_i)); + let iter = f(e).into_iter(); + read_i += 1; + + for e in iter { + if write_i < read_i { + ptr::write(self.as_mut_ptr().add(write_i), e); + write_i += 1; + } else { + // If this is reached we ran out of space + // in the middle of the vector. + // However, the vector is in a valid state here, + // so we just do a somewhat inefficient insert. + self.set_len(old_len); + self.insert(write_i, e); + + old_len = self.len(); + self.set_len(0); + + read_i += 1; + write_i += 1; + } + } + } + + // write_i tracks the number of actually written new items. + self.set_len(write_i); + } + } +} + +impl> MapInPlace for SmallVec { + fn flat_map_in_place(&mut self, mut f: F) + where + F: FnMut(T) -> I, + I: IntoIterator, + { + let mut read_i = 0; + let mut write_i = 0; + unsafe { + let mut old_len = self.len(); + self.set_len(0); // make sure we just leak elements in case of panic + + while read_i < old_len { + // move the read_i'th item out of the vector and map it + // to an iterator + let e = ptr::read(self.as_ptr().add(read_i)); + let iter = f(e).into_iter(); + read_i += 1; + + for e in iter { + if write_i < read_i { + ptr::write(self.as_mut_ptr().add(write_i), e); + write_i += 1; + } else { + // If this is reached we ran out of space + // in the middle of the vector. 
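+ // (That is, the closure has produced more items than we have consumed so
+ // far, so there is no free slot left in front of the unread tail.)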
+ // However, the vector is in a valid state here, + // so we just do a somewhat inefficient insert. + self.set_len(old_len); + self.insert(write_i, e); + + old_len = self.len(); + self.set_len(0); + + read_i += 1; + write_i += 1; + } + } + } + + // write_i tracks the number of actually written new items. + self.set_len(write_i); + } + } +} diff --git a/compiler/rustc_data_structures/src/memmap.rs b/compiler/rustc_data_structures/src/memmap.rs new file mode 100644 index 000000000..917416df6 --- /dev/null +++ b/compiler/rustc_data_structures/src/memmap.rs @@ -0,0 +1,108 @@ +use std::fs::File; +use std::io; +use std::ops::{Deref, DerefMut}; + +use crate::owning_ref::StableAddress; + +/// A trivial wrapper for [`memmap2::Mmap`] that implements [`StableAddress`]. +#[cfg(not(target_arch = "wasm32"))] +pub struct Mmap(memmap2::Mmap); + +#[cfg(target_arch = "wasm32")] +pub struct Mmap(Vec); + +#[cfg(not(target_arch = "wasm32"))] +impl Mmap { + #[inline] + pub unsafe fn map(file: File) -> io::Result { + memmap2::Mmap::map(&file).map(Mmap) + } +} + +#[cfg(target_arch = "wasm32")] +impl Mmap { + #[inline] + pub unsafe fn map(mut file: File) -> io::Result { + use std::io::Read; + + let mut data = Vec::new(); + file.read_to_end(&mut data)?; + Ok(Mmap(data)) + } +} + +impl Deref for Mmap { + type Target = [u8]; + + #[inline] + fn deref(&self) -> &[u8] { + &*self.0 + } +} + +// SAFETY: On architectures other than WASM, mmap is used as backing storage. The address of this +// memory map is stable. On WASM, `Vec` is used as backing storage. The `Mmap` type doesn't +// export any function that can cause the `Vec` to be re-allocated. As such the address of the +// bytes inside this `Vec` is stable. +unsafe impl StableAddress for Mmap {} + +#[cfg(not(target_arch = "wasm32"))] +pub struct MmapMut(memmap2::MmapMut); + +#[cfg(target_arch = "wasm32")] +pub struct MmapMut(Vec); + +#[cfg(not(target_arch = "wasm32"))] +impl MmapMut { + #[inline] + pub fn map_anon(len: usize) -> io::Result { + let mmap = memmap2::MmapMut::map_anon(len)?; + Ok(MmapMut(mmap)) + } + + #[inline] + pub fn flush(&mut self) -> io::Result<()> { + self.0.flush() + } + + #[inline] + pub fn make_read_only(self) -> std::io::Result { + let mmap = self.0.make_read_only()?; + Ok(Mmap(mmap)) + } +} + +#[cfg(target_arch = "wasm32")] +impl MmapMut { + #[inline] + pub fn map_anon(len: usize) -> io::Result { + let data = Vec::with_capacity(len); + Ok(MmapMut(data)) + } + + #[inline] + pub fn flush(&mut self) -> io::Result<()> { + Ok(()) + } + + #[inline] + pub fn make_read_only(self) -> std::io::Result { + Ok(Mmap(self.0)) + } +} + +impl Deref for MmapMut { + type Target = [u8]; + + #[inline] + fn deref(&self) -> &[u8] { + &*self.0 + } +} + +impl DerefMut for MmapMut { + #[inline] + fn deref_mut(&mut self) -> &mut [u8] { + &mut *self.0 + } +} diff --git a/compiler/rustc_data_structures/src/obligation_forest/graphviz.rs b/compiler/rustc_data_structures/src/obligation_forest/graphviz.rs new file mode 100644 index 000000000..3a268e4b4 --- /dev/null +++ b/compiler/rustc_data_structures/src/obligation_forest/graphviz.rs @@ -0,0 +1,90 @@ +use crate::obligation_forest::{ForestObligation, ObligationForest}; +use rustc_graphviz as dot; +use std::env::var_os; +use std::fs::File; +use std::io::BufWriter; +use std::path::Path; +use std::sync::atomic::AtomicUsize; +use std::sync::atomic::Ordering; + +impl ObligationForest { + /// Creates a graphviz representation of the obligation forest. 
Given a directory this will + /// create files with name of the format `_.gv`. The counter is + /// global and is maintained internally. + /// + /// Calling this will do nothing unless the environment variable + /// `DUMP_OBLIGATION_FOREST_GRAPHVIZ` is defined. + /// + /// A few post-processing that you might want to do make the forest easier to visualize: + /// + /// * `sed 's,std::[a-z]*::,,g'` — Deletes the `std::::` prefix of paths. + /// * `sed 's,"Binder(TraitPredicate(<\(.*\)>)) (\([^)]*\))","\1 (\2)",'` — Transforms + /// `Binder(TraitPredicate())` into just ``. + #[allow(dead_code)] + pub fn dump_graphviz>(&self, dir: P, description: &str) { + static COUNTER: AtomicUsize = AtomicUsize::new(0); + + if var_os("DUMP_OBLIGATION_FOREST_GRAPHVIZ").is_none() { + return; + } + + let counter = COUNTER.fetch_add(1, Ordering::AcqRel); + + let file_path = dir.as_ref().join(format!("{:010}_{}.gv", counter, description)); + + let mut gv_file = BufWriter::new(File::create(file_path).unwrap()); + + dot::render(&self, &mut gv_file).unwrap(); + } +} + +impl<'a, O: ForestObligation + 'a> dot::Labeller<'a> for &'a ObligationForest { + type Node = usize; + type Edge = (usize, usize); + + fn graph_id(&self) -> dot::Id<'_> { + dot::Id::new("trait_obligation_forest").unwrap() + } + + fn node_id(&self, index: &Self::Node) -> dot::Id<'_> { + dot::Id::new(format!("obligation_{}", index)).unwrap() + } + + fn node_label(&self, index: &Self::Node) -> dot::LabelText<'_> { + let node = &self.nodes[*index]; + let label = format!("{:?} ({:?})", node.obligation.as_cache_key(), node.state.get()); + + dot::LabelText::LabelStr(label.into()) + } + + fn edge_label(&self, (_index_source, _index_target): &Self::Edge) -> dot::LabelText<'_> { + dot::LabelText::LabelStr("".into()) + } +} + +impl<'a, O: ForestObligation + 'a> dot::GraphWalk<'a> for &'a ObligationForest { + type Node = usize; + type Edge = (usize, usize); + + fn nodes(&self) -> dot::Nodes<'_, Self::Node> { + (0..self.nodes.len()).collect() + } + + fn edges(&self) -> dot::Edges<'_, Self::Edge> { + (0..self.nodes.len()) + .flat_map(|i| { + let node = &self.nodes[i]; + + node.dependents.iter().map(move |&d| (d, i)) + }) + .collect() + } + + fn source(&self, (s, _): &Self::Edge) -> Self::Node { + *s + } + + fn target(&self, (_, t): &Self::Edge) -> Self::Node { + *t + } +} diff --git a/compiler/rustc_data_structures/src/obligation_forest/mod.rs b/compiler/rustc_data_structures/src/obligation_forest/mod.rs new file mode 100644 index 000000000..07a96dd7d --- /dev/null +++ b/compiler/rustc_data_structures/src/obligation_forest/mod.rs @@ -0,0 +1,698 @@ +//! The `ObligationForest` is a utility data structure used in trait +//! matching to track the set of outstanding obligations (those not yet +//! resolved to success or error). It also tracks the "backtrace" of each +//! pending obligation (why we are trying to figure this out in the first +//! place). +//! +//! ### External view +//! +//! `ObligationForest` supports two main public operations (there are a +//! few others not discussed here): +//! +//! 1. Add a new root obligations (`register_obligation`). +//! 2. Process the pending obligations (`process_obligations`). +//! +//! When a new obligation `N` is added, it becomes the root of an +//! obligation tree. This tree can also carry some per-tree state `T`, +//! which is given at the same time. This tree is a singleton to start, so +//! `N` is both the root and the only leaf. Each time the +//! 
`process_obligations` method is called, it will invoke its callback +//! with every pending obligation (so that will include `N`, the first +//! time). The callback also receives a (mutable) reference to the +//! per-tree state `T`. The callback should process the obligation `O` +//! that it is given and return a `ProcessResult`: +//! +//! - `Unchanged` -> ambiguous result. Obligation was neither a success +//! nor a failure. It is assumed that further attempts to process the +//! obligation will yield the same result unless something in the +//! surrounding environment changes. +//! - `Changed(C)` - the obligation was *shallowly successful*. The +//! vector `C` is a list of subobligations. The meaning of this is that +//! `O` was successful on the assumption that all the obligations in `C` +//! are also successful. Therefore, `O` is only considered a "true" +//! success if `C` is empty. Otherwise, `O` is put into a suspended +//! state and the obligations in `C` become the new pending +//! obligations. They will be processed the next time you call +//! `process_obligations`. +//! - `Error(E)` -> obligation failed with error `E`. We will collect this +//! error and return it from `process_obligations`, along with the +//! "backtrace" of obligations (that is, the list of obligations up to +//! and including the root of the failed obligation). No further +//! obligations from that same tree will be processed, since the tree is +//! now considered to be in error. +//! +//! When the call to `process_obligations` completes, you get back an `Outcome`, +//! which includes two bits of information: +//! +//! - `completed`: a list of obligations where processing was fully +//! completed without error (meaning that all transitive subobligations +//! have also been completed). So, for example, if the callback from +//! `process_obligations` returns `Changed(C)` for some obligation `O`, +//! then `O` will be considered completed right away if `C` is the +//! empty vector. Otherwise it will only be considered completed once +//! all the obligations in `C` have been found completed. +//! - `errors`: a list of errors that occurred and associated backtraces +//! at the time of error, which can be used to give context to the user. +//! +//! Upon completion, none of the existing obligations were *shallowly +//! successful* (that is, no callback returned `Changed(_)`). This implies that +//! all obligations were either errors or returned an ambiguous result. +//! +//! ### Implementation details +//! +//! For the most part, comments specific to the implementation are in the +//! code. This file only contains a very high-level overview. Basically, +//! the forest is stored in a vector. Each element of the vector is a node +//! in some tree. Each node in the vector has the index of its dependents, +//! including the first dependent which is known as the parent. It also +//! has a current state, described by `NodeState`. After each processing +//! step, we compress the vector to remove completed and error nodes, which +//! aren't needed anymore. + +use crate::fx::{FxHashMap, FxHashSet}; + +use std::cell::Cell; +use std::collections::hash_map::Entry; +use std::fmt::Debug; +use std::hash; +use std::marker::PhantomData; + +mod graphviz; + +#[cfg(test)] +mod tests; + +pub trait ForestObligation: Clone + Debug { + type CacheKey: Clone + hash::Hash + Eq + Debug; + + /// Converts this `ForestObligation` suitable for use as a cache key. 
+ /// If two distinct `ForestObligations`s return the same cache key, + /// then it must be sound to use the result of processing one obligation + /// (e.g. success for error) for the other obligation + fn as_cache_key(&self) -> Self::CacheKey; +} + +pub trait ObligationProcessor { + type Obligation: ForestObligation; + type Error: Debug; + + fn needs_process_obligation(&self, obligation: &Self::Obligation) -> bool; + + fn process_obligation( + &mut self, + obligation: &mut Self::Obligation, + ) -> ProcessResult; + + /// As we do the cycle check, we invoke this callback when we + /// encounter an actual cycle. `cycle` is an iterator that starts + /// at the start of the cycle in the stack and walks **toward the + /// top**. + /// + /// In other words, if we had O1 which required O2 which required + /// O3 which required O1, we would give an iterator yielding O1, + /// O2, O3 (O1 is not yielded twice). + fn process_backedge<'c, I>(&mut self, cycle: I, _marker: PhantomData<&'c Self::Obligation>) + where + I: Clone + Iterator; +} + +/// The result type used by `process_obligation`. +#[derive(Debug)] +pub enum ProcessResult { + Unchanged, + Changed(Vec), + Error(E), +} + +#[derive(Clone, Copy, PartialEq, Eq, Hash, Debug)] +struct ObligationTreeId(usize); + +type ObligationTreeIdGenerator = + std::iter::Map, fn(usize) -> ObligationTreeId>; + +pub struct ObligationForest { + /// The list of obligations. In between calls to [Self::process_obligations], + /// this list only contains nodes in the `Pending` or `Waiting` state. + /// + /// `usize` indices are used here and throughout this module, rather than + /// [`rustc_index::newtype_index!`] indices, because this code is hot enough + /// that the `u32`-to-`usize` conversions that would be required are + /// significant, and space considerations are not important. + nodes: Vec>, + + /// A cache of predicates that have been successfully completed. + done_cache: FxHashSet, + + /// A cache of the nodes in `nodes`, indexed by predicate. Unfortunately, + /// its contents are not guaranteed to match those of `nodes`. See the + /// comments in `Self::process_obligation` for details. + active_cache: FxHashMap, + + /// A vector reused in [Self::compress()] and [Self::find_cycles_from_node()], + /// to avoid allocating new vectors. + reused_node_vec: Vec, + + obligation_tree_id_generator: ObligationTreeIdGenerator, + + /// Per tree error cache. This is used to deduplicate errors, + /// which is necessary to avoid trait resolution overflow in + /// some cases. + /// + /// See [this][details] for details. + /// + /// [details]: https://github.com/rust-lang/rust/pull/53255#issuecomment-421184780 + error_cache: FxHashMap>, +} + +#[derive(Debug)] +struct Node { + obligation: O, + state: Cell, + + /// Obligations that depend on this obligation for their completion. They + /// must all be in a non-pending state. + dependents: Vec, + + /// If true, `dependents[0]` points to a "parent" node, which requires + /// special treatment upon error but is otherwise treated the same. + /// (It would be more idiomatic to store the parent node in a separate + /// `Option` field, but that slows down the common case of + /// iterating over the parent and other descendants together.) + has_parent: bool, + + /// Identifier of the obligation tree to which this node belongs. 
+ obligation_tree_id: ObligationTreeId, +} + +impl Node { + fn new(parent: Option, obligation: O, obligation_tree_id: ObligationTreeId) -> Node { + Node { + obligation, + state: Cell::new(NodeState::Pending), + dependents: if let Some(parent_index) = parent { vec![parent_index] } else { vec![] }, + has_parent: parent.is_some(), + obligation_tree_id, + } + } +} + +/// The state of one node in some tree within the forest. This represents the +/// current state of processing for the obligation (of type `O`) associated +/// with this node. +/// +/// The non-`Error` state transitions are as follows. +/// ```text +/// (Pre-creation) +/// | +/// | register_obligation_at() (called by process_obligations() and +/// v from outside the crate) +/// Pending +/// | +/// | process_obligations() +/// v +/// Success +/// | ^ +/// | | mark_successes() +/// | v +/// | Waiting +/// | +/// | process_cycles() +/// v +/// Done +/// | +/// | compress() +/// v +/// (Removed) +/// ``` +/// The `Error` state can be introduced in several places, via `error_at()`. +/// +/// Outside of `ObligationForest` methods, nodes should be either `Pending` or +/// `Waiting`. +#[derive(Debug, Copy, Clone, PartialEq, Eq)] +enum NodeState { + /// This obligation has not yet been selected successfully. Cannot have + /// subobligations. + Pending, + + /// This obligation was selected successfully, but may or may not have + /// subobligations. + Success, + + /// This obligation was selected successfully, but it has a pending + /// subobligation. + Waiting, + + /// This obligation, along with its subobligations, are complete, and will + /// be removed in the next collection. + Done, + + /// This obligation was resolved to an error. It will be removed by the + /// next compression step. + Error, +} + +/// This trait allows us to have two different Outcome types: +/// - the normal one that does as little as possible +/// - one for tests that does some additional work and checking +pub trait OutcomeTrait { + type Error; + type Obligation; + + fn new() -> Self; + fn record_completed(&mut self, outcome: &Self::Obligation); + fn record_error(&mut self, error: Self::Error); +} + +#[derive(Debug)] +pub struct Outcome { + /// Backtrace of obligations that were found to be in error. + pub errors: Vec>, +} + +impl OutcomeTrait for Outcome { + type Error = Error; + type Obligation = O; + + fn new() -> Self { + Self { errors: vec![] } + } + + fn record_completed(&mut self, _outcome: &Self::Obligation) { + // do nothing + } + + fn record_error(&mut self, error: Self::Error) { + self.errors.push(error) + } +} + +#[derive(Debug, PartialEq, Eq)] +pub struct Error { + pub error: E, + pub backtrace: Vec, +} + +impl ObligationForest { + pub fn new() -> ObligationForest { + ObligationForest { + nodes: vec![], + done_cache: Default::default(), + active_cache: Default::default(), + reused_node_vec: vec![], + obligation_tree_id_generator: (0..).map(ObligationTreeId), + error_cache: Default::default(), + } + } + + /// Returns the total number of nodes in the forest that have not + /// yet been fully resolved. + pub fn len(&self) -> usize { + self.nodes.len() + } + + /// Registers an obligation. + pub fn register_obligation(&mut self, obligation: O) { + // Ignore errors here - there is no guarantee of success. + let _ = self.register_obligation_at(obligation, None); + } + + // Returns Err(()) if we already know this obligation failed. 
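To illustrate the external view described in the module docs above, here is a minimal sketch (with hypothetical names `AlwaysDone` and `demo`) of a processor that resolves every obligation immediately. It assumes an obligation type implementing `ForestObligation` (the test module below provides such an impl for `&str`) and writes out the generic parameters in full.

use std::marker::PhantomData;

struct AlwaysDone;

impl ObligationProcessor for AlwaysDone {
    type Obligation = &'static str;
    type Error = ();

    fn needs_process_obligation(&self, _: &Self::Obligation) -> bool {
        true
    }

    fn process_obligation(
        &mut self,
        _: &mut Self::Obligation,
    ) -> ProcessResult<Self::Obligation, Self::Error> {
        // No subobligations: a shallow success with an empty vector is a full success.
        ProcessResult::Changed(vec![])
    }

    fn process_backedge<'c, I>(&mut self, _cycle: I, _: PhantomData<&'c Self::Obligation>)
    where
        I: Clone + Iterator<Item = &'c Self::Obligation>,
    {
    }
}

fn demo() {
    let mut forest = ObligationForest::new();
    forest.register_obligation("A");
    let outcome: Outcome<&'static str, ()> = forest.process_obligations(&mut AlwaysDone);
    assert!(outcome.errors.is_empty());
    // "A" was completed and compressed away.
    assert_eq!(forest.len(), 0);
}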
+ fn register_obligation_at(&mut self, obligation: O, parent: Option) -> Result<(), ()> { + let cache_key = obligation.as_cache_key(); + if self.done_cache.contains(&cache_key) { + debug!("register_obligation_at: ignoring already done obligation: {:?}", obligation); + return Ok(()); + } + + match self.active_cache.entry(cache_key) { + Entry::Occupied(o) => { + let node = &mut self.nodes[*o.get()]; + if let Some(parent_index) = parent { + // If the node is already in `active_cache`, it has already + // had its chance to be marked with a parent. So if it's + // not already present, just dump `parent` into the + // dependents as a non-parent. + if !node.dependents.contains(&parent_index) { + node.dependents.push(parent_index); + } + } + if let NodeState::Error = node.state.get() { Err(()) } else { Ok(()) } + } + Entry::Vacant(v) => { + let obligation_tree_id = match parent { + Some(parent_index) => self.nodes[parent_index].obligation_tree_id, + None => self.obligation_tree_id_generator.next().unwrap(), + }; + + let already_failed = parent.is_some() + && self + .error_cache + .get(&obligation_tree_id) + .map_or(false, |errors| errors.contains(v.key())); + + if already_failed { + Err(()) + } else { + let new_index = self.nodes.len(); + v.insert(new_index); + self.nodes.push(Node::new(parent, obligation, obligation_tree_id)); + Ok(()) + } + } + } + } + + /// Converts all remaining obligations to the given error. + pub fn to_errors(&mut self, error: E) -> Vec> { + let errors = self + .nodes + .iter() + .enumerate() + .filter(|(_index, node)| node.state.get() == NodeState::Pending) + .map(|(index, _node)| Error { error: error.clone(), backtrace: self.error_at(index) }) + .collect(); + + self.compress(|_| assert!(false)); + errors + } + + /// Returns the set of obligations that are in a pending state. + pub fn map_pending_obligations(&self, f: F) -> Vec
+ where + F: Fn(&O) -> P, + { + self.nodes + .iter() + .filter(|node| node.state.get() == NodeState::Pending) + .map(|node| f(&node.obligation)) + .collect() + } + + fn insert_into_error_cache(&mut self, index: usize) { + let node = &self.nodes[index]; + self.error_cache + .entry(node.obligation_tree_id) + .or_default() + .insert(node.obligation.as_cache_key()); + } + + /// Performs a fixpoint computation over the obligation list. + #[inline(never)] + pub fn process_obligations(&mut self, processor: &mut P) -> OUT + where + P: ObligationProcessor, + OUT: OutcomeTrait>, + { + let mut outcome = OUT::new(); + + // Fixpoint computation: we repeat until the inner loop stalls. + loop { + let mut has_changed = false; + + // Note that the loop body can append new nodes, and those new nodes + // will then be processed by subsequent iterations of the loop. + // + // We can't use an iterator for the loop because `self.nodes` is + // appended to and the borrow checker would complain. We also can't use + // `for index in 0..self.nodes.len() { ... }` because the range would + // be computed with the initial length, and we would miss the appended + // nodes. Therefore we use a `while` loop. + let mut index = 0; + while let Some(node) = self.nodes.get_mut(index) { + if node.state.get() != NodeState::Pending + || !processor.needs_process_obligation(&node.obligation) + { + index += 1; + continue; + } + + // `processor.process_obligation` can modify the predicate within + // `node.obligation`, and that predicate is the key used for + // `self.active_cache`. This means that `self.active_cache` can get + // out of sync with `nodes`. It's not very common, but it does + // happen, and code in `compress` has to allow for it. + + match processor.process_obligation(&mut node.obligation) { + ProcessResult::Unchanged => { + // No change in state. + } + ProcessResult::Changed(children) => { + // We are not (yet) stalled. + has_changed = true; + node.state.set(NodeState::Success); + + for child in children { + let st = self.register_obligation_at(child, Some(index)); + if let Err(()) = st { + // Error already reported - propagate it + // to our node. + self.error_at(index); + } + } + } + ProcessResult::Error(err) => { + has_changed = true; + outcome.record_error(Error { error: err, backtrace: self.error_at(index) }); + } + } + index += 1; + } + + // If unchanged, then we saw no successful obligations, which means + // there is no point in further iteration. This is based on the + // assumption that when trait matching returns `Error` or + // `Unchanged`, those results do not affect environmental inference + // state. (Note that this will occur if we invoke + // `process_obligations` with no pending obligations.) + if !has_changed { + break; + } + + self.mark_successes(); + self.process_cycles(processor); + self.compress(|obl| outcome.record_completed(obl)); + } + + outcome + } + + /// Returns a vector of obligations for `p` and all of its + /// ancestors, putting them into the error state in the process. + fn error_at(&self, mut index: usize) -> Vec { + let mut error_stack: Vec = vec![]; + let mut trace = vec![]; + + loop { + let node = &self.nodes[index]; + node.state.set(NodeState::Error); + trace.push(node.obligation.clone()); + if node.has_parent { + // The first dependent is the parent, which is treated + // specially. + error_stack.extend(node.dependents.iter().skip(1)); + index = node.dependents[0]; + } else { + // No parent; treat all dependents non-specially. 
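+ // We have reached the root of this obligation tree, so the backtrace
+ // collected in `trace` is now complete.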
+ error_stack.extend(node.dependents.iter()); + break; + } + } + + while let Some(index) = error_stack.pop() { + let node = &self.nodes[index]; + if node.state.get() != NodeState::Error { + node.state.set(NodeState::Error); + error_stack.extend(node.dependents.iter()); + } + } + + trace + } + + /// Mark all `Waiting` nodes as `Success`, except those that depend on a + /// pending node. + fn mark_successes(&self) { + // Convert all `Waiting` nodes to `Success`. + for node in &self.nodes { + if node.state.get() == NodeState::Waiting { + node.state.set(NodeState::Success); + } + } + + // Convert `Success` nodes that depend on a pending node back to + // `Waiting`. + for node in &self.nodes { + if node.state.get() == NodeState::Pending { + // This call site is hot. + self.inlined_mark_dependents_as_waiting(node); + } + } + } + + // This always-inlined function is for the hot call site. + #[inline(always)] + fn inlined_mark_dependents_as_waiting(&self, node: &Node) { + for &index in node.dependents.iter() { + let node = &self.nodes[index]; + let state = node.state.get(); + if state == NodeState::Success { + // This call site is cold. + self.uninlined_mark_dependents_as_waiting(node); + } else { + debug_assert!(state == NodeState::Waiting || state == NodeState::Error) + } + } + } + + // This never-inlined function is for the cold call site. + #[inline(never)] + fn uninlined_mark_dependents_as_waiting(&self, node: &Node) { + // Mark node Waiting in the cold uninlined code instead of the hot inlined + node.state.set(NodeState::Waiting); + self.inlined_mark_dependents_as_waiting(node) + } + + /// Report cycles between all `Success` nodes, and convert all `Success` + /// nodes to `Done`. This must be called after `mark_successes`. + fn process_cycles
(&mut self, processor: &mut P) + where + P: ObligationProcessor, + { + let mut stack = std::mem::take(&mut self.reused_node_vec); + for (index, node) in self.nodes.iter().enumerate() { + // For some benchmarks this state test is extremely hot. It's a win + // to handle the no-op cases immediately to avoid the cost of the + // function call. + if node.state.get() == NodeState::Success { + self.find_cycles_from_node(&mut stack, processor, index); + } + } + + debug_assert!(stack.is_empty()); + self.reused_node_vec = stack; + } + + fn find_cycles_from_node
(&self, stack: &mut Vec, processor: &mut P, index: usize) + where + P: ObligationProcessor, + { + let node = &self.nodes[index]; + if node.state.get() == NodeState::Success { + match stack.iter().rposition(|&n| n == index) { + None => { + stack.push(index); + for &dep_index in node.dependents.iter() { + self.find_cycles_from_node(stack, processor, dep_index); + } + stack.pop(); + node.state.set(NodeState::Done); + } + Some(rpos) => { + // Cycle detected. + processor.process_backedge( + stack[rpos..].iter().map(|&i| &self.nodes[i].obligation), + PhantomData, + ); + } + } + } + } + + /// Compresses the vector, removing all popped nodes. This adjusts the + /// indices and hence invalidates any outstanding indices. `process_cycles` + /// must be run beforehand to remove any cycles on `Success` nodes. + #[inline(never)] + fn compress(&mut self, mut outcome_cb: impl FnMut(&O)) { + let orig_nodes_len = self.nodes.len(); + let mut node_rewrites: Vec<_> = std::mem::take(&mut self.reused_node_vec); + debug_assert!(node_rewrites.is_empty()); + node_rewrites.extend(0..orig_nodes_len); + let mut dead_nodes = 0; + + // Move removable nodes to the end, preserving the order of the + // remaining nodes. + // + // LOOP INVARIANT: + // self.nodes[0..index - dead_nodes] are the first remaining nodes + // self.nodes[index - dead_nodes..index] are all dead + // self.nodes[index..] are unchanged + for index in 0..orig_nodes_len { + let node = &self.nodes[index]; + match node.state.get() { + NodeState::Pending | NodeState::Waiting => { + if dead_nodes > 0 { + self.nodes.swap(index, index - dead_nodes); + node_rewrites[index] -= dead_nodes; + } + } + NodeState::Done => { + // The removal lookup might fail because the contents of + // `self.active_cache` are not guaranteed to match those of + // `self.nodes`. See the comment in `process_obligation` + // for more details. + let cache_key = node.obligation.as_cache_key(); + self.active_cache.remove(&cache_key); + self.done_cache.insert(cache_key); + + // Extract the success stories. + outcome_cb(&node.obligation); + node_rewrites[index] = orig_nodes_len; + dead_nodes += 1; + } + NodeState::Error => { + // We *intentionally* remove the node from the cache at this point. Otherwise + // tests must come up with a different type on every type error they + // check against. + self.active_cache.remove(&node.obligation.as_cache_key()); + self.insert_into_error_cache(index); + node_rewrites[index] = orig_nodes_len; + dead_nodes += 1; + } + NodeState::Success => unreachable!(), + } + } + + if dead_nodes > 0 { + // Remove the dead nodes and rewrite indices. + self.nodes.truncate(orig_nodes_len - dead_nodes); + self.apply_rewrites(&node_rewrites); + } + + node_rewrites.truncate(0); + self.reused_node_vec = node_rewrites; + } + + #[inline(never)] + fn apply_rewrites(&mut self, node_rewrites: &[usize]) { + let orig_nodes_len = node_rewrites.len(); + + for node in &mut self.nodes { + let mut i = 0; + while let Some(dependent) = node.dependents.get_mut(i) { + let new_index = node_rewrites[*dependent]; + if new_index >= orig_nodes_len { + node.dependents.swap_remove(i); + if i == 0 && node.has_parent { + // We just removed the parent. + node.has_parent = false; + } + } else { + *dependent = new_index; + i += 1; + } + } + } + + // This updating of `self.active_cache` is necessary because the + // removal of nodes within `compress` can fail. See above. 
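+ // Entries whose node was removed were rewritten to `orig_nodes_len` (or
+ // beyond) and are dropped here; surviving entries are re-pointed at their
+ // node's new index.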
+ self.active_cache.retain(|_predicate, index| { + let new_index = node_rewrites[*index]; + if new_index >= orig_nodes_len { + false + } else { + *index = new_index; + true + } + }); + } +} diff --git a/compiler/rustc_data_structures/src/obligation_forest/tests.rs b/compiler/rustc_data_structures/src/obligation_forest/tests.rs new file mode 100644 index 000000000..e2991aae1 --- /dev/null +++ b/compiler/rustc_data_structures/src/obligation_forest/tests.rs @@ -0,0 +1,479 @@ +use super::*; + +use std::fmt; +use std::marker::PhantomData; + +impl<'a> super::ForestObligation for &'a str { + type CacheKey = &'a str; + + fn as_cache_key(&self) -> Self::CacheKey { + self + } +} + +struct ClosureObligationProcessor { + process_obligation: OF, + _process_backedge: BF, + marker: PhantomData<(O, E)>, +} + +struct TestOutcome { + pub completed: Vec, + pub errors: Vec>, +} + +impl OutcomeTrait for TestOutcome +where + O: Clone, +{ + type Error = Error; + type Obligation = O; + + fn new() -> Self { + Self { errors: vec![], completed: vec![] } + } + + fn record_completed(&mut self, outcome: &Self::Obligation) { + self.completed.push(outcome.clone()) + } + + fn record_error(&mut self, error: Self::Error) { + self.errors.push(error) + } +} + +#[allow(non_snake_case)] +fn C(of: OF, bf: BF) -> ClosureObligationProcessor +where + OF: FnMut(&mut O) -> ProcessResult, + BF: FnMut(&[O]), +{ + ClosureObligationProcessor { + process_obligation: of, + _process_backedge: bf, + marker: PhantomData, + } +} + +impl ObligationProcessor for ClosureObligationProcessor +where + O: super::ForestObligation + fmt::Debug, + E: fmt::Debug, + OF: FnMut(&mut O) -> ProcessResult, + BF: FnMut(&[O]), +{ + type Obligation = O; + type Error = E; + + fn needs_process_obligation(&self, _obligation: &Self::Obligation) -> bool { + true + } + + fn process_obligation( + &mut self, + obligation: &mut Self::Obligation, + ) -> ProcessResult { + (self.process_obligation)(obligation) + } + + fn process_backedge<'c, I>(&mut self, _cycle: I, _marker: PhantomData<&'c Self::Obligation>) + where + I: Clone + Iterator, + { + } +} + +#[test] +fn push_pop() { + let mut forest = ObligationForest::new(); + forest.register_obligation("A"); + forest.register_obligation("B"); + forest.register_obligation("C"); + + // first round, B errors out, A has subtasks, and C completes, creating this: + // A |-> A.1 + // |-> A.2 + // |-> A.3 + let TestOutcome { completed: ok, errors: err, .. } = forest.process_obligations(&mut C( + |obligation| match *obligation { + "A" => ProcessResult::Changed(vec!["A.1", "A.2", "A.3"]), + "B" => ProcessResult::Error("B is for broken"), + "C" => ProcessResult::Changed(vec![]), + "A.1" | "A.2" | "A.3" => ProcessResult::Unchanged, + _ => unreachable!(), + }, + |_| {}, + )); + assert_eq!(ok, vec!["C"]); + assert_eq!(err, vec![Error { error: "B is for broken", backtrace: vec!["B"] }]); + + // second round: two delays, one success, creating an uneven set of subtasks: + // A |-> A.1 + // |-> A.2 + // |-> A.3 |-> A.3.i + // D |-> D.1 + // |-> D.2 + forest.register_obligation("D"); + let TestOutcome { completed: ok, errors: err, .. 
} = forest.process_obligations(&mut C( + |obligation| match *obligation { + "A.1" => ProcessResult::Unchanged, + "A.2" => ProcessResult::Unchanged, + "A.3" => ProcessResult::Changed(vec!["A.3.i"]), + "D" => ProcessResult::Changed(vec!["D.1", "D.2"]), + "A.3.i" | "D.1" | "D.2" => ProcessResult::Unchanged, + _ => unreachable!(), + }, + |_| {}, + )); + assert_eq!(ok, Vec::<&'static str>::new()); + assert_eq!(err, Vec::new()); + + // third round: ok in A.1 but trigger an error in A.2. Check that it + // propagates to A, but not D.1 or D.2. + // D |-> D.1 |-> D.1.i + // |-> D.2 |-> D.2.i + let TestOutcome { completed: ok, errors: err, .. } = forest.process_obligations(&mut C( + |obligation| match *obligation { + "A.1" => ProcessResult::Changed(vec![]), + "A.2" => ProcessResult::Error("A is for apple"), + "A.3.i" => ProcessResult::Changed(vec![]), + "D.1" => ProcessResult::Changed(vec!["D.1.i"]), + "D.2" => ProcessResult::Changed(vec!["D.2.i"]), + "D.1.i" | "D.2.i" => ProcessResult::Unchanged, + _ => unreachable!(), + }, + |_| {}, + )); + let mut ok = ok; + ok.sort(); + assert_eq!(ok, vec!["A.1", "A.3", "A.3.i"]); + assert_eq!(err, vec![Error { error: "A is for apple", backtrace: vec!["A.2", "A"] }]); + + // fourth round: error in D.1.i + let TestOutcome { completed: ok, errors: err, .. } = forest.process_obligations(&mut C( + |obligation| match *obligation { + "D.1.i" => ProcessResult::Error("D is for dumb"), + "D.2.i" => ProcessResult::Changed(vec![]), + _ => panic!("unexpected obligation {:?}", obligation), + }, + |_| {}, + )); + let mut ok = ok; + ok.sort(); + assert_eq!(ok, vec!["D.2", "D.2.i"]); + assert_eq!(err, vec![Error { error: "D is for dumb", backtrace: vec!["D.1.i", "D.1", "D"] }]); +} + +// Test that if a tree with grandchildren succeeds, everything is +// reported as expected: +// A +// A.1 +// A.2 +// A.2.i +// A.2.ii +// A.3 +#[test] +fn success_in_grandchildren() { + let mut forest = ObligationForest::new(); + forest.register_obligation("A"); + + let TestOutcome { completed: ok, errors: err, .. } = forest.process_obligations(&mut C( + |obligation| match *obligation { + "A" => ProcessResult::Changed(vec!["A.1", "A.2", "A.3"]), + "A.1" => ProcessResult::Changed(vec![]), + "A.2" => ProcessResult::Changed(vec!["A.2.i", "A.2.ii"]), + "A.3" => ProcessResult::Changed(vec![]), + "A.2.i" | "A.2.ii" => ProcessResult::Unchanged, + _ => unreachable!(), + }, + |_| {}, + )); + let mut ok = ok; + ok.sort(); + assert_eq!(ok, vec!["A.1", "A.3"]); + assert!(err.is_empty()); + + let TestOutcome { completed: ok, errors: err, .. } = forest.process_obligations(&mut C( + |obligation| match *obligation { + "A.2.i" => ProcessResult::Unchanged, + "A.2.ii" => ProcessResult::Changed(vec![]), + _ => unreachable!(), + }, + |_| {}, + )); + assert_eq!(ok, vec!["A.2.ii"]); + assert!(err.is_empty()); + + let TestOutcome { completed: ok, errors: err, .. } = forest.process_obligations(&mut C( + |obligation| match *obligation { + "A.2.i" => ProcessResult::Changed(vec!["A.2.i.a"]), + "A.2.i.a" => ProcessResult::Unchanged, + _ => unreachable!(), + }, + |_| {}, + )); + assert!(ok.is_empty()); + assert!(err.is_empty()); + + let TestOutcome { completed: ok, errors: err, .. } = forest.process_obligations(&mut C( + |obligation| match *obligation { + "A.2.i.a" => ProcessResult::Changed(vec![]), + _ => unreachable!(), + }, + |_| {}, + )); + let mut ok = ok; + ok.sort(); + assert_eq!(ok, vec!["A", "A.2", "A.2.i", "A.2.i.a"]); + assert!(err.is_empty()); + + let TestOutcome { completed: ok, errors: err, .. 
} = + forest.process_obligations(&mut C(|_| unreachable!(), |_| {})); + + assert!(ok.is_empty()); + assert!(err.is_empty()); +} + +#[test] +fn to_errors_no_throw() { + // check that converting multiple children with common parent (A) + // yields to correct errors (and does not panic, in particular). + let mut forest = ObligationForest::new(); + forest.register_obligation("A"); + let TestOutcome { completed: ok, errors: err, .. } = forest.process_obligations(&mut C( + |obligation| match *obligation { + "A" => ProcessResult::Changed(vec!["A.1", "A.2", "A.3"]), + "A.1" | "A.2" | "A.3" => ProcessResult::Unchanged, + _ => unreachable!(), + }, + |_| {}, + )); + assert_eq!(ok.len(), 0); + assert_eq!(err.len(), 0); + let errors = forest.to_errors(()); + assert_eq!(errors[0].backtrace, vec!["A.1", "A"]); + assert_eq!(errors[1].backtrace, vec!["A.2", "A"]); + assert_eq!(errors[2].backtrace, vec!["A.3", "A"]); + assert_eq!(errors.len(), 3); +} + +#[test] +fn diamond() { + // check that diamond dependencies are handled correctly + let mut forest = ObligationForest::new(); + forest.register_obligation("A"); + let TestOutcome { completed: ok, errors: err, .. } = forest.process_obligations(&mut C( + |obligation| match *obligation { + "A" => ProcessResult::Changed(vec!["A.1", "A.2"]), + "A.1" | "A.2" => ProcessResult::Unchanged, + _ => unreachable!(), + }, + |_| {}, + )); + assert_eq!(ok.len(), 0); + assert_eq!(err.len(), 0); + + let TestOutcome { completed: ok, errors: err, .. } = forest.process_obligations(&mut C( + |obligation| match *obligation { + "A.1" => ProcessResult::Changed(vec!["D"]), + "A.2" => ProcessResult::Changed(vec!["D"]), + "D" => ProcessResult::Unchanged, + _ => unreachable!(), + }, + |_| {}, + )); + assert_eq!(ok.len(), 0); + assert_eq!(err.len(), 0); + + let mut d_count = 0; + let TestOutcome { completed: ok, errors: err, .. } = forest.process_obligations(&mut C( + |obligation| match *obligation { + "D" => { + d_count += 1; + ProcessResult::Changed(vec![]) + } + _ => unreachable!(), + }, + |_| {}, + )); + assert_eq!(d_count, 1); + let mut ok = ok; + ok.sort(); + assert_eq!(ok, vec!["A", "A.1", "A.2", "D"]); + assert_eq!(err.len(), 0); + + let errors = forest.to_errors(()); + assert_eq!(errors.len(), 0); + + forest.register_obligation("A'"); + let TestOutcome { completed: ok, errors: err, .. } = forest.process_obligations(&mut C( + |obligation| match *obligation { + "A'" => ProcessResult::Changed(vec!["A'.1", "A'.2"]), + "A'.1" | "A'.2" => ProcessResult::Unchanged, + _ => unreachable!(), + }, + |_| {}, + )); + assert_eq!(ok.len(), 0); + assert_eq!(err.len(), 0); + + let TestOutcome { completed: ok, errors: err, .. } = forest.process_obligations(&mut C( + |obligation| match *obligation { + "A'.1" => ProcessResult::Changed(vec!["D'", "A'"]), + "A'.2" => ProcessResult::Changed(vec!["D'"]), + "D'" | "A'" => ProcessResult::Unchanged, + _ => unreachable!(), + }, + |_| {}, + )); + assert_eq!(ok.len(), 0); + assert_eq!(err.len(), 0); + + let mut d_count = 0; + let TestOutcome { completed: ok, errors: err, .. 
} = forest.process_obligations(&mut C( + |obligation| match *obligation { + "D'" => { + d_count += 1; + ProcessResult::Error("operation failed") + } + _ => unreachable!(), + }, + |_| {}, + )); + assert_eq!(d_count, 1); + assert_eq!(ok.len(), 0); + assert_eq!( + err, + vec![super::Error { error: "operation failed", backtrace: vec!["D'", "A'.1", "A'"] }] + ); + + let errors = forest.to_errors(()); + assert_eq!(errors.len(), 0); +} + +#[test] +fn done_dependency() { + // check that the local cache works + let mut forest = ObligationForest::new(); + forest.register_obligation("A: Sized"); + forest.register_obligation("B: Sized"); + forest.register_obligation("C: Sized"); + + let TestOutcome { completed: ok, errors: err, .. } = forest.process_obligations(&mut C( + |obligation| match *obligation { + "A: Sized" | "B: Sized" | "C: Sized" => ProcessResult::Changed(vec![]), + _ => unreachable!(), + }, + |_| {}, + )); + let mut ok = ok; + ok.sort(); + assert_eq!(ok, vec!["A: Sized", "B: Sized", "C: Sized"]); + assert_eq!(err.len(), 0); + + forest.register_obligation("(A,B,C): Sized"); + let TestOutcome { completed: ok, errors: err, .. } = forest.process_obligations(&mut C( + |obligation| match *obligation { + "(A,B,C): Sized" => ProcessResult::Changed(vec!["A: Sized", "B: Sized", "C: Sized"]), + _ => unreachable!(), + }, + |_| {}, + )); + assert_eq!(ok, vec!["(A,B,C): Sized"]); + assert_eq!(err.len(), 0); +} + +#[test] +fn orphan() { + // check that orphaned nodes are handled correctly + let mut forest = ObligationForest::new(); + forest.register_obligation("A"); + forest.register_obligation("B"); + forest.register_obligation("C1"); + forest.register_obligation("C2"); + + let TestOutcome { completed: ok, errors: err, .. } = forest.process_obligations(&mut C( + |obligation| match *obligation { + "A" => ProcessResult::Changed(vec!["D", "E"]), + "B" => ProcessResult::Unchanged, + "C1" => ProcessResult::Changed(vec![]), + "C2" => ProcessResult::Changed(vec![]), + "D" | "E" => ProcessResult::Unchanged, + _ => unreachable!(), + }, + |_| {}, + )); + let mut ok = ok; + ok.sort(); + assert_eq!(ok, vec!["C1", "C2"]); + assert_eq!(err.len(), 0); + + let TestOutcome { completed: ok, errors: err, .. } = forest.process_obligations(&mut C( + |obligation| match *obligation { + "D" | "E" => ProcessResult::Unchanged, + "B" => ProcessResult::Changed(vec!["D"]), + _ => unreachable!(), + }, + |_| {}, + )); + assert_eq!(ok.len(), 0); + assert_eq!(err.len(), 0); + + let TestOutcome { completed: ok, errors: err, .. } = forest.process_obligations(&mut C( + |obligation| match *obligation { + "D" => ProcessResult::Unchanged, + "E" => ProcessResult::Error("E is for error"), + _ => unreachable!(), + }, + |_| {}, + )); + assert_eq!(ok.len(), 0); + assert_eq!(err, vec![super::Error { error: "E is for error", backtrace: vec!["E", "A"] }]); + + let TestOutcome { completed: ok, errors: err, .. } = forest.process_obligations(&mut C( + |obligation| match *obligation { + "D" => ProcessResult::Error("D is dead"), + _ => unreachable!(), + }, + |_| {}, + )); + assert_eq!(ok.len(), 0); + assert_eq!(err, vec![super::Error { error: "D is dead", backtrace: vec!["D"] }]); + + let errors = forest.to_errors(()); + assert_eq!(errors.len(), 0); +} + +#[test] +fn simultaneous_register_and_error() { + // check that registering a failed obligation works correctly + let mut forest = ObligationForest::new(); + forest.register_obligation("A"); + forest.register_obligation("B"); + + let TestOutcome { completed: ok, errors: err, .. 
} = forest.process_obligations(&mut C( + |obligation| match *obligation { + "A" => ProcessResult::Error("An error"), + "B" => ProcessResult::Changed(vec!["A"]), + _ => unreachable!(), + }, + |_| {}, + )); + assert_eq!(ok.len(), 0); + assert_eq!(err, vec![super::Error { error: "An error", backtrace: vec!["A"] }]); + + let mut forest = ObligationForest::new(); + forest.register_obligation("B"); + forest.register_obligation("A"); + + let TestOutcome { completed: ok, errors: err, .. } = forest.process_obligations(&mut C( + |obligation| match *obligation { + "A" => ProcessResult::Error("An error"), + "B" => ProcessResult::Changed(vec!["A"]), + _ => unreachable!(), + }, + |_| {}, + )); + assert_eq!(ok.len(), 0); + assert_eq!(err, vec![super::Error { error: "An error", backtrace: vec!["A"] }]); +} diff --git a/compiler/rustc_data_structures/src/owning_ref/LICENSE b/compiler/rustc_data_structures/src/owning_ref/LICENSE new file mode 100644 index 000000000..dff72d1e4 --- /dev/null +++ b/compiler/rustc_data_structures/src/owning_ref/LICENSE @@ -0,0 +1,21 @@ +The MIT License (MIT) + +Copyright (c) 2015 Marvin Löbel + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. diff --git a/compiler/rustc_data_structures/src/owning_ref/mod.rs b/compiler/rustc_data_structures/src/owning_ref/mod.rs new file mode 100644 index 000000000..ed5e56618 --- /dev/null +++ b/compiler/rustc_data_structures/src/owning_ref/mod.rs @@ -0,0 +1,1214 @@ +#![warn(missing_docs)] + +/*! +# An owning reference. + +This crate provides the _owning reference_ types `OwningRef` and `OwningRefMut` +that enables it to bundle a reference together with the owner of the data it points to. +This allows moving and dropping of an `OwningRef` without needing to recreate the reference. + +This can sometimes be useful because Rust borrowing rules normally prevent +moving a type that has been moved from. For example, this kind of code gets rejected: + +```compile_fail,E0515 +fn return_owned_and_referenced<'a>() -> (Vec, &'a [u8]) { + let v = vec![1, 2, 3, 4]; + let s = &v[1..3]; + (v, s) +} +``` + +Even though, from a memory-layout point of view, this can be entirely safe +if the new location of the vector still lives longer than the lifetime `'a` +of the reference because the backing allocation of the vector does not change. 
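A minimal sketch of that point, assuming the `OwningRef` type defined below:

```
# use rustc_data_structures::owning_ref::OwningRef;
let or = OwningRef::new(vec![1u8, 2, 3, 4]).map(|v| &v[1..3]);
let moved = or; // moving the owner is fine: the Vec's heap buffer does not move
assert_eq!(&*moved, &[2, 3]);
```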
+ +This library enables this safe usage by keeping the owner and the reference +bundled together in a wrapper type that ensure that lifetime constraint: + +``` +# use rustc_data_structures::owning_ref::OwningRef; +# fn main() { +fn return_owned_and_referenced() -> OwningRef, [u8]> { + let v = vec![1, 2, 3, 4]; + let or = OwningRef::new(v); + let or = or.map(|v| &v[1..3]); + or +} +# } +``` + +It works by requiring owner types to dereference to stable memory locations +and preventing mutable access to root containers, which in practice requires heap allocation +as provided by `Box`, `Rc`, etc. + +Also provided are typedefs for common owner type combinations, +which allow for less verbose type signatures. +For example, `BoxRef` instead of `OwningRef, T>`. + +The crate also provides the more advanced `OwningHandle` type, +which allows more freedom in bundling a dependent handle object +along with the data it depends on, at the cost of some unsafe needed in the API. +See the documentation around `OwningHandle` for more details. + +# Examples + +## Basics + +``` +use rustc_data_structures::owning_ref::BoxRef; + +fn main() { + // Create an array owned by a Box. + let arr = Box::new([1, 2, 3, 4]) as Box<[i32]>; + + // Transfer into a BoxRef. + let arr: BoxRef<[i32]> = BoxRef::new(arr); + assert_eq!(&*arr, &[1, 2, 3, 4]); + + // We can slice the array without losing ownership or changing type. + let arr: BoxRef<[i32]> = arr.map(|arr| &arr[1..3]); + assert_eq!(&*arr, &[2, 3]); + + // Also works for Arc, Rc, String and Vec! +} +``` + +## Caching a reference to a struct field + +``` +use rustc_data_structures::owning_ref::BoxRef; + +fn main() { + struct Foo { + tag: u32, + x: u16, + y: u16, + z: u16, + } + let foo = Foo { tag: 1, x: 100, y: 200, z: 300 }; + + let or = BoxRef::new(Box::new(foo)).map(|foo| { + match foo.tag { + 0 => &foo.x, + 1 => &foo.y, + 2 => &foo.z, + _ => panic!(), + } + }); + + assert_eq!(*or, 200); +} +``` + +## Caching a reference to an entry in a vector + +``` +use rustc_data_structures::owning_ref::VecRef; + +fn main() { + let v = VecRef::new(vec![1, 2, 3, 4, 5]).map(|v| &v[3]); + assert_eq!(*v, 4); +} +``` + +## Caching a subslice of a String + +``` +use rustc_data_structures::owning_ref::StringRef; + +fn main() { + let s = StringRef::new("hello world".to_owned()) + .map(|s| s.split(' ').nth(1).unwrap()); + + assert_eq!(&*s, "world"); +} +``` + +## Reference counted slices that share ownership of the backing storage + +``` +use rustc_data_structures::owning_ref::RcRef; +use std::rc::Rc; + +fn main() { + let rc: RcRef<[i32]> = RcRef::new(Rc::new([1, 2, 3, 4]) as Rc<[i32]>); + assert_eq!(&*rc, &[1, 2, 3, 4]); + + let rc_a: RcRef<[i32]> = rc.clone().map(|s| &s[0..2]); + let rc_b = rc.clone().map(|s| &s[1..3]); + let rc_c = rc.clone().map(|s| &s[2..4]); + assert_eq!(&*rc_a, &[1, 2]); + assert_eq!(&*rc_b, &[2, 3]); + assert_eq!(&*rc_c, &[3, 4]); + + let rc_c_a = rc_c.clone().map(|s| &s[1]); + assert_eq!(&*rc_c_a, &4); +} +``` + +## Atomic reference counted slices that share ownership of the backing storage + +``` +use rustc_data_structures::owning_ref::ArcRef; +use std::sync::Arc; + +fn main() { + use std::thread; + + fn par_sum(rc: ArcRef<[i32]>) -> i32 { + if rc.len() == 0 { + return 0; + } else if rc.len() == 1 { + return rc[0]; + } + let mid = rc.len() / 2; + let left = rc.clone().map(|s| &s[..mid]); + let right = rc.map(|s| &s[mid..]); + + let left = thread::spawn(move || par_sum(left)); + let right = thread::spawn(move || par_sum(right)); + + left.join().unwrap() + 
right.join().unwrap() + } + + let rc: Arc<[i32]> = Arc::new([1, 2, 3, 4]); + let rc: ArcRef<[i32]> = rc.into(); + + assert_eq!(par_sum(rc), 10); +} +``` + +## References into RAII locks + +``` +use rustc_data_structures::owning_ref::RefRef; +use std::cell::{RefCell, Ref}; + +fn main() { + let refcell = RefCell::new((1, 2, 3, 4)); + // Also works with Mutex and RwLock + + let refref = { + let refref = RefRef::new(refcell.borrow()).map(|x| &x.3); + assert_eq!(*refref, 4); + + // We move the RAII lock and the reference to one of + // the subfields in the data it guards here: + refref + }; + + assert_eq!(*refref, 4); + + drop(refref); + + assert_eq!(*refcell.borrow(), (1, 2, 3, 4)); +} +``` + +## Mutable reference + +When the owned container implements `DerefMut`, it is also possible to make +a _mutable owning reference_. (e.g., with `Box`, `RefMut`, `MutexGuard`) + +``` +use rustc_data_structures::owning_ref::RefMutRefMut; +use std::cell::{RefCell, RefMut}; + +fn main() { + let refcell = RefCell::new((1, 2, 3, 4)); + + let mut refmut_refmut = { + let mut refmut_refmut = RefMutRefMut::new(refcell.borrow_mut()).map_mut(|x| &mut x.3); + assert_eq!(*refmut_refmut, 4); + *refmut_refmut *= 2; + + refmut_refmut + }; + + assert_eq!(*refmut_refmut, 8); + *refmut_refmut *= 2; + + drop(refmut_refmut); + + assert_eq!(*refcell.borrow(), (1, 2, 3, 16)); +} +``` +*/ + +pub use stable_deref_trait::{ + CloneStableDeref as CloneStableAddress, StableDeref as StableAddress, +}; +use std::mem; + +/// An owning reference. +/// +/// This wraps an owner `O` and a reference `&T` pointing +/// at something reachable from `O::Target` while keeping +/// the ability to move `self` around. +/// +/// The owner is usually a pointer that points at some base type. +/// +/// For more details and examples, see the module and method docs. +pub struct OwningRef { + owner: O, + reference: *const T, +} + +/// An mutable owning reference. +/// +/// This wraps an owner `O` and a reference `&mut T` pointing +/// at something reachable from `O::Target` while keeping +/// the ability to move `self` around. +/// +/// The owner is usually a pointer that points at some base type. +/// +/// For more details and examples, see the module and method docs. +pub struct OwningRefMut { + owner: O, + reference: *mut T, +} + +/// Helper trait for an erased concrete type an owner dereferences to. +/// This is used in form of a trait object for keeping +/// something around to (virtually) call the destructor. +pub trait Erased {} +impl Erased for T {} + +/// Helper trait for erasing the concrete type of what an owner dereferences to, +/// for example `Box -> Box`. This would be unneeded with +/// higher kinded types support in the language. +#[allow(unused_lifetimes)] +pub unsafe trait IntoErased<'a> { + /// Owner with the dereference type substituted to `Erased`. + type Erased; + /// Performs the type erasure. + fn into_erased(self) -> Self::Erased; +} + +/// Helper trait for erasing the concrete type of what an owner dereferences to, +/// for example `Box -> Box`. This would be unneeded with +/// higher kinded types support in the language. +#[allow(unused_lifetimes)] +pub unsafe trait IntoErasedSend<'a> { + /// Owner with the dereference type substituted to `Erased + Send`. + type Erased: Send; + /// Performs the type erasure. + fn into_erased_send(self) -> Self::Erased; +} + +/// Helper trait for erasing the concrete type of what an owner dereferences to, +/// for example `Box -> Box`. 
This would be unneeded with +/// higher kinded types support in the language. +#[allow(unused_lifetimes)] +pub unsafe trait IntoErasedSendSync<'a> { + /// Owner with the dereference type substituted to `Erased + Send + Sync`. + type Erased: Send + Sync; + /// Performs the type erasure. + fn into_erased_send_sync(self) -> Self::Erased; +} + +///////////////////////////////////////////////////////////////////////////// +// OwningRef +///////////////////////////////////////////////////////////////////////////// + +impl OwningRef { + /// Creates a new owning reference from an owner + /// initialized to the direct dereference of it. + /// + /// # Example + /// ``` + /// use rustc_data_structures::owning_ref::OwningRef; + /// + /// fn main() { + /// let owning_ref = OwningRef::new(Box::new(42)); + /// assert_eq!(*owning_ref, 42); + /// } + /// ``` + pub fn new(o: O) -> Self + where + O: StableAddress, + O: Deref, + { + OwningRef { reference: &*o, owner: o } + } + + /// Like `new`, but doesn’t require `O` to implement the `StableAddress` trait. + /// Instead, the caller is responsible to make the same promises as implementing the trait. + /// + /// This is useful for cases where coherence rules prevents implementing the trait + /// without adding a dependency to this crate in a third-party library. + pub unsafe fn new_assert_stable_address(o: O) -> Self + where + O: Deref, + { + OwningRef { reference: &*o, owner: o } + } + + /// Converts `self` into a new owning reference that points at something reachable + /// from the previous one. + /// + /// This can be a reference to a field of `U`, something reachable from a field of + /// `U`, or even something unrelated with a `'static` lifetime. + /// + /// # Example + /// ``` + /// use rustc_data_structures::owning_ref::OwningRef; + /// + /// fn main() { + /// let owning_ref = OwningRef::new(Box::new([1, 2, 3, 4])); + /// + /// // create an owning reference that points at the + /// // third element of the array. + /// let owning_ref = owning_ref.map(|array| &array[2]); + /// assert_eq!(*owning_ref, 3); + /// } + /// ``` + pub fn map(self, f: F) -> OwningRef + where + O: StableAddress, + F: FnOnce(&T) -> &U, + { + OwningRef { reference: f(&self), owner: self.owner } + } + + /// Tries to convert `self` into a new owning reference that points + /// at something reachable from the previous one. + /// + /// This can be a reference to a field of `U`, something reachable from a field of + /// `U`, or even something unrelated with a `'static` lifetime. + /// + /// # Example + /// ``` + /// use rustc_data_structures::owning_ref::OwningRef; + /// + /// fn main() { + /// let owning_ref = OwningRef::new(Box::new([1, 2, 3, 4])); + /// + /// // create an owning reference that points at the + /// // third element of the array. + /// let owning_ref = owning_ref.try_map(|array| { + /// if array[2] == 3 { Ok(&array[2]) } else { Err(()) } + /// }); + /// assert_eq!(*owning_ref.unwrap(), 3); + /// } + /// ``` + pub fn try_map(self, f: F) -> Result, E> + where + O: StableAddress, + F: FnOnce(&T) -> Result<&U, E>, + { + Ok(OwningRef { reference: f(&self)?, owner: self.owner }) + } + + /// Converts `self` into a new owning reference with a different owner type. + /// + /// The new owner type needs to still contain the original owner in some way + /// so that the reference into it remains valid. This function is marked unsafe + /// because the user needs to manually uphold this guarantee. 
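+    ///
+    /// # Example
+    /// A minimal sketch of the intended usage, mirroring the `total_erase` test in this
+    /// module: wrapping the `Box` owner in an `Rc` moves only the `Box` itself, so the
+    /// boxed data keeps its address and the stored reference stays valid.
+    /// ```
+    /// use rustc_data_structures::owning_ref::OwningRef;
+    /// use std::rc::Rc;
+    ///
+    /// fn main() {
+    ///     let owning_ref = OwningRef::new(Box::new([1, 2, 3, 4])).map(|array| &array[0]);
+    ///     // Safety: `Rc::new` only moves the `Box`, not the array it points to.
+    ///     let owning_ref = unsafe { owning_ref.map_owner(Rc::new) };
+    ///     assert_eq!(*owning_ref, 1);
+    /// }
+    /// ```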
+ pub unsafe fn map_owner(self, f: F) -> OwningRef + where + O: StableAddress, + P: StableAddress, + F: FnOnce(O) -> P, + { + OwningRef { reference: self.reference, owner: f(self.owner) } + } + + /// Converts `self` into a new owning reference where the owner is wrapped + /// in an additional `Box`. + /// + /// This can be used to safely erase the owner of any `OwningRef` + /// to an `OwningRef, T>`. + pub fn map_owner_box(self) -> OwningRef, T> { + OwningRef { reference: self.reference, owner: Box::new(self.owner) } + } + + /// Erases the concrete base type of the owner with a trait object. + /// + /// This allows mixing of owned references with different owner base types. + /// + /// # Example + /// ``` + /// use rustc_data_structures::owning_ref::{OwningRef, Erased}; + /// + /// fn main() { + /// // N.B., using the concrete types here for explicitness. + /// // For less verbose code type aliases like `BoxRef` are provided. + /// + /// let owning_ref_a: OwningRef, [i32; 4]> + /// = OwningRef::new(Box::new([1, 2, 3, 4])); + /// + /// let owning_ref_b: OwningRef>, Vec<(i32, bool)>> + /// = OwningRef::new(Box::new(vec![(0, false), (1, true)])); + /// + /// let owning_ref_a: OwningRef, i32> + /// = owning_ref_a.map(|a| &a[0]); + /// + /// let owning_ref_b: OwningRef>, i32> + /// = owning_ref_b.map(|a| &a[1].0); + /// + /// let owning_refs: [OwningRef, i32>; 2] + /// = [owning_ref_a.erase_owner(), owning_ref_b.erase_owner()]; + /// + /// assert_eq!(*owning_refs[0], 1); + /// assert_eq!(*owning_refs[1], 1); + /// } + /// ``` + pub fn erase_owner<'a>(self) -> OwningRef + where + O: IntoErased<'a>, + { + OwningRef { reference: self.reference, owner: self.owner.into_erased() } + } + + /// Erases the concrete base type of the owner with a trait object which implements `Send`. + /// + /// This allows mixing of owned references with different owner base types. + pub fn erase_send_owner<'a>(self) -> OwningRef + where + O: IntoErasedSend<'a>, + { + OwningRef { reference: self.reference, owner: self.owner.into_erased_send() } + } + + /// Erases the concrete base type of the owner with a trait object + /// which implements `Send` and `Sync`. + /// + /// This allows mixing of owned references with different owner base types. + pub fn erase_send_sync_owner<'a>(self) -> OwningRef + where + O: IntoErasedSendSync<'a>, + { + OwningRef { reference: self.reference, owner: self.owner.into_erased_send_sync() } + } + + // UNIMPLEMENTED: wrap_owner + + // FIXME: Naming convention? + /// A getter for the underlying owner. + pub fn owner(&self) -> &O { + &self.owner + } + + // FIXME: Naming convention? + /// Discards the reference and retrieves the owner. + pub fn into_inner(self) -> O { + self.owner + } +} + +impl OwningRefMut { + /// Creates a new owning reference from an owner + /// initialized to the direct dereference of it. + /// + /// # Example + /// ``` + /// use rustc_data_structures::owning_ref::OwningRefMut; + /// + /// fn main() { + /// let owning_ref_mut = OwningRefMut::new(Box::new(42)); + /// assert_eq!(*owning_ref_mut, 42); + /// } + /// ``` + pub fn new(mut o: O) -> Self + where + O: StableAddress, + O: DerefMut, + { + OwningRefMut { reference: &mut *o, owner: o } + } + + /// Like `new`, but doesn’t require `O` to implement the `StableAddress` trait. + /// Instead, the caller is responsible to make the same promises as implementing the trait. 
+ /// + /// This is useful for cases where coherence rules prevents implementing the trait + /// without adding a dependency to this crate in a third-party library. + pub unsafe fn new_assert_stable_address(mut o: O) -> Self + where + O: DerefMut, + { + OwningRefMut { reference: &mut *o, owner: o } + } + + /// Converts `self` into a new _shared_ owning reference that points at + /// something reachable from the previous one. + /// + /// This can be a reference to a field of `U`, something reachable from a field of + /// `U`, or even something unrelated with a `'static` lifetime. + /// + /// # Example + /// ``` + /// use rustc_data_structures::owning_ref::OwningRefMut; + /// + /// fn main() { + /// let owning_ref_mut = OwningRefMut::new(Box::new([1, 2, 3, 4])); + /// + /// // create an owning reference that points at the + /// // third element of the array. + /// let owning_ref = owning_ref_mut.map(|array| &array[2]); + /// assert_eq!(*owning_ref, 3); + /// } + /// ``` + pub fn map(mut self, f: F) -> OwningRef + where + O: StableAddress, + F: FnOnce(&mut T) -> &U, + { + OwningRef { reference: f(&mut self), owner: self.owner } + } + + /// Converts `self` into a new _mutable_ owning reference that points at + /// something reachable from the previous one. + /// + /// This can be a reference to a field of `U`, something reachable from a field of + /// `U`, or even something unrelated with a `'static` lifetime. + /// + /// # Example + /// ``` + /// use rustc_data_structures::owning_ref::OwningRefMut; + /// + /// fn main() { + /// let owning_ref_mut = OwningRefMut::new(Box::new([1, 2, 3, 4])); + /// + /// // create an owning reference that points at the + /// // third element of the array. + /// let owning_ref_mut = owning_ref_mut.map_mut(|array| &mut array[2]); + /// assert_eq!(*owning_ref_mut, 3); + /// } + /// ``` + pub fn map_mut(mut self, f: F) -> OwningRefMut + where + O: StableAddress, + F: FnOnce(&mut T) -> &mut U, + { + OwningRefMut { reference: f(&mut self), owner: self.owner } + } + + /// Tries to convert `self` into a new _shared_ owning reference that points + /// at something reachable from the previous one. + /// + /// This can be a reference to a field of `U`, something reachable from a field of + /// `U`, or even something unrelated with a `'static` lifetime. + /// + /// # Example + /// ``` + /// use rustc_data_structures::owning_ref::OwningRefMut; + /// + /// fn main() { + /// let owning_ref_mut = OwningRefMut::new(Box::new([1, 2, 3, 4])); + /// + /// // create an owning reference that points at the + /// // third element of the array. + /// let owning_ref = owning_ref_mut.try_map(|array| { + /// if array[2] == 3 { Ok(&array[2]) } else { Err(()) } + /// }); + /// assert_eq!(*owning_ref.unwrap(), 3); + /// } + /// ``` + pub fn try_map(mut self, f: F) -> Result, E> + where + O: StableAddress, + F: FnOnce(&mut T) -> Result<&U, E>, + { + Ok(OwningRef { reference: f(&mut self)?, owner: self.owner }) + } + + /// Tries to convert `self` into a new _mutable_ owning reference that points + /// at something reachable from the previous one. + /// + /// This can be a reference to a field of `U`, something reachable from a field of + /// `U`, or even something unrelated with a `'static` lifetime. + /// + /// # Example + /// ``` + /// use rustc_data_structures::owning_ref::OwningRefMut; + /// + /// fn main() { + /// let owning_ref_mut = OwningRefMut::new(Box::new([1, 2, 3, 4])); + /// + /// // create an owning reference that points at the + /// // third element of the array. 
+ /// let owning_ref_mut = owning_ref_mut.try_map_mut(|array| { + /// if array[2] == 3 { Ok(&mut array[2]) } else { Err(()) } + /// }); + /// assert_eq!(*owning_ref_mut.unwrap(), 3); + /// } + /// ``` + pub fn try_map_mut(mut self, f: F) -> Result, E> + where + O: StableAddress, + F: FnOnce(&mut T) -> Result<&mut U, E>, + { + Ok(OwningRefMut { reference: f(&mut self)?, owner: self.owner }) + } + + /// Converts `self` into a new owning reference with a different owner type. + /// + /// The new owner type needs to still contain the original owner in some way + /// so that the reference into it remains valid. This function is marked unsafe + /// because the user needs to manually uphold this guarantee. + pub unsafe fn map_owner(self, f: F) -> OwningRefMut + where + O: StableAddress, + P: StableAddress, + F: FnOnce(O) -> P, + { + OwningRefMut { reference: self.reference, owner: f(self.owner) } + } + + /// Converts `self` into a new owning reference where the owner is wrapped + /// in an additional `Box`. + /// + /// This can be used to safely erase the owner of any `OwningRefMut` + /// to an `OwningRefMut, T>`. + pub fn map_owner_box(self) -> OwningRefMut, T> { + OwningRefMut { reference: self.reference, owner: Box::new(self.owner) } + } + + /// Erases the concrete base type of the owner with a trait object. + /// + /// This allows mixing of owned references with different owner base types. + /// + /// # Example + /// ``` + /// use rustc_data_structures::owning_ref::{OwningRefMut, Erased}; + /// + /// fn main() { + /// // N.B., using the concrete types here for explicitness. + /// // For less verbose code type aliases like `BoxRef` are provided. + /// + /// let owning_ref_mut_a: OwningRefMut, [i32; 4]> + /// = OwningRefMut::new(Box::new([1, 2, 3, 4])); + /// + /// let owning_ref_mut_b: OwningRefMut>, Vec<(i32, bool)>> + /// = OwningRefMut::new(Box::new(vec![(0, false), (1, true)])); + /// + /// let owning_ref_mut_a: OwningRefMut, i32> + /// = owning_ref_mut_a.map_mut(|a| &mut a[0]); + /// + /// let owning_ref_mut_b: OwningRefMut>, i32> + /// = owning_ref_mut_b.map_mut(|a| &mut a[1].0); + /// + /// let owning_refs_mut: [OwningRefMut, i32>; 2] + /// = [owning_ref_mut_a.erase_owner(), owning_ref_mut_b.erase_owner()]; + /// + /// assert_eq!(*owning_refs_mut[0], 1); + /// assert_eq!(*owning_refs_mut[1], 1); + /// } + /// ``` + pub fn erase_owner<'a>(self) -> OwningRefMut + where + O: IntoErased<'a>, + { + OwningRefMut { reference: self.reference, owner: self.owner.into_erased() } + } + + // UNIMPLEMENTED: wrap_owner + + // FIXME: Naming convention? + /// A getter for the underlying owner. + pub fn owner(&self) -> &O { + &self.owner + } + + // FIXME: Naming convention? + /// Discards the reference and retrieves the owner. + pub fn into_inner(self) -> O { + self.owner + } +} + +///////////////////////////////////////////////////////////////////////////// +// OwningHandle +///////////////////////////////////////////////////////////////////////////// + +use std::ops::{Deref, DerefMut}; + +/// `OwningHandle` is a complement to `OwningRef`. Where `OwningRef` allows +/// consumers to pass around an owned object and a dependent reference, +/// `OwningHandle` contains an owned object and a dependent _object_. +/// +/// `OwningHandle` can encapsulate a `RefMut` along with its associated +/// `RefCell`, or an `RwLockReadGuard` along with its associated `RwLock`. +/// However, the API is completely generic and there are no restrictions on +/// what types of owning and dependent objects may be used. 
+/// +/// `OwningHandle` is created by passing an owner object (which dereferences +/// to a stable address) along with a callback which receives a pointer to +/// that stable location. The callback may then dereference the pointer and +/// mint a dependent object, with the guarantee that the returned object will +/// not outlive the referent of the pointer. +/// +/// Since the callback needs to dereference a raw pointer, it requires `unsafe` +/// code. To avoid forcing this unsafety on most callers, the `ToHandle` trait is +/// implemented for common data structures. Types that implement `ToHandle` can +/// be wrapped into an `OwningHandle` without passing a callback. +pub struct OwningHandle +where + O: StableAddress, + H: Deref, +{ + handle: H, + _owner: O, +} + +impl Deref for OwningHandle +where + O: StableAddress, + H: Deref, +{ + type Target = H::Target; + fn deref(&self) -> &H::Target { + self.handle.deref() + } +} + +unsafe impl StableAddress for OwningHandle +where + O: StableAddress, + H: StableAddress, +{ +} + +impl DerefMut for OwningHandle +where + O: StableAddress, + H: DerefMut, +{ + fn deref_mut(&mut self) -> &mut H::Target { + self.handle.deref_mut() + } +} + +/// Trait to implement the conversion of owner to handle for common types. +pub trait ToHandle { + /// The type of handle to be encapsulated by the OwningHandle. + type Handle: Deref; + + /// Given an appropriately-long-lived pointer to ourselves, create a + /// handle to be encapsulated by the `OwningHandle`. + unsafe fn to_handle(x: *const Self) -> Self::Handle; +} + +/// Trait to implement the conversion of owner to mutable handle for common types. +pub trait ToHandleMut { + /// The type of handle to be encapsulated by the OwningHandle. + type HandleMut: DerefMut; + + /// Given an appropriately-long-lived pointer to ourselves, create a + /// mutable handle to be encapsulated by the `OwningHandle`. + unsafe fn to_handle_mut(x: *const Self) -> Self::HandleMut; +} + +impl OwningHandle +where + O: StableAddress>, + H: Deref, +{ + /// Creates a new `OwningHandle` for a type that implements `ToHandle`. For types + /// that don't implement `ToHandle`, callers may invoke `new_with_fn`, which accepts + /// a callback to perform the conversion. + pub fn new(o: O) -> Self { + OwningHandle::new_with_fn(o, |x| unsafe { O::Target::to_handle(x) }) + } +} + +impl OwningHandle +where + O: StableAddress>, + H: DerefMut, +{ + /// Creates a new mutable `OwningHandle` for a type that implements `ToHandleMut`. + pub fn new_mut(o: O) -> Self { + OwningHandle::new_with_fn(o, |x| unsafe { O::Target::to_handle_mut(x) }) + } +} + +impl OwningHandle +where + O: StableAddress, + H: Deref, +{ + /// Creates a new OwningHandle. The provided callback will be invoked with + /// a pointer to the object owned by `o`, and the returned value is stored + /// as the object to which this `OwningHandle` will forward `Deref` and + /// `DerefMut`. + pub fn new_with_fn(o: O, f: F) -> Self + where + F: FnOnce(*const O::Target) -> H, + { + let h: H; + { + h = f(o.deref() as *const O::Target); + } + + OwningHandle { handle: h, _owner: o } + } + + /// Creates a new OwningHandle. The provided callback will be invoked with + /// a pointer to the object owned by `o`, and the returned value is stored + /// as the object to which this `OwningHandle` will forward `Deref` and + /// `DerefMut`. 
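+    ///
+    /// Unlike `new_with_fn`, the callback may fail; on failure the owner is dropped and
+    /// the error is returned instead of an `OwningHandle`.
+    ///
+    /// # Example
+    /// A minimal sketch, following the `try_owning_handle_ok` test in this module:
+    /// ```
+    /// use rustc_data_structures::owning_ref::{OwningHandle, RcRef};
+    /// use std::cell::RefCell;
+    /// use std::rc::Rc;
+    ///
+    /// fn main() {
+    ///     let cell = Rc::new(RefCell::new(2));
+    ///     let cell_ref = RcRef::new(cell);
+    ///     let handle = OwningHandle::try_new::<_, ()>(cell_ref, |x| {
+    ///         Ok(unsafe { x.as_ref() }.unwrap().borrow_mut())
+    ///     })
+    ///     .unwrap();
+    ///     assert_eq!(*handle, 2);
+    /// }
+    /// ```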
+ pub fn try_new(o: O, f: F) -> Result + where + F: FnOnce(*const O::Target) -> Result, + { + let h: H; + { + h = f(o.deref() as *const O::Target)?; + } + + Ok(OwningHandle { handle: h, _owner: o }) + } +} + +///////////////////////////////////////////////////////////////////////////// +// std traits +///////////////////////////////////////////////////////////////////////////// + +use std::borrow::Borrow; +use std::cmp::{Eq, Ord, Ordering, PartialEq, PartialOrd}; +use std::convert::From; +use std::fmt::{self, Debug}; +use std::hash::{Hash, Hasher}; +use std::marker::{Send, Sync}; + +impl Deref for OwningRef { + type Target = T; + + fn deref(&self) -> &T { + unsafe { &*self.reference } + } +} + +impl Deref for OwningRefMut { + type Target = T; + + fn deref(&self) -> &T { + unsafe { &*self.reference } + } +} + +impl DerefMut for OwningRefMut { + fn deref_mut(&mut self) -> &mut T { + unsafe { &mut *self.reference } + } +} + +unsafe impl StableAddress for OwningRef {} + +impl AsRef for OwningRef { + fn as_ref(&self) -> &T { + &*self + } +} + +impl AsRef for OwningRefMut { + fn as_ref(&self) -> &T { + &*self + } +} + +impl AsMut for OwningRefMut { + fn as_mut(&mut self) -> &mut T { + &mut *self + } +} + +impl Borrow for OwningRef { + fn borrow(&self) -> &T { + &*self + } +} + +impl From for OwningRef +where + O: StableAddress, + O: Deref, +{ + fn from(owner: O) -> Self { + OwningRef::new(owner) + } +} + +impl From for OwningRefMut +where + O: StableAddress, + O: DerefMut, +{ + fn from(owner: O) -> Self { + OwningRefMut::new(owner) + } +} + +impl From> for OwningRef +where + O: StableAddress, + O: DerefMut, +{ + fn from(other: OwningRefMut) -> Self { + OwningRef { owner: other.owner, reference: other.reference } + } +} + +// ^ FIXME: Is an Into impl for calling into_inner() possible as well? 
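+
+// Illustrative sketch of the `From` impl above (mirroring the `into_owning_ref` test in
+// this module): a mutable owning reference can be downgraded into a shared one without
+// touching the owner:
+//
+//     let or: BoxRefMut<()> = Box::new(()).into();
+//     let or: BoxRef<()> = or.into();
+//     assert_eq!(&*or, &());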
+ +impl Debug for OwningRef +where + O: Debug, + T: Debug, +{ + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + write!(f, "OwningRef {{ owner: {:?}, reference: {:?} }}", self.owner(), &**self) + } +} + +impl Debug for OwningRefMut +where + O: Debug, + T: Debug, +{ + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + write!(f, "OwningRefMut {{ owner: {:?}, reference: {:?} }}", self.owner(), &**self) + } +} + +impl Clone for OwningRef +where + O: CloneStableAddress, +{ + fn clone(&self) -> Self { + OwningRef { owner: self.owner.clone(), reference: self.reference } + } +} + +unsafe impl CloneStableAddress for OwningRef where O: CloneStableAddress {} + +unsafe impl Send for OwningRef +where + O: Send, + for<'a> &'a T: Send, +{ +} +unsafe impl Sync for OwningRef +where + O: Sync, + for<'a> &'a T: Sync, +{ +} + +unsafe impl Send for OwningRefMut +where + O: Send, + for<'a> &'a mut T: Send, +{ +} +unsafe impl Sync for OwningRefMut +where + O: Sync, + for<'a> &'a mut T: Sync, +{ +} + +impl Debug for dyn Erased { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + write!(f, "",) + } +} + +impl PartialEq for OwningRef +where + T: PartialEq, +{ + fn eq(&self, other: &Self) -> bool { + (&*self as &T).eq(&*other as &T) + } +} + +impl Eq for OwningRef where T: Eq {} + +impl PartialOrd for OwningRef +where + T: PartialOrd, +{ + fn partial_cmp(&self, other: &Self) -> Option { + (&*self as &T).partial_cmp(&*other as &T) + } +} + +impl Ord for OwningRef +where + T: Ord, +{ + fn cmp(&self, other: &Self) -> Ordering { + (&*self as &T).cmp(&*other as &T) + } +} + +impl Hash for OwningRef +where + T: Hash, +{ + fn hash(&self, state: &mut H) { + (&*self as &T).hash(state); + } +} + +impl PartialEq for OwningRefMut +where + T: PartialEq, +{ + fn eq(&self, other: &Self) -> bool { + (&*self as &T).eq(&*other as &T) + } +} + +impl Eq for OwningRefMut where T: Eq {} + +impl PartialOrd for OwningRefMut +where + T: PartialOrd, +{ + fn partial_cmp(&self, other: &Self) -> Option { + (&*self as &T).partial_cmp(&*other as &T) + } +} + +impl Ord for OwningRefMut +where + T: Ord, +{ + fn cmp(&self, other: &Self) -> Ordering { + (&*self as &T).cmp(&*other as &T) + } +} + +impl Hash for OwningRefMut +where + T: Hash, +{ + fn hash(&self, state: &mut H) { + (&*self as &T).hash(state); + } +} + +///////////////////////////////////////////////////////////////////////////// +// std types integration and convenience type defs +///////////////////////////////////////////////////////////////////////////// + +use std::boxed::Box; +use std::cell::{Ref, RefCell, RefMut}; +use std::rc::Rc; +use std::sync::Arc; +use std::sync::{MutexGuard, RwLockReadGuard, RwLockWriteGuard}; + +impl ToHandle for RefCell { + type Handle = Ref<'static, T>; + unsafe fn to_handle(x: *const Self) -> Self::Handle { + (*x).borrow() + } +} + +impl ToHandleMut for RefCell { + type HandleMut = RefMut<'static, T>; + unsafe fn to_handle_mut(x: *const Self) -> Self::HandleMut { + (*x).borrow_mut() + } +} + +// N.B., implementing ToHandle{,Mut} for Mutex and RwLock requires a decision +// about which handle creation to use (i.e., read() vs try_read()) as well as +// what to do with error results. + +/// Typedef of an owning reference that uses a `Box` as the owner. +pub type BoxRef = OwningRef, U>; +/// Typedef of an owning reference that uses a `Vec` as the owner. +pub type VecRef = OwningRef, U>; +/// Typedef of an owning reference that uses a `String` as the owner. 
+pub type StringRef = OwningRef; + +/// Typedef of an owning reference that uses an `Rc` as the owner. +pub type RcRef = OwningRef, U>; +/// Typedef of an owning reference that uses an `Arc` as the owner. +pub type ArcRef = OwningRef, U>; + +/// Typedef of an owning reference that uses a `Ref` as the owner. +pub type RefRef<'a, T, U = T> = OwningRef, U>; +/// Typedef of an owning reference that uses a `RefMut` as the owner. +pub type RefMutRef<'a, T, U = T> = OwningRef, U>; +/// Typedef of an owning reference that uses a `MutexGuard` as the owner. +pub type MutexGuardRef<'a, T, U = T> = OwningRef, U>; +/// Typedef of an owning reference that uses an `RwLockReadGuard` as the owner. +pub type RwLockReadGuardRef<'a, T, U = T> = OwningRef, U>; +/// Typedef of an owning reference that uses an `RwLockWriteGuard` as the owner. +pub type RwLockWriteGuardRef<'a, T, U = T> = OwningRef, U>; + +/// Typedef of a mutable owning reference that uses a `Box` as the owner. +pub type BoxRefMut = OwningRefMut, U>; +/// Typedef of a mutable owning reference that uses a `Vec` as the owner. +pub type VecRefMut = OwningRefMut, U>; +/// Typedef of a mutable owning reference that uses a `String` as the owner. +pub type StringRefMut = OwningRefMut; + +/// Typedef of a mutable owning reference that uses a `RefMut` as the owner. +pub type RefMutRefMut<'a, T, U = T> = OwningRefMut, U>; +/// Typedef of a mutable owning reference that uses a `MutexGuard` as the owner. +pub type MutexGuardRefMut<'a, T, U = T> = OwningRefMut, U>; +/// Typedef of a mutable owning reference that uses an `RwLockWriteGuard` as the owner. +pub type RwLockWriteGuardRefMut<'a, T, U = T> = OwningRef, U>; + +unsafe impl<'a, T: 'a> IntoErased<'a> for Box { + type Erased = Box; + fn into_erased(self) -> Self::Erased { + self + } +} +unsafe impl<'a, T: 'a> IntoErased<'a> for Rc { + type Erased = Rc; + fn into_erased(self) -> Self::Erased { + self + } +} +unsafe impl<'a, T: 'a> IntoErased<'a> for Arc { + type Erased = Arc; + fn into_erased(self) -> Self::Erased { + self + } +} + +unsafe impl<'a, T: Send + 'a> IntoErasedSend<'a> for Box { + type Erased = Box; + fn into_erased_send(self) -> Self::Erased { + self + } +} + +unsafe impl<'a, T: Send + 'a> IntoErasedSendSync<'a> for Box { + type Erased = Box; + fn into_erased_send_sync(self) -> Self::Erased { + let result: Box = self; + // This is safe since Erased can always implement Sync + // Only the destructor is available and it takes &mut self + unsafe { mem::transmute(result) } + } +} + +unsafe impl<'a, T: Send + Sync + 'a> IntoErasedSendSync<'a> for Arc { + type Erased = Arc; + fn into_erased_send_sync(self) -> Self::Erased { + self + } +} + +/// Typedef of an owning reference that uses an erased `Box` as the owner. +pub type ErasedBoxRef = OwningRef, U>; +/// Typedef of an owning reference that uses an erased `Rc` as the owner. +pub type ErasedRcRef = OwningRef, U>; +/// Typedef of an owning reference that uses an erased `Arc` as the owner. +pub type ErasedArcRef = OwningRef, U>; + +/// Typedef of a mutable owning reference that uses an erased `Box` as the owner. +pub type ErasedBoxRefMut = OwningRefMut, U>; + +#[cfg(test)] +mod tests; diff --git a/compiler/rustc_data_structures/src/owning_ref/tests.rs b/compiler/rustc_data_structures/src/owning_ref/tests.rs new file mode 100644 index 000000000..320c03d51 --- /dev/null +++ b/compiler/rustc_data_structures/src/owning_ref/tests.rs @@ -0,0 +1,711 @@ +// FIXME: owning_ref is not sound under stacked borrows. Preferably, get rid of it. 
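+// The test modules below are gated with `#[cfg(not(miri))]`: Miri checks the Stacked
+// Borrows aliasing rules, under which `owning_ref` is not sound (see the FIXME above),
+// so these tests are skipped when running under Miri.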
+#[cfg(not(miri))] +mod owning_ref { + use super::super::OwningRef; + use super::super::{BoxRef, Erased, ErasedBoxRef, RcRef}; + use std::cmp::{Ord, Ordering, PartialEq, PartialOrd}; + use std::collections::hash_map::DefaultHasher; + use std::collections::HashMap; + use std::hash::{Hash, Hasher}; + use std::rc::Rc; + + #[derive(Debug, PartialEq)] + struct Example(u32, String, [u8; 3]); + fn example() -> Example { + Example(42, "hello world".to_string(), [1, 2, 3]) + } + + #[test] + fn new_deref() { + let or: OwningRef, ()> = OwningRef::new(Box::new(())); + assert_eq!(&*or, &()); + } + + #[test] + fn into() { + let or: OwningRef, ()> = Box::new(()).into(); + assert_eq!(&*or, &()); + } + + #[test] + fn map_offset_ref() { + let or: BoxRef = Box::new(example()).into(); + let or: BoxRef<_, u32> = or.map(|x| &x.0); + assert_eq!(&*or, &42); + + let or: BoxRef = Box::new(example()).into(); + let or: BoxRef<_, u8> = or.map(|x| &x.2[1]); + assert_eq!(&*or, &2); + } + + #[test] + fn map_heap_ref() { + let or: BoxRef = Box::new(example()).into(); + let or: BoxRef<_, str> = or.map(|x| &x.1[..5]); + assert_eq!(&*or, "hello"); + } + + #[test] + fn map_static_ref() { + let or: BoxRef<()> = Box::new(()).into(); + let or: BoxRef<_, str> = or.map(|_| "hello"); + assert_eq!(&*or, "hello"); + } + + #[test] + fn map_chained() { + let or: BoxRef = Box::new(example().1).into(); + let or: BoxRef<_, str> = or.map(|x| &x[1..5]); + let or: BoxRef<_, str> = or.map(|x| &x[..2]); + assert_eq!(&*or, "el"); + } + + #[test] + fn map_chained_inference() { + let or = BoxRef::new(Box::new(example().1)).map(|x| &x[..5]).map(|x| &x[1..3]); + assert_eq!(&*or, "el"); + } + + #[test] + fn owner() { + let or: BoxRef = Box::new(example().1).into(); + let or = or.map(|x| &x[..5]); + assert_eq!(&*or, "hello"); + assert_eq!(&**or.owner(), "hello world"); + } + + #[test] + fn into_inner() { + let or: BoxRef = Box::new(example().1).into(); + let or = or.map(|x| &x[..5]); + assert_eq!(&*or, "hello"); + let s = *or.into_inner(); + assert_eq!(&s, "hello world"); + } + + #[test] + fn fmt_debug() { + let or: BoxRef = Box::new(example().1).into(); + let or = or.map(|x| &x[..5]); + let s = format!("{:?}", or); + assert_eq!(&s, "OwningRef { owner: \"hello world\", reference: \"hello\" }"); + } + + #[test] + fn erased_owner() { + let o1: BoxRef = BoxRef::new(Box::new(example())).map(|x| &x.1[..]); + + let o2: BoxRef = BoxRef::new(Box::new(example().1)).map(|x| &x[..]); + + let os: Vec> = vec![o1.erase_owner(), o2.erase_owner()]; + assert!(os.iter().all(|e| &e[..] 
== "hello world")); + } + + #[test] + fn raii_locks() { + use super::super::{MutexGuardRef, RwLockReadGuardRef, RwLockWriteGuardRef}; + use super::super::{RefMutRef, RefRef}; + use std::cell::RefCell; + use std::sync::{Mutex, RwLock}; + + { + let a = RefCell::new(1); + let a = { + let a = RefRef::new(a.borrow()); + assert_eq!(*a, 1); + a + }; + assert_eq!(*a, 1); + drop(a); + } + { + let a = RefCell::new(1); + let a = { + let a = RefMutRef::new(a.borrow_mut()); + assert_eq!(*a, 1); + a + }; + assert_eq!(*a, 1); + drop(a); + } + { + let a = Mutex::new(1); + let a = { + let a = MutexGuardRef::new(a.lock().unwrap()); + assert_eq!(*a, 1); + a + }; + assert_eq!(*a, 1); + drop(a); + } + { + let a = RwLock::new(1); + let a = { + let a = RwLockReadGuardRef::new(a.read().unwrap()); + assert_eq!(*a, 1); + a + }; + assert_eq!(*a, 1); + drop(a); + } + { + let a = RwLock::new(1); + let a = { + let a = RwLockWriteGuardRef::new(a.write().unwrap()); + assert_eq!(*a, 1); + a + }; + assert_eq!(*a, 1); + drop(a); + } + } + + #[test] + fn eq() { + let or1: BoxRef<[u8]> = BoxRef::new(vec![1, 2, 3].into_boxed_slice()); + let or2: BoxRef<[u8]> = BoxRef::new(vec![1, 2, 3].into_boxed_slice()); + assert_eq!(or1.eq(&or2), true); + } + + #[test] + fn cmp() { + let or1: BoxRef<[u8]> = BoxRef::new(vec![1, 2, 3].into_boxed_slice()); + let or2: BoxRef<[u8]> = BoxRef::new(vec![4, 5, 6].into_boxed_slice()); + assert_eq!(or1.cmp(&or2), Ordering::Less); + } + + #[test] + fn partial_cmp() { + let or1: BoxRef<[u8]> = BoxRef::new(vec![4, 5, 6].into_boxed_slice()); + let or2: BoxRef<[u8]> = BoxRef::new(vec![1, 2, 3].into_boxed_slice()); + assert_eq!(or1.partial_cmp(&or2), Some(Ordering::Greater)); + } + + #[test] + fn hash() { + let mut h1 = DefaultHasher::new(); + let mut h2 = DefaultHasher::new(); + + let or1: BoxRef<[u8]> = BoxRef::new(vec![1, 2, 3].into_boxed_slice()); + let or2: BoxRef<[u8]> = BoxRef::new(vec![1, 2, 3].into_boxed_slice()); + + or1.hash(&mut h1); + or2.hash(&mut h2); + + assert_eq!(h1.finish(), h2.finish()); + } + + #[test] + fn borrow() { + let mut hash = HashMap::new(); + let key = RcRef::::new(Rc::new("foo-bar".to_string())).map(|s| &s[..]); + + hash.insert(key.clone().map(|s| &s[..3]), 42); + hash.insert(key.clone().map(|s| &s[4..]), 23); + + assert_eq!(hash.get("foo"), Some(&42)); + assert_eq!(hash.get("bar"), Some(&23)); + } + + #[test] + fn total_erase() { + let a: OwningRef, [u8]> = OwningRef::new(vec![]).map(|x| &x[..]); + let b: OwningRef, [u8]> = + OwningRef::new(vec![].into_boxed_slice()).map(|x| &x[..]); + + let c: OwningRef>, [u8]> = unsafe { a.map_owner(Rc::new) }; + let d: OwningRef>, [u8]> = unsafe { b.map_owner(Rc::new) }; + + let e: OwningRef, [u8]> = c.erase_owner(); + let f: OwningRef, [u8]> = d.erase_owner(); + + let _g = e.clone(); + let _h = f.clone(); + } + + #[test] + fn total_erase_box() { + let a: OwningRef, [u8]> = OwningRef::new(vec![]).map(|x| &x[..]); + let b: OwningRef, [u8]> = + OwningRef::new(vec![].into_boxed_slice()).map(|x| &x[..]); + + let c: OwningRef>, [u8]> = a.map_owner_box(); + let d: OwningRef>, [u8]> = b.map_owner_box(); + + let _e: OwningRef, [u8]> = c.erase_owner(); + let _f: OwningRef, [u8]> = d.erase_owner(); + } + + #[test] + fn try_map1() { + use std::any::Any; + + let x = Box::new(123_i32); + let y: Box = x; + + assert!(OwningRef::new(y).try_map(|x| x.downcast_ref::().ok_or(())).is_ok()); + } + + #[test] + fn try_map2() { + use std::any::Any; + + let x = Box::new(123_i32); + let y: Box = x; + + assert!(!OwningRef::new(y).try_map(|x| 
x.downcast_ref::().ok_or(())).is_err()); + } +} + +mod owning_handle { + use super::super::OwningHandle; + use super::super::RcRef; + use std::cell::RefCell; + use std::rc::Rc; + use std::sync::Arc; + use std::sync::RwLock; + + #[test] + fn owning_handle() { + use std::cell::RefCell; + let cell = Rc::new(RefCell::new(2)); + let cell_ref = RcRef::new(cell); + let mut handle = + OwningHandle::new_with_fn(cell_ref, |x| unsafe { x.as_ref() }.unwrap().borrow_mut()); + assert_eq!(*handle, 2); + *handle = 3; + assert_eq!(*handle, 3); + } + + #[test] + fn try_owning_handle_ok() { + use std::cell::RefCell; + let cell = Rc::new(RefCell::new(2)); + let cell_ref = RcRef::new(cell); + let mut handle = OwningHandle::try_new::<_, ()>(cell_ref, |x| { + Ok(unsafe { x.as_ref() }.unwrap().borrow_mut()) + }) + .unwrap(); + assert_eq!(*handle, 2); + *handle = 3; + assert_eq!(*handle, 3); + } + + #[test] + fn try_owning_handle_err() { + use std::cell::RefCell; + let cell = Rc::new(RefCell::new(2)); + let cell_ref = RcRef::new(cell); + let handle = OwningHandle::try_new::<_, ()>(cell_ref, |x| { + if false { + return Ok(unsafe { x.as_ref() }.unwrap().borrow_mut()); + } + Err(()) + }); + assert!(handle.is_err()); + } + + #[test] + fn nested() { + use std::cell::RefCell; + use std::sync::{Arc, RwLock}; + + let result = { + let complex = Rc::new(RefCell::new(Arc::new(RwLock::new("someString")))); + let curr = RcRef::new(complex); + let curr = + OwningHandle::new_with_fn(curr, |x| unsafe { x.as_ref() }.unwrap().borrow_mut()); + let mut curr = OwningHandle::new_with_fn(curr, |x| { + unsafe { x.as_ref() }.unwrap().try_write().unwrap() + }); + assert_eq!(*curr, "someString"); + *curr = "someOtherString"; + curr + }; + assert_eq!(*result, "someOtherString"); + } + + #[test] + fn owning_handle_safe() { + use std::cell::RefCell; + let cell = Rc::new(RefCell::new(2)); + let cell_ref = RcRef::new(cell); + let handle = OwningHandle::new(cell_ref); + assert_eq!(*handle, 2); + } + + #[test] + fn owning_handle_mut_safe() { + use std::cell::RefCell; + let cell = Rc::new(RefCell::new(2)); + let cell_ref = RcRef::new(cell); + let mut handle = OwningHandle::new_mut(cell_ref); + assert_eq!(*handle, 2); + *handle = 3; + assert_eq!(*handle, 3); + } + + #[test] + fn owning_handle_safe_2() { + let result = { + let complex = Rc::new(RefCell::new(Arc::new(RwLock::new("someString")))); + let curr = RcRef::new(complex); + let curr = + OwningHandle::new_with_fn(curr, |x| unsafe { x.as_ref() }.unwrap().borrow_mut()); + let mut curr = OwningHandle::new_with_fn(curr, |x| { + unsafe { x.as_ref() }.unwrap().try_write().unwrap() + }); + assert_eq!(*curr, "someString"); + *curr = "someOtherString"; + curr + }; + assert_eq!(*result, "someOtherString"); + } +} + +// FIXME: owning_ref is not sound under stacked borrows. Preferably, get rid of it. 
+#[cfg(not(miri))] +mod owning_ref_mut { + use super::super::BoxRef; + use super::super::{BoxRefMut, Erased, ErasedBoxRefMut, OwningRefMut}; + use std::cmp::{Ord, Ordering, PartialEq, PartialOrd}; + use std::collections::hash_map::DefaultHasher; + use std::collections::HashMap; + use std::hash::{Hash, Hasher}; + + #[derive(Debug, PartialEq)] + struct Example(u32, String, [u8; 3]); + fn example() -> Example { + Example(42, "hello world".to_string(), [1, 2, 3]) + } + + #[test] + fn new_deref() { + let or: OwningRefMut, ()> = OwningRefMut::new(Box::new(())); + assert_eq!(&*or, &()); + } + + #[test] + fn new_deref_mut() { + let mut or: OwningRefMut, ()> = OwningRefMut::new(Box::new(())); + assert_eq!(&mut *or, &mut ()); + } + + #[test] + fn mutate() { + let mut or: OwningRefMut, usize> = OwningRefMut::new(Box::new(0)); + assert_eq!(&*or, &0); + *or = 1; + assert_eq!(&*or, &1); + } + + #[test] + fn into() { + let or: OwningRefMut, ()> = Box::new(()).into(); + assert_eq!(&*or, &()); + } + + #[test] + fn map_offset_ref() { + let or: BoxRefMut = Box::new(example()).into(); + let or: BoxRef<_, u32> = or.map(|x| &mut x.0); + assert_eq!(&*or, &42); + + let or: BoxRefMut = Box::new(example()).into(); + let or: BoxRef<_, u8> = or.map(|x| &mut x.2[1]); + assert_eq!(&*or, &2); + } + + #[test] + fn map_heap_ref() { + let or: BoxRefMut = Box::new(example()).into(); + let or: BoxRef<_, str> = or.map(|x| &mut x.1[..5]); + assert_eq!(&*or, "hello"); + } + + #[test] + fn map_static_ref() { + let or: BoxRefMut<()> = Box::new(()).into(); + let or: BoxRef<_, str> = or.map(|_| "hello"); + assert_eq!(&*or, "hello"); + } + + #[test] + fn map_mut_offset_ref() { + let or: BoxRefMut = Box::new(example()).into(); + let or: BoxRefMut<_, u32> = or.map_mut(|x| &mut x.0); + assert_eq!(&*or, &42); + + let or: BoxRefMut = Box::new(example()).into(); + let or: BoxRefMut<_, u8> = or.map_mut(|x| &mut x.2[1]); + assert_eq!(&*or, &2); + } + + #[test] + fn map_mut_heap_ref() { + let or: BoxRefMut = Box::new(example()).into(); + let or: BoxRefMut<_, str> = or.map_mut(|x| &mut x.1[..5]); + assert_eq!(&*or, "hello"); + } + + #[test] + fn map_mut_static_ref() { + static mut MUT_S: [u8; 5] = *b"hello"; + + let mut_s: &'static mut [u8] = unsafe { &mut MUT_S }; + + let or: BoxRefMut<()> = Box::new(()).into(); + let or: BoxRefMut<_, [u8]> = or.map_mut(move |_| mut_s); + assert_eq!(&*or, b"hello"); + } + + #[test] + fn map_mut_chained() { + let or: BoxRefMut = Box::new(example().1).into(); + let or: BoxRefMut<_, str> = or.map_mut(|x| &mut x[1..5]); + let or: BoxRefMut<_, str> = or.map_mut(|x| &mut x[..2]); + assert_eq!(&*or, "el"); + } + + #[test] + fn map_chained_inference() { + let or = BoxRefMut::new(Box::new(example().1)) + .map_mut(|x| &mut x[..5]) + .map_mut(|x| &mut x[1..3]); + assert_eq!(&*or, "el"); + } + + #[test] + fn try_map_mut() { + let or: BoxRefMut = Box::new(example().1).into(); + let or: Result, ()> = or.try_map_mut(|x| Ok(&mut x[1..5])); + assert_eq!(&*or.unwrap(), "ello"); + + let or: BoxRefMut = Box::new(example().1).into(); + let or: Result, ()> = or.try_map_mut(|_| Err(())); + assert!(or.is_err()); + } + + #[test] + fn owner() { + let or: BoxRefMut = Box::new(example().1).into(); + let or = or.map_mut(|x| &mut x[..5]); + assert_eq!(&*or, "hello"); + assert_eq!(&**or.owner(), "hello world"); + } + + #[test] + fn into_inner() { + let or: BoxRefMut = Box::new(example().1).into(); + let or = or.map_mut(|x| &mut x[..5]); + assert_eq!(&*or, "hello"); + let s = *or.into_inner(); + assert_eq!(&s, "hello world"); + } + + 
#[test] + fn fmt_debug() { + let or: BoxRefMut = Box::new(example().1).into(); + let or = or.map_mut(|x| &mut x[..5]); + let s = format!("{:?}", or); + assert_eq!(&s, "OwningRefMut { owner: \"hello world\", reference: \"hello\" }"); + } + + #[test] + fn erased_owner() { + let o1: BoxRefMut = + BoxRefMut::new(Box::new(example())).map_mut(|x| &mut x.1[..]); + + let o2: BoxRefMut = + BoxRefMut::new(Box::new(example().1)).map_mut(|x| &mut x[..]); + + let os: Vec> = vec![o1.erase_owner(), o2.erase_owner()]; + assert!(os.iter().all(|e| &e[..] == "hello world")); + } + + #[test] + fn raii_locks() { + use super::super::RefMutRefMut; + use super::super::{MutexGuardRefMut, RwLockWriteGuardRefMut}; + use std::cell::RefCell; + use std::sync::{Mutex, RwLock}; + + { + let a = RefCell::new(1); + let a = { + let a = RefMutRefMut::new(a.borrow_mut()); + assert_eq!(*a, 1); + a + }; + assert_eq!(*a, 1); + drop(a); + } + { + let a = Mutex::new(1); + let a = { + let a = MutexGuardRefMut::new(a.lock().unwrap()); + assert_eq!(*a, 1); + a + }; + assert_eq!(*a, 1); + drop(a); + } + { + let a = RwLock::new(1); + let a = { + let a = RwLockWriteGuardRefMut::new(a.write().unwrap()); + assert_eq!(*a, 1); + a + }; + assert_eq!(*a, 1); + drop(a); + } + } + + #[test] + fn eq() { + let or1: BoxRefMut<[u8]> = BoxRefMut::new(vec![1, 2, 3].into_boxed_slice()); + let or2: BoxRefMut<[u8]> = BoxRefMut::new(vec![1, 2, 3].into_boxed_slice()); + assert_eq!(or1.eq(&or2), true); + } + + #[test] + fn cmp() { + let or1: BoxRefMut<[u8]> = BoxRefMut::new(vec![1, 2, 3].into_boxed_slice()); + let or2: BoxRefMut<[u8]> = BoxRefMut::new(vec![4, 5, 6].into_boxed_slice()); + assert_eq!(or1.cmp(&or2), Ordering::Less); + } + + #[test] + fn partial_cmp() { + let or1: BoxRefMut<[u8]> = BoxRefMut::new(vec![4, 5, 6].into_boxed_slice()); + let or2: BoxRefMut<[u8]> = BoxRefMut::new(vec![1, 2, 3].into_boxed_slice()); + assert_eq!(or1.partial_cmp(&or2), Some(Ordering::Greater)); + } + + #[test] + fn hash() { + let mut h1 = DefaultHasher::new(); + let mut h2 = DefaultHasher::new(); + + let or1: BoxRefMut<[u8]> = BoxRefMut::new(vec![1, 2, 3].into_boxed_slice()); + let or2: BoxRefMut<[u8]> = BoxRefMut::new(vec![1, 2, 3].into_boxed_slice()); + + or1.hash(&mut h1); + or2.hash(&mut h2); + + assert_eq!(h1.finish(), h2.finish()); + } + + #[test] + fn borrow() { + let mut hash = HashMap::new(); + let key1 = BoxRefMut::::new(Box::new("foo".to_string())).map(|s| &s[..]); + let key2 = BoxRefMut::::new(Box::new("bar".to_string())).map(|s| &s[..]); + + hash.insert(key1, 42); + hash.insert(key2, 23); + + assert_eq!(hash.get("foo"), Some(&42)); + assert_eq!(hash.get("bar"), Some(&23)); + } + + #[test] + fn total_erase() { + let a: OwningRefMut, [u8]> = OwningRefMut::new(vec![]).map_mut(|x| &mut x[..]); + let b: OwningRefMut, [u8]> = + OwningRefMut::new(vec![].into_boxed_slice()).map_mut(|x| &mut x[..]); + + let c: OwningRefMut>, [u8]> = unsafe { a.map_owner(Box::new) }; + let d: OwningRefMut>, [u8]> = unsafe { b.map_owner(Box::new) }; + + let _e: OwningRefMut, [u8]> = c.erase_owner(); + let _f: OwningRefMut, [u8]> = d.erase_owner(); + } + + #[test] + fn total_erase_box() { + let a: OwningRefMut, [u8]> = OwningRefMut::new(vec![]).map_mut(|x| &mut x[..]); + let b: OwningRefMut, [u8]> = + OwningRefMut::new(vec![].into_boxed_slice()).map_mut(|x| &mut x[..]); + + let c: OwningRefMut>, [u8]> = a.map_owner_box(); + let d: OwningRefMut>, [u8]> = b.map_owner_box(); + + let _e: OwningRefMut, [u8]> = c.erase_owner(); + let _f: OwningRefMut, [u8]> = d.erase_owner(); + } + + #[test] 
+ fn try_map1() { + use std::any::Any; + + let x = Box::new(123_i32); + let y: Box = x; + + assert!(OwningRefMut::new(y).try_map_mut(|x| x.downcast_mut::().ok_or(())).is_ok()); + } + + #[test] + fn try_map2() { + use std::any::Any; + + let x = Box::new(123_i32); + let y: Box = x; + + assert!(!OwningRefMut::new(y).try_map_mut(|x| x.downcast_mut::().ok_or(())).is_err()); + } + + #[test] + fn try_map3() { + use std::any::Any; + + let x = Box::new(123_i32); + let y: Box = x; + + assert!(OwningRefMut::new(y).try_map(|x| x.downcast_ref::().ok_or(())).is_ok()); + } + + #[test] + fn try_map4() { + use std::any::Any; + + let x = Box::new(123_i32); + let y: Box = x; + + assert!(!OwningRefMut::new(y).try_map(|x| x.downcast_ref::().ok_or(())).is_err()); + } + + #[test] + fn into_owning_ref() { + use super::super::BoxRef; + + let or: BoxRefMut<()> = Box::new(()).into(); + let or: BoxRef<()> = or.into(); + assert_eq!(&*or, &()); + } + + struct Foo { + u: u32, + } + struct Bar { + f: Foo, + } + + #[test] + fn ref_mut() { + use std::cell::RefCell; + + let a = RefCell::new(Bar { f: Foo { u: 42 } }); + let mut b = OwningRefMut::new(a.borrow_mut()); + assert_eq!(b.f.u, 42); + b.f.u = 43; + let mut c = b.map_mut(|x| &mut x.f); + assert_eq!(c.u, 43); + c.u = 44; + let mut d = c.map_mut(|x| &mut x.u); + assert_eq!(*d, 44); + *d = 45; + assert_eq!(*d, 45); + } +} diff --git a/compiler/rustc_data_structures/src/profiling.rs b/compiler/rustc_data_structures/src/profiling.rs new file mode 100644 index 000000000..d8b26f984 --- /dev/null +++ b/compiler/rustc_data_structures/src/profiling.rs @@ -0,0 +1,861 @@ +//! # Rust Compiler Self-Profiling +//! +//! This module implements the basic framework for the compiler's self- +//! profiling support. It provides the `SelfProfiler` type which enables +//! recording "events". An event is something that starts and ends at a given +//! point in time and has an ID and a kind attached to it. This allows for +//! tracing the compiler's activity. +//! +//! Internally this module uses the custom tailored [measureme][mm] crate for +//! efficiently recording events to disk in a compact format that can be +//! post-processed and analyzed by the suite of tools in the `measureme` +//! project. The highest priority for the tracing framework is on incurring as +//! little overhead as possible. +//! +//! +//! ## Event Overview +//! +//! Events have a few properties: +//! +//! - The `event_kind` designates the broad category of an event (e.g. does it +//! correspond to the execution of a query provider or to loading something +//! from the incr. comp. on-disk cache, etc). +//! - The `event_id` designates the query invocation or function call it +//! corresponds to, possibly including the query key or function arguments. +//! - Each event stores the ID of the thread it was recorded on. +//! - The timestamp stores beginning and end of the event, or the single point +//! in time it occurred at for "instant" events. +//! +//! +//! ## Event Filtering +//! +//! Event generation can be filtered by event kind. Recording all possible +//! events generates a lot of data, much of which is not needed for most kinds +//! of analysis. So, in order to keep overhead as low as possible for a given +//! use case, the `SelfProfiler` will only record the kinds of events that +//! pass the filter specified as a command line argument to the compiler. +//! +//! +//! ## `event_id` Assignment +//! +//! As far as `measureme` is concerned, `event_id`s are just strings. However, +//! 
it would incur too much overhead to generate and persist each `event_id` +//! string at the point where the event is recorded. In order to make this more +//! efficient `measureme` has two features: +//! +//! - Strings can share their content, so that re-occurring parts don't have to +//! be copied over and over again. One allocates a string in `measureme` and +//! gets back a `StringId`. This `StringId` is then used to refer to that +//! string. `measureme` strings are actually DAGs of string components so that +//! arbitrary sharing of substrings can be done efficiently. This is useful +//! because `event_id`s contain lots of redundant text like query names or +//! def-path components. +//! +//! - `StringId`s can be "virtual" which means that the client picks a numeric +//! ID according to some application-specific scheme and can later make that +//! ID be mapped to an actual string. This is used to cheaply generate +//! `event_id`s while the events actually occur, causing little timing +//! distortion, and then later map those `StringId`s, in bulk, to actual +//! `event_id` strings. This way the largest part of the tracing overhead is +//! localized to one contiguous chunk of time. +//! +//! How are these `event_id`s generated in the compiler? For things that occur +//! infrequently (e.g. "generic activities"), we just allocate the string the +//! first time it is used and then keep the `StringId` in a hash table. This +//! is implemented in `SelfProfiler::get_or_alloc_cached_string()`. +//! +//! For queries it gets more interesting: First we need a unique numeric ID for +//! each query invocation (the `QueryInvocationId`). This ID is used as the +//! virtual `StringId` we use as `event_id` for a given event. This ID has to +//! be available both when the query is executed and later, together with the +//! query key, when we allocate the actual `event_id` strings in bulk. +//! +//! We could make the compiler generate and keep track of such an ID for each +//! query invocation but luckily we already have something that fits all the +//! the requirements: the query's `DepNodeIndex`. So we use the numeric value +//! of the `DepNodeIndex` as `event_id` when recording the event and then, +//! just before the query context is dropped, we walk the entire query cache +//! (which stores the `DepNodeIndex` along with the query key for each +//! invocation) and allocate the corresponding strings together with a mapping +//! for `DepNodeIndex as StringId`. +//! +//! [mm]: https://github.com/rust-lang/measureme/ + +use crate::cold_path; +use crate::fx::FxHashMap; + +use std::borrow::Borrow; +use std::collections::hash_map::Entry; +use std::convert::Into; +use std::error::Error; +use std::fs; +use std::path::Path; +use std::process; +use std::sync::Arc; +use std::time::{Duration, Instant}; + +pub use measureme::EventId; +use measureme::{EventIdBuilder, Profiler, SerializableString, StringId}; +use parking_lot::RwLock; +use smallvec::SmallVec; + +bitflags::bitflags! 
{ + struct EventFilter: u32 { + const GENERIC_ACTIVITIES = 1 << 0; + const QUERY_PROVIDERS = 1 << 1; + const QUERY_CACHE_HITS = 1 << 2; + const QUERY_BLOCKED = 1 << 3; + const INCR_CACHE_LOADS = 1 << 4; + + const QUERY_KEYS = 1 << 5; + const FUNCTION_ARGS = 1 << 6; + const LLVM = 1 << 7; + const INCR_RESULT_HASHING = 1 << 8; + const ARTIFACT_SIZES = 1 << 9; + + const DEFAULT = Self::GENERIC_ACTIVITIES.bits | + Self::QUERY_PROVIDERS.bits | + Self::QUERY_BLOCKED.bits | + Self::INCR_CACHE_LOADS.bits | + Self::INCR_RESULT_HASHING.bits | + Self::ARTIFACT_SIZES.bits; + + const ARGS = Self::QUERY_KEYS.bits | Self::FUNCTION_ARGS.bits; + } +} + +// keep this in sync with the `-Z self-profile-events` help message in rustc_session/options.rs +const EVENT_FILTERS_BY_NAME: &[(&str, EventFilter)] = &[ + ("none", EventFilter::empty()), + ("all", EventFilter::all()), + ("default", EventFilter::DEFAULT), + ("generic-activity", EventFilter::GENERIC_ACTIVITIES), + ("query-provider", EventFilter::QUERY_PROVIDERS), + ("query-cache-hit", EventFilter::QUERY_CACHE_HITS), + ("query-blocked", EventFilter::QUERY_BLOCKED), + ("incr-cache-load", EventFilter::INCR_CACHE_LOADS), + ("query-keys", EventFilter::QUERY_KEYS), + ("function-args", EventFilter::FUNCTION_ARGS), + ("args", EventFilter::ARGS), + ("llvm", EventFilter::LLVM), + ("incr-result-hashing", EventFilter::INCR_RESULT_HASHING), + ("artifact-sizes", EventFilter::ARTIFACT_SIZES), +]; + +/// Something that uniquely identifies a query invocation. +pub struct QueryInvocationId(pub u32); + +/// A reference to the SelfProfiler. It can be cloned and sent across thread +/// boundaries at will. +#[derive(Clone)] +pub struct SelfProfilerRef { + // This field is `None` if self-profiling is disabled for the current + // compilation session. + profiler: Option>, + + // We store the filter mask directly in the reference because that doesn't + // cost anything and allows for filtering with checking if the profiler is + // actually enabled. + event_filter_mask: EventFilter, + + // Print verbose generic activities to stdout + print_verbose_generic_activities: bool, + + // Print extra verbose generic activities to stdout + print_extra_verbose_generic_activities: bool, +} + +impl SelfProfilerRef { + pub fn new( + profiler: Option>, + print_verbose_generic_activities: bool, + print_extra_verbose_generic_activities: bool, + ) -> SelfProfilerRef { + // If there is no SelfProfiler then the filter mask is set to NONE, + // ensuring that nothing ever tries to actually access it. + let event_filter_mask = + profiler.as_ref().map_or(EventFilter::empty(), |p| p.event_filter_mask); + + SelfProfilerRef { + profiler, + event_filter_mask, + print_verbose_generic_activities, + print_extra_verbose_generic_activities, + } + } + + /// This shim makes sure that calls only get executed if the filter mask + /// lets them pass. It also contains some trickery to make sure that + /// code is optimized for non-profiling compilation sessions, i.e. anything + /// past the filter check is never inlined so it doesn't clutter the fast + /// path. 
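+    ///
+    /// For example, `generic_activity` below boils down to (sketch):
+    ///
+    /// ```ignore (illustrative sketch of a caller)
+    /// self.exec(EventFilter::GENERIC_ACTIVITIES, |profiler| {
+    ///     // allocate/look up the event id, then start a `TimingGuard`
+    /// })
+    /// ```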
+ #[inline(always)] + fn exec(&self, event_filter: EventFilter, f: F) -> TimingGuard<'_> + where + F: for<'a> FnOnce(&'a SelfProfiler) -> TimingGuard<'a>, + { + #[inline(never)] + #[cold] + fn cold_call(profiler_ref: &SelfProfilerRef, f: F) -> TimingGuard<'_> + where + F: for<'a> FnOnce(&'a SelfProfiler) -> TimingGuard<'a>, + { + let profiler = profiler_ref.profiler.as_ref().unwrap(); + f(&**profiler) + } + + if self.event_filter_mask.contains(event_filter) { + cold_call(self, f) + } else { + TimingGuard::none() + } + } + + /// Start profiling a verbose generic activity. Profiling continues until the + /// VerboseTimingGuard returned from this call is dropped. In addition to recording + /// a measureme event, "verbose" generic activities also print a timing entry to + /// stdout if the compiler is invoked with -Ztime or -Ztime-passes. + pub fn verbose_generic_activity<'a>( + &'a self, + event_label: &'static str, + ) -> VerboseTimingGuard<'a> { + let message = + if self.print_verbose_generic_activities { Some(event_label.to_owned()) } else { None }; + + VerboseTimingGuard::start(message, self.generic_activity(event_label)) + } + + /// Start profiling an extra verbose generic activity. Profiling continues until the + /// VerboseTimingGuard returned from this call is dropped. In addition to recording + /// a measureme event, "extra verbose" generic activities also print a timing entry to + /// stdout if the compiler is invoked with -Ztime-passes. + pub fn extra_verbose_generic_activity<'a, A>( + &'a self, + event_label: &'static str, + event_arg: A, + ) -> VerboseTimingGuard<'a> + where + A: Borrow + Into, + { + let message = if self.print_extra_verbose_generic_activities { + Some(format!("{}({})", event_label, event_arg.borrow())) + } else { + None + }; + + VerboseTimingGuard::start(message, self.generic_activity_with_arg(event_label, event_arg)) + } + + /// Start profiling a generic activity. Profiling continues until the + /// TimingGuard returned from this call is dropped. + #[inline(always)] + pub fn generic_activity(&self, event_label: &'static str) -> TimingGuard<'_> { + self.exec(EventFilter::GENERIC_ACTIVITIES, |profiler| { + let event_label = profiler.get_or_alloc_cached_string(event_label); + let event_id = EventId::from_label(event_label); + TimingGuard::start(profiler, profiler.generic_activity_event_kind, event_id) + }) + } + + /// Start profiling with some event filter for a given event. Profiling continues until the + /// TimingGuard returned from this call is dropped. + #[inline(always)] + pub fn generic_activity_with_event_id(&self, event_id: EventId) -> TimingGuard<'_> { + self.exec(EventFilter::GENERIC_ACTIVITIES, |profiler| { + TimingGuard::start(profiler, profiler.generic_activity_event_kind, event_id) + }) + } + + /// Start profiling a generic activity. Profiling continues until the + /// TimingGuard returned from this call is dropped. 
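+    ///
+    /// The `event_arg` is only recorded when argument recording is enabled
+    /// (e.g. via `-Z self-profile-events=args`).
+    ///
+    /// A hypothetical usage sketch (the label and argument names are made up), assuming
+    /// `prof` is a `SelfProfilerRef` and `cgu_name` is a `String`; the event covers the
+    /// scope of the returned guard:
+    ///
+    /// ```ignore (requires an active profiling session)
+    /// let _timer = prof.generic_activity_with_arg("codegen_module", cgu_name);
+    /// // ...the profiled work happens here...
+    /// // the event is recorded when `_timer` is dropped
+    /// ```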
+ #[inline(always)] + pub fn generic_activity_with_arg( + &self, + event_label: &'static str, + event_arg: A, + ) -> TimingGuard<'_> + where + A: Borrow + Into, + { + self.exec(EventFilter::GENERIC_ACTIVITIES, |profiler| { + let builder = EventIdBuilder::new(&profiler.profiler); + let event_label = profiler.get_or_alloc_cached_string(event_label); + let event_id = if profiler.event_filter_mask.contains(EventFilter::FUNCTION_ARGS) { + let event_arg = profiler.get_or_alloc_cached_string(event_arg); + builder.from_label_and_arg(event_label, event_arg) + } else { + builder.from_label(event_label) + }; + TimingGuard::start(profiler, profiler.generic_activity_event_kind, event_id) + }) + } + + /// Start profiling a generic activity, allowing costly arguments to be recorded. Profiling + /// continues until the `TimingGuard` returned from this call is dropped. + /// + /// If the arguments to a generic activity are cheap to create, use `generic_activity_with_arg` + /// or `generic_activity_with_args` for their simpler API. However, if they are costly or + /// require allocation in sufficiently hot contexts, then this allows for a closure to be called + /// only when arguments were asked to be recorded via `-Z self-profile-events=args`. + /// + /// In this case, the closure will be passed a `&mut EventArgRecorder`, to help with recording + /// one or many arguments within the generic activity being profiled, by calling its + /// `record_arg` method for example. + /// + /// This `EventArgRecorder` may implement more specific traits from other rustc crates, e.g. for + /// richer handling of rustc-specific argument types, while keeping this single entry-point API + /// for recording arguments. + /// + /// Note: recording at least one argument is *required* for the self-profiler to create the + /// `TimingGuard`. A panic will be triggered if that doesn't happen. This function exists + /// explicitly to record arguments, so it fails loudly when there are none to record. + /// + #[inline(always)] + pub fn generic_activity_with_arg_recorder( + &self, + event_label: &'static str, + mut f: F, + ) -> TimingGuard<'_> + where + F: FnMut(&mut EventArgRecorder<'_>), + { + // Ensure this event will only be recorded when self-profiling is turned on. + self.exec(EventFilter::GENERIC_ACTIVITIES, |profiler| { + let builder = EventIdBuilder::new(&profiler.profiler); + let event_label = profiler.get_or_alloc_cached_string(event_label); + + // Ensure the closure to create event arguments will only be called when argument + // recording is turned on. + let event_id = if profiler.event_filter_mask.contains(EventFilter::FUNCTION_ARGS) { + // Set up the builder and call the user-provided closure to record potentially + // costly event arguments. + let mut recorder = EventArgRecorder { profiler, args: SmallVec::new() }; + f(&mut recorder); + + // It is expected that the closure will record at least one argument. If that + // doesn't happen, it's a bug: we've been explicitly called in order to record + // arguments, so we fail loudly when there are none to record. 
+ if recorder.args.is_empty() { + panic!( + "The closure passed to `generic_activity_with_arg_recorder` needs to \ + record at least one argument" + ); + } + + builder.from_label_and_args(event_label, &recorder.args) + } else { + builder.from_label(event_label) + }; + TimingGuard::start(profiler, profiler.generic_activity_event_kind, event_id) + }) + } + + /// Record the size of an artifact that the compiler produces + /// + /// `artifact_kind` is the class of artifact (e.g., query_cache, object_file, etc.) + /// `artifact_name` is an identifier to the specific artifact being stored (usually a filename) + #[inline(always)] + pub fn artifact_size(&self, artifact_kind: &str, artifact_name: A, size: u64) + where + A: Borrow + Into, + { + drop(self.exec(EventFilter::ARTIFACT_SIZES, |profiler| { + let builder = EventIdBuilder::new(&profiler.profiler); + let event_label = profiler.get_or_alloc_cached_string(artifact_kind); + let event_arg = profiler.get_or_alloc_cached_string(artifact_name); + let event_id = builder.from_label_and_arg(event_label, event_arg); + let thread_id = get_thread_id(); + + profiler.profiler.record_integer_event( + profiler.artifact_size_event_kind, + event_id, + thread_id, + size, + ); + + TimingGuard::none() + })) + } + + #[inline(always)] + pub fn generic_activity_with_args( + &self, + event_label: &'static str, + event_args: &[String], + ) -> TimingGuard<'_> { + self.exec(EventFilter::GENERIC_ACTIVITIES, |profiler| { + let builder = EventIdBuilder::new(&profiler.profiler); + let event_label = profiler.get_or_alloc_cached_string(event_label); + let event_id = if profiler.event_filter_mask.contains(EventFilter::FUNCTION_ARGS) { + let event_args: Vec<_> = event_args + .iter() + .map(|s| profiler.get_or_alloc_cached_string(&s[..])) + .collect(); + builder.from_label_and_args(event_label, &event_args) + } else { + builder.from_label(event_label) + }; + TimingGuard::start(profiler, profiler.generic_activity_event_kind, event_id) + }) + } + + /// Start profiling a query provider. Profiling continues until the + /// TimingGuard returned from this call is dropped. + #[inline(always)] + pub fn query_provider(&self) -> TimingGuard<'_> { + self.exec(EventFilter::QUERY_PROVIDERS, |profiler| { + TimingGuard::start(profiler, profiler.query_event_kind, EventId::INVALID) + }) + } + + /// Record a query in-memory cache hit. + #[inline(always)] + pub fn query_cache_hit(&self, query_invocation_id: QueryInvocationId) { + self.instant_query_event( + |profiler| profiler.query_cache_hit_event_kind, + query_invocation_id, + EventFilter::QUERY_CACHE_HITS, + ); + } + + /// Start profiling a query being blocked on a concurrent execution. + /// Profiling continues until the TimingGuard returned from this call is + /// dropped. + #[inline(always)] + pub fn query_blocked(&self) -> TimingGuard<'_> { + self.exec(EventFilter::QUERY_BLOCKED, |profiler| { + TimingGuard::start(profiler, profiler.query_blocked_event_kind, EventId::INVALID) + }) + } + + /// Start profiling how long it takes to load a query result from the + /// incremental compilation on-disk cache. Profiling continues until the + /// TimingGuard returned from this call is dropped. + #[inline(always)] + pub fn incr_cache_loading(&self) -> TimingGuard<'_> { + self.exec(EventFilter::INCR_CACHE_LOADS, |profiler| { + TimingGuard::start( + profiler, + profiler.incremental_load_result_event_kind, + EventId::INVALID, + ) + }) + } + + /// Start profiling how long it takes to hash query results for incremental compilation. 
+    /// Profiling continues until the TimingGuard returned from this call is dropped.
+    #[inline(always)]
+    pub fn incr_result_hashing(&self) -> TimingGuard<'_> {
+        self.exec(EventFilter::INCR_RESULT_HASHING, |profiler| {
+            TimingGuard::start(
+                profiler,
+                profiler.incremental_result_hashing_event_kind,
+                EventId::INVALID,
+            )
+        })
+    }
+
+    #[inline(always)]
+    fn instant_query_event(
+        &self,
+        event_kind: fn(&SelfProfiler) -> StringId,
+        query_invocation_id: QueryInvocationId,
+        event_filter: EventFilter,
+    ) {
+        drop(self.exec(event_filter, |profiler| {
+            let event_id = StringId::new_virtual(query_invocation_id.0);
+            let thread_id = get_thread_id();
+
+            profiler.profiler.record_instant_event(
+                event_kind(profiler),
+                EventId::from_virtual(event_id),
+                thread_id,
+            );
+
+            TimingGuard::none()
+        }));
+    }
+
+    pub fn with_profiler(&self, f: impl FnOnce(&SelfProfiler)) {
+        if let Some(profiler) = &self.profiler {
+            f(&profiler)
+        }
+    }
+
+    /// Gets a `StringId` for the given string. This method makes sure that
+    /// any strings going through it will only be allocated once in the
+    /// profiling data.
+    /// Returns `None` if self-profiling is not enabled.
+    pub fn get_or_alloc_cached_string(&self, s: &str) -> Option<StringId> {
+        self.profiler.as_ref().map(|p| p.get_or_alloc_cached_string(s))
+    }
+
+    #[inline]
+    pub fn enabled(&self) -> bool {
+        self.profiler.is_some()
+    }
+
+    #[inline]
+    pub fn llvm_recording_enabled(&self) -> bool {
+        self.event_filter_mask.contains(EventFilter::LLVM)
+    }
+    #[inline]
+    pub fn get_self_profiler(&self) -> Option<Arc<SelfProfiler>> {
+        self.profiler.clone()
+    }
+}
+
+/// A helper for recording costly arguments to self-profiling events. Used with
+/// `SelfProfilerRef::generic_activity_with_arg_recorder`.
+pub struct EventArgRecorder<'p> {
+    /// The `SelfProfiler` used to intern the event arguments that users will ask to record.
+    profiler: &'p SelfProfiler,
+
+    /// The interned event arguments to be recorded in the generic activity event.
+    ///
+    /// The most common case, when actually recording event arguments, is to record a single
+    /// argument; a couple of places record two.
+    args: SmallVec<[StringId; 2]>,
+}
+
+impl EventArgRecorder<'_> {
+    /// Records a single argument within the current generic activity being profiled.
+    ///
+    /// Note: when self-profiling with costly event arguments, at least one argument
+    /// needs to be recorded. A panic will be triggered if that doesn't happen.
+    pub fn record_arg<A>(&mut self, event_arg: A)
+    where
+        A: Borrow<str> + Into<String>,
+    {
+        let event_arg = self.profiler.get_or_alloc_cached_string(event_arg);
+        self.args.push(event_arg);
+    }
+}
+
+pub struct SelfProfiler {
+    profiler: Profiler,
+    event_filter_mask: EventFilter,
+
+    string_cache: RwLock<FxHashMap<String, StringId>>,
+
+    query_event_kind: StringId,
+    generic_activity_event_kind: StringId,
+    incremental_load_result_event_kind: StringId,
+    incremental_result_hashing_event_kind: StringId,
+    query_blocked_event_kind: StringId,
+    query_cache_hit_event_kind: StringId,
+    artifact_size_event_kind: StringId,
+}
+
+impl SelfProfiler {
+    pub fn new(
+        output_directory: &Path,
+        crate_name: Option<&str>,
+        event_filters: Option<&[String]>,
+        counter_name: &str,
+    ) -> Result<SelfProfiler, Box<dyn Error + Send + Sync>> {
+        fs::create_dir_all(output_directory)?;
+
+        let crate_name = crate_name.unwrap_or("unknown-crate");
+        // HACK(eddyb) we need to pad the PID, strange as it may seem, as its
+        // length can behave as a source of entropy for heap addresses, when
+        // ASLR is disabled and the heap is otherwise deterministic.
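+        // (Hence the `{:07}` zero-padding in the filename format below: it keeps the
+        // filename length constant regardless of how many digits the PID has.)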
+ let pid: u32 = process::id(); + let filename = format!("{}-{:07}.rustc_profile", crate_name, pid); + let path = output_directory.join(&filename); + let profiler = + Profiler::with_counter(&path, measureme::counters::Counter::by_name(counter_name)?)?; + + let query_event_kind = profiler.alloc_string("Query"); + let generic_activity_event_kind = profiler.alloc_string("GenericActivity"); + let incremental_load_result_event_kind = profiler.alloc_string("IncrementalLoadResult"); + let incremental_result_hashing_event_kind = + profiler.alloc_string("IncrementalResultHashing"); + let query_blocked_event_kind = profiler.alloc_string("QueryBlocked"); + let query_cache_hit_event_kind = profiler.alloc_string("QueryCacheHit"); + let artifact_size_event_kind = profiler.alloc_string("ArtifactSize"); + + let mut event_filter_mask = EventFilter::empty(); + + if let Some(event_filters) = event_filters { + let mut unknown_events = vec![]; + for item in event_filters { + if let Some(&(_, mask)) = + EVENT_FILTERS_BY_NAME.iter().find(|&(name, _)| name == item) + { + event_filter_mask |= mask; + } else { + unknown_events.push(item.clone()); + } + } + + // Warn about any unknown event names + if !unknown_events.is_empty() { + unknown_events.sort(); + unknown_events.dedup(); + + warn!( + "Unknown self-profiler events specified: {}. Available options are: {}.", + unknown_events.join(", "), + EVENT_FILTERS_BY_NAME + .iter() + .map(|&(name, _)| name.to_string()) + .collect::>() + .join(", ") + ); + } + } else { + event_filter_mask = EventFilter::DEFAULT; + } + + Ok(SelfProfiler { + profiler, + event_filter_mask, + string_cache: RwLock::new(FxHashMap::default()), + query_event_kind, + generic_activity_event_kind, + incremental_load_result_event_kind, + incremental_result_hashing_event_kind, + query_blocked_event_kind, + query_cache_hit_event_kind, + artifact_size_event_kind, + }) + } + + /// Allocates a new string in the profiling data. Does not do any caching + /// or deduplication. + pub fn alloc_string(&self, s: &STR) -> StringId { + self.profiler.alloc_string(s) + } + + /// Gets a `StringId` for the given string. This method makes sure that + /// any strings going through it will only be allocated once in the + /// profiling data. + pub fn get_or_alloc_cached_string(&self, s: A) -> StringId + where + A: Borrow + Into, + { + // Only acquire a read-lock first since we assume that the string is + // already present in the common case. + { + let string_cache = self.string_cache.read(); + + if let Some(&id) = string_cache.get(s.borrow()) { + return id; + } + } + + let mut string_cache = self.string_cache.write(); + // Check if the string has already been added in the small time window + // between dropping the read lock and acquiring the write lock. 
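+        // (This is the classic double-checked pattern: the `entry` lookup below
+        // re-checks under the write lock, so each distinct string is interned
+        // exactly once even if two threads race to add it.)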
+ match string_cache.entry(s.into()) { + Entry::Occupied(e) => *e.get(), + Entry::Vacant(e) => { + let string_id = self.profiler.alloc_string(&e.key()[..]); + *e.insert(string_id) + } + } + } + + pub fn map_query_invocation_id_to_string(&self, from: QueryInvocationId, to: StringId) { + let from = StringId::new_virtual(from.0); + self.profiler.map_virtual_to_concrete_string(from, to); + } + + pub fn bulk_map_query_invocation_id_to_single_string(&self, from: I, to: StringId) + where + I: Iterator + ExactSizeIterator, + { + let from = from.map(|qid| StringId::new_virtual(qid.0)); + self.profiler.bulk_map_virtual_to_single_concrete_string(from, to); + } + + pub fn query_key_recording_enabled(&self) -> bool { + self.event_filter_mask.contains(EventFilter::QUERY_KEYS) + } + + pub fn event_id_builder(&self) -> EventIdBuilder<'_> { + EventIdBuilder::new(&self.profiler) + } +} + +#[must_use] +pub struct TimingGuard<'a>(Option>); + +impl<'a> TimingGuard<'a> { + #[inline] + pub fn start( + profiler: &'a SelfProfiler, + event_kind: StringId, + event_id: EventId, + ) -> TimingGuard<'a> { + let thread_id = get_thread_id(); + let raw_profiler = &profiler.profiler; + let timing_guard = + raw_profiler.start_recording_interval_event(event_kind, event_id, thread_id); + TimingGuard(Some(timing_guard)) + } + + #[inline] + pub fn finish_with_query_invocation_id(self, query_invocation_id: QueryInvocationId) { + if let Some(guard) = self.0 { + cold_path(|| { + let event_id = StringId::new_virtual(query_invocation_id.0); + let event_id = EventId::from_virtual(event_id); + guard.finish_with_override_event_id(event_id); + }); + } + } + + #[inline] + pub fn none() -> TimingGuard<'a> { + TimingGuard(None) + } + + #[inline(always)] + pub fn run(self, f: impl FnOnce() -> R) -> R { + let _timer = self; + f() + } +} + +#[must_use] +pub struct VerboseTimingGuard<'a> { + start_and_message: Option<(Instant, Option, String)>, + _guard: TimingGuard<'a>, +} + +impl<'a> VerboseTimingGuard<'a> { + pub fn start(message: Option, _guard: TimingGuard<'a>) -> Self { + VerboseTimingGuard { + _guard, + start_and_message: message.map(|msg| (Instant::now(), get_resident_set_size(), msg)), + } + } + + #[inline(always)] + pub fn run(self, f: impl FnOnce() -> R) -> R { + let _timer = self; + f() + } +} + +impl Drop for VerboseTimingGuard<'_> { + fn drop(&mut self) { + if let Some((start_time, start_rss, ref message)) = self.start_and_message { + let end_rss = get_resident_set_size(); + let dur = start_time.elapsed(); + + if should_print_passes(dur, start_rss, end_rss) { + print_time_passes_entry(&message, dur, start_rss, end_rss); + } + } + } +} + +fn should_print_passes(dur: Duration, start_rss: Option, end_rss: Option) -> bool { + if dur.as_millis() > 5 { + return true; + } + + if let (Some(start_rss), Some(end_rss)) = (start_rss, end_rss) { + let change_rss = end_rss.abs_diff(start_rss); + if change_rss > 0 { + return true; + } + } + + false +} + +pub fn print_time_passes_entry( + what: &str, + dur: Duration, + start_rss: Option, + end_rss: Option, +) { + let rss_to_mb = |rss| (rss as f64 / 1_000_000.0).round() as usize; + let rss_change_to_mb = |rss| (rss as f64 / 1_000_000.0).round() as i128; + + let mem_string = match (start_rss, end_rss) { + (Some(start_rss), Some(end_rss)) => { + let change_rss = end_rss as i128 - start_rss as i128; + + format!( + "; rss: {:>4}MB -> {:>4}MB ({:>+5}MB)", + rss_to_mb(start_rss), + rss_to_mb(end_rss), + rss_change_to_mb(change_rss), + ) + } + (Some(start_rss), None) => format!("; rss start: {:>4}MB", 
rss_to_mb(start_rss)), + (None, Some(end_rss)) => format!("; rss end: {:>4}MB", rss_to_mb(end_rss)), + (None, None) => String::new(), + }; + + eprintln!("time: {:>7}{}\t{}", duration_to_secs_str(dur), mem_string, what); +} + +// Hack up our own formatting for the duration to make it easier for scripts +// to parse (always use the same number of decimal places and the same unit). +pub fn duration_to_secs_str(dur: std::time::Duration) -> String { + format!("{:.3}", dur.as_secs_f64()) +} + +fn get_thread_id() -> u32 { + std::thread::current().id().as_u64().get() as u32 +} + +// Memory reporting +cfg_if! { + if #[cfg(windows)] { + pub fn get_resident_set_size() -> Option { + use std::mem::{self, MaybeUninit}; + use winapi::shared::minwindef::DWORD; + use winapi::um::processthreadsapi::GetCurrentProcess; + use winapi::um::psapi::{GetProcessMemoryInfo, PROCESS_MEMORY_COUNTERS}; + + let mut pmc = MaybeUninit::::uninit(); + match unsafe { + GetProcessMemoryInfo(GetCurrentProcess(), pmc.as_mut_ptr(), mem::size_of_val(&pmc) as DWORD) + } { + 0 => None, + _ => { + let pmc = unsafe { pmc.assume_init() }; + Some(pmc.WorkingSetSize as usize) + } + } + } + } else if #[cfg(target_os = "macos")] { + pub fn get_resident_set_size() -> Option { + use libc::{c_int, c_void, getpid, proc_pidinfo, proc_taskinfo, PROC_PIDTASKINFO}; + use std::mem; + const PROC_TASKINFO_SIZE: c_int = mem::size_of::() as c_int; + + unsafe { + let mut info: proc_taskinfo = mem::zeroed(); + let info_ptr = &mut info as *mut proc_taskinfo as *mut c_void; + let pid = getpid() as c_int; + let ret = proc_pidinfo(pid, PROC_PIDTASKINFO, 0, info_ptr, PROC_TASKINFO_SIZE); + if ret == PROC_TASKINFO_SIZE { + Some(info.pti_resident_size as usize) + } else { + None + } + } + } + } else if #[cfg(unix)] { + pub fn get_resident_set_size() -> Option { + let field = 1; + let contents = fs::read("/proc/self/statm").ok()?; + let contents = String::from_utf8(contents).ok()?; + let s = contents.split_whitespace().nth(field)?; + let npages = s.parse::().ok()?; + Some(npages * 4096) + } + } else { + pub fn get_resident_set_size() -> Option { + None + } + } +} diff --git a/compiler/rustc_data_structures/src/sharded.rs b/compiler/rustc_data_structures/src/sharded.rs new file mode 100644 index 000000000..01d292dde --- /dev/null +++ b/compiler/rustc_data_structures/src/sharded.rs @@ -0,0 +1,150 @@ +use crate::fx::{FxHashMap, FxHasher}; +use crate::sync::{Lock, LockGuard}; +use std::borrow::Borrow; +use std::collections::hash_map::RawEntryMut; +use std::hash::{Hash, Hasher}; +use std::mem; + +#[derive(Clone, Default)] +#[cfg_attr(parallel_compiler, repr(align(64)))] +struct CacheAligned(T); + +#[cfg(parallel_compiler)] +// 32 shards is sufficient to reduce contention on an 8-core Ryzen 7 1700, +// but this should be tested on higher core count CPUs. How the `Sharded` type gets used +// may also affect the ideal number of shards. +const SHARD_BITS: usize = 5; + +#[cfg(not(parallel_compiler))] +const SHARD_BITS: usize = 0; + +pub const SHARDS: usize = 1 << SHARD_BITS; + +/// An array of cache-line aligned inner locked structures with convenience methods. +#[derive(Clone)] +pub struct Sharded { + shards: [CacheAligned>; SHARDS], +} + +impl Default for Sharded { + #[inline] + fn default() -> Self { + Self::new(T::default) + } +} + +impl Sharded { + #[inline] + pub fn new(mut value: impl FnMut() -> T) -> Self { + Sharded { shards: [(); SHARDS].map(|()| CacheAligned(Lock::new(value()))) } + } + + /// The shard is selected by hashing `val` with `FxHasher`. 
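+    ///
+    /// Illustrative use (a sketch, not taken from this file), with a
+    /// `Sharded<FxHashMap<K, V>>`:
+    /// `sharded_map.get_shard_by_value(&key).lock().insert(key, value);`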
+ #[inline] + pub fn get_shard_by_value(&self, val: &K) -> &Lock { + if SHARDS == 1 { &self.shards[0].0 } else { self.get_shard_by_hash(make_hash(val)) } + } + + #[inline] + pub fn get_shard_by_hash(&self, hash: u64) -> &Lock { + &self.shards[get_shard_index_by_hash(hash)].0 + } + + #[inline] + pub fn get_shard_by_index(&self, i: usize) -> &Lock { + &self.shards[i].0 + } + + pub fn lock_shards(&self) -> Vec> { + (0..SHARDS).map(|i| self.shards[i].0.lock()).collect() + } + + pub fn try_lock_shards(&self) -> Option>> { + (0..SHARDS).map(|i| self.shards[i].0.try_lock()).collect() + } +} + +pub type ShardedHashMap = Sharded>; + +impl ShardedHashMap { + pub fn len(&self) -> usize { + self.lock_shards().iter().map(|shard| shard.len()).sum() + } +} + +impl ShardedHashMap { + #[inline] + pub fn intern_ref(&self, value: &Q, make: impl FnOnce() -> K) -> K + where + K: Borrow, + Q: Hash + Eq, + { + let hash = make_hash(value); + let mut shard = self.get_shard_by_hash(hash).lock(); + let entry = shard.raw_entry_mut().from_key_hashed_nocheck(hash, value); + + match entry { + RawEntryMut::Occupied(e) => *e.key(), + RawEntryMut::Vacant(e) => { + let v = make(); + e.insert_hashed_nocheck(hash, v, ()); + v + } + } + } + + #[inline] + pub fn intern(&self, value: Q, make: impl FnOnce(Q) -> K) -> K + where + K: Borrow, + Q: Hash + Eq, + { + let hash = make_hash(&value); + let mut shard = self.get_shard_by_hash(hash).lock(); + let entry = shard.raw_entry_mut().from_key_hashed_nocheck(hash, &value); + + match entry { + RawEntryMut::Occupied(e) => *e.key(), + RawEntryMut::Vacant(e) => { + let v = make(value); + e.insert_hashed_nocheck(hash, v, ()); + v + } + } + } +} + +pub trait IntoPointer { + /// Returns a pointer which outlives `self`. + fn into_pointer(&self) -> *const (); +} + +impl ShardedHashMap { + pub fn contains_pointer_to(&self, value: &T) -> bool { + let hash = make_hash(&value); + let shard = self.get_shard_by_hash(hash).lock(); + let value = value.into_pointer(); + shard.raw_entry().from_hash(hash, |entry| entry.into_pointer() == value).is_some() + } +} + +#[inline] +pub fn make_hash(val: &K) -> u64 { + let mut state = FxHasher::default(); + val.hash(&mut state); + state.finish() +} + +/// Get a shard with a pre-computed hash value. If `get_shard_by_value` is +/// ever used in combination with `get_shard_by_hash` on a single `Sharded` +/// instance, then `hash` must be computed with `FxHasher`. Otherwise, +/// `hash` can be computed with any hasher, so long as that hasher is used +/// consistently for each `Sharded` instance. +#[inline] +pub fn get_shard_index_by_hash(hash: u64) -> usize { + let hash_len = mem::size_of::(); + // Ignore the top 7 bits as hashbrown uses these and get the next SHARD_BITS highest bits. + // hashbrown also uses the lowest bits, so we can't use those + let bits = (hash >> (hash_len * 8 - 7 - SHARD_BITS)) as usize; + bits % SHARDS +} diff --git a/compiler/rustc_data_structures/src/sip128.rs b/compiler/rustc_data_structures/src/sip128.rs new file mode 100644 index 000000000..90793a97e --- /dev/null +++ b/compiler/rustc_data_structures/src/sip128.rs @@ -0,0 +1,496 @@ +//! This is a copy of `core::hash::sip` adapted to providing 128 bit hashes. + +use std::hash::Hasher; +use std::mem::{self, MaybeUninit}; +use std::ptr; + +#[cfg(test)] +mod tests; + +// The SipHash algorithm operates on 8-byte chunks. +const ELEM_SIZE: usize = mem::size_of::(); + +// Size of the buffer in number of elements, not including the spill. 
+//
+// The selection of this size was guided by rustc-perf benchmark comparisons of
+// different buffer sizes. It should be periodically reevaluated as the compiler
+// implementation and input characteristics change.
+//
+// Using the same-sized buffer for everything we hash is a performance versus
+// complexity tradeoff. The ideal buffer size, and whether buffering should even
+// be used, depends on what is being hashed. It may be worth it to size the
+// buffer appropriately (perhaps by making SipHasher128 generic over the buffer
+// size) or disable buffering depending on what is being hashed. But at this
+// time, we use the same buffer size for everything.
+const BUFFER_CAPACITY: usize = 8;
+
+// Size of the buffer in bytes, not including the spill.
+const BUFFER_SIZE: usize = BUFFER_CAPACITY * ELEM_SIZE;
+
+// Size of the buffer in number of elements, including the spill.
+const BUFFER_WITH_SPILL_CAPACITY: usize = BUFFER_CAPACITY + 1;
+
+// Size of the buffer in bytes, including the spill.
+const BUFFER_WITH_SPILL_SIZE: usize = BUFFER_WITH_SPILL_CAPACITY * ELEM_SIZE;
+
+// Index of the spill element in the buffer.
+const BUFFER_SPILL_INDEX: usize = BUFFER_WITH_SPILL_CAPACITY - 1;
+
+#[derive(Debug, Clone)]
+#[repr(C)]
+pub struct SipHasher128 {
+    // The access pattern during hashing consists of accesses to `nbuf` and
+    // `buf` until the buffer is full, followed by accesses to `state` and
+    // `processed`, and then repetition of that pattern until hashing is done.
+    // This is the basis for the ordering of fields below. However, in practice
+    // the cache miss-rate for data access is extremely low regardless of order.
+    nbuf: usize, // how many bytes in buf are valid
+    buf: [MaybeUninit<u64>; BUFFER_WITH_SPILL_CAPACITY], // unprocessed bytes le
+    state: State, // hash State
+    processed: usize, // how many bytes we've processed
+}
+
+#[derive(Debug, Clone, Copy)]
+#[repr(C)]
+struct State {
+    // v0, v2 and v1, v3 show up in pairs in the algorithm,
+    // and simd implementations of SipHash will use vectors
+    // of v02 and v13. By placing them in this order in the struct,
+    // the compiler can pick up on just a few simd optimizations by itself.
+    v0: u64,
+    v2: u64,
+    v1: u64,
+    v3: u64,
+}
+
+macro_rules! compress {
+    ($state:expr) => {{ compress!($state.v0, $state.v1, $state.v2, $state.v3) }};
+    ($v0:expr, $v1:expr, $v2:expr, $v3:expr) => {{
+        $v0 = $v0.wrapping_add($v1);
+        $v1 = $v1.rotate_left(13);
+        $v1 ^= $v0;
+        $v0 = $v0.rotate_left(32);
+        $v2 = $v2.wrapping_add($v3);
+        $v3 = $v3.rotate_left(16);
+        $v3 ^= $v2;
+        $v0 = $v0.wrapping_add($v3);
+        $v3 = $v3.rotate_left(21);
+        $v3 ^= $v0;
+        $v2 = $v2.wrapping_add($v1);
+        $v1 = $v1.rotate_left(17);
+        $v1 ^= $v2;
+        $v2 = $v2.rotate_left(32);
+    }};
+}
+
+// Copies up to 8 bytes from source to destination. This performs better than
+// `ptr::copy_nonoverlapping` on microbenchmarks and may perform better on real
+// workloads since all of the copies have fixed sizes and avoid calling memcpy.
+//
+// This is specifically designed for copies of up to 8 bytes, because that's the
+// maximum number of bytes needed to fill an 8-byte-sized element on which
+// SipHash operates. Note that for variable-sized copies which are known to be
+// less than 8 bytes, this function will perform more work than necessary unless
+// the compiler is able to optimize the extra work away.
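+//
+// For example, a 7-byte copy decomposes below into a 4-byte copy, a 2-byte copy,
+// and a single-byte assignment (4 + 2 + 1 = 7), each with a statically known size.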
+#[inline] +unsafe fn copy_nonoverlapping_small(src: *const u8, dst: *mut u8, count: usize) { + debug_assert!(count <= 8); + + if count == 8 { + ptr::copy_nonoverlapping(src, dst, 8); + return; + } + + let mut i = 0; + if i + 3 < count { + ptr::copy_nonoverlapping(src.add(i), dst.add(i), 4); + i += 4; + } + + if i + 1 < count { + ptr::copy_nonoverlapping(src.add(i), dst.add(i), 2); + i += 2 + } + + if i < count { + *dst.add(i) = *src.add(i); + i += 1; + } + + debug_assert_eq!(i, count); +} + +// # Implementation +// +// This implementation uses buffering to reduce the hashing cost for inputs +// consisting of many small integers. Buffering simplifies the integration of +// integer input--the integer write function typically just appends to the +// buffer with a statically sized write, updates metadata, and returns. +// +// Buffering also prevents alternating between writes that do and do not trigger +// the hashing process. Only when the entire buffer is full do we transition +// into hashing. This allows us to keep the hash state in registers for longer, +// instead of loading and storing it before and after processing each element. +// +// When a write fills the buffer, a buffer processing function is invoked to +// hash all of the buffered input. The buffer processing functions are marked +// `#[inline(never)]` so that they aren't inlined into the append functions, +// which ensures the more frequently called append functions remain inlineable +// and don't include register pushing/popping that would only be made necessary +// by inclusion of the complex buffer processing path which uses those +// registers. +// +// The buffer includes a "spill"--an extra element at the end--which simplifies +// the integer write buffer processing path. The value that fills the buffer can +// be written with a statically sized write that may spill over into the spill. +// After the buffer is processed, the part of the value that spilled over can be +// written from the spill to the beginning of the buffer with another statically +// sized write. This write may copy more bytes than actually spilled over, but +// we maintain the metadata such that any extra copied bytes will be ignored by +// subsequent processing. Due to the static sizes, this scheme performs better +// than copying the exact number of bytes needed into the end and beginning of +// the buffer. +// +// The buffer is uninitialized, which improves performance, but may preclude +// efficient implementation of alternative approaches. The improvement is not so +// large that an alternative approach should be disregarded because it cannot be +// efficiently implemented with an uninitialized buffer. On the other hand, an +// uninitialized buffer may become more important should a larger one be used. +// +// # Platform Dependence +// +// The SipHash algorithm operates on byte sequences. It parses the input stream +// as 8-byte little-endian integers. Therefore, given the same byte sequence, it +// produces the same result on big- and little-endian hardware. +// +// However, the Hasher trait has methods which operate on multi-byte integers. +// How they are converted into byte sequences can be endian-dependent (by using +// native byte order) or independent (by consistently using either LE or BE byte +// order). It can also be `isize` and `usize` size dependent (by using the +// native size), or independent (by converting to a common size), supposing the +// values can be represented in 32 bits. 
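+//
+// For instance, `write_u32(1)` feeds the bytes [01 00 00 00] to the hasher on a
+// little-endian target but [00 00 00 01] on a big-endian one when native byte
+// order is used, so the resulting hashes differ across platforms.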
+// +// In order to make `SipHasher128` consistent with `SipHasher` in libstd, we +// choose to do the integer to byte sequence conversion in the platform- +// dependent way. Clients can achieve platform-independent hashing by widening +// `isize` and `usize` integers to 64 bits on 32-bit systems and byte-swapping +// integers on big-endian systems before passing them to the writing functions. +// This causes the input byte sequence to look identical on big- and little- +// endian systems (supposing `isize` and `usize` values can be represented in 32 +// bits), which ensures platform-independent results. +impl SipHasher128 { + #[inline] + pub fn new_with_keys(key0: u64, key1: u64) -> SipHasher128 { + let mut hasher = SipHasher128 { + nbuf: 0, + buf: MaybeUninit::uninit_array(), + state: State { + v0: key0 ^ 0x736f6d6570736575, + // The XOR with 0xee is only done on 128-bit algorithm version. + v1: key1 ^ (0x646f72616e646f6d ^ 0xee), + v2: key0 ^ 0x6c7967656e657261, + v3: key1 ^ 0x7465646279746573, + }, + processed: 0, + }; + + unsafe { + // Initialize spill because we read from it in `short_write_process_buffer`. + *hasher.buf.get_unchecked_mut(BUFFER_SPILL_INDEX) = MaybeUninit::zeroed(); + } + + hasher + } + + #[inline] + pub fn short_write(&mut self, bytes: [u8; LEN]) { + let nbuf = self.nbuf; + debug_assert!(LEN <= 8); + debug_assert!(nbuf < BUFFER_SIZE); + debug_assert!(nbuf + LEN < BUFFER_WITH_SPILL_SIZE); + + if nbuf + LEN < BUFFER_SIZE { + unsafe { + // The memcpy call is optimized away because the size is known. + let dst = (self.buf.as_mut_ptr() as *mut u8).add(nbuf); + ptr::copy_nonoverlapping(bytes.as_ptr(), dst, LEN); + } + + self.nbuf = nbuf + LEN; + + return; + } + + unsafe { self.short_write_process_buffer(bytes) } + } + + // A specialized write function for values with size <= 8 that should only + // be called when the write would cause the buffer to fill. + // + // SAFETY: the write of `x` into `self.buf` starting at byte offset + // `self.nbuf` must cause `self.buf` to become fully initialized (and not + // overflow) if it wasn't already. + #[inline(never)] + unsafe fn short_write_process_buffer(&mut self, bytes: [u8; LEN]) { + let nbuf = self.nbuf; + debug_assert!(LEN <= 8); + debug_assert!(nbuf < BUFFER_SIZE); + debug_assert!(nbuf + LEN >= BUFFER_SIZE); + debug_assert!(nbuf + LEN < BUFFER_WITH_SPILL_SIZE); + + // Copy first part of input into end of buffer, possibly into spill + // element. The memcpy call is optimized away because the size is known. + let dst = (self.buf.as_mut_ptr() as *mut u8).add(nbuf); + ptr::copy_nonoverlapping(bytes.as_ptr(), dst, LEN); + + // Process buffer. + for i in 0..BUFFER_CAPACITY { + let elem = self.buf.get_unchecked(i).assume_init().to_le(); + self.state.v3 ^= elem; + Sip24Rounds::c_rounds(&mut self.state); + self.state.v0 ^= elem; + } + + // Copy remaining input into start of buffer by copying LEN - 1 + // elements from spill (at most LEN - 1 bytes could have overflowed + // into the spill). The memcpy call is optimized away because the size + // is known. And the whole copy is optimized away for LEN == 1. + let dst = self.buf.as_mut_ptr() as *mut u8; + let src = self.buf.get_unchecked(BUFFER_SPILL_INDEX) as *const _ as *const u8; + ptr::copy_nonoverlapping(src, dst, LEN - 1); + + // This function should only be called when the write fills the buffer. + // Therefore, when LEN == 1, the new `self.nbuf` must be zero. + // LEN is statically known, so the branch is optimized away. 
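+        //
+        // For example, with the current constants (BUFFER_SIZE == 64): if nbuf == 60
+        // and LEN == 8, four bytes land in the spill, the full 64-byte buffer is
+        // processed above, and the new nbuf becomes 60 + 8 - 64 == 4.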
+ self.nbuf = if LEN == 1 { 0 } else { nbuf + LEN - BUFFER_SIZE }; + self.processed += BUFFER_SIZE; + } + + // A write function for byte slices. + #[inline] + fn slice_write(&mut self, msg: &[u8]) { + let length = msg.len(); + let nbuf = self.nbuf; + debug_assert!(nbuf < BUFFER_SIZE); + + if nbuf + length < BUFFER_SIZE { + unsafe { + let dst = (self.buf.as_mut_ptr() as *mut u8).add(nbuf); + + if length <= 8 { + copy_nonoverlapping_small(msg.as_ptr(), dst, length); + } else { + // This memcpy is *not* optimized away. + ptr::copy_nonoverlapping(msg.as_ptr(), dst, length); + } + } + + self.nbuf = nbuf + length; + + return; + } + + unsafe { self.slice_write_process_buffer(msg) } + } + + // A write function for byte slices that should only be called when the + // write would cause the buffer to fill. + // + // SAFETY: `self.buf` must be initialized up to the byte offset `self.nbuf`, + // and `msg` must contain enough bytes to initialize the rest of the element + // containing the byte offset `self.nbuf`. + #[inline(never)] + unsafe fn slice_write_process_buffer(&mut self, msg: &[u8]) { + let length = msg.len(); + let nbuf = self.nbuf; + debug_assert!(nbuf < BUFFER_SIZE); + debug_assert!(nbuf + length >= BUFFER_SIZE); + + // Always copy first part of input into current element of buffer. + // This function should only be called when the write fills the buffer, + // so we know that there is enough input to fill the current element. + let valid_in_elem = nbuf % ELEM_SIZE; + let needed_in_elem = ELEM_SIZE - valid_in_elem; + + let src = msg.as_ptr(); + let dst = (self.buf.as_mut_ptr() as *mut u8).add(nbuf); + copy_nonoverlapping_small(src, dst, needed_in_elem); + + // Process buffer. + + // Using `nbuf / ELEM_SIZE + 1` rather than `(nbuf + needed_in_elem) / + // ELEM_SIZE` to show the compiler that this loop's upper bound is > 0. + // We know that is true, because last step ensured we have a full + // element in the buffer. + let last = nbuf / ELEM_SIZE + 1; + + for i in 0..last { + let elem = self.buf.get_unchecked(i).assume_init().to_le(); + self.state.v3 ^= elem; + Sip24Rounds::c_rounds(&mut self.state); + self.state.v0 ^= elem; + } + + // Process the remaining element-sized chunks of input. + let mut processed = needed_in_elem; + let input_left = length - processed; + let elems_left = input_left / ELEM_SIZE; + let extra_bytes_left = input_left % ELEM_SIZE; + + for _ in 0..elems_left { + let elem = (msg.as_ptr().add(processed) as *const u64).read_unaligned().to_le(); + self.state.v3 ^= elem; + Sip24Rounds::c_rounds(&mut self.state); + self.state.v0 ^= elem; + processed += ELEM_SIZE; + } + + // Copy remaining input into start of buffer. + let src = msg.as_ptr().add(processed); + let dst = self.buf.as_mut_ptr() as *mut u8; + copy_nonoverlapping_small(src, dst, extra_bytes_left); + + self.nbuf = extra_bytes_left; + self.processed += nbuf + processed; + } + + #[inline] + pub fn finish128(mut self) -> (u64, u64) { + debug_assert!(self.nbuf < BUFFER_SIZE); + + // Process full elements in buffer. + let last = self.nbuf / ELEM_SIZE; + + // Since we're consuming self, avoid updating members for a potential + // performance gain. + let mut state = self.state; + + for i in 0..last { + let elem = unsafe { self.buf.get_unchecked(i).assume_init().to_le() }; + state.v3 ^= elem; + Sip24Rounds::c_rounds(&mut state); + state.v0 ^= elem; + } + + // Get remaining partial element. + let elem = if self.nbuf % ELEM_SIZE != 0 { + unsafe { + // Ensure element is initialized by writing zero bytes. 
At most + // `ELEM_SIZE - 1` are required given the above check. It's safe + // to write this many because we have the spill and we maintain + // `self.nbuf` such that this write will start before the spill. + let dst = (self.buf.as_mut_ptr() as *mut u8).add(self.nbuf); + ptr::write_bytes(dst, 0, ELEM_SIZE - 1); + self.buf.get_unchecked(last).assume_init().to_le() + } + } else { + 0 + }; + + // Finalize the hash. + let length = self.processed + self.nbuf; + let b: u64 = ((length as u64 & 0xff) << 56) | elem; + + state.v3 ^= b; + Sip24Rounds::c_rounds(&mut state); + state.v0 ^= b; + + state.v2 ^= 0xee; + Sip24Rounds::d_rounds(&mut state); + let _0 = state.v0 ^ state.v1 ^ state.v2 ^ state.v3; + + state.v1 ^= 0xdd; + Sip24Rounds::d_rounds(&mut state); + let _1 = state.v0 ^ state.v1 ^ state.v2 ^ state.v3; + + (_0, _1) + } +} + +impl Hasher for SipHasher128 { + #[inline] + fn write_u8(&mut self, i: u8) { + self.short_write(i.to_ne_bytes()); + } + + #[inline] + fn write_u16(&mut self, i: u16) { + self.short_write(i.to_ne_bytes()); + } + + #[inline] + fn write_u32(&mut self, i: u32) { + self.short_write(i.to_ne_bytes()); + } + + #[inline] + fn write_u64(&mut self, i: u64) { + self.short_write(i.to_ne_bytes()); + } + + #[inline] + fn write_usize(&mut self, i: usize) { + self.short_write(i.to_ne_bytes()); + } + + #[inline] + fn write_i8(&mut self, i: i8) { + self.short_write((i as u8).to_ne_bytes()); + } + + #[inline] + fn write_i16(&mut self, i: i16) { + self.short_write((i as u16).to_ne_bytes()); + } + + #[inline] + fn write_i32(&mut self, i: i32) { + self.short_write((i as u32).to_ne_bytes()); + } + + #[inline] + fn write_i64(&mut self, i: i64) { + self.short_write((i as u64).to_ne_bytes()); + } + + #[inline] + fn write_isize(&mut self, i: isize) { + self.short_write((i as usize).to_ne_bytes()); + } + + #[inline] + fn write(&mut self, msg: &[u8]) { + self.slice_write(msg); + } + + #[inline] + fn write_str(&mut self, s: &str) { + // This hasher works byte-wise, and `0xFF` cannot show up in a `str`, + // so just hashing the one extra byte is enough to be prefix-free. 
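+        //
+        // For example, write_str("ab") followed by write_str("c") feeds the bytes
+        // 61 62 FF 63 FF, while write_str("a") followed by write_str("bc") feeds
+        // 61 FF 62 63 FF, so concatenation cannot produce a collision.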
+ self.write(s.as_bytes()); + self.write_u8(0xFF); + } + + fn finish(&self) -> u64 { + panic!("SipHasher128 cannot provide valid 64 bit hashes") + } +} + +#[derive(Debug, Clone, Default)] +struct Sip24Rounds; + +impl Sip24Rounds { + #[inline] + fn c_rounds(state: &mut State) { + compress!(state); + compress!(state); + } + + #[inline] + fn d_rounds(state: &mut State) { + compress!(state); + compress!(state); + compress!(state); + compress!(state); + } +} diff --git a/compiler/rustc_data_structures/src/sip128/tests.rs b/compiler/rustc_data_structures/src/sip128/tests.rs new file mode 100644 index 000000000..5fe967c41 --- /dev/null +++ b/compiler/rustc_data_structures/src/sip128/tests.rs @@ -0,0 +1,497 @@ +use super::*; + +use std::hash::{Hash, Hasher}; + +// Hash just the bytes of the slice, without length prefix +struct Bytes<'a>(&'a [u8]); + +impl<'a> Hash for Bytes<'a> { + #[allow(unused_must_use)] + fn hash(&self, state: &mut H) { + for byte in self.0 { + state.write_u8(*byte); + } + } +} + +fn hash_with(mut st: SipHasher128, x: &T) -> (u64, u64) { + x.hash(&mut st); + st.finish128() +} + +fn hash(x: &T) -> (u64, u64) { + hash_with(SipHasher128::new_with_keys(0, 0), x) +} + +const TEST_VECTOR: [[u8; 16]; 64] = [ + [ + 0xa3, 0x81, 0x7f, 0x04, 0xba, 0x25, 0xa8, 0xe6, 0x6d, 0xf6, 0x72, 0x14, 0xc7, 0x55, 0x02, + 0x93, + ], + [ + 0xda, 0x87, 0xc1, 0xd8, 0x6b, 0x99, 0xaf, 0x44, 0x34, 0x76, 0x59, 0x11, 0x9b, 0x22, 0xfc, + 0x45, + ], + [ + 0x81, 0x77, 0x22, 0x8d, 0xa4, 0xa4, 0x5d, 0xc7, 0xfc, 0xa3, 0x8b, 0xde, 0xf6, 0x0a, 0xff, + 0xe4, + ], + [ + 0x9c, 0x70, 0xb6, 0x0c, 0x52, 0x67, 0xa9, 0x4e, 0x5f, 0x33, 0xb6, 0xb0, 0x29, 0x85, 0xed, + 0x51, + ], + [ + 0xf8, 0x81, 0x64, 0xc1, 0x2d, 0x9c, 0x8f, 0xaf, 0x7d, 0x0f, 0x6e, 0x7c, 0x7b, 0xcd, 0x55, + 0x79, + ], + [ + 0x13, 0x68, 0x87, 0x59, 0x80, 0x77, 0x6f, 0x88, 0x54, 0x52, 0x7a, 0x07, 0x69, 0x0e, 0x96, + 0x27, + ], + [ + 0x14, 0xee, 0xca, 0x33, 0x8b, 0x20, 0x86, 0x13, 0x48, 0x5e, 0xa0, 0x30, 0x8f, 0xd7, 0xa1, + 0x5e, + ], + [ + 0xa1, 0xf1, 0xeb, 0xbe, 0xd8, 0xdb, 0xc1, 0x53, 0xc0, 0xb8, 0x4a, 0xa6, 0x1f, 0xf0, 0x82, + 0x39, + ], + [ + 0x3b, 0x62, 0xa9, 0xba, 0x62, 0x58, 0xf5, 0x61, 0x0f, 0x83, 0xe2, 0x64, 0xf3, 0x14, 0x97, + 0xb4, + ], + [ + 0x26, 0x44, 0x99, 0x06, 0x0a, 0xd9, 0xba, 0xab, 0xc4, 0x7f, 0x8b, 0x02, 0xbb, 0x6d, 0x71, + 0xed, + ], + [ + 0x00, 0x11, 0x0d, 0xc3, 0x78, 0x14, 0x69, 0x56, 0xc9, 0x54, 0x47, 0xd3, 0xf3, 0xd0, 0xfb, + 0xba, + ], + [ + 0x01, 0x51, 0xc5, 0x68, 0x38, 0x6b, 0x66, 0x77, 0xa2, 0xb4, 0xdc, 0x6f, 0x81, 0xe5, 0xdc, + 0x18, + ], + [ + 0xd6, 0x26, 0xb2, 0x66, 0x90, 0x5e, 0xf3, 0x58, 0x82, 0x63, 0x4d, 0xf6, 0x85, 0x32, 0xc1, + 0x25, + ], + [ + 0x98, 0x69, 0xe2, 0x47, 0xe9, 0xc0, 0x8b, 0x10, 0xd0, 0x29, 0x93, 0x4f, 0xc4, 0xb9, 0x52, + 0xf7, + ], + [ + 0x31, 0xfc, 0xef, 0xac, 0x66, 0xd7, 0xde, 0x9c, 0x7e, 0xc7, 0x48, 0x5f, 0xe4, 0x49, 0x49, + 0x02, + ], + [ + 0x54, 0x93, 0xe9, 0x99, 0x33, 0xb0, 0xa8, 0x11, 0x7e, 0x08, 0xec, 0x0f, 0x97, 0xcf, 0xc3, + 0xd9, + ], + [ + 0x6e, 0xe2, 0xa4, 0xca, 0x67, 0xb0, 0x54, 0xbb, 0xfd, 0x33, 0x15, 0xbf, 0x85, 0x23, 0x05, + 0x77, + ], + [ + 0x47, 0x3d, 0x06, 0xe8, 0x73, 0x8d, 0xb8, 0x98, 0x54, 0xc0, 0x66, 0xc4, 0x7a, 0xe4, 0x77, + 0x40, + ], + [ + 0xa4, 0x26, 0xe5, 0xe4, 0x23, 0xbf, 0x48, 0x85, 0x29, 0x4d, 0xa4, 0x81, 0xfe, 0xae, 0xf7, + 0x23, + ], + [ + 0x78, 0x01, 0x77, 0x31, 0xcf, 0x65, 0xfa, 0xb0, 0x74, 0xd5, 0x20, 0x89, 0x52, 0x51, 0x2e, + 0xb1, + ], + [ + 0x9e, 0x25, 0xfc, 0x83, 0x3f, 0x22, 0x90, 0x73, 0x3e, 0x93, 0x44, 0xa5, 0xe8, 0x38, 0x39, + 0xeb, + ], + [ + 0x56, 0x8e, 0x49, 0x5a, 
0xbe, 0x52, 0x5a, 0x21, 0x8a, 0x22, 0x14, 0xcd, 0x3e, 0x07, 0x1d, + 0x12, + ], + [ + 0x4a, 0x29, 0xb5, 0x45, 0x52, 0xd1, 0x6b, 0x9a, 0x46, 0x9c, 0x10, 0x52, 0x8e, 0xff, 0x0a, + 0xae, + ], + [ + 0xc9, 0xd1, 0x84, 0xdd, 0xd5, 0xa9, 0xf5, 0xe0, 0xcf, 0x8c, 0xe2, 0x9a, 0x9a, 0xbf, 0x69, + 0x1c, + ], + [ + 0x2d, 0xb4, 0x79, 0xae, 0x78, 0xbd, 0x50, 0xd8, 0x88, 0x2a, 0x8a, 0x17, 0x8a, 0x61, 0x32, + 0xad, + ], + [ + 0x8e, 0xce, 0x5f, 0x04, 0x2d, 0x5e, 0x44, 0x7b, 0x50, 0x51, 0xb9, 0xea, 0xcb, 0x8d, 0x8f, + 0x6f, + ], + [ + 0x9c, 0x0b, 0x53, 0xb4, 0xb3, 0xc3, 0x07, 0xe8, 0x7e, 0xae, 0xe0, 0x86, 0x78, 0x14, 0x1f, + 0x66, + ], + [ + 0xab, 0xf2, 0x48, 0xaf, 0x69, 0xa6, 0xea, 0xe4, 0xbf, 0xd3, 0xeb, 0x2f, 0x12, 0x9e, 0xeb, + 0x94, + ], + [ + 0x06, 0x64, 0xda, 0x16, 0x68, 0x57, 0x4b, 0x88, 0xb9, 0x35, 0xf3, 0x02, 0x73, 0x58, 0xae, + 0xf4, + ], + [ + 0xaa, 0x4b, 0x9d, 0xc4, 0xbf, 0x33, 0x7d, 0xe9, 0x0c, 0xd4, 0xfd, 0x3c, 0x46, 0x7c, 0x6a, + 0xb7, + ], + [ + 0xea, 0x5c, 0x7f, 0x47, 0x1f, 0xaf, 0x6b, 0xde, 0x2b, 0x1a, 0xd7, 0xd4, 0x68, 0x6d, 0x22, + 0x87, + ], + [ + 0x29, 0x39, 0xb0, 0x18, 0x32, 0x23, 0xfa, 0xfc, 0x17, 0x23, 0xde, 0x4f, 0x52, 0xc4, 0x3d, + 0x35, + ], + [ + 0x7c, 0x39, 0x56, 0xca, 0x5e, 0xea, 0xfc, 0x3e, 0x36, 0x3e, 0x9d, 0x55, 0x65, 0x46, 0xeb, + 0x68, + ], + [ + 0x77, 0xc6, 0x07, 0x71, 0x46, 0xf0, 0x1c, 0x32, 0xb6, 0xb6, 0x9d, 0x5f, 0x4e, 0xa9, 0xff, + 0xcf, + ], + [ + 0x37, 0xa6, 0x98, 0x6c, 0xb8, 0x84, 0x7e, 0xdf, 0x09, 0x25, 0xf0, 0xf1, 0x30, 0x9b, 0x54, + 0xde, + ], + [ + 0xa7, 0x05, 0xf0, 0xe6, 0x9d, 0xa9, 0xa8, 0xf9, 0x07, 0x24, 0x1a, 0x2e, 0x92, 0x3c, 0x8c, + 0xc8, + ], + [ + 0x3d, 0xc4, 0x7d, 0x1f, 0x29, 0xc4, 0x48, 0x46, 0x1e, 0x9e, 0x76, 0xed, 0x90, 0x4f, 0x67, + 0x11, + ], + [ + 0x0d, 0x62, 0xbf, 0x01, 0xe6, 0xfc, 0x0e, 0x1a, 0x0d, 0x3c, 0x47, 0x51, 0xc5, 0xd3, 0x69, + 0x2b, + ], + [ + 0x8c, 0x03, 0x46, 0x8b, 0xca, 0x7c, 0x66, 0x9e, 0xe4, 0xfd, 0x5e, 0x08, 0x4b, 0xbe, 0xe7, + 0xb5, + ], + [ + 0x52, 0x8a, 0x5b, 0xb9, 0x3b, 0xaf, 0x2c, 0x9c, 0x44, 0x73, 0xcc, 0xe5, 0xd0, 0xd2, 0x2b, + 0xd9, + ], + [ + 0xdf, 0x6a, 0x30, 0x1e, 0x95, 0xc9, 0x5d, 0xad, 0x97, 0xae, 0x0c, 0xc8, 0xc6, 0x91, 0x3b, + 0xd8, + ], + [ + 0x80, 0x11, 0x89, 0x90, 0x2c, 0x85, 0x7f, 0x39, 0xe7, 0x35, 0x91, 0x28, 0x5e, 0x70, 0xb6, + 0xdb, + ], + [ + 0xe6, 0x17, 0x34, 0x6a, 0xc9, 0xc2, 0x31, 0xbb, 0x36, 0x50, 0xae, 0x34, 0xcc, 0xca, 0x0c, + 0x5b, + ], + [ + 0x27, 0xd9, 0x34, 0x37, 0xef, 0xb7, 0x21, 0xaa, 0x40, 0x18, 0x21, 0xdc, 0xec, 0x5a, 0xdf, + 0x89, + ], + [ + 0x89, 0x23, 0x7d, 0x9d, 0xed, 0x9c, 0x5e, 0x78, 0xd8, 0xb1, 0xc9, 0xb1, 0x66, 0xcc, 0x73, + 0x42, + ], + [ + 0x4a, 0x6d, 0x80, 0x91, 0xbf, 0x5e, 0x7d, 0x65, 0x11, 0x89, 0xfa, 0x94, 0xa2, 0x50, 0xb1, + 0x4c, + ], + [ + 0x0e, 0x33, 0xf9, 0x60, 0x55, 0xe7, 0xae, 0x89, 0x3f, 0xfc, 0x0e, 0x3d, 0xcf, 0x49, 0x29, + 0x02, + ], + [ + 0xe6, 0x1c, 0x43, 0x2b, 0x72, 0x0b, 0x19, 0xd1, 0x8e, 0xc8, 0xd8, 0x4b, 0xdc, 0x63, 0x15, + 0x1b, + ], + [ + 0xf7, 0xe5, 0xae, 0xf5, 0x49, 0xf7, 0x82, 0xcf, 0x37, 0x90, 0x55, 0xa6, 0x08, 0x26, 0x9b, + 0x16, + ], + [ + 0x43, 0x8d, 0x03, 0x0f, 0xd0, 0xb7, 0xa5, 0x4f, 0xa8, 0x37, 0xf2, 0xad, 0x20, 0x1a, 0x64, + 0x03, + ], + [ + 0xa5, 0x90, 0xd3, 0xee, 0x4f, 0xbf, 0x04, 0xe3, 0x24, 0x7e, 0x0d, 0x27, 0xf2, 0x86, 0x42, + 0x3f, + ], + [ + 0x5f, 0xe2, 0xc1, 0xa1, 0x72, 0xfe, 0x93, 0xc4, 0xb1, 0x5c, 0xd3, 0x7c, 0xae, 0xf9, 0xf5, + 0x38, + ], + [ + 0x2c, 0x97, 0x32, 0x5c, 0xbd, 0x06, 0xb3, 0x6e, 0xb2, 0x13, 0x3d, 0xd0, 0x8b, 0x3a, 0x01, + 0x7c, + ], + [ + 0x92, 0xc8, 0x14, 0x22, 0x7a, 0x6b, 0xca, 0x94, 0x9f, 0xf0, 0x65, 0x9f, 0x00, 0x2a, 0xd3, 
+ 0x9e, + ], + [ + 0xdc, 0xe8, 0x50, 0x11, 0x0b, 0xd8, 0x32, 0x8c, 0xfb, 0xd5, 0x08, 0x41, 0xd6, 0x91, 0x1d, + 0x87, + ], + [ + 0x67, 0xf1, 0x49, 0x84, 0xc7, 0xda, 0x79, 0x12, 0x48, 0xe3, 0x2b, 0xb5, 0x92, 0x25, 0x83, + 0xda, + ], + [ + 0x19, 0x38, 0xf2, 0xcf, 0x72, 0xd5, 0x4e, 0xe9, 0x7e, 0x94, 0x16, 0x6f, 0xa9, 0x1d, 0x2a, + 0x36, + ], + [ + 0x74, 0x48, 0x1e, 0x96, 0x46, 0xed, 0x49, 0xfe, 0x0f, 0x62, 0x24, 0x30, 0x16, 0x04, 0x69, + 0x8e, + ], + [ + 0x57, 0xfc, 0xa5, 0xde, 0x98, 0xa9, 0xd6, 0xd8, 0x00, 0x64, 0x38, 0xd0, 0x58, 0x3d, 0x8a, + 0x1d, + ], + [ + 0x9f, 0xec, 0xde, 0x1c, 0xef, 0xdc, 0x1c, 0xbe, 0xd4, 0x76, 0x36, 0x74, 0xd9, 0x57, 0x53, + 0x59, + ], + [ + 0xe3, 0x04, 0x0c, 0x00, 0xeb, 0x28, 0xf1, 0x53, 0x66, 0xca, 0x73, 0xcb, 0xd8, 0x72, 0xe7, + 0x40, + ], + [ + 0x76, 0x97, 0x00, 0x9a, 0x6a, 0x83, 0x1d, 0xfe, 0xcc, 0xa9, 0x1c, 0x59, 0x93, 0x67, 0x0f, + 0x7a, + ], + [ + 0x58, 0x53, 0x54, 0x23, 0x21, 0xf5, 0x67, 0xa0, 0x05, 0xd5, 0x47, 0xa4, 0xf0, 0x47, 0x59, + 0xbd, + ], + [ + 0x51, 0x50, 0xd1, 0x77, 0x2f, 0x50, 0x83, 0x4a, 0x50, 0x3e, 0x06, 0x9a, 0x97, 0x3f, 0xbd, + 0x7c, + ], +]; + +// Test vector from reference implementation +#[test] +fn test_siphash_2_4_test_vector() { + let k0 = 0x_07_06_05_04_03_02_01_00; + let k1 = 0x_0f_0e_0d_0c_0b_0a_09_08; + + let mut input: Vec = Vec::new(); + + for i in 0..64 { + let out = hash_with(SipHasher128::new_with_keys(k0, k1), &Bytes(&input[..])); + let expected = ( + ((TEST_VECTOR[i][0] as u64) << 0) + | ((TEST_VECTOR[i][1] as u64) << 8) + | ((TEST_VECTOR[i][2] as u64) << 16) + | ((TEST_VECTOR[i][3] as u64) << 24) + | ((TEST_VECTOR[i][4] as u64) << 32) + | ((TEST_VECTOR[i][5] as u64) << 40) + | ((TEST_VECTOR[i][6] as u64) << 48) + | ((TEST_VECTOR[i][7] as u64) << 56), + ((TEST_VECTOR[i][8] as u64) << 0) + | ((TEST_VECTOR[i][9] as u64) << 8) + | ((TEST_VECTOR[i][10] as u64) << 16) + | ((TEST_VECTOR[i][11] as u64) << 24) + | ((TEST_VECTOR[i][12] as u64) << 32) + | ((TEST_VECTOR[i][13] as u64) << 40) + | ((TEST_VECTOR[i][14] as u64) << 48) + | ((TEST_VECTOR[i][15] as u64) << 56), + ); + + assert_eq!(out, expected); + input.push(i as u8); + } +} + +#[test] +#[cfg(target_arch = "arm")] +fn test_hash_usize() { + let val = 0xdeadbeef_deadbeef_u64; + assert!(hash(&(val as u64)) != hash(&(val as usize))); + assert_eq!(hash(&(val as u32)), hash(&(val as usize))); +} +#[test] +#[cfg(target_arch = "x86_64")] +fn test_hash_usize() { + let val = 0xdeadbeef_deadbeef_u64; + assert_eq!(hash(&(val as u64)), hash(&(val as usize))); + assert!(hash(&(val as u32)) != hash(&(val as usize))); +} +#[test] +#[cfg(target_arch = "x86")] +fn test_hash_usize() { + let val = 0xdeadbeef_deadbeef_u64; + assert!(hash(&(val as u64)) != hash(&(val as usize))); + assert_eq!(hash(&(val as u32)), hash(&(val as usize))); +} + +#[test] +fn test_hash_idempotent() { + let val64 = 0xdeadbeef_deadbeef_u64; + assert_eq!(hash(&val64), hash(&val64)); + let val32 = 0xdeadbeef_u32; + assert_eq!(hash(&val32), hash(&val32)); +} + +#[test] +fn test_hash_no_bytes_dropped_64() { + let val = 0xdeadbeef_deadbeef_u64; + + assert!(hash(&val) != hash(&zero_byte(val, 0))); + assert!(hash(&val) != hash(&zero_byte(val, 1))); + assert!(hash(&val) != hash(&zero_byte(val, 2))); + assert!(hash(&val) != hash(&zero_byte(val, 3))); + assert!(hash(&val) != hash(&zero_byte(val, 4))); + assert!(hash(&val) != hash(&zero_byte(val, 5))); + assert!(hash(&val) != hash(&zero_byte(val, 6))); + assert!(hash(&val) != hash(&zero_byte(val, 7))); + + fn zero_byte(val: u64, byte: usize) -> u64 { + assert!(byte < 8); + val & 
!(0xff << (byte * 8)) + } +} + +#[test] +fn test_hash_no_bytes_dropped_32() { + let val = 0xdeadbeef_u32; + + assert!(hash(&val) != hash(&zero_byte(val, 0))); + assert!(hash(&val) != hash(&zero_byte(val, 1))); + assert!(hash(&val) != hash(&zero_byte(val, 2))); + assert!(hash(&val) != hash(&zero_byte(val, 3))); + + fn zero_byte(val: u32, byte: usize) -> u32 { + assert!(byte < 4); + val & !(0xff << (byte * 8)) + } +} + +#[test] +fn test_hash_no_concat_alias() { + let s = ("aa", "bb"); + let t = ("aabb", ""); + let u = ("a", "abb"); + + assert!(s != t && t != u); + assert!(hash(&s) != hash(&t) && hash(&s) != hash(&u)); + + let u = [1, 0, 0, 0]; + let v = (&u[..1], &u[1..3], &u[3..]); + let w = (&u[..], &u[4..4], &u[4..4]); + + assert!(v != w); + assert!(hash(&v) != hash(&w)); +} + +#[test] +fn test_short_write_works() { + let test_u8 = 0xFF_u8; + let test_u16 = 0x1122_u16; + let test_u32 = 0x22334455_u32; + let test_u64 = 0x33445566_778899AA_u64; + let test_u128 = 0x11223344_55667788_99AABBCC_DDEEFF77_u128; + let test_usize = 0xD0C0B0A0_usize; + + let test_i8 = -1_i8; + let test_i16 = -2_i16; + let test_i32 = -3_i32; + let test_i64 = -4_i64; + let test_i128 = -5_i128; + let test_isize = -6_isize; + + let mut h1 = SipHasher128::new_with_keys(0, 0); + h1.write(b"bytes"); + h1.write(b"string"); + h1.write_u8(test_u8); + h1.write_u16(test_u16); + h1.write_u32(test_u32); + h1.write_u64(test_u64); + h1.write_u128(test_u128); + h1.write_usize(test_usize); + h1.write_i8(test_i8); + h1.write_i16(test_i16); + h1.write_i32(test_i32); + h1.write_i64(test_i64); + h1.write_i128(test_i128); + h1.write_isize(test_isize); + + let mut h2 = SipHasher128::new_with_keys(0, 0); + h2.write(b"bytes"); + h2.write(b"string"); + h2.write(&test_u8.to_ne_bytes()); + h2.write(&test_u16.to_ne_bytes()); + h2.write(&test_u32.to_ne_bytes()); + h2.write(&test_u64.to_ne_bytes()); + h2.write(&test_u128.to_ne_bytes()); + h2.write(&test_usize.to_ne_bytes()); + h2.write(&test_i8.to_ne_bytes()); + h2.write(&test_i16.to_ne_bytes()); + h2.write(&test_i32.to_ne_bytes()); + h2.write(&test_i64.to_ne_bytes()); + h2.write(&test_i128.to_ne_bytes()); + h2.write(&test_isize.to_ne_bytes()); + + let h1_hash = h1.finish128(); + let h2_hash = h2.finish128(); + + assert_eq!(h1_hash, h2_hash); +} + +macro_rules! test_fill_buffer { + ($type:ty, $write_method:ident) => {{ + // Test filling and overfilling the buffer from all possible offsets + // for a given integer type and its corresponding write method. 
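+        // (For each offset i in 1..=size_of::<$type>(), the buffer is pre-filled to
+        // BUFFER_SIZE - i bytes so that the subsequent write straddles the buffer
+        // boundary, and the result is checked against a plain byte-slice write.)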
+ const SIZE: usize = std::mem::size_of::<$type>(); + let input = [42; BUFFER_SIZE]; + let x = 0x01234567_89ABCDEF_76543210_FEDCBA98_u128 as $type; + let x_bytes = &x.to_ne_bytes(); + + for i in 1..=SIZE { + let s = &input[..BUFFER_SIZE - i]; + + let mut h1 = SipHasher128::new_with_keys(7, 13); + h1.write(s); + h1.$write_method(x); + + let mut h2 = SipHasher128::new_with_keys(7, 13); + h2.write(s); + h2.write(x_bytes); + + let h1_hash = h1.finish128(); + let h2_hash = h2.finish128(); + + assert_eq!(h1_hash, h2_hash); + } + }}; +} + +#[test] +fn test_fill_buffer() { + test_fill_buffer!(u8, write_u8); + test_fill_buffer!(u16, write_u16); + test_fill_buffer!(u32, write_u32); + test_fill_buffer!(u64, write_u64); + test_fill_buffer!(u128, write_u128); + test_fill_buffer!(usize, write_usize); + + test_fill_buffer!(i8, write_i8); + test_fill_buffer!(i16, write_i16); + test_fill_buffer!(i32, write_i32); + test_fill_buffer!(i64, write_i64); + test_fill_buffer!(i128, write_i128); + test_fill_buffer!(isize, write_isize); +} diff --git a/compiler/rustc_data_structures/src/small_c_str.rs b/compiler/rustc_data_structures/src/small_c_str.rs new file mode 100644 index 000000000..3a8ab8ff9 --- /dev/null +++ b/compiler/rustc_data_structures/src/small_c_str.rs @@ -0,0 +1,81 @@ +use std::ffi; +use std::ops::Deref; + +use smallvec::SmallVec; + +#[cfg(test)] +mod tests; + +const SIZE: usize = 36; + +/// Like SmallVec but for C strings. +#[derive(Clone)] +pub struct SmallCStr { + data: SmallVec<[u8; SIZE]>, +} + +impl SmallCStr { + #[inline] + pub fn new(s: &str) -> SmallCStr { + let len = s.len(); + let len1 = len + 1; + let data = if len < SIZE { + let mut buf = [0; SIZE]; + buf[..len].copy_from_slice(s.as_bytes()); + SmallVec::from_buf_and_len(buf, len1) + } else { + let mut data = Vec::with_capacity(len1); + data.extend_from_slice(s.as_bytes()); + data.push(0); + SmallVec::from_vec(data) + }; + if let Err(e) = ffi::CStr::from_bytes_with_nul(&data) { + panic!("The string \"{}\" cannot be converted into a CStr: {}", s, e); + } + SmallCStr { data } + } + + #[inline] + pub fn new_with_nul(s: &str) -> SmallCStr { + let b = s.as_bytes(); + if let Err(e) = ffi::CStr::from_bytes_with_nul(b) { + panic!("The string \"{}\" cannot be converted into a CStr: {}", s, e); + } + SmallCStr { data: SmallVec::from_slice(s.as_bytes()) } + } + + #[inline] + pub fn as_c_str(&self) -> &ffi::CStr { + unsafe { ffi::CStr::from_bytes_with_nul_unchecked(&self.data) } + } + + #[inline] + pub fn len_with_nul(&self) -> usize { + self.data.len() + } + + pub fn spilled(&self) -> bool { + self.data.spilled() + } +} + +impl Deref for SmallCStr { + type Target = ffi::CStr; + + #[inline] + fn deref(&self) -> &ffi::CStr { + self.as_c_str() + } +} + +impl<'a> FromIterator<&'a str> for SmallCStr { + fn from_iter>(iter: T) -> Self { + let mut data = + iter.into_iter().flat_map(|s| s.as_bytes()).copied().collect::>(); + data.push(0); + if let Err(e) = ffi::CStr::from_bytes_with_nul(&data) { + panic!("The iterator {:?} cannot be converted into a CStr: {}", data, e); + } + Self { data } + } +} diff --git a/compiler/rustc_data_structures/src/small_c_str/tests.rs b/compiler/rustc_data_structures/src/small_c_str/tests.rs new file mode 100644 index 000000000..47277604b --- /dev/null +++ b/compiler/rustc_data_structures/src/small_c_str/tests.rs @@ -0,0 +1,45 @@ +use super::*; + +#[test] +fn short() { + const TEXT: &str = "abcd"; + let reference = ffi::CString::new(TEXT.to_string()).unwrap(); + + let scs = SmallCStr::new(TEXT); + + 
assert_eq!(scs.len_with_nul(), TEXT.len() + 1); + assert_eq!(scs.as_c_str(), reference.as_c_str()); + assert!(!scs.spilled()); +} + +#[test] +fn empty() { + const TEXT: &str = ""; + let reference = ffi::CString::new(TEXT.to_string()).unwrap(); + + let scs = SmallCStr::new(TEXT); + + assert_eq!(scs.len_with_nul(), TEXT.len() + 1); + assert_eq!(scs.as_c_str(), reference.as_c_str()); + assert!(!scs.spilled()); +} + +#[test] +fn long() { + const TEXT: &str = "01234567890123456789012345678901234567890123456789\ + 01234567890123456789012345678901234567890123456789\ + 01234567890123456789012345678901234567890123456789"; + let reference = ffi::CString::new(TEXT.to_string()).unwrap(); + + let scs = SmallCStr::new(TEXT); + + assert_eq!(scs.len_with_nul(), TEXT.len() + 1); + assert_eq!(scs.as_c_str(), reference.as_c_str()); + assert!(scs.spilled()); +} + +#[test] +#[should_panic] +fn internal_nul() { + let _ = SmallCStr::new("abcd\0def"); +} diff --git a/compiler/rustc_data_structures/src/small_str.rs b/compiler/rustc_data_structures/src/small_str.rs new file mode 100644 index 000000000..800acb1b0 --- /dev/null +++ b/compiler/rustc_data_structures/src/small_str.rs @@ -0,0 +1,68 @@ +use smallvec::SmallVec; + +#[cfg(test)] +mod tests; + +/// Like SmallVec but for strings. +#[derive(Default)] +pub struct SmallStr(SmallVec<[u8; N]>); + +impl SmallStr { + #[inline] + pub fn new() -> Self { + SmallStr(SmallVec::default()) + } + + #[inline] + pub fn push_str(&mut self, s: &str) { + self.0.extend_from_slice(s.as_bytes()); + } + + #[inline] + pub fn empty(&self) -> bool { + self.0.is_empty() + } + + #[inline] + pub fn spilled(&self) -> bool { + self.0.spilled() + } + + #[inline] + pub fn as_str(&self) -> &str { + unsafe { std::str::from_utf8_unchecked(self.0.as_slice()) } + } +} + +impl std::ops::Deref for SmallStr { + type Target = str; + + #[inline] + fn deref(&self) -> &str { + self.as_str() + } +} + +impl> FromIterator for SmallStr { + #[inline] + fn from_iter(iter: T) -> Self + where + T: IntoIterator, + { + let mut s = SmallStr::default(); + s.extend(iter); + s + } +} + +impl> Extend for SmallStr { + #[inline] + fn extend(&mut self, iter: T) + where + T: IntoIterator, + { + for a in iter.into_iter() { + self.push_str(a.as_ref()); + } + } +} diff --git a/compiler/rustc_data_structures/src/small_str/tests.rs b/compiler/rustc_data_structures/src/small_str/tests.rs new file mode 100644 index 000000000..7635a9b72 --- /dev/null +++ b/compiler/rustc_data_structures/src/small_str/tests.rs @@ -0,0 +1,20 @@ +use super::*; + +#[test] +fn empty() { + let s = SmallStr::<1>::new(); + assert!(s.empty()); + assert_eq!("", s.as_str()); + assert!(!s.spilled()); +} + +#[test] +fn from_iter() { + let s = ["aa", "bb", "cc"].iter().collect::>(); + assert_eq!("aabbcc", s.as_str()); + assert!(!s.spilled()); + + let s = ["aa", "bb", "cc", "dd"].iter().collect::>(); + assert_eq!("aabbccdd", s.as_str()); + assert!(s.spilled()); +} diff --git a/compiler/rustc_data_structures/src/snapshot_map/mod.rs b/compiler/rustc_data_structures/src/snapshot_map/mod.rs new file mode 100644 index 000000000..8a50179cd --- /dev/null +++ b/compiler/rustc_data_structures/src/snapshot_map/mod.rs @@ -0,0 +1,143 @@ +use crate::fx::FxHashMap; +use crate::undo_log::{Rollback, Snapshots, UndoLogs, VecLog}; +use std::borrow::{Borrow, BorrowMut}; +use std::hash::Hash; +use std::marker::PhantomData; +use std::ops; + +pub use crate::undo_log::Snapshot; + +#[cfg(test)] +mod tests; + +pub type SnapshotMapStorage = SnapshotMap, ()>; +pub type SnapshotMapRef<'a, 
K, V, L> = SnapshotMap, &'a mut L>; + +#[derive(Clone)] +pub struct SnapshotMap, L = VecLog>> { + map: M, + undo_log: L, + _marker: PhantomData<(K, V)>, +} + +// HACK(eddyb) manual impl avoids `Default` bounds on `K` and `V`. +impl Default for SnapshotMap +where + M: Default, + L: Default, +{ + fn default() -> Self { + SnapshotMap { map: Default::default(), undo_log: Default::default(), _marker: PhantomData } + } +} + +#[derive(Clone)] +pub enum UndoLog { + Inserted(K), + Overwrite(K, V), + Purged, +} + +impl SnapshotMap { + #[inline] + pub fn with_log(&mut self, undo_log: L2) -> SnapshotMap { + SnapshotMap { map: &mut self.map, undo_log, _marker: PhantomData } + } +} + +impl SnapshotMap +where + K: Hash + Clone + Eq, + M: BorrowMut> + Borrow>, + L: UndoLogs>, +{ + pub fn clear(&mut self) { + self.map.borrow_mut().clear(); + self.undo_log.clear(); + } + + pub fn insert(&mut self, key: K, value: V) -> bool { + match self.map.borrow_mut().insert(key.clone(), value) { + None => { + self.undo_log.push(UndoLog::Inserted(key)); + true + } + Some(old_value) => { + self.undo_log.push(UndoLog::Overwrite(key, old_value)); + false + } + } + } + + pub fn remove(&mut self, key: K) -> bool { + match self.map.borrow_mut().remove(&key) { + Some(old_value) => { + self.undo_log.push(UndoLog::Overwrite(key, old_value)); + true + } + None => false, + } + } + + pub fn get(&self, key: &K) -> Option<&V> { + self.map.borrow().get(key) + } +} + +impl SnapshotMap +where + K: Hash + Clone + Eq, +{ + pub fn snapshot(&mut self) -> Snapshot { + self.undo_log.start_snapshot() + } + + pub fn commit(&mut self, snapshot: Snapshot) { + self.undo_log.commit(snapshot) + } + + pub fn rollback_to(&mut self, snapshot: Snapshot) { + let map = &mut self.map; + self.undo_log.rollback_to(|| map, snapshot) + } +} + +impl<'k, K, V, M, L> ops::Index<&'k K> for SnapshotMap +where + K: Hash + Clone + Eq, + M: Borrow>, +{ + type Output = V; + fn index(&self, key: &'k K) -> &V { + &self.map.borrow()[key] + } +} + +impl Rollback> for SnapshotMap +where + K: Eq + Hash, + M: Rollback>, +{ + fn reverse(&mut self, undo: UndoLog) { + self.map.reverse(undo) + } +} + +impl Rollback> for FxHashMap +where + K: Eq + Hash, +{ + fn reverse(&mut self, undo: UndoLog) { + match undo { + UndoLog::Inserted(key) => { + self.remove(&key); + } + + UndoLog::Overwrite(key, old_value) => { + self.insert(key, old_value); + } + + UndoLog::Purged => {} + } + } +} diff --git a/compiler/rustc_data_structures/src/snapshot_map/tests.rs b/compiler/rustc_data_structures/src/snapshot_map/tests.rs new file mode 100644 index 000000000..72ca53c2b --- /dev/null +++ b/compiler/rustc_data_structures/src/snapshot_map/tests.rs @@ -0,0 +1,43 @@ +use super::SnapshotMap; + +#[test] +fn basic() { + let mut map = SnapshotMap::default(); + map.insert(22, "twenty-two"); + let snapshot = map.snapshot(); + map.insert(22, "thirty-three"); + assert_eq!(map[&22], "thirty-three"); + map.insert(44, "forty-four"); + assert_eq!(map[&44], "forty-four"); + assert_eq!(map.get(&33), None); + map.rollback_to(snapshot); + assert_eq!(map[&22], "twenty-two"); + assert_eq!(map.get(&33), None); + assert_eq!(map.get(&44), None); +} + +#[test] +#[should_panic] +fn out_of_order() { + let mut map = SnapshotMap::default(); + map.insert(22, "twenty-two"); + let snapshot1 = map.snapshot(); + map.insert(33, "thirty-three"); + let snapshot2 = map.snapshot(); + map.insert(44, "forty-four"); + map.rollback_to(snapshot1); // bogus, but accepted + map.rollback_to(snapshot2); // asserts +} + +#[test] +fn 
nested_commit_then_rollback() { + let mut map = SnapshotMap::default(); + map.insert(22, "twenty-two"); + let snapshot1 = map.snapshot(); + let snapshot2 = map.snapshot(); + map.insert(22, "thirty-three"); + map.commit(snapshot2); + assert_eq!(map[&22], "thirty-three"); + map.rollback_to(snapshot1); + assert_eq!(map[&22], "twenty-two"); +} diff --git a/compiler/rustc_data_structures/src/sorted_map.rs b/compiler/rustc_data_structures/src/sorted_map.rs new file mode 100644 index 000000000..9efea1228 --- /dev/null +++ b/compiler/rustc_data_structures/src/sorted_map.rs @@ -0,0 +1,302 @@ +use crate::stable_hasher::{HashStable, StableHasher}; +use std::borrow::Borrow; +use std::cmp::Ordering; +use std::iter::FromIterator; +use std::mem; +use std::ops::{Bound, Index, IndexMut, RangeBounds}; + +mod index_map; + +pub use index_map::SortedIndexMultiMap; + +/// `SortedMap` is a data structure with similar characteristics as BTreeMap but +/// slightly different trade-offs: lookup, insertion, and removal are *O*(log(*n*)) +/// and elements can be iterated in order cheaply. +/// +/// `SortedMap` can be faster than a `BTreeMap` for small sizes (<50) since it +/// stores data in a more compact way. It also supports accessing contiguous +/// ranges of elements as a slice, and slices of already sorted elements can be +/// inserted efficiently. +#[derive(Clone, PartialEq, Eq, PartialOrd, Ord, Hash, Debug, Encodable, Decodable)] +pub struct SortedMap { + data: Vec<(K, V)>, +} + +impl Default for SortedMap { + #[inline] + fn default() -> SortedMap { + SortedMap { data: Vec::new() } + } +} + +impl SortedMap { + #[inline] + pub const fn new() -> SortedMap { + SortedMap { data: Vec::new() } + } +} + +impl SortedMap { + /// Construct a `SortedMap` from a presorted set of elements. This is faster + /// than creating an empty map and then inserting the elements individually. + /// + /// It is up to the caller to make sure that the elements are sorted by key + /// and that there are no duplicates. 
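// [Editor's note] A minimal usage sketch, not part of the upstream patch. It assumes the
// generic parameters stripped by text extraction (roughly `SortedMap<K: Ord, V>`) and only
// the methods defined in this file:
//
//     let mut map: SortedMap<u32, &str> = SortedMap::new();
//     map.insert(3, "three");
//     map.insert(1, "one");
//     map.insert(2, "two");
//     assert_eq!(map.get(&2), Some(&"two"));
//     // Iteration is in key order because the backing Vec is kept sorted.
//     assert_eq!(map.keys().copied().collect::<Vec<_>>(), vec![1, 2, 3]);
//     // Contiguous key ranges come back as a slice of (key, value) pairs.
//     assert_eq!(map.range(1..3), &[(1, "one"), (2, "two")][..]);
//
//     // Presorted, duplicate-free input skips the per-element binary search:
//     let presorted = SortedMap::from_presorted_elements(vec![(10, 'a'), (20, 'b')]);
//     assert_eq!(presorted[&20], 'b');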
+ #[inline] + pub fn from_presorted_elements(elements: Vec<(K, V)>) -> SortedMap { + debug_assert!(elements.array_windows().all(|[fst, snd]| fst.0 < snd.0)); + + SortedMap { data: elements } + } + + #[inline] + pub fn insert(&mut self, key: K, mut value: V) -> Option { + match self.lookup_index_for(&key) { + Ok(index) => { + let slot = unsafe { self.data.get_unchecked_mut(index) }; + mem::swap(&mut slot.1, &mut value); + Some(value) + } + Err(index) => { + self.data.insert(index, (key, value)); + None + } + } + } + + #[inline] + pub fn remove(&mut self, key: &K) -> Option { + match self.lookup_index_for(key) { + Ok(index) => Some(self.data.remove(index).1), + Err(_) => None, + } + } + + #[inline] + pub fn get(&self, key: &Q) -> Option<&V> + where + K: Borrow, + Q: Ord + ?Sized, + { + match self.lookup_index_for(key) { + Ok(index) => unsafe { Some(&self.data.get_unchecked(index).1) }, + Err(_) => None, + } + } + + #[inline] + pub fn get_mut(&mut self, key: &Q) -> Option<&mut V> + where + K: Borrow, + Q: Ord + ?Sized, + { + match self.lookup_index_for(key) { + Ok(index) => unsafe { Some(&mut self.data.get_unchecked_mut(index).1) }, + Err(_) => None, + } + } + + #[inline] + pub fn clear(&mut self) { + self.data.clear(); + } + + /// Iterate over elements, sorted by key + #[inline] + pub fn iter(&self) -> std::slice::Iter<'_, (K, V)> { + self.data.iter() + } + + /// Iterate over the keys, sorted + #[inline] + pub fn keys(&self) -> impl Iterator + ExactSizeIterator + DoubleEndedIterator { + self.data.iter().map(|&(ref k, _)| k) + } + + /// Iterate over values, sorted by key + #[inline] + pub fn values(&self) -> impl Iterator + ExactSizeIterator + DoubleEndedIterator { + self.data.iter().map(|&(_, ref v)| v) + } + + #[inline] + pub fn len(&self) -> usize { + self.data.len() + } + + #[inline] + pub fn is_empty(&self) -> bool { + self.len() == 0 + } + + #[inline] + pub fn range(&self, range: R) -> &[(K, V)] + where + R: RangeBounds, + { + let (start, end) = self.range_slice_indices(range); + &self.data[start..end] + } + + #[inline] + pub fn remove_range(&mut self, range: R) + where + R: RangeBounds, + { + let (start, end) = self.range_slice_indices(range); + self.data.splice(start..end, std::iter::empty()); + } + + /// Mutate all keys with the given function `f`. This mutation must not + /// change the sort-order of keys. + #[inline] + pub fn offset_keys(&mut self, f: F) + where + F: Fn(&mut K), + { + self.data.iter_mut().map(|&mut (ref mut k, _)| k).for_each(f); + } + + /// Inserts a presorted range of elements into the map. If the range can be + /// inserted as a whole in between to existing elements of the map, this + /// will be faster than inserting the elements individually. + /// + /// It is up to the caller to make sure that the elements are sorted by key + /// and that there are no duplicates. + #[inline] + pub fn insert_presorted(&mut self, mut elements: Vec<(K, V)>) { + if elements.is_empty() { + return; + } + + debug_assert!(elements.array_windows().all(|[fst, snd]| fst.0 < snd.0)); + + let start_index = self.lookup_index_for(&elements[0].0); + + let drain = match start_index { + Ok(index) => { + let mut drain = elements.drain(..); + self.data[index] = drain.next().unwrap(); + drain + } + Err(index) => { + if index == self.data.len() || elements.last().unwrap().0 < self.data[index].0 { + // We can copy the whole range without having to mix with + // existing elements. 
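// [Editor's note] `index..index` is an empty range, so this `splice` removes nothing and
// simply inserts the whole drained batch at `index` in one go, instead of inserting the
// elements one at a time.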
+ self.data.splice(index..index, elements.drain(..)); + return; + } + + let mut drain = elements.drain(..); + self.data.insert(index, drain.next().unwrap()); + drain + } + }; + + // Insert the rest + for (k, v) in drain { + self.insert(k, v); + } + } + + /// Looks up the key in `self.data` via `slice::binary_search()`. + #[inline(always)] + fn lookup_index_for(&self, key: &Q) -> Result + where + K: Borrow, + Q: Ord + ?Sized, + { + self.data.binary_search_by(|&(ref x, _)| x.borrow().cmp(key)) + } + + #[inline] + fn range_slice_indices(&self, range: R) -> (usize, usize) + where + R: RangeBounds, + { + let start = match range.start_bound() { + Bound::Included(ref k) => match self.lookup_index_for(k) { + Ok(index) | Err(index) => index, + }, + Bound::Excluded(ref k) => match self.lookup_index_for(k) { + Ok(index) => index + 1, + Err(index) => index, + }, + Bound::Unbounded => 0, + }; + + let end = match range.end_bound() { + Bound::Included(ref k) => match self.lookup_index_for(k) { + Ok(index) => index + 1, + Err(index) => index, + }, + Bound::Excluded(ref k) => match self.lookup_index_for(k) { + Ok(index) | Err(index) => index, + }, + Bound::Unbounded => self.data.len(), + }; + + (start, end) + } + + #[inline] + pub fn contains_key(&self, key: &Q) -> bool + where + K: Borrow, + Q: Ord + ?Sized, + { + self.get(key).is_some() + } +} + +impl IntoIterator for SortedMap { + type Item = (K, V); + type IntoIter = std::vec::IntoIter<(K, V)>; + + fn into_iter(self) -> Self::IntoIter { + self.data.into_iter() + } +} + +impl<'a, K, Q, V> Index<&'a Q> for SortedMap +where + K: Ord + Borrow, + Q: Ord + ?Sized, +{ + type Output = V; + + fn index(&self, key: &Q) -> &Self::Output { + self.get(key).expect("no entry found for key") + } +} + +impl<'a, K, Q, V> IndexMut<&'a Q> for SortedMap +where + K: Ord + Borrow, + Q: Ord + ?Sized, +{ + fn index_mut(&mut self, key: &Q) -> &mut Self::Output { + self.get_mut(key).expect("no entry found for key") + } +} + +impl FromIterator<(K, V)> for SortedMap { + fn from_iter>(iter: T) -> Self { + let mut data: Vec<(K, V)> = iter.into_iter().collect(); + + data.sort_unstable_by(|&(ref k1, _), &(ref k2, _)| k1.cmp(k2)); + data.dedup_by(|&mut (ref k1, _), &mut (ref k2, _)| k1.cmp(k2) == Ordering::Equal); + + SortedMap { data } + } +} + +impl, V: HashStable, CTX> HashStable for SortedMap { + #[inline] + fn hash_stable(&self, ctx: &mut CTX, hasher: &mut StableHasher) { + self.data.hash_stable(ctx, hasher); + } +} + +#[cfg(test)] +mod tests; diff --git a/compiler/rustc_data_structures/src/sorted_map/index_map.rs b/compiler/rustc_data_structures/src/sorted_map/index_map.rs new file mode 100644 index 000000000..0ec32dc43 --- /dev/null +++ b/compiler/rustc_data_structures/src/sorted_map/index_map.rs @@ -0,0 +1,154 @@ +//! A variant of `SortedMap` that preserves insertion order. + +use std::hash::{Hash, Hasher}; +use std::iter::FromIterator; + +use crate::stable_hasher::{HashStable, StableHasher}; +use rustc_index::vec::{Idx, IndexVec}; + +/// An indexed multi-map that preserves insertion order while permitting both *O*(log *n*) lookup of +/// an item by key and *O*(1) lookup by index. +/// +/// This data structure is a hybrid of an [`IndexVec`] and a [`SortedMap`]. Like `IndexVec`, +/// `SortedIndexMultiMap` assigns a typed index to each item while preserving insertion order. +/// Like `SortedMap`, `SortedIndexMultiMap` has efficient lookup of items by key. However, this +/// is accomplished by sorting an array of item indices instead of the items themselves. 
+/// +/// Unlike `SortedMap`, this data structure can hold multiple equivalent items at once, so the +/// `get_by_key` method and its variants return an iterator instead of an `Option`. Equivalent +/// items will be yielded in insertion order. +/// +/// Unlike a general-purpose map like `BTreeSet` or `HashSet`, `SortedMap` and +/// `SortedIndexMultiMap` require *O*(*n*) time to insert a single item. This is because we may need +/// to insert into the middle of the sorted array. Users should avoid mutating this data structure +/// in-place. +/// +/// [`SortedMap`]: super::SortedMap +#[derive(Clone, Debug)] +pub struct SortedIndexMultiMap { + /// The elements of the map in insertion order. + items: IndexVec, + + /// Indices of the items in the set, sorted by the item's key. + idx_sorted_by_item_key: Vec, +} + +impl SortedIndexMultiMap { + #[inline] + pub fn new() -> Self { + SortedIndexMultiMap { items: IndexVec::new(), idx_sorted_by_item_key: Vec::new() } + } + + #[inline] + pub fn len(&self) -> usize { + self.items.len() + } + + #[inline] + pub fn is_empty(&self) -> bool { + self.items.is_empty() + } + + /// Returns an iterator over the items in the map in insertion order. + #[inline] + pub fn into_iter(self) -> impl DoubleEndedIterator { + self.items.into_iter() + } + + /// Returns an iterator over the items in the map in insertion order along with their indices. + #[inline] + pub fn into_iter_enumerated(self) -> impl DoubleEndedIterator { + self.items.into_iter_enumerated() + } + + /// Returns an iterator over the items in the map in insertion order. + #[inline] + pub fn iter(&self) -> impl '_ + DoubleEndedIterator { + self.items.iter().map(|(ref k, ref v)| (k, v)) + } + + /// Returns an iterator over the items in the map in insertion order along with their indices. + #[inline] + pub fn iter_enumerated(&self) -> impl '_ + DoubleEndedIterator { + self.items.iter_enumerated().map(|(i, (ref k, ref v))| (i, (k, v))) + } + + /// Returns the item in the map with the given index. + #[inline] + pub fn get(&self, idx: I) -> Option<&(K, V)> { + self.items.get(idx) + } + + /// Returns an iterator over the items in the map that are equal to `key`. + /// + /// If there are multiple items that are equivalent to `key`, they will be yielded in + /// insertion order. + #[inline] + pub fn get_by_key(&self, key: K) -> impl Iterator + '_ { + self.get_by_key_enumerated(key).map(|(_, v)| v) + } + + /// Returns an iterator over the items in the map that are equal to `key` along with their + /// indices. + /// + /// If there are multiple items that are equivalent to `key`, they will be yielded in + /// insertion order. + #[inline] + pub fn get_by_key_enumerated(&self, key: K) -> impl Iterator + '_ { + let lower_bound = self.idx_sorted_by_item_key.partition_point(|&i| self.items[i].0 < key); + self.idx_sorted_by_item_key[lower_bound..].iter().map_while(move |&i| { + let (k, v) = &self.items[i]; + (k == &key).then_some((i, v)) + }) + } +} + +impl Eq for SortedIndexMultiMap {} +impl PartialEq for SortedIndexMultiMap { + fn eq(&self, other: &Self) -> bool { + // No need to compare the sorted index. If the items are the same, the index will be too. 
+ self.items == other.items + } +} + +impl Hash for SortedIndexMultiMap +where + K: Hash, + V: Hash, +{ + fn hash(&self, hasher: &mut H) { + self.items.hash(hasher) + } +} +impl HashStable for SortedIndexMultiMap +where + K: HashStable, + V: HashStable, +{ + fn hash_stable(&self, ctx: &mut C, hasher: &mut StableHasher) { + self.items.hash_stable(ctx, hasher) + } +} + +impl FromIterator<(K, V)> for SortedIndexMultiMap { + fn from_iter(iter: J) -> Self + where + J: IntoIterator, + { + let items = IndexVec::from_iter(iter); + let mut idx_sorted_by_item_key: Vec<_> = items.indices().collect(); + + // `sort_by_key` is stable, so insertion order is preserved for duplicate items. + idx_sorted_by_item_key.sort_by_key(|&idx| &items[idx].0); + + SortedIndexMultiMap { items, idx_sorted_by_item_key } + } +} + +impl std::ops::Index for SortedIndexMultiMap { + type Output = V; + + fn index(&self, idx: I) -> &Self::Output { + &self.items[idx].1 + } +} diff --git a/compiler/rustc_data_structures/src/sorted_map/tests.rs b/compiler/rustc_data_structures/src/sorted_map/tests.rs new file mode 100644 index 000000000..1e977d709 --- /dev/null +++ b/compiler/rustc_data_structures/src/sorted_map/tests.rs @@ -0,0 +1,222 @@ +use super::{SortedIndexMultiMap, SortedMap}; + +#[test] +fn test_sorted_index_multi_map() { + let entries: Vec<_> = vec![(2, 0), (1, 0), (2, 1), (3, 0), (2, 2)]; + let set: SortedIndexMultiMap = entries.iter().copied().collect(); + + // Insertion order is preserved. + assert!(entries.iter().map(|(ref k, ref v)| (k, v)).eq(set.iter())); + + // Indexing + for (i, expect) in entries.iter().enumerate() { + assert_eq!(set[i], expect.1); + } + + // `get_by_key` works. + assert_eq!(set.get_by_key(3).copied().collect::>(), vec![0]); + assert!(set.get_by_key(4).next().is_none()); + + // `get_by_key` returns items in insertion order. 
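// [Editor's note] Key `2` was inserted at positions 0, 2 and 4 with values 0, 1 and 2,
// so both the returned indices and the values are expected in that insertion order.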
+ let twos: Vec<_> = set.get_by_key_enumerated(2).collect(); + let idxs: Vec = twos.iter().map(|(i, _)| *i).collect(); + let values: Vec = twos.iter().map(|(_, &v)| v).collect(); + + assert_eq!(idxs, vec![0, 2, 4]); + assert_eq!(values, vec![0, 1, 2]); +} + +#[test] +fn test_insert_and_iter() { + let mut map = SortedMap::new(); + let mut expected = Vec::new(); + + for x in 0..100 { + assert_eq!(map.iter().cloned().collect::>(), expected); + + let x = 1000 - x * 2; + map.insert(x, x); + expected.insert(0, (x, x)); + } +} + +#[test] +fn test_get_and_index() { + let mut map = SortedMap::new(); + let mut expected = Vec::new(); + + for x in 0..100 { + let x = 1000 - x; + if x & 1 == 0 { + map.insert(x, x); + } + expected.push(x); + } + + for mut x in expected { + if x & 1 == 0 { + assert_eq!(map.get(&x), Some(&x)); + assert_eq!(map.get_mut(&x), Some(&mut x)); + assert_eq!(map[&x], x); + assert_eq!(&mut map[&x], &mut x); + } else { + assert_eq!(map.get(&x), None); + assert_eq!(map.get_mut(&x), None); + } + } +} + +#[test] +fn test_range() { + let mut map = SortedMap::new(); + map.insert(1, 1); + map.insert(3, 3); + map.insert(6, 6); + map.insert(9, 9); + + let keys = |s: &[(_, _)]| s.into_iter().map(|e| e.0).collect::>(); + + for start in 0..11 { + for end in 0..11 { + if end < start { + continue; + } + + let mut expected = vec![1, 3, 6, 9]; + expected.retain(|&x| x >= start && x < end); + + assert_eq!(keys(map.range(start..end)), expected, "range = {}..{}", start, end); + } + } +} + +#[test] +fn test_offset_keys() { + let mut map = SortedMap::new(); + map.insert(1, 1); + map.insert(3, 3); + map.insert(6, 6); + + map.offset_keys(|k| *k += 1); + + let mut expected = SortedMap::new(); + expected.insert(2, 1); + expected.insert(4, 3); + expected.insert(7, 6); + + assert_eq!(map, expected); +} + +fn keys(s: SortedMap) -> Vec { + s.into_iter().map(|(k, _)| k).collect::>() +} + +fn elements(s: SortedMap) -> Vec<(u32, u32)> { + s.into_iter().collect::>() +} + +#[test] +fn test_remove_range() { + let mut map = SortedMap::new(); + map.insert(1, 1); + map.insert(3, 3); + map.insert(6, 6); + map.insert(9, 9); + + for start in 0..11 { + for end in 0..11 { + if end < start { + continue; + } + + let mut expected = vec![1, 3, 6, 9]; + expected.retain(|&x| x < start || x >= end); + + let mut map = map.clone(); + map.remove_range(start..end); + + assert_eq!(keys(map), expected, "range = {}..{}", start, end); + } + } +} + +#[test] +fn test_remove() { + let mut map = SortedMap::new(); + let mut expected = Vec::new(); + + for x in 0..10 { + map.insert(x, x); + expected.push((x, x)); + } + + for x in 0..10 { + let mut map = map.clone(); + let mut expected = expected.clone(); + + assert_eq!(map.remove(&x), Some(x)); + expected.remove(x as usize); + + assert_eq!(map.iter().cloned().collect::>(), expected); + } +} + +#[test] +fn test_insert_presorted_non_overlapping() { + let mut map = SortedMap::new(); + map.insert(2, 0); + map.insert(8, 0); + + map.insert_presorted(vec![(3, 0), (7, 0)]); + + let expected = vec![2, 3, 7, 8]; + assert_eq!(keys(map), expected); +} + +#[test] +fn test_insert_presorted_first_elem_equal() { + let mut map = SortedMap::new(); + map.insert(2, 2); + map.insert(8, 8); + + map.insert_presorted(vec![(2, 0), (7, 7)]); + + let expected = vec![(2, 0), (7, 7), (8, 8)]; + assert_eq!(elements(map), expected); +} + +#[test] +fn test_insert_presorted_last_elem_equal() { + let mut map = SortedMap::new(); + map.insert(2, 2); + map.insert(8, 8); + + map.insert_presorted(vec![(3, 3), (8, 0)]); + + let 
expected = vec![(2, 2), (3, 3), (8, 0)]; + assert_eq!(elements(map), expected); +} + +#[test] +fn test_insert_presorted_shuffle() { + let mut map = SortedMap::new(); + map.insert(2, 2); + map.insert(7, 7); + + map.insert_presorted(vec![(1, 1), (3, 3), (8, 8)]); + + let expected = vec![(1, 1), (2, 2), (3, 3), (7, 7), (8, 8)]; + assert_eq!(elements(map), expected); +} + +#[test] +fn test_insert_presorted_at_end() { + let mut map = SortedMap::new(); + map.insert(1, 1); + map.insert(2, 2); + + map.insert_presorted(vec![(3, 3), (8, 8)]); + + let expected = vec![(1, 1), (2, 2), (3, 3), (8, 8)]; + assert_eq!(elements(map), expected); +} diff --git a/compiler/rustc_data_structures/src/sso/either_iter.rs b/compiler/rustc_data_structures/src/sso/either_iter.rs new file mode 100644 index 000000000..131eeef45 --- /dev/null +++ b/compiler/rustc_data_structures/src/sso/either_iter.rs @@ -0,0 +1,75 @@ +use std::fmt; +use std::iter::ExactSizeIterator; +use std::iter::FusedIterator; +use std::iter::Iterator; + +/// Iterator which may contain instance of +/// one of two specific implementations. +/// +/// Note: For most methods providing custom +/// implementation may marginally +/// improve performance by avoiding +/// doing Left/Right match on every step +/// and doing it only once instead. +#[derive(Clone)] +pub enum EitherIter { + Left(L), + Right(R), +} + +impl Iterator for EitherIter +where + L: Iterator, + R: Iterator, +{ + type Item = L::Item; + + fn next(&mut self) -> Option { + match self { + EitherIter::Left(l) => l.next(), + EitherIter::Right(r) => r.next(), + } + } + + fn size_hint(&self) -> (usize, Option) { + match self { + EitherIter::Left(l) => l.size_hint(), + EitherIter::Right(r) => r.size_hint(), + } + } +} + +impl ExactSizeIterator for EitherIter +where + L: ExactSizeIterator, + R: ExactSizeIterator, + EitherIter: Iterator, +{ + fn len(&self) -> usize { + match self { + EitherIter::Left(l) => l.len(), + EitherIter::Right(r) => r.len(), + } + } +} + +impl FusedIterator for EitherIter +where + L: FusedIterator, + R: FusedIterator, + EitherIter: Iterator, +{ +} + +impl fmt::Debug for EitherIter +where + L: fmt::Debug, + R: fmt::Debug, +{ + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + match self { + EitherIter::Left(l) => l.fmt(f), + EitherIter::Right(r) => r.fmt(f), + } + } +} diff --git a/compiler/rustc_data_structures/src/sso/map.rs b/compiler/rustc_data_structures/src/sso/map.rs new file mode 100644 index 000000000..ec6a62016 --- /dev/null +++ b/compiler/rustc_data_structures/src/sso/map.rs @@ -0,0 +1,557 @@ +use super::either_iter::EitherIter; +use crate::fx::FxHashMap; +use arrayvec::ArrayVec; +use std::fmt; +use std::hash::Hash; +use std::iter::FromIterator; +use std::ops::Index; + +// For pointer-sized arguments arrays +// are faster than set/map for up to 64 +// arguments. +// +// On the other hand such a big array +// hurts cache performance, makes passing +// sso structures around very expensive. +// +// Biggest performance benefit is gained +// for reasonably small arrays that stay +// small in vast majority of cases. +// +// '8' is chosen as a sane default, to be +// reevaluated later. +const SSO_ARRAY_SIZE: usize = 8; + +/// Small-storage-optimized implementation of a map. +/// +/// Stores elements in a small array up to a certain length +/// and switches to `HashMap` when that length is exceeded. +// +// FIXME: Implements subset of HashMap API. 
+// +// Missing HashMap API: +// all hasher-related +// try_reserve +// shrink_to (unstable) +// drain_filter (unstable) +// into_keys/into_values (unstable) +// all raw_entry-related +// PartialEq/Eq (requires sorting the array) +// Entry::or_insert_with_key +// Vacant/Occupied entries and related +// +// FIXME: In HashMap most methods accepting key reference +// accept reference to generic `Q` where `K: Borrow`. +// +// However, using this approach in `HashMap::get` apparently +// breaks inlining and noticeably reduces performance. +// +// Performance *should* be the same given that borrow is +// a NOP in most cases, but in practice that's not the case. +// +// Further investigation is required. +// +// Affected methods: +// SsoHashMap::get +// SsoHashMap::get_mut +// SsoHashMap::get_entry +// SsoHashMap::get_key_value +// SsoHashMap::contains_key +// SsoHashMap::remove +// SsoHashMap::remove_entry +// Index::index +// SsoHashSet::take +// SsoHashSet::get +// SsoHashSet::remove +// SsoHashSet::contains + +#[derive(Clone)] +pub enum SsoHashMap { + Array(ArrayVec<(K, V), SSO_ARRAY_SIZE>), + Map(FxHashMap), +} + +impl SsoHashMap { + /// Creates an empty `SsoHashMap`. + #[inline] + pub fn new() -> Self { + SsoHashMap::Array(ArrayVec::new()) + } + + /// Creates an empty `SsoHashMap` with the specified capacity. + pub fn with_capacity(cap: usize) -> Self { + if cap <= SSO_ARRAY_SIZE { + Self::new() + } else { + SsoHashMap::Map(FxHashMap::with_capacity_and_hasher(cap, Default::default())) + } + } + + /// Clears the map, removing all key-value pairs. Keeps the allocated memory + /// for reuse. + pub fn clear(&mut self) { + match self { + SsoHashMap::Array(array) => array.clear(), + SsoHashMap::Map(map) => map.clear(), + } + } + + /// Returns the number of elements the map can hold without reallocating. + pub fn capacity(&self) -> usize { + match self { + SsoHashMap::Array(_) => SSO_ARRAY_SIZE, + SsoHashMap::Map(map) => map.capacity(), + } + } + + /// Returns the number of elements in the map. + pub fn len(&self) -> usize { + match self { + SsoHashMap::Array(array) => array.len(), + SsoHashMap::Map(map) => map.len(), + } + } + + /// Returns `true` if the map contains no elements. + pub fn is_empty(&self) -> bool { + match self { + SsoHashMap::Array(array) => array.is_empty(), + SsoHashMap::Map(map) => map.is_empty(), + } + } + + /// An iterator visiting all key-value pairs in arbitrary order. + /// The iterator element type is `(&'a K, &'a V)`. + #[inline] + pub fn iter(&self) -> <&Self as IntoIterator>::IntoIter { + self.into_iter() + } + + /// An iterator visiting all key-value pairs in arbitrary order, + /// with mutable references to the values. + /// The iterator element type is `(&'a K, &'a mut V)`. + #[inline] + pub fn iter_mut(&mut self) -> impl Iterator { + self.into_iter() + } + + /// An iterator visiting all keys in arbitrary order. + /// The iterator element type is `&'a K`. + pub fn keys(&self) -> impl Iterator { + match self { + SsoHashMap::Array(array) => EitherIter::Left(array.iter().map(|(k, _v)| k)), + SsoHashMap::Map(map) => EitherIter::Right(map.keys()), + } + } + + /// An iterator visiting all values in arbitrary order. + /// The iterator element type is `&'a V`. + pub fn values(&self) -> impl Iterator { + match self { + SsoHashMap::Array(array) => EitherIter::Left(array.iter().map(|(_k, v)| v)), + SsoHashMap::Map(map) => EitherIter::Right(map.values()), + } + } + + /// An iterator visiting all values mutably in arbitrary order. + /// The iterator element type is `&'a mut V`. 
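// [Editor's note] A minimal usage sketch, not part of the upstream patch. It assumes the
// stripped generic parameters (roughly `SsoHashMap<K: Eq + Hash, V>`); the HashMap-style
// API hides whether the data currently lives in the inline array or in an FxHashMap:
//
//     let mut map: SsoHashMap<u32, &str> = SsoHashMap::new();
//     map.insert(1, "one");
//     map.insert(2, "two");
//     assert_eq!(map.get(&1), Some(&"one"));
//     // With fewer than SSO_ARRAY_SIZE entries the backing store is still the inline array.
//     assert_eq!(map.capacity(), 8);
//     for (k, v) in map.iter() {
//         println!("{k} -> {v}");
//     }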
+ pub fn values_mut(&mut self) -> impl Iterator { + match self { + SsoHashMap::Array(array) => EitherIter::Left(array.iter_mut().map(|(_k, v)| v)), + SsoHashMap::Map(map) => EitherIter::Right(map.values_mut()), + } + } + + /// Clears the map, returning all key-value pairs as an iterator. Keeps the + /// allocated memory for reuse. + pub fn drain(&mut self) -> impl Iterator + '_ { + match self { + SsoHashMap::Array(array) => EitherIter::Left(array.drain(..)), + SsoHashMap::Map(map) => EitherIter::Right(map.drain()), + } + } +} + +impl SsoHashMap { + /// Changes underlying storage from array to hashmap + /// if array is full. + fn migrate_if_full(&mut self) { + if let SsoHashMap::Array(array) = self { + if array.is_full() { + *self = SsoHashMap::Map(array.drain(..).collect()); + } + } + } + + /// Reserves capacity for at least `additional` more elements to be inserted + /// in the `SsoHashMap`. The collection may reserve more space to avoid + /// frequent reallocations. + pub fn reserve(&mut self, additional: usize) { + match self { + SsoHashMap::Array(array) => { + if SSO_ARRAY_SIZE < (array.len() + additional) { + let mut map: FxHashMap = array.drain(..).collect(); + map.reserve(additional); + *self = SsoHashMap::Map(map); + } + } + SsoHashMap::Map(map) => map.reserve(additional), + } + } + + /// Shrinks the capacity of the map as much as possible. It will drop + /// down as much as possible while maintaining the internal rules + /// and possibly leaving some space in accordance with the resize policy. + pub fn shrink_to_fit(&mut self) { + if let SsoHashMap::Map(map) = self { + if map.len() <= SSO_ARRAY_SIZE { + *self = SsoHashMap::Array(map.drain().collect()); + } else { + map.shrink_to_fit(); + } + } + } + + /// Retains only the elements specified by the predicate. + pub fn retain(&mut self, mut f: F) + where + F: FnMut(&K, &mut V) -> bool, + { + match self { + SsoHashMap::Array(array) => array.retain(|(k, v)| f(k, v)), + SsoHashMap::Map(map) => map.retain(f), + } + } + + /// Inserts a key-value pair into the map. + /// + /// If the map did not have this key present, [`None`] is returned. + /// + /// If the map did have this key present, the value is updated, and the old + /// value is returned. The key is not updated, though; this matters for + /// types that can be `==` without being identical. See the [module-level + /// documentation] for more. + pub fn insert(&mut self, key: K, value: V) -> Option { + match self { + SsoHashMap::Array(array) => { + for (k, v) in array.iter_mut() { + if *k == key { + let old_value = std::mem::replace(v, value); + return Some(old_value); + } + } + if let Err(error) = array.try_push((key, value)) { + let mut map: FxHashMap = array.drain(..).collect(); + let (key, value) = error.element(); + map.insert(key, value); + *self = SsoHashMap::Map(map); + } + None + } + SsoHashMap::Map(map) => map.insert(key, value), + } + } + + /// Removes a key from the map, returning the value at the key if the key + /// was previously in the map. + pub fn remove(&mut self, key: &K) -> Option { + match self { + SsoHashMap::Array(array) => { + if let Some(index) = array.iter().position(|(k, _v)| k == key) { + Some(array.swap_remove(index).1) + } else { + None + } + } + SsoHashMap::Map(map) => map.remove(key), + } + } + + /// Removes a key from the map, returning the stored key and value if the + /// key was previously in the map. 
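// [Editor's note] Illustrative sketch of the storage migration described above, not part of
// the upstream patch (generic parameters assumed as `SsoHashMap<K: Eq + Hash, V>`):
//
//     let mut map: SsoHashMap<u32, u32> = SsoHashMap::new();
//     for i in 0..9 {
//         map.insert(i, i * 10);
//     }
//     // The ninth insert overflows the 8-element inline array, so the entries are
//     // drained into an FxHashMap behind the scenes; the public API does not change.
//     assert_eq!(map.len(), 9);
//
//     map.remove(&8);
//     // Once the length fits again, shrink_to_fit switches back to the inline array.
//     map.shrink_to_fit();
//     assert_eq!(map.capacity(), 8);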
+ pub fn remove_entry(&mut self, key: &K) -> Option<(K, V)> { + match self { + SsoHashMap::Array(array) => { + if let Some(index) = array.iter().position(|(k, _v)| k == key) { + Some(array.swap_remove(index)) + } else { + None + } + } + SsoHashMap::Map(map) => map.remove_entry(key), + } + } + + /// Returns a reference to the value corresponding to the key. + pub fn get(&self, key: &K) -> Option<&V> { + match self { + SsoHashMap::Array(array) => { + for (k, v) in array { + if k == key { + return Some(v); + } + } + None + } + SsoHashMap::Map(map) => map.get(key), + } + } + + /// Returns a mutable reference to the value corresponding to the key. + pub fn get_mut(&mut self, key: &K) -> Option<&mut V> { + match self { + SsoHashMap::Array(array) => { + for (k, v) in array { + if k == key { + return Some(v); + } + } + None + } + SsoHashMap::Map(map) => map.get_mut(key), + } + } + + /// Returns the key-value pair corresponding to the supplied key. + pub fn get_key_value(&self, key: &K) -> Option<(&K, &V)> { + match self { + SsoHashMap::Array(array) => { + for (k, v) in array { + if k == key { + return Some((k, v)); + } + } + None + } + SsoHashMap::Map(map) => map.get_key_value(key), + } + } + + /// Returns `true` if the map contains a value for the specified key. + pub fn contains_key(&self, key: &K) -> bool { + match self { + SsoHashMap::Array(array) => array.iter().any(|(k, _v)| k == key), + SsoHashMap::Map(map) => map.contains_key(key), + } + } + + /// Gets the given key's corresponding entry in the map for in-place manipulation. + #[inline] + pub fn entry(&mut self, key: K) -> Entry<'_, K, V> { + Entry { ssomap: self, key } + } +} + +impl Default for SsoHashMap { + #[inline] + fn default() -> Self { + Self::new() + } +} + +impl FromIterator<(K, V)> for SsoHashMap { + fn from_iter>(iter: I) -> SsoHashMap { + let mut map: SsoHashMap = Default::default(); + map.extend(iter); + map + } +} + +impl Extend<(K, V)> for SsoHashMap { + fn extend(&mut self, iter: I) + where + I: IntoIterator, + { + for (key, value) in iter.into_iter() { + self.insert(key, value); + } + } + + #[inline] + fn extend_one(&mut self, (k, v): (K, V)) { + self.insert(k, v); + } + + fn extend_reserve(&mut self, additional: usize) { + match self { + SsoHashMap::Array(array) => { + if SSO_ARRAY_SIZE < (array.len() + additional) { + let mut map: FxHashMap = array.drain(..).collect(); + map.extend_reserve(additional); + *self = SsoHashMap::Map(map); + } + } + SsoHashMap::Map(map) => map.extend_reserve(additional), + } + } +} + +impl<'a, K, V> Extend<(&'a K, &'a V)> for SsoHashMap +where + K: Eq + Hash + Copy, + V: Copy, +{ + fn extend>(&mut self, iter: T) { + self.extend(iter.into_iter().map(|(k, v)| (*k, *v))) + } + + #[inline] + fn extend_one(&mut self, (&k, &v): (&'a K, &'a V)) { + self.insert(k, v); + } + + #[inline] + fn extend_reserve(&mut self, additional: usize) { + Extend::<(K, V)>::extend_reserve(self, additional) + } +} + +impl IntoIterator for SsoHashMap { + type IntoIter = EitherIter< + as IntoIterator>::IntoIter, + as IntoIterator>::IntoIter, + >; + type Item = ::Item; + + fn into_iter(self) -> Self::IntoIter { + match self { + SsoHashMap::Array(array) => EitherIter::Left(array.into_iter()), + SsoHashMap::Map(map) => EitherIter::Right(map.into_iter()), + } + } +} + +/// adapts Item of array reference iterator to Item of hashmap reference iterator. 
+#[inline(always)] +fn adapt_array_ref_it(pair: &(K, V)) -> (&K, &V) { + let (a, b) = pair; + (a, b) +} + +/// adapts Item of array mut reference iterator to Item of hashmap mut reference iterator. +#[inline(always)] +fn adapt_array_mut_it(pair: &mut (K, V)) -> (&K, &mut V) { + let (a, b) = pair; + (a, b) +} + +impl<'a, K, V> IntoIterator for &'a SsoHashMap { + type IntoIter = EitherIter< + std::iter::Map< + <&'a ArrayVec<(K, V), 8> as IntoIterator>::IntoIter, + fn(&'a (K, V)) -> (&'a K, &'a V), + >, + <&'a FxHashMap as IntoIterator>::IntoIter, + >; + type Item = ::Item; + + fn into_iter(self) -> Self::IntoIter { + match self { + SsoHashMap::Array(array) => EitherIter::Left(array.into_iter().map(adapt_array_ref_it)), + SsoHashMap::Map(map) => EitherIter::Right(map.iter()), + } + } +} + +impl<'a, K, V> IntoIterator for &'a mut SsoHashMap { + type IntoIter = EitherIter< + std::iter::Map< + <&'a mut ArrayVec<(K, V), 8> as IntoIterator>::IntoIter, + fn(&'a mut (K, V)) -> (&'a K, &'a mut V), + >, + <&'a mut FxHashMap as IntoIterator>::IntoIter, + >; + type Item = ::Item; + + fn into_iter(self) -> Self::IntoIter { + match self { + SsoHashMap::Array(array) => EitherIter::Left(array.into_iter().map(adapt_array_mut_it)), + SsoHashMap::Map(map) => EitherIter::Right(map.iter_mut()), + } + } +} + +impl fmt::Debug for SsoHashMap +where + K: fmt::Debug, + V: fmt::Debug, +{ + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + f.debug_map().entries(self.iter()).finish() + } +} + +impl<'a, K, V> Index<&'a K> for SsoHashMap +where + K: Eq + Hash, +{ + type Output = V; + + #[inline] + fn index(&self, key: &K) -> &V { + self.get(key).expect("no entry found for key") + } +} + +/// A view into a single entry in a map. +pub struct Entry<'a, K, V> { + ssomap: &'a mut SsoHashMap, + key: K, +} + +impl<'a, K: Eq + Hash, V> Entry<'a, K, V> { + /// Provides in-place mutable access to an occupied entry before any + /// potential inserts into the map. + pub fn and_modify(self, f: F) -> Self + where + F: FnOnce(&mut V), + { + if let Some(value) = self.ssomap.get_mut(&self.key) { + f(value); + } + self + } + + /// Ensures a value is in the entry by inserting the default if empty, and returns + /// a mutable reference to the value in the entry. + #[inline] + pub fn or_insert(self, value: V) -> &'a mut V { + self.or_insert_with(|| value) + } + + /// Ensures a value is in the entry by inserting the result of the default function if empty, + /// and returns a mutable reference to the value in the entry. + pub fn or_insert_with V>(self, default: F) -> &'a mut V { + self.ssomap.migrate_if_full(); + match self.ssomap { + SsoHashMap::Array(array) => { + let key_ref = &self.key; + let found_index = array.iter().position(|(k, _v)| k == key_ref); + let index = if let Some(index) = found_index { + index + } else { + let index = array.len(); + array.try_push((self.key, default())).unwrap(); + index + }; + &mut array[index].1 + } + SsoHashMap::Map(map) => map.entry(self.key).or_insert_with(default), + } + } + + /// Returns a reference to this entry's key. + #[inline] + pub fn key(&self) -> &K { + &self.key + } +} + +impl<'a, K: Eq + Hash, V: Default> Entry<'a, K, V> { + /// Ensures a value is in the entry by inserting the default value if empty, + /// and returns a mutable reference to the value in the entry. 
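// [Editor's note] A small sketch of this Entry API, not part of the upstream patch
// (generic parameters assumed as `SsoHashMap<K: Eq + Hash, V>`):
//
//     let mut counts: SsoHashMap<&str, u32> = SsoHashMap::new();
//     for word in ["a", "b", "a"] {
//         *counts.entry(word).or_insert(0) += 1;
//     }
//     assert_eq!(counts.get(&"a"), Some(&2));
//
//     // `and_modify` only runs on occupied entries; `or_default` fills vacant ones.
//     counts.entry("c").and_modify(|v| *v += 10).or_default();
//     assert_eq!(counts.get(&"c"), Some(&0));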
+ #[inline] + pub fn or_default(self) -> &'a mut V { + self.or_insert_with(Default::default) + } +} diff --git a/compiler/rustc_data_structures/src/sso/mod.rs b/compiler/rustc_data_structures/src/sso/mod.rs new file mode 100644 index 000000000..dd21bc8e6 --- /dev/null +++ b/compiler/rustc_data_structures/src/sso/mod.rs @@ -0,0 +1,6 @@ +mod either_iter; +mod map; +mod set; + +pub use map::SsoHashMap; +pub use set::SsoHashSet; diff --git a/compiler/rustc_data_structures/src/sso/set.rs b/compiler/rustc_data_structures/src/sso/set.rs new file mode 100644 index 000000000..4fda3adb7 --- /dev/null +++ b/compiler/rustc_data_structures/src/sso/set.rs @@ -0,0 +1,238 @@ +use std::fmt; +use std::hash::Hash; +use std::iter::FromIterator; + +use super::map::SsoHashMap; + +/// Small-storage-optimized implementation of a set. +/// +/// Stores elements in a small array up to a certain length +/// and switches to `HashSet` when that length is exceeded. +// +// FIXME: Implements subset of HashSet API. +// +// Missing HashSet API: +// all hasher-related +// try_reserve +// shrink_to (unstable) +// drain_filter (unstable) +// replace +// get_or_insert/get_or_insert_owned/get_or_insert_with (unstable) +// difference/symmetric_difference/intersection/union +// is_disjoint/is_subset/is_superset +// PartialEq/Eq (requires SsoHashMap implementation) +// BitOr/BitAnd/BitXor/Sub +#[derive(Clone)] +pub struct SsoHashSet { + map: SsoHashMap, +} + +/// Adapter function used ot return +/// result if SsoHashMap functions into +/// result SsoHashSet should return. +#[inline(always)] +fn entry_to_key((k, _v): (K, V)) -> K { + k +} + +impl SsoHashSet { + /// Creates an empty `SsoHashSet`. + #[inline] + pub fn new() -> Self { + Self { map: SsoHashMap::new() } + } + + /// Creates an empty `SsoHashSet` with the specified capacity. + #[inline] + pub fn with_capacity(cap: usize) -> Self { + Self { map: SsoHashMap::with_capacity(cap) } + } + + /// Clears the set, removing all values. + #[inline] + pub fn clear(&mut self) { + self.map.clear() + } + + /// Returns the number of elements the set can hold without reallocating. + #[inline] + pub fn capacity(&self) -> usize { + self.map.capacity() + } + + /// Returns the number of elements in the set. + #[inline] + pub fn len(&self) -> usize { + self.map.len() + } + + /// Returns `true` if the set contains no elements. + #[inline] + pub fn is_empty(&self) -> bool { + self.map.is_empty() + } + + /// An iterator visiting all elements in arbitrary order. + /// The iterator element type is `&'a T`. + #[inline] + pub fn iter(&self) -> impl Iterator { + self.into_iter() + } + + /// Clears the set, returning all elements in an iterator. + #[inline] + pub fn drain(&mut self) -> impl Iterator + '_ { + self.map.drain().map(entry_to_key) + } +} + +impl SsoHashSet { + /// Reserves capacity for at least `additional` more elements to be inserted + /// in the `SsoHashSet`. The collection may reserve more space to avoid + /// frequent reallocations. + #[inline] + pub fn reserve(&mut self, additional: usize) { + self.map.reserve(additional) + } + + /// Shrinks the capacity of the set as much as possible. It will drop + /// down as much as possible while maintaining the internal rules + /// and possibly leaving some space in accordance with the resize policy. + #[inline] + pub fn shrink_to_fit(&mut self) { + self.map.shrink_to_fit() + } + + /// Retains only the elements specified by the predicate. 
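// [Editor's note] A minimal usage sketch, not part of the upstream patch (element type
// assumed as `T: Eq + Hash`); `SsoHashSet` simply wraps an `SsoHashMap` with `()` values:
//
//     let mut set: SsoHashSet<u32> = SsoHashSet::new();
//     assert!(set.insert(3));   // newly inserted
//     assert!(!set.insert(3));  // already present
//     set.extend([1, 2, 4]);
//     assert!(set.contains(&4));
//     set.retain(|&x| x % 2 == 0);
//     assert_eq!(set.len(), 2); // only 2 and 4 remain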
+ #[inline] + pub fn retain(&mut self, mut f: F) + where + F: FnMut(&T) -> bool, + { + self.map.retain(|k, _v| f(k)) + } + + /// Removes and returns the value in the set, if any, that is equal to the given one. + #[inline] + pub fn take(&mut self, value: &T) -> Option { + self.map.remove_entry(value).map(entry_to_key) + } + + /// Returns a reference to the value in the set, if any, that is equal to the given value. + #[inline] + pub fn get(&self, value: &T) -> Option<&T> { + self.map.get_key_value(value).map(entry_to_key) + } + + /// Adds a value to the set. + /// + /// Returns whether the value was newly inserted. That is: + /// + /// - If the set did not previously contain this value, `true` is returned. + /// - If the set already contained this value, `false` is returned. + #[inline] + pub fn insert(&mut self, elem: T) -> bool { + self.map.insert(elem, ()).is_none() + } + + /// Removes a value from the set. Returns whether the value was + /// present in the set. + #[inline] + pub fn remove(&mut self, value: &T) -> bool { + self.map.remove(value).is_some() + } + + /// Returns `true` if the set contains a value. + #[inline] + pub fn contains(&self, value: &T) -> bool { + self.map.contains_key(value) + } +} + +impl FromIterator for SsoHashSet { + fn from_iter>(iter: I) -> SsoHashSet { + let mut set: SsoHashSet = Default::default(); + set.extend(iter); + set + } +} + +impl Default for SsoHashSet { + #[inline] + fn default() -> Self { + Self::new() + } +} + +impl Extend for SsoHashSet { + fn extend(&mut self, iter: I) + where + I: IntoIterator, + { + for val in iter.into_iter() { + self.insert(val); + } + } + + #[inline] + fn extend_one(&mut self, item: T) { + self.insert(item); + } + + #[inline] + fn extend_reserve(&mut self, additional: usize) { + self.map.extend_reserve(additional) + } +} + +impl<'a, T> Extend<&'a T> for SsoHashSet +where + T: 'a + Eq + Hash + Copy, +{ + #[inline] + fn extend>(&mut self, iter: I) { + self.extend(iter.into_iter().cloned()); + } + + #[inline] + fn extend_one(&mut self, &item: &'a T) { + self.insert(item); + } + + #[inline] + fn extend_reserve(&mut self, additional: usize) { + Extend::::extend_reserve(self, additional) + } +} + +impl IntoIterator for SsoHashSet { + type IntoIter = std::iter::Map< as IntoIterator>::IntoIter, fn((T, ())) -> T>; + type Item = ::Item; + + #[inline] + fn into_iter(self) -> Self::IntoIter { + self.map.into_iter().map(entry_to_key) + } +} + +impl<'a, T> IntoIterator for &'a SsoHashSet { + type IntoIter = std::iter::Map< + <&'a SsoHashMap as IntoIterator>::IntoIter, + fn((&'a T, &'a ())) -> &'a T, + >; + type Item = ::Item; + + #[inline] + fn into_iter(self) -> Self::IntoIter { + self.map.iter().map(entry_to_key) + } +} + +impl fmt::Debug for SsoHashSet +where + T: fmt::Debug, +{ + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + f.debug_set().entries(self.iter()).finish() + } +} diff --git a/compiler/rustc_data_structures/src/stable_hasher.rs b/compiler/rustc_data_structures/src/stable_hasher.rs new file mode 100644 index 000000000..ce8591734 --- /dev/null +++ b/compiler/rustc_data_structures/src/stable_hasher.rs @@ -0,0 +1,650 @@ +use crate::sip128::SipHasher128; +use rustc_index::bit_set; +use rustc_index::vec; +use smallvec::SmallVec; +use std::hash::{BuildHasher, Hash, Hasher}; +use std::marker::PhantomData; +use std::mem; + +#[cfg(test)] +mod tests; + +/// When hashing something that ends up affecting properties like symbol names, +/// we want these symbol names to be calculated independently of other factors +/// 
like what architecture you're compiling *from*. +/// +/// To that end we always convert integers to little-endian format before +/// hashing and the architecture dependent `isize` and `usize` types are +/// extended to 64 bits if needed. +pub struct StableHasher { + state: SipHasher128, +} + +impl ::std::fmt::Debug for StableHasher { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + write!(f, "{:?}", self.state) + } +} + +pub trait StableHasherResult: Sized { + fn finish(hasher: StableHasher) -> Self; +} + +impl StableHasher { + #[inline] + pub fn new() -> Self { + StableHasher { state: SipHasher128::new_with_keys(0, 0) } + } + + #[inline] + pub fn finish(self) -> W { + W::finish(self) + } +} + +impl StableHasherResult for u128 { + #[inline] + fn finish(hasher: StableHasher) -> Self { + let (_0, _1) = hasher.finalize(); + u128::from(_0) | (u128::from(_1) << 64) + } +} + +impl StableHasherResult for u64 { + #[inline] + fn finish(hasher: StableHasher) -> Self { + hasher.finalize().0 + } +} + +impl StableHasher { + #[inline] + pub fn finalize(self) -> (u64, u64) { + self.state.finish128() + } +} + +impl Hasher for StableHasher { + fn finish(&self) -> u64 { + panic!("use StableHasher::finalize instead"); + } + + #[inline] + fn write(&mut self, bytes: &[u8]) { + self.state.write(bytes); + } + + #[inline] + fn write_str(&mut self, s: &str) { + self.state.write_str(s); + } + + #[inline] + fn write_length_prefix(&mut self, len: usize) { + // Our impl for `usize` will extend it if needed. + self.write_usize(len); + } + + #[inline] + fn write_u8(&mut self, i: u8) { + self.state.write_u8(i); + } + + #[inline] + fn write_u16(&mut self, i: u16) { + self.state.short_write(i.to_le_bytes()); + } + + #[inline] + fn write_u32(&mut self, i: u32) { + self.state.short_write(i.to_le_bytes()); + } + + #[inline] + fn write_u64(&mut self, i: u64) { + self.state.short_write(i.to_le_bytes()); + } + + #[inline] + fn write_u128(&mut self, i: u128) { + self.state.write(&i.to_le_bytes()); + } + + #[inline] + fn write_usize(&mut self, i: usize) { + // Always treat usize as u64 so we get the same results on 32 and 64 bit + // platforms. This is important for symbol hashes when cross compiling, + // for example. + self.state.short_write((i as u64).to_le_bytes()); + } + + #[inline] + fn write_i8(&mut self, i: i8) { + self.state.write_i8(i); + } + + #[inline] + fn write_i16(&mut self, i: i16) { + self.state.short_write((i as u16).to_le_bytes()); + } + + #[inline] + fn write_i32(&mut self, i: i32) { + self.state.short_write((i as u32).to_le_bytes()); + } + + #[inline] + fn write_i64(&mut self, i: i64) { + self.state.short_write((i as u64).to_le_bytes()); + } + + #[inline] + fn write_i128(&mut self, i: i128) { + self.state.write(&(i as u128).to_le_bytes()); + } + + #[inline] + fn write_isize(&mut self, i: isize) { + // Always treat isize as a 64-bit number so we get the same results on 32 and 64 bit + // platforms. This is important for symbol hashes when cross compiling, + // for example. Sign extending here is preferable as it means that the + // same negative number hashes the same on both 32 and 64 bit platforms. + let value = i as u64; + + // Cold path + #[cold] + #[inline(never)] + fn hash_value(state: &mut SipHasher128, value: u64) { + state.write_u8(0xFF); + state.short_write(value.to_le_bytes()); + } + + // `isize` values often seem to have a small (positive) numeric value in practice. + // To exploit this, if the value is small, we will hash a smaller amount of bytes. 
+ // However, we cannot just skip the leading zero bytes, as that would produce the same hash + // e.g. if you hash two values that have the same bit pattern when they are swapped. + // See https://github.com/rust-lang/rust/pull/93014 for context. + // + // Therefore, we employ the following strategy: + // 1) When we encounter a value that fits within a single byte (the most common case), we + // hash just that byte. This is the most common case that is being optimized. However, we do + // not do this for the value 0xFF, as that is a reserved prefix (a bit like in UTF-8). + // 2) When we encounter a larger value, we hash a "marker" 0xFF and then the corresponding + // 8 bytes. Since this prefix cannot occur when we hash a single byte, when we hash two + // `isize`s that fit within a different amount of bytes, they should always produce a different + // byte stream for the hasher. + if value < 0xFF { + self.state.write_u8(value as u8); + } else { + hash_value(&mut self.state, value); + } + } +} + +/// Something that implements `HashStable` can be hashed in a way that is +/// stable across multiple compilation sessions. +/// +/// Note that `HashStable` imposes rather more strict requirements than usual +/// hash functions: +/// +/// - Stable hashes are sometimes used as identifiers. Therefore they must +/// conform to the corresponding `PartialEq` implementations: +/// +/// - `x == y` implies `hash_stable(x) == hash_stable(y)`, and +/// - `x != y` implies `hash_stable(x) != hash_stable(y)`. +/// +/// That second condition is usually not required for hash functions +/// (e.g. `Hash`). In practice this means that `hash_stable` must feed any +/// information into the hasher that a `PartialEq` comparison takes into +/// account. See [#49300](https://github.com/rust-lang/rust/issues/49300) +/// for an example where violating this invariant has caused trouble in the +/// past. +/// +/// - `hash_stable()` must be independent of the current +/// compilation session. E.g. they must not hash memory addresses or other +/// things that are "randomly" assigned per compilation session. +/// +/// - `hash_stable()` must be independent of the host architecture. The +/// `StableHasher` takes care of endianness and `isize`/`usize` platform +/// differences. +pub trait HashStable { + fn hash_stable(&self, hcx: &mut CTX, hasher: &mut StableHasher); +} + +/// Implement this for types that can be turned into stable keys like, for +/// example, for DefId that can be converted to a DefPathHash. This is used for +/// bringing maps into a predictable order before hashing them. +pub trait ToStableHashKey { + type KeyType: Ord + Sized + HashStable; + fn to_stable_hash_key(&self, hcx: &HCX) -> Self::KeyType; +} + +/// Implement HashStable by just calling `Hash::hash()`. +/// +/// **WARNING** This is only valid for types that *really* don't need any context for fingerprinting. +/// But it is easy to misuse this macro (see [#96013](https://github.com/rust-lang/rust/issues/96013) +/// for examples). Therefore this macro is not exported and should only be used in the limited cases +/// here in this module. +/// +/// Use `#[derive(HashStable_Generic)]` instead. +macro_rules! 
impl_stable_hash_via_hash { + ($t:ty) => { + impl $crate::stable_hasher::HashStable for $t { + #[inline] + fn hash_stable(&self, _: &mut CTX, hasher: &mut $crate::stable_hasher::StableHasher) { + ::std::hash::Hash::hash(self, hasher); + } + } + }; +} + +impl_stable_hash_via_hash!(i8); +impl_stable_hash_via_hash!(i16); +impl_stable_hash_via_hash!(i32); +impl_stable_hash_via_hash!(i64); +impl_stable_hash_via_hash!(isize); + +impl_stable_hash_via_hash!(u8); +impl_stable_hash_via_hash!(u16); +impl_stable_hash_via_hash!(u32); +impl_stable_hash_via_hash!(u64); +impl_stable_hash_via_hash!(usize); + +impl_stable_hash_via_hash!(u128); +impl_stable_hash_via_hash!(i128); + +impl_stable_hash_via_hash!(char); +impl_stable_hash_via_hash!(()); + +impl HashStable for ! { + fn hash_stable(&self, _ctx: &mut CTX, _hasher: &mut StableHasher) { + unreachable!() + } +} + +impl HashStable for PhantomData { + fn hash_stable(&self, _ctx: &mut CTX, _hasher: &mut StableHasher) {} +} + +impl HashStable for ::std::num::NonZeroU32 { + #[inline] + fn hash_stable(&self, ctx: &mut CTX, hasher: &mut StableHasher) { + self.get().hash_stable(ctx, hasher) + } +} + +impl HashStable for ::std::num::NonZeroUsize { + #[inline] + fn hash_stable(&self, ctx: &mut CTX, hasher: &mut StableHasher) { + self.get().hash_stable(ctx, hasher) + } +} + +impl HashStable for f32 { + fn hash_stable(&self, ctx: &mut CTX, hasher: &mut StableHasher) { + let val: u32 = unsafe { ::std::mem::transmute(*self) }; + val.hash_stable(ctx, hasher); + } +} + +impl HashStable for f64 { + fn hash_stable(&self, ctx: &mut CTX, hasher: &mut StableHasher) { + let val: u64 = unsafe { ::std::mem::transmute(*self) }; + val.hash_stable(ctx, hasher); + } +} + +impl HashStable for ::std::cmp::Ordering { + #[inline] + fn hash_stable(&self, ctx: &mut CTX, hasher: &mut StableHasher) { + (*self as i8).hash_stable(ctx, hasher); + } +} + +impl, CTX> HashStable for (T1,) { + #[inline] + fn hash_stable(&self, ctx: &mut CTX, hasher: &mut StableHasher) { + let (ref _0,) = *self; + _0.hash_stable(ctx, hasher); + } +} + +impl, T2: HashStable, CTX> HashStable for (T1, T2) { + fn hash_stable(&self, ctx: &mut CTX, hasher: &mut StableHasher) { + let (ref _0, ref _1) = *self; + _0.hash_stable(ctx, hasher); + _1.hash_stable(ctx, hasher); + } +} + +impl HashStable for (T1, T2, T3) +where + T1: HashStable, + T2: HashStable, + T3: HashStable, +{ + fn hash_stable(&self, ctx: &mut CTX, hasher: &mut StableHasher) { + let (ref _0, ref _1, ref _2) = *self; + _0.hash_stable(ctx, hasher); + _1.hash_stable(ctx, hasher); + _2.hash_stable(ctx, hasher); + } +} + +impl HashStable for (T1, T2, T3, T4) +where + T1: HashStable, + T2: HashStable, + T3: HashStable, + T4: HashStable, +{ + fn hash_stable(&self, ctx: &mut CTX, hasher: &mut StableHasher) { + let (ref _0, ref _1, ref _2, ref _3) = *self; + _0.hash_stable(ctx, hasher); + _1.hash_stable(ctx, hasher); + _2.hash_stable(ctx, hasher); + _3.hash_stable(ctx, hasher); + } +} + +impl, CTX> HashStable for [T] { + default fn hash_stable(&self, ctx: &mut CTX, hasher: &mut StableHasher) { + self.len().hash_stable(ctx, hasher); + for item in self { + item.hash_stable(ctx, hasher); + } + } +} + +impl HashStable for [u8] { + fn hash_stable(&self, ctx: &mut CTX, hasher: &mut StableHasher) { + self.len().hash_stable(ctx, hasher); + hasher.write(self); + } +} + +impl, CTX> HashStable for Vec { + #[inline] + fn hash_stable(&self, ctx: &mut CTX, hasher: &mut StableHasher) { + (&self[..]).hash_stable(ctx, hasher); + } +} + +impl HashStable for indexmap::IndexMap 
+where + K: HashStable + Eq + Hash, + V: HashStable, + R: BuildHasher, +{ + #[inline] + fn hash_stable(&self, ctx: &mut CTX, hasher: &mut StableHasher) { + self.len().hash_stable(ctx, hasher); + for kv in self { + kv.hash_stable(ctx, hasher); + } + } +} + +impl HashStable for indexmap::IndexSet +where + K: HashStable + Eq + Hash, + R: BuildHasher, +{ + #[inline] + fn hash_stable(&self, ctx: &mut CTX, hasher: &mut StableHasher) { + self.len().hash_stable(ctx, hasher); + for key in self { + key.hash_stable(ctx, hasher); + } + } +} + +impl HashStable for SmallVec<[A; 1]> +where + A: HashStable, +{ + #[inline] + fn hash_stable(&self, ctx: &mut CTX, hasher: &mut StableHasher) { + (&self[..]).hash_stable(ctx, hasher); + } +} + +impl, CTX> HashStable for Box { + #[inline] + fn hash_stable(&self, ctx: &mut CTX, hasher: &mut StableHasher) { + (**self).hash_stable(ctx, hasher); + } +} + +impl, CTX> HashStable for ::std::rc::Rc { + #[inline] + fn hash_stable(&self, ctx: &mut CTX, hasher: &mut StableHasher) { + (**self).hash_stable(ctx, hasher); + } +} + +impl, CTX> HashStable for ::std::sync::Arc { + #[inline] + fn hash_stable(&self, ctx: &mut CTX, hasher: &mut StableHasher) { + (**self).hash_stable(ctx, hasher); + } +} + +impl HashStable for str { + #[inline] + fn hash_stable(&self, ctx: &mut CTX, hasher: &mut StableHasher) { + self.as_bytes().hash_stable(ctx, hasher); + } +} + +impl HashStable for String { + #[inline] + fn hash_stable(&self, hcx: &mut CTX, hasher: &mut StableHasher) { + (&self[..]).hash_stable(hcx, hasher); + } +} + +impl ToStableHashKey for String { + type KeyType = String; + #[inline] + fn to_stable_hash_key(&self, _: &HCX) -> Self::KeyType { + self.clone() + } +} + +impl HashStable for bool { + #[inline] + fn hash_stable(&self, ctx: &mut CTX, hasher: &mut StableHasher) { + (if *self { 1u8 } else { 0u8 }).hash_stable(ctx, hasher); + } +} + +impl HashStable for Option +where + T: HashStable, +{ + #[inline] + fn hash_stable(&self, ctx: &mut CTX, hasher: &mut StableHasher) { + if let Some(ref value) = *self { + 1u8.hash_stable(ctx, hasher); + value.hash_stable(ctx, hasher); + } else { + 0u8.hash_stable(ctx, hasher); + } + } +} + +impl HashStable for Result +where + T1: HashStable, + T2: HashStable, +{ + #[inline] + fn hash_stable(&self, ctx: &mut CTX, hasher: &mut StableHasher) { + mem::discriminant(self).hash_stable(ctx, hasher); + match *self { + Ok(ref x) => x.hash_stable(ctx, hasher), + Err(ref x) => x.hash_stable(ctx, hasher), + } + } +} + +impl<'a, T, CTX> HashStable for &'a T +where + T: HashStable + ?Sized, +{ + #[inline] + fn hash_stable(&self, ctx: &mut CTX, hasher: &mut StableHasher) { + (**self).hash_stable(ctx, hasher); + } +} + +impl HashStable for ::std::mem::Discriminant { + #[inline] + fn hash_stable(&self, _: &mut CTX, hasher: &mut StableHasher) { + ::std::hash::Hash::hash(self, hasher); + } +} + +impl HashStable for ::std::ops::RangeInclusive +where + T: HashStable, +{ + #[inline] + fn hash_stable(&self, ctx: &mut CTX, hasher: &mut StableHasher) { + self.start().hash_stable(ctx, hasher); + self.end().hash_stable(ctx, hasher); + } +} + +impl HashStable for vec::IndexVec +where + T: HashStable, +{ + fn hash_stable(&self, ctx: &mut CTX, hasher: &mut StableHasher) { + self.len().hash_stable(ctx, hasher); + for v in &self.raw { + v.hash_stable(ctx, hasher); + } + } +} + +impl HashStable for bit_set::BitSet { + fn hash_stable(&self, _ctx: &mut CTX, hasher: &mut StableHasher) { + ::std::hash::Hash::hash(self, hasher); + } +} + +impl HashStable for bit_set::BitMatrix { + 
fn hash_stable(&self, _ctx: &mut CTX, hasher: &mut StableHasher) { + ::std::hash::Hash::hash(self, hasher); + } +} + +impl HashStable for bit_set::FiniteBitSet +where + T: HashStable + bit_set::FiniteBitSetTy, +{ + fn hash_stable(&self, hcx: &mut CTX, hasher: &mut StableHasher) { + self.0.hash_stable(hcx, hasher); + } +} + +impl_stable_hash_via_hash!(::std::path::Path); +impl_stable_hash_via_hash!(::std::path::PathBuf); + +impl HashStable for ::std::collections::HashMap +where + K: ToStableHashKey + Eq, + V: HashStable, + R: BuildHasher, +{ + #[inline] + fn hash_stable(&self, hcx: &mut HCX, hasher: &mut StableHasher) { + stable_hash_reduce(hcx, hasher, self.iter(), self.len(), |hasher, hcx, (key, value)| { + let key = key.to_stable_hash_key(hcx); + key.hash_stable(hcx, hasher); + value.hash_stable(hcx, hasher); + }); + } +} + +impl HashStable for ::std::collections::HashSet +where + K: ToStableHashKey + Eq, + R: BuildHasher, +{ + fn hash_stable(&self, hcx: &mut HCX, hasher: &mut StableHasher) { + stable_hash_reduce(hcx, hasher, self.iter(), self.len(), |hasher, hcx, key| { + let key = key.to_stable_hash_key(hcx); + key.hash_stable(hcx, hasher); + }); + } +} + +impl HashStable for ::std::collections::BTreeMap +where + K: ToStableHashKey, + V: HashStable, +{ + fn hash_stable(&self, hcx: &mut HCX, hasher: &mut StableHasher) { + stable_hash_reduce(hcx, hasher, self.iter(), self.len(), |hasher, hcx, (key, value)| { + let key = key.to_stable_hash_key(hcx); + key.hash_stable(hcx, hasher); + value.hash_stable(hcx, hasher); + }); + } +} + +impl HashStable for ::std::collections::BTreeSet +where + K: ToStableHashKey, +{ + fn hash_stable(&self, hcx: &mut HCX, hasher: &mut StableHasher) { + stable_hash_reduce(hcx, hasher, self.iter(), self.len(), |hasher, hcx, key| { + let key = key.to_stable_hash_key(hcx); + key.hash_stable(hcx, hasher); + }); + } +} + +fn stable_hash_reduce( + hcx: &mut HCX, + hasher: &mut StableHasher, + mut collection: C, + length: usize, + hash_function: F, +) where + C: Iterator, + F: Fn(&mut StableHasher, &mut HCX, I), +{ + length.hash_stable(hcx, hasher); + + match length { + 1 => { + hash_function(hasher, hcx, collection.next().unwrap()); + } + _ => { + let hash = collection + .map(|value| { + let mut hasher = StableHasher::new(); + hash_function(&mut hasher, hcx, value); + hasher.finish::() + }) + .reduce(|accum, value| accum.wrapping_add(value)); + hash.hash_stable(hcx, hasher); + } + } +} + +/// Controls what data we do or do not hash. +/// Whenever a `HashStable` implementation caches its +/// result, it needs to include `HashingControls` as part +/// of the key, to ensure that it does not produce an incorrect +/// result (for example, using a `Fingerprint` produced while +/// hashing `Span`s when a `Fingerprint` without `Span`s is +/// being requested) +#[derive(Clone, Hash, Eq, PartialEq, Debug)] +pub struct HashingControls { + pub hash_spans: bool, +} diff --git a/compiler/rustc_data_structures/src/stable_hasher/tests.rs b/compiler/rustc_data_structures/src/stable_hasher/tests.rs new file mode 100644 index 000000000..b0d66c32a --- /dev/null +++ b/compiler/rustc_data_structures/src/stable_hasher/tests.rs @@ -0,0 +1,163 @@ +use super::*; + +// The tests below compare the computed hashes to particular expected values +// in order to test that we produce the same results on different platforms, +// regardless of endianness and `usize` and `isize` size differences (this +// of course assumes we run these tests on platforms that differ in those +// ways). 
The expected values depend on the hashing algorithm used, so they +// need to be updated whenever StableHasher changes its hashing algorithm. + +#[test] +fn test_hash_integers() { + // Test that integers are handled consistently across platforms. + let test_u8 = 0xAB_u8; + let test_u16 = 0xFFEE_u16; + let test_u32 = 0x445577AA_u32; + let test_u64 = 0x01234567_13243546_u64; + let test_u128 = 0x22114433_66557788_99AACCBB_EEDDFF77_u128; + let test_usize = 0xD0C0B0A0_usize; + + let test_i8 = -100_i8; + let test_i16 = -200_i16; + let test_i32 = -300_i32; + let test_i64 = -400_i64; + let test_i128 = -500_i128; + let test_isize = -600_isize; + + let mut h = StableHasher::new(); + test_u8.hash(&mut h); + test_u16.hash(&mut h); + test_u32.hash(&mut h); + test_u64.hash(&mut h); + test_u128.hash(&mut h); + test_usize.hash(&mut h); + test_i8.hash(&mut h); + test_i16.hash(&mut h); + test_i32.hash(&mut h); + test_i64.hash(&mut h); + test_i128.hash(&mut h); + test_isize.hash(&mut h); + + // This depends on the hashing algorithm. See note at top of file. + let expected = (1784307454142909076, 11471672289340283879); + + assert_eq!(h.finalize(), expected); +} + +#[test] +fn test_hash_usize() { + // Test that usize specifically is handled consistently across platforms. + let test_usize = 0xABCDEF01_usize; + + let mut h = StableHasher::new(); + test_usize.hash(&mut h); + + // This depends on the hashing algorithm. See note at top of file. + let expected = (5798740672699530587, 11186240177685111648); + + assert_eq!(h.finalize(), expected); +} + +#[test] +fn test_hash_isize() { + // Test that isize specifically is handled consistently across platforms. + let test_isize = -7_isize; + + let mut h = StableHasher::new(); + test_isize.hash(&mut h); + + // This depends on the hashing algorithm. See note at top of file. + let expected = (2789913510339652884, 674280939192711005); + + assert_eq!(h.finalize(), expected); +} + +fn hash>(t: &T) -> u128 { + let mut h = StableHasher::new(); + let ctx = &mut (); + t.hash_stable(ctx, &mut h); + h.finish() +} + +// Check that bit set hash includes the domain size. +#[test] +fn test_hash_bit_set() { + use rustc_index::bit_set::BitSet; + let a: BitSet = BitSet::new_empty(1); + let b: BitSet = BitSet::new_empty(2); + assert_ne!(a, b); + assert_ne!(hash(&a), hash(&b)); +} + +// Check that bit matrix hash includes the matrix dimensions. +#[test] +fn test_hash_bit_matrix() { + use rustc_index::bit_set::BitMatrix; + let a: BitMatrix = BitMatrix::new(1, 1); + let b: BitMatrix = BitMatrix::new(1, 2); + assert_ne!(a, b); + assert_ne!(hash(&a), hash(&b)); +} + +// Check that exchanging the value of two adjacent fields changes the hash. +#[test] +fn test_attribute_permutation() { + macro_rules! 
test_type { + ($ty: ty) => {{ + struct Foo { + a: $ty, + b: $ty, + } + + impl HashStable for Foo { + fn hash_stable(&self, hcx: &mut CTX, hasher: &mut StableHasher) { + self.a.hash_stable(hcx, hasher); + self.b.hash_stable(hcx, hasher); + } + } + + #[allow(overflowing_literals)] + let mut item = Foo { a: 0xFF, b: 0xFF_FF }; + let hash_a = hash(&item); + std::mem::swap(&mut item.a, &mut item.b); + let hash_b = hash(&item); + assert_ne!( + hash_a, + hash_b, + "The hash stayed the same after values were swapped for type `{}`!", + stringify!($ty) + ); + }}; + } + + test_type!(u16); + test_type!(u32); + test_type!(u64); + test_type!(u128); + + test_type!(i16); + test_type!(i32); + test_type!(i64); + test_type!(i128); +} + +// Check that the `isize` hashing optimization does not produce the same hash when permuting two +// values. +#[test] +fn test_isize_compression() { + fn check_hash(a: u64, b: u64) { + let hash_a = hash(&(a as isize, b as isize)); + let hash_b = hash(&(b as isize, a as isize)); + assert_ne!( + hash_a, hash_b, + "The hash stayed the same when permuting values `{a}` and `{b}!", + ); + } + + check_hash(0xAA, 0xAAAA); + check_hash(0xFF, 0xFFFF); + check_hash(0xAAAA, 0xAAAAAA); + check_hash(0xAAAAAA, 0xAAAAAAAA); + check_hash(0xFF, 0xFFFFFFFFFFFFFFFF); + check_hash(u64::MAX /* -1 */, 1); +} diff --git a/compiler/rustc_data_structures/src/stack.rs b/compiler/rustc_data_structures/src/stack.rs new file mode 100644 index 000000000..3bdd67512 --- /dev/null +++ b/compiler/rustc_data_structures/src/stack.rs @@ -0,0 +1,18 @@ +// This is the amount of bytes that need to be left on the stack before increasing the size. +// It must be at least as large as the stack required by any code that does not call +// `ensure_sufficient_stack`. +const RED_ZONE: usize = 100 * 1024; // 100k + +// Only the first stack that is pushed, grows exponentially (2^n * STACK_PER_RECURSION) from then +// on. This flag has performance relevant characteristics. Don't set it too high. +const STACK_PER_RECURSION: usize = 1 * 1024 * 1024; // 1MB + +/// Grows the stack on demand to prevent stack overflow. Call this in strategic locations +/// to "break up" recursive calls. E.g. almost any call to `visit_expr` or equivalent can benefit +/// from this. +/// +/// Should not be sprinkled around carelessly, as it causes a little bit of overhead. +#[inline] +pub fn ensure_sufficient_stack(f: impl FnOnce() -> R) -> R { + stacker::maybe_grow(RED_ZONE, STACK_PER_RECURSION, f) +} diff --git a/compiler/rustc_data_structures/src/steal.rs b/compiler/rustc_data_structures/src/steal.rs new file mode 100644 index 000000000..a3ece6550 --- /dev/null +++ b/compiler/rustc_data_structures/src/steal.rs @@ -0,0 +1,55 @@ +use crate::stable_hasher::{HashStable, StableHasher}; +use crate::sync::{MappedReadGuard, ReadGuard, RwLock}; + +/// The `Steal` struct is intended to used as the value for a query. +/// Specifically, we sometimes have queries (*cough* MIR *cough*) +/// where we create a large, complex value that we want to iteratively +/// update (e.g., optimize). We could clone the value for each +/// optimization, but that'd be expensive. And yet we don't just want +/// to mutate it in place, because that would spoil the idea that +/// queries are these pure functions that produce an immutable value +/// (since if you did the query twice, you could observe the mutations). +/// So instead we have the query produce a `&'tcx Steal>` +/// (to be very specific). 
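/// An illustrative sketch of the usage described below (`some_query` and the
/// concrete types are stand-ins, not real APIs):
///
/// ```ignore (illustrative)
/// let stolen: &Steal<Mir<'_>> = some_query(tcx);
/// let body = stolen.borrow();      // read as often as needed
/// drop(body);
/// let owned = stolen.steal();      // take ownership exactly once
/// // any later `borrow()` (or second `steal()`) will panic
/// ```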
Now we can read from this +/// as much as we want (using `borrow()`), but you can also +/// `steal()`. Once you steal, any further attempt to read will panic. +/// Therefore, we know that -- assuming no ICE -- nobody is observing +/// the fact that the MIR was updated. +/// +/// Obviously, whenever you have a query that yields a `Steal` value, +/// you must treat it with caution, and make sure that you know that +/// -- once the value is stolen -- it will never be read from again. +// +// FIXME(#41710): what is the best way to model linear queries? +#[derive(Debug)] +pub struct Steal { + value: RwLock>, +} + +impl Steal { + pub fn new(value: T) -> Self { + Steal { value: RwLock::new(Some(value)) } + } + + #[track_caller] + pub fn borrow(&self) -> MappedReadGuard<'_, T> { + let borrow = self.value.borrow(); + if borrow.is_none() { + panic!("attempted to read from stolen value: {}", std::any::type_name::()); + } + ReadGuard::map(borrow, |opt| opt.as_ref().unwrap()) + } + + #[track_caller] + pub fn steal(&self) -> T { + let value_ref = &mut *self.value.try_write().expect("stealing value which is locked"); + let value = value_ref.take(); + value.expect("attempt to steal from stolen value") + } +} + +impl> HashStable for Steal { + fn hash_stable(&self, hcx: &mut CTX, hasher: &mut StableHasher) { + self.borrow().hash_stable(hcx, hasher); + } +} diff --git a/compiler/rustc_data_structures/src/svh.rs b/compiler/rustc_data_structures/src/svh.rs new file mode 100644 index 000000000..61654b9e8 --- /dev/null +++ b/compiler/rustc_data_structures/src/svh.rs @@ -0,0 +1,69 @@ +//! Calculation and management of a Strict Version Hash for crates +//! +//! The SVH is used for incremental compilation to track when HIR +//! nodes have changed between compilations, and also to detect +//! mismatches where we have two versions of the same crate that were +//! compiled from distinct sources. + +use rustc_serialize::{Decodable, Decoder, Encodable, Encoder}; +use std::fmt; +use std::hash::{Hash, Hasher}; + +use crate::stable_hasher; + +#[derive(Copy, Clone, PartialEq, Eq, Debug)] +pub struct Svh { + hash: u64, +} + +impl Svh { + /// Creates a new `Svh` given the hash. If you actually want to + /// compute the SVH from some HIR, you want the `calculate_svh` + /// function found in `rustc_incremental`. + pub fn new(hash: u64) -> Svh { + Svh { hash } + } + + pub fn as_u64(&self) -> u64 { + self.hash + } + + pub fn to_string(&self) -> String { + format!("{:016x}", self.hash) + } +} + +impl Hash for Svh { + fn hash(&self, state: &mut H) + where + H: Hasher, + { + self.hash.to_le().hash(state); + } +} + +impl fmt::Display for Svh { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + f.pad(&self.to_string()) + } +} + +impl Encodable for Svh { + fn encode(&self, s: &mut S) { + s.emit_u64(self.as_u64().to_le()); + } +} + +impl Decodable for Svh { + fn decode(d: &mut D) -> Svh { + Svh::new(u64::from_le(d.read_u64())) + } +} + +impl stable_hasher::HashStable for Svh { + #[inline] + fn hash_stable(&self, ctx: &mut T, hasher: &mut stable_hasher::StableHasher) { + let Svh { hash } = *self; + hash.hash_stable(ctx, hasher); + } +} diff --git a/compiler/rustc_data_structures/src/sync.rs b/compiler/rustc_data_structures/src/sync.rs new file mode 100644 index 000000000..52952a793 --- /dev/null +++ b/compiler/rustc_data_structures/src/sync.rs @@ -0,0 +1,630 @@ +//! This module defines types which are thread safe if cfg!(parallel_compiler) is true. +//! +//! 
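//! A brief usage sketch of the aliases documented below (illustrative only):
//!
//! ```ignore (illustrative)
//! let shared = Lrc::new(vec![1, 2, 3]);   // `Rc` or `Arc`, depending on cfg
//! let cache = Lock::new(Vec::new());      // `RefCell` or `Mutex` underneath
//! cache.lock().push(shared.len());        // same call on either backend
//! ```
//!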
`Lrc` is an alias of `Arc` if cfg!(parallel_compiler) is true, `Rc` otherwise. +//! +//! `Lock` is a mutex. +//! It internally uses `parking_lot::Mutex` if cfg!(parallel_compiler) is true, +//! `RefCell` otherwise. +//! +//! `RwLock` is a read-write lock. +//! It internally uses `parking_lot::RwLock` if cfg!(parallel_compiler) is true, +//! `RefCell` otherwise. +//! +//! `MTLock` is a mutex which disappears if cfg!(parallel_compiler) is false. +//! +//! `MTRef` is an immutable reference if cfg!(parallel_compiler), and a mutable reference otherwise. +//! +//! `rustc_erase_owner!` erases an OwningRef owner into Erased or Erased + Send + Sync +//! depending on the value of cfg!(parallel_compiler). + +use crate::owning_ref::{Erased, OwningRef}; +use std::collections::HashMap; +use std::hash::{BuildHasher, Hash}; +use std::ops::{Deref, DerefMut}; +use std::panic::{catch_unwind, resume_unwind, AssertUnwindSafe}; + +pub use std::sync::atomic::Ordering; +pub use std::sync::atomic::Ordering::SeqCst; + +cfg_if! { + if #[cfg(not(parallel_compiler))] { + pub auto trait Send {} + pub auto trait Sync {} + + impl Send for T {} + impl Sync for T {} + + #[macro_export] + macro_rules! rustc_erase_owner { + ($v:expr) => { + $v.erase_owner() + } + } + + use std::ops::Add; + + /// This is a single threaded variant of `AtomicU64`, `AtomicUsize`, etc. + /// It has explicit ordering arguments and is only intended for use with + /// the native atomic types. + /// You should use this type through the `AtomicU64`, `AtomicUsize`, etc, type aliases + /// as it's not intended to be used separately. + #[derive(Debug)] + pub struct Atomic(Cell); + + impl Atomic { + #[inline] + pub fn new(v: T) -> Self { + Atomic(Cell::new(v)) + } + } + + impl Atomic { + #[inline] + pub fn into_inner(self) -> T { + self.0.into_inner() + } + + #[inline] + pub fn load(&self, _: Ordering) -> T { + self.0.get() + } + + #[inline] + pub fn store(&self, val: T, _: Ordering) { + self.0.set(val) + } + + #[inline] + pub fn swap(&self, val: T, _: Ordering) -> T { + self.0.replace(val) + } + } + + impl Atomic { + #[inline] + pub fn compare_exchange(&self, + current: T, + new: T, + _: Ordering, + _: Ordering) + -> Result { + let read = self.0.get(); + if read == current { + self.0.set(new); + Ok(read) + } else { + Err(read) + } + } + } + + impl + Copy> Atomic { + #[inline] + pub fn fetch_add(&self, val: T, _: Ordering) -> T { + let old = self.0.get(); + self.0.set(old + val); + old + } + } + + pub type AtomicUsize = Atomic; + pub type AtomicBool = Atomic; + pub type AtomicU32 = Atomic; + pub type AtomicU64 = Atomic; + + pub fn join(oper_a: A, oper_b: B) -> (RA, RB) + where A: FnOnce() -> RA, + B: FnOnce() -> RB + { + (oper_a(), oper_b()) + } + + #[macro_export] + macro_rules! parallel { + ($($blocks:tt),*) => { + // We catch panics here ensuring that all the blocks execute. + // This makes behavior consistent with the parallel compiler. + let mut panic = None; + $( + if let Err(p) = ::std::panic::catch_unwind( + ::std::panic::AssertUnwindSafe(|| $blocks) + ) { + if panic.is_none() { + panic = Some(p); + } + } + )* + if let Some(panic) = panic { + ::std::panic::resume_unwind(panic); + } + } + } + + pub use std::iter::Iterator as ParallelIterator; + + pub fn par_iter(t: T) -> T::IntoIter { + t.into_iter() + } + + pub fn par_for_each_in(t: T, mut for_each: impl FnMut(T::Item) + Sync + Send) { + // We catch panics here ensuring that all the loop iterations execute. + // This makes behavior consistent with the parallel compiler. 
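// Illustrative, std-only sketch of the panic-collection pattern used here and
// in `parallel!` above (`_run_all_then_repanic` is a made-up name): every
// closure runs even if an earlier one panics, and the first captured panic is
// re-raised once all of them have finished.
fn _run_all_then_repanic(fns: Vec<Box<dyn FnOnce()>>) {
    use std::panic::{catch_unwind, resume_unwind, AssertUnwindSafe};
    let mut first_panic = None;
    for f in fns {
        // `AssertUnwindSafe` mirrors its use in the surrounding module.
        if let Err(payload) = catch_unwind(AssertUnwindSafe(f)) {
            // Keep only the first panic payload; later ones are dropped.
            first_panic.get_or_insert(payload);
        }
    }
    if let Some(payload) = first_panic {
        resume_unwind(payload);
    }
}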
+ let mut panic = None; + t.into_iter().for_each(|i| { + if let Err(p) = catch_unwind(AssertUnwindSafe(|| for_each(i))) { + if panic.is_none() { + panic = Some(p); + } + } + }); + if let Some(panic) = panic { + resume_unwind(panic); + } + } + + pub type MetadataRef = OwningRef, [u8]>; + + pub use std::rc::Rc as Lrc; + pub use std::rc::Weak as Weak; + pub use std::cell::Ref as ReadGuard; + pub use std::cell::Ref as MappedReadGuard; + pub use std::cell::RefMut as WriteGuard; + pub use std::cell::RefMut as MappedWriteGuard; + pub use std::cell::RefMut as LockGuard; + pub use std::cell::RefMut as MappedLockGuard; + + pub use std::cell::OnceCell; + + use std::cell::RefCell as InnerRwLock; + use std::cell::RefCell as InnerLock; + + use std::cell::Cell; + + #[derive(Debug)] + pub struct WorkerLocal(OneThread); + + impl WorkerLocal { + /// Creates a new worker local where the `initial` closure computes the + /// value this worker local should take for each thread in the thread pool. + #[inline] + pub fn new T>(mut f: F) -> WorkerLocal { + WorkerLocal(OneThread::new(f(0))) + } + + /// Returns the worker-local value for each thread + #[inline] + pub fn into_inner(self) -> Vec { + vec![OneThread::into_inner(self.0)] + } + } + + impl Deref for WorkerLocal { + type Target = T; + + #[inline(always)] + fn deref(&self) -> &T { + &*self.0 + } + } + + pub type MTRef<'a, T> = &'a mut T; + + #[derive(Debug, Default)] + pub struct MTLock(T); + + impl MTLock { + #[inline(always)] + pub fn new(inner: T) -> Self { + MTLock(inner) + } + + #[inline(always)] + pub fn into_inner(self) -> T { + self.0 + } + + #[inline(always)] + pub fn get_mut(&mut self) -> &mut T { + &mut self.0 + } + + #[inline(always)] + pub fn lock(&self) -> &T { + &self.0 + } + + #[inline(always)] + pub fn lock_mut(&mut self) -> &mut T { + &mut self.0 + } + } + + // FIXME: Probably a bad idea (in the threaded case) + impl Clone for MTLock { + #[inline] + fn clone(&self) -> Self { + MTLock(self.0.clone()) + } + } + } else { + pub use std::marker::Send as Send; + pub use std::marker::Sync as Sync; + + pub use parking_lot::RwLockReadGuard as ReadGuard; + pub use parking_lot::MappedRwLockReadGuard as MappedReadGuard; + pub use parking_lot::RwLockWriteGuard as WriteGuard; + pub use parking_lot::MappedRwLockWriteGuard as MappedWriteGuard; + + pub use parking_lot::MutexGuard as LockGuard; + pub use parking_lot::MappedMutexGuard as MappedLockGuard; + + pub use std::sync::OnceLock as OnceCell; + + pub use std::sync::atomic::{AtomicBool, AtomicUsize, AtomicU32, AtomicU64}; + + pub use std::sync::Arc as Lrc; + pub use std::sync::Weak as Weak; + + pub type MTRef<'a, T> = &'a T; + + #[derive(Debug, Default)] + pub struct MTLock(Lock); + + impl MTLock { + #[inline(always)] + pub fn new(inner: T) -> Self { + MTLock(Lock::new(inner)) + } + + #[inline(always)] + pub fn into_inner(self) -> T { + self.0.into_inner() + } + + #[inline(always)] + pub fn get_mut(&mut self) -> &mut T { + self.0.get_mut() + } + + #[inline(always)] + pub fn lock(&self) -> LockGuard<'_, T> { + self.0.lock() + } + + #[inline(always)] + pub fn lock_mut(&self) -> LockGuard<'_, T> { + self.lock() + } + } + + use parking_lot::Mutex as InnerLock; + use parking_lot::RwLock as InnerRwLock; + + use std::thread; + pub use rayon::{join, scope}; + + /// Runs a list of blocks in parallel. The first block is executed immediately on + /// the current thread. Use that for the longest running block. + #[macro_export] + macro_rules! 
parallel { + (impl $fblock:tt [$($c:tt,)*] [$block:tt $(, $rest:tt)*]) => { + parallel!(impl $fblock [$block, $($c,)*] [$($rest),*]) + }; + (impl $fblock:tt [$($blocks:tt,)*] []) => { + ::rustc_data_structures::sync::scope(|s| { + $( + s.spawn(|_| $blocks); + )* + $fblock; + }) + }; + ($fblock:tt, $($blocks:tt),*) => { + // Reverse the order of the later blocks since Rayon executes them in reverse order + // when using a single thread. This ensures the execution order matches that + // of a single threaded rustc + parallel!(impl $fblock [] [$($blocks),*]); + }; + } + + pub use rayon_core::WorkerLocal; + + pub use rayon::iter::ParallelIterator; + use rayon::iter::IntoParallelIterator; + + pub fn par_iter(t: T) -> T::Iter { + t.into_par_iter() + } + + pub fn par_for_each_in( + t: T, + for_each: impl Fn(T::Item) + Sync + Send, + ) { + let ps: Vec<_> = t.into_par_iter().map(|i| catch_unwind(AssertUnwindSafe(|| for_each(i)))).collect(); + ps.into_iter().for_each(|p| if let Err(panic) = p { + resume_unwind(panic) + }); + } + + pub type MetadataRef = OwningRef, [u8]>; + + /// This makes locks panic if they are already held. + /// It is only useful when you are running in a single thread + const ERROR_CHECKING: bool = false; + + #[macro_export] + macro_rules! rustc_erase_owner { + ($v:expr) => {{ + let v = $v; + ::rustc_data_structures::sync::assert_send_val(&v); + v.erase_send_sync_owner() + }} + } + } +} + +pub fn assert_sync() {} +pub fn assert_send() {} +pub fn assert_send_val(_t: &T) {} +pub fn assert_send_sync_val(_t: &T) {} + +pub trait HashMapExt { + /// Same as HashMap::insert, but it may panic if there's already an + /// entry for `key` with a value not equal to `value` + fn insert_same(&mut self, key: K, value: V); +} + +impl HashMapExt for HashMap { + fn insert_same(&mut self, key: K, value: V) { + self.entry(key).and_modify(|old| assert!(*old == value)).or_insert(value); + } +} + +#[derive(Debug)] +pub struct Lock(InnerLock); + +impl Lock { + #[inline(always)] + pub fn new(inner: T) -> Self { + Lock(InnerLock::new(inner)) + } + + #[inline(always)] + pub fn into_inner(self) -> T { + self.0.into_inner() + } + + #[inline(always)] + pub fn get_mut(&mut self) -> &mut T { + self.0.get_mut() + } + + #[cfg(parallel_compiler)] + #[inline(always)] + pub fn try_lock(&self) -> Option> { + self.0.try_lock() + } + + #[cfg(not(parallel_compiler))] + #[inline(always)] + pub fn try_lock(&self) -> Option> { + self.0.try_borrow_mut().ok() + } + + #[cfg(parallel_compiler)] + #[inline(always)] + pub fn lock(&self) -> LockGuard<'_, T> { + if ERROR_CHECKING { + self.0.try_lock().expect("lock was already held") + } else { + self.0.lock() + } + } + + #[cfg(not(parallel_compiler))] + #[inline(always)] + pub fn lock(&self) -> LockGuard<'_, T> { + self.0.borrow_mut() + } + + #[inline(always)] + pub fn with_lock R, R>(&self, f: F) -> R { + f(&mut *self.lock()) + } + + #[inline(always)] + pub fn borrow(&self) -> LockGuard<'_, T> { + self.lock() + } + + #[inline(always)] + pub fn borrow_mut(&self) -> LockGuard<'_, T> { + self.lock() + } +} + +impl Default for Lock { + #[inline] + fn default() -> Self { + Lock::new(T::default()) + } +} + +// FIXME: Probably a bad idea +impl Clone for Lock { + #[inline] + fn clone(&self) -> Self { + Lock::new(self.borrow().clone()) + } +} + +#[derive(Debug, Default)] +pub struct RwLock(InnerRwLock); + +impl RwLock { + #[inline(always)] + pub fn new(inner: T) -> Self { + RwLock(InnerRwLock::new(inner)) + } + + #[inline(always)] + pub fn into_inner(self) -> T { + self.0.into_inner() + 
} + + #[inline(always)] + pub fn get_mut(&mut self) -> &mut T { + self.0.get_mut() + } + + #[cfg(not(parallel_compiler))] + #[inline(always)] + pub fn read(&self) -> ReadGuard<'_, T> { + self.0.borrow() + } + + #[cfg(parallel_compiler)] + #[inline(always)] + pub fn read(&self) -> ReadGuard<'_, T> { + if ERROR_CHECKING { + self.0.try_read().expect("lock was already held") + } else { + self.0.read() + } + } + + #[inline(always)] + pub fn with_read_lock R, R>(&self, f: F) -> R { + f(&*self.read()) + } + + #[cfg(not(parallel_compiler))] + #[inline(always)] + pub fn try_write(&self) -> Result, ()> { + self.0.try_borrow_mut().map_err(|_| ()) + } + + #[cfg(parallel_compiler)] + #[inline(always)] + pub fn try_write(&self) -> Result, ()> { + self.0.try_write().ok_or(()) + } + + #[cfg(not(parallel_compiler))] + #[inline(always)] + pub fn write(&self) -> WriteGuard<'_, T> { + self.0.borrow_mut() + } + + #[cfg(parallel_compiler)] + #[inline(always)] + pub fn write(&self) -> WriteGuard<'_, T> { + if ERROR_CHECKING { + self.0.try_write().expect("lock was already held") + } else { + self.0.write() + } + } + + #[inline(always)] + pub fn with_write_lock R, R>(&self, f: F) -> R { + f(&mut *self.write()) + } + + #[inline(always)] + pub fn borrow(&self) -> ReadGuard<'_, T> { + self.read() + } + + #[inline(always)] + pub fn borrow_mut(&self) -> WriteGuard<'_, T> { + self.write() + } + + #[cfg(not(parallel_compiler))] + #[inline(always)] + pub fn clone_guard<'a>(rg: &ReadGuard<'a, T>) -> ReadGuard<'a, T> { + ReadGuard::clone(rg) + } + + #[cfg(parallel_compiler)] + #[inline(always)] + pub fn clone_guard<'a>(rg: &ReadGuard<'a, T>) -> ReadGuard<'a, T> { + ReadGuard::rwlock(&rg).read() + } + + #[cfg(not(parallel_compiler))] + #[inline(always)] + pub fn leak(&self) -> &T { + ReadGuard::leak(self.read()) + } + + #[cfg(parallel_compiler)] + #[inline(always)] + pub fn leak(&self) -> &T { + let guard = self.read(); + let ret = unsafe { &*(&*guard as *const T) }; + std::mem::forget(guard); + ret + } +} + +// FIXME: Probably a bad idea +impl Clone for RwLock { + #[inline] + fn clone(&self) -> Self { + RwLock::new(self.borrow().clone()) + } +} + +/// A type which only allows its inner value to be used in one thread. +/// It will panic if it is used on multiple threads. +#[derive(Debug)] +pub struct OneThread { + #[cfg(parallel_compiler)] + thread: thread::ThreadId, + inner: T, +} + +#[cfg(parallel_compiler)] +unsafe impl std::marker::Sync for OneThread {} +#[cfg(parallel_compiler)] +unsafe impl std::marker::Send for OneThread {} + +impl OneThread { + #[inline(always)] + fn check(&self) { + #[cfg(parallel_compiler)] + assert_eq!(thread::current().id(), self.thread); + } + + #[inline(always)] + pub fn new(inner: T) -> Self { + OneThread { + #[cfg(parallel_compiler)] + thread: thread::current().id(), + inner, + } + } + + #[inline(always)] + pub fn into_inner(value: Self) -> T { + value.check(); + value.inner + } +} + +impl Deref for OneThread { + type Target = T; + + fn deref(&self) -> &T { + self.check(); + &self.inner + } +} + +impl DerefMut for OneThread { + fn deref_mut(&mut self) -> &mut T { + self.check(); + &mut self.inner + } +} diff --git a/compiler/rustc_data_structures/src/tagged_ptr.rs b/compiler/rustc_data_structures/src/tagged_ptr.rs new file mode 100644 index 000000000..651bc556c --- /dev/null +++ b/compiler/rustc_data_structures/src/tagged_ptr.rs @@ -0,0 +1,168 @@ +//! This module implements tagged pointers. +//! +//! In order to utilize the pointer packing, you must have two types: a pointer, +//! and a tag. 
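//!
//! For a feel of where the spare bits come from (illustrative, std-only): an
//! 8-byte-aligned pointee guarantees that the three low bits of every valid
//! address are zero, so up to three tag bits can be packed into them.
//!
//! ```
//! #[repr(align(8))]
//! struct Aligned8(u8);
//! // alignment 8 => 3 trailing zero bits available for a tag
//! assert_eq!(std::mem::align_of::<Aligned8>().trailing_zeros(), 3);
//! ```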
+//! +//! The pointer must implement the `Pointer` trait, with the primary requirement +//! being conversion to and from a usize. Note that the pointer must be +//! dereferenceable, so raw pointers generally cannot implement the `Pointer` +//! trait. This implies that the pointer must also be nonzero. +//! +//! Many common pointer types already implement the `Pointer` trait. +//! +//! The tag must implement the `Tag` trait. We assert that the tag and `Pointer` +//! are compatible at compile time. + +use std::mem::ManuallyDrop; +use std::ops::Deref; +use std::rc::Rc; +use std::sync::Arc; + +mod copy; +mod drop; + +pub use copy::CopyTaggedPtr; +pub use drop::TaggedPtr; + +/// This describes the pointer type encapsulated by TaggedPtr. +/// +/// # Safety +/// +/// The usize returned from `into_usize` must be a valid, dereferenceable, +/// pointer to `::Target`. Note that pointers to `Pointee` must +/// be thin, even though `Pointee` may not be sized. +/// +/// Note that the returned pointer from `into_usize` should be castable to `&mut +/// ::Target` if `Pointer: DerefMut`. +/// +/// The BITS constant must be correct. At least `BITS` bits, least-significant, +/// must be zero on all returned pointers from `into_usize`. +/// +/// For example, if the alignment of `Pointee` is 2, then `BITS` should be 1. +pub unsafe trait Pointer: Deref { + /// Most likely the value you want to use here is the following, unless + /// your Pointee type is unsized (e.g., `ty::List` in rustc) in which + /// case you'll need to manually figure out what the right type to pass to + /// align_of is. + /// + /// ```ignore UNSOLVED (what to do about the Self) + /// # use std::ops::Deref; + /// std::mem::align_of::<::Target>().trailing_zeros() as usize; + /// ``` + const BITS: usize; + fn into_usize(self) -> usize; + + /// # Safety + /// + /// The passed `ptr` must be returned from `into_usize`. + /// + /// This acts as `ptr::read` semantically, it should not be called more than + /// once on non-`Copy` `Pointer`s. + unsafe fn from_usize(ptr: usize) -> Self; + + /// This provides a reference to the `Pointer` itself, rather than the + /// `Deref::Target`. It is used for cases where we want to call methods that + /// may be implement differently for the Pointer than the Pointee (e.g., + /// `Rc::clone` vs cloning the inner value). + /// + /// # Safety + /// + /// The passed `ptr` must be returned from `into_usize`. + unsafe fn with_ref R>(ptr: usize, f: F) -> R; +} + +/// This describes tags that the `TaggedPtr` struct can hold. +/// +/// # Safety +/// +/// The BITS constant must be correct. +/// +/// No more than `BITS` least significant bits may be set in the returned usize. +pub unsafe trait Tag: Copy { + const BITS: usize; + + fn into_usize(self) -> usize; + + /// # Safety + /// + /// The passed `tag` must be returned from `into_usize`. 
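///
/// An illustrative sketch of a small tag type satisfying this contract (the
/// `Kind` enum is made up, not part of this module):
///
/// ```ignore (illustrative)
/// #[derive(Copy, Clone)]
/// enum Kind { A, B, C }
///
/// unsafe impl Tag for Kind {
///     const BITS: usize = 2; // three variants fit in two bits
///     fn into_usize(self) -> usize { self as usize }
///     unsafe fn from_usize(tag: usize) -> Self {
///         match tag { 0 => Kind::A, 1 => Kind::B, _ => Kind::C }
///     }
/// }
/// ```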
+ unsafe fn from_usize(tag: usize) -> Self; +} + +unsafe impl Pointer for Box { + const BITS: usize = std::mem::align_of::().trailing_zeros() as usize; + #[inline] + fn into_usize(self) -> usize { + Box::into_raw(self) as usize + } + #[inline] + unsafe fn from_usize(ptr: usize) -> Self { + Box::from_raw(ptr as *mut T) + } + unsafe fn with_ref R>(ptr: usize, f: F) -> R { + let raw = ManuallyDrop::new(Self::from_usize(ptr)); + f(&raw) + } +} + +unsafe impl Pointer for Rc { + const BITS: usize = std::mem::align_of::().trailing_zeros() as usize; + #[inline] + fn into_usize(self) -> usize { + Rc::into_raw(self) as usize + } + #[inline] + unsafe fn from_usize(ptr: usize) -> Self { + Rc::from_raw(ptr as *const T) + } + unsafe fn with_ref R>(ptr: usize, f: F) -> R { + let raw = ManuallyDrop::new(Self::from_usize(ptr)); + f(&raw) + } +} + +unsafe impl Pointer for Arc { + const BITS: usize = std::mem::align_of::().trailing_zeros() as usize; + #[inline] + fn into_usize(self) -> usize { + Arc::into_raw(self) as usize + } + #[inline] + unsafe fn from_usize(ptr: usize) -> Self { + Arc::from_raw(ptr as *const T) + } + unsafe fn with_ref R>(ptr: usize, f: F) -> R { + let raw = ManuallyDrop::new(Self::from_usize(ptr)); + f(&raw) + } +} + +unsafe impl<'a, T: 'a> Pointer for &'a T { + const BITS: usize = std::mem::align_of::().trailing_zeros() as usize; + #[inline] + fn into_usize(self) -> usize { + self as *const T as usize + } + #[inline] + unsafe fn from_usize(ptr: usize) -> Self { + &*(ptr as *const T) + } + unsafe fn with_ref R>(ptr: usize, f: F) -> R { + f(&*(&ptr as *const usize as *const Self)) + } +} + +unsafe impl<'a, T: 'a> Pointer for &'a mut T { + const BITS: usize = std::mem::align_of::().trailing_zeros() as usize; + #[inline] + fn into_usize(self) -> usize { + self as *mut T as usize + } + #[inline] + unsafe fn from_usize(ptr: usize) -> Self { + &mut *(ptr as *mut T) + } + unsafe fn with_ref R>(ptr: usize, f: F) -> R { + f(&*(&ptr as *const usize as *const Self)) + } +} diff --git a/compiler/rustc_data_structures/src/tagged_ptr/copy.rs b/compiler/rustc_data_structures/src/tagged_ptr/copy.rs new file mode 100644 index 000000000..e1d3e0bd3 --- /dev/null +++ b/compiler/rustc_data_structures/src/tagged_ptr/copy.rs @@ -0,0 +1,185 @@ +use super::{Pointer, Tag}; +use crate::stable_hasher::{HashStable, StableHasher}; +use std::fmt; +use std::marker::PhantomData; +use std::num::NonZeroUsize; + +/// A `Copy` TaggedPtr. +/// +/// You should use this instead of the `TaggedPtr` type in all cases where +/// `P: Copy`. +/// +/// If `COMPARE_PACKED` is true, then the pointers will be compared and hashed without +/// unpacking. Otherwise we don't implement PartialEq/Eq/Hash; if you want that, +/// wrap the TaggedPtr. +pub struct CopyTaggedPtr +where + P: Pointer, + T: Tag, +{ + packed: NonZeroUsize, + data: PhantomData<(P, T)>, +} + +impl Copy for CopyTaggedPtr +where + P: Pointer, + T: Tag, + P: Copy, +{ +} + +impl Clone for CopyTaggedPtr +where + P: Pointer, + T: Tag, + P: Copy, +{ + fn clone(&self) -> Self { + *self + } +} + +// We pack the tag into the *upper* bits of the pointer to ease retrieval of the +// value; a left shift is a multiplication and those are embeddable in +// instruction encoding. 
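// Illustrative, std-only sketch of that packing scheme (`_pack_demo` is a
// made-up helper, not used by this module): the address is shifted right by
// the tag width and the tag occupies the topmost bits, matching
// `CopyTaggedPtr::new`, `pointer_raw`, and `tag` below.
fn _pack_demo() {
    const TAG_BITS: u32 = 2;
    const TAG_SHIFT: u32 = usize::BITS - TAG_BITS;

    let addr: usize = 0x1000; // stand-in for a suitably aligned pointer address
    let tag: usize = 0b11;

    // Pack: drop the (zero) low bits of the address, put the tag on top.
    let packed = (addr >> TAG_BITS) | (tag << TAG_SHIFT);

    // Unpack: the tag comes back out of the top bits, the address via a left shift.
    assert_eq!(packed >> TAG_SHIFT, tag);
    assert_eq!(packed << TAG_BITS, addr);
}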
+impl CopyTaggedPtr +where + P: Pointer, + T: Tag, +{ + const TAG_BIT_SHIFT: usize = usize::BITS as usize - T::BITS; + const ASSERTION: () = { + assert!(T::BITS <= P::BITS); + // Used for the transmute_copy's below + assert!(std::mem::size_of::<&P::Target>() == std::mem::size_of::()); + }; + + pub fn new(pointer: P, tag: T) -> Self { + // Trigger assert! + let () = Self::ASSERTION; + let packed_tag = tag.into_usize() << Self::TAG_BIT_SHIFT; + + Self { + // SAFETY: We know that the pointer is non-null, as it must be + // dereferenceable per `Pointer` safety contract. + packed: unsafe { + NonZeroUsize::new_unchecked((P::into_usize(pointer) >> T::BITS) | packed_tag) + }, + data: PhantomData, + } + } + + pub(super) fn pointer_raw(&self) -> usize { + self.packed.get() << T::BITS + } + pub fn pointer(self) -> P + where + P: Copy, + { + // SAFETY: pointer_raw returns the original pointer + // + // Note that this isn't going to double-drop or anything because we have + // P: Copy + unsafe { P::from_usize(self.pointer_raw()) } + } + pub fn pointer_ref(&self) -> &P::Target { + // SAFETY: pointer_raw returns the original pointer + unsafe { std::mem::transmute_copy(&self.pointer_raw()) } + } + pub fn pointer_mut(&mut self) -> &mut P::Target + where + P: std::ops::DerefMut, + { + // SAFETY: pointer_raw returns the original pointer + unsafe { std::mem::transmute_copy(&self.pointer_raw()) } + } + #[inline] + pub fn tag(&self) -> T { + unsafe { T::from_usize(self.packed.get() >> Self::TAG_BIT_SHIFT) } + } + #[inline] + pub fn set_tag(&mut self, tag: T) { + let mut packed = self.packed.get(); + let new_tag = T::into_usize(tag) << Self::TAG_BIT_SHIFT; + let tag_mask = (1 << T::BITS) - 1; + packed &= !(tag_mask << Self::TAG_BIT_SHIFT); + packed |= new_tag; + self.packed = unsafe { NonZeroUsize::new_unchecked(packed) }; + } +} + +impl std::ops::Deref for CopyTaggedPtr +where + P: Pointer, + T: Tag, +{ + type Target = P::Target; + fn deref(&self) -> &Self::Target { + self.pointer_ref() + } +} + +impl std::ops::DerefMut for CopyTaggedPtr +where + P: Pointer + std::ops::DerefMut, + T: Tag, +{ + fn deref_mut(&mut self) -> &mut Self::Target { + self.pointer_mut() + } +} + +impl fmt::Debug for CopyTaggedPtr +where + P: Pointer, + P::Target: fmt::Debug, + T: Tag + fmt::Debug, +{ + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + f.debug_struct("CopyTaggedPtr") + .field("pointer", &self.pointer_ref()) + .field("tag", &self.tag()) + .finish() + } +} + +impl PartialEq for CopyTaggedPtr +where + P: Pointer, + T: Tag, +{ + fn eq(&self, other: &Self) -> bool { + self.packed == other.packed + } +} + +impl Eq for CopyTaggedPtr +where + P: Pointer, + T: Tag, +{ +} + +impl std::hash::Hash for CopyTaggedPtr +where + P: Pointer, + T: Tag, +{ + fn hash(&self, state: &mut H) { + self.packed.hash(state); + } +} + +impl HashStable for CopyTaggedPtr +where + P: Pointer + HashStable, + T: Tag + HashStable, +{ + fn hash_stable(&self, hcx: &mut HCX, hasher: &mut StableHasher) { + unsafe { + Pointer::with_ref(self.pointer_raw(), |p: &P| p.hash_stable(hcx, hasher)); + } + self.tag().hash_stable(hcx, hasher); + } +} diff --git a/compiler/rustc_data_structures/src/tagged_ptr/drop.rs b/compiler/rustc_data_structures/src/tagged_ptr/drop.rs new file mode 100644 index 000000000..d44ccd368 --- /dev/null +++ b/compiler/rustc_data_structures/src/tagged_ptr/drop.rs @@ -0,0 +1,133 @@ +use super::{Pointer, Tag}; +use crate::stable_hasher::{HashStable, StableHasher}; +use std::fmt; + +use super::CopyTaggedPtr; + +/// A TaggedPtr 
implementing `Drop`. +/// +/// If `COMPARE_PACKED` is true, then the pointers will be compared and hashed without +/// unpacking. Otherwise we don't implement PartialEq/Eq/Hash; if you want that, +/// wrap the TaggedPtr. +pub struct TaggedPtr +where + P: Pointer, + T: Tag, +{ + raw: CopyTaggedPtr, +} + +impl Clone for TaggedPtr +where + P: Pointer + Clone, + T: Tag, +{ + fn clone(&self) -> Self { + unsafe { Self::new(P::with_ref(self.raw.pointer_raw(), |p| p.clone()), self.raw.tag()) } + } +} + +// We pack the tag into the *upper* bits of the pointer to ease retrieval of the +// value; a right shift is a multiplication and those are embeddable in +// instruction encoding. +impl TaggedPtr +where + P: Pointer, + T: Tag, +{ + pub fn new(pointer: P, tag: T) -> Self { + TaggedPtr { raw: CopyTaggedPtr::new(pointer, tag) } + } + + pub fn pointer_ref(&self) -> &P::Target { + self.raw.pointer_ref() + } + pub fn tag(&self) -> T { + self.raw.tag() + } +} + +impl std::ops::Deref for TaggedPtr +where + P: Pointer, + T: Tag, +{ + type Target = P::Target; + fn deref(&self) -> &Self::Target { + self.raw.pointer_ref() + } +} + +impl std::ops::DerefMut for TaggedPtr +where + P: Pointer + std::ops::DerefMut, + T: Tag, +{ + fn deref_mut(&mut self) -> &mut Self::Target { + self.raw.pointer_mut() + } +} + +impl Drop for TaggedPtr +where + P: Pointer, + T: Tag, +{ + fn drop(&mut self) { + // No need to drop the tag, as it's Copy + unsafe { + std::mem::drop(P::from_usize(self.raw.pointer_raw())); + } + } +} + +impl fmt::Debug for TaggedPtr +where + P: Pointer, + P::Target: fmt::Debug, + T: Tag + fmt::Debug, +{ + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + f.debug_struct("TaggedPtr") + .field("pointer", &self.pointer_ref()) + .field("tag", &self.tag()) + .finish() + } +} + +impl PartialEq for TaggedPtr +where + P: Pointer, + T: Tag, +{ + fn eq(&self, other: &Self) -> bool { + self.raw.eq(&other.raw) + } +} + +impl Eq for TaggedPtr +where + P: Pointer, + T: Tag, +{ +} + +impl std::hash::Hash for TaggedPtr +where + P: Pointer, + T: Tag, +{ + fn hash(&self, state: &mut H) { + self.raw.hash(state); + } +} + +impl HashStable for TaggedPtr +where + P: Pointer + HashStable, + T: Tag + HashStable, +{ + fn hash_stable(&self, hcx: &mut HCX, hasher: &mut StableHasher) { + self.raw.hash_stable(hcx, hasher); + } +} diff --git a/compiler/rustc_data_structures/src/temp_dir.rs b/compiler/rustc_data_structures/src/temp_dir.rs new file mode 100644 index 000000000..a780d2386 --- /dev/null +++ b/compiler/rustc_data_structures/src/temp_dir.rs @@ -0,0 +1,34 @@ +use std::mem::ManuallyDrop; +use std::path::Path; +use tempfile::TempDir; + +/// This is used to avoid TempDir being dropped on error paths unintentionally. +#[derive(Debug)] +pub struct MaybeTempDir { + dir: ManuallyDrop, + // Whether the TempDir should be deleted on drop. + keep: bool, +} + +impl Drop for MaybeTempDir { + fn drop(&mut self) { + // SAFETY: We are in the destructor, and no further access will + // occur. 
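// Illustrative, std-only sketch of the `ManuallyDrop` pattern used in this
// destructor (`_KeepOrDrop` is a made-up type): the field is moved out exactly
// once in `drop`, and is then either deliberately kept alive or dropped
// normally.
struct _KeepOrDrop {
    inner: std::mem::ManuallyDrop<Vec<u8>>,
    keep: bool,
}
impl Drop for _KeepOrDrop {
    fn drop(&mut self) {
        // SAFETY: as above, this runs in the destructor and `inner` is never
        // touched again afterwards.
        let inner = unsafe { std::mem::ManuallyDrop::take(&mut self.inner) };
        if self.keep {
            std::mem::forget(inner); // keep: skip the buffer's destructor
        } // else: `inner` is dropped normally here
    }
}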
+ let dir = unsafe { ManuallyDrop::take(&mut self.dir) }; + if self.keep { + dir.into_path(); + } + } +} + +impl AsRef for MaybeTempDir { + fn as_ref(&self) -> &Path { + self.dir.path() + } +} + +impl MaybeTempDir { + pub fn new(dir: TempDir, keep_on_drop: bool) -> MaybeTempDir { + MaybeTempDir { dir: ManuallyDrop::new(dir), keep: keep_on_drop } + } +} diff --git a/compiler/rustc_data_structures/src/thin_vec.rs b/compiler/rustc_data_structures/src/thin_vec.rs new file mode 100644 index 000000000..716259142 --- /dev/null +++ b/compiler/rustc_data_structures/src/thin_vec.rs @@ -0,0 +1,135 @@ +use crate::stable_hasher::{HashStable, StableHasher}; + +use std::iter::FromIterator; + +/// A vector type optimized for cases where this size is usually 0 (cf. `SmallVec`). +/// The `Option>` wrapping allows us to represent a zero sized vector with `None`, +/// which uses only a single (null) pointer. +#[derive(Clone, Encodable, Decodable, Debug, Hash, Eq, PartialEq)] +pub struct ThinVec(Option>>); + +impl ThinVec { + pub fn new() -> Self { + ThinVec(None) + } + + pub fn iter(&self) -> std::slice::Iter<'_, T> { + self.into_iter() + } + + pub fn iter_mut(&mut self) -> std::slice::IterMut<'_, T> { + self.into_iter() + } + + pub fn push(&mut self, item: T) { + match *self { + ThinVec(Some(ref mut vec)) => vec.push(item), + ThinVec(None) => *self = vec![item].into(), + } + } +} + +impl From> for ThinVec { + fn from(vec: Vec) -> Self { + if vec.is_empty() { ThinVec(None) } else { ThinVec(Some(Box::new(vec))) } + } +} + +impl Into> for ThinVec { + fn into(self) -> Vec { + match self { + ThinVec(None) => Vec::new(), + ThinVec(Some(vec)) => *vec, + } + } +} + +impl ::std::ops::Deref for ThinVec { + type Target = [T]; + fn deref(&self) -> &[T] { + match *self { + ThinVec(None) => &[], + ThinVec(Some(ref vec)) => vec, + } + } +} + +impl ::std::ops::DerefMut for ThinVec { + fn deref_mut(&mut self) -> &mut [T] { + match *self { + ThinVec(None) => &mut [], + ThinVec(Some(ref mut vec)) => vec, + } + } +} + +impl FromIterator for ThinVec { + fn from_iter>(iter: I) -> Self { + // `Vec::from_iter()` should not allocate if the iterator is empty. + let vec: Vec<_> = iter.into_iter().collect(); + if vec.is_empty() { ThinVec(None) } else { ThinVec(Some(Box::new(vec))) } + } +} + +impl IntoIterator for ThinVec { + type Item = T; + type IntoIter = std::vec::IntoIter; + + fn into_iter(self) -> Self::IntoIter { + // This is still performant because `Vec::new()` does not allocate. 
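// Illustrative, std-only check of the size claim in the type's docs
// (`_thin_vec_is_one_word` is a made-up helper): thanks to the niche
// optimization, `Option<Box<_>>` is pointer-sized, while a plain `Vec` carries
// pointer, capacity, and length.
fn _thin_vec_is_one_word() {
    use std::mem::size_of;
    assert_eq!(size_of::<Option<Box<Vec<u64>>>>(), size_of::<usize>());
    // On current targets a `Vec` is three words wide.
    assert_eq!(size_of::<Vec<u64>>(), 3 * size_of::<usize>());
}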
+ self.0.map_or_else(Vec::new, |ptr| *ptr).into_iter() + } +} + +impl<'a, T> IntoIterator for &'a ThinVec { + type Item = &'a T; + type IntoIter = std::slice::Iter<'a, T>; + + fn into_iter(self) -> Self::IntoIter { + self.as_ref().iter() + } +} + +impl<'a, T> IntoIterator for &'a mut ThinVec { + type Item = &'a mut T; + type IntoIter = std::slice::IterMut<'a, T>; + + fn into_iter(self) -> Self::IntoIter { + self.as_mut().iter_mut() + } +} + +impl Extend for ThinVec { + fn extend>(&mut self, iter: I) { + match *self { + ThinVec(Some(ref mut vec)) => vec.extend(iter), + ThinVec(None) => *self = iter.into_iter().collect::>().into(), + } + } + + fn extend_one(&mut self, item: T) { + self.push(item) + } + + fn extend_reserve(&mut self, additional: usize) { + match *self { + ThinVec(Some(ref mut vec)) => vec.reserve(additional), + ThinVec(None) => *self = Vec::with_capacity(additional).into(), + } + } +} + +impl, CTX> HashStable for ThinVec { + fn hash_stable(&self, hcx: &mut CTX, hasher: &mut StableHasher) { + (**self).hash_stable(hcx, hasher) + } +} + +impl Default for ThinVec { + fn default() -> Self { + Self(None) + } +} + +#[cfg(test)] +mod tests; diff --git a/compiler/rustc_data_structures/src/thin_vec/tests.rs b/compiler/rustc_data_structures/src/thin_vec/tests.rs new file mode 100644 index 000000000..0221b9912 --- /dev/null +++ b/compiler/rustc_data_structures/src/thin_vec/tests.rs @@ -0,0 +1,42 @@ +use super::*; + +impl ThinVec { + fn into_vec(self) -> Vec { + self.into() + } +} + +#[test] +fn test_from_iterator() { + assert_eq!(std::iter::empty().collect::>().into_vec(), Vec::::new()); + assert_eq!(std::iter::once(42).collect::>().into_vec(), vec![42]); + assert_eq!([1, 2].into_iter().collect::>().into_vec(), vec![1, 2]); + assert_eq!([1, 2, 3].into_iter().collect::>().into_vec(), vec![1, 2, 3]); +} + +#[test] +fn test_into_iterator_owned() { + assert_eq!(ThinVec::new().into_iter().collect::>(), Vec::::new()); + assert_eq!(ThinVec::from(vec![1]).into_iter().collect::>(), vec![1]); + assert_eq!(ThinVec::from(vec![1, 2]).into_iter().collect::>(), vec![1, 2]); + assert_eq!(ThinVec::from(vec![1, 2, 3]).into_iter().collect::>(), vec![1, 2, 3]); +} + +#[test] +fn test_into_iterator_ref() { + assert_eq!(ThinVec::new().iter().collect::>(), Vec::<&String>::new()); + assert_eq!(ThinVec::from(vec![1]).iter().collect::>(), vec![&1]); + assert_eq!(ThinVec::from(vec![1, 2]).iter().collect::>(), vec![&1, &2]); + assert_eq!(ThinVec::from(vec![1, 2, 3]).iter().collect::>(), vec![&1, &2, &3]); +} + +#[test] +fn test_into_iterator_ref_mut() { + assert_eq!(ThinVec::new().iter_mut().collect::>(), Vec::<&mut String>::new()); + assert_eq!(ThinVec::from(vec![1]).iter_mut().collect::>(), vec![&mut 1]); + assert_eq!(ThinVec::from(vec![1, 2]).iter_mut().collect::>(), vec![&mut 1, &mut 2]); + assert_eq!( + ThinVec::from(vec![1, 2, 3]).iter_mut().collect::>(), + vec![&mut 1, &mut 2, &mut 3], + ); +} diff --git a/compiler/rustc_data_structures/src/tiny_list.rs b/compiler/rustc_data_structures/src/tiny_list.rs new file mode 100644 index 000000000..9b07f8684 --- /dev/null +++ b/compiler/rustc_data_structures/src/tiny_list.rs @@ -0,0 +1,81 @@ +//! A singly-linked list. +//! +//! Using this data structure only makes sense under very specific +//! circumstances: +//! +//! - If you have a list that rarely stores more than one element, then this +//! data-structure can store the element without allocating and only uses as +//! much space as an `Option<(T, usize)>`. If T can double as the `Option` +//! 
discriminant, it will even only be as large as `T, usize`. +//! +//! If you expect to store more than 1 element in the common case, steer clear +//! and use a `Vec`, `Box<[T]>`, or a `SmallVec`. + +#[cfg(test)] +mod tests; + +#[derive(Clone)] +pub struct TinyList { + head: Option>, +} + +impl TinyList { + #[inline] + pub fn new() -> TinyList { + TinyList { head: None } + } + + #[inline] + pub fn new_single(data: T) -> TinyList { + TinyList { head: Some(Element { data, next: None }) } + } + + #[inline] + pub fn insert(&mut self, data: T) { + self.head = Some(Element { data, next: self.head.take().map(Box::new) }); + } + + #[inline] + pub fn remove(&mut self, data: &T) -> bool { + self.head = match self.head { + Some(ref mut head) if head.data == *data => head.next.take().map(|x| *x), + Some(ref mut head) => return head.remove_next(data), + None => return false, + }; + true + } + + #[inline] + pub fn contains(&self, data: &T) -> bool { + let mut elem = self.head.as_ref(); + while let Some(ref e) = elem { + if &e.data == data { + return true; + } + elem = e.next.as_deref(); + } + false + } +} + +#[derive(Clone)] +struct Element { + data: T, + next: Option>>, +} + +impl Element { + fn remove_next(&mut self, data: &T) -> bool { + let mut n = self; + loop { + match n.next { + Some(ref mut next) if next.data == *data => { + n.next = next.next.take(); + return true; + } + Some(ref mut next) => n = next, + None => return false, + } + } + } +} diff --git a/compiler/rustc_data_structures/src/tiny_list/tests.rs b/compiler/rustc_data_structures/src/tiny_list/tests.rs new file mode 100644 index 000000000..c0334d2e2 --- /dev/null +++ b/compiler/rustc_data_structures/src/tiny_list/tests.rs @@ -0,0 +1,155 @@ +use super::*; + +extern crate test; +use test::{black_box, Bencher}; + +impl TinyList { + fn len(&self) -> usize { + let (mut elem, mut count) = (self.head.as_ref(), 0); + while let Some(ref e) = elem { + count += 1; + elem = e.next.as_deref(); + } + count + } +} + +#[test] +fn test_contains_and_insert() { + fn do_insert(i: u32) -> bool { + i % 2 == 0 + } + + let mut list = TinyList::new(); + + for i in 0..10 { + for j in 0..i { + if do_insert(j) { + assert!(list.contains(&j)); + } else { + assert!(!list.contains(&j)); + } + } + + assert!(!list.contains(&i)); + + if do_insert(i) { + list.insert(i); + assert!(list.contains(&i)); + } + } +} + +#[test] +fn test_remove_first() { + let mut list = TinyList::new(); + list.insert(1); + list.insert(2); + list.insert(3); + list.insert(4); + assert_eq!(list.len(), 4); + + assert!(list.remove(&4)); + assert!(!list.contains(&4)); + + assert_eq!(list.len(), 3); + assert!(list.contains(&1)); + assert!(list.contains(&2)); + assert!(list.contains(&3)); +} + +#[test] +fn test_remove_last() { + let mut list = TinyList::new(); + list.insert(1); + list.insert(2); + list.insert(3); + list.insert(4); + assert_eq!(list.len(), 4); + + assert!(list.remove(&1)); + assert!(!list.contains(&1)); + + assert_eq!(list.len(), 3); + assert!(list.contains(&2)); + assert!(list.contains(&3)); + assert!(list.contains(&4)); +} + +#[test] +fn test_remove_middle() { + let mut list = TinyList::new(); + list.insert(1); + list.insert(2); + list.insert(3); + list.insert(4); + assert_eq!(list.len(), 4); + + assert!(list.remove(&2)); + assert!(!list.contains(&2)); + + assert_eq!(list.len(), 3); + assert!(list.contains(&1)); + assert!(list.contains(&3)); + assert!(list.contains(&4)); +} + +#[test] +fn test_remove_single() { + let mut list = TinyList::new(); + list.insert(1); + assert_eq!(list.len(), 
1); + + assert!(list.remove(&1)); + assert!(!list.contains(&1)); + + assert_eq!(list.len(), 0); +} + +#[bench] +fn bench_insert_empty(b: &mut Bencher) { + b.iter(|| { + let mut list = black_box(TinyList::new()); + list.insert(1); + list + }) +} + +#[bench] +fn bench_insert_one(b: &mut Bencher) { + b.iter(|| { + let mut list = black_box(TinyList::new_single(0)); + list.insert(1); + list + }) +} + +#[bench] +fn bench_contains_empty(b: &mut Bencher) { + b.iter(|| black_box(TinyList::new()).contains(&1)); +} + +#[bench] +fn bench_contains_unknown(b: &mut Bencher) { + b.iter(|| black_box(TinyList::new_single(0)).contains(&1)); +} + +#[bench] +fn bench_contains_one(b: &mut Bencher) { + b.iter(|| black_box(TinyList::new_single(1)).contains(&1)); +} + +#[bench] +fn bench_remove_empty(b: &mut Bencher) { + b.iter(|| black_box(TinyList::new()).remove(&1)); +} + +#[bench] +fn bench_remove_unknown(b: &mut Bencher) { + b.iter(|| black_box(TinyList::new_single(0)).remove(&1)); +} + +#[bench] +fn bench_remove_one(b: &mut Bencher) { + b.iter(|| black_box(TinyList::new_single(1)).remove(&1)); +} diff --git a/compiler/rustc_data_structures/src/transitive_relation.rs b/compiler/rustc_data_structures/src/transitive_relation.rs new file mode 100644 index 000000000..0ff64969b --- /dev/null +++ b/compiler/rustc_data_structures/src/transitive_relation.rs @@ -0,0 +1,392 @@ +use crate::fx::FxIndexSet; +use crate::sync::Lock; +use rustc_index::bit_set::BitMatrix; +use std::fmt::Debug; +use std::hash::Hash; +use std::mem; + +#[cfg(test)] +mod tests; + +#[derive(Clone, Debug)] +pub struct TransitiveRelation { + // List of elements. This is used to map from a T to a usize. + elements: FxIndexSet, + + // List of base edges in the graph. Require to compute transitive + // closure. + edges: Vec, + + // This is a cached transitive closure derived from the edges. + // Currently, we build it lazily and just throw out any existing + // copy whenever a new edge is added. (The Lock is to permit + // the lazy computation.) This is kind of silly, except for the + // fact its size is tied to `self.elements.len()`, so I wanted to + // wait before building it up to avoid reallocating as new edges + // are added with new elements. Perhaps better would be to ask the + // user for a batch of edges to minimize this effect, but I + // already wrote the code this way. :P -nmatsakis + closure: Lock>>, +} + +// HACK(eddyb) manual impl avoids `Default` bound on `T`. +impl Default for TransitiveRelation { + fn default() -> Self { + TransitiveRelation { + elements: Default::default(), + edges: Default::default(), + closure: Default::default(), + } + } +} + +#[derive(Copy, Clone, PartialEq, Eq, PartialOrd, Debug)] +struct Index(usize); + +#[derive(Clone, PartialEq, Eq, Debug)] +struct Edge { + source: Index, + target: Index, +} + +impl TransitiveRelation { + pub fn is_empty(&self) -> bool { + self.edges.is_empty() + } + + pub fn elements(&self) -> impl Iterator { + self.elements.iter() + } + + fn index(&self, a: T) -> Option { + self.elements.get_index_of(&a).map(Index) + } + + fn add_index(&mut self, a: T) -> Index { + let (index, added) = self.elements.insert_full(a); + if added { + // if we changed the dimensions, clear the cache + *self.closure.get_mut() = None; + } + Index(index) + } + + /// Applies the (partial) function to each edge and returns a new + /// relation. If `f` returns `None` for any end-point, returns + /// `None`. 
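///
/// An illustrative sketch (kept out of doctests; the element types are just
/// stand-ins):
///
/// ```ignore (illustrative)
/// let mut rel = TransitiveRelation::default();
/// rel.add("a", "b");
/// rel.add("b", "c");
///
/// // Every end-point maps to `Some(..)`, so we get `Some(..)` back.
/// assert!(rel.maybe_map(|s| Some(s.len())).is_some());
///
/// // A single `None` end-point makes the whole result `None`.
/// assert!(rel.maybe_map(|s| if s == "b" { None } else { Some(s) }).is_none());
/// ```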
+ pub fn maybe_map(&self, mut f: F) -> Option> + where + F: FnMut(T) -> Option, + U: Clone + Debug + Eq + Hash + Copy, + { + let mut result = TransitiveRelation::default(); + for edge in &self.edges { + result.add(f(self.elements[edge.source.0])?, f(self.elements[edge.target.0])?); + } + Some(result) + } + + /// Indicate that `a < b` (where `<` is this relation) + pub fn add(&mut self, a: T, b: T) { + let a = self.add_index(a); + let b = self.add_index(b); + let edge = Edge { source: a, target: b }; + if !self.edges.contains(&edge) { + self.edges.push(edge); + + // added an edge, clear the cache + *self.closure.get_mut() = None; + } + } + + /// Checks whether `a < target` (transitively) + pub fn contains(&self, a: T, b: T) -> bool { + match (self.index(a), self.index(b)) { + (Some(a), Some(b)) => self.with_closure(|closure| closure.contains(a.0, b.0)), + (None, _) | (_, None) => false, + } + } + + /// Thinking of `x R y` as an edge `x -> y` in a graph, this + /// returns all things reachable from `a`. + /// + /// Really this probably ought to be `impl Iterator`, but + /// I'm too lazy to make that work, and -- given the caching + /// strategy -- it'd be a touch tricky anyhow. + pub fn reachable_from(&self, a: T) -> Vec { + match self.index(a) { + Some(a) => { + self.with_closure(|closure| closure.iter(a.0).map(|i| self.elements[i]).collect()) + } + None => vec![], + } + } + + /// Picks what I am referring to as the "postdominating" + /// upper-bound for `a` and `b`. This is usually the least upper + /// bound, but in cases where there is no single least upper + /// bound, it is the "mutual immediate postdominator", if you + /// imagine a graph where `a < b` means `a -> b`. + /// + /// This function is needed because region inference currently + /// requires that we produce a single "UB", and there is no best + /// choice for the LUB. Rather than pick arbitrarily, I pick a + /// less good, but predictable choice. This should help ensure + /// that region inference yields predictable results (though it + /// itself is not fully sufficient). + /// + /// Examples are probably clearer than any prose I could write + /// (there are corresponding tests below, btw). In each case, + /// the query is `postdom_upper_bound(a, b)`: + /// + /// ```text + /// // Returns Some(x), which is also LUB. + /// a -> a1 -> x + /// ^ + /// | + /// b -> b1 ---+ + /// + /// // Returns `Some(x)`, which is not LUB (there is none) + /// // diagonal edges run left-to-right. + /// a -> a1 -> x + /// \/ ^ + /// /\ | + /// b -> b1 ---+ + /// + /// // Returns `None`. + /// a -> a1 + /// b -> b1 + /// ``` + pub fn postdom_upper_bound(&self, a: T, b: T) -> Option { + let mubs = self.minimal_upper_bounds(a, b); + self.mutual_immediate_postdominator(mubs) + } + + /// Viewing the relation as a graph, computes the "mutual + /// immediate postdominator" of a set of points (if one + /// exists). See `postdom_upper_bound` for details. + pub fn mutual_immediate_postdominator<'a>(&'a self, mut mubs: Vec) -> Option { + loop { + match mubs.len() { + 0 => return None, + 1 => return Some(mubs[0]), + _ => { + let m = mubs.pop().unwrap(); + let n = mubs.pop().unwrap(); + mubs.extend(self.minimal_upper_bounds(n, m)); + } + } + } + } + + /// Returns the set of bounds `X` such that: + /// + /// - `a < X` and `b < X` + /// - there is no `Y != X` such that `a < Y` and `Y < X` + /// - except for the case where `X < a` (i.e., a strongly connected + /// component in the graph). 
In that case, the smallest + /// representative of the SCC is returned (as determined by the + /// internal indices). + /// + /// Note that this set can, in principle, have any size. + pub fn minimal_upper_bounds(&self, a: T, b: T) -> Vec { + let (Some(mut a), Some(mut b)) = (self.index(a), self.index(b)) else { + return vec![]; + }; + + // in some cases, there are some arbitrary choices to be made; + // it doesn't really matter what we pick, as long as we pick + // the same thing consistently when queried, so ensure that + // (a, b) are in a consistent relative order + if a > b { + mem::swap(&mut a, &mut b); + } + + let lub_indices = self.with_closure(|closure| { + // Easy case is when either a < b or b < a: + if closure.contains(a.0, b.0) { + return vec![b.0]; + } + if closure.contains(b.0, a.0) { + return vec![a.0]; + } + + // Otherwise, the tricky part is that there may be some c + // where a < c and b < c. In fact, there may be many such + // values. So here is what we do: + // + // 1. Find the vector `[X | a < X && b < X]` of all values + // `X` where `a < X` and `b < X`. In terms of the + // graph, this means all values reachable from both `a` + // and `b`. Note that this vector is also a set, but we + // use the term vector because the order matters + // to the steps below. + // - This vector contains upper bounds, but they are + // not minimal upper bounds. So you may have e.g. + // `[x, y, tcx, z]` where `x < tcx` and `y < tcx` and + // `z < x` and `z < y`: + // + // z --+---> x ----+----> tcx + // | | + // | | + // +---> y ----+ + // + // In this case, we really want to return just `[z]`. + // The following steps below achieve this by gradually + // reducing the list. + // 2. Pare down the vector using `pare_down`. This will + // remove elements from the vector that can be reached + // by an earlier element. + // - In the example above, this would convert `[x, y, + // tcx, z]` to `[x, y, z]`. Note that `x` and `y` are + // still in the vector; this is because while `z < x` + // (and `z < y`) holds, `z` comes after them in the + // vector. + // 3. Reverse the vector and repeat the pare down process. + // - In the example above, we would reverse to + // `[z, y, x]` and then pare down to `[z]`. + // 4. Reverse once more just so that we yield a vector in + // increasing order of index. Not necessary, but why not. + // + // I believe this algorithm yields a minimal set. The + // argument is that, after step 2, we know that no element + // can reach its successors (in the vector, not the graph). + // After step 3, we know that no element can reach any of + // its predecessors (because of step 2) nor successors + // (because we just called `pare_down`) + // + // This same algorithm is used in `parents` below. + + let mut candidates = closure.intersect_rows(a.0, b.0); // (1) + pare_down(&mut candidates, closure); // (2) + candidates.reverse(); // (3a) + pare_down(&mut candidates, closure); // (3b) + candidates + }); + + lub_indices + .into_iter() + .rev() // (4) + .map(|i| self.elements[i]) + .collect() + } + + /// Given an element A, returns the maximal set {B} of elements B + /// such that + /// + /// - A != B + /// - A R B is true + /// - for each i, j: `B[i]` R `B[j]` does not hold + /// + /// The intuition is that this moves "one step up" through a lattice + /// (where the relation is encoding the `<=` relation for the lattice). 
+ /// So e.g., if the relation is `->` and we have + /// + /// ```text + /// a -> b -> d -> f + /// | ^ + /// +--> c -> e ---+ + /// ``` + /// + /// then `parents(a)` returns `[b, c]`. The `postdom_parent` function + /// would further reduce this to just `f`. + pub fn parents(&self, a: T) -> Vec { + let Some(a) = self.index(a) else { + return vec![]; + }; + + // Steal the algorithm for `minimal_upper_bounds` above, but + // with a slight tweak. In the case where `a R a`, we remove + // that from the set of candidates. + let ancestors = self.with_closure(|closure| { + let mut ancestors = closure.intersect_rows(a.0, a.0); + + // Remove anything that can reach `a`. If this is a + // reflexive relation, this will include `a` itself. + ancestors.retain(|&e| !closure.contains(e, a.0)); + + pare_down(&mut ancestors, closure); // (2) + ancestors.reverse(); // (3a) + pare_down(&mut ancestors, closure); // (3b) + ancestors + }); + + ancestors + .into_iter() + .rev() // (4) + .map(|i| self.elements[i]) + .collect() + } + + fn with_closure(&self, op: OP) -> R + where + OP: FnOnce(&BitMatrix) -> R, + { + let mut closure_cell = self.closure.borrow_mut(); + let mut closure = closure_cell.take(); + if closure.is_none() { + closure = Some(self.compute_closure()); + } + let result = op(closure.as_ref().unwrap()); + *closure_cell = closure; + result + } + + fn compute_closure(&self) -> BitMatrix { + let mut matrix = BitMatrix::new(self.elements.len(), self.elements.len()); + let mut changed = true; + while changed { + changed = false; + for edge in &self.edges { + // add an edge from S -> T + changed |= matrix.insert(edge.source.0, edge.target.0); + + // add all outgoing edges from T into S + changed |= matrix.union_rows(edge.target.0, edge.source.0); + } + } + matrix + } + + /// Lists all the base edges in the graph: the initial _non-transitive_ set of element + /// relations, which will be later used as the basis for the transitive closure computation. + pub fn base_edges(&self) -> impl Iterator + '_ { + self.edges + .iter() + .map(move |edge| (self.elements[edge.source.0], self.elements[edge.target.0])) + } +} + +/// Pare down is used as a step in the LUB computation. It edits the +/// candidates array in place by removing any element j for which +/// there exists an earlier element i j. That is, +/// after you run `pare_down`, you know that for all elements that +/// remain in candidates, they cannot reach any of the elements that +/// come after them. +/// +/// Examples follow. Assume that a -> b -> c and x -> y -> z. +/// +/// - Input: `[a, b, x]`. Output: `[a, x]`. +/// - Input: `[b, a, x]`. Output: `[b, a, x]`. +/// - Input: `[a, x, b, y]`. Output: `[a, x]`. +fn pare_down(candidates: &mut Vec, closure: &BitMatrix) { + let mut i = 0; + while let Some(&candidate_i) = candidates.get(i) { + i += 1; + + let mut j = i; + let mut dead = 0; + while let Some(&candidate_j) = candidates.get(j) { + if closure.contains(candidate_i, candidate_j) { + // If `i` can reach `j`, then we can remove `j`. So just + // mark it as dead and move on; subsequent indices will be + // shifted into its place. 
+ dead += 1; + } else { + candidates[j - dead] = candidate_j; + } + j += 1; + } + candidates.truncate(j - dead); + } +} diff --git a/compiler/rustc_data_structures/src/transitive_relation/tests.rs b/compiler/rustc_data_structures/src/transitive_relation/tests.rs new file mode 100644 index 000000000..e1f4c7ee0 --- /dev/null +++ b/compiler/rustc_data_structures/src/transitive_relation/tests.rs @@ -0,0 +1,362 @@ +use super::*; + +impl TransitiveRelation { + /// A "best" parent in some sense. See `parents` and + /// `postdom_upper_bound` for more details. + fn postdom_parent(&self, a: T) -> Option { + self.mutual_immediate_postdominator(self.parents(a)) + } +} + +#[test] +fn test_one_step() { + let mut relation = TransitiveRelation::default(); + relation.add("a", "b"); + relation.add("a", "c"); + assert!(relation.contains("a", "c")); + assert!(relation.contains("a", "b")); + assert!(!relation.contains("b", "a")); + assert!(!relation.contains("a", "d")); +} + +#[test] +fn test_many_steps() { + let mut relation = TransitiveRelation::default(); + relation.add("a", "b"); + relation.add("a", "c"); + relation.add("a", "f"); + + relation.add("b", "c"); + relation.add("b", "d"); + relation.add("b", "e"); + + relation.add("e", "g"); + + assert!(relation.contains("a", "b")); + assert!(relation.contains("a", "c")); + assert!(relation.contains("a", "d")); + assert!(relation.contains("a", "e")); + assert!(relation.contains("a", "f")); + assert!(relation.contains("a", "g")); + + assert!(relation.contains("b", "g")); + + assert!(!relation.contains("a", "x")); + assert!(!relation.contains("b", "f")); +} + +#[test] +fn mubs_triangle() { + // a -> tcx + // ^ + // | + // b + let mut relation = TransitiveRelation::default(); + relation.add("a", "tcx"); + relation.add("b", "tcx"); + assert_eq!(relation.minimal_upper_bounds("a", "b"), vec!["tcx"]); + assert_eq!(relation.parents("a"), vec!["tcx"]); + assert_eq!(relation.parents("b"), vec!["tcx"]); +} + +#[test] +fn mubs_best_choice1() { + // 0 -> 1 <- 3 + // | ^ | + // | | | + // +--> 2 <--+ + // + // mubs(0,3) = [1] + + // This tests a particular state in the algorithm, in which we + // need the second pare down call to get the right result (after + // intersection, we have [1, 2], but 2 -> 1). + + let mut relation = TransitiveRelation::default(); + relation.add("0", "1"); + relation.add("0", "2"); + + relation.add("2", "1"); + + relation.add("3", "1"); + relation.add("3", "2"); + + assert_eq!(relation.minimal_upper_bounds("0", "3"), vec!["2"]); + assert_eq!(relation.parents("0"), vec!["2"]); + assert_eq!(relation.parents("2"), vec!["1"]); + assert!(relation.parents("1").is_empty()); +} + +#[test] +fn mubs_best_choice2() { + // 0 -> 1 <- 3 + // | | | + // | v | + // +--> 2 <--+ + // + // mubs(0,3) = [2] + + // Like the preceding test, but in this case intersection is [2, + // 1], and hence we rely on the first pare down call. + + let mut relation = TransitiveRelation::default(); + relation.add("0", "1"); + relation.add("0", "2"); + + relation.add("1", "2"); + + relation.add("3", "1"); + relation.add("3", "2"); + + assert_eq!(relation.minimal_upper_bounds("0", "3"), vec!["1"]); + assert_eq!(relation.parents("0"), vec!["1"]); + assert_eq!(relation.parents("1"), vec!["2"]); + assert!(relation.parents("2").is_empty()); +} + +#[test] +fn mubs_no_best_choice() { + // in this case, the intersection yields [1, 2], and the "pare + // down" calls find nothing to remove. 
+ let mut relation = TransitiveRelation::default(); + relation.add("0", "1"); + relation.add("0", "2"); + + relation.add("3", "1"); + relation.add("3", "2"); + + assert_eq!(relation.minimal_upper_bounds("0", "3"), vec!["1", "2"]); + assert_eq!(relation.parents("0"), vec!["1", "2"]); + assert_eq!(relation.parents("3"), vec!["1", "2"]); +} + +#[test] +fn mubs_best_choice_scc() { + // in this case, 1 and 2 form a cycle; we pick arbitrarily (but + // consistently). + + let mut relation = TransitiveRelation::default(); + relation.add("0", "1"); + relation.add("0", "2"); + + relation.add("1", "2"); + relation.add("2", "1"); + + relation.add("3", "1"); + relation.add("3", "2"); + + assert_eq!(relation.minimal_upper_bounds("0", "3"), vec!["1"]); + assert_eq!(relation.parents("0"), vec!["1"]); +} + +#[test] +fn pdub_crisscross() { + // diagonal edges run left-to-right + // a -> a1 -> x + // \/ ^ + // /\ | + // b -> b1 ---+ + + let mut relation = TransitiveRelation::default(); + relation.add("a", "a1"); + relation.add("a", "b1"); + relation.add("b", "a1"); + relation.add("b", "b1"); + relation.add("a1", "x"); + relation.add("b1", "x"); + + assert_eq!(relation.minimal_upper_bounds("a", "b"), vec!["a1", "b1"]); + assert_eq!(relation.postdom_upper_bound("a", "b"), Some("x")); + assert_eq!(relation.postdom_parent("a"), Some("x")); + assert_eq!(relation.postdom_parent("b"), Some("x")); +} + +#[test] +fn pdub_crisscross_more() { + // diagonal edges run left-to-right + // a -> a1 -> a2 -> a3 -> x + // \/ \/ ^ + // /\ /\ | + // b -> b1 -> b2 ---------+ + + let mut relation = TransitiveRelation::default(); + relation.add("a", "a1"); + relation.add("a", "b1"); + relation.add("b", "a1"); + relation.add("b", "b1"); + + relation.add("a1", "a2"); + relation.add("a1", "b2"); + relation.add("b1", "a2"); + relation.add("b1", "b2"); + + relation.add("a2", "a3"); + + relation.add("a3", "x"); + relation.add("b2", "x"); + + assert_eq!(relation.minimal_upper_bounds("a", "b"), vec!["a1", "b1"]); + assert_eq!(relation.minimal_upper_bounds("a1", "b1"), vec!["a2", "b2"]); + assert_eq!(relation.postdom_upper_bound("a", "b"), Some("x")); + + assert_eq!(relation.postdom_parent("a"), Some("x")); + assert_eq!(relation.postdom_parent("b"), Some("x")); +} + +#[test] +fn pdub_lub() { + // a -> a1 -> x + // ^ + // | + // b -> b1 ---+ + + let mut relation = TransitiveRelation::default(); + relation.add("a", "a1"); + relation.add("b", "b1"); + relation.add("a1", "x"); + relation.add("b1", "x"); + + assert_eq!(relation.minimal_upper_bounds("a", "b"), vec!["x"]); + assert_eq!(relation.postdom_upper_bound("a", "b"), Some("x")); + + assert_eq!(relation.postdom_parent("a"), Some("a1")); + assert_eq!(relation.postdom_parent("b"), Some("b1")); + assert_eq!(relation.postdom_parent("a1"), Some("x")); + assert_eq!(relation.postdom_parent("b1"), Some("x")); +} + +#[test] +fn mubs_intermediate_node_on_one_side_only() { + // a -> c -> d + // ^ + // | + // b + + // "digraph { a -> c -> d; b -> d; }", + let mut relation = TransitiveRelation::default(); + relation.add("a", "c"); + relation.add("c", "d"); + relation.add("b", "d"); + + assert_eq!(relation.minimal_upper_bounds("a", "b"), vec!["d"]); +} + +#[test] +fn mubs_scc_1() { + // +-------------+ + // | +----+ | + // | v | | + // a -> c -> d <-+ + // ^ + // | + // b + + // "digraph { a -> c -> d; d -> c; a -> d; b -> d; }", + let mut relation = TransitiveRelation::default(); + relation.add("a", "c"); + relation.add("c", "d"); + relation.add("d", "c"); + relation.add("a", "d"); + relation.add("b", 
"d"); + + assert_eq!(relation.minimal_upper_bounds("a", "b"), vec!["c"]); +} + +#[test] +fn mubs_scc_2() { + // +----+ + // v | + // a -> c -> d + // ^ ^ + // | | + // +--- b + + // "digraph { a -> c -> d; d -> c; b -> d; b -> c; }", + let mut relation = TransitiveRelation::default(); + relation.add("a", "c"); + relation.add("c", "d"); + relation.add("d", "c"); + relation.add("b", "d"); + relation.add("b", "c"); + + assert_eq!(relation.minimal_upper_bounds("a", "b"), vec!["c"]); +} + +#[test] +fn mubs_scc_3() { + // +---------+ + // v | + // a -> c -> d -> e + // ^ ^ + // | | + // b ---+ + + // "digraph { a -> c -> d -> e -> c; b -> d; b -> e; }", + let mut relation = TransitiveRelation::default(); + relation.add("a", "c"); + relation.add("c", "d"); + relation.add("d", "e"); + relation.add("e", "c"); + relation.add("b", "d"); + relation.add("b", "e"); + + assert_eq!(relation.minimal_upper_bounds("a", "b"), vec!["c"]); +} + +#[test] +fn mubs_scc_4() { + // +---------+ + // v | + // a -> c -> d -> e + // | ^ ^ + // +---------+ | + // | + // b ---+ + + // "digraph { a -> c -> d -> e -> c; a -> d; b -> e; }" + let mut relation = TransitiveRelation::default(); + relation.add("a", "c"); + relation.add("c", "d"); + relation.add("d", "e"); + relation.add("e", "c"); + relation.add("a", "d"); + relation.add("b", "e"); + + assert_eq!(relation.minimal_upper_bounds("a", "b"), vec!["c"]); +} + +#[test] +fn parent() { + // An example that was misbehaving in the compiler. + // + // 4 -> 1 -> 3 + // \ | / + // \ v / + // 2 -> 0 + // + // plus a bunch of self-loops + // + // Here `->` represents `<=` and `0` is `'static`. + + let pairs = vec![ + (2, /*->*/ 0), + (2, /*->*/ 2), + (0, /*->*/ 0), + (0, /*->*/ 0), + (1, /*->*/ 0), + (1, /*->*/ 1), + (3, /*->*/ 0), + (3, /*->*/ 3), + (4, /*->*/ 0), + (4, /*->*/ 1), + (1, /*->*/ 3), + ]; + + let mut relation = TransitiveRelation::default(); + for (a, b) in pairs { + relation.add(a, b); + } + + let p = relation.postdom_parent(3); + assert_eq!(p, Some(0)); +} diff --git a/compiler/rustc_data_structures/src/unhash.rs b/compiler/rustc_data_structures/src/unhash.rs new file mode 100644 index 000000000..48e21a9da --- /dev/null +++ b/compiler/rustc_data_structures/src/unhash.rs @@ -0,0 +1,29 @@ +use std::collections::{HashMap, HashSet}; +use std::hash::{BuildHasherDefault, Hasher}; + +pub type UnhashMap = HashMap>; +pub type UnhashSet = HashSet>; + +/// This no-op hasher expects only a single `write_u64` call. It's intended for +/// map keys that already have hash-like quality, like `Fingerprint`. 
+#[derive(Default)] +pub struct Unhasher { + value: u64, +} + +impl Hasher for Unhasher { + #[inline] + fn finish(&self) -> u64 { + self.value + } + + fn write(&mut self, _bytes: &[u8]) { + unimplemented!("use write_u64"); + } + + #[inline] + fn write_u64(&mut self, value: u64) { + debug_assert_eq!(0, self.value, "Unhasher doesn't mix values!"); + self.value = value; + } +} diff --git a/compiler/rustc_data_structures/src/vec_linked_list.rs b/compiler/rustc_data_structures/src/vec_linked_list.rs new file mode 100644 index 000000000..ce60d40b2 --- /dev/null +++ b/compiler/rustc_data_structures/src/vec_linked_list.rs @@ -0,0 +1,70 @@ +use rustc_index::vec::{Idx, IndexVec}; + +pub fn iter( + first: Option, + links: &Ls, +) -> impl Iterator + '_ +where + Ls: Links, +{ + VecLinkedListIterator { links, current: first } +} + +pub struct VecLinkedListIterator +where + Ls: Links, +{ + links: Ls, + current: Option, +} + +impl Iterator for VecLinkedListIterator +where + Ls: Links, +{ + type Item = Ls::LinkIndex; + + fn next(&mut self) -> Option { + if let Some(c) = self.current { + self.current = ::next(&self.links, c); + Some(c) + } else { + None + } + } +} + +pub trait Links { + type LinkIndex: Copy; + + fn next(links: &Self, index: Self::LinkIndex) -> Option; +} + +impl Links for &Ls +where + Ls: Links, +{ + type LinkIndex = Ls::LinkIndex; + + fn next(links: &Self, index: Ls::LinkIndex) -> Option { + ::next(links, index) + } +} + +pub trait LinkElem { + type LinkIndex: Copy; + + fn next(elem: &Self) -> Option; +} + +impl Links for IndexVec +where + E: LinkElem, + L: Idx, +{ + type LinkIndex = L; + + fn next(links: &Self, index: L) -> Option { + ::next(&links[index]) + } +} diff --git a/compiler/rustc_data_structures/src/vec_map.rs b/compiler/rustc_data_structures/src/vec_map.rs new file mode 100644 index 000000000..86be0bd87 --- /dev/null +++ b/compiler/rustc_data_structures/src/vec_map.rs @@ -0,0 +1,194 @@ +use std::borrow::Borrow; +use std::fmt::Debug; +use std::iter::FromIterator; +use std::slice::Iter; +use std::vec::IntoIter; + +use crate::stable_hasher::{HashStable, StableHasher}; + +/// A map type implemented as a vector of pairs `K` (key) and `V` (value). +/// It currently provides a subset of all the map operations, the rest could be added as needed. +#[derive(Clone, Encodable, Decodable, Debug)] +pub struct VecMap(Vec<(K, V)>); + +impl VecMap +where + K: Debug + PartialEq, + V: Debug, +{ + pub fn new() -> Self { + VecMap(Default::default()) + } + + /// Sets the value of the entry, and returns the entry's old value. + pub fn insert(&mut self, k: K, v: V) -> Option { + if let Some(elem) = self.0.iter_mut().find(|(key, _)| *key == k) { + Some(std::mem::replace(&mut elem.1, v)) + } else { + self.0.push((k, v)); + None + } + } + + /// Removes the entry from the map and returns the removed value + pub fn remove(&mut self, k: &K) -> Option { + self.0.iter().position(|(k2, _)| k2 == k).map(|pos| self.0.remove(pos).1) + } + + /// Gets a reference to the value in the entry. + pub fn get(&self, k: &Q) -> Option<&V> + where + K: Borrow, + Q: Eq, + { + self.0.iter().find(|(key, _)| k == key.borrow()).map(|elem| &elem.1) + } + + /// Gets a mutable reference to the value in the entry. + pub fn get_mut(&mut self, k: &Q) -> Option<&mut V> + where + K: Borrow, + Q: Eq, + { + self.0.iter_mut().find(|(key, _)| k == key.borrow()).map(|elem| &mut elem.1) + } + + /// Returns the any value corresponding to the supplied predicate filter. 
+ /// + /// The supplied predicate will be applied to each (key, value) pair and it will return a + /// reference to the values where the predicate returns `true`. + pub fn any_value_matching(&self, mut predicate: impl FnMut(&(K, V)) -> bool) -> Option<&V> { + self.0.iter().find(|kv| predicate(kv)).map(|elem| &elem.1) + } + + /// Returns the value corresponding to the supplied predicate filter. It crashes if there's + /// more than one matching element. + /// + /// The supplied predicate will be applied to each (key, value) pair and it will return a + /// reference to the value where the predicate returns `true`. + pub fn get_value_matching(&self, mut predicate: impl FnMut(&(K, V)) -> bool) -> Option<&V> { + let mut filter = self.0.iter().filter(|kv| predicate(kv)); + let (_, value) = filter.next()?; + // This should return just one element, otherwise it's a bug + assert!( + filter.next().is_none(), + "Collection {:#?} should have just one matching element", + self + ); + Some(value) + } + + /// Returns `true` if the map contains a value for the specified key. + /// + /// The key may be any borrowed form of the map's key type, + /// [`Eq`] on the borrowed form *must* match those for + /// the key type. + pub fn contains_key(&self, k: &Q) -> bool + where + K: Borrow, + Q: Eq, + { + self.get(k).is_some() + } + + /// Returns `true` if the map contains no elements. + pub fn is_empty(&self) -> bool { + self.0.is_empty() + } + + pub fn iter(&self) -> Iter<'_, (K, V)> { + self.into_iter() + } + + pub fn iter_mut(&mut self) -> impl Iterator { + self.into_iter() + } + + pub fn retain(&mut self, f: impl Fn(&(K, V)) -> bool) { + self.0.retain(f) + } +} + +impl Default for VecMap { + #[inline] + fn default() -> Self { + Self(Default::default()) + } +} + +impl From> for VecMap { + fn from(vec: Vec<(K, V)>) -> Self { + Self(vec) + } +} + +impl Into> for VecMap { + fn into(self) -> Vec<(K, V)> { + self.0 + } +} + +impl FromIterator<(K, V)> for VecMap { + fn from_iter>(iter: I) -> Self { + Self(iter.into_iter().collect()) + } +} + +impl<'a, K, V> IntoIterator for &'a VecMap { + type Item = &'a (K, V); + type IntoIter = Iter<'a, (K, V)>; + + #[inline] + fn into_iter(self) -> Self::IntoIter { + self.0.iter() + } +} + +impl<'a, K: 'a, V: 'a> IntoIterator for &'a mut VecMap { + type Item = (&'a K, &'a mut V); + type IntoIter = impl Iterator; + + #[inline] + fn into_iter(self) -> Self::IntoIter { + self.0.iter_mut().map(|(k, v)| (&*k, v)) + } +} + +impl IntoIterator for VecMap { + type Item = (K, V); + type IntoIter = IntoIter<(K, V)>; + + #[inline] + fn into_iter(self) -> Self::IntoIter { + self.0.into_iter() + } +} + +impl Extend<(K, V)> for VecMap { + fn extend>(&mut self, iter: I) { + for (k, v) in iter { + self.insert(k, v); + } + } + + fn extend_one(&mut self, (k, v): (K, V)) { + self.insert(k, v); + } + + fn extend_reserve(&mut self, additional: usize) { + self.0.extend_reserve(additional); + } +} + +impl HashStable for VecMap +where + K: HashStable + Eq, + V: HashStable, +{ + fn hash_stable(&self, hcx: &mut CTX, hasher: &mut StableHasher) { + self.0.hash_stable(hcx, hasher) + } +} + +#[cfg(test)] +mod tests; diff --git a/compiler/rustc_data_structures/src/vec_map/tests.rs b/compiler/rustc_data_structures/src/vec_map/tests.rs new file mode 100644 index 000000000..458b60077 --- /dev/null +++ b/compiler/rustc_data_structures/src/vec_map/tests.rs @@ -0,0 +1,48 @@ +use super::*; + +impl VecMap { + fn into_vec(self) -> Vec<(K, V)> { + self.0.into() + } +} + +#[test] +fn test_from_iterator() { + assert_eq!( + 
std::iter::empty().collect::>().into_vec(), + Vec::<(i32, bool)>::new() + ); + assert_eq!(std::iter::once((42, true)).collect::>().into_vec(), vec![(42, true)]); + assert_eq!( + [(1, true), (2, false)].into_iter().collect::>().into_vec(), + vec![(1, true), (2, false)] + ); +} + +#[test] +fn test_into_iterator_owned() { + assert_eq!(VecMap::new().into_iter().collect::>(), Vec::<(i32, bool)>::new()); + assert_eq!(VecMap::from(vec![(1, true)]).into_iter().collect::>(), vec![(1, true)]); + assert_eq!( + VecMap::from(vec![(1, true), (2, false)]).into_iter().collect::>(), + vec![(1, true), (2, false)] + ); +} + +#[test] +fn test_insert() { + let mut v = VecMap::new(); + assert_eq!(v.insert(1, true), None); + assert_eq!(v.insert(2, false), None); + assert_eq!(v.clone().into_vec(), vec![(1, true), (2, false)]); + assert_eq!(v.insert(1, false), Some(true)); + assert_eq!(v.into_vec(), vec![(1, false), (2, false)]); +} + +#[test] +fn test_get() { + let v = [(1, true), (2, false)].into_iter().collect::>(); + assert_eq!(v.get(&1), Some(&true)); + assert_eq!(v.get(&2), Some(&false)); + assert_eq!(v.get(&3), None); +} diff --git a/compiler/rustc_data_structures/src/work_queue.rs b/compiler/rustc_data_structures/src/work_queue.rs new file mode 100644 index 000000000..10317f1af --- /dev/null +++ b/compiler/rustc_data_structures/src/work_queue.rs @@ -0,0 +1,44 @@ +use rustc_index::bit_set::BitSet; +use rustc_index::vec::Idx; +use std::collections::VecDeque; + +/// A work queue is a handy data structure for tracking work left to +/// do. (For example, basic blocks left to process.) It is basically a +/// de-duplicating queue; so attempting to insert X if X is already +/// enqueued has no effect. This implementation assumes that the +/// elements are dense indices, so it can allocate the queue to size +/// and also use a bit set to track occupancy. +pub struct WorkQueue { + deque: VecDeque, + set: BitSet, +} + +impl WorkQueue { + /// Creates a new work queue that starts empty, where elements range from (0..len). + #[inline] + pub fn with_none(len: usize) -> Self { + WorkQueue { deque: VecDeque::with_capacity(len), set: BitSet::new_empty(len) } + } + + /// Attempt to enqueue `element` in the work queue. Returns false if it was already present. + #[inline] + pub fn insert(&mut self, element: T) -> bool { + if self.set.insert(element) { + self.deque.push_back(element); + true + } else { + false + } + } + + /// Attempt to pop an element from the work queue. + #[inline] + pub fn pop(&mut self) -> Option { + if let Some(element) = self.deque.pop_front() { + self.set.remove(element); + Some(element) + } else { + None + } + } +} -- cgit v1.2.3
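The `compute_closure` / `pare_down` / `minimal_upper_bounds` pipeline in `transitive_relation.rs` above can be exercised outside the compiler. The sketch below is a standalone, std-only restatement of that pipeline: it replaces the crate's `BitMatrix` with a plain `Vec<Vec<bool>>` reachability matrix and the interned elements with bare `usize` indices, so the function names and shapes here are illustrative rather than the crate's API. It reproduces the fixed-point closure, the dead-counting `pare_down` loop, and the pare / reverse / pare / reverse trick, checked against the same graph as the `mubs_best_choice1` test.

```rust
// Reachability closure of `edges` over `n` nodes, computed by iterating to a
// fixed point as the patch's `compute_closure` does: record the edge S -> T,
// then union T's row into S's row, until nothing changes.
fn compute_closure(n: usize, edges: &[(usize, usize)]) -> Vec<Vec<bool>> {
    let mut m = vec![vec![false; n]; n];
    let mut changed = true;
    while changed {
        changed = false;
        for &(s, t) in edges {
            if !m[s][t] {
                m[s][t] = true;
                changed = true;
            }
            for k in 0..n {
                if m[t][k] && !m[s][k] {
                    m[s][k] = true;
                    changed = true;
                }
            }
        }
    }
    m
}

// Remove every candidate that an *earlier* candidate can reach, preserving
// relative order -- the same contract as `pare_down` above, including the
// dead-counting compaction of the vector.
fn pare_down(candidates: &mut Vec<usize>, closure: &[Vec<bool>]) {
    let mut i = 0;
    while i < candidates.len() {
        let candidate_i = candidates[i];
        i += 1;

        let mut j = i;
        let mut dead = 0;
        while j < candidates.len() {
            let candidate_j = candidates[j];
            if closure[candidate_i][candidate_j] {
                // `i` reaches `j`, so `j` is not minimal: count it dead.
                dead += 1;
            } else {
                candidates[j - dead] = candidate_j;
            }
            j += 1;
        }
        candidates.truncate(j - dead);
    }
}

// Minimal upper bounds of {a, b}: intersect the rows, then pare down in both
// directions so nothing left can reach anything else that is left.
fn minimal_upper_bounds(a: usize, b: usize, closure: &[Vec<bool>]) -> Vec<usize> {
    if closure[a][b] {
        return vec![b];
    }
    if closure[b][a] {
        return vec![a];
    }
    let n = closure.len();
    let mut candidates: Vec<usize> =
        (0..n).filter(|&x| closure[a][x] && closure[b][x]).collect(); // (1)
    pare_down(&mut candidates, closure); // (2)
    candidates.reverse(); // (3a)
    pare_down(&mut candidates, closure); // (3b)
    candidates.reverse(); // (4)
    candidates
}

fn main() {
    // Same shape as the `mubs_best_choice1` test: 0 -> 1, 0 -> 2, 2 -> 1,
    // 3 -> 1, 3 -> 2. Both 1 and 2 are upper bounds of {0, 3}, but 2 -> 1,
    // so only 2 survives the second pare-down pass.
    let edges = [(0, 1), (0, 2), (2, 1), (3, 1), (3, 2)];
    let closure = compute_closure(4, &edges);
    let mubs = minimal_upper_bounds(0, 3, &closure);
    assert_eq!(mubs, vec![2]);
    println!("mubs(0, 3) = {:?}", mubs);
}
```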
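The `Unhasher` in `unhash.rs` above only makes sense for keys whose `Hash` impl performs exactly one `write_u64` call, such as `Fingerprint` inside rustc, or plain `u64` in std (whose `Hash` impl forwards a single `write_u64`). The sketch below re-states a pared-down version of the hasher so it compiles on its own, omitting the patch's `#[inline]` attributes and debug assertion; inside the compiler you would use `UnhashMap` from this file directly.

```rust
use std::collections::HashMap;
use std::hash::{BuildHasherDefault, Hasher};

// Minimal restatement of the patch's no-op hasher for illustration only.
#[derive(Default)]
struct Unhasher {
    value: u64,
}

impl Hasher for Unhasher {
    fn finish(&self) -> u64 {
        self.value
    }
    fn write(&mut self, _bytes: &[u8]) {
        unimplemented!("use write_u64");
    }
    fn write_u64(&mut self, value: u64) {
        // The key is already hash-like, so pass it straight through.
        self.value = value;
    }
}

type UnhashMap<K, V> = HashMap<K, V, BuildHasherDefault<Unhasher>>;

fn main() {
    // `u64` satisfies the "single write_u64" contract, so the map skips any
    // real mixing work and uses the key bits as the hash directly.
    let mut map: UnhashMap<u64, &str> = UnhashMap::default();
    map.insert(0x9e3779b97f4a7c15, "already well-mixed key");
    assert_eq!(map.get(&0x9e3779b97f4a7c15), Some(&"already well-mixed key"));
}
```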
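The `vec_linked_list` module above threads linked lists through an `IndexVec`: each element stores the index of its successor, so many short lists can share one flat allocation and be walked by index. The sketch below shows the same idea with std types only, using `Vec` in place of `IndexVec` and a closure-based iterator in place of the `Links`/`LinkElem` traits; `Node` and `iter_list` are illustrative names, not the crate's API.

```rust
// A list node living inside a flat vector; `next` plays the role of
// `LinkElem::next` in the patch.
struct Node {
    value: &'static str,
    next: Option<usize>,
}

// Walk a list starting at `first` by chasing `next` indices, like
// `vec_linked_list::iter`.
fn iter_list(first: Option<usize>, nodes: &[Node]) -> impl Iterator<Item = usize> + '_ {
    let mut current = first;
    std::iter::from_fn(move || {
        let c = current?;
        current = nodes[c].next;
        Some(c)
    })
}

fn main() {
    // Two interleaved lists stored in one vector:
    //   list A: 0 -> 2 -> 4      list B: 1 -> 3
    let nodes = vec![
        Node { value: "a0", next: Some(2) },
        Node { value: "b0", next: Some(3) },
        Node { value: "a1", next: Some(4) },
        Node { value: "b1", next: None },
        Node { value: "a2", next: None },
    ];
    let a: Vec<_> = iter_list(Some(0), &nodes).map(|i| nodes[i].value).collect();
    let b: Vec<_> = iter_list(Some(1), &nodes).map(|i| nodes[i].value).collect();
    assert_eq!(a, ["a0", "a1", "a2"]);
    assert_eq!(b, ["b0", "b1"]);
}
```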
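The `VecMap` above is deliberately simple: the map is just a vector of `(key, value)` pairs, so every lookup and insert is a linear scan and iteration follows insertion order. That trade-off only pays off while the map stays small. The std-only fragment below illustrates the behaviour the `vec_map` tests rely on; it is not the crate's type, just the same pattern spelled out on a bare `Vec`.

```rust
// Insert-or-replace over a vector of pairs, mirroring `VecMap::insert`.
fn insert(map: &mut Vec<(u32, &'static str)>, k: u32, v: &'static str) {
    match map.iter_mut().find(|(key, _)| *key == k) {
        Some(entry) => entry.1 = v,
        None => map.push((k, v)),
    }
}

fn main() {
    let mut map: Vec<(u32, &'static str)> = Vec::new();
    insert(&mut map, 1, "one");
    insert(&mut map, 2, "two");
    insert(&mut map, 1, "uno"); // replaces in place, no duplicate entry

    assert_eq!(map, vec![(1, "uno"), (2, "two")]); // insertion order preserved
    // Lookup is `Iterator::find`, i.e. O(n) in the number of entries.
    assert_eq!(map.iter().find(|(k, _)| *k == 2).map(|(_, v)| *v), Some("two"));
}
```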
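Finally, `work_queue.rs` above pairs a FIFO with an occupancy set so that enqueueing an element that is already pending is a no-op, which is the usual shape of a dataflow worklist over basic blocks. The sketch below reproduces that behaviour with std types only, using a `Vec<bool>` where rustc uses a `BitSet` of dense indices; the names are illustrative, not the crate's API.

```rust
use std::collections::VecDeque;

// A de-duplicating FIFO over dense indices in 0..len.
struct WorkQueue {
    deque: VecDeque<usize>,
    queued: Vec<bool>,
}

impl WorkQueue {
    fn new(len: usize) -> Self {
        WorkQueue { deque: VecDeque::with_capacity(len), queued: vec![false; len] }
    }

    /// Returns `false` if `element` is already pending.
    fn insert(&mut self, element: usize) -> bool {
        if self.queued[element] {
            return false;
        }
        self.queued[element] = true;
        self.deque.push_back(element);
        true
    }

    fn pop(&mut self) -> Option<usize> {
        let element = self.deque.pop_front()?;
        self.queued[element] = false;
        Some(element)
    }
}

fn main() {
    // Typical use: seed the queue, then re-queue successors whose state
    // changed; duplicates are filtered automatically while an item is pending.
    let mut queue = WorkQueue::new(4);
    assert!(queue.insert(0));
    assert!(queue.insert(2));
    assert!(!queue.insert(0)); // already pending -> no effect
    assert_eq!(queue.pop(), Some(0));
    assert!(queue.insert(0)); // popped, so it may be queued again
    assert_eq!(queue.pop(), Some(2));
    assert_eq!(queue.pop(), Some(0));
    assert_eq!(queue.pop(), None);
}
```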