diff options
author | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-04-19 00:47:55 +0000 |
---|---|---|
committer | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-04-19 00:47:55 +0000 |
commit | 26a029d407be480d791972afb5975cf62c9360a6 (patch) | |
tree | f435a8308119effd964b339f76abb83a57c29483 /third_party/rust/ahash/src | |
parent | Initial commit. (diff) | |
download | firefox-26a029d407be480d791972afb5975cf62c9360a6.tar.xz firefox-26a029d407be480d791972afb5975cf62c9360a6.zip |
Adding upstream version 124.0.1.upstream/124.0.1
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'third_party/rust/ahash/src')
-rw-r--r-- | third_party/rust/ahash/src/aes_hash.rs | 439 | ||||
-rw-r--r-- | third_party/rust/ahash/src/convert.rs | 167 | ||||
-rw-r--r-- | third_party/rust/ahash/src/fallback_hash.rs | 392 | ||||
-rw-r--r-- | third_party/rust/ahash/src/hash_map.rs | 371 | ||||
-rw-r--r-- | third_party/rust/ahash/src/hash_quality_test.rs | 483 | ||||
-rw-r--r-- | third_party/rust/ahash/src/hash_set.rs | 313 | ||||
-rw-r--r-- | third_party/rust/ahash/src/lib.rs | 263 | ||||
-rw-r--r-- | third_party/rust/ahash/src/operations.rs | 330 | ||||
-rw-r--r-- | third_party/rust/ahash/src/random_state.rs | 358 | ||||
-rw-r--r-- | third_party/rust/ahash/src/specialize.rs | 239 |
10 files changed, 3355 insertions, 0 deletions
diff --git a/third_party/rust/ahash/src/aes_hash.rs b/third_party/rust/ahash/src/aes_hash.rs new file mode 100644 index 0000000000..1c98582cee --- /dev/null +++ b/third_party/rust/ahash/src/aes_hash.rs @@ -0,0 +1,439 @@ +use crate::convert::*; +#[cfg(feature = "specialize")] +use crate::fallback_hash::MULTIPLE; +use crate::operations::*; +use crate::RandomState; +use core::hash::Hasher; +use crate::random_state::PI; + +/// A `Hasher` for hashing an arbitrary stream of bytes. +/// +/// Instances of [`AHasher`] represent state that is updated while hashing data. +/// +/// Each method updates the internal state based on the new data provided. Once +/// all of the data has been provided, the resulting hash can be obtained by calling +/// `finish()` +/// +/// [Clone] is also provided in case you wish to calculate hashes for two different items that +/// start with the same data. +/// +#[derive(Debug, Clone)] +pub struct AHasher { + enc: u128, + sum: u128, + key: u128, +} + +impl AHasher { + /// Creates a new hasher keyed to the provided keys. + /// + /// Normally hashers are created via `AHasher::default()` for fixed keys or `RandomState::new()` for randomly + /// generated keys and `RandomState::with_seeds(a,b)` for seeds that are set and can be reused. All of these work at + /// map creation time (and hence don't have any overhead on a per-item bais). + /// + /// This method directly creates the hasher instance and performs no transformation on the provided seeds. This may + /// be useful where a HashBuilder is not desired, such as for testing purposes. + /// + /// # Example + /// + /// ``` + /// use std::hash::Hasher; + /// use ahash::AHasher; + /// + /// let mut hasher = AHasher::new_with_keys(1234, 5678); + /// + /// hasher.write_u32(1989); + /// hasher.write_u8(11); + /// hasher.write_u8(9); + /// hasher.write(b"Huh?"); + /// + /// println!("Hash is {:x}!", hasher.finish()); + /// ``` + #[inline] + pub fn new_with_keys(key1: u128, key2: u128) -> Self { + let pi: [u128; 2] = PI.convert(); + let key1 = key1 ^ pi[0]; + let key2 = key2 ^ pi[1]; + Self { + enc: key1, + sum: key2, + key: key1 ^ key2, + } + } + + #[allow(unused)] // False positive + pub(crate) fn test_with_keys(key1: u128, key2: u128) -> Self { + Self { + enc: key1, + sum: key2, + key: key1 ^ key2, + } + } + + + #[inline] + pub(crate) fn from_random_state(rand_state: &RandomState) -> Self { + let key1 = [rand_state.k0, rand_state.k1].convert(); + let key2 = [rand_state.k2, rand_state.k3].convert(); + Self { + enc: key1, + sum: key2, + key: key1 ^ key2, + } + } + + #[inline(always)] + fn add_in_length(&mut self, length: u64) { + //This will be scrambled by the next AES round. + let mut enc: [u64; 2] = self.enc.convert(); + enc[0] = enc[0].wrapping_add(length); + self.enc = enc.convert(); + } + + #[inline(always)] + fn hash_in(&mut self, new_value: u128) { + self.enc = aesenc(self.enc, new_value); + self.sum = shuffle_and_add(self.sum, new_value); + } + + #[inline(always)] + fn hash_in_2(&mut self, v1: u128, v2: u128) { + self.enc = aesenc(self.enc, v1); + self.sum = shuffle_and_add(self.sum, v1); + self.enc = aesenc(self.enc, v2); + self.sum = shuffle_and_add(self.sum, v2); + } + + #[inline] + #[cfg(feature = "specialize")] + fn short_finish(&self) -> u64 { + let combined = aesdec(self.sum, self.enc); + let result: [u64; 2] = aesenc(combined, combined).convert(); + result[0] + } +} + +/// Provides [Hasher] methods to hash all of the primitive types. +/// +/// [Hasher]: core::hash::Hasher +impl Hasher for AHasher { + #[inline] + fn write_u8(&mut self, i: u8) { + self.write_u64(i as u64); + } + + #[inline] + fn write_u16(&mut self, i: u16) { + self.write_u64(i as u64); + } + + #[inline] + fn write_u32(&mut self, i: u32) { + self.write_u64(i as u64); + } + + #[inline] + fn write_u128(&mut self, i: u128) { + self.hash_in(i); + } + + #[inline] + #[cfg(any(target_pointer_width = "64", target_pointer_width = "32", target_pointer_width = "16"))] + fn write_usize(&mut self, i: usize) { + self.write_u64(i as u64); + } + + #[inline] + #[cfg(target_pointer_width = "128")] + fn write_usize(&mut self, i: usize) { + self.write_u128(i as u128); + } + + #[inline] + fn write_u64(&mut self, i: u64) { + self.write_u128(i as u128); + } + + #[inline] + #[allow(clippy::collapsible_if)] + fn write(&mut self, input: &[u8]) { + let mut data = input; + let length = data.len(); + self.add_in_length(length as u64); + //A 'binary search' on sizes reduces the number of comparisons. + if data.len() <= 8 { + let value = read_small(data); + self.hash_in(value.convert()); + } else { + if data.len() > 32 { + if data.len() > 64 { + let tail = data.read_last_u128x4(); + let mut current: [u128; 4] = [self.key; 4]; + current[0] = aesenc(current[0], tail[0]); + current[1] = aesenc(current[1], tail[1]); + current[2] = aesenc(current[2], tail[2]); + current[3] = aesenc(current[3], tail[3]); + let mut sum: [u128; 2] = [self.key, self.key]; + sum[0] = add_by_64s(sum[0].convert(), tail[0].convert()).convert(); + sum[1] = add_by_64s(sum[1].convert(), tail[1].convert()).convert(); + sum[0] = shuffle_and_add(sum[0], tail[2]); + sum[1] = shuffle_and_add(sum[1], tail[3]); + while data.len() > 64 { + let (blocks, rest) = data.read_u128x4(); + current[0] = aesenc(current[0], blocks[0]); + current[1] = aesenc(current[1], blocks[1]); + current[2] = aesenc(current[2], blocks[2]); + current[3] = aesenc(current[3], blocks[3]); + sum[0] = shuffle_and_add(sum[0], blocks[0]); + sum[1] = shuffle_and_add(sum[1], blocks[1]); + sum[0] = shuffle_and_add(sum[0], blocks[2]); + sum[1] = shuffle_and_add(sum[1], blocks[3]); + data = rest; + } + self.hash_in_2(aesenc(current[0], current[1]), aesenc(current[2], current[3])); + self.hash_in(add_by_64s(sum[0].convert(), sum[1].convert()).convert()); + } else { + //len 33-64 + let (head, _) = data.read_u128x2(); + let tail = data.read_last_u128x2(); + self.hash_in_2(head[0], head[1]); + self.hash_in_2(tail[0], tail[1]); + } + } else { + if data.len() > 16 { + //len 17-32 + self.hash_in_2(data.read_u128().0, data.read_last_u128()); + } else { + //len 9-16 + let value: [u64; 2] = [data.read_u64().0, data.read_last_u64()]; + self.hash_in(value.convert()); + } + } + } + } + #[inline] + fn finish(&self) -> u64 { + let combined = aesdec(self.sum, self.enc); + let result: [u64; 2] = aesenc(aesenc(combined, self.key), combined).convert(); + result[0] + } +} + +#[cfg(feature = "specialize")] +pub(crate) struct AHasherU64 { + pub(crate) buffer: u64, + pub(crate) pad: u64, +} + +/// A specialized hasher for only primitives under 64 bits. +#[cfg(feature = "specialize")] +impl Hasher for AHasherU64 { + #[inline] + fn finish(&self) -> u64 { + let rot = (self.pad & 63) as u32; + self.buffer.rotate_left(rot) + } + + #[inline] + fn write(&mut self, _bytes: &[u8]) { + unreachable!("Specialized hasher was called with a different type of object") + } + + #[inline] + fn write_u8(&mut self, i: u8) { + self.write_u64(i as u64); + } + + #[inline] + fn write_u16(&mut self, i: u16) { + self.write_u64(i as u64); + } + + #[inline] + fn write_u32(&mut self, i: u32) { + self.write_u64(i as u64); + } + + #[inline] + fn write_u64(&mut self, i: u64) { + self.buffer = folded_multiply(i ^ self.buffer, MULTIPLE); + } + + #[inline] + fn write_u128(&mut self, _i: u128) { + unreachable!("Specialized hasher was called with a different type of object") + } + + #[inline] + fn write_usize(&mut self, _i: usize) { + unreachable!("Specialized hasher was called with a different type of object") + } +} + +#[cfg(feature = "specialize")] +pub(crate) struct AHasherFixed(pub AHasher); + +/// A specialized hasher for fixed size primitives larger than 64 bits. +#[cfg(feature = "specialize")] +impl Hasher for AHasherFixed { + #[inline] + fn finish(&self) -> u64 { + self.0.short_finish() + } + + #[inline] + fn write(&mut self, bytes: &[u8]) { + self.0.write(bytes) + } + + #[inline] + fn write_u8(&mut self, i: u8) { + self.write_u64(i as u64); + } + + #[inline] + fn write_u16(&mut self, i: u16) { + self.write_u64(i as u64); + } + + #[inline] + fn write_u32(&mut self, i: u32) { + self.write_u64(i as u64); + } + + #[inline] + fn write_u64(&mut self, i: u64) { + self.0.write_u64(i); + } + + #[inline] + fn write_u128(&mut self, i: u128) { + self.0.write_u128(i); + } + + #[inline] + fn write_usize(&mut self, i: usize) { + self.0.write_usize(i); + } +} + +#[cfg(feature = "specialize")] +pub(crate) struct AHasherStr(pub AHasher); + +/// A specialized hasher for strings +/// Note that the other types don't panic because the hash impl for String tacks on an unneeded call. (As does vec) +#[cfg(feature = "specialize")] +impl Hasher for AHasherStr { + #[inline] + fn finish(&self) -> u64 { + let result : [u64; 2] = self.0.enc.convert(); + result[0] + } + + #[inline] + fn write(&mut self, bytes: &[u8]) { + if bytes.len() > 8 { + self.0.write(bytes); + self.0.enc = aesdec(self.0.sum, self.0.enc); + self.0.enc = aesenc(aesenc(self.0.enc, self.0.key), self.0.enc); + } else { + self.0.add_in_length(bytes.len() as u64); + let value = read_small(bytes).convert(); + self.0.sum = shuffle_and_add(self.0.sum, value); + self.0.enc = aesdec(self.0.sum, self.0.enc); + self.0.enc = aesenc(aesenc(self.0.enc, self.0.key), self.0.enc); + } + } + + #[inline] + fn write_u8(&mut self, _i: u8) {} + + #[inline] + fn write_u16(&mut self, _i: u16) {} + + #[inline] + fn write_u32(&mut self, _i: u32) {} + + #[inline] + fn write_u64(&mut self, _i: u64) {} + + #[inline] + fn write_u128(&mut self, _i: u128) {} + + #[inline] + fn write_usize(&mut self, _i: usize) {} +} + +#[cfg(test)] +mod tests { + use super::*; + use crate::convert::Convert; + use crate::operations::aesenc; + use crate::RandomState; + use std::hash::{BuildHasher, Hasher}; + #[test] + fn test_sanity() { + let mut hasher = RandomState::with_seeds(1, 2, 3, 4).build_hasher(); + hasher.write_u64(0); + let h1 = hasher.finish(); + hasher.write(&[1, 0, 0, 0, 0, 0, 0, 0]); + let h2 = hasher.finish(); + assert_ne!(h1, h2); + } + + #[cfg(feature = "compile-time-rng")] + #[test] + fn test_builder() { + use std::collections::HashMap; + use std::hash::BuildHasherDefault; + + let mut map = HashMap::<u32, u64, BuildHasherDefault<AHasher>>::default(); + map.insert(1, 3); + } + + #[cfg(feature = "compile-time-rng")] + #[test] + fn test_default() { + let hasher_a = AHasher::default(); + let a_enc: [u64; 2] = hasher_a.enc.convert(); + let a_sum: [u64; 2] = hasher_a.sum.convert(); + assert_ne!(0, a_enc[0]); + assert_ne!(0, a_enc[1]); + assert_ne!(0, a_sum[0]); + assert_ne!(0, a_sum[1]); + assert_ne!(a_enc[0], a_enc[1]); + assert_ne!(a_sum[0], a_sum[1]); + assert_ne!(a_enc[0], a_sum[0]); + assert_ne!(a_enc[1], a_sum[1]); + let hasher_b = AHasher::default(); + let b_enc: [u64; 2] = hasher_b.enc.convert(); + let b_sum: [u64; 2] = hasher_b.sum.convert(); + assert_eq!(a_enc[0], b_enc[0]); + assert_eq!(a_enc[1], b_enc[1]); + assert_eq!(a_sum[0], b_sum[0]); + assert_eq!(a_sum[1], b_sum[1]); + } + + #[test] + fn test_hash() { + let mut result: [u64; 2] = [0x6c62272e07bb0142, 0x62b821756295c58d]; + let value: [u64; 2] = [1 << 32, 0xFEDCBA9876543210]; + result = aesenc(value.convert(), result.convert()).convert(); + result = aesenc(result.convert(), result.convert()).convert(); + let mut result2: [u64; 2] = [0x6c62272e07bb0142, 0x62b821756295c58d]; + let value2: [u64; 2] = [1, 0xFEDCBA9876543210]; + result2 = aesenc(value2.convert(), result2.convert()).convert(); + result2 = aesenc(result2.convert(), result.convert()).convert(); + let result: [u8; 16] = result.convert(); + let result2: [u8; 16] = result2.convert(); + assert_ne!(hex::encode(result), hex::encode(result2)); + } + + #[test] + fn test_conversion() { + let input: &[u8] = "dddddddd".as_bytes(); + let bytes: u64 = as_array!(input, 8).convert(); + assert_eq!(bytes, 0x6464646464646464); + } +} + diff --git a/third_party/rust/ahash/src/convert.rs b/third_party/rust/ahash/src/convert.rs new file mode 100644 index 0000000000..4c0a00eb7c --- /dev/null +++ b/third_party/rust/ahash/src/convert.rs @@ -0,0 +1,167 @@ +pub(crate) trait Convert<To> { + fn convert(self) -> To; +} + +macro_rules! convert { + ($a:ty, $b:ty) => { + impl Convert<$b> for $a { + #[inline(always)] + fn convert(self) -> $b { + unsafe { + core::mem::transmute::<$a, $b>(self) + } + } + } + impl Convert<$a> for $b { + #[inline(always)] + fn convert(self) -> $a { + unsafe { + core::mem::transmute::<$b, $a>(self) + } + } + } + }; +} + +convert!([u128; 4], [u64; 8]); +convert!([u128; 4], [u32; 16]); +convert!([u128; 4], [u16; 32]); +convert!([u128; 4], [u8; 64]); +convert!([u128; 2], [u64; 4]); +convert!([u128; 2], [u32; 8]); +convert!([u128; 2], [u16; 16]); +convert!([u128; 2], [u8; 32]); +convert!(u128, [u64; 2]); +convert!(u128, [u32; 4]); +convert!(u128, [u16; 8]); +convert!(u128, [u8; 16]); +convert!([u64; 8], [u32; 16]); +convert!([u64; 8], [u16; 32]); +convert!([u64; 8], [u8; 64]); +convert!([u64; 4], [u32; 8]); +convert!([u64; 4], [u16; 16]); +convert!([u64; 4], [u8; 32]); +convert!([u64; 2], [u32; 4]); +convert!([u64; 2], [u16; 8]); +convert!([u64; 2], [u8; 16]); +convert!([u32; 4], [u16; 8]); +convert!([u32; 4], [u8; 16]); +convert!([u16; 8], [u8; 16]); +convert!(u64, [u32; 2]); +convert!(u64, [u16; 4]); +convert!(u64, [u8; 8]); +convert!([u32; 2], [u16; 4]); +convert!([u32; 2], [u8; 8]); +convert!(u32, [u16; 2]); +convert!(u32, [u8; 4]); +convert!([u16; 2], [u8; 4]); +convert!(u16, [u8; 2]); +convert!([[u64; 4]; 2], [u8; 64]); + +convert!([f64; 2], [u8; 16]); +convert!([f32; 4], [u8; 16]); +convert!(f64, [u8; 8]); +convert!([f32; 2], [u8; 8]); +convert!(f32, [u8; 4]); + +macro_rules! as_array { + ($input:expr, $len:expr) => {{ + { + #[inline(always)] + fn as_array<T>(slice: &[T]) -> &[T; $len] { + assert_eq!(slice.len(), $len); + unsafe { &*(slice.as_ptr() as *const [_; $len]) } + } + as_array($input) + } + }}; +} + +pub(crate) trait ReadFromSlice { + fn read_u16(&self) -> (u16, &[u8]); + fn read_u32(&self) -> (u32, &[u8]); + fn read_u64(&self) -> (u64, &[u8]); + fn read_u128(&self) -> (u128, &[u8]); + fn read_u128x2(&self) -> ([u128; 2], &[u8]); + fn read_u128x4(&self) -> ([u128; 4], &[u8]); + fn read_last_u16(&self) -> u16; + fn read_last_u32(&self) -> u32; + fn read_last_u64(&self) -> u64; + fn read_last_u128(&self) -> u128; + fn read_last_u128x2(&self) -> [u128; 2]; + fn read_last_u128x4(&self) -> [u128; 4]; +} + +impl ReadFromSlice for [u8] { + #[inline(always)] + fn read_u16(&self) -> (u16, &[u8]) { + let (value, rest) = self.split_at(2); + (as_array!(value, 2).convert(), rest) + } + + #[inline(always)] + fn read_u32(&self) -> (u32, &[u8]) { + let (value, rest) = self.split_at(4); + (as_array!(value, 4).convert(), rest) + } + + #[inline(always)] + fn read_u64(&self) -> (u64, &[u8]) { + let (value, rest) = self.split_at(8); + (as_array!(value, 8).convert(), rest) + } + + #[inline(always)] + fn read_u128(&self) -> (u128, &[u8]) { + let (value, rest) = self.split_at(16); + (as_array!(value, 16).convert(), rest) + } + + #[inline(always)] + fn read_u128x2(&self) -> ([u128; 2], &[u8]) { + let (value, rest) = self.split_at(32); + (as_array!(value, 32).convert(), rest) + } + + #[inline(always)] + fn read_u128x4(&self) -> ([u128; 4], &[u8]) { + let (value, rest) = self.split_at(64); + (as_array!(value, 64).convert(), rest) + } + + #[inline(always)] + fn read_last_u16(&self) -> u16 { + let (_, value) = self.split_at(self.len() - 2); + as_array!(value, 2).convert() + } + + #[inline(always)] + fn read_last_u32(&self) -> u32 { + let (_, value) = self.split_at(self.len() - 4); + as_array!(value, 4).convert() + } + + #[inline(always)] + fn read_last_u64(&self) -> u64 { + let (_, value) = self.split_at(self.len() - 8); + as_array!(value, 8).convert() + } + + #[inline(always)] + fn read_last_u128(&self) -> u128 { + let (_, value) = self.split_at(self.len() - 16); + as_array!(value, 16).convert() + } + + #[inline(always)] + fn read_last_u128x2(&self) -> [u128; 2] { + let (_, value) = self.split_at(self.len() - 32); + as_array!(value, 32).convert() + } + + #[inline(always)] + fn read_last_u128x4(&self) -> [u128; 4] { + let (_, value) = self.split_at(self.len() - 64); + as_array!(value, 64).convert() + } +} diff --git a/third_party/rust/ahash/src/fallback_hash.rs b/third_party/rust/ahash/src/fallback_hash.rs new file mode 100644 index 0000000000..aad9efc859 --- /dev/null +++ b/third_party/rust/ahash/src/fallback_hash.rs @@ -0,0 +1,392 @@ +use crate::convert::*; +use crate::operations::folded_multiply; +use crate::operations::read_small; +use crate::random_state::PI; +use crate::RandomState; +use core::hash::Hasher; + +///This constant come from Kunth's prng (Empirically it works better than those from splitmix32). +pub(crate) const MULTIPLE: u64 = 6364136223846793005; +const ROT: u32 = 23; //17 + +/// A `Hasher` for hashing an arbitrary stream of bytes. +/// +/// Instances of [`AHasher`] represent state that is updated while hashing data. +/// +/// Each method updates the internal state based on the new data provided. Once +/// all of the data has been provided, the resulting hash can be obtained by calling +/// `finish()` +/// +/// [Clone] is also provided in case you wish to calculate hashes for two different items that +/// start with the same data. +/// +#[derive(Debug, Clone)] +pub struct AHasher { + buffer: u64, + pad: u64, + extra_keys: [u64; 2], +} + +impl AHasher { + /// Creates a new hasher keyed to the provided key. + #[inline] + #[allow(dead_code)] // Is not called if non-fallback hash is used. + pub fn new_with_keys(key1: u128, key2: u128) -> AHasher { + let pi: [u128; 2] = PI.convert(); + let key1: [u64; 2] = (key1 ^ pi[0]).convert(); + let key2: [u64; 2] = (key2 ^ pi[1]).convert(); + AHasher { + buffer: key1[0], + pad: key1[1], + extra_keys: key2, + } + } + + #[allow(unused)] // False positive + pub(crate) fn test_with_keys(key1: u128, key2: u128) -> Self { + let key1: [u64; 2] = key1.convert(); + let key2: [u64; 2] = key2.convert(); + Self { + buffer: key1[0], + pad: key1[1], + extra_keys: key2, + } + } + + #[inline] + #[allow(dead_code)] // Is not called if non-fallback hash is used. + pub(crate) fn from_random_state(rand_state: &RandomState) -> AHasher { + AHasher { + buffer: rand_state.k0, + pad: rand_state.k1, + extra_keys: [rand_state.k2, rand_state.k3], + } + } + + /// This update function has the goal of updating the buffer with a single multiply + /// FxHash does this but is vulnerable to attack. To avoid this input needs to be masked to with an + /// unpredictable value. Other hashes such as murmurhash have taken this approach but were found vulnerable + /// to attack. The attack was based on the idea of reversing the pre-mixing (Which is necessarily + /// reversible otherwise bits would be lost) then placing a difference in the highest bit before the + /// multiply used to mix the data. Because a multiply can never affect the bits to the right of it, a + /// subsequent update that also differed in this bit could result in a predictable collision. + /// + /// This version avoids this vulnerability while still only using a single multiply. It takes advantage + /// of the fact that when a 64 bit multiply is performed the upper 64 bits are usually computed and thrown + /// away. Instead it creates two 128 bit values where the upper 64 bits are zeros and multiplies them. + /// (The compiler is smart enough to turn this into a 64 bit multiplication in the assembly) + /// Then the upper bits are xored with the lower bits to produce a single 64 bit result. + /// + /// To understand why this is a good scrambling function it helps to understand multiply-with-carry PRNGs: + /// https://en.wikipedia.org/wiki/Multiply-with-carry_pseudorandom_number_generator + /// If the multiple is chosen well, this creates a long period, decent quality PRNG. + /// Notice that this function is equivalent to this except the `buffer`/`state` is being xored with each + /// new block of data. In the event that data is all zeros, it is exactly equivalent to a MWC PRNG. + /// + /// This is impervious to attack because every bit buffer at the end is dependent on every bit in + /// `new_data ^ buffer`. For example suppose two inputs differed in only the 5th bit. Then when the + /// multiplication is performed the `result` will differ in bits 5-69. More specifically it will differ by + /// 2^5 * MULTIPLE. However in the next step bits 65-128 are turned into a separate 64 bit value. So the + /// differing bits will be in the lower 6 bits of this value. The two intermediate values that differ in + /// bits 5-63 and in bits 0-5 respectively get added together. Producing an output that differs in every + /// bit. The addition carries in the multiplication and at the end additionally mean that the even if an + /// attacker somehow knew part of (but not all) the contents of the buffer before hand, + /// they would not be able to predict any of the bits in the buffer at the end. + #[inline(always)] + #[cfg(feature = "folded_multiply")] + fn update(&mut self, new_data: u64) { + self.buffer = folded_multiply(new_data ^ self.buffer, MULTIPLE); + } + + #[inline(always)] + #[cfg(not(feature = "folded_multiply"))] + fn update(&mut self, new_data: u64) { + let d1 = (new_data ^ self.buffer).wrapping_mul(MULTIPLE); + self.pad = (self.pad ^ d1).rotate_left(8).wrapping_mul(MULTIPLE); + self.buffer = (self.buffer ^ self.pad).rotate_left(24); + } + + /// Similar to the above this function performs an update using a "folded multiply". + /// However it takes in 128 bits of data instead of 64. Both halves must be masked. + /// + /// This makes it impossible for an attacker to place a single bit difference between + /// two blocks so as to cancel each other. + /// + /// However this is not sufficient. to prevent (a,b) from hashing the same as (b,a) the buffer itself must + /// be updated between calls in a way that does not commute. To achieve this XOR and Rotate are used. + /// Add followed by xor is not the same as xor followed by add, and rotate ensures that the same out bits + /// can't be changed by the same set of input bits. To cancel this sequence with subsequent input would require + /// knowing the keys. + #[inline(always)] + #[cfg(feature = "folded_multiply")] + fn large_update(&mut self, new_data: u128) { + let block: [u64; 2] = new_data.convert(); + let combined = folded_multiply(block[0] ^ self.extra_keys[0], block[1] ^ self.extra_keys[1]); + self.buffer = (self.buffer.wrapping_add(self.pad) ^ combined).rotate_left(ROT); + } + + #[inline(always)] + #[cfg(not(feature = "folded_multiply"))] + fn large_update(&mut self, new_data: u128) { + let block: [u64; 2] = new_data.convert(); + self.update(block[0] ^ self.extra_keys[0]); + self.update(block[1] ^ self.extra_keys[1]); + } + + #[inline] + #[cfg(feature = "specialize")] + fn short_finish(&self) -> u64 { + self.buffer.wrapping_add(self.pad) + } +} + +/// Provides [Hasher] methods to hash all of the primitive types. +/// +/// [Hasher]: core::hash::Hasher +impl Hasher for AHasher { + #[inline] + fn write_u8(&mut self, i: u8) { + self.update(i as u64); + } + + #[inline] + fn write_u16(&mut self, i: u16) { + self.update(i as u64); + } + + #[inline] + fn write_u32(&mut self, i: u32) { + self.update(i as u64); + } + + #[inline] + fn write_u64(&mut self, i: u64) { + self.update(i as u64); + } + + #[inline] + fn write_u128(&mut self, i: u128) { + self.large_update(i); + } + + #[inline] + #[cfg(any(target_pointer_width = "64", target_pointer_width = "32", target_pointer_width = "16"))] + fn write_usize(&mut self, i: usize) { + self.write_u64(i as u64); + } + + #[inline] + #[cfg(target_pointer_width = "128")] + fn write_usize(&mut self, i: usize) { + self.write_u128(i as u128); + } + + #[inline] + #[allow(clippy::collapsible_if)] + fn write(&mut self, input: &[u8]) { + let mut data = input; + let length = data.len() as u64; + //Needs to be an add rather than an xor because otherwise it could be canceled with carefully formed input. + self.buffer = self.buffer.wrapping_add(length).wrapping_mul(MULTIPLE); + //A 'binary search' on sizes reduces the number of comparisons. + if data.len() > 8 { + if data.len() > 16 { + let tail = data.read_last_u128(); + self.large_update(tail); + while data.len() > 16 { + let (block, rest) = data.read_u128(); + self.large_update(block); + data = rest; + } + } else { + self.large_update([data.read_u64().0, data.read_last_u64()].convert()); + } + } else { + let value = read_small(data); + self.large_update(value.convert()); + } + } + + #[inline] + #[cfg(feature = "folded_multiply")] + fn finish(&self) -> u64 { + let rot = (self.buffer & 63) as u32; + folded_multiply(self.buffer, self.pad).rotate_left(rot) + } + + #[inline] + #[cfg(not(feature = "folded_multiply"))] + fn finish(&self) -> u64 { + let rot = (self.buffer & 63) as u32; + (self.buffer.wrapping_mul(MULTIPLE) ^ self.pad).rotate_left(rot) + } +} + +#[cfg(feature = "specialize")] +pub(crate) struct AHasherU64 { + pub(crate) buffer: u64, + pub(crate) pad: u64, +} + +/// A specialized hasher for only primitives under 64 bits. +#[cfg(feature = "specialize")] +impl Hasher for AHasherU64 { + #[inline] + fn finish(&self) -> u64 { + let rot = (self.pad & 63) as u32; + self.buffer.rotate_left(rot) + } + + #[inline] + fn write(&mut self, _bytes: &[u8]) { + unreachable!("Specialized hasher was called with a different type of object") + } + + #[inline] + fn write_u8(&mut self, i: u8) { + self.write_u64(i as u64); + } + + #[inline] + fn write_u16(&mut self, i: u16) { + self.write_u64(i as u64); + } + + #[inline] + fn write_u32(&mut self, i: u32) { + self.write_u64(i as u64); + } + + #[inline] + fn write_u64(&mut self, i: u64) { + self.buffer = folded_multiply(i ^ self.buffer, MULTIPLE); + } + + #[inline] + fn write_u128(&mut self, _i: u128) { + unreachable!("Specialized hasher was called with a different type of object") + } + + #[inline] + fn write_usize(&mut self, _i: usize) { + unreachable!("Specialized hasher was called with a different type of object") + } +} + +#[cfg(feature = "specialize")] +pub(crate) struct AHasherFixed(pub AHasher); + +/// A specialized hasher for fixed size primitives larger than 64 bits. +#[cfg(feature = "specialize")] +impl Hasher for AHasherFixed { + #[inline] + fn finish(&self) -> u64 { + self.0.short_finish() + } + + #[inline] + fn write(&mut self, bytes: &[u8]) { + self.0.write(bytes) + } + + #[inline] + fn write_u8(&mut self, i: u8) { + self.write_u64(i as u64); + } + + #[inline] + fn write_u16(&mut self, i: u16) { + self.write_u64(i as u64); + } + + #[inline] + fn write_u32(&mut self, i: u32) { + self.write_u64(i as u64); + } + + #[inline] + fn write_u64(&mut self, i: u64) { + self.0.write_u64(i); + } + + #[inline] + fn write_u128(&mut self, i: u128) { + self.0.write_u128(i); + } + + #[inline] + fn write_usize(&mut self, i: usize) { + self.0.write_usize(i); + } +} + +#[cfg(feature = "specialize")] +pub(crate) struct AHasherStr(pub AHasher); + +/// A specialized hasher for a single string +/// Note that the other types don't panic because the hash impl for String tacks on an unneeded call. (As does vec) +#[cfg(feature = "specialize")] +impl Hasher for AHasherStr { + #[inline] + fn finish(&self) -> u64 { + self.0.finish() + } + + #[inline] + fn write(&mut self, bytes: &[u8]) { + if bytes.len() > 8 { + self.0.write(bytes) + } else { + let value = read_small(bytes); + self.0.buffer = folded_multiply(value[0] ^ self.0.buffer, + value[1] ^ self.0.extra_keys[1]); + self.0.pad = self.0.pad.wrapping_add(bytes.len() as u64); + } + } + + #[inline] + fn write_u8(&mut self, _i: u8) {} + + #[inline] + fn write_u16(&mut self, _i: u16) {} + + #[inline] + fn write_u32(&mut self, _i: u32) {} + + #[inline] + fn write_u64(&mut self, _i: u64) {} + + #[inline] + fn write_u128(&mut self, _i: u128) {} + + #[inline] + fn write_usize(&mut self, _i: usize) {} +} + +#[cfg(test)] +mod tests { + use crate::convert::Convert; + use crate::fallback_hash::*; + + #[test] + fn test_hash() { + let mut hasher = AHasher::new_with_keys(0, 0); + let value: u64 = 1 << 32; + hasher.update(value); + let result = hasher.buffer; + let mut hasher = AHasher::new_with_keys(0, 0); + let value2: u64 = 1; + hasher.update(value2); + let result2 = hasher.buffer; + let result: [u8; 8] = result.convert(); + let result2: [u8; 8] = result2.convert(); + assert_ne!(hex::encode(result), hex::encode(result2)); + } + + #[test] + fn test_conversion() { + let input: &[u8] = "dddddddd".as_bytes(); + let bytes: u64 = as_array!(input, 8).convert(); + assert_eq!(bytes, 0x6464646464646464); + } +} diff --git a/third_party/rust/ahash/src/hash_map.rs b/third_party/rust/ahash/src/hash_map.rs new file mode 100644 index 0000000000..ec8fa433bb --- /dev/null +++ b/third_party/rust/ahash/src/hash_map.rs @@ -0,0 +1,371 @@ +use std::borrow::Borrow; +use std::collections::{hash_map, HashMap}; +use std::fmt::{self, Debug}; +use std::hash::{BuildHasher, Hash}; +use std::iter::FromIterator; +use std::ops::{Deref, DerefMut, Index}; +use std::panic::UnwindSafe; + +#[cfg(feature = "serde")] +use serde::{ + de::{Deserialize, Deserializer}, + ser::{Serialize, Serializer}, +}; + +use crate::RandomState; + +/// A [`HashMap`](std::collections::HashMap) using [`RandomState`](crate::RandomState) to hash the items. +/// (Requires the `std` feature to be enabled.) +#[derive(Clone)] +pub struct AHashMap<K, V, S = crate::RandomState>(HashMap<K, V, S>); + +impl<K, V> From<HashMap<K, V, crate::RandomState>> for AHashMap<K, V> { + fn from(item: HashMap<K, V, crate::RandomState>) -> Self { + AHashMap(item) + } +} + +impl<K, V> Into<HashMap<K, V, crate::RandomState>> for AHashMap<K, V> { + fn into(self) -> HashMap<K, V, crate::RandomState> { + self.0 + } +} + +impl<K, V> AHashMap<K, V, RandomState> { + pub fn new() -> Self { + AHashMap(HashMap::with_hasher(RandomState::default())) + } + + pub fn with_capacity(capacity: usize) -> Self { + AHashMap(HashMap::with_capacity_and_hasher(capacity, RandomState::default())) + } +} + +impl<K, V, S> AHashMap<K, V, S> +where + S: BuildHasher, +{ + pub fn with_hasher(hash_builder: S) -> Self { + AHashMap(HashMap::with_hasher(hash_builder)) + } + + pub fn with_capacity_and_hasher(capacity: usize, hash_builder: S) -> Self { + AHashMap(HashMap::with_capacity_and_hasher(capacity, hash_builder)) + } +} + +impl<K, V, S> AHashMap<K, V, S> +where + K: Hash + Eq, + S: BuildHasher, +{ + /// Returns a reference to the value corresponding to the key. + /// + /// The key may be any borrowed form of the map's key type, but + /// [`Hash`] and [`Eq`] on the borrowed form *must* match those for + /// the key type. + /// + /// # Examples + /// + /// ``` + /// use std::collections::HashMap; + /// + /// let mut map = HashMap::new(); + /// map.insert(1, "a"); + /// assert_eq!(map.get(&1), Some(&"a")); + /// assert_eq!(map.get(&2), None); + /// ``` + #[inline] + pub fn get<Q: ?Sized>(&self, k: &Q) -> Option<&V> + where + K: Borrow<Q>, + Q: Hash + Eq, + { + self.0.get(k) + } + + /// Returns the key-value pair corresponding to the supplied key. + /// + /// The supplied key may be any borrowed form of the map's key type, but + /// [`Hash`] and [`Eq`] on the borrowed form *must* match those for + /// the key type. + /// + /// # Examples + /// + /// ``` + /// use std::collections::HashMap; + /// + /// let mut map = HashMap::new(); + /// map.insert(1, "a"); + /// assert_eq!(map.get_key_value(&1), Some((&1, &"a"))); + /// assert_eq!(map.get_key_value(&2), None); + /// ``` + #[inline] + pub fn get_key_value<Q: ?Sized>(&self, k: &Q) -> Option<(&K, &V)> + where + K: Borrow<Q>, + Q: Hash + Eq, + { + self.0.get_key_value(k) + } + + /// Returns a mutable reference to the value corresponding to the key. + /// + /// The key may be any borrowed form of the map's key type, but + /// [`Hash`] and [`Eq`] on the borrowed form *must* match those for + /// the key type. + /// + /// # Examples + /// + /// ``` + /// use std::collections::HashMap; + /// + /// let mut map = HashMap::new(); + /// map.insert(1, "a"); + /// if let Some(x) = map.get_mut(&1) { + /// *x = "b"; + /// } + /// assert_eq!(map[&1], "b"); + /// ``` + #[inline] + pub fn get_mut<Q: ?Sized>(&mut self, k: &Q) -> Option<&mut V> + where + K: Borrow<Q>, + Q: Hash + Eq, + { + self.0.get_mut(k) + } + + /// Inserts a key-value pair into the map. + /// + /// If the map did not have this key present, [`None`] is returned. + /// + /// If the map did have this key present, the value is updated, and the old + /// value is returned. The key is not updated, though; this matters for + /// types that can be `==` without being identical. See the [module-level + /// documentation] for more. + /// + /// [module-level documentation]: crate::collections#insert-and-complex-keys + /// + /// # Examples + /// + /// ``` + /// use std::collections::HashMap; + /// + /// let mut map = HashMap::new(); + /// assert_eq!(map.insert(37, "a"), None); + /// assert_eq!(map.is_empty(), false); + /// + /// map.insert(37, "b"); + /// assert_eq!(map.insert(37, "c"), Some("b")); + /// assert_eq!(map[&37], "c"); + /// ``` + #[inline] + pub fn insert(&mut self, k: K, v: V) -> Option<V> { + self.0.insert(k, v) + } + + /// Removes a key from the map, returning the value at the key if the key + /// was previously in the map. + /// + /// The key may be any borrowed form of the map's key type, but + /// [`Hash`] and [`Eq`] on the borrowed form *must* match those for + /// the key type. + /// + /// # Examples + /// + /// ``` + /// use std::collections::HashMap; + /// + /// let mut map = HashMap::new(); + /// map.insert(1, "a"); + /// assert_eq!(map.remove(&1), Some("a")); + /// assert_eq!(map.remove(&1), None); + /// ``` + #[inline] + pub fn remove<Q: ?Sized>(&mut self, k: &Q) -> Option<V> + where + K: Borrow<Q>, + Q: Hash + Eq, + { + self.0.remove(k) + } +} + +impl<K, V, S> Deref for AHashMap<K, V, S> { + type Target = HashMap<K, V, S>; + fn deref(&self) -> &Self::Target { + &self.0 + } +} + +impl<K, V, S> DerefMut for AHashMap<K, V, S> { + fn deref_mut(&mut self) -> &mut Self::Target { + &mut self.0 + } +} + +impl<K, V, S> UnwindSafe for AHashMap<K, V, S> +where + K: UnwindSafe, + V: UnwindSafe, +{ +} + +impl<K, V, S> PartialEq for AHashMap<K, V, S> +where + K: Eq + Hash, + V: PartialEq, + S: BuildHasher, +{ + fn eq(&self, other: &AHashMap<K, V, S>) -> bool { + self.0.eq(&other.0) + } +} + +impl<K, V, S> Eq for AHashMap<K, V, S> +where + K: Eq + Hash, + V: Eq, + S: BuildHasher, +{ +} + +impl<K, Q: ?Sized, V, S> Index<&Q> for AHashMap<K, V, S> +where + K: Eq + Hash + Borrow<Q>, + Q: Eq + Hash, + S: BuildHasher, +{ + type Output = V; + + /// Returns a reference to the value corresponding to the supplied key. + /// + /// # Panics + /// + /// Panics if the key is not present in the `HashMap`. + #[inline] + fn index(&self, key: &Q) -> &V { + self.0.index(key) + } +} + +impl<K, V, S> Debug for AHashMap<K, V, S> +where + K: Debug, + V: Debug, + S: BuildHasher, +{ + fn fmt(&self, fmt: &mut fmt::Formatter) -> fmt::Result { + self.0.fmt(fmt) + } +} + +impl<K, V, S> FromIterator<(K, V)> for AHashMap<K, V, S> +where + K: Eq + Hash, + S: BuildHasher + Default, +{ + fn from_iter<T: IntoIterator<Item = (K, V)>>(iter: T) -> Self { + AHashMap(HashMap::from_iter(iter)) + } +} + +impl<'a, K, V, S> IntoIterator for &'a AHashMap<K, V, S> { + type Item = (&'a K, &'a V); + type IntoIter = hash_map::Iter<'a, K, V>; + fn into_iter(self) -> Self::IntoIter { + (&self.0).iter() + } +} + +impl<'a, K, V, S> IntoIterator for &'a mut AHashMap<K, V, S> { + type Item = (&'a K, &'a mut V); + type IntoIter = hash_map::IterMut<'a, K, V>; + fn into_iter(self) -> Self::IntoIter { + (&mut self.0).iter_mut() + } +} + +impl<K, V, S> IntoIterator for AHashMap<K, V, S> { + type Item = (K, V); + type IntoIter = hash_map::IntoIter<K, V>; + fn into_iter(self) -> Self::IntoIter { + self.0.into_iter() + } +} + +impl<K, V, S> Extend<(K, V)> for AHashMap<K, V, S> +where + K: Eq + Hash, + S: BuildHasher, +{ + #[inline] + fn extend<T: IntoIterator<Item = (K, V)>>(&mut self, iter: T) { + self.0.extend(iter) + } +} + +impl<'a, K, V, S> Extend<(&'a K, &'a V)> for AHashMap<K, V, S> +where + K: Eq + Hash + Copy + 'a, + V: Copy + 'a, + S: BuildHasher, +{ + #[inline] + fn extend<T: IntoIterator<Item = (&'a K, &'a V)>>(&mut self, iter: T) { + self.0.extend(iter) + } +} + +impl<K, V> Default for AHashMap<K, V, RandomState> { + #[inline] + fn default() -> AHashMap<K, V, RandomState> { + AHashMap::new() + } +} + +#[cfg(feature = "serde")] +impl<K, V> Serialize for AHashMap<K, V> +where + K: Serialize + Eq + Hash, + V: Serialize, +{ + fn serialize<S: Serializer>(&self, serializer: S) -> Result<S::Ok, S::Error> { + self.deref().serialize(serializer) + } +} + +#[cfg(feature = "serde")] +impl<'de, K, V> Deserialize<'de> for AHashMap<K, V> +where + K: Deserialize<'de> + Eq + Hash, + V: Deserialize<'de>, +{ + fn deserialize<D: Deserializer<'de>>(deserializer: D) -> Result<Self, D::Error> { + let hash_map = HashMap::deserialize(deserializer); + hash_map.map(|hash_map| Self(hash_map)) + } +} + +#[cfg(test)] +mod test { + use super::*; + #[test] + fn test_borrow() { + let mut map: AHashMap<String, String> = AHashMap::new(); + map.insert("foo".to_string(), "Bar".to_string()); + map.insert("Bar".to_string(), map.get("foo").unwrap().to_owned()); + } + + #[cfg(feature = "serde")] + #[test] + fn test_serde() { + let mut map = AHashMap::new(); + map.insert("for".to_string(), 0); + map.insert("bar".to_string(), 1); + let serialization = serde_json::to_string(&map).unwrap(); + let deserialization: AHashMap<String, u64> = serde_json::from_str(&serialization).unwrap(); + assert_eq!(deserialization, map); + } +} diff --git a/third_party/rust/ahash/src/hash_quality_test.rs b/third_party/rust/ahash/src/hash_quality_test.rs new file mode 100644 index 0000000000..4cd3156afe --- /dev/null +++ b/third_party/rust/ahash/src/hash_quality_test.rs @@ -0,0 +1,483 @@ +use core::hash::{Hash, Hasher}; +use std::collections::HashMap; + +fn assert_sufficiently_different(a: u64, b: u64, tolerance: i32) { + let (same_byte_count, same_nibble_count) = count_same_bytes_and_nibbles(a, b); + assert!(same_byte_count <= tolerance, "{:x} vs {:x}: {:}", a, b, same_byte_count); + assert!( + same_nibble_count <= tolerance * 3, + "{:x} vs {:x}: {:}", + a, + b, + same_nibble_count + ); + let flipped_bits = (a ^ b).count_ones(); + assert!( + flipped_bits > 12 && flipped_bits < 52, + "{:x} and {:x}: {:}", + a, + b, + flipped_bits + ); + for rotate in 0..64 { + let flipped_bits2 = (a ^ (b.rotate_left(rotate))).count_ones(); + assert!( + flipped_bits2 > 10 && flipped_bits2 < 54, + "{:x} and {:x}: {:}", + a, + b.rotate_left(rotate), + flipped_bits2 + ); + } +} + +fn count_same_bytes_and_nibbles(a: u64, b: u64) -> (i32, i32) { + let mut same_byte_count = 0; + let mut same_nibble_count = 0; + for byte in 0..8 { + let ba = (a >> (8 * byte)) as u8; + let bb = (b >> (8 * byte)) as u8; + if ba == bb { + same_byte_count += 1; + } + if ba & 0xF0u8 == bb & 0xF0u8 { + same_nibble_count += 1; + } + if ba & 0x0Fu8 == bb & 0x0Fu8 { + same_nibble_count += 1; + } + } + (same_byte_count, same_nibble_count) +} + +fn gen_combinations(options: &[u32; 8], depth: u32, so_far: Vec<u32>, combinations: &mut Vec<Vec<u32>>) { + if depth == 0 { + return; + } + for option in options { + let mut next = so_far.clone(); + next.push(*option); + combinations.push(next.clone()); + gen_combinations(options, depth - 1, next, combinations); + } +} + +fn test_no_full_collisions<T: Hasher>(gen_hash: impl Fn() -> T) { + let options: [u32; 8] = [ + 0x00000000, 0x20000000, 0x40000000, 0x60000000, 0x80000000, 0xA0000000, 0xC0000000, 0xE0000000, + ]; + let mut combinations = Vec::new(); + gen_combinations(&options, 7, Vec::new(), &mut combinations); + let mut map: HashMap<u64, Vec<u8>> = HashMap::new(); + for combination in combinations { + let array = unsafe { + let (begin, middle, end) = combination.align_to::<u8>(); + assert_eq!(0, begin.len()); + assert_eq!(0, end.len()); + middle.to_vec() + }; + let mut hasher = gen_hash(); + hasher.write(&array); + let hash = hasher.finish(); + if let Some(value) = map.get(&hash) { + assert_eq!( + value, &array, + "Found a collision between {:x?} and {:x?}. Hash: {:x?}", + value, &array, &hash + ); + } else { + map.insert(hash, array); + } + } + assert_eq!(2396744, map.len()); +} + +fn test_keys_change_output<T: Hasher>(constructor: impl Fn(u128, u128) -> T) { + let mut a = constructor(1, 1); + let mut b = constructor(1, 2); + let mut c = constructor(2, 1); + let mut d = constructor(2, 2); + "test".hash(&mut a); + "test".hash(&mut b); + "test".hash(&mut c); + "test".hash(&mut d); + assert_sufficiently_different(a.finish(), b.finish(), 1); + assert_sufficiently_different(a.finish(), c.finish(), 1); + assert_sufficiently_different(a.finish(), d.finish(), 1); + assert_sufficiently_different(b.finish(), c.finish(), 1); + assert_sufficiently_different(b.finish(), d.finish(), 1); + assert_sufficiently_different(c.finish(), d.finish(), 1); +} + +fn test_input_affect_every_byte<T: Hasher>(constructor: impl Fn(u128, u128) -> T) { + let base = hash_with(&0, constructor(0, 0)); + for shift in 0..16 { + let mut alternitives = vec![]; + for v in 0..256 { + let input = (v as u128) << (shift * 8); + let hasher = constructor(0, 0); + alternitives.push(hash_with(&input, hasher)); + } + assert_each_byte_differs(shift, base, alternitives); + } +} + +///Ensures that for every bit in the output there is some value for each byte in the key that flips it. +fn test_keys_affect_every_byte<H: Hash, T: Hasher>(item: H, constructor: impl Fn(u128, u128) -> T) { + let base = hash_with(&item, constructor(0, 0)); + for shift in 0..16 { + let mut alternitives1 = vec![]; + let mut alternitives2 = vec![]; + for v in 0..256 { + let input = (v as u128) << (shift * 8); + let hasher1 = constructor(input, 0); + let hasher2 = constructor(0, input); + let h1 = hash_with(&item, hasher1); + let h2 = hash_with(&item, hasher2); + alternitives1.push(h1); + alternitives2.push(h2); + } + assert_each_byte_differs(shift, base, alternitives1); + assert_each_byte_differs(shift, base, alternitives2); + } +} + +fn assert_each_byte_differs(num: u64, base: u64, alternitives: Vec<u64>) { + let mut changed_bits = 0_u64; + for alternitive in alternitives { + changed_bits |= base ^ alternitive + } + assert_eq!(core::u64::MAX, changed_bits, "Bits changed: {:x} on num: {:?}", changed_bits, num); +} + +fn test_finish_is_consistent<T: Hasher>(constructor: impl Fn(u128, u128) -> T) { + let mut hasher = constructor(1, 2); + "Foo".hash(&mut hasher); + let a = hasher.finish(); + let b = hasher.finish(); + assert_eq!(a, b); +} + +fn test_single_key_bit_flip<T: Hasher>(constructor: impl Fn(u128, u128) -> T) { + for bit in 0..128 { + let mut a = constructor(0, 0); + let mut b = constructor(0, 1 << bit); + let mut c = constructor(1 << bit, 0); + "1234".hash(&mut a); + "1234".hash(&mut b); + "1234".hash(&mut c); + assert_sufficiently_different(a.finish(), b.finish(), 2); + assert_sufficiently_different(a.finish(), c.finish(), 2); + assert_sufficiently_different(b.finish(), c.finish(), 2); + let mut a = constructor(0, 0); + let mut b = constructor(0, 1 << bit); + let mut c = constructor(1 << bit, 0); + "12345678".hash(&mut a); + "12345678".hash(&mut b); + "12345678".hash(&mut c); + assert_sufficiently_different(a.finish(), b.finish(), 2); + assert_sufficiently_different(a.finish(), c.finish(), 2); + assert_sufficiently_different(b.finish(), c.finish(), 2); + let mut a = constructor(0, 0); + let mut b = constructor(0, 1 << bit); + let mut c = constructor(1 << bit, 0); + "1234567812345678".hash(&mut a); + "1234567812345678".hash(&mut b); + "1234567812345678".hash(&mut c); + assert_sufficiently_different(a.finish(), b.finish(), 2); + assert_sufficiently_different(a.finish(), c.finish(), 2); + assert_sufficiently_different(b.finish(), c.finish(), 2); + } +} + +fn test_all_bytes_matter<T: Hasher>(hasher: impl Fn() -> T) { + let mut item = vec![0; 256]; + let base_hash = hash(&item, &hasher); + for pos in 0..256 { + item[pos] = 255; + let hash = hash(&item, &hasher); + assert_ne!(base_hash, hash, "Position {} did not affect output", pos); + item[pos] = 0; + } +} + +fn test_no_pair_collisions<T: Hasher>(hasher: impl Fn() -> T) { + let base = [0_u64, 0_u64]; + let base_hash = hash(&base, &hasher); + for bitpos1 in 0..64 { + let a = 1_u64 << bitpos1; + for bitpos2 in 0..bitpos1 { + let b = 1_u64 << bitpos2; + let aa = hash(&[a, a], &hasher); + let ab = hash(&[a, b], &hasher); + let ba = hash(&[b, a], &hasher); + let bb = hash(&[b, b], &hasher); + assert_sufficiently_different(base_hash, aa, 3); + assert_sufficiently_different(base_hash, ab, 3); + assert_sufficiently_different(base_hash, ba, 3); + assert_sufficiently_different(base_hash, bb, 3); + assert_sufficiently_different(aa, ab, 3); + assert_sufficiently_different(ab, ba, 3); + assert_sufficiently_different(ba, bb, 3); + assert_sufficiently_different(aa, ba, 3); + assert_sufficiently_different(ab, bb, 3); + assert_sufficiently_different(aa, bb, 3); + } + } +} + +fn hash<H: Hash, T: Hasher>(b: &H, hash_builder: &dyn Fn() -> T) -> u64 { + let mut hasher = hash_builder(); + b.hash(&mut hasher); + hasher.finish() +} + +fn hash_with<H: Hash, T: Hasher>(b: &H, mut hasher: T) -> u64 { + b.hash(&mut hasher); + hasher.finish() +} + +fn test_single_bit_flip<T: Hasher>(hasher: impl Fn() -> T) { + let size = 32; + let compare_value = hash(&0u32, &hasher); + for pos in 0..size { + let test_value = hash(&(1u32 << pos), &hasher); + assert_sufficiently_different(compare_value, test_value, 2); + } + let size = 64; + let compare_value = hash(&0u64, &hasher); + for pos in 0..size { + let test_value = hash(&(1u64 << pos), &hasher); + assert_sufficiently_different(compare_value, test_value, 2); + } + let size = 128; + let compare_value = hash(&0u128, &hasher); + for pos in 0..size { + let test_value = hash(&(1u128 << pos), &hasher); + dbg!(compare_value, test_value); + assert_sufficiently_different(compare_value, test_value, 2); + } +} + +fn test_padding_doesnot_collide<T: Hasher>(hasher: impl Fn() -> T) { + for c in 0..128u8 { + for string in ["", "\0", "\x01", "1234", "12345678", "1234567812345678"].iter() { + let mut short = hasher(); + string.hash(&mut short); + let value = short.finish(); + let mut padded = string.to_string(); + for num in 1..=128 { + let mut long = hasher(); + padded.push(c as char); + padded.hash(&mut long); + let (same_bytes, same_nibbles) = count_same_bytes_and_nibbles(value, long.finish()); + assert!( + same_bytes <= 3, + "{} bytes of {} -> {:x} vs {:x}", num, c, value, long.finish() + ); + assert!( + same_nibbles <= 8, + "{} bytes of {} -> {:x} vs {:x}", num, c, value, long.finish() + ); + let flipped_bits = (value ^ long.finish()).count_ones(); + assert!(flipped_bits > 10); + } + if string.len() > 0 { + let mut padded = string[1..].to_string(); + padded.push(c as char); + for num in 2..=128 { + let mut long = hasher(); + padded.push(c as char); + padded.hash(&mut long); + let (same_bytes, same_nibbles) = count_same_bytes_and_nibbles(value, long.finish()); + assert!( + same_bytes <= 3, + "string {:?} + {} bytes of {} -> {:x} vs {:x}", + string, + num, + c, + value, + long.finish() + ); + assert!( + same_nibbles <= 8, + "string {:?} + {} bytes of {} -> {:x} vs {:x}", + string, + num, + c, + value, + long.finish() + ); + let flipped_bits = (value ^ long.finish()).count_ones(); + assert!(flipped_bits > 10); + } + } + } + } +} + +fn test_length_extension<T: Hasher>(hasher: impl Fn(u128, u128) -> T) { + for key in 0..256 { + let h1 = hasher(key, key); + let v1 = hash_with(&[0_u8, 0, 0, 0, 0, 0, 0, 0], h1); + let h2 = hasher(key, key); + let v2 = hash_with(&[1_u8, 0, 0, 0, 0, 0, 0, 0, 0], h2); + assert_ne!(v1, v2); + } +} + +#[cfg(test)] +mod fallback_tests { + use crate::fallback_hash::*; + use crate::hash_quality_test::*; + + #[test] + fn fallback_single_bit_flip() { + test_single_bit_flip(|| AHasher::new_with_keys(0, 0)) + } + + #[test] + fn fallback_single_key_bit_flip() { + test_single_key_bit_flip(AHasher::new_with_keys) + } + + #[test] + fn fallback_all_bytes_matter() { + test_all_bytes_matter(|| AHasher::new_with_keys(0, 0)); + } + + #[test] + fn fallback_test_no_pair_collisions() { + test_no_pair_collisions(|| AHasher::new_with_keys(0, 0)); + } + + #[test] + fn fallback_test_no_full_collisions() { + test_no_full_collisions(|| AHasher::new_with_keys(0, 0)); + } + + #[test] + fn fallback_keys_change_output() { + test_keys_change_output(AHasher::new_with_keys); + } + + #[test] + fn fallback_input_affect_every_byte() { + test_input_affect_every_byte(AHasher::new_with_keys); + } + + #[test] + fn fallback_keys_affect_every_byte() { + //For fallback second key is not used in every hash. + #[cfg(all(not(feature = "specialize"), feature = "folded_multiply"))] + test_keys_affect_every_byte(0, |a, b| AHasher::new_with_keys(a ^ b, a)); + test_keys_affect_every_byte("", |a, b| AHasher::new_with_keys(a ^ b, a)); + test_keys_affect_every_byte((0, 0), |a, b| AHasher::new_with_keys(a ^ b, a)); + } + + #[test] + fn fallback_finish_is_consistant() { + test_finish_is_consistent(AHasher::test_with_keys) + } + + #[test] + fn fallback_padding_doesnot_collide() { + test_padding_doesnot_collide(|| AHasher::new_with_keys(0, 0)); + test_padding_doesnot_collide(|| AHasher::new_with_keys(0, 2)); + test_padding_doesnot_collide(|| AHasher::new_with_keys(2, 0)); + test_padding_doesnot_collide(|| AHasher::new_with_keys(2, 2)); + } + + #[test] + fn fallback_length_extension() { + test_length_extension(|a, b| AHasher::new_with_keys(a, b)); + } +} + +///Basic sanity tests of the cypto properties of aHash. +#[cfg(any( + all(any(target_arch = "x86", target_arch = "x86_64"), target_feature = "aes", not(miri)), + all(any(target_arch = "arm", target_arch = "aarch64"), target_feature = "crypto", not(miri), feature = "stdsimd") +))] +#[cfg(test)] +mod aes_tests { + use crate::aes_hash::*; + use crate::hash_quality_test::*; + use std::hash::{Hash, Hasher}; + + //This encrypts to 0. + const BAD_KEY2: u128 = 0x6363_6363_6363_6363_6363_6363_6363_6363; + //This decrypts to 0. + const BAD_KEY: u128 = 0x5252_5252_5252_5252_5252_5252_5252_5252; + + #[test] + fn test_single_bit_in_byte() { + let mut hasher1 = AHasher::test_with_keys(0, 0); + 8_u32.hash(&mut hasher1); + let mut hasher2 = AHasher::test_with_keys(0, 0); + 0_u32.hash(&mut hasher2); + assert_sufficiently_different(hasher1.finish(), hasher2.finish(), 1); + } + + #[test] + fn aes_single_bit_flip() { + test_single_bit_flip(|| AHasher::test_with_keys(BAD_KEY, BAD_KEY)); + test_single_bit_flip(|| AHasher::test_with_keys(BAD_KEY2, BAD_KEY2)); + } + + #[test] + fn aes_single_key_bit_flip() { + test_single_key_bit_flip(AHasher::test_with_keys) + } + + #[test] + fn aes_all_bytes_matter() { + test_all_bytes_matter(|| AHasher::test_with_keys(BAD_KEY, BAD_KEY)); + test_all_bytes_matter(|| AHasher::test_with_keys(BAD_KEY2, BAD_KEY2)); + } + + #[test] + fn aes_test_no_pair_collisions() { + test_no_pair_collisions(|| AHasher::test_with_keys(BAD_KEY, BAD_KEY)); + test_no_pair_collisions(|| AHasher::test_with_keys(BAD_KEY2, BAD_KEY2)); + } + + #[test] + fn ase_test_no_full_collisions() { + test_no_full_collisions(|| AHasher::test_with_keys(12345, 67890)); + } + + #[test] + fn aes_keys_change_output() { + test_keys_change_output(AHasher::test_with_keys); + } + + #[test] + fn aes_input_affect_every_byte() { + test_input_affect_every_byte(AHasher::test_with_keys); + } + + #[test] + fn aes_keys_affect_every_byte() { + #[cfg(not(feature = "specialize"))] + test_keys_affect_every_byte(0, AHasher::test_with_keys); + test_keys_affect_every_byte("", AHasher::test_with_keys); + test_keys_affect_every_byte((0, 0), AHasher::test_with_keys); + } + + #[test] + fn aes_finish_is_consistant() { + test_finish_is_consistent(AHasher::test_with_keys) + } + + #[test] + fn aes_padding_doesnot_collide() { + test_padding_doesnot_collide(|| AHasher::test_with_keys(BAD_KEY, BAD_KEY)); + test_padding_doesnot_collide(|| AHasher::test_with_keys(BAD_KEY2, BAD_KEY2)); + } + + #[test] + fn aes_length_extension() { + test_length_extension(|a, b| AHasher::test_with_keys(a, b)); + } +} diff --git a/third_party/rust/ahash/src/hash_set.rs b/third_party/rust/ahash/src/hash_set.rs new file mode 100644 index 0000000000..9766b676ff --- /dev/null +++ b/third_party/rust/ahash/src/hash_set.rs @@ -0,0 +1,313 @@ +use crate::RandomState; +use std::collections::{hash_set, HashSet}; +use std::fmt::{self, Debug}; +use std::hash::{BuildHasher, Hash}; +use std::iter::FromIterator; +use std::ops::{BitAnd, BitOr, BitXor, Deref, DerefMut, Sub}; + +#[cfg(feature = "serde")] +use serde::{ + de::{Deserialize, Deserializer}, + ser::{Serialize, Serializer}, +}; + +/// A [`HashSet`](std::collections::HashSet) using [`RandomState`](crate::RandomState) to hash the items. +/// (Requires the `std` feature to be enabled.) +#[derive(Clone)] +pub struct AHashSet<T, S = crate::RandomState>(HashSet<T, S>); + +impl<T> From<HashSet<T, crate::RandomState>> for AHashSet<T> { + fn from(item: HashSet<T, crate::RandomState>) -> Self { + AHashSet(item) + } +} + +impl<T> Into<HashSet<T, crate::RandomState>> for AHashSet<T> { + fn into(self) -> HashSet<T, crate::RandomState> { + self.0 + } +} + +impl<T> AHashSet<T, RandomState> { + pub fn new() -> Self { + AHashSet(HashSet::with_hasher(RandomState::default())) + } + + pub fn with_capacity(capacity: usize) -> Self { + AHashSet(HashSet::with_capacity_and_hasher(capacity, RandomState::default())) + } +} + +impl<T, S> AHashSet<T, S> +where + S: BuildHasher, +{ + pub fn with_hasher(hash_builder: S) -> Self { + AHashSet(HashSet::with_hasher(hash_builder)) + } + + pub fn with_capacity_and_hasher(capacity: usize, hash_builder: S) -> Self { + AHashSet(HashSet::with_capacity_and_hasher(capacity, hash_builder)) + } +} + +impl<T, S> Deref for AHashSet<T, S> { + type Target = HashSet<T, S>; + fn deref(&self) -> &Self::Target { + &self.0 + } +} + +impl<T, S> DerefMut for AHashSet<T, S> { + fn deref_mut(&mut self) -> &mut Self::Target { + &mut self.0 + } +} + +impl<T, S> PartialEq for AHashSet<T, S> +where + T: Eq + Hash, + S: BuildHasher, +{ + fn eq(&self, other: &AHashSet<T, S>) -> bool { + self.0.eq(&other.0) + } +} + +impl<T, S> Eq for AHashSet<T, S> +where + T: Eq + Hash, + S: BuildHasher, +{ +} + +impl<T, S> BitOr<&AHashSet<T, S>> for &AHashSet<T, S> +where + T: Eq + Hash + Clone, + S: BuildHasher + Default, +{ + type Output = AHashSet<T, S>; + + /// Returns the union of `self` and `rhs` as a new `AHashSet<T, S>`. + /// + /// # Examples + /// + /// ``` + /// use ahash::AHashSet; + /// + /// let a: AHashSet<_> = vec![1, 2, 3].into_iter().collect(); + /// let b: AHashSet<_> = vec![3, 4, 5].into_iter().collect(); + /// + /// let set = &a | &b; + /// + /// let mut i = 0; + /// let expected = [1, 2, 3, 4, 5]; + /// for x in &set { + /// assert!(expected.contains(x)); + /// i += 1; + /// } + /// assert_eq!(i, expected.len()); + /// ``` + fn bitor(self, rhs: &AHashSet<T, S>) -> AHashSet<T, S> { + AHashSet(self.0.bitor(&rhs.0)) + } +} + +impl<T, S> BitAnd<&AHashSet<T, S>> for &AHashSet<T, S> +where + T: Eq + Hash + Clone, + S: BuildHasher + Default, +{ + type Output = AHashSet<T, S>; + + /// Returns the intersection of `self` and `rhs` as a new `AHashSet<T, S>`. + /// + /// # Examples + /// + /// ``` + /// use ahash::AHashSet; + /// + /// let a: AHashSet<_> = vec![1, 2, 3].into_iter().collect(); + /// let b: AHashSet<_> = vec![2, 3, 4].into_iter().collect(); + /// + /// let set = &a & &b; + /// + /// let mut i = 0; + /// let expected = [2, 3]; + /// for x in &set { + /// assert!(expected.contains(x)); + /// i += 1; + /// } + /// assert_eq!(i, expected.len()); + /// ``` + fn bitand(self, rhs: &AHashSet<T, S>) -> AHashSet<T, S> { + AHashSet(self.0.bitand(&rhs.0)) + } +} + +impl<T, S> BitXor<&AHashSet<T, S>> for &AHashSet<T, S> +where + T: Eq + Hash + Clone, + S: BuildHasher + Default, +{ + type Output = AHashSet<T, S>; + + /// Returns the symmetric difference of `self` and `rhs` as a new `AHashSet<T, S>`. + /// + /// # Examples + /// + /// ``` + /// use ahash::AHashSet; + /// + /// let a: AHashSet<_> = vec![1, 2, 3].into_iter().collect(); + /// let b: AHashSet<_> = vec![3, 4, 5].into_iter().collect(); + /// + /// let set = &a ^ &b; + /// + /// let mut i = 0; + /// let expected = [1, 2, 4, 5]; + /// for x in &set { + /// assert!(expected.contains(x)); + /// i += 1; + /// } + /// assert_eq!(i, expected.len()); + /// ``` + fn bitxor(self, rhs: &AHashSet<T, S>) -> AHashSet<T, S> { + AHashSet(self.0.bitxor(&rhs.0)) + } +} + +impl<T, S> Sub<&AHashSet<T, S>> for &AHashSet<T, S> +where + T: Eq + Hash + Clone, + S: BuildHasher + Default, +{ + type Output = AHashSet<T, S>; + + /// Returns the difference of `self` and `rhs` as a new `AHashSet<T, S>`. + /// + /// # Examples + /// + /// ``` + /// use ahash::AHashSet; + /// + /// let a: AHashSet<_> = vec![1, 2, 3].into_iter().collect(); + /// let b: AHashSet<_> = vec![3, 4, 5].into_iter().collect(); + /// + /// let set = &a - &b; + /// + /// let mut i = 0; + /// let expected = [1, 2]; + /// for x in &set { + /// assert!(expected.contains(x)); + /// i += 1; + /// } + /// assert_eq!(i, expected.len()); + /// ``` + fn sub(self, rhs: &AHashSet<T, S>) -> AHashSet<T, S> { + AHashSet(self.0.sub(&rhs.0)) + } +} + +impl<T, S> Debug for AHashSet<T, S> +where + T: Debug, + S: BuildHasher, +{ + fn fmt(&self, fmt: &mut fmt::Formatter<'_>) -> fmt::Result { + self.0.fmt(fmt) + } +} + +impl<T, S> FromIterator<T> for AHashSet<T, S> +where + T: Eq + Hash, + S: BuildHasher + Default, +{ + #[inline] + fn from_iter<I: IntoIterator<Item = T>>(iter: I) -> AHashSet<T, S> { + AHashSet(HashSet::from_iter(iter)) + } +} + +impl<'a, T, S> IntoIterator for &'a AHashSet<T, S> { + type Item = &'a T; + type IntoIter = hash_set::Iter<'a, T>; + fn into_iter(self) -> Self::IntoIter { + (&self.0).iter() + } +} + +impl<T, S> IntoIterator for AHashSet<T, S> { + type Item = T; + type IntoIter = hash_set::IntoIter<T>; + fn into_iter(self) -> Self::IntoIter { + self.0.into_iter() + } +} + +impl<T, S> Extend<T> for AHashSet<T, S> +where + T: Eq + Hash, + S: BuildHasher, +{ + #[inline] + fn extend<I: IntoIterator<Item = T>>(&mut self, iter: I) { + self.0.extend(iter) + } +} + +impl<'a, T, S> Extend<&'a T> for AHashSet<T, S> +where + T: 'a + Eq + Hash + Copy, + S: BuildHasher, +{ + #[inline] + fn extend<I: IntoIterator<Item = &'a T>>(&mut self, iter: I) { + self.0.extend(iter) + } +} + +impl<T> Default for AHashSet<T, RandomState> { + /// Creates an empty `AHashSet<T, S>` with the `Default` value for the hasher. + #[inline] + fn default() -> AHashSet<T, RandomState> { + AHashSet(HashSet::default()) + } +} + +#[cfg(feature = "serde")] +impl<T> Serialize for AHashSet<T> +where + T: Serialize + Eq + Hash, +{ + fn serialize<S: Serializer>(&self, serializer: S) -> Result<S::Ok, S::Error> { + self.deref().serialize(serializer) + } +} + +#[cfg(feature = "serde")] +impl<'de, T> Deserialize<'de> for AHashSet<T> +where + T: Deserialize<'de> + Eq + Hash, +{ + fn deserialize<D: Deserializer<'de>>(deserializer: D) -> Result<Self, D::Error> { + let hash_set = HashSet::deserialize(deserializer); + hash_set.map(|hash_set| Self(hash_set)) + } +} + +#[cfg(all(test, feature = "serde"))] +mod test { + use super::*; + + #[test] + fn test_serde() { + let mut set = AHashSet::new(); + set.insert("for".to_string()); + set.insert("bar".to_string()); + let serialization = serde_json::to_string(&set).unwrap(); + let deserialization: AHashSet<String> = serde_json::from_str(&serialization).unwrap(); + assert_eq!(deserialization, set); + } +} diff --git a/third_party/rust/ahash/src/lib.rs b/third_party/rust/ahash/src/lib.rs new file mode 100644 index 0000000000..9964a7c47b --- /dev/null +++ b/third_party/rust/ahash/src/lib.rs @@ -0,0 +1,263 @@ +//! AHash is a hashing algorithm is intended to be a high performance, (hardware specific), keyed hash function. +//! This can be seen as a DOS resistant alternative to `FxHash`, or a fast equivalent to `SipHash`. +//! It provides a high speed hash algorithm, but where the result is not predictable without knowing a Key. +//! This allows it to be used in a `HashMap` without allowing for the possibility that an malicious user can +//! induce a collision. +//! +//! # How aHash works +//! +//! aHash uses the hardware AES instruction on x86 processors to provide a keyed hash function. +//! aHash is not a cryptographically secure hash. +//! +//! # Example +//! ``` +//! use ahash::{AHasher, RandomState}; +//! use std::collections::HashMap; +//! +//! let mut map: HashMap<i32, i32, RandomState> = HashMap::default(); +//! map.insert(12, 34); +//! ``` +//! For convinence wrappers called `AHashMap` and `AHashSet` are also provided. +//! These to the same thing with slightly less typing. +//! ```ignore +//! use ahash::AHashMap; +//! +//! let mut map: AHashMap<i32, i32> = AHashMap::with_capacity(4); +//! map.insert(12, 34); +//! map.insert(56, 78); +//! ``` +#![deny(clippy::correctness, clippy::complexity, clippy::perf)] +#![allow(clippy::pedantic, clippy::cast_lossless, clippy::unreadable_literal)] +#![cfg_attr(all(not(test), not(feature = "std")), no_std)] +#![cfg_attr(feature = "specialize", feature(min_specialization))] +#![cfg_attr(feature = "stdsimd", feature(stdsimd))] + +#[macro_use] +mod convert; + +#[cfg(any( + all(any(target_arch = "x86", target_arch = "x86_64"), target_feature = "aes", not(miri)), + all(any(target_arch = "arm", target_arch = "aarch64"), target_feature = "crypto", not(miri), feature = "stdsimd") +))] +mod aes_hash; +mod fallback_hash; +#[cfg(test)] +mod hash_quality_test; + +#[cfg(feature = "std")] +mod hash_map; +#[cfg(feature = "std")] +mod hash_set; +mod operations; +mod random_state; +mod specialize; + +#[cfg(any( + all(any(target_arch = "x86", target_arch = "x86_64"), target_feature = "aes", not(miri)), + all(any(target_arch = "arm", target_arch = "aarch64"), target_feature = "crypto", not(miri), feature = "stdsimd") +))] +pub use crate::aes_hash::AHasher; + +#[cfg(not(any( + all(any(target_arch = "x86", target_arch = "x86_64"), target_feature = "aes", not(miri)), + all(any(target_arch = "arm", target_arch = "aarch64"), target_feature = "crypto", not(miri), feature = "stdsimd") +)))] +pub use crate::fallback_hash::AHasher; +pub use crate::random_state::RandomState; + +pub use crate::specialize::CallHasher; + +#[cfg(feature = "std")] +pub use crate::hash_map::AHashMap; +#[cfg(feature = "std")] +pub use crate::hash_set::AHashSet; +use core::hash::BuildHasher; +use core::hash::Hash; +use core::hash::Hasher; + +/// Provides a default [Hasher] with fixed keys. +/// This is typically used in conjunction with [BuildHasherDefault] to create +/// [AHasher]s in order to hash the keys of the map. +/// +/// Generally it is preferable to use [RandomState] instead, so that different +/// hashmaps will have different keys. However if fixed keys are desireable this +/// may be used instead. +/// +/// # Example +/// ``` +/// use std::hash::BuildHasherDefault; +/// use ahash::{AHasher, RandomState}; +/// use std::collections::HashMap; +/// +/// let mut map: HashMap<i32, i32, BuildHasherDefault<AHasher>> = HashMap::default(); +/// map.insert(12, 34); +/// ``` +/// +/// [BuildHasherDefault]: std::hash::BuildHasherDefault +/// [Hasher]: std::hash::Hasher +/// [HashMap]: std::collections::HashMap +impl Default for AHasher { + /// Constructs a new [AHasher] with fixed keys. + /// If `std` is enabled these will be generated upon first invocation. + /// Otherwise if the `compile-time-rng`feature is enabled these will be generated at compile time. + /// If neither of these features are available, hardcoded constants will be used. + /// + /// Because the values are fixed, different hashers will all hash elements the same way. + /// This could make hash values predictable, if DOS attacks are a concern. If this behaviour is + /// not required, it may be preferable to use [RandomState] instead. + /// + /// # Examples + /// + /// ``` + /// use ahash::AHasher; + /// use std::hash::Hasher; + /// + /// let mut hasher_1 = AHasher::default(); + /// let mut hasher_2 = AHasher::default(); + /// + /// hasher_1.write_u32(1234); + /// hasher_2.write_u32(1234); + /// + /// assert_eq!(hasher_1.finish(), hasher_2.finish()); + /// ``` + #[inline] + fn default() -> AHasher { + RandomState::with_fixed_keys().build_hasher() + } +} + +/// Used for specialization. (Sealed) +pub(crate) trait BuildHasherExt: BuildHasher { + #[doc(hidden)] + fn hash_as_u64<T: Hash + ?Sized>(&self, value: &T) -> u64; + + #[doc(hidden)] + fn hash_as_fixed_length<T: Hash + ?Sized>(&self, value: &T) -> u64; + + #[doc(hidden)] + fn hash_as_str<T: Hash + ?Sized>(&self, value: &T) -> u64; +} + +impl<B: BuildHasher> BuildHasherExt for B { + #[inline] + #[cfg(feature = "specialize")] + default fn hash_as_u64<T: Hash + ?Sized>(&self, value: &T) -> u64 { + let mut hasher = self.build_hasher(); + value.hash(&mut hasher); + hasher.finish() + } + #[inline] + #[cfg(not(feature = "specialize"))] + fn hash_as_u64<T: Hash + ?Sized>(&self, value: &T) -> u64 { + let mut hasher = self.build_hasher(); + value.hash(&mut hasher); + hasher.finish() + } + #[inline] + #[cfg(feature = "specialize")] + default fn hash_as_fixed_length<T: Hash + ?Sized>(&self, value: &T) -> u64 { + let mut hasher = self.build_hasher(); + value.hash(&mut hasher); + hasher.finish() + } + #[inline] + #[cfg(not(feature = "specialize"))] + fn hash_as_fixed_length<T: Hash + ?Sized>(&self, value: &T) -> u64 { + let mut hasher = self.build_hasher(); + value.hash(&mut hasher); + hasher.finish() + } + #[inline] + #[cfg(feature = "specialize")] + default fn hash_as_str<T: Hash + ?Sized>(&self, value: &T) -> u64 { + let mut hasher = self.build_hasher(); + value.hash(&mut hasher); + hasher.finish() + } + #[inline] + #[cfg(not(feature = "specialize"))] + fn hash_as_str<T: Hash + ?Sized>(&self, value: &T) -> u64 { + let mut hasher = self.build_hasher(); + value.hash(&mut hasher); + hasher.finish() + } +} + +// #[inline(never)] +// #[doc(hidden)] +// pub fn hash_test(input: &[u8]) -> u64 { +// let a = RandomState::with_seeds(11, 22, 33, 44); +// <[u8]>::get_hash(input, &a) +// } + +#[cfg(feature = "std")] +#[cfg(test)] +mod test { + use crate::convert::Convert; + use crate::*; + use std::collections::HashMap; + use std::hash::Hash; + + #[test] + fn test_default_builder() { + use core::hash::BuildHasherDefault; + + let mut map = HashMap::<u32, u64, BuildHasherDefault<AHasher>>::default(); + map.insert(1, 3); + } + + #[test] + fn test_builder() { + let mut map = HashMap::<u32, u64, RandomState>::default(); + map.insert(1, 3); + } + + #[test] + fn test_conversion() { + let input: &[u8] = b"dddddddd"; + let bytes: u64 = as_array!(input, 8).convert(); + assert_eq!(bytes, 0x6464646464646464); + } + + + #[test] + fn test_non_zero() { + let mut hasher1 = AHasher::new_with_keys(0, 0); + let mut hasher2 = AHasher::new_with_keys(0, 0); + "foo".hash(&mut hasher1); + "bar".hash(&mut hasher2); + assert_ne!(hasher1.finish(), 0); + assert_ne!(hasher2.finish(), 0); + assert_ne!(hasher1.finish(), hasher2.finish()); + + let mut hasher1 = AHasher::new_with_keys(0, 0); + let mut hasher2 = AHasher::new_with_keys(0, 0); + 3_u64.hash(&mut hasher1); + 4_u64.hash(&mut hasher2); + assert_ne!(hasher1.finish(), 0); + assert_ne!(hasher2.finish(), 0); + assert_ne!(hasher1.finish(), hasher2.finish()); + } + + #[test] + fn test_non_zero_specialized() { + let hasher_build = RandomState::with_seeds(0,0,0,0); + + let h1 = str::get_hash("foo", &hasher_build); + let h2 = str::get_hash("bar", &hasher_build); + assert_ne!(h1, 0); + assert_ne!(h2, 0); + assert_ne!(h1, h2); + + let h1 = u64::get_hash(&3_u64, &hasher_build); + let h2 = u64::get_hash(&4_u64, &hasher_build); + assert_ne!(h1, 0); + assert_ne!(h2, 0); + assert_ne!(h1, h2); + } + + #[test] + fn test_ahasher_construction() { + let _ = AHasher::new_with_keys(1234, 5678); + } +} diff --git a/third_party/rust/ahash/src/operations.rs b/third_party/rust/ahash/src/operations.rs new file mode 100644 index 0000000000..b71fd5a743 --- /dev/null +++ b/third_party/rust/ahash/src/operations.rs @@ -0,0 +1,330 @@ +use crate::convert::*; + +/// This is a constant with a lot of special properties found by automated search. +/// See the unit tests below. (Below are alternative values) +#[cfg(all(target_feature = "ssse3", not(miri)))] +const SHUFFLE_MASK: u128 = 0x020a0700_0c01030e_050f0d08_06090b04_u128; +//const SHUFFLE_MASK: u128 = 0x000d0702_0a040301_05080f0c_0e0b0609_u128; +//const SHUFFLE_MASK: u128 = 0x040A0700_030E0106_0D050F08_020B0C09_u128; + +#[inline(always)] +pub(crate) const fn folded_multiply(s: u64, by: u64) -> u64 { + let result = (s as u128).wrapping_mul(by as u128); + ((result & 0xffff_ffff_ffff_ffff) as u64) ^ ((result >> 64) as u64) +} + + +/// Given a small (less than 8 byte slice) returns the same data stored in two u32s. +/// (order of and non-duplication of bytes is NOT guaranteed) +#[inline(always)] +pub(crate) fn read_small(data: &[u8]) -> [u64; 2] { + debug_assert!(data.len() <= 8); + if data.len() >= 2 { + if data.len() >= 4 { + //len 4-8 + [data.read_u32().0 as u64, data.read_last_u32() as u64] + } else { + //len 2-3 + [data.read_u16().0 as u64, data[data.len() - 1] as u64] + } + } else { + if data.len() > 0 { + [data[0] as u64, data[0] as u64] + } else { + [0, 0] + } + } +} + +#[inline(always)] +pub(crate) fn shuffle(a: u128) -> u128 { + #[cfg(all(target_feature = "ssse3", not(miri)))] + { + #[cfg(target_arch = "x86")] + use core::arch::x86::*; + #[cfg(target_arch = "x86_64")] + use core::arch::x86_64::*; + use core::mem::transmute; + unsafe { transmute(_mm_shuffle_epi8(transmute(a), transmute(SHUFFLE_MASK))) } + } + #[cfg(not(all(target_feature = "ssse3", not(miri))))] + { + a.swap_bytes() + } +} + +#[allow(unused)] //not used by fallback +#[inline(always)] +pub(crate) fn add_and_shuffle(a: u128, b: u128) -> u128 { + let sum = add_by_64s(a.convert(), b.convert()); + shuffle(sum.convert()) +} + +#[allow(unused)] //not used by fallbac +#[inline(always)] +pub(crate) fn shuffle_and_add(base: u128, to_add: u128) -> u128 { + let shuffled: [u64; 2] = shuffle(base).convert(); + add_by_64s(shuffled, to_add.convert()).convert() +} + +#[cfg(all(any(target_arch = "x86", target_arch = "x86_64"), target_feature = "sse2", not(miri)))] +#[inline(always)] +pub(crate) fn add_by_64s(a: [u64; 2], b: [u64; 2]) -> [u64; 2] { + use core::mem::transmute; + unsafe { + #[cfg(target_arch = "x86")] + use core::arch::x86::*; + #[cfg(target_arch = "x86_64")] + use core::arch::x86_64::*; + transmute(_mm_add_epi64(transmute(a), transmute(b))) + } +} + +#[cfg(not(all(any(target_arch = "x86", target_arch = "x86_64"), target_feature = "sse2", not(miri))))] +#[inline(always)] +pub(crate) fn add_by_64s(a: [u64; 2], b: [u64; 2]) -> [u64; 2] { + [a[0].wrapping_add(b[0]), a[1].wrapping_add(b[1])] +} + +#[cfg(all(any(target_arch = "x86", target_arch = "x86_64"), target_feature = "aes", not(miri)))] +#[allow(unused)] +#[inline(always)] +pub(crate) fn aesenc(value: u128, xor: u128) -> u128 { + #[cfg(target_arch = "x86")] + use core::arch::x86::*; + #[cfg(target_arch = "x86_64")] + use core::arch::x86_64::*; + use core::mem::transmute; + unsafe { + let value = transmute(value); + transmute(_mm_aesenc_si128(value, transmute(xor))) + } +} + +#[cfg(all(any(target_arch = "arm", target_arch = "aarch64"), target_feature = "crypto", not(miri), feature = "stdsimd"))] +#[allow(unused)] +#[inline(always)] +pub(crate) fn aesenc(value: u128, xor: u128) -> u128 { + #[cfg(target_arch = "arm")] + use core::arch::arm::*; + #[cfg(target_arch = "aarch64")] + use core::arch::aarch64::*; + use core::mem::transmute; + unsafe { + let value = transmute(value); + transmute(vaesmcq_u8(vaeseq_u8(value, transmute(xor)))) + } +} + +#[cfg(all(any(target_arch = "x86", target_arch = "x86_64"), target_feature = "aes", not(miri)))] +#[allow(unused)] +#[inline(always)] +pub(crate) fn aesdec(value: u128, xor: u128) -> u128 { + #[cfg(target_arch = "x86")] + use core::arch::x86::*; + #[cfg(target_arch = "x86_64")] + use core::arch::x86_64::*; + use core::mem::transmute; + unsafe { + let value = transmute(value); + transmute(_mm_aesdec_si128(value, transmute(xor))) + } +} + +#[cfg(all(any(target_arch = "arm", target_arch = "aarch64"), target_feature = "crypto", not(miri), feature = "stdsimd"))] +#[allow(unused)] +#[inline(always)] +pub(crate) fn aesdec(value: u128, xor: u128) -> u128 { + #[cfg(target_arch = "arm")] + use core::arch::arm::*; + #[cfg(target_arch = "aarch64")] + use core::arch::aarch64::*; + use core::mem::transmute; + unsafe { + let value = transmute(value); + transmute(vaesimcq_u8(vaesdq_u8(value, transmute(xor)))) + } +} + +#[cfg(test)] +mod test { + use super::*; + use crate::convert::Convert; + + // This is code to search for the shuffle constant + // + //thread_local! { static MASK: Cell<u128> = Cell::new(0); } + // + // fn shuffle(a: u128) -> u128 { + // use std::intrinsics::transmute; + // #[cfg(target_arch = "x86")] + // use core::arch::x86::*; + // #[cfg(target_arch = "x86_64")] + // use core::arch::x86_64::*; + // MASK.with(|mask| { + // unsafe { transmute(_mm_shuffle_epi8(transmute(a), transmute(mask.get()))) } + // }) + // } + // + // #[test] + // fn find_shuffle() { + // use rand::prelude::*; + // use SliceRandom; + // use std::panic; + // use std::io::Write; + // + // let mut value: [u8; 16] = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12 ,13, 14, 15]; + // let mut rand = thread_rng(); + // let mut successful_list = HashMap::new(); + // for _attempt in 0..10000000 { + // rand.shuffle(&mut value); + // let test_val = value.convert(); + // MASK.with(|mask| { + // mask.set(test_val); + // }); + // if let Ok(successful) = panic::catch_unwind(|| { + // test_shuffle_does_not_collide_with_aes(); + // test_shuffle_moves_high_bits(); + // test_shuffle_moves_every_value(); + // //test_shuffle_does_not_loop(); + // value + // }) { + // let successful: u128 = successful.convert(); + // successful_list.insert(successful, iters_before_loop()); + // } + // } + // let write_file = File::create("/tmp/output").unwrap(); + // let mut writer = BufWriter::new(&write_file); + // + // for success in successful_list { + // writeln!(writer, "Found successful: {:x?} - {:?}", success.0, success.1); + // } + // } + // + // fn iters_before_loop() -> u32 { + // let numbered = 0x00112233_44556677_8899AABB_CCDDEEFF; + // let mut shuffled = shuffle(numbered); + // let mut count = 0; + // loop { + // // println!("{:>16x}", shuffled); + // if numbered == shuffled { + // break; + // } + // count += 1; + // shuffled = shuffle(shuffled); + // } + // count + // } + + #[cfg(all( + any(target_arch = "x86", target_arch = "x86_64"), + target_feature = "ssse3", + target_feature = "aes", + not(miri) + ))] + #[test] + fn test_shuffle_does_not_collide_with_aes() { + let mut value: [u8; 16] = [0; 16]; + let zero_mask_enc = aesenc(0, 0); + let zero_mask_dec = aesdec(0, 0); + for index in 0..16 { + value[index] = 1; + let excluded_positions_enc: [u8; 16] = aesenc(value.convert(), zero_mask_enc).convert(); + let excluded_positions_dec: [u8; 16] = aesdec(value.convert(), zero_mask_dec).convert(); + let actual_location: [u8; 16] = shuffle(value.convert()).convert(); + for pos in 0..16 { + if actual_location[pos] != 0 { + assert_eq!( + 0, excluded_positions_enc[pos], + "Forward Overlap between {:?} and {:?} at {}", + excluded_positions_enc, actual_location, index + ); + assert_eq!( + 0, excluded_positions_dec[pos], + "Reverse Overlap between {:?} and {:?} at {}", + excluded_positions_dec, actual_location, index + ); + } + } + value[index] = 0; + } + } + + #[test] + fn test_shuffle_contains_each_value() { + let value: [u8; 16] = 0x00010203_04050607_08090A0B_0C0D0E0F_u128.convert(); + let shuffled: [u8; 16] = shuffle(value.convert()).convert(); + for index in 0..16_u8 { + assert!(shuffled.contains(&index), "Value is missing {}", index); + } + } + + #[test] + fn test_shuffle_moves_every_value() { + let mut value: [u8; 16] = [0; 16]; + for index in 0..16 { + value[index] = 1; + let shuffled: [u8; 16] = shuffle(value.convert()).convert(); + assert_eq!(0, shuffled[index], "Value is not moved {}", index); + value[index] = 0; + } + } + + #[test] + fn test_shuffle_moves_high_bits() { + assert!( + shuffle(1) > (1_u128 << 80), + "Low bits must be moved to other half {:?} -> {:?}", + 0, + shuffle(1) + ); + + assert!( + shuffle(1_u128 << 58) >= (1_u128 << 64), + "High bits must be moved to other half {:?} -> {:?}", + 7, + shuffle(1_u128 << 58) + ); + assert!( + shuffle(1_u128 << 58) < (1_u128 << 112), + "High bits must not remain high {:?} -> {:?}", + 7, + shuffle(1_u128 << 58) + ); + assert!( + shuffle(1_u128 << 64) < (1_u128 << 64), + "Low bits must be moved to other half {:?} -> {:?}", + 8, + shuffle(1_u128 << 64) + ); + assert!( + shuffle(1_u128 << 64) >= (1_u128 << 16), + "Low bits must not remain low {:?} -> {:?}", + 8, + shuffle(1_u128 << 64) + ); + + assert!( + shuffle(1_u128 << 120) < (1_u128 << 50), + "High bits must be moved to low half {:?} -> {:?}", + 15, + shuffle(1_u128 << 120) + ); + } + + #[cfg(all( + any(target_arch = "x86", target_arch = "x86_64"), + target_feature = "ssse3", + not(miri) + ))] + #[test] + fn test_shuffle_does_not_loop() { + let numbered = 0x00112233_44556677_8899AABB_CCDDEEFF; + let mut shuffled = shuffle(numbered); + for count in 0..100 { + // println!("{:>16x}", shuffled); + assert_ne!(numbered, shuffled, "Equal after {} vs {:x}", count, shuffled); + shuffled = shuffle(shuffled); + } + } +} diff --git a/third_party/rust/ahash/src/random_state.rs b/third_party/rust/ahash/src/random_state.rs new file mode 100644 index 0000000000..c3628bf145 --- /dev/null +++ b/third_party/rust/ahash/src/random_state.rs @@ -0,0 +1,358 @@ +#[cfg(all(feature = "runtime-rng", not(all(feature = "compile-time-rng", test))))] +use crate::convert::Convert; +#[cfg(feature = "specialize")] +use crate::BuildHasherExt; + +#[cfg(any( + all(any(target_arch = "x86", target_arch = "x86_64"), target_feature = "aes", not(miri)), + all(any(target_arch = "arm", target_arch = "aarch64"), target_feature = "crypto", not(miri), feature = "stdsimd") +))] +pub use crate::aes_hash::*; + +#[cfg(not(any( + all(any(target_arch = "x86", target_arch = "x86_64"), target_feature = "aes", not(miri)), + all(any(target_arch = "arm", target_arch = "aarch64"), target_feature = "crypto", not(miri), feature = "stdsimd") +)))] +pub use crate::fallback_hash::*; + +#[cfg(all(feature = "compile-time-rng", any(not(feature = "runtime-rng"), test)))] +use const_random::const_random; +use core::any::{Any, TypeId}; +use core::fmt; +use core::hash::BuildHasher; +#[cfg(feature = "specialize")] +use core::hash::Hash; +use core::hash::Hasher; + +#[cfg(not(feature = "std"))] +extern crate alloc; +#[cfg(feature = "std")] +extern crate std as alloc; + +use alloc::boxed::Box; +use core::sync::atomic::{AtomicUsize, Ordering}; +#[cfg(not(all(target_arch = "arm", target_os = "none")))] +use once_cell::race::OnceBox; + +#[cfg(any( + all(any(target_arch = "x86", target_arch = "x86_64"), target_feature = "aes", not(miri)), + all(any(target_arch = "arm", target_arch = "aarch64"), target_feature = "crypto", not(miri), feature = "stdsimd") +))] +use crate::aes_hash::*; +#[cfg(not(any( + all(any(target_arch = "x86", target_arch = "x86_64"), target_feature = "aes", not(miri)), + all(any(target_arch = "arm", target_arch = "aarch64"), target_feature = "crypto", not(miri), feature = "stdsimd") +)))] +use crate::fallback_hash::*; + +#[cfg(not(all(target_arch = "arm", target_os = "none")))] +static RAND_SOURCE: OnceBox<Box<dyn RandomSource + Send + Sync>> = OnceBox::new(); + +/// A supplier of Randomness used for different hashers. +/// See [RandomState.set_random_source]. +pub trait RandomSource { + + fn get_fixed_seeds(&self) -> &'static [[u64; 4]; 2]; + + fn gen_hasher_seed(&self) -> usize; + +} + +pub(crate) const PI: [u64; 4] = [ + 0x243f_6a88_85a3_08d3, + 0x1319_8a2e_0370_7344, + 0xa409_3822_299f_31d0, + 0x082e_fa98_ec4e_6c89, +]; + +pub(crate) const PI2: [u64; 4] = [ + 0x4528_21e6_38d0_1377, + 0xbe54_66cf_34e9_0c6c, + 0xc0ac_29b7_c97c_50dd, + 0x3f84_d5b5_b547_0917, +]; + +struct DefaultRandomSource { + counter: AtomicUsize, +} + +impl DefaultRandomSource { + fn new() -> DefaultRandomSource { + DefaultRandomSource { + counter: AtomicUsize::new(&PI as *const _ as usize), + } + } + + const fn default() -> DefaultRandomSource { + DefaultRandomSource { + counter: AtomicUsize::new(PI[3] as usize), + } + } +} + +impl RandomSource for DefaultRandomSource { + + #[cfg(all(feature = "runtime-rng", not(all(feature = "compile-time-rng", test))))] + fn get_fixed_seeds(&self) -> &'static [[u64; 4]; 2] { + static SEEDS: OnceBox<[[u64; 4]; 2]> = OnceBox::new(); + + SEEDS.get_or_init(|| { + let mut result: [u8; 64] = [0; 64]; + getrandom::getrandom(&mut result).expect("getrandom::getrandom() failed."); + Box::new(result.convert()) + }) + } + + #[cfg(all(feature = "compile-time-rng", any(not(feature = "runtime-rng"), test)))] + fn get_fixed_seeds(&self) -> &'static [[u64; 4]; 2] { + const RAND: [[u64; 4]; 2] = [ + [ + const_random!(u64), + const_random!(u64), + const_random!(u64), + const_random!(u64), + ], [ + const_random!(u64), + const_random!(u64), + const_random!(u64), + const_random!(u64), + ] + ]; + &RAND + } + + #[cfg(all(not(feature = "runtime-rng"), not(feature = "compile-time-rng")))] + fn get_fixed_seeds(&self) -> &'static [[u64; 4]; 2] { + &[PI, PI2] + } + + #[cfg(not(all(target_arch = "arm", target_os = "none")))] + fn gen_hasher_seed(&self) -> usize { + let stack = self as *const _ as usize; + self.counter.fetch_add(stack, Ordering::Relaxed) + } + + #[cfg(all(target_arch = "arm", target_os = "none"))] + fn gen_hasher_seed(&self) -> usize { + let stack = self as *const _ as usize; + let previous = self.counter.load(Ordering::Relaxed); + let new = previous.wrapping_add(stack); + self.counter.store(new, Ordering::Relaxed); + new + } +} + +/// Provides a [Hasher] factory. This is typically used (e.g. by [HashMap]) to create +/// [AHasher]s in order to hash the keys of the map. See `build_hasher` below. +/// +/// [build_hasher]: ahash:: +/// [Hasher]: std::hash::Hasher +/// [BuildHasher]: std::hash::BuildHasher +/// [HashMap]: std::collections::HashMap +#[derive(Clone)] +pub struct RandomState { + pub(crate) k0: u64, + pub(crate) k1: u64, + pub(crate) k2: u64, + pub(crate) k3: u64, +} + +impl fmt::Debug for RandomState { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + f.pad("RandomState { .. }") + } +} + +impl RandomState { + + /// Provides an optional way to manually supply a source of randomness for Hasher keys. + /// + /// The provided [RandomSource] will be used to be used as a source of randomness by [RandomState] to generate new states. + /// If this method is not invoked the standard source of randomness is used as described in the Readme. + /// + /// The source of randomness can only be set once, and must be set before the first RandomState is created. + /// If the source has already been specified `Err` is returned with a `bool` indicating if the set failed because + /// method was previously invoked (true) or if the default source is already being used (false). + #[cfg(not(all(target_arch = "arm", target_os = "none")))] + pub fn set_random_source(source: impl RandomSource + Send + Sync + 'static) -> Result<(), bool> { + RAND_SOURCE.set(Box::new(Box::new(source))).map_err(|s| s.as_ref().type_id() != TypeId::of::<&DefaultRandomSource>()) + } + + #[inline] + #[cfg(not(all(target_arch = "arm", target_os = "none")))] + fn get_src() -> &'static dyn RandomSource { + RAND_SOURCE.get_or_init(|| Box::new(Box::new(DefaultRandomSource::new()))).as_ref() + } + + #[inline] + #[cfg(all(target_arch = "arm", target_os = "none"))] + fn get_src() -> &'static dyn RandomSource { + static RAND_SOURCE: DefaultRandomSource = DefaultRandomSource::default(); + &RAND_SOURCE + } + + /// Use randomly generated keys + #[inline] + pub fn new() -> RandomState { + let src = Self::get_src(); + let fixed = src.get_fixed_seeds(); + Self::from_keys(&fixed[0], &fixed[1], src.gen_hasher_seed()) + } + + /// Allows for supplying seeds, but each time it is called the resulting state will be different. + /// This is done using a static counter, so it can safely be used with a fixed keys. + #[inline] + pub fn generate_with(k0: u64, k1: u64, k2: u64, k3: u64) -> RandomState { + let src = Self::get_src(); + let fixed = src.get_fixed_seeds(); + RandomState::from_keys(&fixed[0], &[k0, k1, k2, k3], src.gen_hasher_seed()) + } + + fn from_keys(a: &[u64; 4], b: &[u64; 4], c: usize) -> RandomState { + let &[k0, k1, k2, k3] = a; + let mut hasher = AHasher::from_random_state(&RandomState { k0, k1, k2, k3 }); + hasher.write_usize(c); + let mix = |k: u64| { + let mut h = hasher.clone(); + h.write_u64(k); + h.finish() + }; + RandomState { + k0: mix(b[0]), + k1: mix(b[1]), + k2: mix(b[2]), + k3: mix(b[3]), + } + } + + /// Internal. Used by Default. + #[inline] + pub(crate) fn with_fixed_keys() -> RandomState { + let [k0, k1, k2, k3] = Self::get_src().get_fixed_seeds()[0]; + RandomState { k0, k1, k2, k3 } + } + + /// Allows for explicitly setting a seed to used. + /// + /// Note: This method does not require the provided seed to be strong. + #[inline] + pub fn with_seed(key: usize) -> RandomState { + let fixed = Self::get_src().get_fixed_seeds(); + RandomState::from_keys(&fixed[0], &fixed[1], key) + } + + /// Allows for explicitly setting the seeds to used. + /// + /// Note: This method is robust against 0s being passed for one or more of the parameters + /// or the same value being passed for more than one parameter. + #[inline] + pub const fn with_seeds(k0: u64, k1: u64, k2: u64, k3: u64) -> RandomState { + RandomState { k0: k0 ^ PI2[0], k1: k1 ^ PI2[1], k2: k2 ^ PI2[2], k3: k3 ^ PI2[3] } + } +} + +impl Default for RandomState { + #[inline] + fn default() -> Self { + Self::new() + } +} + +impl BuildHasher for RandomState { + type Hasher = AHasher; + + /// Constructs a new [AHasher] with keys based on this [RandomState] object. + /// This means that two different [RandomState]s will will generate + /// [AHasher]s that will return different hashcodes, but [Hasher]s created from the same [BuildHasher] + /// will generate the same hashes for the same input data. + /// + /// # Examples + /// + /// ``` + /// use ahash::{AHasher, RandomState}; + /// use std::hash::{Hasher, BuildHasher}; + /// + /// let build_hasher = RandomState::new(); + /// let mut hasher_1 = build_hasher.build_hasher(); + /// let mut hasher_2 = build_hasher.build_hasher(); + /// + /// hasher_1.write_u32(1234); + /// hasher_2.write_u32(1234); + /// + /// assert_eq!(hasher_1.finish(), hasher_2.finish()); + /// + /// let other_build_hasher = RandomState::new(); + /// let mut different_hasher = other_build_hasher.build_hasher(); + /// different_hasher.write_u32(1234); + /// assert_ne!(different_hasher.finish(), hasher_1.finish()); + /// ``` + /// [Hasher]: std::hash::Hasher + /// [BuildHasher]: std::hash::BuildHasher + /// [HashMap]: std::collections::HashMap + #[inline] + fn build_hasher(&self) -> AHasher { + AHasher::from_random_state(self) + } +} + +#[cfg(feature = "specialize")] +impl BuildHasherExt for RandomState { + #[inline] + fn hash_as_u64<T: Hash + ?Sized>(&self, value: &T) -> u64 { + let mut hasher = AHasherU64 { + buffer: self.k0, + pad: self.k1, + }; + value.hash(&mut hasher); + hasher.finish() + } + + #[inline] + fn hash_as_fixed_length<T: Hash + ?Sized>(&self, value: &T) -> u64 { + let mut hasher = AHasherFixed(self.build_hasher()); + value.hash(&mut hasher); + hasher.finish() + } + + #[inline] + fn hash_as_str<T: Hash + ?Sized>(&self, value: &T) -> u64 { + let mut hasher = AHasherStr(self.build_hasher()); + value.hash(&mut hasher); + hasher.finish() + } +} + +#[cfg(test)] +mod test { + use super::*; + + #[test] + fn test_unique() { + let a = RandomState::new(); + let b = RandomState::new(); + assert_ne!(a.build_hasher().finish(), b.build_hasher().finish()); + } + + #[cfg(all(feature = "runtime-rng", not(all(feature = "compile-time-rng", test))))] + #[test] + fn test_not_pi() { + assert_ne!(PI, RandomState::get_src().get_fixed_seeds()[0]); + } + + #[cfg(all(feature = "compile-time-rng", any(not(feature = "runtime-rng"), test)))] + #[test] + fn test_not_pi_const() { + assert_ne!(PI, RandomState::get_src().get_fixed_seeds()[0]); + } + + #[cfg(all(not(feature = "runtime-rng"), not(feature = "compile-time-rng")))] + #[test] + fn test_pi() { + assert_eq!(PI, RandomState::get_src().get_fixed_seeds()[0]); + } + + #[test] + fn test_with_seeds_const() { + const _CONST_RANDOM_STATE: RandomState = RandomState::with_seeds(17, 19, 21, 23); + } +} diff --git a/third_party/rust/ahash/src/specialize.rs b/third_party/rust/ahash/src/specialize.rs new file mode 100644 index 0000000000..d94a4eed0d --- /dev/null +++ b/third_party/rust/ahash/src/specialize.rs @@ -0,0 +1,239 @@ +use core::hash::BuildHasher; +use core::hash::Hash; +use core::hash::Hasher; + +#[cfg(not(feature = "std"))] +extern crate alloc; +#[cfg(feature = "std")] +extern crate std as alloc; + +#[cfg(feature = "specialize")] +use crate::BuildHasherExt; +#[cfg(feature = "specialize")] +use alloc::string::String; +#[cfg(feature = "specialize")] +use alloc::vec::Vec; + +/// Provides a way to get an optimized hasher for a given data type. +/// Rather than using a Hasher generically which can hash any value, this provides a way to get a specialized hash +/// for a specific type. So this may be faster for primitive types. +/// # Example +/// ``` +/// use std::hash::BuildHasher; +/// use ahash::RandomState; +/// use ahash::CallHasher; +/// +/// let hash_builder = RandomState::new(); +/// //... +/// let value: u32 = 17; +/// let hash = u32::get_hash(&value, &hash_builder); +/// ``` +/// Note that the type used to invoke `get_hash` must be the same a the type of value passed. +/// For example get a hasher specialized on `[u8]` can invoke: +/// ``` +/// /// use std::hash::BuildHasher; +/// # use ahash::RandomState; +/// # use ahash::CallHasher; +/// # let hash_builder = RandomState::new(); +/// let bytes: [u8; 4] = [1, 2, 3, 4]; +/// let hash = <[u8]>::get_hash(&bytes, &hash_builder); +/// ``` +pub trait CallHasher { + fn get_hash<H: Hash + ?Sized, B: BuildHasher>(value: &H, build_hasher: &B) -> u64; +} + +#[cfg(not(feature = "specialize"))] +impl<T> CallHasher for T +where + T: Hash + ?Sized, +{ + #[inline] + fn get_hash<H: Hash + ?Sized, B: BuildHasher>(value: &H, build_hasher: &B) -> u64 { + let mut hasher = build_hasher.build_hasher(); + value.hash(&mut hasher); + hasher.finish() + } +} + +#[cfg(feature = "specialize")] +impl<T> CallHasher for T +where + T: Hash + ?Sized, +{ + #[inline] + default fn get_hash<H: Hash + ?Sized, B: BuildHasher>(value: &H, build_hasher: &B) -> u64 { + let mut hasher = build_hasher.build_hasher(); + value.hash(&mut hasher); + hasher.finish() + } +} + +macro_rules! call_hasher_impl { + ($typ:ty) => { + #[cfg(feature = "specialize")] + impl CallHasher for $typ { + #[inline] + fn get_hash<H: Hash + ?Sized, B: BuildHasher>(value: &H, build_hasher: &B) -> u64 { + build_hasher.hash_as_u64(value) + } + } + }; +} +call_hasher_impl!(u8); +call_hasher_impl!(u16); +call_hasher_impl!(u32); +call_hasher_impl!(u64); +call_hasher_impl!(i8); +call_hasher_impl!(i16); +call_hasher_impl!(i32); +call_hasher_impl!(i64); + +#[cfg(feature = "specialize")] +impl CallHasher for u128 { + #[inline] + fn get_hash<H: Hash + ?Sized, B: BuildHasher>(value: &H, build_hasher: &B) -> u64 { + build_hasher.hash_as_fixed_length(value) + } +} + +#[cfg(feature = "specialize")] +impl CallHasher for i128 { + #[inline] + fn get_hash<H: Hash + ?Sized, B: BuildHasher>(value: &H, build_hasher: &B) -> u64 { + build_hasher.hash_as_fixed_length(value) + } +} + +#[cfg(feature = "specialize")] +impl CallHasher for usize { + #[inline] + fn get_hash<H: Hash + ?Sized, B: BuildHasher>(value: &H, build_hasher: &B) -> u64 { + build_hasher.hash_as_fixed_length(value) + } +} + +#[cfg(feature = "specialize")] +impl CallHasher for isize { + #[inline] + fn get_hash<H: Hash + ?Sized, B: BuildHasher>(value: &H, build_hasher: &B) -> u64 { + build_hasher.hash_as_fixed_length(value) + } +} + +#[cfg(feature = "specialize")] +impl CallHasher for [u8] { + #[inline] + fn get_hash<H: Hash + ?Sized, B: BuildHasher>(value: &H, build_hasher: &B) -> u64 { + build_hasher.hash_as_str(value) + } +} + +#[cfg(feature = "specialize")] +impl CallHasher for Vec<u8> { + #[inline] + fn get_hash<H: Hash + ?Sized, B: BuildHasher>(value: &H, build_hasher: &B) -> u64 { + build_hasher.hash_as_str(value) + } +} + +#[cfg(feature = "specialize")] +impl CallHasher for str { + #[inline] + fn get_hash<H: Hash + ?Sized, B: BuildHasher>(value: &H, build_hasher: &B) -> u64 { + build_hasher.hash_as_str(value) + } +} + +#[cfg(all(feature = "specialize"))] +impl CallHasher for String { + #[inline] + fn get_hash<H: Hash + ?Sized, B: BuildHasher>(value: &H, build_hasher: &B) -> u64 { + build_hasher.hash_as_str(value) + } +} + +#[cfg(test)] +mod test { + use super::*; + use crate::*; + + #[test] + #[cfg(feature = "specialize")] + pub fn test_specialized_invoked() { + let build_hasher = RandomState::with_seeds(1, 2, 3, 4); + let shortened = u64::get_hash(&0, &build_hasher); + let mut hasher = AHasher::new_with_keys(1, 2); + 0_u64.hash(&mut hasher); + assert_ne!(hasher.finish(), shortened); + } + + /// Tests that some non-trivial transformation takes place. + #[test] + pub fn test_input_processed() { + let build_hasher = RandomState::with_seeds(2, 2, 2, 2); + assert_ne!(0, u64::get_hash(&0, &build_hasher)); + assert_ne!(1, u64::get_hash(&0, &build_hasher)); + assert_ne!(2, u64::get_hash(&0, &build_hasher)); + assert_ne!(3, u64::get_hash(&0, &build_hasher)); + assert_ne!(4, u64::get_hash(&0, &build_hasher)); + assert_ne!(5, u64::get_hash(&0, &build_hasher)); + + assert_ne!(0, u64::get_hash(&1, &build_hasher)); + assert_ne!(1, u64::get_hash(&1, &build_hasher)); + assert_ne!(2, u64::get_hash(&1, &build_hasher)); + assert_ne!(3, u64::get_hash(&1, &build_hasher)); + assert_ne!(4, u64::get_hash(&1, &build_hasher)); + assert_ne!(5, u64::get_hash(&1, &build_hasher)); + + let xored = u64::get_hash(&0, &build_hasher) ^ u64::get_hash(&1, &build_hasher); + assert_ne!(0, xored); + assert_ne!(1, xored); + assert_ne!(2, xored); + assert_ne!(3, xored); + assert_ne!(4, xored); + assert_ne!(5, xored); + } + + #[test] + pub fn test_ref_independent() { + let build_hasher = RandomState::with_seeds(1, 2, 3, 4); + assert_eq!(u8::get_hash(&&1, &build_hasher), u8::get_hash(&1, &build_hasher)); + assert_eq!(u16::get_hash(&&2, &build_hasher), u16::get_hash(&2, &build_hasher)); + assert_eq!(u32::get_hash(&&3, &build_hasher), u32::get_hash(&3, &build_hasher)); + assert_eq!(u64::get_hash(&&4, &build_hasher), u64::get_hash(&4, &build_hasher)); + assert_eq!(u128::get_hash(&&5, &build_hasher), u128::get_hash(&5, &build_hasher)); + assert_eq!( + str::get_hash(&"test", &build_hasher), + str::get_hash("test", &build_hasher) + ); + assert_eq!( + str::get_hash(&"test", &build_hasher), + String::get_hash(&"test".to_string(), &build_hasher) + ); + #[cfg(feature = "specialize")] + assert_eq!( + str::get_hash(&"test", &build_hasher), + <[u8]>::get_hash("test".as_bytes(), &build_hasher) + ); + + let build_hasher = RandomState::with_seeds(10, 20, 30, 40); + assert_eq!(u8::get_hash(&&&1, &build_hasher), u8::get_hash(&1, &build_hasher)); + assert_eq!(u16::get_hash(&&&2, &build_hasher), u16::get_hash(&2, &build_hasher)); + assert_eq!(u32::get_hash(&&&3, &build_hasher), u32::get_hash(&3, &build_hasher)); + assert_eq!(u64::get_hash(&&&4, &build_hasher), u64::get_hash(&4, &build_hasher)); + assert_eq!(u128::get_hash(&&&5, &build_hasher), u128::get_hash(&5, &build_hasher)); + assert_eq!( + str::get_hash(&&"test", &build_hasher), + str::get_hash("test", &build_hasher) + ); + assert_eq!( + str::get_hash(&&"test", &build_hasher), + String::get_hash(&"test".to_string(), &build_hasher) + ); + #[cfg(feature = "specialize")] + assert_eq!( + str::get_hash(&&"test", &build_hasher), + <[u8]>::get_hash(&"test".to_string().into_bytes(), &build_hasher) + ); + } +} |