diff options
author | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-04-17 12:19:13 +0000 |
---|---|---|
committer | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-04-17 12:19:13 +0000 |
commit | 218caa410aa38c29984be31a5229b9fa717560ee (patch) | |
tree | c54bd55eeb6e4c508940a30e94c0032fbd45d677 /vendor/twox-hash/src | |
parent | Releasing progress-linux version 1.67.1+dfsg1-1~progress7.99u1. (diff) | |
download | rustc-218caa410aa38c29984be31a5229b9fa717560ee.tar.xz rustc-218caa410aa38c29984be31a5229b9fa717560ee.zip |
Merging upstream version 1.68.2+dfsg1.
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to '')
-rw-r--r-- | vendor/twox-hash/src/bin/hash_file.rs | 28 | ||||
-rw-r--r-- | vendor/twox-hash/src/digest_0_10_support.rs | 92 | ||||
-rw-r--r-- | vendor/twox-hash/src/digest_0_9_support.rs | 179 | ||||
-rw-r--r-- | vendor/twox-hash/src/digest_support.rs | 179 | ||||
-rw-r--r-- | vendor/twox-hash/src/lib.rs | 121 | ||||
-rw-r--r-- | vendor/twox-hash/src/sixty_four.rs | 413 | ||||
-rw-r--r-- | vendor/twox-hash/src/std_support.rs | 113 | ||||
-rw-r--r-- | vendor/twox-hash/src/thirty_two.rs | 416 | ||||
-rw-r--r-- | vendor/twox-hash/src/xxh3.rs | 1666 |
9 files changed, 3207 insertions, 0 deletions
diff --git a/vendor/twox-hash/src/bin/hash_file.rs b/vendor/twox-hash/src/bin/hash_file.rs new file mode 100644 index 000000000..509b48d68 --- /dev/null +++ b/vendor/twox-hash/src/bin/hash_file.rs @@ -0,0 +1,28 @@ +use std::env; +use std::fs::File; +use std::hash::Hasher; +use std::io::{BufRead, BufReader}; +use twox_hash::XxHash64; + +fn main() { + for arg in env::args().skip(1) { + let f = File::open(&arg).unwrap(); + let mut f = BufReader::new(f); + + let mut hasher = XxHash64::with_seed(0); + + loop { + let consumed = { + let bytes = f.fill_buf().unwrap(); + if bytes.is_empty() { + break; + } + hasher.write(bytes); + bytes.len() + }; + f.consume(consumed); + } + + println!("{:16x} {}", hasher.finish(), arg); + } +} diff --git a/vendor/twox-hash/src/digest_0_10_support.rs b/vendor/twox-hash/src/digest_0_10_support.rs new file mode 100644 index 000000000..935c09692 --- /dev/null +++ b/vendor/twox-hash/src/digest_0_10_support.rs @@ -0,0 +1,92 @@ +use core::hash::Hasher; + +use digest_0_10::{ + generic_array::typenum::consts::{U16, U4, U8}, + FixedOutput, HashMarker, Output, OutputSizeUser, Update, +}; + +use crate::{xxh3, XxHash32, XxHash64}; + +// ---------- + +impl Update for XxHash32 { + fn update(&mut self, data: &[u8]) { + self.write(data); + } +} + +impl OutputSizeUser for XxHash32 { + type OutputSize = U4; +} + +impl FixedOutput for XxHash32 { + fn finalize_into(self, out: &mut Output<Self>) { + let tmp: &mut [u8; 4] = out.as_mut(); + *tmp = self.finish().to_be_bytes(); + } +} + +impl HashMarker for XxHash32 {} + +// ---------- + +impl Update for XxHash64 { + fn update(&mut self, data: &[u8]) { + self.write(data); + } +} + +impl OutputSizeUser for XxHash64 { + type OutputSize = U8; +} + +impl FixedOutput for XxHash64 { + fn finalize_into(self, out: &mut Output<Self>) { + let tmp: &mut [u8; 8] = out.as_mut(); + *tmp = self.finish().to_be_bytes(); + } +} + +impl HashMarker for XxHash64 {} + +// ---------- + +impl Update for xxh3::Hash64 { + fn update(&mut 
self, data: &[u8]) { + self.write(data); + } +} + +impl OutputSizeUser for xxh3::Hash64 { + type OutputSize = U8; +} + +impl FixedOutput for xxh3::Hash64 { + fn finalize_into(self, out: &mut Output<Self>) { + let tmp: &mut [u8; 8] = out.as_mut(); + *tmp = self.finish().to_be_bytes(); + } +} + +impl HashMarker for xxh3::Hash64 {} + +// ---------- + +impl Update for xxh3::Hash128 { + fn update(&mut self, data: &[u8]) { + self.write(data); + } +} + +impl OutputSizeUser for xxh3::Hash128 { + type OutputSize = U16; +} + +impl FixedOutput for xxh3::Hash128 { + fn finalize_into(self, out: &mut Output<Self>) { + let tmp: &mut [u8; 16] = out.as_mut(); + *tmp = xxh3::HasherExt::finish_ext(&self).to_be_bytes(); + } +} + +impl HashMarker for xxh3::Hash128 {} diff --git a/vendor/twox-hash/src/digest_0_9_support.rs b/vendor/twox-hash/src/digest_0_9_support.rs new file mode 100644 index 000000000..67788cd6c --- /dev/null +++ b/vendor/twox-hash/src/digest_0_9_support.rs @@ -0,0 +1,179 @@ +use core::hash::Hasher; + +use digest_0_9::{ + generic_array::{ + typenum::consts::{U16, U4, U8}, + GenericArray, + }, + Digest, +}; + +use crate::{xxh3, XxHash32, XxHash64}; + +impl Digest for XxHash32 { + type OutputSize = U4; + + fn new() -> Self { + Self::default() + } + + fn update(&mut self, data: impl AsRef<[u8]>) { + self.write(data.as_ref()); + } + + fn chain(mut self, data: impl AsRef<[u8]>) -> Self + where + Self: Sized, + { + self.update(data); + self + } + + fn finalize(self) -> GenericArray<u8, Self::OutputSize> { + self.finish().to_be_bytes().into() + } + + fn finalize_reset(&mut self) -> GenericArray<u8, Self::OutputSize> { + let result = self.finalize(); + self.reset(); + result + } + + fn reset(&mut self) { + *self = Self::default(); + } + + fn output_size() -> usize { + 4 + } + + fn digest(data: &[u8]) -> GenericArray<u8, Self::OutputSize> { + Self::new().chain(data).finalize() + } +} + +impl Digest for XxHash64 { + type OutputSize = U8; + + fn new() -> Self { + Self::default() 
+ } + + fn update(&mut self, data: impl AsRef<[u8]>) { + self.write(data.as_ref()); + } + + fn chain(mut self, data: impl AsRef<[u8]>) -> Self + where + Self: Sized, + { + self.update(data); + self + } + + fn finalize(self) -> GenericArray<u8, Self::OutputSize> { + self.finish().to_be_bytes().into() + } + + fn finalize_reset(&mut self) -> GenericArray<u8, Self::OutputSize> { + let result = self.finalize(); + self.reset(); + result + } + + fn reset(&mut self) { + *self = Self::default(); + } + + fn output_size() -> usize { + 8 + } + + fn digest(data: &[u8]) -> GenericArray<u8, Self::OutputSize> { + Self::new().chain(data).finalize() + } +} + +impl Digest for xxh3::Hash64 { + type OutputSize = U8; + + fn new() -> Self { + Self::default() + } + + fn update(&mut self, data: impl AsRef<[u8]>) { + self.write(data.as_ref()); + } + + fn chain(mut self, data: impl AsRef<[u8]>) -> Self + where + Self: Sized, + { + self.update(data); + self + } + + fn finalize(self) -> GenericArray<u8, Self::OutputSize> { + self.finish().to_be_bytes().into() + } + + fn finalize_reset(&mut self) -> GenericArray<u8, Self::OutputSize> { + let result = self.clone().finalize(); + self.reset(); + result + } + + fn reset(&mut self) { + *self = Self::default(); + } + + fn output_size() -> usize { + 8 + } + + fn digest(data: &[u8]) -> GenericArray<u8, Self::OutputSize> { + Self::new().chain(data).finalize() + } +} + +impl Digest for xxh3::Hash128 { + type OutputSize = U16; + + fn new() -> Self { + Self::default() + } + + fn update(&mut self, data: impl AsRef<[u8]>) { + self.write(data.as_ref()); + } + + fn chain(mut self, data: impl AsRef<[u8]>) -> Self + where + Self: Sized, + { + self.update(data); + self + } + + fn finalize(self) -> GenericArray<u8, Self::OutputSize> { + xxh3::HasherExt::finish_ext(&self).to_be_bytes().into() + } + + fn finalize_reset(&mut self) -> GenericArray<u8, Self::OutputSize> { + let result = self.clone().finalize(); + self.reset(); + result + } + + fn reset(&mut self) { + 
*self = Self::default(); + } + + fn output_size() -> usize { + 8 + } + + fn digest(data: &[u8]) -> GenericArray<u8, Self::OutputSize> { + Self::new().chain(data).finalize() + } +} diff --git a/vendor/twox-hash/src/digest_support.rs b/vendor/twox-hash/src/digest_support.rs new file mode 100644 index 000000000..7b00b9d80 --- /dev/null +++ b/vendor/twox-hash/src/digest_support.rs @@ -0,0 +1,179 @@ +use core::hash::Hasher; + +use digest::{ + generic_array::{ + typenum::consts::{U16, U4, U8}, + GenericArray, + }, + Digest, +}; + +use crate::{xxh3, XxHash32, XxHash64}; + +impl Digest for XxHash32 { + type OutputSize = U4; + + fn new() -> Self { + Self::default() + } + + fn input<B: AsRef<[u8]>>(&mut self, data: B) { + self.write(data.as_ref()); + } + + fn chain<B: AsRef<[u8]>>(mut self, data: B) -> Self + where + Self: Sized, + { + self.input(data); + self + } + + fn result(self) -> GenericArray<u8, Self::OutputSize> { + self.finish().to_be_bytes().into() + } + + fn result_reset(&mut self) -> GenericArray<u8, Self::OutputSize> { + let result = self.result(); + self.reset(); + result + } + + fn reset(&mut self) { + *self = Self::default(); + } + + fn output_size() -> usize { + 4 + } + + fn digest(data: &[u8]) -> GenericArray<u8, Self::OutputSize> { + Self::new().chain(data).result() + } +} + +impl Digest for XxHash64 { + type OutputSize = U8; + + fn new() -> Self { + Self::default() + } + + fn input<B: AsRef<[u8]>>(&mut self, data: B) { + self.write(data.as_ref()); + } + + fn chain<B: AsRef<[u8]>>(mut self, data: B) -> Self + where + Self: Sized, + { + self.input(data); + self + } + + fn result(self) -> GenericArray<u8, Self::OutputSize> { + self.finish().to_be_bytes().into() + } + + fn result_reset(&mut self) -> GenericArray<u8, Self::OutputSize> { + let result = self.result(); + self.reset(); + result + } + + fn reset(&mut self) { + *self = Self::default(); + } + + fn output_size() -> usize { + 8 + } + + fn digest(data: &[u8]) -> GenericArray<u8, Self::OutputSize> { + 
Self::new().chain(data).result() + } +} + +impl Digest for xxh3::Hash64 { + type OutputSize = U8; + + fn new() -> Self { + Self::default() + } + + fn input<B: AsRef<[u8]>>(&mut self, data: B) { + self.write(data.as_ref()); + } + + fn chain<B: AsRef<[u8]>>(mut self, data: B) -> Self + where + Self: Sized, + { + self.input(data); + self + } + + fn result(self) -> GenericArray<u8, Self::OutputSize> { + self.finish().to_be_bytes().into() + } + + fn result_reset(&mut self) -> GenericArray<u8, Self::OutputSize> { + let result = self.clone().result(); + self.reset(); + result + } + + fn reset(&mut self) { + *self = Self::default(); + } + + fn output_size() -> usize { + 8 + } + + fn digest(data: &[u8]) -> GenericArray<u8, Self::OutputSize> { + Self::new().chain(data).result() + } +} + +impl Digest for xxh3::Hash128 { + type OutputSize = U16; + + fn new() -> Self { + Self::default() + } + + fn input<B: AsRef<[u8]>>(&mut self, data: B) { + self.write(data.as_ref()); + } + + fn chain<B: AsRef<[u8]>>(mut self, data: B) -> Self + where + Self: Sized, + { + self.input(data); + self + } + + fn result(self) -> GenericArray<u8, Self::OutputSize> { + xxh3::HasherExt::finish_ext(&self).to_be_bytes().into() + } + + fn result_reset(&mut self) -> GenericArray<u8, Self::OutputSize> { + let result = self.clone().result(); + self.reset(); + result + } + + fn reset(&mut self) { + *self = Self::default(); + } + + fn output_size() -> usize { + 8 + } + + fn digest(data: &[u8]) -> GenericArray<u8, Self::OutputSize> { + Self::new().chain(data).result() + } +} diff --git a/vendor/twox-hash/src/lib.rs b/vendor/twox-hash/src/lib.rs new file mode 100644 index 000000000..414dc8d42 --- /dev/null +++ b/vendor/twox-hash/src/lib.rs @@ -0,0 +1,121 @@ +//! A Rust implementation of the [XXHash] algorithm. +//! +//! [XXHash]: https://github.com/Cyan4973/xxHash +//! +//! ### With a fixed seed +//! +//! ```rust +//! use std::hash::BuildHasherDefault; +//! use std::collections::HashMap; +//! 
use twox_hash::XxHash64; +//! +//! let mut hash: HashMap<_, _, BuildHasherDefault<XxHash64>> = Default::default(); +//! hash.insert(42, "the answer"); +//! assert_eq!(hash.get(&42), Some(&"the answer")); +//! ``` +//! +//! ### With a random seed +//! +//! ```rust +//! use std::collections::HashMap; +//! use twox_hash::RandomXxHashBuilder64; +//! +//! let mut hash: HashMap<_, _, RandomXxHashBuilder64> = Default::default(); +//! hash.insert(42, "the answer"); +//! assert_eq!(hash.get(&42), Some(&"the answer")); +//! ``` + +#![no_std] + +extern crate alloc; + +#[cfg(test)] +extern crate std; + +use core::{marker::PhantomData, mem}; + +mod sixty_four; +mod thirty_two; +pub mod xxh3; + +#[cfg(feature = "std")] +mod std_support; +#[cfg(feature = "std")] +pub use std_support::sixty_four::RandomXxHashBuilder64; +#[cfg(feature = "std")] +pub use std_support::thirty_two::RandomXxHashBuilder32; +#[cfg(feature = "std")] +pub use std_support::xxh3::{ + RandomHashBuilder128 as RandomXxh3HashBuilder128, + RandomHashBuilder64 as RandomXxh3HashBuilder64, +}; + +#[cfg(feature = "digest")] +mod digest_support; + +#[cfg(feature = "digest_0_9")] +mod digest_0_9_support; + +#[cfg(feature = "digest_0_10")] +mod digest_0_10_support; + +pub use crate::sixty_four::XxHash64; +pub use crate::thirty_two::XxHash32; +pub use crate::xxh3::{Hash128 as Xxh3Hash128, Hash64 as Xxh3Hash64}; + +/// A backwards compatibility type alias. Consider directly using +/// `XxHash64` instead. +pub type XxHash = XxHash64; + +#[cfg(feature = "std")] +/// A backwards compatibility type alias. Consider directly using +/// `RandomXxHashBuilder64` instead. +pub type RandomXxHashBuilder = RandomXxHashBuilder64; + +/// An unaligned buffer with iteration support for `UnalignedItem`. +struct UnalignedBuffer<'a, T> { + buf: &'a [u8], + phantom: PhantomData<T>, +} + +/// Types implementing this trait must be transmutable from a `*const +/// u8` to `*const Self` at any possible alignment. 
+/// +/// The intent is to use this with only primitive integer types (and +/// tightly-packed arrays of those integers). +#[allow(clippy::missing_safety_doc)] +unsafe trait UnalignedItem {} + +unsafe impl UnalignedItem for [u64; 4] {} +unsafe impl UnalignedItem for [u32; 4] {} +unsafe impl UnalignedItem for u64 {} +unsafe impl UnalignedItem for u32 {} + +impl<'a, T: UnalignedItem> UnalignedBuffer<'a, T> { + #[inline] + fn new(buf: &'a [u8]) -> Self { + Self { + buf, + phantom: PhantomData, + } + } + + #[inline] + fn remaining(&self) -> &[u8] { + self.buf + } +} + +impl<'a, T: UnalignedItem> Iterator for UnalignedBuffer<'a, T> { + type Item = T; + + fn next(&mut self) -> Option<Self::Item> { + let size = mem::size_of::<T>(); + self.buf.get(size..).map(|remaining| { + // `self.buf` has at least `size` bytes that can be read as `T`. + let result = unsafe { (self.buf.as_ptr() as *const T).read_unaligned() }; + self.buf = remaining; + result + }) + } +} diff --git a/vendor/twox-hash/src/sixty_four.rs b/vendor/twox-hash/src/sixty_four.rs new file mode 100644 index 000000000..c15158693 --- /dev/null +++ b/vendor/twox-hash/src/sixty_four.rs @@ -0,0 +1,413 @@ +use crate::UnalignedBuffer; +use core::{cmp, hash::Hasher}; + +#[cfg(feature = "serialize")] +use serde::{Deserialize, Serialize}; + +const CHUNK_SIZE: usize = 32; + +pub const PRIME_1: u64 = 11_400_714_785_074_694_791; +pub const PRIME_2: u64 = 14_029_467_366_897_019_727; +pub const PRIME_3: u64 = 1_609_587_929_392_839_161; +pub const PRIME_4: u64 = 9_650_029_242_287_828_579; +pub const PRIME_5: u64 = 2_870_177_450_012_600_261; + +#[cfg_attr(feature = "serialize", derive(Deserialize, Serialize))] +#[derive(Copy, Clone, PartialEq)] +struct XxCore { + v1: u64, + v2: u64, + v3: u64, + v4: u64, +} + +/// Calculates the 64-bit hash. 
+#[cfg_attr(feature = "serialize", derive(Deserialize, Serialize))] +#[derive(Debug, Copy, Clone, PartialEq)] +pub struct XxHash64 { + total_len: u64, + seed: u64, + core: XxCore, + #[cfg_attr(feature = "serialize", serde(flatten))] + buffer: Buffer, +} + +impl XxCore { + fn with_seed(seed: u64) -> XxCore { + XxCore { + v1: seed.wrapping_add(PRIME_1).wrapping_add(PRIME_2), + v2: seed.wrapping_add(PRIME_2), + v3: seed, + v4: seed.wrapping_sub(PRIME_1), + } + } + + #[inline(always)] + fn ingest_chunks<I>(&mut self, values: I) + where + I: IntoIterator<Item = [u64; 4]>, + { + #[inline(always)] + fn ingest_one_number(mut current_value: u64, mut value: u64) -> u64 { + value = value.wrapping_mul(PRIME_2); + current_value = current_value.wrapping_add(value); + current_value = current_value.rotate_left(31); + current_value.wrapping_mul(PRIME_1) + } + + // By drawing these out, we can avoid going back and forth to + // memory. It only really helps for large files, when we need + // to iterate multiple times here. + + let mut v1 = self.v1; + let mut v2 = self.v2; + let mut v3 = self.v3; + let mut v4 = self.v4; + + for [n1, n2, n3, n4] in values { + v1 = ingest_one_number(v1, n1.to_le()); + v2 = ingest_one_number(v2, n2.to_le()); + v3 = ingest_one_number(v3, n3.to_le()); + v4 = ingest_one_number(v4, n4.to_le()); + } + + self.v1 = v1; + self.v2 = v2; + self.v3 = v3; + self.v4 = v4; + } + + #[inline(always)] + fn finish(&self) -> u64 { + // The original code pulls out local vars for v[1234] + // here. Performance tests did not show that to be effective + // here, presumably because this method is not called in a + // tight loop. 
+ + #[allow(unknown_lints, clippy::needless_late_init)] // keeping things parallel + let mut hash; + + hash = self.v1.rotate_left(1); + hash = hash.wrapping_add(self.v2.rotate_left(7)); + hash = hash.wrapping_add(self.v3.rotate_left(12)); + hash = hash.wrapping_add(self.v4.rotate_left(18)); + + #[inline(always)] + fn mix_one(mut hash: u64, mut value: u64) -> u64 { + value = value.wrapping_mul(PRIME_2); + value = value.rotate_left(31); + value = value.wrapping_mul(PRIME_1); + hash ^= value; + hash = hash.wrapping_mul(PRIME_1); + hash.wrapping_add(PRIME_4) + } + + hash = mix_one(hash, self.v1); + hash = mix_one(hash, self.v2); + hash = mix_one(hash, self.v3); + hash = mix_one(hash, self.v4); + + hash + } +} + +impl core::fmt::Debug for XxCore { + fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> Result<(), core::fmt::Error> { + write!( + f, + "XxCore {{ {:016x} {:016x} {:016x} {:016x} }}", + self.v1, self.v2, self.v3, self.v4 + ) + } +} + +#[cfg_attr(feature = "serialize", derive(Serialize, Deserialize))] +#[derive(Debug, Copy, Clone, Default, PartialEq)] +#[repr(align(8))] +#[cfg_attr(feature = "serialize", serde(transparent))] +struct AlignToU64<T>(T); + +#[cfg_attr(feature = "serialize", derive(Serialize, Deserialize))] +#[derive(Debug, Copy, Clone, Default, PartialEq)] +struct Buffer { + #[cfg_attr(feature = "serialize", serde(rename = "buffer"))] + data: AlignToU64<[u8; CHUNK_SIZE]>, + #[cfg_attr(feature = "serialize", serde(rename = "buffer_usage"))] + len: usize, +} + +impl Buffer { + fn data(&self) -> &[u8] { + &self.data.0[..self.len] + } + + /// Consumes as much of the parameter as it can, returning the unused part. 
+ fn consume<'a>(&mut self, data: &'a [u8]) -> &'a [u8] { + let to_use = cmp::min(self.available(), data.len()); + let (data, remaining) = data.split_at(to_use); + self.data.0[self.len..][..to_use].copy_from_slice(data); + self.len += to_use; + remaining + } + + fn set_data(&mut self, data: &[u8]) { + debug_assert!(self.is_empty()); + debug_assert!(data.len() < CHUNK_SIZE); + self.data.0[..data.len()].copy_from_slice(data); + self.len = data.len(); + } + + fn available(&self) -> usize { + CHUNK_SIZE - self.len + } + + fn is_empty(&self) -> bool { + self.len == 0 + } + + fn is_full(&self) -> bool { + self.len == CHUNK_SIZE + } +} + +impl XxHash64 { + /// Constructs the hash with an initial seed + pub fn with_seed(seed: u64) -> XxHash64 { + XxHash64 { + total_len: 0, + seed, + core: XxCore::with_seed(seed), + buffer: Buffer::default(), + } + } + + pub(crate) fn write(&mut self, bytes: &[u8]) { + let remaining = self.maybe_consume_bytes(bytes); + if !remaining.is_empty() { + let mut remaining = UnalignedBuffer::new(remaining); + self.core.ingest_chunks(&mut remaining); + self.buffer.set_data(remaining.remaining()); + } + self.total_len += bytes.len() as u64; + } + + // Consume bytes and try to make `self.buffer` empty. + // If there are not enough bytes, `self.buffer` can be non-empty, and this + // function returns an empty slice. 
+ fn maybe_consume_bytes<'a>(&mut self, data: &'a [u8]) -> &'a [u8] { + if self.buffer.is_empty() { + data + } else { + let data = self.buffer.consume(data); + if self.buffer.is_full() { + let mut u64s = UnalignedBuffer::new(self.buffer.data()); + self.core.ingest_chunks(&mut u64s); + debug_assert!(u64s.remaining().is_empty()); + self.buffer.len = 0; + } + data + } + } + + pub(crate) fn finish(&self) -> u64 { + let mut hash = if self.total_len >= CHUNK_SIZE as u64 { + // We have processed at least one full chunk + self.core.finish() + } else { + self.seed.wrapping_add(PRIME_5) + }; + + hash = hash.wrapping_add(self.total_len); + + let mut buffered_u64s = UnalignedBuffer::<u64>::new(self.buffer.data()); + for buffered_u64 in &mut buffered_u64s { + let mut k1 = buffered_u64.to_le().wrapping_mul(PRIME_2); + k1 = k1.rotate_left(31); + k1 = k1.wrapping_mul(PRIME_1); + hash ^= k1; + hash = hash.rotate_left(27); + hash = hash.wrapping_mul(PRIME_1); + hash = hash.wrapping_add(PRIME_4); + } + + let mut buffered_u32s = UnalignedBuffer::<u32>::new(buffered_u64s.remaining()); + for buffered_u32 in &mut buffered_u32s { + let k1 = u64::from(buffered_u32.to_le()).wrapping_mul(PRIME_1); + hash ^= k1; + hash = hash.rotate_left(23); + hash = hash.wrapping_mul(PRIME_2); + hash = hash.wrapping_add(PRIME_3); + } + + let buffered_u8s = buffered_u32s.remaining(); + for &buffered_u8 in buffered_u8s { + let k1 = u64::from(buffered_u8).wrapping_mul(PRIME_5); + hash ^= k1; + hash = hash.rotate_left(11); + hash = hash.wrapping_mul(PRIME_1); + } + + // The final intermixing + hash ^= hash >> 33; + hash = hash.wrapping_mul(PRIME_2); + hash ^= hash >> 29; + hash = hash.wrapping_mul(PRIME_3); + hash ^= hash >> 32; + + hash + } + + pub fn seed(&self) -> u64 { + self.seed + } + + pub fn total_len(&self) -> u64 { + self.total_len + } +} + +impl Default for XxHash64 { + fn default() -> XxHash64 { + XxHash64::with_seed(0) + } +} + +impl Hasher for XxHash64 { + fn finish(&self) -> u64 { + 
XxHash64::finish(self) + } + + fn write(&mut self, bytes: &[u8]) { + XxHash64::write(self, bytes) + } +} + +#[cfg(feature = "std")] +pub use crate::std_support::sixty_four::RandomXxHashBuilder64; + +#[cfg(test)] +mod test { + use super::{RandomXxHashBuilder64, XxHash64}; + use std::collections::HashMap; + use std::hash::BuildHasherDefault; + use std::prelude::v1::*; + + #[test] + fn ingesting_byte_by_byte_is_equivalent_to_large_chunks() { + let bytes: Vec<_> = (0..32).map(|_| 0).collect(); + + let mut byte_by_byte = XxHash64::with_seed(0); + for byte in bytes.chunks(1) { + byte_by_byte.write(byte); + } + + let mut one_chunk = XxHash64::with_seed(0); + one_chunk.write(&bytes); + + assert_eq!(byte_by_byte.core, one_chunk.core); + } + + #[test] + fn hash_of_nothing_matches_c_implementation() { + let mut hasher = XxHash64::with_seed(0); + hasher.write(&[]); + assert_eq!(hasher.finish(), 0xef46_db37_51d8_e999); + } + + #[test] + fn hash_of_single_byte_matches_c_implementation() { + let mut hasher = XxHash64::with_seed(0); + hasher.write(&[42]); + assert_eq!(hasher.finish(), 0x0a9e_dece_beb0_3ae4); + } + + #[test] + fn hash_of_multiple_bytes_matches_c_implementation() { + let mut hasher = XxHash64::with_seed(0); + hasher.write(b"Hello, world!\0"); + assert_eq!(hasher.finish(), 0x7b06_c531_ea43_e89f); + } + + #[test] + fn hash_of_multiple_chunks_matches_c_implementation() { + let bytes: Vec<_> = (0..100).collect(); + let mut hasher = XxHash64::with_seed(0); + hasher.write(&bytes); + assert_eq!(hasher.finish(), 0x6ac1_e580_3216_6597); + } + + #[test] + fn hash_with_different_seed_matches_c_implementation() { + let mut hasher = XxHash64::with_seed(0xae05_4331_1b70_2d91); + hasher.write(&[]); + assert_eq!(hasher.finish(), 0x4b6a_04fc_df7a_4672); + } + + #[test] + fn hash_with_different_seed_and_multiple_chunks_matches_c_implementation() { + let bytes: Vec<_> = (0..100).collect(); + let mut hasher = XxHash64::with_seed(0xae05_4331_1b70_2d91); + hasher.write(&bytes); + 
assert_eq!(hasher.finish(), 0x567e_355e_0682_e1f1); + } + + #[test] + fn can_be_used_in_a_hashmap_with_a_default_seed() { + let mut hash: HashMap<_, _, BuildHasherDefault<XxHash64>> = Default::default(); + hash.insert(42, "the answer"); + assert_eq!(hash.get(&42), Some(&"the answer")); + } + + #[test] + fn can_be_used_in_a_hashmap_with_a_random_seed() { + let mut hash: HashMap<_, _, RandomXxHashBuilder64> = Default::default(); + hash.insert(42, "the answer"); + assert_eq!(hash.get(&42), Some(&"the answer")); + } + + #[cfg(feature = "serialize")] + type TestResult<T = ()> = Result<T, Box<dyn std::error::Error>>; + + #[cfg(feature = "serialize")] + #[test] + fn test_serialization_cycle() -> TestResult { + let mut hasher = XxHash64::with_seed(0); + hasher.write(b"Hello, world!\0"); + hasher.finish(); + + let serialized = serde_json::to_string(&hasher)?; + let unserialized: XxHash64 = serde_json::from_str(&serialized)?; + assert_eq!(hasher, unserialized); + Ok(()) + } + + #[cfg(feature = "serialize")] + #[test] + fn test_serialization_stability() -> TestResult { + let mut hasher = XxHash64::with_seed(0); + hasher.write(b"Hello, world!\0"); + hasher.finish(); + + let serialized = r#"{ + "total_len": 14, + "seed": 0, + "core": { + "v1": 6983438078262162902, + "v2": 14029467366897019727, + "v3": 0, + "v4": 7046029288634856825 + }, + "buffer": [ + 72, 101, 108, 108, 111, 44, 32, 119, + 111, 114, 108, 100, 33, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0 + ], + "buffer_usage": 14 + }"#; + + let unserialized: XxHash64 = serde_json::from_str(serialized).unwrap(); + assert_eq!(hasher, unserialized); + Ok(()) + } +} diff --git a/vendor/twox-hash/src/std_support.rs b/vendor/twox-hash/src/std_support.rs new file mode 100644 index 000000000..d79085e26 --- /dev/null +++ b/vendor/twox-hash/src/std_support.rs @@ -0,0 +1,113 @@ +pub mod sixty_four { + use crate::XxHash64; + use core::hash::BuildHasher; + use rand::{self, Rng}; + + #[derive(Clone)] + /// Constructs a 
randomized seed and reuses it for multiple hasher instances. + pub struct RandomXxHashBuilder64(u64); + + impl RandomXxHashBuilder64 { + fn new() -> RandomXxHashBuilder64 { + RandomXxHashBuilder64(rand::thread_rng().gen()) + } + } + + impl Default for RandomXxHashBuilder64 { + fn default() -> RandomXxHashBuilder64 { + RandomXxHashBuilder64::new() + } + } + + impl BuildHasher for RandomXxHashBuilder64 { + type Hasher = XxHash64; + + fn build_hasher(&self) -> XxHash64 { + XxHash64::with_seed(self.0) + } + } +} + +pub mod thirty_two { + use crate::XxHash32; + use core::hash::BuildHasher; + use rand::{self, Rng}; + + #[derive(Clone)] + /// Constructs a randomized seed and reuses it for multiple hasher instances. See the usage warning on `XxHash32`. + pub struct RandomXxHashBuilder32(u32); + + impl RandomXxHashBuilder32 { + fn new() -> RandomXxHashBuilder32 { + RandomXxHashBuilder32(rand::thread_rng().gen()) + } + } + + impl Default for RandomXxHashBuilder32 { + fn default() -> RandomXxHashBuilder32 { + RandomXxHashBuilder32::new() + } + } + + impl BuildHasher for RandomXxHashBuilder32 { + type Hasher = XxHash32; + + fn build_hasher(&self) -> XxHash32 { + XxHash32::with_seed(self.0) + } + } +} + +pub mod xxh3 { + use crate::xxh3::{Hash128, Hash64}; + use core::hash::BuildHasher; + use rand::{self, Rng}; + + #[derive(Clone)] + /// Constructs a randomized seed and reuses it for multiple hasher instances. + pub struct RandomHashBuilder64(u64); + + impl RandomHashBuilder64 { + fn new() -> RandomHashBuilder64 { + RandomHashBuilder64(rand::thread_rng().gen()) + } + } + + impl Default for RandomHashBuilder64 { + fn default() -> RandomHashBuilder64 { + RandomHashBuilder64::new() + } + } + + impl BuildHasher for RandomHashBuilder64 { + type Hasher = Hash64; + + fn build_hasher(&self) -> Hash64 { + Hash64::with_seed(self.0) + } + } + + #[derive(Clone)] + /// Constructs a randomized seed and reuses it for multiple hasher instances. 
+ pub struct RandomHashBuilder128(u64); + + impl RandomHashBuilder128 { + fn new() -> RandomHashBuilder128 { + RandomHashBuilder128(rand::thread_rng().gen()) + } + } + + impl Default for RandomHashBuilder128 { + fn default() -> RandomHashBuilder128 { + RandomHashBuilder128::new() + } + } + + impl BuildHasher for RandomHashBuilder128 { + type Hasher = Hash128; + + fn build_hasher(&self) -> Hash128 { + Hash128::with_seed(self.0) + } + } +} diff --git a/vendor/twox-hash/src/thirty_two.rs b/vendor/twox-hash/src/thirty_two.rs new file mode 100644 index 000000000..cfa44cdbc --- /dev/null +++ b/vendor/twox-hash/src/thirty_two.rs @@ -0,0 +1,416 @@ +use crate::UnalignedBuffer; +use core::{cmp, hash::Hasher}; + +#[cfg(feature = "serialize")] +use serde::{Deserialize, Serialize}; + +const CHUNK_SIZE: usize = 16; + +pub const PRIME_1: u32 = 2_654_435_761; +pub const PRIME_2: u32 = 2_246_822_519; +pub const PRIME_3: u32 = 3_266_489_917; +pub const PRIME_4: u32 = 668_265_263; +pub const PRIME_5: u32 = 374_761_393; + +#[cfg_attr(feature = "serialize", derive(Deserialize, Serialize))] +#[derive(Copy, Clone, PartialEq)] +struct XxCore { + v1: u32, + v2: u32, + v3: u32, + v4: u32, +} + +/// Calculates the 32-bit hash. Care should be taken when using this +/// hash. +/// +/// Although this struct implements `Hasher`, it only calculates a +/// 32-bit number, leaving the upper bits as 0. This means it is +/// unlikely to be correct to use this in places like a `HashMap`. 
+#[cfg_attr(feature = "serialize", derive(Deserialize, Serialize))] +#[derive(Debug, Copy, Clone, PartialEq)] +pub struct XxHash32 { + total_len: u64, + seed: u32, + core: XxCore, + #[cfg_attr(feature = "serialize", serde(flatten))] + buffer: Buffer, +} + +impl XxCore { + fn with_seed(seed: u32) -> XxCore { + XxCore { + v1: seed.wrapping_add(PRIME_1).wrapping_add(PRIME_2), + v2: seed.wrapping_add(PRIME_2), + v3: seed, + v4: seed.wrapping_sub(PRIME_1), + } + } + + #[inline(always)] + fn ingest_chunks<I>(&mut self, values: I) + where + I: IntoIterator<Item = [u32; 4]>, + { + #[inline(always)] + fn ingest_one_number(mut current_value: u32, mut value: u32) -> u32 { + value = value.wrapping_mul(PRIME_2); + current_value = current_value.wrapping_add(value); + current_value = current_value.rotate_left(13); + current_value.wrapping_mul(PRIME_1) + } + + // By drawing these out, we can avoid going back and forth to + // memory. It only really helps for large files, when we need + // to iterate multiple times here. + + let mut v1 = self.v1; + let mut v2 = self.v2; + let mut v3 = self.v3; + let mut v4 = self.v4; + + for [n1, n2, n3, n4] in values { + v1 = ingest_one_number(v1, n1.to_le()); + v2 = ingest_one_number(v2, n2.to_le()); + v3 = ingest_one_number(v3, n3.to_le()); + v4 = ingest_one_number(v4, n4.to_le()); + } + + self.v1 = v1; + self.v2 = v2; + self.v3 = v3; + self.v4 = v4; + } + + #[inline(always)] + fn finish(&self) -> u32 { + // The original code pulls out local vars for v[1234] + // here. Performance tests did not show that to be effective + // here, presumably because this method is not called in a + // tight loop. 
+ + #[allow(unknown_lints, clippy::needless_late_init)] // keeping things parallel + let mut hash; + + hash = self.v1.rotate_left(1); + hash = hash.wrapping_add(self.v2.rotate_left(7)); + hash = hash.wrapping_add(self.v3.rotate_left(12)); + hash = hash.wrapping_add(self.v4.rotate_left(18)); + + hash + } +} + +impl core::fmt::Debug for XxCore { + fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> Result<(), core::fmt::Error> { + write!( + f, + "XxCore {{ {:016x} {:016x} {:016x} {:016x} }}", + self.v1, self.v2, self.v3, self.v4 + ) + } +} + +#[cfg_attr(feature = "serialize", derive(Serialize, Deserialize))] +#[derive(Debug, Copy, Clone, Default, PartialEq)] +#[repr(align(4))] +#[cfg_attr(feature = "serialize", serde(transparent))] +struct AlignToU32<T>(T); + +#[cfg_attr(feature = "serialize", derive(Serialize, Deserialize))] +#[derive(Debug, Copy, Clone, Default, PartialEq)] +struct Buffer { + #[cfg_attr(feature = "serialize", serde(rename = "buffer"))] + data: AlignToU32<[u8; CHUNK_SIZE]>, + #[cfg_attr(feature = "serialize", serde(rename = "buffer_usage"))] + len: usize, +} + +impl Buffer { + fn data(&self) -> &[u8] { + &self.data.0[..self.len] + } + + /// Consumes as much of the parameter as it can, returning the unused part. 
+ fn consume<'a>(&mut self, data: &'a [u8]) -> &'a [u8] { + let to_use = cmp::min(self.available(), data.len()); + let (data, remaining) = data.split_at(to_use); + self.data.0[self.len..][..to_use].copy_from_slice(data); + self.len += to_use; + remaining + } + + fn set_data(&mut self, data: &[u8]) { + debug_assert!(self.is_empty()); + debug_assert!(data.len() < CHUNK_SIZE); + self.data.0[..data.len()].copy_from_slice(data); + self.len = data.len(); + } + + fn available(&self) -> usize { + CHUNK_SIZE - self.len + } + + fn is_empty(&self) -> bool { + self.len == 0 + } + + fn is_full(&self) -> bool { + self.len == CHUNK_SIZE + } +} + +impl XxHash32 { + /// Constructs the hash with an initial seed + pub fn with_seed(seed: u32) -> XxHash32 { + XxHash32 { + total_len: 0, + seed, + core: XxCore::with_seed(seed), + buffer: Buffer::default(), + } + } + + pub(crate) fn write(&mut self, bytes: &[u8]) { + let remaining = self.maybe_consume_bytes(bytes); + if !remaining.is_empty() { + let mut remaining = UnalignedBuffer::new(remaining); + self.core.ingest_chunks(&mut remaining); + self.buffer.set_data(remaining.remaining()); + } + self.total_len += bytes.len() as u64; + } + + // Consume bytes and try to make `self.buffer` empty. + // If there are not enough bytes, `self.buffer` can be non-empty, and this + // function returns an empty slice. 
+ fn maybe_consume_bytes<'a>(&mut self, data: &'a [u8]) -> &'a [u8] { + if self.buffer.is_empty() { + data + } else { + let data = self.buffer.consume(data); + if self.buffer.is_full() { + let mut u32s = UnalignedBuffer::new(self.buffer.data()); + self.core.ingest_chunks(&mut u32s); + debug_assert!(u32s.remaining().is_empty()); + self.buffer.len = 0; + } + data + } + } + + pub(crate) fn finish(&self) -> u32 { + let mut hash = if self.total_len >= CHUNK_SIZE as u64 { + // We have processed at least one full chunk + self.core.finish() + } else { + self.seed.wrapping_add(PRIME_5) + }; + + hash = hash.wrapping_add(self.total_len as u32); + + let mut buffered_u32s = UnalignedBuffer::<u32>::new(self.buffer.data()); + for buffered_u32 in &mut buffered_u32s { + let k1 = buffered_u32.to_le().wrapping_mul(PRIME_3); + hash = hash.wrapping_add(k1); + hash = hash.rotate_left(17); + hash = hash.wrapping_mul(PRIME_4); + } + + let buffered_u8s = buffered_u32s.remaining(); + for &buffered_u8 in buffered_u8s { + let k1 = u32::from(buffered_u8).wrapping_mul(PRIME_5); + hash = hash.wrapping_add(k1); + hash = hash.rotate_left(11); + hash = hash.wrapping_mul(PRIME_1); + } + + // The final intermixing + hash ^= hash >> 15; + hash = hash.wrapping_mul(PRIME_2); + hash ^= hash >> 13; + hash = hash.wrapping_mul(PRIME_3); + hash ^= hash >> 16; + + hash + } + + pub fn seed(&self) -> u32 { + self.seed + } + + /// Get the total number of bytes hashed, truncated to 32 bits. + /// For the full 64-bit byte count, use `total_len_64` + pub fn total_len(&self) -> u32 { + self.total_len as u32 + } + + /// Get the total number of bytes hashed. 
+ pub fn total_len_64(&self) -> u64 { + self.total_len + } +} + +impl Default for XxHash32 { + fn default() -> XxHash32 { + XxHash32::with_seed(0) + } +} + +impl Hasher for XxHash32 { + fn finish(&self) -> u64 { + u64::from(XxHash32::finish(self)) + } + + fn write(&mut self, bytes: &[u8]) { + XxHash32::write(self, bytes) + } +} + +#[cfg(feature = "std")] +pub use crate::std_support::thirty_two::RandomXxHashBuilder32; + +#[cfg(test)] +mod test { + use super::{RandomXxHashBuilder32, XxHash32}; + use std::collections::HashMap; + use std::hash::BuildHasherDefault; + use std::prelude::v1::*; + + #[test] + fn ingesting_byte_by_byte_is_equivalent_to_large_chunks() { + let bytes: Vec<_> = (0..32).map(|_| 0).collect(); + + let mut byte_by_byte = XxHash32::with_seed(0); + for byte in bytes.chunks(1) { + byte_by_byte.write(byte); + } + + let mut one_chunk = XxHash32::with_seed(0); + one_chunk.write(&bytes); + + assert_eq!(byte_by_byte.core, one_chunk.core); + } + + #[test] + fn hash_of_nothing_matches_c_implementation() { + let mut hasher = XxHash32::with_seed(0); + hasher.write(&[]); + assert_eq!(hasher.finish(), 0x02cc_5d05); + } + + #[test] + fn hash_of_single_byte_matches_c_implementation() { + let mut hasher = XxHash32::with_seed(0); + hasher.write(&[42]); + assert_eq!(hasher.finish(), 0xe0fe_705f); + } + + #[test] + fn hash_of_multiple_bytes_matches_c_implementation() { + let mut hasher = XxHash32::with_seed(0); + hasher.write(b"Hello, world!\0"); + assert_eq!(hasher.finish(), 0x9e5e_7e93); + } + + #[test] + fn hash_of_multiple_chunks_matches_c_implementation() { + let bytes: Vec<_> = (0..100).collect(); + let mut hasher = XxHash32::with_seed(0); + hasher.write(&bytes); + assert_eq!(hasher.finish(), 0x7f89_ba44); + } + + #[test] + fn hash_with_different_seed_matches_c_implementation() { + let mut hasher = XxHash32::with_seed(0x42c9_1977); + hasher.write(&[]); + assert_eq!(hasher.finish(), 0xd6bf_8459); + } + + #[test] + fn 
hash_with_different_seed_and_multiple_chunks_matches_c_implementation() { + let bytes: Vec<_> = (0..100).collect(); + let mut hasher = XxHash32::with_seed(0x42c9_1977); + hasher.write(&bytes); + assert_eq!(hasher.finish(), 0x6d2f_6c17); + } + + #[test] + fn can_be_used_in_a_hashmap_with_a_default_seed() { + let mut hash: HashMap<_, _, BuildHasherDefault<XxHash32>> = Default::default(); + hash.insert(42, "the answer"); + assert_eq!(hash.get(&42), Some(&"the answer")); + } + + #[test] + fn can_be_used_in_a_hashmap_with_a_random_seed() { + let mut hash: HashMap<_, _, RandomXxHashBuilder32> = Default::default(); + hash.insert(42, "the answer"); + assert_eq!(hash.get(&42), Some(&"the answer")); + } + + #[cfg(feature = "serialize")] + type TestResult<T = ()> = Result<T, Box<dyn std::error::Error>>; + + #[cfg(feature = "serialize")] + #[test] + fn test_serialization_cycle() -> TestResult { + let mut hasher = XxHash32::with_seed(0); + hasher.write(b"Hello, world!\0"); + hasher.finish(); + + let serialized = serde_json::to_string(&hasher)?; + let unserialized: XxHash32 = serde_json::from_str(&serialized)?; + assert_eq!(hasher, unserialized); + Ok(()) + } + + #[cfg(feature = "serialize")] + #[test] + fn test_serialization_stability() -> TestResult { + let mut hasher = XxHash32::with_seed(0); + hasher.write(b"Hello, world!\0"); + hasher.finish(); + + let serialized = r#"{ + "total_len": 14, + "seed": 0, + "core": { + "v1": 606290984, + "v2": 2246822519, + "v3": 0, + "v4": 1640531535 + }, + "buffer": [ + 72, 101, 108, 108, 111, 44, 32, 119, + 111, 114, 108, 100, 33, 0, 0, 0 + ], + "buffer_usage": 14 + }"#; + + let unserialized: XxHash32 = serde_json::from_str(serialized).unwrap(); + assert_eq!(hasher, unserialized); + Ok(()) + } + + // This test validates wraparound/truncation behavior for very large inputs + // of a 32-bit hash, but runs very slowly in the normal "cargo test" + // build config since it hashes 4.3GB of data. 
It runs reasonably quickly + // under "cargo test --release". + /* + #[test] + fn len_overflow_32bit() { + // Hash 4.3 billion (4_300_000_000) bytes, which overflows a u32. + let bytes200: Vec<u8> = (0..200).collect(); + let mut hasher = XxHash32::with_seed(0); + for _ in 0..(4_300_000_000u64 / 200u64) { + hasher.write(&bytes200); + } + assert_eq!(hasher.total_len_64(), 0x0000_0001_004c_cb00); + assert_eq!(hasher.total_len(), 0x004c_cb00); + // result is tested against the C implementation + assert_eq!(hasher.finish(), 0x1522_4ca7); + } + */ +} diff --git a/vendor/twox-hash/src/xxh3.rs b/vendor/twox-hash/src/xxh3.rs new file mode 100644 index 000000000..0ffc54189 --- /dev/null +++ b/vendor/twox-hash/src/xxh3.rs @@ -0,0 +1,1666 @@ +//! The in-progress XXH3 algorithm. +//! +//! Please read [the notes in the original implementation][warning] to +//! learn about when to use these algorithms. Specifically, the +//! version of code this crate reproduces says: +//! +//! > The algorithm is currently in development, meaning its return +//! values might still change in future versions. However, the API +//! is stable, and can be used in production, typically for +//! generation of ephemeral hashes (produced and consumed in same +//! session). +//! +//!
[warning]: https://github.com/Cyan4973/xxHash#new-hash-algorithms + +use alloc::vec::Vec; + +use core::convert::TryInto; +use core::hash::Hasher; +use core::mem; +use core::ops::{Deref, DerefMut}; +use core::slice; + +#[cfg(target_arch = "x86")] +use core::arch::x86::*; +#[cfg(target_arch = "x86_64")] +use core::arch::x86_64::*; + +use cfg_if::cfg_if; +use static_assertions::{const_assert, const_assert_eq}; + +#[cfg(feature = "serialize")] +use serde::{Deserialize, Serialize}; + +use crate::sixty_four::{ + PRIME_1 as PRIME64_1, PRIME_2 as PRIME64_2, PRIME_3 as PRIME64_3, PRIME_4 as PRIME64_4, + PRIME_5 as PRIME64_5, +}; +use crate::thirty_two::{PRIME_1 as PRIME32_1, PRIME_2 as PRIME32_2, PRIME_3 as PRIME32_3}; + +#[cfg(feature = "std")] +pub use crate::std_support::xxh3::{RandomHashBuilder128, RandomHashBuilder64}; + +#[inline(always)] +pub fn hash64(data: &[u8]) -> u64 { + hash64_with_seed(data, 0) +} + +#[inline(always)] +pub fn hash64_with_seed(data: &[u8], seed: u64) -> u64 { + let len = data.len(); + + if len <= 16 { + hash_len_0to16_64bits(data, len, &SECRET, seed) + } else if len <= 128 { + hash_len_17to128_64bits(data, len, &SECRET, seed) + } else if len <= MIDSIZE_MAX { + hash_len_129to240_64bits(data, len, &SECRET, seed) + } else { + hash_long_64bits_with_seed(data, len, seed) + } +} + +#[inline(always)] +pub fn hash64_with_secret(data: &[u8], secret: &[u8]) -> u64 { + debug_assert!(secret.len() >= SECRET_SIZE_MIN); + + let len = data.len(); + + if len <= 16 { + hash_len_0to16_64bits(data, len, secret, 0) + } else if len <= 128 { + hash_len_17to128_64bits(data, len, secret, 0) + } else if len <= MIDSIZE_MAX { + hash_len_129to240_64bits(data, len, secret, 0) + } else { + hash_long_64bits_with_secret(data, len, secret) + } +} + +#[inline(always)] +pub fn hash128(data: &[u8]) -> u128 { + hash128_with_seed(data, 0) +} + +#[inline(always)] +pub fn hash128_with_seed(data: &[u8], seed: u64) -> u128 { + let len = data.len(); + + if len <= 16 { + 
hash_len_0to16_128bits(data, len, &SECRET, seed) + } else if len <= 128 { + hash_len_17to128_128bits(data, len, &SECRET, seed) + } else if len <= MIDSIZE_MAX { + hash_len_129to240_128bits(data, len, &SECRET, seed) + } else { + hash_long_128bits_with_seed(data, len, seed) + } +} + +#[inline(always)] +pub fn hash128_with_secret(data: &[u8], secret: &[u8]) -> u128 { + debug_assert!(secret.len() >= SECRET_SIZE_MIN); + + let len = data.len(); + + if len <= 16 { + hash_len_0to16_128bits(data, len, secret, 0) + } else if len <= 128 { + hash_len_17to128_128bits(data, len, secret, 0) + } else if len <= MIDSIZE_MAX { + hash_len_129to240_128bits(data, len, secret, 0) + } else { + hash_long_128bits_with_secret(data, len, secret) + } +} + +/// Calculates the 64-bit hash. +#[cfg_attr(feature = "serialize", derive(Deserialize, Serialize))] +#[derive(Clone, Default)] +pub struct Hash64(State); + +impl Hash64 { + pub fn with_seed(seed: u64) -> Self { + Self(State::with_seed(seed)) + } + + pub fn with_secret<S: Into<Vec<u8>>>(secret: S) -> Self { + Self(State::with_secret(secret)) + } +} + +impl Hasher for Hash64 { + #[inline(always)] + fn finish(&self) -> u64 { + self.0.digest64() + } + + #[inline(always)] + fn write(&mut self, bytes: &[u8]) { + self.0.update(bytes, AccWidth::Acc64Bits) + } +} + +/// Calculates the 128-bit hash. 
+#[cfg_attr(feature = "serialize", derive(Deserialize, Serialize))] +#[derive(Clone, Default)] +pub struct Hash128(State); + +impl Hash128 { + pub fn with_seed(seed: u64) -> Self { + Self(State::with_seed(seed)) + } + + pub fn with_secret<S: Into<Vec<u8>>>(secret: S) -> Self { + Self(State::with_secret(secret)) + } +} + +impl Hasher for Hash128 { + #[inline(always)] + fn finish(&self) -> u64 { + self.0.digest128() as u64 + } + + #[inline(always)] + fn write(&mut self, bytes: &[u8]) { + self.0.update(bytes, AccWidth::Acc128Bits) + } +} + +pub trait HasherExt: Hasher { + fn finish_ext(&self) -> u128; +} + +impl HasherExt for Hash128 { + #[inline(always)] + fn finish_ext(&self) -> u128 { + self.0.digest128() + } +} + +/* ========================================== + * XXH3 default settings + * ========================================== */ + +const SECRET_DEFAULT_SIZE: usize = 192; +const SECRET_SIZE_MIN: usize = 136; + +const SECRET: Secret = Secret([ + 0xb8, 0xfe, 0x6c, 0x39, 0x23, 0xa4, 0x4b, 0xbe, 0x7c, 0x01, 0x81, 0x2c, 0xf7, 0x21, 0xad, 0x1c, + 0xde, 0xd4, 0x6d, 0xe9, 0x83, 0x90, 0x97, 0xdb, 0x72, 0x40, 0xa4, 0xa4, 0xb7, 0xb3, 0x67, 0x1f, + 0xcb, 0x79, 0xe6, 0x4e, 0xcc, 0xc0, 0xe5, 0x78, 0x82, 0x5a, 0xd0, 0x7d, 0xcc, 0xff, 0x72, 0x21, + 0xb8, 0x08, 0x46, 0x74, 0xf7, 0x43, 0x24, 0x8e, 0xe0, 0x35, 0x90, 0xe6, 0x81, 0x3a, 0x26, 0x4c, + 0x3c, 0x28, 0x52, 0xbb, 0x91, 0xc3, 0x00, 0xcb, 0x88, 0xd0, 0x65, 0x8b, 0x1b, 0x53, 0x2e, 0xa3, + 0x71, 0x64, 0x48, 0x97, 0xa2, 0x0d, 0xf9, 0x4e, 0x38, 0x19, 0xef, 0x46, 0xa9, 0xde, 0xac, 0xd8, + 0xa8, 0xfa, 0x76, 0x3f, 0xe3, 0x9c, 0x34, 0x3f, 0xf9, 0xdc, 0xbb, 0xc7, 0xc7, 0x0b, 0x4f, 0x1d, + 0x8a, 0x51, 0xe0, 0x4b, 0xcd, 0xb4, 0x59, 0x31, 0xc8, 0x9f, 0x7e, 0xc9, 0xd9, 0x78, 0x73, 0x64, + 0xea, 0xc5, 0xac, 0x83, 0x34, 0xd3, 0xeb, 0xc3, 0xc5, 0x81, 0xa0, 0xff, 0xfa, 0x13, 0x63, 0xeb, + 0x17, 0x0d, 0xdd, 0x51, 0xb7, 0xf0, 0xda, 0x49, 0xd3, 0x16, 0x55, 0x26, 0x29, 0xd4, 0x68, 0x9e, + 0x2b, 0x16, 0xbe, 0x58, 0x7d, 0x47, 0xa1, 0xfc, 0x8f, 
0xf8, 0xb8, 0xd1, 0x7a, 0xd0, 0x31, 0xce, + 0x45, 0xcb, 0x3a, 0x8f, 0x95, 0x16, 0x04, 0x28, 0xaf, 0xd7, 0xfb, 0xca, 0xbb, 0x4b, 0x40, 0x7e, +]); + +#[repr(align(64))] +#[derive(Clone)] +struct Secret([u8; SECRET_DEFAULT_SIZE]); + +const_assert_eq!(mem::size_of::<Secret>() % 16, 0); + +impl Default for Secret { + #[inline(always)] + fn default() -> Self { + SECRET + } +} + +impl Deref for Secret { + type Target = [u8]; + + #[inline(always)] + fn deref(&self) -> &Self::Target { + &self.0[..] + } +} + +cfg_if! { + if #[cfg(feature = "serialize")] { + impl Serialize for Secret { + fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error> + where + S: serde::Serializer, + { + serializer.serialize_bytes(self) + } + } + + impl<'de> Deserialize<'de> for Secret { + fn deserialize<D>(deserializer: D) -> Result<Secret, D::Error> + where + D: serde::Deserializer<'de>, + { + deserializer.deserialize_bytes(SecretVisitor) + } + } + + struct SecretVisitor; + + impl<'de> serde::de::Visitor<'de> for SecretVisitor { + type Value = Secret; + + fn expecting(&self, formatter: &mut core::fmt::Formatter) -> core::fmt::Result { + formatter.write_str("secret with a bytes array") + } + + fn visit_bytes<E>(self, v: &[u8]) -> Result<Self::Value, E> + where + E: serde::de::Error, + { + if v.len() == SECRET_DEFAULT_SIZE { + let mut secret = [0; SECRET_DEFAULT_SIZE]; + + secret.copy_from_slice(v); + + Ok(Secret(secret)) + } else { + Err(E::custom("incomplete secret data")) + } + } + } + } +} + +impl Secret { + #[inline(always)] + pub fn with_seed(seed: u64) -> Self { + let mut secret = [0; SECRET_DEFAULT_SIZE]; + + for off in (0..SECRET_DEFAULT_SIZE).step_by(16) { + secret[off..].write_u64_le(SECRET[off..].read_u64_le().wrapping_add(seed)); + secret[off + 8..].write_u64_le(SECRET[off + 8..].read_u64_le().wrapping_sub(seed)); + } + + Secret(secret) + } +} + +cfg_if! 
{ + if #[cfg(target_feature = "avx2")] { + #[repr(align(32))] + #[cfg_attr(feature = "serialize", derive(Deserialize, Serialize))] + #[derive(Clone)] + struct Acc([u64; ACC_NB]); + } else if #[cfg(target_feature = "sse2")] { + #[repr(align(16))] + #[cfg_attr(feature = "serialize", derive(Deserialize, Serialize))] + #[derive(Clone)] + struct Acc([u64; ACC_NB]); + } else { + #[repr(align(8))] + #[cfg_attr(feature = "serialize", derive(Deserialize, Serialize))] + #[derive(Clone)] + struct Acc([u64; ACC_NB]); + } +} + +const ACC_SIZE: usize = mem::size_of::<Acc>(); + +const_assert_eq!(ACC_SIZE, 64); + +impl Default for Acc { + #[inline(always)] + fn default() -> Self { + Acc([ + u64::from(PRIME32_3), + PRIME64_1, + PRIME64_2, + PRIME64_3, + PRIME64_4, + u64::from(PRIME32_2), + PRIME64_5, + u64::from(PRIME32_1), + ]) + } +} + +impl Deref for Acc { + type Target = [u64]; + + #[inline(always)] + fn deref(&self) -> &Self::Target { + &self.0 + } +} + +impl DerefMut for Acc { + #[inline(always)] + fn deref_mut(&mut self) -> &mut Self::Target { + &mut self.0 + } +} + +trait Buf { + fn read_u32_le(&self) -> u32; + + fn read_u64_le(&self) -> u64; +} + +trait BufMut { + fn write_u32_le(&mut self, n: u32); + + fn write_u64_le(&mut self, n: u64); +} + +impl Buf for [u8] { + #[inline(always)] + fn read_u32_le(&self) -> u32 { + let buf = &self[..mem::size_of::<u32>()]; + u32::from_le_bytes(buf.try_into().unwrap()) + } + + #[inline(always)] + fn read_u64_le(&self) -> u64 { + let buf = &self[..mem::size_of::<u64>()]; + u64::from_le_bytes(buf.try_into().unwrap()) + } +} + +impl BufMut for [u8] { + #[inline(always)] + fn write_u32_le(&mut self, n: u32) { + self[..mem::size_of::<u32>()].copy_from_slice(&n.to_le_bytes()[..]); + } + + #[inline(always)] + fn write_u64_le(&mut self, n: u64) { + self[..mem::size_of::<u64>()].copy_from_slice(&n.to_le_bytes()[..]); + } +} + +/* ========================================== + * Short keys + * ========================================== */ + 
+/// Hashes an input of at most 16 bytes by dispatching on its length.
+///
+/// An empty input hashes to `0`, whatever `key` and `seed` are.
+#[inline(always)]
+fn hash_len_0to16_64bits(data: &[u8], len: usize, key: &[u8], seed: u64) -> u64 {
+    debug_assert!(len <= 16);
+
+    match len {
+        0 => 0,
+        1..=3 => hash_len_1to3_64bits(data, len, key, seed),
+        4..=8 => hash_len_4to8_64bits(data, len, key, seed),
+        _ => hash_len_9to16_64bits(data, len, key, seed),
+    }
+}
+
+/// Hashes a 9..=16 byte input from its first and last eight bytes (the two
+/// reads overlap when `len < 16`), each XORed with a seed-adjusted key word.
+#[inline(always)]
+fn hash_len_9to16_64bits(data: &[u8], len: usize, key: &[u8], seed: u64) -> u64 {
+    debug_assert!((9..=16).contains(&len));
+
+    let head = data.read_u64_le();
+    let lo = head ^ key.read_u64_le().wrapping_add(seed);
+    let tail = data[len - 8..].read_u64_le();
+    let hi = tail ^ key[8..].read_u64_le().wrapping_sub(seed);
+
+    let sum = (len as u64).wrapping_add(lo).wrapping_add(hi);
+
+    avalanche(sum.wrapping_add(mul128_fold64(lo, hi)))
+}
+
+/// Hashes a 4..=8 byte input from its first and last four bytes (the two
+/// reads overlap when `len < 8`), packed into a single 64-bit word.
+#[inline(always)]
+fn hash_len_4to8_64bits(data: &[u8], len: usize, key: &[u8], seed: u64) -> u64 {
+    debug_assert!((4..=8).contains(&len));
+
+    let first = u64::from(data.read_u32_le());
+    let last = u64::from(data[len - 4..].read_u32_le());
+    let packed = first.wrapping_add(last << 32);
+
+    let keyed = packed ^ key.read_u64_le().wrapping_add(seed);
+    let scrambled = (keyed ^ (keyed >> 51)).wrapping_mul(u64::from(PRIME32_1));
+    let mixed = (len as u64).wrapping_add(scrambled);
+
+    avalanche((mixed ^ (mixed >> 47)).wrapping_mul(PRIME64_2))
+}
+
+/// Hashes a 1..=3 byte input by packing its first, middle and last byte
+/// together with the length into one 32-bit word.
+#[inline(always)]
+fn hash_len_1to3_64bits(data: &[u8], len: usize, key: &[u8], seed: u64) -> u64 {
+    debug_assert!((1..=3).contains(&len));
+
+    let first = u32::from(data[0]);
+    let middle = u32::from(data[len / 2]);
+    let last = u32::from(data[len - 1]);
+    let packed = first + (middle << 8) + (last << 16) + ((len as u32) << 24);
+
+    let keyed = u64::from(packed) ^ u64::from(key.read_u32_le()).wrapping_add(seed);
+
+    avalanche(keyed.wrapping_mul(PRIME64_1))
+}
+
+/// Hashes a 17..=128 byte input.
+///
+/// 16-byte windows are taken in pairs, one from the front and one from the
+/// back of the input; longer inputs add more pairs, each pair using its own
+/// 32-byte region of `secret`.
+#[inline(always)]
+fn hash_len_17to128_64bits(data: &[u8], len: usize, secret: &[u8], seed: u64) -> u64 {
+    debug_assert!((17..=128).contains(&len));
+    debug_assert!(secret.len() >= SECRET_SIZE_MIN);
+
+    // Runs `mix_16bytes` on the input from `data_at` and the secret from `secret_at`.
+    let mix = |data_at: usize, secret_at: usize| {
+        mix_16bytes(&data[data_at..], &secret[secret_at..], seed)
+    };
+
+    let mut acc = PRIME64_1.wrapping_mul(len as u64);
+
+    // Innermost pairs first, so the additions happen in the original order.
+    if len > 96 {
+        acc = acc
+            .wrapping_add(mix(48, 96))
+            .wrapping_add(mix(len - 64, 112));
+    }
+    if len > 64 {
+        acc = acc
+            .wrapping_add(mix(32, 64))
+            .wrapping_add(mix(len - 48, 80));
+    }
+    if len > 32 {
+        acc = acc
+            .wrapping_add(mix(16, 32))
+            .wrapping_add(mix(len - 32, 48));
+    }
+
+    // The outermost pair is mixed for every length in range.
+    acc = acc.wrapping_add(mix(0, 0)).wrapping_add(mix(len - 16, 16));
+
+    avalanche(acc)
+}
+
+// Bounds and secret offsets used by the 129..=240 byte path below.
+const MIDSIZE_MAX: usize = 240;
+const MIDSIZE_STARTOFFSET: usize = 3;
+const MIDSIZE_LASTOFFSET: usize = 17;
+
+/// Hashes a 129..=`MIDSIZE_MAX` byte input.
+///
+/// The first eight 16-byte windows are mixed and avalanched, the remaining
+/// whole windows are mixed against the secret shifted by
+/// `MIDSIZE_STARTOFFSET`, and the last 16 bytes of the input are mixed
+/// against the secret at `SECRET_SIZE_MIN - MIDSIZE_LASTOFFSET`.
+#[inline(always)]
+fn hash_len_129to240_64bits(data: &[u8], len: usize, secret: &[u8], seed: u64) -> u64 {
+    debug_assert!((129..=MIDSIZE_MAX).contains(&len));
+    debug_assert!(secret.len() >= SECRET_SIZE_MIN);
+
+    let mut acc = (len as u64).wrapping_mul(PRIME64_1);
+    for i in 0..8 {
+        acc = acc.wrapping_add(mix_16bytes(&data[16 * i..], &secret[16 * i..], seed));
+    }
+    acc = avalanche(acc);
+
+    let nb_rounds = len / 16;
+    debug_assert!(nb_rounds >= 8);
+
+    for i in 8..nb_rounds {
+        let secret_at = 16 * (i - 8) + MIDSIZE_STARTOFFSET;
+        acc = acc.wrapping_add(mix_16bytes(&data[16 * i..], &secret[secret_at..], seed));
+    }
+
+    let last = mix_16bytes(
+        &data[len - 16..],
+        &secret[SECRET_SIZE_MIN - MIDSIZE_LASTOFFSET..],
+        seed,
+    );
+
+    avalanche(acc.wrapping_add(last))
+}
+
+/* ==========================================
+ * Long keys
+ * ========================================== */
+
+const STRIPE_LEN: usize = 64;
+const SECRET_CONSUME_RATE: usize = 8; // nb of secret bytes consumed at each accumulation
+const SECRET_MERGEACCS_START: usize = 11; // do not align on 8, so that secret is different from accumulator
+const SECRET_LASTACC_START: usize = 7; // do not align on 8, so that
secret is different from scrambler +const ACC_NB: usize = STRIPE_LEN / mem::size_of::<u64>(); + +#[derive(Debug, Clone, Copy, PartialEq)] +pub(crate) enum AccWidth { + Acc64Bits, + Acc128Bits, +} + +#[inline(always)] +fn hash_long_64bits_with_default_secret(data: &[u8], len: usize) -> u64 { + hash_long_internal(data, len, &SECRET) +} + +#[inline(always)] +fn hash_long_64bits_with_secret(data: &[u8], len: usize, secret: &[u8]) -> u64 { + hash_long_internal(data, len, secret) +} + +/// Generate a custom key, based on alteration of default kSecret with the seed, +/// and then use this key for long mode hashing. +/// +/// This operation is decently fast but nonetheless costs a little bit of time. +/// Try to avoid it whenever possible (typically when `seed == 0`). +#[inline(always)] +fn hash_long_64bits_with_seed(data: &[u8], len: usize, seed: u64) -> u64 { + if seed == 0 { + hash_long_64bits_with_default_secret(data, len) + } else { + let secret = Secret::with_seed(seed); + + hash_long_internal(data, len, &secret) + } +} + +#[inline(always)] +fn hash_long_internal(data: &[u8], len: usize, secret: &[u8]) -> u64 { + let mut acc = Acc::default(); + + hash_long_internal_loop(&mut acc, data, len, secret, AccWidth::Acc64Bits); + + merge_accs( + &acc, + &secret[SECRET_MERGEACCS_START..], + (len as u64).wrapping_mul(PRIME64_1), + ) +} + +#[inline(always)] +fn hash_long_internal_loop( + acc: &mut [u64], + data: &[u8], + len: usize, + secret: &[u8], + acc_width: AccWidth, +) { + let secret_len = secret.len(); + let nb_rounds = (secret_len - STRIPE_LEN) / SECRET_CONSUME_RATE; + let block_len = STRIPE_LEN * nb_rounds; + + debug_assert!(secret_len >= SECRET_SIZE_MIN); + + let mut chunks = data.chunks_exact(block_len); + + for chunk in &mut chunks { + accumulate(acc, chunk, secret, nb_rounds, acc_width); + unsafe { + scramble_acc(acc, &secret[secret_len - STRIPE_LEN..]); + } + } + + /* last partial block */ + debug_assert!(len > STRIPE_LEN); + + let nb_stripes = (len %
block_len) / STRIPE_LEN; + + debug_assert!(nb_stripes < (secret_len / SECRET_CONSUME_RATE)); + + accumulate(acc, chunks.remainder(), secret, nb_stripes, acc_width); + + /* last stripe */ + if (len & (STRIPE_LEN - 1)) != 0 { + unsafe { + accumulate512( + acc, + &data[len - STRIPE_LEN..], + &secret[secret_len - STRIPE_LEN - SECRET_LASTACC_START..], + acc_width, + ); + } + } +} + +#[inline(always)] +fn accumulate(acc: &mut [u64], data: &[u8], secret: &[u8], nb_stripes: usize, acc_width: AccWidth) { + for n in 0..nb_stripes { + unsafe { + accumulate512( + acc, + &data[n * STRIPE_LEN..], + &secret[n * SECRET_CONSUME_RATE..], + acc_width, + ); + } + } +} + +#[inline(always)] +const fn _mm_shuffle(z: u32, y: u32, x: u32, w: u32) -> i32 { + ((z << 6) | (y << 4) | (x << 2) | w) as i32 +} + +#[cfg(target_feature = "avx2")] +mod avx2 { + use super::*; + + #[target_feature(enable = "avx2")] + pub(crate) unsafe fn accumulate512( + acc: &mut [u64], + data: &[u8], + keys: &[u8], + acc_width: AccWidth, + ) { + let xacc = acc.as_mut_ptr() as *mut __m256i; + let xdata = data.as_ptr() as *const __m256i; + let xkey = keys.as_ptr() as *const __m256i; + + for i in 0..STRIPE_LEN / mem::size_of::<__m256i>() { + let d = _mm256_loadu_si256(xdata.add(i)); + let k = _mm256_loadu_si256(xkey.add(i)); + let dk = _mm256_xor_si256(d, k); // uint32 dk[8] = {d0+k0, d1+k1, d2+k2, d3+k3, ...} + let mul = _mm256_mul_epu32(dk, _mm256_shuffle_epi32(dk, 0x31)); // uint64 res[4] = {dk0*dk1, dk2*dk3, ...} + + xacc.add(i).write(if acc_width == AccWidth::Acc128Bits { + let dswap = _mm256_shuffle_epi32(d, _mm_shuffle(1, 0, 3, 2)); + let add = _mm256_add_epi64(xacc.add(i).read(), dswap); + _mm256_add_epi64(mul, add) + } else { + let add = _mm256_add_epi64(xacc.add(i).read(), d); + _mm256_add_epi64(mul, add) + }) + } + } + + #[target_feature(enable = "avx2")] + pub unsafe fn scramble_acc(acc: &mut [u64], key: &[u8]) { + let xacc = acc.as_mut_ptr() as *mut __m256i; + let xkey = key.as_ptr() as *const __m256i; + 
let prime32 = _mm256_set1_epi32(PRIME32_1 as i32); + + for i in 0..STRIPE_LEN / mem::size_of::<__m256i>() { + let data = xacc.add(i).read(); + let shifted = _mm256_srli_epi64(data, 47); + let data = _mm256_xor_si256(data, shifted); + + let k = _mm256_loadu_si256(xkey.add(i)); + let dk = _mm256_xor_si256(data, k); /* U32 dk[4] = {d0+k0, d1+k1, d2+k2, d3+k3} */ + let dk1 = _mm256_mul_epu32(dk, prime32); + + let d2 = _mm256_shuffle_epi32(dk, 0x31); + let dk2 = _mm256_mul_epu32(d2, prime32); + let dk2h = _mm256_slli_epi64(dk2, 32); + + xacc.add(i).write(_mm256_add_epi64(dk1, dk2h)); + } + } +} + +#[cfg(all(target_feature = "sse2", not(target_feature = "avx2")))] +mod sse2 { + use super::*; + + #[target_feature(enable = "sse2")] + #[allow(clippy::cast_ptr_alignment)] + pub(crate) unsafe fn accumulate512( + acc: &mut [u64], + data: &[u8], + keys: &[u8], + acc_width: AccWidth, + ) { + let xacc = acc.as_mut_ptr() as *mut __m128i; + let xdata = data.as_ptr() as *const __m128i; + let xkey = keys.as_ptr() as *const __m128i; + + for i in 0..STRIPE_LEN / mem::size_of::<__m128i>() { + let d = _mm_loadu_si128(xdata.add(i)); + let k = _mm_loadu_si128(xkey.add(i)); + let dk = _mm_xor_si128(d, k); // uint32 dk[4] = {d0+k0, d1+k1, d2+k2, d3+k3} */ + let mul = _mm_mul_epu32(dk, _mm_shuffle_epi32(dk, 0x31)); // uint64 res[4] = {dk0*dk1, dk2*dk3, ...} */ + xacc.add(i).write(if acc_width == AccWidth::Acc128Bits { + let dswap = _mm_shuffle_epi32(d, _mm_shuffle(1, 0, 3, 2)); + let add = _mm_add_epi64(xacc.add(i).read(), dswap); + _mm_add_epi64(mul, add) + } else { + let add = _mm_add_epi64(xacc.add(i).read(), d); + _mm_add_epi64(mul, add) + }) + } + } + + #[target_feature(enable = "sse2")] + #[allow(clippy::cast_ptr_alignment)] + pub unsafe fn scramble_acc(acc: &mut [u64], key: &[u8]) { + let xacc = acc.as_mut_ptr() as *mut __m128i; + let xkey = key.as_ptr() as *const __m128i; + let prime32 = _mm_set1_epi32(PRIME32_1 as i32); + + for i in 0..STRIPE_LEN / mem::size_of::<__m128i>() { + let 
/// Portable scalar fallback, compiled only when neither AVX2 nor SSE2 is enabled.
#[cfg(not(any(target_feature = "avx2", target_feature = "sse2")))]
mod generic {
    use super::*;

    /// Accumulates one stripe of `data` into `acc`, two 64-bit lanes at a time.
    ///
    /// Each lane adds the product of the low and high 32-bit halves of
    /// `data ^ key`. It then adds the raw input word: its own word for
    /// `AccWidth::Acc64Bits`, its neighbour's word for `AccWidth::Acc128Bits`.
    ///
    /// # Safety
    ///
    /// The body performs only checked slice accesses; the function is `unsafe`
    /// so that its signature matches the SIMD variants selected by `cfg_if!`.
    #[inline(always)]
    pub(crate) unsafe fn accumulate512(
        acc: &mut [u64],
        data: &[u8],
        key: &[u8],
        acc_width: AccWidth,
    ) {
        let cross_lanes = acc_width == AccWidth::Acc128Bits;
        let mut lo = 0;

        while lo < ACC_NB {
            let hi = lo + 1;

            let in_lo = data[8 * lo..].read_u64_le();
            let in_hi = data[8 * hi..].read_u64_le();
            let key_lo = key[8 * lo..].read_u64_le();
            let key_hi = key[8 * hi..].read_u64_le();
            let keyed_lo = key_lo ^ in_lo;
            let keyed_hi = key_hi ^ in_hi;

            acc[lo] = acc[lo].wrapping_add(mul32_to64(keyed_lo, keyed_lo >> 32));
            acc[hi] = acc[hi].wrapping_add(mul32_to64(keyed_hi, keyed_hi >> 32));

            // The 128-bit variant feeds each lane with its neighbour's input word.
            let (extra_lo, extra_hi) = if cross_lanes {
                (in_hi, in_lo)
            } else {
                (in_lo, in_hi)
            };
            acc[lo] = acc[lo].wrapping_add(extra_lo);
            acc[hi] = acc[hi].wrapping_add(extra_hi);

            lo += 2;
        }
    }

    /// Multiplies the low 32 bits of `a` by the low 32 bits of `b`.
    #[inline(always)]
    fn mul32_to64(a: u64, b: u64) -> u64 {
        // Two 32-bit factors always fit in 64 bits, so `*` cannot overflow.
        u64::from(a as u32) * u64::from(b as u32)
    }

    /// Scrambles every accumulator lane: xorshift by 47, XOR with the matching
    /// little-endian key word, then multiply by `PRIME32_1`.
    ///
    /// # Safety
    ///
    /// Only checked accesses are performed; `unsafe` keeps the signature
    /// identical to the SIMD variants.
    #[inline(always)]
    pub unsafe fn scramble_acc(acc: &mut [u64], key: &[u8]) {
        for lane in 0..ACC_NB {
            let key64 = key[8 * lane..].read_u64_le();
            let folded = acc[lane] ^ (acc[lane] >> 47);

            acc[lane] = (folded ^ key64).wrapping_mul(u64::from(PRIME32_1));
        }
    }
}
/// Computes the full 128-bit product of `ll1` and `ll2` and folds it to 64 bits
/// by XOR-ing the low half with the high half.
#[inline(always)]
fn mul128_fold64(ll1: u64, ll2: u64) -> u64 {
    // A u64 * u64 product always fits in a u128, so this cannot overflow.
    let product = u128::from(ll1) * u128::from(ll2);
    let high = (product >> 64) as u64;
    let low = product as u64;

    low ^ high
}
deref(&self) -> &Self::Target { + match self { + With::Default(secret) | With::Custom(secret) => &secret.0[..], + With::Ref(secret) => secret, + } + } +} + +impl Default for State { + fn default() -> Self { + Self::new(0, With::Default(Secret::default())) + } +} + +impl State { + fn new(seed: u64, secret: With) -> Self { + State { + acc: Acc::default(), + secret, + buf: Vec::with_capacity(INTERNAL_BUFFER_SIZE), + seed, + total_len: 0, + nb_stripes_so_far: 0, + } + } + + fn with_seed(seed: u64) -> Self { + Self::new(seed, With::Custom(Secret::with_seed(seed))) + } + + fn with_secret<S: Into<Vec<u8>>>(secret: S) -> State { + let secret = secret.into(); + + debug_assert!(secret.len() >= SECRET_SIZE_MIN); + + Self::new(0, With::Ref(secret)) + } + + #[inline(always)] + fn secret_limit(&self) -> usize { + self.secret.len() - STRIPE_LEN + } + + #[inline(always)] + fn nb_stripes_per_block(&self) -> usize { + self.secret_limit() / SECRET_CONSUME_RATE + } + + #[inline(always)] + fn update(&mut self, mut input: &[u8], acc_width: AccWidth) { + let len = input.len(); + + if len == 0 { + return; + } + + self.total_len += len; + + if self.buf.len() + len <= self.buf.capacity() { + self.buf.extend_from_slice(input); + return; + } + + let nb_stripes_per_block = self.nb_stripes_per_block(); + let secret_limit = self.secret_limit(); + + if !self.buf.is_empty() { + // some data within internal buffer: fill then consume it + let (load, rest) = input.split_at(self.buf.capacity() - self.buf.len()); + self.buf.extend_from_slice(load); + input = rest; + self.nb_stripes_so_far = consume_stripes( + &mut self.acc, + self.nb_stripes_so_far, + nb_stripes_per_block, + &self.buf, + INTERNAL_BUFFER_STRIPES, + &self.secret, + secret_limit, + acc_width, + ); + self.buf.clear(); + } + + // consume input by full buffer quantities + let mut chunks = input.chunks_exact(INTERNAL_BUFFER_SIZE); + + for chunk in &mut chunks { + self.nb_stripes_so_far = consume_stripes( + &mut self.acc, + 
self.nb_stripes_so_far, + nb_stripes_per_block, + chunk, + INTERNAL_BUFFER_STRIPES, + &self.secret, + secret_limit, + acc_width, + ); + } + + // some remaining input data : buffer it + self.buf.extend_from_slice(chunks.remainder()) + } + + #[inline(always)] + fn digest_long(&self, acc_width: AccWidth) -> Acc { + let mut acc = self.acc.clone(); + let secret_limit = self.secret_limit(); + + if self.buf.len() >= STRIPE_LEN { + // digest locally, state remains unaltered, and can continue ingesting more data afterwards + let total_nb_stripes = self.buf.len() / STRIPE_LEN; + let _nb_stripes_so_far = consume_stripes( + &mut acc, + self.nb_stripes_so_far, + self.nb_stripes_per_block(), + &self.buf, + total_nb_stripes, + &self.secret, + secret_limit, + acc_width, + ); + if (self.buf.len() % STRIPE_LEN) != 0 { + unsafe { + accumulate512( + &mut acc, + &self.buf[self.buf.len() - STRIPE_LEN..], + &self.secret[secret_limit - SECRET_LASTACC_START..], + acc_width, + ); + } + } + } else if !self.buf.is_empty() { + // one last stripe + let mut last_stripe = [0u8; STRIPE_LEN]; + let catchup_size = STRIPE_LEN - self.buf.len(); + + last_stripe[..catchup_size].copy_from_slice(unsafe { + slice::from_raw_parts( + self.buf.as_ptr().add(self.buf.capacity() - catchup_size), + catchup_size, + ) + }); + last_stripe[catchup_size..].copy_from_slice(&self.buf); + + unsafe { + accumulate512( + &mut acc, + &last_stripe[..], + &self.secret[secret_limit - SECRET_LASTACC_START..], + acc_width, + ); + } + } + + acc + } + + #[inline(always)] + fn digest64(&self) -> u64 { + if self.total_len > MIDSIZE_MAX { + let acc = self.digest_long(AccWidth::Acc64Bits); + + merge_accs( + &acc, + &self.secret[SECRET_MERGEACCS_START..], + (self.total_len as u64).wrapping_mul(PRIME64_1), + ) + } else if self.seed != 0 { + hash64_with_seed(&self.buf, self.seed) + } else { + hash64_with_secret(&self.buf, &self.secret[..self.secret_limit() + STRIPE_LEN]) + } + } + + #[inline(always)] + fn digest128(&self) -> u128 { + let 
secret_limit = self.secret_limit(); + + if self.total_len > MIDSIZE_MAX { + let acc = self.digest_long(AccWidth::Acc128Bits); + + debug_assert!(secret_limit + STRIPE_LEN >= ACC_SIZE + SECRET_MERGEACCS_START); + + let total_len = self.total_len as u64; + + let low64 = merge_accs( + &acc, + &self.secret[SECRET_MERGEACCS_START..], + total_len.wrapping_mul(PRIME64_1), + ); + let high64 = merge_accs( + &acc, + &self.secret[secret_limit + STRIPE_LEN - ACC_SIZE - SECRET_MERGEACCS_START..], + !total_len.wrapping_mul(PRIME64_2), + ); + + u128::from(low64) + (u128::from(high64) << 64) + } else if self.seed != 0 { + hash128_with_seed(&self.buf, self.seed) + } else { + hash128_with_secret(&self.buf, &self.secret[..secret_limit + STRIPE_LEN]) + } + } +} + +#[inline(always)] +#[allow(clippy::too_many_arguments)] +fn consume_stripes( + acc: &mut [u64], + nb_stripes_so_far: usize, + nb_stripes_per_block: usize, + data: &[u8], + total_stripes: usize, + secret: &[u8], + secret_limit: usize, + acc_width: AccWidth, +) -> usize { + debug_assert!(nb_stripes_so_far < nb_stripes_per_block); + + if nb_stripes_per_block - nb_stripes_so_far <= total_stripes { + let nb_stripes = nb_stripes_per_block - nb_stripes_so_far; + + accumulate( + acc, + data, + &secret[nb_stripes_so_far * SECRET_CONSUME_RATE..], + nb_stripes, + acc_width, + ); + unsafe { + scramble_acc(acc, &secret[secret_limit..]); + } + accumulate( + acc, + &data[nb_stripes * STRIPE_LEN..], + secret, + total_stripes - nb_stripes, + acc_width, + ); + + total_stripes - nb_stripes + } else { + accumulate( + acc, + data, + &secret[nb_stripes_so_far * SECRET_CONSUME_RATE..], + total_stripes, + acc_width, + ); + + nb_stripes_so_far + total_stripes + } +} + +/* ========================================== + * XXH3 128 bits (=> XXH128) + * ========================================== */ + +#[inline(always)] +fn hash_len_0to16_128bits(data: &[u8], len: usize, secret: &[u8], seed: u64) -> u128 { + debug_assert!(len <= 16); + + if len > 8 { + 
hash_len_9to16_128bits(data, len, secret, seed) + } else if len >= 4 { + hash_len_4to8_128bits(data, len, secret, seed) + } else if len > 0 { + hash_len_1to3_128bits(data, len, secret, seed) + } else { + 0 + } +} + +#[inline(always)] +fn hash_len_1to3_128bits(data: &[u8], len: usize, key: &[u8], seed: u64) -> u128 { + debug_assert!((1..=3).contains(&len)); + + let c1 = u32::from(data[0]); + let c2 = u32::from(data[len >> 1]); + let c3 = u32::from(data[len - 1]); + let combinedl = c1 + (c2 << 8) + (c3 << 16) + ((len as u32) << 24); + let combinedh = combinedl.swap_bytes(); + let keyedl = u64::from(combinedl) ^ u64::from(key.read_u32_le()).wrapping_add(seed); + let keyedh = u64::from(combinedh) ^ u64::from(key[4..].read_u32_le()).wrapping_sub(seed); + let mixedl = keyedl.wrapping_mul(PRIME64_1); + let mixedh = keyedh.wrapping_mul(PRIME64_2); + + u128::from(avalanche(mixedl)) + (u128::from(avalanche(mixedh)) << 64) +} + +#[inline(always)] +fn hash_len_4to8_128bits(data: &[u8], len: usize, key: &[u8], seed: u64) -> u128 { + debug_assert!((4..=8).contains(&len)); + + let in1 = u64::from(data.read_u32_le()); + let in2 = u64::from(data[len - 4..].read_u32_le()); + let in64l = in1.wrapping_add(in2 << 32); + let in64h = in64l.swap_bytes(); + let keyedl = in64l ^ key.read_u64_le().wrapping_add(seed); + let keyedh = in64h ^ key[8..].read_u64_le().wrapping_sub(seed); + let mix64l1 = + (len as u64).wrapping_add((keyedl ^ (keyedl >> 51)).wrapping_mul(u64::from(PRIME32_1))); + let mix64l2 = (mix64l1 ^ (mix64l1 >> 47)).wrapping_mul(PRIME64_2); + let mix64h1 = (keyedh ^ (keyedh >> 47)) + .wrapping_mul(PRIME64_1) + .wrapping_sub(len as u64); + let mix64h2 = (mix64h1 ^ (mix64h1 >> 43)).wrapping_mul(PRIME64_4); + + u128::from(avalanche(mix64l2)) + (u128::from(avalanche(mix64h2)) << 64) +} + +#[inline(always)] +fn hash_len_9to16_128bits(data: &[u8], len: usize, key: &[u8], seed: u64) -> u128 { + debug_assert!((9..=16).contains(&len)); + + let ll1 = data.read_u64_le() ^ 
key.read_u64_le().wrapping_add(seed); + let ll2 = data[len - 8..].read_u64_le() ^ key[8..].read_u64_le().wrapping_sub(seed); + let inlow = ll1 ^ ll2; + + let m128 = u128::from(inlow).wrapping_mul(u128::from(PRIME64_1)); + let high64 = ((m128 >> 64) as u64).wrapping_add(ll2.wrapping_mul(PRIME64_1)); + let low64 = (m128 as u64) ^ (high64 >> 32); + + let h128 = u128::from(low64).wrapping_mul(u128::from(PRIME64_2)); + let high64 = ((h128 >> 64) as u64).wrapping_add(high64.wrapping_mul(PRIME64_2)); + let low64 = h128 as u64; + + u128::from(avalanche(low64)) + (u128::from(avalanche(high64)) << 64) +} + +#[inline(always)] +fn hash_len_17to128_128bits(data: &[u8], len: usize, secret: &[u8], seed: u64) -> u128 { + debug_assert!((17..=128).contains(&len)); + debug_assert!(secret.len() >= SECRET_SIZE_MIN); + + let mut acc1 = PRIME64_1.wrapping_mul(len as u64); + let mut acc2 = 0u64; + + if len > 32 { + if len > 64 { + if len > 96 { + acc1 = acc1.wrapping_add(mix_16bytes(&data[48..], &secret[96..], seed)); + acc2 = acc2.wrapping_add(mix_16bytes(&data[len - 64..], &secret[112..], seed)); + } + acc1 = acc1.wrapping_add(mix_16bytes(&data[32..], &secret[64..], seed)); + acc2 = acc2.wrapping_add(mix_16bytes(&data[len - 48..], &secret[80..], seed)); + } + + acc1 = acc1.wrapping_add(mix_16bytes(&data[16..], &secret[32..], seed)); + acc2 = acc2.wrapping_add(mix_16bytes(&data[len - 32..], &secret[48..], seed)); + } + + acc1 = acc1.wrapping_add(mix_16bytes(data, secret, seed)); + acc2 = acc2.wrapping_add(mix_16bytes(&data[len - 16..], &secret[16..], seed)); + + let low64 = acc1.wrapping_add(acc2); + let high64 = acc1 + .wrapping_mul(PRIME64_1) + .wrapping_add(acc2.wrapping_mul(PRIME64_4)) + .wrapping_add((len as u64).wrapping_sub(seed).wrapping_mul(PRIME64_2)); + + u128::from(avalanche(low64)) + (u128::from(0u64.wrapping_sub(avalanche(high64))) << 64) +} + +#[inline(always)] +fn hash_len_129to240_128bits(data: &[u8], len: usize, secret: &[u8], seed: u64) -> u128 { + 
debug_assert!((129..=MIDSIZE_MAX).contains(&len)); + debug_assert!(secret.len() >= SECRET_SIZE_MIN); + + let acc1 = (len as u64).wrapping_mul(PRIME64_1); + let acc2 = 0u64; + + let (acc1, acc2) = (0..4).fold((acc1, acc2), |(acc1, acc2), i| { + ( + acc1.wrapping_add(mix_16bytes(&data[32 * i..], &secret[32 * i..], seed)), + acc2.wrapping_add(mix_16bytes( + &data[32 * i + 16..], + &secret[32 * i + 16..], + 0u64.wrapping_sub(seed), + )), + ) + }); + let acc1 = avalanche(acc1); + let acc2 = avalanche(acc2); + + let nb_rounds = len / 32; + debug_assert!(nb_rounds >= 4); + + let (acc1, acc2) = (4..nb_rounds).fold((acc1, acc2), |(acc1, acc2), i| { + ( + acc1.wrapping_add(mix_16bytes( + &data[32 * i..], + &secret[32 * (i - 4) + MIDSIZE_STARTOFFSET..], + seed, + )), + acc2.wrapping_add(mix_16bytes( + &data[32 * i + 16..], + &secret[32 * (i - 4) + 16 + MIDSIZE_STARTOFFSET..], + 0u64.wrapping_sub(seed), + )), + ) + }); + + // last bytes + let acc1 = acc1.wrapping_add(mix_16bytes( + &data[len - 16..], + &secret[SECRET_SIZE_MIN - MIDSIZE_LASTOFFSET..], + seed, + )); + let acc2 = acc2.wrapping_add(mix_16bytes( + &data[len - 32..], + &secret[SECRET_SIZE_MIN - MIDSIZE_LASTOFFSET - 16..], + 0u64.wrapping_sub(seed), + )); + + let low64 = acc1.wrapping_add(acc2); + let high64 = acc1 + .wrapping_mul(PRIME64_1) + .wrapping_add(acc2.wrapping_mul(PRIME64_4)) + .wrapping_add((len as u64).wrapping_sub(seed).wrapping_mul(PRIME64_2)); + + u128::from(avalanche(low64)) + (u128::from(0u64.wrapping_sub(avalanche(high64))) << 64) +} + +#[inline] +fn hash_long_128bits_with_default_secret(data: &[u8], len: usize) -> u128 { + hash_long_128bits_internal(data, len, &SECRET) +} + +#[inline] +fn hash_long_128bits_with_secret(data: &[u8], len: usize, secret: &[u8]) -> u128 { + hash_long_128bits_internal(data, len, secret) +} + +#[inline] +fn hash_long_128bits_with_seed(data: &[u8], len: usize, seed: u64) -> u128 { + if seed == 0 { + hash_long_128bits_with_default_secret(data, len) + } else { + let secret 
= Secret::with_seed(seed); + + hash_long_128bits_internal(data, len, &secret) + } +} + +#[inline(always)] +fn hash_long_128bits_internal(data: &[u8], len: usize, secret: &[u8]) -> u128 { + let mut acc = Acc::default(); + + hash_long_internal_loop(&mut acc, data, len, secret, AccWidth::Acc128Bits); + + debug_assert!(secret.len() >= acc.len() + SECRET_MERGEACCS_START); + + let low64 = merge_accs( + &acc, + &secret[SECRET_MERGEACCS_START..], + (len as u64).wrapping_mul(PRIME64_1), + ); + let high64 = merge_accs( + &acc, + &secret[secret.len() - ACC_SIZE - SECRET_MERGEACCS_START..], + !(len as u64).wrapping_mul(PRIME64_2), + ); + + u128::from(low64) + (u128::from(high64) << 64) +} + +/* === XXH3 128-bit streaming === */ + +/* all the functions are actually the same as for 64-bit streaming variant, +just the reset one is different (different initial acc values for 0,5,6,7), +and near the end of the digest function */ + +#[cfg(test)] +mod tests { + use alloc::vec; + + use super::*; + + const PRIME: u64 = 2654435761; + const PRIME64: u64 = 11400714785074694797; + const SANITY_BUFFER_SIZE: usize = 2243; + + fn sanity_buffer() -> [u8; SANITY_BUFFER_SIZE] { + let mut buf = [0; SANITY_BUFFER_SIZE]; + let mut byte_gen: u64 = PRIME; + + for b in buf.iter_mut() { + *b = (byte_gen >> 56) as u8; + byte_gen = byte_gen.wrapping_mul(PRIME64); + } + + buf + } + + #[test] + fn hash_64bits_sanity_check() { + let buf = sanity_buffer(); + + let test_cases = vec![ + (&[][..], 0, 0), /* zero-length hash is always 0 */ + (&[][..], PRIME64, 0), + (&buf[..1], 0, 0x7198D737CFE7F386), /* 1 - 3 */ + (&buf[..1], PRIME64, 0xB70252DB7161C2BD), /* 1 - 3 */ + (&buf[..6], 0, 0x22CBF5F3E1F6257C), /* 4 - 8 */ + (&buf[..6], PRIME64, 0x6398631C12AB94CE), /* 4 - 8 */ + (&buf[..12], 0, 0xD5361CCEEBB5A0CC), /* 9 - 16 */ + (&buf[..12], PRIME64, 0xC4C125E75A808C3D), /* 9 - 16 */ + (&buf[..24], 0, 0x46796F3F78B20F6B), /* 17 - 32 */ + (&buf[..24], PRIME64, 0x60171A7CD0A44C10), /* 17 - 32 */ + (&buf[..48], 0, 
0xD8D4D3590D136E11), /* 33 - 64 */ + (&buf[..48], PRIME64, 0x05441F2AEC2A1296), /* 33 - 64 */ + (&buf[..80], 0, 0xA1DC8ADB3145B86A), /* 65 - 96 */ + (&buf[..80], PRIME64, 0xC9D55256965B7093), /* 65 - 96 */ + (&buf[..112], 0, 0xE43E5717A61D3759), /* 97 -128 */ + (&buf[..112], PRIME64, 0x5A5F89A3FECE44A5), /* 97 -128 */ + (&buf[..195], 0, 0x6F747739CBAC22A5), /* 129-240 */ + (&buf[..195], PRIME64, 0x33368E23C7F95810), /* 129-240 */ + (&buf[..403], 0, 0x4834389B15D981E8), /* one block, last stripe is overlapping */ + (&buf[..403], PRIME64, 0x85CE5DFFC7B07C87), /* one block, last stripe is overlapping */ + (&buf[..512], 0, 0x6A1B982631F059A8), /* one block, finishing at stripe boundary */ + (&buf[..512], PRIME64, 0x10086868CF0ADC99), /* one block, finishing at stripe boundary */ + (&buf[..2048], 0, 0xEFEFD4449323CDD4), /* 2 blocks, finishing at block boundary */ + (&buf[..2048], PRIME64, 0x01C85E405ECA3F6E), /* 2 blocks, finishing at block boundary */ + (&buf[..2240], 0, 0x998C0437486672C7), /* 3 blocks, finishing at stripe boundary */ + (&buf[..2240], PRIME64, 0x4ED38056B87ABC7F), /* 3 blocks, finishing at stripe boundary */ + (&buf[..2243], 0, 0xA559D20581D742D3), /* 3 blocks, last stripe is overlapping */ + (&buf[..2243], PRIME64, 0x96E051AB57F21FC8), /* 3 blocks, last stripe is overlapping */ + ]; + + for (buf, seed, result) in test_cases { + { + let hash = hash64_with_seed(buf, seed); + + assert_eq!( + hash, + result, + "hash64_with_seed(&buf[..{}], seed={}) failed, got 0x{:X}, expected 0x{:X}", + buf.len(), + seed, + hash, + result + ); + } + + // streaming API test + + // single ingestio + { + let mut hasher = Hash64::with_seed(seed); + hasher.write(buf); + let hash = hasher.finish(); + + assert_eq!( + hash, + result, + "Hash64::update(&buf[..{}]) with seed={} failed, got 0x{:X}, expected 0x{:X}", + buf.len(), + seed, + hash, + result + ); + } + + if buf.len() > 3 { + // 2 ingestions + let mut hasher = Hash64::with_seed(seed); + hasher.write(&buf[..3]); + 
hasher.write(&buf[3..]); + let hash = hasher.finish(); + + assert_eq!( + hash, + result, + "Hash64::update(&buf[..3], &buf[3..{}]) with seed={} failed, got 0x{:X}, expected 0x{:X}", + buf.len(), + seed, + hash, + result + ); + } + + // byte by byte ingestion + { + let mut hasher = Hash64::with_seed(seed); + + for chunk in buf.chunks(1) { + hasher.write(chunk); + } + + let hash = hasher.finish(); + + assert_eq!( + hash, + result, + "Hash64::update(&buf[..{}].chunks(1)) with seed={} failed, got 0x{:X}, expected 0x{:X}", + buf.len(), + seed, + hash, + result + ); + } + } + } + + #[test] + fn hash_64bits_with_secret_sanity_check() { + let buf = sanity_buffer(); + let secret = &buf[7..7 + SECRET_SIZE_MIN + 11]; + + let test_cases = vec![ + (&[][..], secret, 0), /* zero-length hash is always 0 */ + (&buf[..1], secret, 0x7F69735D618DB3F0), /* 1 - 3 */ + (&buf[..6], secret, 0xBFCC7CB1B3554DCE), /* 6 - 8 */ + (&buf[..12], secret, 0x8C50DC90AC9206FC), /* 9 - 16 */ + (&buf[..24], secret, 0x1CD2C2EE9B9A0928), /* 17 - 32 */ + (&buf[..48], secret, 0xA785256D9D65D514), /* 33 - 64 */ + (&buf[..80], secret, 0x6F3053360D21BBB7), /* 65 - 96 */ + (&buf[..112], secret, 0x560E82D25684154C), /* 97 -128 */ + (&buf[..195], secret, 0xBA5BDDBC5A767B11), /* 129-240 */ + (&buf[..403], secret, 0xFC3911BBA656DB58), /* one block, last stripe is overlapping */ + (&buf[..512], secret, 0x306137DD875741F1), /* one block, finishing at stripe boundary */ + (&buf[..2048], secret, 0x2836B83880AD3C0C), /* > one block, at least one scrambling */ + (&buf[..2243], secret, 0x3446E248A00CB44A), /* > one block, at least one scrambling, last stripe unaligned */ + ]; + + for (buf, secret, result) in test_cases { + { + let hash = hash64_with_secret(buf, secret); + + assert_eq!( + hash, + result, + "hash64_with_secret(&buf[..{}], secret) failed, got 0x{:X}, expected 0x{:X}", + buf.len(), + hash, + result + ); + } + + // streaming API test + + // single ingestio + { + let mut hasher = Hash64::with_secret(secret); + 
/// Checks the 128-bit one-shot function and the `Hash128` streaming API
/// (single write, two writes, byte-by-byte) against known-good values.
#[test]
fn hash_128bits_sanity_check() {
    let buf = sanity_buffer();

    // (input, seed, expected low 64 bits, expected high 64 bits)
    let test_cases = vec![
        (&[][..], 0, 0u64, 0u64), /* zero-length input hashes to 0, whatever the seed */
        (&[][..], PRIME, 0, 0),
        (&buf[..1], 0, 0x7198D737CFE7F386, 0x3EE70EA338F3F1E8), /* 1-3 */
        (&buf[..1], PRIME, 0x8E05996EC27C0F46, 0x90DFC659A8BDCC0C), /* 1-3 */
        (&buf[..6], 0, 0x22CBF5F3E1F6257C, 0xD4E6C2B94FFC3BFA), /* 4-8 */
        (&buf[..6], PRIME, 0x97B28D3079F8541F, 0xEFC0B954298E6555), /* 4-8 */
        (&buf[..12], 0, 0x0E0CD01F05AC2F0D, 0x2B55C95951070D4B), /* 9-16 */
        (&buf[..12], PRIME, 0xA9DE561CA04CDF37, 0x609E31FDC00A43C9), /* 9-16 */
        (&buf[..24], 0, 0x46796F3F78B20F6B, 0x58FF55C3926C13FA), /* 17-32 */
        (&buf[..24], PRIME, 0x30D5C4E9EB415C55, 0x8868344B3A4645D0), /* 17-32 */
        (&buf[..48], 0, 0xD8D4D3590D136E11, 0x5527A42843020A62), /* 33-64 */
        (&buf[..48], PRIME, 0x1D8834E1A5407A1C, 0x44375B9FB060F541), /* 33-64 */
        (&buf[..81], 0, 0x4B9B448ED8DFD3DD, 0xE805A6D1A43D70E5), /* 65-96 */
        (&buf[..81], PRIME, 0xD2D6B075945617BA, 0xE58BE5736F6E7550), /* 65-96 */
        (&buf[..103], 0, 0xC5A9F97B29EFA44E, 0x254DB7BE881E125C), /* 97-128 */
        (&buf[..103], PRIME, 0xFA2086367CDB177F, 0x0AEDEA68C988B0C0), /* 97-128 */
        (&buf[..192], 0, 0xC3142FDDD9102A3F, 0x06F1747E77185F97), /* 129-240 */
        (&buf[..192], PRIME, 0xA89F07B35987540F, 0xCF1B35FB2C557F54), /* 129-240 */
        (&buf[..222], 0, 0xA61AC4EB3295F86B, 0x33FA7B7598C28A07), /* 129-240 */
        (&buf[..222], PRIME, 0x54135EB88AD8B75E, 0xBC45CE6AE50BCF53), /* 129-240 */
        (&buf[..403], 0, 0xB0C48E6D18E9D084, 0xB16FC17E992FF45D), /* one block, last stripe is overlapping */
        (&buf[..403], PRIME64, 0x0A1D320C9520871D, 0xCE11CB376EC93252), /* one block, last stripe is overlapping */
        (&buf[..512], 0, 0xA03428558AC97327, 0x4ECF51281BA406F7), /* one block, finishing at stripe boundary */
        (&buf[..512], PRIME64, 0xAF67A482D6C893F2, 0x1382D92F25B84D90), /* one block, finishing at stripe boundary */
        (&buf[..2048], 0, 0x21901B416B3B9863, 0x212AF8E6326F01E0), /* two blocks, finishing at block boundary */
        (&buf[..2048], PRIME, 0xBDBB2282577DADEC, 0xF78CDDC2C9A9A692), /* two blocks, finishing at block boundary */
        (&buf[..2240], 0, 0x00AD52FA9385B6FE, 0xC705BAD3356CE302), /* two blocks, ends at stripe boundary */
        (&buf[..2240], PRIME, 0x10FD0072EC68BFAA, 0xE1312F3458817F15), /* two blocks, ends at stripe boundary */
        (&buf[..2237], 0, 0x970C91411533862C, 0x4BBD06FF7BFF0AB1), /* two blocks, last stripe is overlapping */
        (&buf[..2237], PRIME, 0xD80282846D814431, 0x14EBB157B84D9785), /* two blocks, last stripe is overlapping */
    ];

    for (buf, seed, lo, hi) in test_cases {
        let result = u128::from(lo) + (u128::from(hi) << 64);

        // one-shot API
        {
            let hash = hash128_with_seed(buf, seed);

            assert_eq!(
                hash,
                result,
                "hash128_with_seed(&buf[..{}], seed={}) failed, got 0x{:X}, expected 0x{:X}",
                buf.len(),
                seed,
                hash,
                result
            );
        }

        // streaming API test

        // single ingestion
        {
            let mut hasher = Hash128::with_seed(seed);
            hasher.write(buf);
            let hash = hasher.finish_ext();

            assert_eq!(
                hash,
                result,
                "Hash128::update(&buf[..{}]) with seed={} failed, got 0x{:X}, expected 0x{:X}",
                buf.len(),
                seed,
                hash,
                result
            );
        }

        if buf.len() > 3 {
            // 2 ingestions
            let mut hasher = Hash128::with_seed(seed);
            hasher.write(&buf[..3]);
            hasher.write(&buf[3..]);
            let hash = hasher.finish_ext();

            assert_eq!(
                hash,
                result,
                "Hash128::update(&buf[..3], &buf[3..{}]) with seed={} failed, got 0x{:X}, expected 0x{:X}",
                buf.len(),
                seed,
                hash,
                result
            );
        }

        // byte by byte ingestion
        {
            let mut hasher = Hash128::with_seed(seed);

            for chunk in buf.chunks(1) {
                hasher.write(chunk);
            }

            let hash = hasher.finish_ext();

            assert_eq!(
                hash,
                result,
                "Hash128::update(&buf[..{}].chunks(1)) with seed={} failed, got 0x{:X}, expected 0x{:X}",
                buf.len(),
                seed,
                hash,
                result
            );
        }
    }
}