diff options
Diffstat (limited to 'third_party/rust/rand/src/seq')
-rw-r--r-- | third_party/rust/rand/src/seq/index.rs | 678 | ||||
-rw-r--r-- | third_party/rust/rand/src/seq/mod.rs | 1356 |
2 files changed, 2034 insertions, 0 deletions
diff --git a/third_party/rust/rand/src/seq/index.rs b/third_party/rust/rand/src/seq/index.rs new file mode 100644 index 0000000000..b38e4649d1 --- /dev/null +++ b/third_party/rust/rand/src/seq/index.rs @@ -0,0 +1,678 @@ +// Copyright 2018 Developers of the Rand project. +// +// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or +// https://www.apache.org/licenses/LICENSE-2.0> or the MIT license +// <LICENSE-MIT or https://opensource.org/licenses/MIT>, at your +// option. This file may not be copied, modified, or distributed +// except according to those terms. + +//! Low-level API for sampling indices + +#[cfg(feature = "alloc")] use core::slice; + +#[cfg(feature = "alloc")] use alloc::vec::{self, Vec}; +// BTreeMap is not as fast in tests, but better than nothing. +#[cfg(all(feature = "alloc", not(feature = "std")))] +use alloc::collections::BTreeSet; +#[cfg(feature = "std")] use std::collections::HashSet; + +#[cfg(feature = "std")] +use crate::distributions::WeightedError; + +#[cfg(feature = "alloc")] +use crate::{Rng, distributions::{uniform::SampleUniform, Distribution, Uniform}}; + +#[cfg(feature = "serde1")] +use serde::{Serialize, Deserialize}; + +/// A vector of indices. +/// +/// Multiple internal representations are possible. +#[derive(Clone, Debug)] +#[cfg_attr(feature = "serde1", derive(Serialize, Deserialize))] +pub enum IndexVec { + #[doc(hidden)] + U32(Vec<u32>), + #[doc(hidden)] + USize(Vec<usize>), +} + +impl IndexVec { + /// Returns the number of indices + #[inline] + pub fn len(&self) -> usize { + match *self { + IndexVec::U32(ref v) => v.len(), + IndexVec::USize(ref v) => v.len(), + } + } + + /// Returns `true` if the length is 0. + #[inline] + pub fn is_empty(&self) -> bool { + match *self { + IndexVec::U32(ref v) => v.is_empty(), + IndexVec::USize(ref v) => v.is_empty(), + } + } + + /// Return the value at the given `index`. + /// + /// (Note: we cannot implement [`std::ops::Index`] because of lifetime + /// restrictions.) + #[inline] + pub fn index(&self, index: usize) -> usize { + match *self { + IndexVec::U32(ref v) => v[index] as usize, + IndexVec::USize(ref v) => v[index], + } + } + + /// Return result as a `Vec<usize>`. Conversion may or may not be trivial. + #[inline] + pub fn into_vec(self) -> Vec<usize> { + match self { + IndexVec::U32(v) => v.into_iter().map(|i| i as usize).collect(), + IndexVec::USize(v) => v, + } + } + + /// Iterate over the indices as a sequence of `usize` values + #[inline] + pub fn iter(&self) -> IndexVecIter<'_> { + match *self { + IndexVec::U32(ref v) => IndexVecIter::U32(v.iter()), + IndexVec::USize(ref v) => IndexVecIter::USize(v.iter()), + } + } +} + +impl IntoIterator for IndexVec { + type Item = usize; + type IntoIter = IndexVecIntoIter; + + /// Convert into an iterator over the indices as a sequence of `usize` values + #[inline] + fn into_iter(self) -> IndexVecIntoIter { + match self { + IndexVec::U32(v) => IndexVecIntoIter::U32(v.into_iter()), + IndexVec::USize(v) => IndexVecIntoIter::USize(v.into_iter()), + } + } +} + +impl PartialEq for IndexVec { + fn eq(&self, other: &IndexVec) -> bool { + use self::IndexVec::*; + match (self, other) { + (&U32(ref v1), &U32(ref v2)) => v1 == v2, + (&USize(ref v1), &USize(ref v2)) => v1 == v2, + (&U32(ref v1), &USize(ref v2)) => { + (v1.len() == v2.len()) && (v1.iter().zip(v2.iter()).all(|(x, y)| *x as usize == *y)) + } + (&USize(ref v1), &U32(ref v2)) => { + (v1.len() == v2.len()) && (v1.iter().zip(v2.iter()).all(|(x, y)| *x == *y as usize)) + } + } + } +} + +impl From<Vec<u32>> for IndexVec { + #[inline] + fn from(v: Vec<u32>) -> Self { + IndexVec::U32(v) + } +} + +impl From<Vec<usize>> for IndexVec { + #[inline] + fn from(v: Vec<usize>) -> Self { + IndexVec::USize(v) + } +} + +/// Return type of `IndexVec::iter`. +#[derive(Debug)] +pub enum IndexVecIter<'a> { + #[doc(hidden)] + U32(slice::Iter<'a, u32>), + #[doc(hidden)] + USize(slice::Iter<'a, usize>), +} + +impl<'a> Iterator for IndexVecIter<'a> { + type Item = usize; + + #[inline] + fn next(&mut self) -> Option<usize> { + use self::IndexVecIter::*; + match *self { + U32(ref mut iter) => iter.next().map(|i| *i as usize), + USize(ref mut iter) => iter.next().cloned(), + } + } + + #[inline] + fn size_hint(&self) -> (usize, Option<usize>) { + match *self { + IndexVecIter::U32(ref v) => v.size_hint(), + IndexVecIter::USize(ref v) => v.size_hint(), + } + } +} + +impl<'a> ExactSizeIterator for IndexVecIter<'a> {} + +/// Return type of `IndexVec::into_iter`. +#[derive(Clone, Debug)] +pub enum IndexVecIntoIter { + #[doc(hidden)] + U32(vec::IntoIter<u32>), + #[doc(hidden)] + USize(vec::IntoIter<usize>), +} + +impl Iterator for IndexVecIntoIter { + type Item = usize; + + #[inline] + fn next(&mut self) -> Option<Self::Item> { + use self::IndexVecIntoIter::*; + match *self { + U32(ref mut v) => v.next().map(|i| i as usize), + USize(ref mut v) => v.next(), + } + } + + #[inline] + fn size_hint(&self) -> (usize, Option<usize>) { + use self::IndexVecIntoIter::*; + match *self { + U32(ref v) => v.size_hint(), + USize(ref v) => v.size_hint(), + } + } +} + +impl ExactSizeIterator for IndexVecIntoIter {} + + +/// Randomly sample exactly `amount` distinct indices from `0..length`, and +/// return them in random order (fully shuffled). +/// +/// This method is used internally by the slice sampling methods, but it can +/// sometimes be useful to have the indices themselves so this is provided as +/// an alternative. +/// +/// The implementation used is not specified; we automatically select the +/// fastest available algorithm for the `length` and `amount` parameters +/// (based on detailed profiling on an Intel Haswell CPU). Roughly speaking, +/// complexity is `O(amount)`, except that when `amount` is small, performance +/// is closer to `O(amount^2)`, and when `length` is close to `amount` then +/// `O(length)`. +/// +/// Note that performance is significantly better over `u32` indices than over +/// `u64` indices. Because of this we hide the underlying type behind an +/// abstraction, `IndexVec`. +/// +/// If an allocation-free `no_std` function is required, it is suggested +/// to adapt the internal `sample_floyd` implementation. +/// +/// Panics if `amount > length`. +pub fn sample<R>(rng: &mut R, length: usize, amount: usize) -> IndexVec +where R: Rng + ?Sized { + if amount > length { + panic!("`amount` of samples must be less than or equal to `length`"); + } + if length > (::core::u32::MAX as usize) { + // We never want to use inplace here, but could use floyd's alg + // Lazy version: always use the cache alg. + return sample_rejection(rng, length, amount); + } + let amount = amount as u32; + let length = length as u32; + + // Choice of algorithm here depends on both length and amount. See: + // https://github.com/rust-random/rand/pull/479 + // We do some calculations with f32. Accuracy is not very important. + + if amount < 163 { + const C: [[f32; 2]; 2] = [[1.6, 8.0 / 45.0], [10.0, 70.0 / 9.0]]; + let j = if length < 500_000 { 0 } else { 1 }; + let amount_fp = amount as f32; + let m4 = C[0][j] * amount_fp; + // Short-cut: when amount < 12, floyd's is always faster + if amount > 11 && (length as f32) < (C[1][j] + m4) * amount_fp { + sample_inplace(rng, length, amount) + } else { + sample_floyd(rng, length, amount) + } + } else { + const C: [f32; 2] = [270.0, 330.0 / 9.0]; + let j = if length < 500_000 { 0 } else { 1 }; + if (length as f32) < C[j] * (amount as f32) { + sample_inplace(rng, length, amount) + } else { + sample_rejection(rng, length, amount) + } + } +} + +/// Randomly sample exactly `amount` distinct indices from `0..length`, and +/// return them in an arbitrary order (there is no guarantee of shuffling or +/// ordering). The weights are to be provided by the input function `weights`, +/// which will be called once for each index. +/// +/// This method is used internally by the slice sampling methods, but it can +/// sometimes be useful to have the indices themselves so this is provided as +/// an alternative. +/// +/// This implementation uses `O(length + amount)` space and `O(length)` time +/// if the "nightly" feature is enabled, or `O(length)` space and +/// `O(length + amount * log length)` time otherwise. +/// +/// Panics if `amount > length`. +#[cfg(feature = "std")] +#[cfg_attr(doc_cfg, doc(cfg(feature = "std")))] +pub fn sample_weighted<R, F, X>( + rng: &mut R, length: usize, weight: F, amount: usize, +) -> Result<IndexVec, WeightedError> +where + R: Rng + ?Sized, + F: Fn(usize) -> X, + X: Into<f64>, +{ + if length > (core::u32::MAX as usize) { + sample_efraimidis_spirakis(rng, length, weight, amount) + } else { + assert!(amount <= core::u32::MAX as usize); + let amount = amount as u32; + let length = length as u32; + sample_efraimidis_spirakis(rng, length, weight, amount) + } +} + + +/// Randomly sample exactly `amount` distinct indices from `0..length`, and +/// return them in an arbitrary order (there is no guarantee of shuffling or +/// ordering). The weights are to be provided by the input function `weights`, +/// which will be called once for each index. +/// +/// This implementation uses the algorithm described by Efraimidis and Spirakis +/// in this paper: https://doi.org/10.1016/j.ipl.2005.11.003 +/// It uses `O(length + amount)` space and `O(length)` time if the +/// "nightly" feature is enabled, or `O(length)` space and `O(length +/// + amount * log length)` time otherwise. +/// +/// Panics if `amount > length`. +#[cfg(feature = "std")] +fn sample_efraimidis_spirakis<R, F, X, N>( + rng: &mut R, length: N, weight: F, amount: N, +) -> Result<IndexVec, WeightedError> +where + R: Rng + ?Sized, + F: Fn(usize) -> X, + X: Into<f64>, + N: UInt, + IndexVec: From<Vec<N>>, +{ + if amount == N::zero() { + return Ok(IndexVec::U32(Vec::new())); + } + + if amount > length { + panic!("`amount` of samples must be less than or equal to `length`"); + } + + struct Element<N> { + index: N, + key: f64, + } + impl<N> PartialOrd for Element<N> { + fn partial_cmp(&self, other: &Self) -> Option<core::cmp::Ordering> { + self.key.partial_cmp(&other.key) + } + } + impl<N> Ord for Element<N> { + fn cmp(&self, other: &Self) -> core::cmp::Ordering { + // partial_cmp will always produce a value, + // because we check that the weights are not nan + self.partial_cmp(other).unwrap() + } + } + impl<N> PartialEq for Element<N> { + fn eq(&self, other: &Self) -> bool { + self.key == other.key + } + } + impl<N> Eq for Element<N> {} + + #[cfg(feature = "nightly")] + { + let mut candidates = Vec::with_capacity(length.as_usize()); + let mut index = N::zero(); + while index < length { + let weight = weight(index.as_usize()).into(); + if !(weight >= 0.) { + return Err(WeightedError::InvalidWeight); + } + + let key = rng.gen::<f64>().powf(1.0 / weight); + candidates.push(Element { index, key }); + + index += N::one(); + } + + // Partially sort the array to find the `amount` elements with the greatest + // keys. Do this by using `select_nth_unstable` to put the elements with + // the *smallest* keys at the beginning of the list in `O(n)` time, which + // provides equivalent information about the elements with the *greatest* keys. + let (_, mid, greater) + = candidates.select_nth_unstable(length.as_usize() - amount.as_usize()); + + let mut result: Vec<N> = Vec::with_capacity(amount.as_usize()); + result.push(mid.index); + for element in greater { + result.push(element.index); + } + Ok(IndexVec::from(result)) + } + + #[cfg(not(feature = "nightly"))] + { + use alloc::collections::BinaryHeap; + + // Partially sort the array such that the `amount` elements with the largest + // keys are first using a binary max heap. + let mut candidates = BinaryHeap::with_capacity(length.as_usize()); + let mut index = N::zero(); + while index < length { + let weight = weight(index.as_usize()).into(); + if !(weight >= 0.) { + return Err(WeightedError::InvalidWeight); + } + + let key = rng.gen::<f64>().powf(1.0 / weight); + candidates.push(Element { index, key }); + + index += N::one(); + } + + let mut result: Vec<N> = Vec::with_capacity(amount.as_usize()); + while result.len() < amount.as_usize() { + result.push(candidates.pop().unwrap().index); + } + Ok(IndexVec::from(result)) + } +} + +/// Randomly sample exactly `amount` indices from `0..length`, using Floyd's +/// combination algorithm. +/// +/// The output values are fully shuffled. (Overhead is under 50%.) +/// +/// This implementation uses `O(amount)` memory and `O(amount^2)` time. +fn sample_floyd<R>(rng: &mut R, length: u32, amount: u32) -> IndexVec +where R: Rng + ?Sized { + // For small amount we use Floyd's fully-shuffled variant. For larger + // amounts this is slow due to Vec::insert performance, so we shuffle + // afterwards. Benchmarks show little overhead from extra logic. + let floyd_shuffle = amount < 50; + + debug_assert!(amount <= length); + let mut indices = Vec::with_capacity(amount as usize); + for j in length - amount..length { + let t = rng.gen_range(0..=j); + if floyd_shuffle { + if let Some(pos) = indices.iter().position(|&x| x == t) { + indices.insert(pos, j); + continue; + } + } else if indices.contains(&t) { + indices.push(j); + continue; + } + indices.push(t); + } + if !floyd_shuffle { + // Reimplement SliceRandom::shuffle with smaller indices + for i in (1..amount).rev() { + // invariant: elements with index > i have been locked in place. + indices.swap(i as usize, rng.gen_range(0..=i) as usize); + } + } + IndexVec::from(indices) +} + +/// Randomly sample exactly `amount` indices from `0..length`, using an inplace +/// partial Fisher-Yates method. +/// Sample an amount of indices using an inplace partial fisher yates method. +/// +/// This allocates the entire `length` of indices and randomizes only the first `amount`. +/// It then truncates to `amount` and returns. +/// +/// This method is not appropriate for large `length` and potentially uses a lot +/// of memory; because of this we only implement for `u32` index (which improves +/// performance in all cases). +/// +/// Set-up is `O(length)` time and memory and shuffling is `O(amount)` time. +fn sample_inplace<R>(rng: &mut R, length: u32, amount: u32) -> IndexVec +where R: Rng + ?Sized { + debug_assert!(amount <= length); + let mut indices: Vec<u32> = Vec::with_capacity(length as usize); + indices.extend(0..length); + for i in 0..amount { + let j: u32 = rng.gen_range(i..length); + indices.swap(i as usize, j as usize); + } + indices.truncate(amount as usize); + debug_assert_eq!(indices.len(), amount as usize); + IndexVec::from(indices) +} + +trait UInt: Copy + PartialOrd + Ord + PartialEq + Eq + SampleUniform + + core::hash::Hash + core::ops::AddAssign { + fn zero() -> Self; + fn one() -> Self; + fn as_usize(self) -> usize; +} +impl UInt for u32 { + #[inline] + fn zero() -> Self { + 0 + } + + #[inline] + fn one() -> Self { + 1 + } + + #[inline] + fn as_usize(self) -> usize { + self as usize + } +} +impl UInt for usize { + #[inline] + fn zero() -> Self { + 0 + } + + #[inline] + fn one() -> Self { + 1 + } + + #[inline] + fn as_usize(self) -> usize { + self + } +} + +/// Randomly sample exactly `amount` indices from `0..length`, using rejection +/// sampling. +/// +/// Since `amount <<< length` there is a low chance of a random sample in +/// `0..length` being a duplicate. We test for duplicates and resample where +/// necessary. The algorithm is `O(amount)` time and memory. +/// +/// This function is generic over X primarily so that results are value-stable +/// over 32-bit and 64-bit platforms. +fn sample_rejection<X: UInt, R>(rng: &mut R, length: X, amount: X) -> IndexVec +where + R: Rng + ?Sized, + IndexVec: From<Vec<X>>, +{ + debug_assert!(amount < length); + #[cfg(feature = "std")] + let mut cache = HashSet::with_capacity(amount.as_usize()); + #[cfg(not(feature = "std"))] + let mut cache = BTreeSet::new(); + let distr = Uniform::new(X::zero(), length); + let mut indices = Vec::with_capacity(amount.as_usize()); + for _ in 0..amount.as_usize() { + let mut pos = distr.sample(rng); + while !cache.insert(pos) { + pos = distr.sample(rng); + } + indices.push(pos); + } + + debug_assert_eq!(indices.len(), amount.as_usize()); + IndexVec::from(indices) +} + +#[cfg(test)] +mod test { + use super::*; + + #[test] + #[cfg(feature = "serde1")] + fn test_serialization_index_vec() { + let some_index_vec = IndexVec::from(vec![254_usize, 234, 2, 1]); + let de_some_index_vec: IndexVec = bincode::deserialize(&bincode::serialize(&some_index_vec).unwrap()).unwrap(); + match (some_index_vec, de_some_index_vec) { + (IndexVec::U32(a), IndexVec::U32(b)) => { + assert_eq!(a, b); + }, + (IndexVec::USize(a), IndexVec::USize(b)) => { + assert_eq!(a, b); + }, + _ => {panic!("failed to seralize/deserialize `IndexVec`")} + } + } + + #[cfg(feature = "alloc")] use alloc::vec; + + #[test] + fn test_sample_boundaries() { + let mut r = crate::test::rng(404); + + assert_eq!(sample_inplace(&mut r, 0, 0).len(), 0); + assert_eq!(sample_inplace(&mut r, 1, 0).len(), 0); + assert_eq!(sample_inplace(&mut r, 1, 1).into_vec(), vec![0]); + + assert_eq!(sample_rejection(&mut r, 1u32, 0).len(), 0); + + assert_eq!(sample_floyd(&mut r, 0, 0).len(), 0); + assert_eq!(sample_floyd(&mut r, 1, 0).len(), 0); + assert_eq!(sample_floyd(&mut r, 1, 1).into_vec(), vec![0]); + + // These algorithms should be fast with big numbers. Test average. + let sum: usize = sample_rejection(&mut r, 1 << 25, 10u32).into_iter().sum(); + assert!(1 << 25 < sum && sum < (1 << 25) * 25); + + let sum: usize = sample_floyd(&mut r, 1 << 25, 10).into_iter().sum(); + assert!(1 << 25 < sum && sum < (1 << 25) * 25); + } + + #[test] + #[cfg_attr(miri, ignore)] // Miri is too slow + fn test_sample_alg() { + let seed_rng = crate::test::rng; + + // We can't test which algorithm is used directly, but Floyd's alg + // should produce different results from the others. (Also, `inplace` + // and `cached` currently use different sizes thus produce different results.) + + // A small length and relatively large amount should use inplace + let (length, amount): (usize, usize) = (100, 50); + let v1 = sample(&mut seed_rng(420), length, amount); + let v2 = sample_inplace(&mut seed_rng(420), length as u32, amount as u32); + assert!(v1.iter().all(|e| e < length)); + assert_eq!(v1, v2); + + // Test Floyd's alg does produce different results + let v3 = sample_floyd(&mut seed_rng(420), length as u32, amount as u32); + assert!(v1 != v3); + + // A large length and small amount should use Floyd + let (length, amount): (usize, usize) = (1 << 20, 50); + let v1 = sample(&mut seed_rng(421), length, amount); + let v2 = sample_floyd(&mut seed_rng(421), length as u32, amount as u32); + assert!(v1.iter().all(|e| e < length)); + assert_eq!(v1, v2); + + // A large length and larger amount should use cache + let (length, amount): (usize, usize) = (1 << 20, 600); + let v1 = sample(&mut seed_rng(422), length, amount); + let v2 = sample_rejection(&mut seed_rng(422), length as u32, amount as u32); + assert!(v1.iter().all(|e| e < length)); + assert_eq!(v1, v2); + } + + #[cfg(feature = "std")] + #[test] + fn test_sample_weighted() { + let seed_rng = crate::test::rng; + for &(amount, len) in &[(0, 10), (5, 10), (10, 10)] { + let v = sample_weighted(&mut seed_rng(423), len, |i| i as f64, amount).unwrap(); + match v { + IndexVec::U32(mut indices) => { + assert_eq!(indices.len(), amount); + indices.sort_unstable(); + indices.dedup(); + assert_eq!(indices.len(), amount); + for &i in &indices { + assert!((i as usize) < len); + } + }, + IndexVec::USize(_) => panic!("expected `IndexVec::U32`"), + } + } + } + + #[test] + fn value_stability_sample() { + let do_test = |length, amount, values: &[u32]| { + let mut buf = [0u32; 8]; + let mut rng = crate::test::rng(410); + + let res = sample(&mut rng, length, amount); + let len = res.len().min(buf.len()); + for (x, y) in res.into_iter().zip(buf.iter_mut()) { + *y = x as u32; + } + assert_eq!( + &buf[0..len], + values, + "failed sampling {}, {}", + length, + amount + ); + }; + + do_test(10, 6, &[8, 0, 3, 5, 9, 6]); // floyd + do_test(25, 10, &[18, 15, 14, 9, 0, 13, 5, 24]); // floyd + do_test(300, 8, &[30, 283, 150, 1, 73, 13, 285, 35]); // floyd + do_test(300, 80, &[31, 289, 248, 154, 5, 78, 19, 286]); // inplace + do_test(300, 180, &[31, 289, 248, 154, 5, 78, 19, 286]); // inplace + + do_test(1_000_000, 8, &[ + 103717, 963485, 826422, 509101, 736394, 807035, 5327, 632573, + ]); // floyd + do_test(1_000_000, 180, &[ + 103718, 963490, 826426, 509103, 736396, 807036, 5327, 632573, + ]); // rejection + } +} diff --git a/third_party/rust/rand/src/seq/mod.rs b/third_party/rust/rand/src/seq/mod.rs new file mode 100644 index 0000000000..069e9e6b19 --- /dev/null +++ b/third_party/rust/rand/src/seq/mod.rs @@ -0,0 +1,1356 @@ +// Copyright 2018 Developers of the Rand project. +// +// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or +// https://www.apache.org/licenses/LICENSE-2.0> or the MIT license +// <LICENSE-MIT or https://opensource.org/licenses/MIT>, at your +// option. This file may not be copied, modified, or distributed +// except according to those terms. + +//! Sequence-related functionality +//! +//! This module provides: +//! +//! * [`SliceRandom`] slice sampling and mutation +//! * [`IteratorRandom`] iterator sampling +//! * [`index::sample`] low-level API to choose multiple indices from +//! `0..length` +//! +//! Also see: +//! +//! * [`crate::distributions::WeightedIndex`] distribution which provides +//! weighted index sampling. +//! +//! In order to make results reproducible across 32-64 bit architectures, all +//! `usize` indices are sampled as a `u32` where possible (also providing a +//! small performance boost in some cases). + + +#[cfg(feature = "alloc")] +#[cfg_attr(doc_cfg, doc(cfg(feature = "alloc")))] +pub mod index; + +#[cfg(feature = "alloc")] use core::ops::Index; + +#[cfg(feature = "alloc")] use alloc::vec::Vec; + +#[cfg(feature = "alloc")] +use crate::distributions::uniform::{SampleBorrow, SampleUniform}; +#[cfg(feature = "alloc")] use crate::distributions::WeightedError; +use crate::Rng; + +/// Extension trait on slices, providing random mutation and sampling methods. +/// +/// This trait is implemented on all `[T]` slice types, providing several +/// methods for choosing and shuffling elements. You must `use` this trait: +/// +/// ``` +/// use rand::seq::SliceRandom; +/// +/// let mut rng = rand::thread_rng(); +/// let mut bytes = "Hello, random!".to_string().into_bytes(); +/// bytes.shuffle(&mut rng); +/// let str = String::from_utf8(bytes).unwrap(); +/// println!("{}", str); +/// ``` +/// Example output (non-deterministic): +/// ```none +/// l,nmroHado !le +/// ``` +pub trait SliceRandom { + /// The element type. + type Item; + + /// Returns a reference to one random element of the slice, or `None` if the + /// slice is empty. + /// + /// For slices, complexity is `O(1)`. + /// + /// # Example + /// + /// ``` + /// use rand::thread_rng; + /// use rand::seq::SliceRandom; + /// + /// let choices = [1, 2, 4, 8, 16, 32]; + /// let mut rng = thread_rng(); + /// println!("{:?}", choices.choose(&mut rng)); + /// assert_eq!(choices[..0].choose(&mut rng), None); + /// ``` + fn choose<R>(&self, rng: &mut R) -> Option<&Self::Item> + where R: Rng + ?Sized; + + /// Returns a mutable reference to one random element of the slice, or + /// `None` if the slice is empty. + /// + /// For slices, complexity is `O(1)`. + fn choose_mut<R>(&mut self, rng: &mut R) -> Option<&mut Self::Item> + where R: Rng + ?Sized; + + /// Chooses `amount` elements from the slice at random, without repetition, + /// and in random order. The returned iterator is appropriate both for + /// collection into a `Vec` and filling an existing buffer (see example). + /// + /// In case this API is not sufficiently flexible, use [`index::sample`]. + /// + /// For slices, complexity is the same as [`index::sample`]. + /// + /// # Example + /// ``` + /// use rand::seq::SliceRandom; + /// + /// let mut rng = &mut rand::thread_rng(); + /// let sample = "Hello, audience!".as_bytes(); + /// + /// // collect the results into a vector: + /// let v: Vec<u8> = sample.choose_multiple(&mut rng, 3).cloned().collect(); + /// + /// // store in a buffer: + /// let mut buf = [0u8; 5]; + /// for (b, slot) in sample.choose_multiple(&mut rng, buf.len()).zip(buf.iter_mut()) { + /// *slot = *b; + /// } + /// ``` + #[cfg(feature = "alloc")] + #[cfg_attr(doc_cfg, doc(cfg(feature = "alloc")))] + fn choose_multiple<R>(&self, rng: &mut R, amount: usize) -> SliceChooseIter<Self, Self::Item> + where R: Rng + ?Sized; + + /// Similar to [`choose`], but where the likelihood of each outcome may be + /// specified. + /// + /// The specified function `weight` maps each item `x` to a relative + /// likelihood `weight(x)`. The probability of each item being selected is + /// therefore `weight(x) / s`, where `s` is the sum of all `weight(x)`. + /// + /// For slices of length `n`, complexity is `O(n)`. + /// See also [`choose_weighted_mut`], [`distributions::weighted`]. + /// + /// # Example + /// + /// ``` + /// use rand::prelude::*; + /// + /// let choices = [('a', 2), ('b', 1), ('c', 1)]; + /// let mut rng = thread_rng(); + /// // 50% chance to print 'a', 25% chance to print 'b', 25% chance to print 'c' + /// println!("{:?}", choices.choose_weighted(&mut rng, |item| item.1).unwrap().0); + /// ``` + /// [`choose`]: SliceRandom::choose + /// [`choose_weighted_mut`]: SliceRandom::choose_weighted_mut + /// [`distributions::weighted`]: crate::distributions::weighted + #[cfg(feature = "alloc")] + #[cfg_attr(doc_cfg, doc(cfg(feature = "alloc")))] + fn choose_weighted<R, F, B, X>( + &self, rng: &mut R, weight: F, + ) -> Result<&Self::Item, WeightedError> + where + R: Rng + ?Sized, + F: Fn(&Self::Item) -> B, + B: SampleBorrow<X>, + X: SampleUniform + + for<'a> ::core::ops::AddAssign<&'a X> + + ::core::cmp::PartialOrd<X> + + Clone + + Default; + + /// Similar to [`choose_mut`], but where the likelihood of each outcome may + /// be specified. + /// + /// The specified function `weight` maps each item `x` to a relative + /// likelihood `weight(x)`. The probability of each item being selected is + /// therefore `weight(x) / s`, where `s` is the sum of all `weight(x)`. + /// + /// For slices of length `n`, complexity is `O(n)`. + /// See also [`choose_weighted`], [`distributions::weighted`]. + /// + /// [`choose_mut`]: SliceRandom::choose_mut + /// [`choose_weighted`]: SliceRandom::choose_weighted + /// [`distributions::weighted`]: crate::distributions::weighted + #[cfg(feature = "alloc")] + #[cfg_attr(doc_cfg, doc(cfg(feature = "alloc")))] + fn choose_weighted_mut<R, F, B, X>( + &mut self, rng: &mut R, weight: F, + ) -> Result<&mut Self::Item, WeightedError> + where + R: Rng + ?Sized, + F: Fn(&Self::Item) -> B, + B: SampleBorrow<X>, + X: SampleUniform + + for<'a> ::core::ops::AddAssign<&'a X> + + ::core::cmp::PartialOrd<X> + + Clone + + Default; + + /// Similar to [`choose_multiple`], but where the likelihood of each element's + /// inclusion in the output may be specified. The elements are returned in an + /// arbitrary, unspecified order. + /// + /// The specified function `weight` maps each item `x` to a relative + /// likelihood `weight(x)`. The probability of each item being selected is + /// therefore `weight(x) / s`, where `s` is the sum of all `weight(x)`. + /// + /// If all of the weights are equal, even if they are all zero, each element has + /// an equal likelihood of being selected. + /// + /// The complexity of this method depends on the feature `partition_at_index`. + /// If the feature is enabled, then for slices of length `n`, the complexity + /// is `O(n)` space and `O(n)` time. Otherwise, the complexity is `O(n)` space and + /// `O(n * log amount)` time. + /// + /// # Example + /// + /// ``` + /// use rand::prelude::*; + /// + /// let choices = [('a', 2), ('b', 1), ('c', 1)]; + /// let mut rng = thread_rng(); + /// // First Draw * Second Draw = total odds + /// // ----------------------- + /// // (50% * 50%) + (25% * 67%) = 41.7% chance that the output is `['a', 'b']` in some order. + /// // (50% * 50%) + (25% * 67%) = 41.7% chance that the output is `['a', 'c']` in some order. + /// // (25% * 33%) + (25% * 33%) = 16.6% chance that the output is `['b', 'c']` in some order. + /// println!("{:?}", choices.choose_multiple_weighted(&mut rng, 2, |item| item.1).unwrap().collect::<Vec<_>>()); + /// ``` + /// [`choose_multiple`]: SliceRandom::choose_multiple + // + // Note: this is feature-gated on std due to usage of f64::powf. + // If necessary, we may use alloc+libm as an alternative (see PR #1089). + #[cfg(feature = "std")] + #[cfg_attr(doc_cfg, doc(cfg(feature = "std")))] + fn choose_multiple_weighted<R, F, X>( + &self, rng: &mut R, amount: usize, weight: F, + ) -> Result<SliceChooseIter<Self, Self::Item>, WeightedError> + where + R: Rng + ?Sized, + F: Fn(&Self::Item) -> X, + X: Into<f64>; + + /// Shuffle a mutable slice in place. + /// + /// For slices of length `n`, complexity is `O(n)`. + /// + /// # Example + /// + /// ``` + /// use rand::seq::SliceRandom; + /// use rand::thread_rng; + /// + /// let mut rng = thread_rng(); + /// let mut y = [1, 2, 3, 4, 5]; + /// println!("Unshuffled: {:?}", y); + /// y.shuffle(&mut rng); + /// println!("Shuffled: {:?}", y); + /// ``` + fn shuffle<R>(&mut self, rng: &mut R) + where R: Rng + ?Sized; + + /// Shuffle a slice in place, but exit early. + /// + /// Returns two mutable slices from the source slice. The first contains + /// `amount` elements randomly permuted. The second has the remaining + /// elements that are not fully shuffled. + /// + /// This is an efficient method to select `amount` elements at random from + /// the slice, provided the slice may be mutated. + /// + /// If you only need to choose elements randomly and `amount > self.len()/2` + /// then you may improve performance by taking + /// `amount = values.len() - amount` and using only the second slice. + /// + /// If `amount` is greater than the number of elements in the slice, this + /// will perform a full shuffle. + /// + /// For slices, complexity is `O(m)` where `m = amount`. + fn partial_shuffle<R>( + &mut self, rng: &mut R, amount: usize, + ) -> (&mut [Self::Item], &mut [Self::Item]) + where R: Rng + ?Sized; +} + +/// Extension trait on iterators, providing random sampling methods. +/// +/// This trait is implemented on all iterators `I` where `I: Iterator + Sized` +/// and provides methods for +/// choosing one or more elements. You must `use` this trait: +/// +/// ``` +/// use rand::seq::IteratorRandom; +/// +/// let mut rng = rand::thread_rng(); +/// +/// let faces = "πππππ π’"; +/// println!("I am {}!", faces.chars().choose(&mut rng).unwrap()); +/// ``` +/// Example output (non-deterministic): +/// ```none +/// I am π! +/// ``` +pub trait IteratorRandom: Iterator + Sized { + /// Choose one element at random from the iterator. + /// + /// Returns `None` if and only if the iterator is empty. + /// + /// This method uses [`Iterator::size_hint`] for optimisation. With an + /// accurate hint and where [`Iterator::nth`] is a constant-time operation + /// this method can offer `O(1)` performance. Where no size hint is + /// available, complexity is `O(n)` where `n` is the iterator length. + /// Partial hints (where `lower > 0`) also improve performance. + /// + /// Note that the output values and the number of RNG samples used + /// depends on size hints. In particular, `Iterator` combinators that don't + /// change the values yielded but change the size hints may result in + /// `choose` returning different elements. If you want consistent results + /// and RNG usage consider using [`IteratorRandom::choose_stable`]. + fn choose<R>(mut self, rng: &mut R) -> Option<Self::Item> + where R: Rng + ?Sized { + let (mut lower, mut upper) = self.size_hint(); + let mut consumed = 0; + let mut result = None; + + // Handling for this condition outside the loop allows the optimizer to eliminate the loop + // when the Iterator is an ExactSizeIterator. This has a large performance impact on e.g. + // seq_iter_choose_from_1000. + if upper == Some(lower) { + return if lower == 0 { + None + } else { + self.nth(gen_index(rng, lower)) + }; + } + + // Continue until the iterator is exhausted + loop { + if lower > 1 { + let ix = gen_index(rng, lower + consumed); + let skip = if ix < lower { + result = self.nth(ix); + lower - (ix + 1) + } else { + lower + }; + if upper == Some(lower) { + return result; + } + consumed += lower; + if skip > 0 { + self.nth(skip - 1); + } + } else { + let elem = self.next(); + if elem.is_none() { + return result; + } + consumed += 1; + if gen_index(rng, consumed) == 0 { + result = elem; + } + } + + let hint = self.size_hint(); + lower = hint.0; + upper = hint.1; + } + } + + /// Choose one element at random from the iterator. + /// + /// Returns `None` if and only if the iterator is empty. + /// + /// This method is very similar to [`choose`] except that the result + /// only depends on the length of the iterator and the values produced by + /// `rng`. Notably for any iterator of a given length this will make the + /// same requests to `rng` and if the same sequence of values are produced + /// the same index will be selected from `self`. This may be useful if you + /// need consistent results no matter what type of iterator you are working + /// with. If you do not need this stability prefer [`choose`]. + /// + /// Note that this method still uses [`Iterator::size_hint`] to skip + /// constructing elements where possible, however the selection and `rng` + /// calls are the same in the face of this optimization. If you want to + /// force every element to be created regardless call `.inspect(|e| ())`. + /// + /// [`choose`]: IteratorRandom::choose + fn choose_stable<R>(mut self, rng: &mut R) -> Option<Self::Item> + where R: Rng + ?Sized { + let mut consumed = 0; + let mut result = None; + + loop { + // Currently the only way to skip elements is `nth()`. So we need to + // store what index to access next here. + // This should be replaced by `advance_by()` once it is stable: + // https://github.com/rust-lang/rust/issues/77404 + let mut next = 0; + + let (lower, _) = self.size_hint(); + if lower >= 2 { + let highest_selected = (0..lower) + .filter(|ix| gen_index(rng, consumed+ix+1) == 0) + .last(); + + consumed += lower; + next = lower; + + if let Some(ix) = highest_selected { + result = self.nth(ix); + next -= ix + 1; + debug_assert!(result.is_some(), "iterator shorter than size_hint().0"); + } + } + + let elem = self.nth(next); + if elem.is_none() { + return result + } + + if gen_index(rng, consumed+1) == 0 { + result = elem; + } + consumed += 1; + } + } + + /// Collects values at random from the iterator into a supplied buffer + /// until that buffer is filled. + /// + /// Although the elements are selected randomly, the order of elements in + /// the buffer is neither stable nor fully random. If random ordering is + /// desired, shuffle the result. + /// + /// Returns the number of elements added to the buffer. This equals the length + /// of the buffer unless the iterator contains insufficient elements, in which + /// case this equals the number of elements available. + /// + /// Complexity is `O(n)` where `n` is the length of the iterator. + /// For slices, prefer [`SliceRandom::choose_multiple`]. + fn choose_multiple_fill<R>(mut self, rng: &mut R, buf: &mut [Self::Item]) -> usize + where R: Rng + ?Sized { + let amount = buf.len(); + let mut len = 0; + while len < amount { + if let Some(elem) = self.next() { + buf[len] = elem; + len += 1; + } else { + // Iterator exhausted; stop early + return len; + } + } + + // Continue, since the iterator was not exhausted + for (i, elem) in self.enumerate() { + let k = gen_index(rng, i + 1 + amount); + if let Some(slot) = buf.get_mut(k) { + *slot = elem; + } + } + len + } + + /// Collects `amount` values at random from the iterator into a vector. + /// + /// This is equivalent to `choose_multiple_fill` except for the result type. + /// + /// Although the elements are selected randomly, the order of elements in + /// the buffer is neither stable nor fully random. If random ordering is + /// desired, shuffle the result. + /// + /// The length of the returned vector equals `amount` unless the iterator + /// contains insufficient elements, in which case it equals the number of + /// elements available. + /// + /// Complexity is `O(n)` where `n` is the length of the iterator. + /// For slices, prefer [`SliceRandom::choose_multiple`]. + #[cfg(feature = "alloc")] + #[cfg_attr(doc_cfg, doc(cfg(feature = "alloc")))] + fn choose_multiple<R>(mut self, rng: &mut R, amount: usize) -> Vec<Self::Item> + where R: Rng + ?Sized { + let mut reservoir = Vec::with_capacity(amount); + reservoir.extend(self.by_ref().take(amount)); + + // Continue unless the iterator was exhausted + // + // note: this prevents iterators that "restart" from causing problems. + // If the iterator stops once, then so do we. + if reservoir.len() == amount { + for (i, elem) in self.enumerate() { + let k = gen_index(rng, i + 1 + amount); + if let Some(slot) = reservoir.get_mut(k) { + *slot = elem; + } + } + } else { + // Don't hang onto extra memory. There is a corner case where + // `amount` was much less than `self.len()`. + reservoir.shrink_to_fit(); + } + reservoir + } +} + + +impl<T> SliceRandom for [T] { + type Item = T; + + fn choose<R>(&self, rng: &mut R) -> Option<&Self::Item> + where R: Rng + ?Sized { + if self.is_empty() { + None + } else { + Some(&self[gen_index(rng, self.len())]) + } + } + + fn choose_mut<R>(&mut self, rng: &mut R) -> Option<&mut Self::Item> + where R: Rng + ?Sized { + if self.is_empty() { + None + } else { + let len = self.len(); + Some(&mut self[gen_index(rng, len)]) + } + } + + #[cfg(feature = "alloc")] + fn choose_multiple<R>(&self, rng: &mut R, amount: usize) -> SliceChooseIter<Self, Self::Item> + where R: Rng + ?Sized { + let amount = ::core::cmp::min(amount, self.len()); + SliceChooseIter { + slice: self, + _phantom: Default::default(), + indices: index::sample(rng, self.len(), amount).into_iter(), + } + } + + #[cfg(feature = "alloc")] + fn choose_weighted<R, F, B, X>( + &self, rng: &mut R, weight: F, + ) -> Result<&Self::Item, WeightedError> + where + R: Rng + ?Sized, + F: Fn(&Self::Item) -> B, + B: SampleBorrow<X>, + X: SampleUniform + + for<'a> ::core::ops::AddAssign<&'a X> + + ::core::cmp::PartialOrd<X> + + Clone + + Default, + { + use crate::distributions::{Distribution, WeightedIndex}; + let distr = WeightedIndex::new(self.iter().map(weight))?; + Ok(&self[distr.sample(rng)]) + } + + #[cfg(feature = "alloc")] + fn choose_weighted_mut<R, F, B, X>( + &mut self, rng: &mut R, weight: F, + ) -> Result<&mut Self::Item, WeightedError> + where + R: Rng + ?Sized, + F: Fn(&Self::Item) -> B, + B: SampleBorrow<X>, + X: SampleUniform + + for<'a> ::core::ops::AddAssign<&'a X> + + ::core::cmp::PartialOrd<X> + + Clone + + Default, + { + use crate::distributions::{Distribution, WeightedIndex}; + let distr = WeightedIndex::new(self.iter().map(weight))?; + Ok(&mut self[distr.sample(rng)]) + } + + #[cfg(feature = "std")] + fn choose_multiple_weighted<R, F, X>( + &self, rng: &mut R, amount: usize, weight: F, + ) -> Result<SliceChooseIter<Self, Self::Item>, WeightedError> + where + R: Rng + ?Sized, + F: Fn(&Self::Item) -> X, + X: Into<f64>, + { + let amount = ::core::cmp::min(amount, self.len()); + Ok(SliceChooseIter { + slice: self, + _phantom: Default::default(), + indices: index::sample_weighted( + rng, + self.len(), + |idx| weight(&self[idx]).into(), + amount, + )? + .into_iter(), + }) + } + + fn shuffle<R>(&mut self, rng: &mut R) + where R: Rng + ?Sized { + for i in (1..self.len()).rev() { + // invariant: elements with index > i have been locked in place. + self.swap(i, gen_index(rng, i + 1)); + } + } + + fn partial_shuffle<R>( + &mut self, rng: &mut R, amount: usize, + ) -> (&mut [Self::Item], &mut [Self::Item]) + where R: Rng + ?Sized { + // This applies Durstenfeld's algorithm for the + // [FisherβYates shuffle](https://en.wikipedia.org/wiki/Fisher%E2%80%93Yates_shuffle#The_modern_algorithm) + // for an unbiased permutation, but exits early after choosing `amount` + // elements. + + let len = self.len(); + let end = if amount >= len { 0 } else { len - amount }; + + for i in (end..len).rev() { + // invariant: elements with index > i have been locked in place. + self.swap(i, gen_index(rng, i + 1)); + } + let r = self.split_at_mut(end); + (r.1, r.0) + } +} + +impl<I> IteratorRandom for I where I: Iterator + Sized {} + + +/// An iterator over multiple slice elements. +/// +/// This struct is created by +/// [`SliceRandom::choose_multiple`](trait.SliceRandom.html#tymethod.choose_multiple). +#[cfg(feature = "alloc")] +#[cfg_attr(doc_cfg, doc(cfg(feature = "alloc")))] +#[derive(Debug)] +pub struct SliceChooseIter<'a, S: ?Sized + 'a, T: 'a> { + slice: &'a S, + _phantom: ::core::marker::PhantomData<T>, + indices: index::IndexVecIntoIter, +} + +#[cfg(feature = "alloc")] +impl<'a, S: Index<usize, Output = T> + ?Sized + 'a, T: 'a> Iterator for SliceChooseIter<'a, S, T> { + type Item = &'a T; + + fn next(&mut self) -> Option<Self::Item> { + // TODO: investigate using SliceIndex::get_unchecked when stable + self.indices.next().map(|i| &self.slice[i as usize]) + } + + fn size_hint(&self) -> (usize, Option<usize>) { + (self.indices.len(), Some(self.indices.len())) + } +} + +#[cfg(feature = "alloc")] +impl<'a, S: Index<usize, Output = T> + ?Sized + 'a, T: 'a> ExactSizeIterator + for SliceChooseIter<'a, S, T> +{ + fn len(&self) -> usize { + self.indices.len() + } +} + + +// Sample a number uniformly between 0 and `ubound`. Uses 32-bit sampling where +// possible, primarily in order to produce the same output on 32-bit and 64-bit +// platforms. +#[inline] +fn gen_index<R: Rng + ?Sized>(rng: &mut R, ubound: usize) -> usize { + if ubound <= (core::u32::MAX as usize) { + rng.gen_range(0..ubound as u32) as usize + } else { + rng.gen_range(0..ubound) + } +} + + +#[cfg(test)] +mod test { + use super::*; + #[cfg(feature = "alloc")] use crate::Rng; + #[cfg(all(feature = "alloc", not(feature = "std")))] use alloc::vec::Vec; + + #[test] + fn test_slice_choose() { + let mut r = crate::test::rng(107); + let chars = [ + 'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm', 'n', + ]; + let mut chosen = [0i32; 14]; + // The below all use a binomial distribution with n=1000, p=1/14. + // binocdf(40, 1000, 1/14) ~= 2e-5; 1-binocdf(106, ..) ~= 2e-5 + for _ in 0..1000 { + let picked = *chars.choose(&mut r).unwrap(); + chosen[(picked as usize) - ('a' as usize)] += 1; + } + for count in chosen.iter() { + assert!(40 < *count && *count < 106); + } + + chosen.iter_mut().for_each(|x| *x = 0); + for _ in 0..1000 { + *chosen.choose_mut(&mut r).unwrap() += 1; + } + for count in chosen.iter() { + assert!(40 < *count && *count < 106); + } + + let mut v: [isize; 0] = []; + assert_eq!(v.choose(&mut r), None); + assert_eq!(v.choose_mut(&mut r), None); + } + + #[test] + fn value_stability_slice() { + let mut r = crate::test::rng(413); + let chars = [ + 'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm', 'n', + ]; + let mut nums = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12]; + + assert_eq!(chars.choose(&mut r), Some(&'l')); + assert_eq!(nums.choose_mut(&mut r), Some(&mut 10)); + + #[cfg(feature = "alloc")] + assert_eq!( + &chars + .choose_multiple(&mut r, 8) + .cloned() + .collect::<Vec<char>>(), + &['d', 'm', 'b', 'n', 'c', 'k', 'h', 'e'] + ); + + #[cfg(feature = "alloc")] + assert_eq!(chars.choose_weighted(&mut r, |_| 1), Ok(&'f')); + #[cfg(feature = "alloc")] + assert_eq!(nums.choose_weighted_mut(&mut r, |_| 1), Ok(&mut 5)); + + let mut r = crate::test::rng(414); + nums.shuffle(&mut r); + assert_eq!(nums, [9, 5, 3, 10, 7, 12, 8, 11, 6, 4, 0, 2, 1]); + nums = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12]; + let res = nums.partial_shuffle(&mut r, 6); + assert_eq!(res.0, &mut [7, 4, 8, 6, 9, 3]); + assert_eq!(res.1, &mut [0, 1, 2, 12, 11, 5, 10]); + } + + #[derive(Clone)] + struct UnhintedIterator<I: Iterator + Clone> { + iter: I, + } + impl<I: Iterator + Clone> Iterator for UnhintedIterator<I> { + type Item = I::Item; + + fn next(&mut self) -> Option<Self::Item> { + self.iter.next() + } + } + + #[derive(Clone)] + struct ChunkHintedIterator<I: ExactSizeIterator + Iterator + Clone> { + iter: I, + chunk_remaining: usize, + chunk_size: usize, + hint_total_size: bool, + } + impl<I: ExactSizeIterator + Iterator + Clone> Iterator for ChunkHintedIterator<I> { + type Item = I::Item; + + fn next(&mut self) -> Option<Self::Item> { + if self.chunk_remaining == 0 { + self.chunk_remaining = ::core::cmp::min(self.chunk_size, self.iter.len()); + } + self.chunk_remaining = self.chunk_remaining.saturating_sub(1); + + self.iter.next() + } + + fn size_hint(&self) -> (usize, Option<usize>) { + ( + self.chunk_remaining, + if self.hint_total_size { + Some(self.iter.len()) + } else { + None + }, + ) + } + } + + #[derive(Clone)] + struct WindowHintedIterator<I: ExactSizeIterator + Iterator + Clone> { + iter: I, + window_size: usize, + hint_total_size: bool, + } + impl<I: ExactSizeIterator + Iterator + Clone> Iterator for WindowHintedIterator<I> { + type Item = I::Item; + + fn next(&mut self) -> Option<Self::Item> { + self.iter.next() + } + + fn size_hint(&self) -> (usize, Option<usize>) { + ( + ::core::cmp::min(self.iter.len(), self.window_size), + if self.hint_total_size { + Some(self.iter.len()) + } else { + None + }, + ) + } + } + + #[test] + #[cfg_attr(miri, ignore)] // Miri is too slow + fn test_iterator_choose() { + let r = &mut crate::test::rng(109); + fn test_iter<R: Rng + ?Sized, Iter: Iterator<Item = usize> + Clone>(r: &mut R, iter: Iter) { + let mut chosen = [0i32; 9]; + for _ in 0..1000 { + let picked = iter.clone().choose(r).unwrap(); + chosen[picked] += 1; + } + for count in chosen.iter() { + // Samples should follow Binomial(1000, 1/9) + // Octave: binopdf(x, 1000, 1/9) gives the prob of *count == x + // Note: have seen 153, which is unlikely but not impossible. + assert!( + 72 < *count && *count < 154, + "count not close to 1000/9: {}", + count + ); + } + } + + test_iter(r, 0..9); + test_iter(r, [0, 1, 2, 3, 4, 5, 6, 7, 8].iter().cloned()); + #[cfg(feature = "alloc")] + test_iter(r, (0..9).collect::<Vec<_>>().into_iter()); + test_iter(r, UnhintedIterator { iter: 0..9 }); + test_iter(r, ChunkHintedIterator { + iter: 0..9, + chunk_size: 4, + chunk_remaining: 4, + hint_total_size: false, + }); + test_iter(r, ChunkHintedIterator { + iter: 0..9, + chunk_size: 4, + chunk_remaining: 4, + hint_total_size: true, + }); + test_iter(r, WindowHintedIterator { + iter: 0..9, + window_size: 2, + hint_total_size: false, + }); + test_iter(r, WindowHintedIterator { + iter: 0..9, + window_size: 2, + hint_total_size: true, + }); + + assert_eq!((0..0).choose(r), None); + assert_eq!(UnhintedIterator { iter: 0..0 }.choose(r), None); + } + + #[test] + #[cfg_attr(miri, ignore)] // Miri is too slow + fn test_iterator_choose_stable() { + let r = &mut crate::test::rng(109); + fn test_iter<R: Rng + ?Sized, Iter: Iterator<Item = usize> + Clone>(r: &mut R, iter: Iter) { + let mut chosen = [0i32; 9]; + for _ in 0..1000 { + let picked = iter.clone().choose_stable(r).unwrap(); + chosen[picked] += 1; + } + for count in chosen.iter() { + // Samples should follow Binomial(1000, 1/9) + // Octave: binopdf(x, 1000, 1/9) gives the prob of *count == x + // Note: have seen 153, which is unlikely but not impossible. + assert!( + 72 < *count && *count < 154, + "count not close to 1000/9: {}", + count + ); + } + } + + test_iter(r, 0..9); + test_iter(r, [0, 1, 2, 3, 4, 5, 6, 7, 8].iter().cloned()); + #[cfg(feature = "alloc")] + test_iter(r, (0..9).collect::<Vec<_>>().into_iter()); + test_iter(r, UnhintedIterator { iter: 0..9 }); + test_iter(r, ChunkHintedIterator { + iter: 0..9, + chunk_size: 4, + chunk_remaining: 4, + hint_total_size: false, + }); + test_iter(r, ChunkHintedIterator { + iter: 0..9, + chunk_size: 4, + chunk_remaining: 4, + hint_total_size: true, + }); + test_iter(r, WindowHintedIterator { + iter: 0..9, + window_size: 2, + hint_total_size: false, + }); + test_iter(r, WindowHintedIterator { + iter: 0..9, + window_size: 2, + hint_total_size: true, + }); + + assert_eq!((0..0).choose(r), None); + assert_eq!(UnhintedIterator { iter: 0..0 }.choose(r), None); + } + + #[test] + #[cfg_attr(miri, ignore)] // Miri is too slow + fn test_iterator_choose_stable_stability() { + fn test_iter(iter: impl Iterator<Item = usize> + Clone) -> [i32; 9] { + let r = &mut crate::test::rng(109); + let mut chosen = [0i32; 9]; + for _ in 0..1000 { + let picked = iter.clone().choose_stable(r).unwrap(); + chosen[picked] += 1; + } + chosen + } + + let reference = test_iter(0..9); + assert_eq!(test_iter([0, 1, 2, 3, 4, 5, 6, 7, 8].iter().cloned()), reference); + + #[cfg(feature = "alloc")] + assert_eq!(test_iter((0..9).collect::<Vec<_>>().into_iter()), reference); + assert_eq!(test_iter(UnhintedIterator { iter: 0..9 }), reference); + assert_eq!(test_iter(ChunkHintedIterator { + iter: 0..9, + chunk_size: 4, + chunk_remaining: 4, + hint_total_size: false, + }), reference); + assert_eq!(test_iter(ChunkHintedIterator { + iter: 0..9, + chunk_size: 4, + chunk_remaining: 4, + hint_total_size: true, + }), reference); + assert_eq!(test_iter(WindowHintedIterator { + iter: 0..9, + window_size: 2, + hint_total_size: false, + }), reference); + assert_eq!(test_iter(WindowHintedIterator { + iter: 0..9, + window_size: 2, + hint_total_size: true, + }), reference); + } + + #[test] + #[cfg_attr(miri, ignore)] // Miri is too slow + fn test_shuffle() { + let mut r = crate::test::rng(108); + let empty: &mut [isize] = &mut []; + empty.shuffle(&mut r); + let mut one = [1]; + one.shuffle(&mut r); + let b: &[_] = &[1]; + assert_eq!(one, b); + + let mut two = [1, 2]; + two.shuffle(&mut r); + assert!(two == [1, 2] || two == [2, 1]); + + fn move_last(slice: &mut [usize], pos: usize) { + // use slice[pos..].rotate_left(1); once we can use that + let last_val = slice[pos]; + for i in pos..slice.len() - 1 { + slice[i] = slice[i + 1]; + } + *slice.last_mut().unwrap() = last_val; + } + let mut counts = [0i32; 24]; + for _ in 0..10000 { + let mut arr: [usize; 4] = [0, 1, 2, 3]; + arr.shuffle(&mut r); + let mut permutation = 0usize; + let mut pos_value = counts.len(); + for i in 0..4 { + pos_value /= 4 - i; + let pos = arr.iter().position(|&x| x == i).unwrap(); + assert!(pos < (4 - i)); + permutation += pos * pos_value; + move_last(&mut arr, pos); + assert_eq!(arr[3], i); + } + for (i, &a) in arr.iter().enumerate() { + assert_eq!(a, i); + } + counts[permutation] += 1; + } + for count in counts.iter() { + // Binomial(10000, 1/24) with average 416.667 + // Octave: binocdf(n, 10000, 1/24) + // 99.9% chance samples lie within this range: + assert!(352 <= *count && *count <= 483, "count: {}", count); + } + } + + #[test] + fn test_partial_shuffle() { + let mut r = crate::test::rng(118); + + let mut empty: [u32; 0] = []; + let res = empty.partial_shuffle(&mut r, 10); + assert_eq!((res.0.len(), res.1.len()), (0, 0)); + + let mut v = [1, 2, 3, 4, 5]; + let res = v.partial_shuffle(&mut r, 2); + assert_eq!((res.0.len(), res.1.len()), (2, 3)); + assert!(res.0[0] != res.0[1]); + // First elements are only modified if selected, so at least one isn't modified: + assert!(res.1[0] == 1 || res.1[1] == 2 || res.1[2] == 3); + } + + #[test] + #[cfg(feature = "alloc")] + fn test_sample_iter() { + let min_val = 1; + let max_val = 100; + + let mut r = crate::test::rng(401); + let vals = (min_val..max_val).collect::<Vec<i32>>(); + let small_sample = vals.iter().choose_multiple(&mut r, 5); + let large_sample = vals.iter().choose_multiple(&mut r, vals.len() + 5); + + assert_eq!(small_sample.len(), 5); + assert_eq!(large_sample.len(), vals.len()); + // no randomization happens when amount >= len + assert_eq!(large_sample, vals.iter().collect::<Vec<_>>()); + + assert!(small_sample + .iter() + .all(|e| { **e >= min_val && **e <= max_val })); + } + + #[test] + #[cfg(feature = "alloc")] + #[cfg_attr(miri, ignore)] // Miri is too slow + fn test_weighted() { + let mut r = crate::test::rng(406); + const N_REPS: u32 = 3000; + let weights = [1u32, 2, 3, 0, 5, 6, 7, 1, 2, 3, 4, 5, 6, 7]; + let total_weight = weights.iter().sum::<u32>() as f32; + + let verify = |result: [i32; 14]| { + for (i, count) in result.iter().enumerate() { + let exp = (weights[i] * N_REPS) as f32 / total_weight; + let mut err = (*count as f32 - exp).abs(); + if err != 0.0 { + err /= exp; + } + assert!(err <= 0.25); + } + }; + + // choose_weighted + fn get_weight<T>(item: &(u32, T)) -> u32 { + item.0 + } + let mut chosen = [0i32; 14]; + let mut items = [(0u32, 0usize); 14]; // (weight, index) + for (i, item) in items.iter_mut().enumerate() { + *item = (weights[i], i); + } + for _ in 0..N_REPS { + let item = items.choose_weighted(&mut r, get_weight).unwrap(); + chosen[item.1] += 1; + } + verify(chosen); + + // choose_weighted_mut + let mut items = [(0u32, 0i32); 14]; // (weight, count) + for (i, item) in items.iter_mut().enumerate() { + *item = (weights[i], 0); + } + for _ in 0..N_REPS { + items.choose_weighted_mut(&mut r, get_weight).unwrap().1 += 1; + } + for (ch, item) in chosen.iter_mut().zip(items.iter()) { + *ch = item.1; + } + verify(chosen); + + // Check error cases + let empty_slice = &mut [10][0..0]; + assert_eq!( + empty_slice.choose_weighted(&mut r, |_| 1), + Err(WeightedError::NoItem) + ); + assert_eq!( + empty_slice.choose_weighted_mut(&mut r, |_| 1), + Err(WeightedError::NoItem) + ); + assert_eq!( + ['x'].choose_weighted_mut(&mut r, |_| 0), + Err(WeightedError::AllWeightsZero) + ); + assert_eq!( + [0, -1].choose_weighted_mut(&mut r, |x| *x), + Err(WeightedError::InvalidWeight) + ); + assert_eq!( + [-1, 0].choose_weighted_mut(&mut r, |x| *x), + Err(WeightedError::InvalidWeight) + ); + } + + #[test] + fn value_stability_choose() { + fn choose<I: Iterator<Item = u32>>(iter: I) -> Option<u32> { + let mut rng = crate::test::rng(411); + iter.choose(&mut rng) + } + + assert_eq!(choose([].iter().cloned()), None); + assert_eq!(choose(0..100), Some(33)); + assert_eq!(choose(UnhintedIterator { iter: 0..100 }), Some(40)); + assert_eq!( + choose(ChunkHintedIterator { + iter: 0..100, + chunk_size: 32, + chunk_remaining: 32, + hint_total_size: false, + }), + Some(39) + ); + assert_eq!( + choose(ChunkHintedIterator { + iter: 0..100, + chunk_size: 32, + chunk_remaining: 32, + hint_total_size: true, + }), + Some(39) + ); + assert_eq!( + choose(WindowHintedIterator { + iter: 0..100, + window_size: 32, + hint_total_size: false, + }), + Some(90) + ); + assert_eq!( + choose(WindowHintedIterator { + iter: 0..100, + window_size: 32, + hint_total_size: true, + }), + Some(90) + ); + } + + #[test] + fn value_stability_choose_stable() { + fn choose<I: Iterator<Item = u32>>(iter: I) -> Option<u32> { + let mut rng = crate::test::rng(411); + iter.choose_stable(&mut rng) + } + + assert_eq!(choose([].iter().cloned()), None); + assert_eq!(choose(0..100), Some(40)); + assert_eq!(choose(UnhintedIterator { iter: 0..100 }), Some(40)); + assert_eq!( + choose(ChunkHintedIterator { + iter: 0..100, + chunk_size: 32, + chunk_remaining: 32, + hint_total_size: false, + }), + Some(40) + ); + assert_eq!( + choose(ChunkHintedIterator { + iter: 0..100, + chunk_size: 32, + chunk_remaining: 32, + hint_total_size: true, + }), + Some(40) + ); + assert_eq!( + choose(WindowHintedIterator { + iter: 0..100, + window_size: 32, + hint_total_size: false, + }), + Some(40) + ); + assert_eq!( + choose(WindowHintedIterator { + iter: 0..100, + window_size: 32, + hint_total_size: true, + }), + Some(40) + ); + } + + #[test] + fn value_stability_choose_multiple() { + fn do_test<I: Iterator<Item = u32>>(iter: I, v: &[u32]) { + let mut rng = crate::test::rng(412); + let mut buf = [0u32; 8]; + assert_eq!(iter.choose_multiple_fill(&mut rng, &mut buf), v.len()); + assert_eq!(&buf[0..v.len()], v); + } + + do_test(0..4, &[0, 1, 2, 3]); + do_test(0..8, &[0, 1, 2, 3, 4, 5, 6, 7]); + do_test(0..100, &[58, 78, 80, 92, 43, 8, 96, 7]); + + #[cfg(feature = "alloc")] + { + fn do_test<I: Iterator<Item = u32>>(iter: I, v: &[u32]) { + let mut rng = crate::test::rng(412); + assert_eq!(iter.choose_multiple(&mut rng, v.len()), v); + } + + do_test(0..4, &[0, 1, 2, 3]); + do_test(0..8, &[0, 1, 2, 3, 4, 5, 6, 7]); + do_test(0..100, &[58, 78, 80, 92, 43, 8, 96, 7]); + } + } + + #[test] + #[cfg(feature = "std")] + fn test_multiple_weighted_edge_cases() { + use super::*; + + let mut rng = crate::test::rng(413); + + // Case 1: One of the weights is 0 + let choices = [('a', 2), ('b', 1), ('c', 0)]; + for _ in 0..100 { + let result = choices + .choose_multiple_weighted(&mut rng, 2, |item| item.1) + .unwrap() + .collect::<Vec<_>>(); + + assert_eq!(result.len(), 2); + assert!(!result.iter().any(|val| val.0 == 'c')); + } + + // Case 2: All of the weights are 0 + let choices = [('a', 0), ('b', 0), ('c', 0)]; + + assert_eq!(choices + .choose_multiple_weighted(&mut rng, 2, |item| item.1) + .unwrap().count(), 2); + + // Case 3: Negative weights + let choices = [('a', -1), ('b', 1), ('c', 1)]; + assert_eq!( + choices + .choose_multiple_weighted(&mut rng, 2, |item| item.1) + .unwrap_err(), + WeightedError::InvalidWeight + ); + + // Case 4: Empty list + let choices = []; + assert_eq!(choices + .choose_multiple_weighted(&mut rng, 0, |_: &()| 0) + .unwrap().count(), 0); + + // Case 5: NaN weights + let choices = [('a', core::f64::NAN), ('b', 1.0), ('c', 1.0)]; + assert_eq!( + choices + .choose_multiple_weighted(&mut rng, 2, |item| item.1) + .unwrap_err(), + WeightedError::InvalidWeight + ); + + // Case 6: +infinity weights + let choices = [('a', core::f64::INFINITY), ('b', 1.0), ('c', 1.0)]; + for _ in 0..100 { + let result = choices + .choose_multiple_weighted(&mut rng, 2, |item| item.1) + .unwrap() + .collect::<Vec<_>>(); + assert_eq!(result.len(), 2); + assert!(result.iter().any(|val| val.0 == 'a')); + } + + // Case 7: -infinity weights + let choices = [('a', core::f64::NEG_INFINITY), ('b', 1.0), ('c', 1.0)]; + assert_eq!( + choices + .choose_multiple_weighted(&mut rng, 2, |item| item.1) + .unwrap_err(), + WeightedError::InvalidWeight + ); + + // Case 8: -0 weights + let choices = [('a', -0.0), ('b', 1.0), ('c', 1.0)]; + assert!(choices + .choose_multiple_weighted(&mut rng, 2, |item| item.1) + .is_ok()); + } + + #[test] + #[cfg(feature = "std")] + fn test_multiple_weighted_distributions() { + use super::*; + + // The theoretical probabilities of the different outcomes are: + // AB: 0.5 * 0.5 = 0.250 + // AC: 0.5 * 0.5 = 0.250 + // BA: 0.25 * 0.67 = 0.167 + // BC: 0.25 * 0.33 = 0.082 + // CA: 0.25 * 0.67 = 0.167 + // CB: 0.25 * 0.33 = 0.082 + let choices = [('a', 2), ('b', 1), ('c', 1)]; + let mut rng = crate::test::rng(414); + + let mut results = [0i32; 3]; + let expected_results = [4167, 4167, 1666]; + for _ in 0..10000 { + let result = choices + .choose_multiple_weighted(&mut rng, 2, |item| item.1) + .unwrap() + .collect::<Vec<_>>(); + + assert_eq!(result.len(), 2); + + match (result[0].0, result[1].0) { + ('a', 'b') | ('b', 'a') => { + results[0] += 1; + } + ('a', 'c') | ('c', 'a') => { + results[1] += 1; + } + ('b', 'c') | ('c', 'b') => { + results[2] += 1; + } + (_, _) => panic!("unexpected result"), + } + } + + let mut diffs = results + .iter() + .zip(&expected_results) + .map(|(a, b)| (a - b).abs()); + assert!(!diffs.any(|deviation| deviation > 100)); + } +} |