diff options
author | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-05-18 02:49:42 +0000 |
---|---|---|
committer | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-05-18 02:49:42 +0000 |
commit | 837b550238aa671a591ccf282dddeab29cadb206 (patch) | |
tree | 914b6b8862bace72bd3245ca184d374b08d8a672 /vendor/criterion/src/stats | |
parent | Adding debian version 1.70.0+dfsg2-1. (diff) | |
download | rustc-837b550238aa671a591ccf282dddeab29cadb206.tar.xz rustc-837b550238aa671a591ccf282dddeab29cadb206.zip |
Merging upstream version 1.71.1+dfsg1.
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'vendor/criterion/src/stats')
19 files changed, 2022 insertions, 0 deletions
diff --git a/vendor/criterion/src/stats/bivariate/bootstrap.rs b/vendor/criterion/src/stats/bivariate/bootstrap.rs new file mode 100755 index 000000000..9eb7fa7b5 --- /dev/null +++ b/vendor/criterion/src/stats/bivariate/bootstrap.rs @@ -0,0 +1,91 @@ +#[cfg(test)] +macro_rules! test { + ($ty:ident) => { + mod $ty { + use quickcheck::TestResult; + use quickcheck::quickcheck; + use approx::relative_eq; + + use crate::stats::bivariate::regression::Slope; + use crate::stats::bivariate::Data; + + quickcheck! { + fn means(size: u8, start: u8, + offset: u8, nresamples: u8) -> TestResult { + let size = size as usize; + let start = start as usize; + let offset = offset as usize; + let nresamples = nresamples as usize; + if let Some(x) = crate::stats::test::vec::<$ty>(size, start) { + let y = crate::stats::test::vec::<$ty>(size + offset, start + offset).unwrap(); + let data = Data::new(&x[start..], &y[start+offset..]); + + let (x_means, y_means) = if nresamples > 0 { + data.bootstrap(nresamples, |d| (d.x().mean(), d.y().mean())) + } else { + return TestResult::discard(); + }; + + let x_min = data.x().min(); + let x_max = data.x().max(); + let y_min = data.y().min(); + let y_max = data.y().max(); + + TestResult::from_bool( + // Computed the correct number of resamples + x_means.len() == nresamples && + y_means.len() == nresamples && + // No uninitialized values + x_means.iter().all(|&x| { + (x > x_min || relative_eq!(x, x_min)) && + (x < x_max || relative_eq!(x, x_max)) + }) && + y_means.iter().all(|&y| { + (y > y_min || relative_eq!(y, y_min)) && + (y < y_max || relative_eq!(y, y_max)) + }) + ) + } else { + TestResult::discard() + } + } + } + + quickcheck! { + fn slope(size: u8, start: u8, + offset: u8, nresamples: u8) -> TestResult { + let size = size as usize; + let start = start as usize; + let offset = offset as usize; + let nresamples = nresamples as usize; + if let Some(x) = crate::stats::test::vec::<$ty>(size, start) { + let y = crate::stats::test::vec::<$ty>(size + offset, start + offset).unwrap(); + let data = Data::new(&x[start..], &y[start+offset..]); + + let slopes = if nresamples > 0 { + data.bootstrap(nresamples, |d| (Slope::fit(&d),)).0 + } else { + return TestResult::discard(); + }; + + TestResult::from_bool( + // Computed the correct number of resamples + slopes.len() == nresamples && + // No uninitialized values + slopes.iter().all(|s| s.0 > 0.) + ) + } else { + TestResult::discard() + } + } + } + + } + }; +} + +#[cfg(test)] +mod test { + test!(f32); + test!(f64); +} diff --git a/vendor/criterion/src/stats/bivariate/mod.rs b/vendor/criterion/src/stats/bivariate/mod.rs new file mode 100755 index 000000000..d1e8df703 --- /dev/null +++ b/vendor/criterion/src/stats/bivariate/mod.rs @@ -0,0 +1,131 @@ +//! Bivariate analysis + +mod bootstrap; +pub mod regression; +mod resamples; + +use crate::stats::bivariate::resamples::Resamples; +use crate::stats::float::Float; +use crate::stats::tuple::{Tuple, TupledDistributionsBuilder}; +use crate::stats::univariate::Sample; +use rayon::iter::{IntoParallelIterator, ParallelIterator}; + +/// Bivariate `(X, Y)` data +/// +/// Invariants: +/// +/// - No `NaN`s in the data +/// - At least two data points in the set +pub struct Data<'a, X, Y>(&'a [X], &'a [Y]); + +impl<'a, X, Y> Copy for Data<'a, X, Y> {} + +#[cfg_attr(feature = "cargo-clippy", allow(clippy::expl_impl_clone_on_copy))] +impl<'a, X, Y> Clone for Data<'a, X, Y> { + fn clone(&self) -> Data<'a, X, Y> { + *self + } +} + +impl<'a, X, Y> Data<'a, X, Y> { + /// Returns the length of the data set + pub fn len(&self) -> usize { + self.0.len() + } + + /// Iterate over the data set + pub fn iter(&self) -> Pairs<'a, X, Y> { + Pairs { + data: *self, + state: 0, + } + } +} + +impl<'a, X, Y> Data<'a, X, Y> +where + X: Float, + Y: Float, +{ + /// Creates a new data set from two existing slices + pub fn new(xs: &'a [X], ys: &'a [Y]) -> Data<'a, X, Y> { + assert!( + xs.len() == ys.len() + && xs.len() > 1 + && xs.iter().all(|x| !x.is_nan()) + && ys.iter().all(|y| !y.is_nan()) + ); + + Data(xs, ys) + } + + // TODO Remove the `T` parameter in favor of `S::Output` + /// Returns the bootstrap distributions of the parameters estimated by the `statistic` + /// + /// - Multi-threaded + /// - Time: `O(nresamples)` + /// - Memory: `O(nresamples)` + pub fn bootstrap<T, S>(&self, nresamples: usize, statistic: S) -> T::Distributions + where + S: Fn(Data<X, Y>) -> T + Sync, + T: Tuple + Send, + T::Distributions: Send, + T::Builder: Send, + { + (0..nresamples) + .into_par_iter() + .map_init( + || Resamples::new(*self), + |resamples, _| statistic(resamples.next()), + ) + .fold( + || T::Builder::new(0), + |mut sub_distributions, sample| { + sub_distributions.push(sample); + sub_distributions + }, + ) + .reduce( + || T::Builder::new(0), + |mut a, mut b| { + a.extend(&mut b); + a + }, + ) + .complete() + } + + /// Returns a view into the `X` data + pub fn x(&self) -> &'a Sample<X> { + Sample::new(self.0) + } + + /// Returns a view into the `Y` data + pub fn y(&self) -> &'a Sample<Y> { + Sample::new(self.1) + } +} + +/// Iterator over `Data` +pub struct Pairs<'a, X: 'a, Y: 'a> { + data: Data<'a, X, Y>, + state: usize, +} + +impl<'a, X, Y> Iterator for Pairs<'a, X, Y> { + type Item = (&'a X, &'a Y); + + fn next(&mut self) -> Option<(&'a X, &'a Y)> { + if self.state < self.data.len() { + let i = self.state; + self.state += 1; + + // This is safe because i will always be < self.data.{0,1}.len() + debug_assert!(i < self.data.0.len()); + debug_assert!(i < self.data.1.len()); + unsafe { Some((self.data.0.get_unchecked(i), self.data.1.get_unchecked(i))) } + } else { + None + } + } +} diff --git a/vendor/criterion/src/stats/bivariate/regression.rs b/vendor/criterion/src/stats/bivariate/regression.rs new file mode 100755 index 000000000..f09443f10 --- /dev/null +++ b/vendor/criterion/src/stats/bivariate/regression.rs @@ -0,0 +1,53 @@ +//! Regression analysis + +use crate::stats::bivariate::Data; +use crate::stats::float::Float; + +/// A straight line that passes through the origin `y = m * x` +#[derive(Clone, Copy)] +pub struct Slope<A>(pub A) +where + A: Float; + +impl<A> Slope<A> +where + A: Float, +{ + /// Fits the data to a straight line that passes through the origin using ordinary least + /// squares + /// + /// - Time: `O(length)` + pub fn fit(data: &Data<'_, A, A>) -> Slope<A> { + let xs = data.0; + let ys = data.1; + + let xy = crate::stats::dot(xs, ys); + let x2 = crate::stats::dot(xs, xs); + + Slope(xy / x2) + } + + /// Computes the goodness of fit (coefficient of determination) for this data set + /// + /// - Time: `O(length)` + pub fn r_squared(&self, data: &Data<'_, A, A>) -> A { + let _0 = A::cast(0); + let _1 = A::cast(1); + let m = self.0; + let xs = data.0; + let ys = data.1; + + let n = A::cast(xs.len()); + let y_bar = crate::stats::sum(ys) / n; + + let mut ss_res = _0; + let mut ss_tot = _0; + + for (&x, &y) in data.iter() { + ss_res = ss_res + (y - m * x).powi(2); + ss_tot = ss_res + (y - y_bar).powi(2); + } + + _1 - ss_res / ss_tot + } +} diff --git a/vendor/criterion/src/stats/bivariate/resamples.rs b/vendor/criterion/src/stats/bivariate/resamples.rs new file mode 100755 index 000000000..e254dc792 --- /dev/null +++ b/vendor/criterion/src/stats/bivariate/resamples.rs @@ -0,0 +1,61 @@ +use crate::stats::bivariate::Data; +use crate::stats::float::Float; +use crate::stats::rand_util::{new_rng, Rng}; + +pub struct Resamples<'a, X, Y> +where + X: 'a + Float, + Y: 'a + Float, +{ + rng: Rng, + data: (&'a [X], &'a [Y]), + stage: Option<(Vec<X>, Vec<Y>)>, +} + +#[cfg_attr(feature = "cargo-clippy", allow(clippy::should_implement_trait))] +impl<'a, X, Y> Resamples<'a, X, Y> +where + X: 'a + Float, + Y: 'a + Float, +{ + pub fn new(data: Data<'a, X, Y>) -> Resamples<'a, X, Y> { + Resamples { + rng: new_rng(), + data: (data.x(), data.y()), + stage: None, + } + } + + pub fn next(&mut self) -> Data<'_, X, Y> { + let n = self.data.0.len(); + + match self.stage { + None => { + let mut stage = (Vec::with_capacity(n), Vec::with_capacity(n)); + + for _ in 0..n { + let i = self.rng.rand_range(0u64..(self.data.0.len() as u64)) as usize; + + stage.0.push(self.data.0[i]); + stage.1.push(self.data.1[i]); + } + + self.stage = Some(stage); + } + Some(ref mut stage) => { + for i in 0..n { + let j = self.rng.rand_range(0u64..(self.data.0.len() as u64)) as usize; + + stage.0[i] = self.data.0[j]; + stage.1[i] = self.data.1[j]; + } + } + } + + if let Some((ref x, ref y)) = self.stage { + Data(x, y) + } else { + unreachable!(); + } + } +} diff --git a/vendor/criterion/src/stats/float.rs b/vendor/criterion/src/stats/float.rs new file mode 100755 index 000000000..b7748ddb5 --- /dev/null +++ b/vendor/criterion/src/stats/float.rs @@ -0,0 +1,15 @@ +//! Float trait + +use cast::From; +use num_traits::float; + +/// This is an extension of `num_traits::float::Float` that adds safe +/// casting and Sync + Send. Once `num_traits` has these features this +/// can be removed. +pub trait Float: + float::Float + From<usize, Output = Self> + From<f32, Output = Self> + Sync + Send +{ +} + +impl Float for f32 {} +impl Float for f64 {} diff --git a/vendor/criterion/src/stats/mod.rs b/vendor/criterion/src/stats/mod.rs new file mode 100755 index 000000000..4f926debd --- /dev/null +++ b/vendor/criterion/src/stats/mod.rs @@ -0,0 +1,112 @@ +//! [Criterion]'s statistics library. +//! +//! [Criterion]: https://github.com/bheisler/criterion.rs +//! +//! **WARNING** This library is criterion's implementation detail and there no plans to stabilize +//! it. In other words, the API may break at any time without notice. + +#[cfg(test)] +mod test; + +pub mod bivariate; +pub mod tuple; +pub mod univariate; + +mod float; +mod rand_util; + +use std::mem; +use std::ops::Deref; + +use crate::stats::float::Float; +use crate::stats::univariate::Sample; + +/// The bootstrap distribution of some parameter +#[derive(Clone)] +pub struct Distribution<A>(Box<[A]>); + +impl<A> Distribution<A> +where + A: Float, +{ + /// Create a distribution from the given values + pub fn from(values: Box<[A]>) -> Distribution<A> { + Distribution(values) + } + + /// Computes the confidence interval of the population parameter using percentiles + /// + /// # Panics + /// + /// Panics if the `confidence_level` is not in the `(0, 1)` range. + pub fn confidence_interval(&self, confidence_level: A) -> (A, A) + where + usize: cast::From<A, Output = Result<usize, cast::Error>>, + { + let _0 = A::cast(0); + let _1 = A::cast(1); + let _50 = A::cast(50); + + assert!(confidence_level > _0 && confidence_level < _1); + + let percentiles = self.percentiles(); + + // FIXME(privacy) this should use the `at_unchecked()` method + ( + percentiles.at(_50 * (_1 - confidence_level)), + percentiles.at(_50 * (_1 + confidence_level)), + ) + } + + /// Computes the "likelihood" of seeing the value `t` or "more extreme" values in the + /// distribution. + pub fn p_value(&self, t: A, tails: &Tails) -> A { + use std::cmp; + + let n = self.0.len(); + let hits = self.0.iter().filter(|&&x| x < t).count(); + + let tails = A::cast(match *tails { + Tails::One => 1, + Tails::Two => 2, + }); + + A::cast(cmp::min(hits, n - hits)) / A::cast(n) * tails + } +} + +impl<A> Deref for Distribution<A> { + type Target = Sample<A>; + + fn deref(&self) -> &Sample<A> { + let slice: &[_] = &self.0; + + unsafe { mem::transmute(slice) } + } +} + +/// Number of tails for significance testing +pub enum Tails { + /// One tailed test + One, + /// Two tailed test + Two, +} + +fn dot<A>(xs: &[A], ys: &[A]) -> A +where + A: Float, +{ + xs.iter() + .zip(ys) + .fold(A::cast(0), |acc, (&x, &y)| acc + x * y) +} + +fn sum<A>(xs: &[A]) -> A +where + A: Float, +{ + use std::ops::Add; + + xs.iter().cloned().fold(A::cast(0), Add::add) +} diff --git a/vendor/criterion/src/stats/rand_util.rs b/vendor/criterion/src/stats/rand_util.rs new file mode 100755 index 000000000..ed374cf98 --- /dev/null +++ b/vendor/criterion/src/stats/rand_util.rs @@ -0,0 +1,21 @@ +use oorandom::Rand64; +use std::cell::RefCell; +use std::time::{SystemTime, UNIX_EPOCH}; + +pub type Rng = Rand64; + +thread_local! { + static SEED_RAND: RefCell<Rand64> = RefCell::new(Rand64::new( + SystemTime::now().duration_since(UNIX_EPOCH) + .expect("Time went backwards") + .as_millis() + )); +} + +pub fn new_rng() -> Rng { + SEED_RAND.with(|r| { + let mut r = r.borrow_mut(); + let seed = ((r.rand_u64() as u128) << 64) | (r.rand_u64() as u128); + Rand64::new(seed) + }) +} diff --git a/vendor/criterion/src/stats/test.rs b/vendor/criterion/src/stats/test.rs new file mode 100755 index 000000000..9e13f3084 --- /dev/null +++ b/vendor/criterion/src/stats/test.rs @@ -0,0 +1,16 @@ +use rand::distributions::{Distribution, Standard}; +use rand::prelude::*; +use rand::rngs::StdRng; + +pub fn vec<T>(size: usize, start: usize) -> Option<Vec<T>> +where + Standard: Distribution<T>, +{ + if size > start + 2 { + let mut rng = StdRng::from_entropy(); + + Some((0..size).map(|_| rng.gen()).collect()) + } else { + None + } +} diff --git a/vendor/criterion/src/stats/tuple.rs b/vendor/criterion/src/stats/tuple.rs new file mode 100755 index 000000000..1c075159e --- /dev/null +++ b/vendor/criterion/src/stats/tuple.rs @@ -0,0 +1,253 @@ +//! Helper traits for tupling/untupling + +use crate::stats::Distribution; + +/// Any tuple: `(A, B, ..)` +pub trait Tuple: Sized { + /// A tuple of distributions associated with this tuple + type Distributions: TupledDistributions<Item = Self>; + + /// A tuple of vectors associated with this tuple + type Builder: TupledDistributionsBuilder<Item = Self>; +} + +/// A tuple of distributions: `(Distribution<A>, Distribution<B>, ..)` +pub trait TupledDistributions: Sized { + /// A tuple that can be pushed/inserted into the tupled distributions + type Item: Tuple<Distributions = Self>; +} + +/// A tuple of vecs used to build distributions. +pub trait TupledDistributionsBuilder: Sized { + /// A tuple that can be pushed/inserted into the tupled distributions + type Item: Tuple<Builder = Self>; + + /// Creates a new tuple of vecs + fn new(size: usize) -> Self; + + /// Push one element into each of the vecs + fn push(&mut self, tuple: Self::Item); + + /// Append one tuple of vecs to this one, leaving the vecs in the other tuple empty + fn extend(&mut self, other: &mut Self); + + /// Convert the tuple of vectors into a tuple of distributions + fn complete(self) -> <Self::Item as Tuple>::Distributions; +} + +impl<A> Tuple for (A,) +where + A: Copy, +{ + type Distributions = (Distribution<A>,); + type Builder = (Vec<A>,); +} + +impl<A> TupledDistributions for (Distribution<A>,) +where + A: Copy, +{ + type Item = (A,); +} +impl<A> TupledDistributionsBuilder for (Vec<A>,) +where + A: Copy, +{ + type Item = (A,); + + fn new(size: usize) -> (Vec<A>,) { + (Vec::with_capacity(size),) + } + + fn push(&mut self, tuple: (A,)) { + (self.0).push(tuple.0); + } + + fn extend(&mut self, other: &mut (Vec<A>,)) { + (self.0).append(&mut other.0); + } + + fn complete(self) -> (Distribution<A>,) { + (Distribution(self.0.into_boxed_slice()),) + } +} + +impl<A, B> Tuple for (A, B) +where + A: Copy, + B: Copy, +{ + type Distributions = (Distribution<A>, Distribution<B>); + type Builder = (Vec<A>, Vec<B>); +} + +impl<A, B> TupledDistributions for (Distribution<A>, Distribution<B>) +where + A: Copy, + B: Copy, +{ + type Item = (A, B); +} +impl<A, B> TupledDistributionsBuilder for (Vec<A>, Vec<B>) +where + A: Copy, + B: Copy, +{ + type Item = (A, B); + + fn new(size: usize) -> (Vec<A>, Vec<B>) { + (Vec::with_capacity(size), Vec::with_capacity(size)) + } + + fn push(&mut self, tuple: (A, B)) { + (self.0).push(tuple.0); + (self.1).push(tuple.1); + } + + fn extend(&mut self, other: &mut (Vec<A>, Vec<B>)) { + (self.0).append(&mut other.0); + (self.1).append(&mut other.1); + } + + fn complete(self) -> (Distribution<A>, Distribution<B>) { + ( + Distribution(self.0.into_boxed_slice()), + Distribution(self.1.into_boxed_slice()), + ) + } +} + +impl<A, B, C> Tuple for (A, B, C) +where + A: Copy, + B: Copy, + C: Copy, +{ + type Distributions = (Distribution<A>, Distribution<B>, Distribution<C>); + type Builder = (Vec<A>, Vec<B>, Vec<C>); +} + +impl<A, B, C> TupledDistributions for (Distribution<A>, Distribution<B>, Distribution<C>) +where + A: Copy, + B: Copy, + C: Copy, +{ + type Item = (A, B, C); +} +impl<A, B, C> TupledDistributionsBuilder for (Vec<A>, Vec<B>, Vec<C>) +where + A: Copy, + B: Copy, + C: Copy, +{ + type Item = (A, B, C); + + fn new(size: usize) -> (Vec<A>, Vec<B>, Vec<C>) { + ( + Vec::with_capacity(size), + Vec::with_capacity(size), + Vec::with_capacity(size), + ) + } + + fn push(&mut self, tuple: (A, B, C)) { + (self.0).push(tuple.0); + (self.1).push(tuple.1); + (self.2).push(tuple.2); + } + + fn extend(&mut self, other: &mut (Vec<A>, Vec<B>, Vec<C>)) { + (self.0).append(&mut other.0); + (self.1).append(&mut other.1); + (self.2).append(&mut other.2); + } + + fn complete(self) -> (Distribution<A>, Distribution<B>, Distribution<C>) { + ( + Distribution(self.0.into_boxed_slice()), + Distribution(self.1.into_boxed_slice()), + Distribution(self.2.into_boxed_slice()), + ) + } +} + +impl<A, B, C, D> Tuple for (A, B, C, D) +where + A: Copy, + B: Copy, + C: Copy, + D: Copy, +{ + type Distributions = ( + Distribution<A>, + Distribution<B>, + Distribution<C>, + Distribution<D>, + ); + type Builder = (Vec<A>, Vec<B>, Vec<C>, Vec<D>); +} + +impl<A, B, C, D> TupledDistributions + for ( + Distribution<A>, + Distribution<B>, + Distribution<C>, + Distribution<D>, + ) +where + A: Copy, + B: Copy, + C: Copy, + D: Copy, +{ + type Item = (A, B, C, D); +} +impl<A, B, C, D> TupledDistributionsBuilder for (Vec<A>, Vec<B>, Vec<C>, Vec<D>) +where + A: Copy, + B: Copy, + C: Copy, + D: Copy, +{ + type Item = (A, B, C, D); + + fn new(size: usize) -> (Vec<A>, Vec<B>, Vec<C>, Vec<D>) { + ( + Vec::with_capacity(size), + Vec::with_capacity(size), + Vec::with_capacity(size), + Vec::with_capacity(size), + ) + } + + fn push(&mut self, tuple: (A, B, C, D)) { + (self.0).push(tuple.0); + (self.1).push(tuple.1); + (self.2).push(tuple.2); + (self.3).push(tuple.3); + } + + fn extend(&mut self, other: &mut (Vec<A>, Vec<B>, Vec<C>, Vec<D>)) { + (self.0).append(&mut other.0); + (self.1).append(&mut other.1); + (self.2).append(&mut other.2); + (self.3).append(&mut other.3); + } + + fn complete( + self, + ) -> ( + Distribution<A>, + Distribution<B>, + Distribution<C>, + Distribution<D>, + ) { + ( + Distribution(self.0.into_boxed_slice()), + Distribution(self.1.into_boxed_slice()), + Distribution(self.2.into_boxed_slice()), + Distribution(self.3.into_boxed_slice()), + ) + } +} diff --git a/vendor/criterion/src/stats/univariate/bootstrap.rs b/vendor/criterion/src/stats/univariate/bootstrap.rs new file mode 100755 index 000000000..21c914011 --- /dev/null +++ b/vendor/criterion/src/stats/univariate/bootstrap.rs @@ -0,0 +1,177 @@ +#[cfg(test)] +macro_rules! test { + ($ty:ident) => { + mod $ty { + use approx::relative_eq; + use quickcheck::quickcheck; + use quickcheck::TestResult; + + use crate::stats::univariate::{Sample, mixed, self}; + + quickcheck!{ + fn mean(size: u8, start: u8, nresamples: u8) -> TestResult { + let size = size as usize; + let start = start as usize; + let nresamples = nresamples as usize; + if let Some(v) = crate::stats::test::vec::<$ty>(size, start) { + let sample = Sample::new(&v[start..]); + + let means = if nresamples > 0 { + sample.bootstrap(nresamples, |s| (s.mean(),)).0 + } else { + return TestResult::discard(); + }; + + let min = sample.min(); + let max = sample.max(); + + TestResult::from_bool( + // Computed the correct number of resamples + means.len() == nresamples && + // No uninitialized values + means.iter().all(|&x| { + (x > min || relative_eq!(x, min)) && + (x < max || relative_eq!(x, max)) + }) + ) + } else { + TestResult::discard() + } + } + } + + quickcheck!{ + fn mean_median(size: u8, start: u8, nresamples: u8) -> TestResult { + let size = size as usize; + let start = start as usize; + let nresamples = nresamples as usize; + if let Some(v) = crate::stats::test::vec::<$ty>(size, start) { + let sample = Sample::new(&v[start..]); + + let (means, medians) = if nresamples > 0 { + sample.bootstrap(nresamples, |s| (s.mean(), s.median())) + } else { + return TestResult::discard(); + }; + + let min = sample.min(); + let max = sample.max(); + + TestResult::from_bool( + // Computed the correct number of resamples + means.len() == nresamples && + medians.len() == nresamples && + // No uninitialized values + means.iter().all(|&x| { + (x > min || relative_eq!(x, min)) && + (x < max || relative_eq!(x, max)) + }) && + medians.iter().all(|&x| { + (x > min || relative_eq!(x, min)) && + (x < max || relative_eq!(x, max)) + }) + ) + } else { + TestResult::discard() + } + } + } + + quickcheck!{ + fn mixed_two_sample( + a_size: u8, a_start: u8, + b_size: u8, b_start: u8, + nresamples: u8 + ) -> TestResult { + let a_size = a_size as usize; + let b_size = b_size as usize; + let a_start = a_start as usize; + let b_start = b_start as usize; + let nresamples = nresamples as usize; + if let (Some(a), Some(b)) = + (crate::stats::test::vec::<$ty>(a_size, a_start), crate::stats::test::vec::<$ty>(b_size, b_start)) + { + let a = Sample::new(&a); + let b = Sample::new(&b); + + let distribution = if nresamples > 0 { + mixed::bootstrap(a, b, nresamples, |a, b| (a.mean() - b.mean(),)).0 + } else { + return TestResult::discard(); + }; + + let min = <$ty>::min(a.min() - b.max(), b.min() - a.max()); + let max = <$ty>::max(a.max() - b.min(), b.max() - a.min()); + + TestResult::from_bool( + // Computed the correct number of resamples + distribution.len() == nresamples && + // No uninitialized values + distribution.iter().all(|&x| { + (x > min || relative_eq!(x, min)) && + (x < max || relative_eq!(x, max)) + }) + ) + } else { + TestResult::discard() + } + } + } + + quickcheck!{ + fn two_sample( + a_size: u8, a_start: u8, + b_size: u8, b_start: u8, + nresamples: u8 + ) -> TestResult { + let a_size = a_size as usize; + let b_size = b_size as usize; + let a_start = a_start as usize; + let b_start = b_start as usize; + let nresamples = nresamples as usize; + if let (Some(a), Some(b)) = + (crate::stats::test::vec::<$ty>(a_size, a_start), crate::stats::test::vec::<$ty>(b_size, b_start)) + { + let a = Sample::new(&a[a_start..]); + let b = Sample::new(&b[b_start..]); + + let distribution = if nresamples > 0 { + univariate::bootstrap(a, b, nresamples, |a, b| (a.mean() - b.mean(),)).0 + } else { + return TestResult::discard(); + }; + + let min = <$ty>::min(a.min() - b.max(), b.min() - a.max()); + let max = <$ty>::max(a.max() - b.min(), b.max() - a.min()); + + // Computed the correct number of resamples + let pass = distribution.len() == nresamples && + // No uninitialized values + distribution.iter().all(|&x| { + (x > min || relative_eq!(x, min)) && + (x < max || relative_eq!(x, max)) + }); + + if !pass { + println!("A: {:?} (len={})", a.as_ref(), a.len()); + println!("B: {:?} (len={})", b.as_ref(), b.len()); + println!("Dist: {:?} (len={})", distribution.as_ref(), distribution.len()); + println!("Min: {}, Max: {}, nresamples: {}", + min, max, nresamples); + } + + TestResult::from_bool(pass) + } else { + TestResult::discard() + } + } + } + } + } +} + +#[cfg(test)] +mod test { + test!(f32); + test!(f64); +} diff --git a/vendor/criterion/src/stats/univariate/kde/kernel.rs b/vendor/criterion/src/stats/univariate/kde/kernel.rs new file mode 100755 index 000000000..c3d0ff513 --- /dev/null +++ b/vendor/criterion/src/stats/univariate/kde/kernel.rs @@ -0,0 +1,84 @@ +//! Kernels + +use crate::stats::float::Float; + +/// Kernel function +pub trait Kernel<A>: Copy + Sync +where + A: Float, +{ + /// Apply the kernel function to the given x-value. + fn evaluate(&self, x: A) -> A; +} + +/// Gaussian kernel +#[derive(Clone, Copy)] +pub struct Gaussian; + +impl<A> Kernel<A> for Gaussian +where + A: Float, +{ + fn evaluate(&self, x: A) -> A { + use std::f32::consts::PI; + + (x.powi(2).exp() * A::cast(2. * PI)).sqrt().recip() + } +} + +#[cfg(test)] +macro_rules! test { + ($ty:ident) => { + mod $ty { + mod gaussian { + use approx::relative_eq; + use quickcheck::quickcheck; + use quickcheck::TestResult; + + use crate::stats::univariate::kde::kernel::{Gaussian, Kernel}; + + quickcheck! { + fn symmetric(x: $ty) -> bool { + x.is_nan() || relative_eq!(Gaussian.evaluate(-x), Gaussian.evaluate(x)) + } + } + + // Any [a b] integral should be in the range [0 1] + quickcheck! { + fn integral(a: $ty, b: $ty) -> TestResult { + let a = a.sin().abs(); // map the value to [0 1] + let b = b.sin().abs(); // map the value to [0 1] + const DX: $ty = 1e-3; + + if a > b { + TestResult::discard() + } else { + let mut acc = 0.; + let mut x = a; + let mut y = Gaussian.evaluate(a); + + while x < b { + acc += DX * y / 2.; + + x += DX; + y = Gaussian.evaluate(x); + + acc += DX * y / 2.; + } + + TestResult::from_bool( + (acc > 0. || relative_eq!(acc, 0.)) && + (acc < 1. || relative_eq!(acc, 1.))) + } + } + } + } + } + }; +} + +#[cfg(test)] +mod test { + test!(f32); + test!(f64); +} diff --git a/vendor/criterion/src/stats/univariate/kde/mod.rs b/vendor/criterion/src/stats/univariate/kde/mod.rs new file mode 100755 index 000000000..9b0836d74 --- /dev/null +++ b/vendor/criterion/src/stats/univariate/kde/mod.rs @@ -0,0 +1,142 @@ +//! Kernel density estimation + +pub mod kernel; + +use self::kernel::Kernel; +use crate::stats::float::Float; +use crate::stats::univariate::Sample; +use rayon::prelude::*; + +/// Univariate kernel density estimator +pub struct Kde<'a, A, K> +where + A: Float, + K: Kernel<A>, +{ + bandwidth: A, + kernel: K, + sample: &'a Sample<A>, +} + +impl<'a, A, K> Kde<'a, A, K> +where + A: 'a + Float, + K: Kernel<A>, +{ + /// Creates a new kernel density estimator from the `sample`, using a kernel and estimating + /// the bandwidth using the method `bw` + pub fn new(sample: &'a Sample<A>, kernel: K, bw: Bandwidth) -> Kde<'a, A, K> { + Kde { + bandwidth: bw.estimate(sample), + kernel, + sample, + } + } + + /// Returns the bandwidth used by the estimator + pub fn bandwidth(&self) -> A { + self.bandwidth + } + + /// Maps the KDE over `xs` + /// + /// - Multihreaded + pub fn map(&self, xs: &[A]) -> Box<[A]> { + xs.par_iter() + .map(|&x| self.estimate(x)) + .collect::<Vec<_>>() + .into_boxed_slice() + } + + /// Estimates the probability density of `x` + pub fn estimate(&self, x: A) -> A { + let _0 = A::cast(0); + let slice = self.sample; + let h = self.bandwidth; + let n = A::cast(slice.len()); + + let sum = slice + .iter() + .fold(_0, |acc, &x_i| acc + self.kernel.evaluate((x - x_i) / h)); + + sum / (h * n) + } +} + +/// Method to estimate the bandwidth +pub enum Bandwidth { + /// Use Silverman's rule of thumb to estimate the bandwidth from the sample + Silverman, +} + +impl Bandwidth { + fn estimate<A: Float>(self, sample: &Sample<A>) -> A { + match self { + Bandwidth::Silverman => { + let factor = A::cast(4. / 3.); + let exponent = A::cast(1. / 5.); + let n = A::cast(sample.len()); + let sigma = sample.std_dev(None); + + sigma * (factor / n).powf(exponent) + } + } + } +} + +#[cfg(test)] +macro_rules! test { + ($ty:ident) => { + mod $ty { + use approx::relative_eq; + use quickcheck::quickcheck; + use quickcheck::TestResult; + + use crate::stats::univariate::kde::kernel::Gaussian; + use crate::stats::univariate::kde::{Bandwidth, Kde}; + use crate::stats::univariate::Sample; + + // The [-inf inf] integral of the estimated PDF should be one + quickcheck! { + fn integral(size: u8, start: u8) -> TestResult { + let size = size as usize; + let start = start as usize; + const DX: $ty = 1e-3; + + if let Some(v) = crate::stats::test::vec::<$ty>(size, start) { + let slice = &v[start..]; + let data = Sample::new(slice); + let kde = Kde::new(data, Gaussian, Bandwidth::Silverman); + let h = kde.bandwidth(); + // NB Obviously a [-inf inf] integral is not feasible, but this range works + // quite well + let (a, b) = (data.min() - 5. * h, data.max() + 5. * h); + + let mut acc = 0.; + let mut x = a; + let mut y = kde.estimate(a); + + while x < b { + acc += DX * y / 2.; + + x += DX; + y = kde.estimate(x); + + acc += DX * y / 2.; + } + + TestResult::from_bool(relative_eq!(acc, 1., epsilon = 2e-5)) + } else { + TestResult::discard() + } + } + } + } + }; +} + +#[cfg(test)] +mod test { + test!(f32); + test!(f64); +} diff --git a/vendor/criterion/src/stats/univariate/mixed.rs b/vendor/criterion/src/stats/univariate/mixed.rs new file mode 100755 index 000000000..5c0a59fac --- /dev/null +++ b/vendor/criterion/src/stats/univariate/mixed.rs @@ -0,0 +1,57 @@ +//! Mixed bootstrap + +use crate::stats::float::Float; +use crate::stats::tuple::{Tuple, TupledDistributionsBuilder}; +use crate::stats::univariate::Resamples; +use crate::stats::univariate::Sample; +use rayon::prelude::*; + +/// Performs a *mixed* two-sample bootstrap +pub fn bootstrap<A, T, S>( + a: &Sample<A>, + b: &Sample<A>, + nresamples: usize, + statistic: S, +) -> T::Distributions +where + A: Float, + S: Fn(&Sample<A>, &Sample<A>) -> T + Sync, + T: Tuple + Send, + T::Distributions: Send, + T::Builder: Send, +{ + let n_a = a.len(); + let n_b = b.len(); + let mut c = Vec::with_capacity(n_a + n_b); + c.extend_from_slice(a); + c.extend_from_slice(b); + let c = Sample::new(&c); + + (0..nresamples) + .into_par_iter() + .map_init( + || Resamples::new(c), + |resamples, _| { + let resample = resamples.next(); + let a: &Sample<A> = Sample::new(&resample[..n_a]); + let b: &Sample<A> = Sample::new(&resample[n_a..]); + + statistic(a, b) + }, + ) + .fold( + || T::Builder::new(0), + |mut sub_distributions, sample| { + sub_distributions.push(sample); + sub_distributions + }, + ) + .reduce( + || T::Builder::new(0), + |mut a, mut b| { + a.extend(&mut b); + a + }, + ) + .complete() +} diff --git a/vendor/criterion/src/stats/univariate/mod.rs b/vendor/criterion/src/stats/univariate/mod.rs new file mode 100755 index 000000000..8dfb5f8a9 --- /dev/null +++ b/vendor/criterion/src/stats/univariate/mod.rs @@ -0,0 +1,72 @@ +//! Univariate analysis + +mod bootstrap; +mod percentiles; +mod resamples; +mod sample; + +pub mod kde; +pub mod mixed; +pub mod outliers; + +use crate::stats::float::Float; +use crate::stats::tuple::{Tuple, TupledDistributionsBuilder}; +use rayon::prelude::*; +use std::cmp; + +use self::resamples::Resamples; + +pub use self::percentiles::Percentiles; +pub use self::sample::Sample; + +/// Performs a two-sample bootstrap +/// +/// - Multithreaded +/// - Time: `O(nresamples)` +/// - Memory: `O(nresamples)` +#[cfg_attr(feature = "cargo-clippy", allow(clippy::cast_lossless))] +pub fn bootstrap<A, B, T, S>( + a: &Sample<A>, + b: &Sample<B>, + nresamples: usize, + statistic: S, +) -> T::Distributions +where + A: Float, + B: Float, + S: Fn(&Sample<A>, &Sample<B>) -> T + Sync, + T: Tuple + Send, + T::Distributions: Send, + T::Builder: Send, +{ + let nresamples_sqrt = (nresamples as f64).sqrt().ceil() as usize; + let per_chunk = (nresamples + nresamples_sqrt - 1) / nresamples_sqrt; + + (0..nresamples_sqrt) + .into_par_iter() + .map_init( + || (Resamples::new(a), Resamples::new(b)), + |(a_resamples, b_resamples), i| { + let start = i * per_chunk; + let end = cmp::min((i + 1) * per_chunk, nresamples); + let a_resample = a_resamples.next(); + + let mut sub_distributions: T::Builder = + TupledDistributionsBuilder::new(end - start); + + for _ in start..end { + let b_resample = b_resamples.next(); + sub_distributions.push(statistic(a_resample, b_resample)); + } + sub_distributions + }, + ) + .reduce( + || T::Builder::new(0), + |mut a, mut b| { + a.extend(&mut b); + a + }, + ) + .complete() +} diff --git a/vendor/criterion/src/stats/univariate/outliers/mod.rs b/vendor/criterion/src/stats/univariate/outliers/mod.rs new file mode 100755 index 000000000..b8ed7c744 --- /dev/null +++ b/vendor/criterion/src/stats/univariate/outliers/mod.rs @@ -0,0 +1,7 @@ +//! Classification of outliers +//! +//! WARNING: There's no formal/mathematical definition of what an outlier actually is. Therefore, +//! all outlier classifiers are *subjective*, however some classifiers that have become *de facto* +//! standard are provided here. + +pub mod tukey; diff --git a/vendor/criterion/src/stats/univariate/outliers/tukey.rs b/vendor/criterion/src/stats/univariate/outliers/tukey.rs new file mode 100755 index 000000000..70713ac57 --- /dev/null +++ b/vendor/criterion/src/stats/univariate/outliers/tukey.rs @@ -0,0 +1,276 @@ +//! Tukey's method +//! +//! The original method uses two "fences" to classify the data. All the observations "inside" the +//! fences are considered "normal", and the rest are considered outliers. +//! +//! The fences are computed from the quartiles of the sample, according to the following formula: +//! +//! ``` ignore +//! // q1, q3 are the first and third quartiles +//! let iqr = q3 - q1; // The interquartile range +//! let (f1, f2) = (q1 - 1.5 * iqr, q3 + 1.5 * iqr); // the "fences" +//! +//! let is_outlier = |x| if x > f1 && x < f2 { true } else { false }; +//! ``` +//! +//! The classifier provided here adds two extra outer fences: +//! +//! ``` ignore +//! let (f3, f4) = (q1 - 3 * iqr, q3 + 3 * iqr); // the outer "fences" +//! ``` +//! +//! The extra fences add a sense of "severity" to the classification. Data points outside of the +//! outer fences are considered "severe" outliers, whereas points outside the inner fences are just +//! "mild" outliers, and, as the original method, everything inside the inner fences is considered +//! "normal" data. +//! +//! Some ASCII art for the visually oriented people: +//! +//! ``` ignore +//! LOW-ish NORMAL-ish HIGH-ish +//! x | + | o o o o o o o | + | x +//! f3 f1 f2 f4 +//! +//! Legend: +//! o: "normal" data (not an outlier) +//! +: "mild" outlier +//! x: "severe" outlier +//! ``` + +use std::iter::IntoIterator; +use std::ops::{Deref, Index}; +use std::slice; + +use crate::stats::float::Float; +use crate::stats::univariate::Sample; + +use self::Label::*; + +/// A classified/labeled sample. +/// +/// The labeled data can be accessed using the indexing operator. The order of the data points is +/// retained. +/// +/// NOTE: Due to limitations in the indexing traits, only the label is returned. Once the +/// `IndexGet` trait lands in stdlib, the indexing operation will return a `(data_point, label)` +/// pair. +#[derive(Clone, Copy)] +pub struct LabeledSample<'a, A> +where + A: Float, +{ + fences: (A, A, A, A), + sample: &'a Sample<A>, +} + +impl<'a, A> LabeledSample<'a, A> +where + A: Float, +{ + /// Returns the number of data points per label + /// + /// - Time: `O(length)` + #[cfg_attr(feature = "cargo-clippy", allow(clippy::similar_names))] + pub fn count(&self) -> (usize, usize, usize, usize, usize) { + let (mut los, mut lom, mut noa, mut him, mut his) = (0, 0, 0, 0, 0); + + for (_, label) in self { + match label { + LowSevere => { + los += 1; + } + LowMild => { + lom += 1; + } + NotAnOutlier => { + noa += 1; + } + HighMild => { + him += 1; + } + HighSevere => { + his += 1; + } + } + } + + (los, lom, noa, him, his) + } + + /// Returns the fences used to classify the outliers + pub fn fences(&self) -> (A, A, A, A) { + self.fences + } + + /// Returns an iterator over the labeled data + pub fn iter(&self) -> Iter<'a, A> { + Iter { + fences: self.fences, + iter: self.sample.iter(), + } + } +} + +impl<'a, A> Deref for LabeledSample<'a, A> +where + A: Float, +{ + type Target = Sample<A>; + + fn deref(&self) -> &Sample<A> { + self.sample + } +} + +// FIXME Use the `IndexGet` trait +impl<'a, A> Index<usize> for LabeledSample<'a, A> +where + A: Float, +{ + type Output = Label; + + #[cfg_attr(feature = "cargo-clippy", allow(clippy::similar_names))] + fn index(&self, i: usize) -> &Label { + static LOW_SEVERE: Label = LowSevere; + static LOW_MILD: Label = LowMild; + static HIGH_MILD: Label = HighMild; + static HIGH_SEVERE: Label = HighSevere; + static NOT_AN_OUTLIER: Label = NotAnOutlier; + + let x = self.sample[i]; + let (lost, lomt, himt, hist) = self.fences; + + if x < lost { + &LOW_SEVERE + } else if x > hist { + &HIGH_SEVERE + } else if x < lomt { + &LOW_MILD + } else if x > himt { + &HIGH_MILD + } else { + &NOT_AN_OUTLIER + } + } +} + +impl<'a, 'b, A> IntoIterator for &'b LabeledSample<'a, A> +where + A: Float, +{ + type Item = (A, Label); + type IntoIter = Iter<'a, A>; + + fn into_iter(self) -> Iter<'a, A> { + self.iter() + } +} + +/// Iterator over the labeled data +pub struct Iter<'a, A> +where + A: Float, +{ + fences: (A, A, A, A), + iter: slice::Iter<'a, A>, +} + +impl<'a, A> Iterator for Iter<'a, A> +where + A: Float, +{ + type Item = (A, Label); + + #[cfg_attr(feature = "cargo-clippy", allow(clippy::similar_names))] + fn next(&mut self) -> Option<(A, Label)> { + self.iter.next().map(|&x| { + let (lost, lomt, himt, hist) = self.fences; + + let label = if x < lost { + LowSevere + } else if x > hist { + HighSevere + } else if x < lomt { + LowMild + } else if x > himt { + HighMild + } else { + NotAnOutlier + }; + + (x, label) + }) + } + + fn size_hint(&self) -> (usize, Option<usize>) { + self.iter.size_hint() + } +} + +/// Labels used to classify outliers +pub enum Label { + /// A "mild" outlier in the "high" spectrum + HighMild, + /// A "severe" outlier in the "high" spectrum + HighSevere, + /// A "mild" outlier in the "low" spectrum + LowMild, + /// A "severe" outlier in the "low" spectrum + LowSevere, + /// A normal data point + NotAnOutlier, +} + +impl Label { + /// Checks if the data point has an "unusually" high value + pub fn is_high(&self) -> bool { + matches!(*self, HighMild | HighSevere) + } + + /// Checks if the data point is labeled as a "mild" outlier + pub fn is_mild(&self) -> bool { + matches!(*self, HighMild | LowMild) + } + + /// Checks if the data point has an "unusually" low value + pub fn is_low(&self) -> bool { + matches!(*self, LowMild | LowSevere) + } + + /// Checks if the data point is labeled as an outlier + pub fn is_outlier(&self) -> bool { + matches!(*self, NotAnOutlier) + } + + /// Checks if the data point is labeled as a "severe" outlier + pub fn is_severe(&self) -> bool { + matches!(*self, HighSevere | LowSevere) + } +} + +/// Classifies the sample, and returns a labeled sample. +/// +/// - Time: `O(N log N) where N = length` +pub fn classify<A>(sample: &Sample<A>) -> LabeledSample<'_, A> +where + A: Float, + usize: cast::From<A, Output = Result<usize, cast::Error>>, +{ + let (q1, _, q3) = sample.percentiles().quartiles(); + let iqr = q3 - q1; + + // Mild + let k_m = A::cast(1.5_f32); + // Severe + let k_s = A::cast(3); + + LabeledSample { + fences: ( + q1 - k_s * iqr, + q1 - k_m * iqr, + q3 + k_m * iqr, + q3 + k_s * iqr, + ), + sample, + } +} diff --git a/vendor/criterion/src/stats/univariate/percentiles.rs b/vendor/criterion/src/stats/univariate/percentiles.rs new file mode 100755 index 000000000..be6bcf391 --- /dev/null +++ b/vendor/criterion/src/stats/univariate/percentiles.rs @@ -0,0 +1,80 @@ +use crate::stats::float::Float; +use cast::{self, usize}; + +/// A "view" into the percentiles of a sample +pub struct Percentiles<A>(Box<[A]>) +where + A: Float; + +// TODO(rust-lang/rfcs#735) move this `impl` into a private percentiles module +impl<A> Percentiles<A> +where + A: Float, + usize: cast::From<A, Output = Result<usize, cast::Error>>, +{ + /// Returns the percentile at `p`% + /// + /// Safety: + /// + /// - Make sure that `p` is in the range `[0, 100]` + unsafe fn at_unchecked(&self, p: A) -> A { + let _100 = A::cast(100); + debug_assert!(p >= A::cast(0) && p <= _100); + debug_assert!(self.0.len() > 0); + let len = self.0.len() - 1; + + if p == _100 { + self.0[len] + } else { + let rank = (p / _100) * A::cast(len); + let integer = rank.floor(); + let fraction = rank - integer; + let n = usize(integer).unwrap(); + let &floor = self.0.get_unchecked(n); + let &ceiling = self.0.get_unchecked(n + 1); + + floor + (ceiling - floor) * fraction + } + } + + /// Returns the percentile at `p`% + /// + /// # Panics + /// + /// Panics if `p` is outside the closed `[0, 100]` range + pub fn at(&self, p: A) -> A { + let _0 = A::cast(0); + let _100 = A::cast(100); + + assert!(p >= _0 && p <= _100); + assert!(self.0.len() > 0); + + unsafe { self.at_unchecked(p) } + } + + /// Returns the interquartile range + pub fn iqr(&self) -> A { + unsafe { + let q1 = self.at_unchecked(A::cast(25)); + let q3 = self.at_unchecked(A::cast(75)); + + q3 - q1 + } + } + + /// Returns the 50th percentile + pub fn median(&self) -> A { + unsafe { self.at_unchecked(A::cast(50)) } + } + + /// Returns the 25th, 50th and 75th percentiles + pub fn quartiles(&self) -> (A, A, A) { + unsafe { + ( + self.at_unchecked(A::cast(25)), + self.at_unchecked(A::cast(50)), + self.at_unchecked(A::cast(75)), + ) + } + } +} diff --git a/vendor/criterion/src/stats/univariate/resamples.rs b/vendor/criterion/src/stats/univariate/resamples.rs new file mode 100755 index 000000000..923669d59 --- /dev/null +++ b/vendor/criterion/src/stats/univariate/resamples.rs @@ -0,0 +1,119 @@ +use std::mem; + +use crate::stats::float::Float; +use crate::stats::rand_util::{new_rng, Rng}; +use crate::stats::univariate::Sample; + +pub struct Resamples<'a, A> +where + A: Float, +{ + rng: Rng, + sample: &'a [A], + stage: Option<Vec<A>>, +} + +#[cfg_attr(feature = "cargo-clippy", allow(clippy::should_implement_trait))] +impl<'a, A> Resamples<'a, A> +where + A: 'a + Float, +{ + pub fn new(sample: &'a Sample<A>) -> Resamples<'a, A> { + let slice = sample; + + Resamples { + rng: new_rng(), + sample: slice, + stage: None, + } + } + + pub fn next(&mut self) -> &Sample<A> { + let n = self.sample.len(); + let rng = &mut self.rng; + + match self.stage { + None => { + let mut stage = Vec::with_capacity(n); + + for _ in 0..n { + let idx = rng.rand_range(0u64..(self.sample.len() as u64)); + stage.push(self.sample[idx as usize]) + } + + self.stage = Some(stage); + } + Some(ref mut stage) => { + for elem in stage.iter_mut() { + let idx = rng.rand_range(0u64..(self.sample.len() as u64)); + *elem = self.sample[idx as usize] + } + } + } + + if let Some(ref v) = self.stage { + unsafe { mem::transmute::<&[_], _>(v) } + } else { + unreachable!(); + } + } +} + +#[cfg(test)] +mod test { + use quickcheck::quickcheck; + use quickcheck::TestResult; + use std::collections::HashSet; + + use crate::stats::univariate::resamples::Resamples; + use crate::stats::univariate::Sample; + + // Check that the resample is a subset of the sample + quickcheck! { + fn subset(size: u8, nresamples: u8) -> TestResult { + let size = size as usize; + let nresamples = nresamples as usize; + if size > 1 { + let v: Vec<_> = (0..size).map(|i| i as f32).collect(); + let sample = Sample::new(&v); + let mut resamples = Resamples::new(sample); + let sample = v.iter().map(|&x| x as i64).collect::<HashSet<_>>(); + + TestResult::from_bool((0..nresamples).all(|_| { + let resample = resamples.next() + + .iter() + .map(|&x| x as i64) + .collect::<HashSet<_>>(); + + resample.is_subset(&sample) + })) + } else { + TestResult::discard() + } + } + } + + #[test] + fn different_subsets() { + let size = 1000; + let v: Vec<_> = (0..size).map(|i| i as f32).collect(); + let sample = Sample::new(&v); + let mut resamples = Resamples::new(sample); + + // Hypothetically, we might see one duplicate, but more than one is likely to be a bug. + let mut num_duplicated = 0; + for _ in 0..1000 { + let sample_1 = resamples.next().iter().cloned().collect::<Vec<_>>(); + let sample_2 = resamples.next().iter().cloned().collect::<Vec<_>>(); + + if sample_1 == sample_2 { + num_duplicated += 1; + } + } + + if num_duplicated > 1 { + panic!("Found {} duplicate samples", num_duplicated); + } + } +} diff --git a/vendor/criterion/src/stats/univariate/sample.rs b/vendor/criterion/src/stats/univariate/sample.rs new file mode 100755 index 000000000..8f10db7b1 --- /dev/null +++ b/vendor/criterion/src/stats/univariate/sample.rs @@ -0,0 +1,255 @@ +use std::{mem, ops}; + +use crate::stats::float::Float; +use crate::stats::tuple::{Tuple, TupledDistributionsBuilder}; +use crate::stats::univariate::Percentiles; +use crate::stats::univariate::Resamples; +use rayon::prelude::*; + +/// A collection of data points drawn from a population +/// +/// Invariants: +/// +/// - The sample contains at least 2 data points +/// - The sample contains no `NaN`s +pub struct Sample<A>([A]); + +// TODO(rust-lang/rfcs#735) move this `impl` into a private percentiles module +impl<A> Sample<A> +where + A: Float, +{ + /// Creates a new sample from an existing slice + /// + /// # Panics + /// + /// Panics if `slice` contains any `NaN` or if `slice` has less than two elements + #[cfg_attr(feature = "cargo-clippy", allow(clippy::new_ret_no_self))] + pub fn new(slice: &[A]) -> &Sample<A> { + assert!(slice.len() > 1 && slice.iter().all(|x| !x.is_nan())); + + unsafe { mem::transmute(slice) } + } + + /// Returns the biggest element in the sample + /// + /// - Time: `O(length)` + pub fn max(&self) -> A { + let mut elems = self.iter(); + + match elems.next() { + Some(&head) => elems.fold(head, |a, &b| a.max(b)), + // NB `unreachable!` because `Sample` is guaranteed to have at least one data point + None => unreachable!(), + } + } + + /// Returns the arithmetic average of the sample + /// + /// - Time: `O(length)` + pub fn mean(&self) -> A { + let n = self.len(); + + self.sum() / A::cast(n) + } + + /// Returns the median absolute deviation + /// + /// The `median` can be optionally passed along to speed up (2X) the computation + /// + /// - Time: `O(length)` + /// - Memory: `O(length)` + pub fn median_abs_dev(&self, median: Option<A>) -> A + where + usize: cast::From<A, Output = Result<usize, cast::Error>>, + { + let median = median.unwrap_or_else(|| self.percentiles().median()); + + // NB Although this operation can be SIMD accelerated, the gain is negligible because the + // bottle neck is the sorting operation which is part of the computation of the median + let abs_devs = self.iter().map(|&x| (x - median).abs()).collect::<Vec<_>>(); + + let abs_devs: &Self = Self::new(&abs_devs); + + abs_devs.percentiles().median() * A::cast(1.4826) + } + + /// Returns the median absolute deviation as a percentage of the median + /// + /// - Time: `O(length)` + /// - Memory: `O(length)` + pub fn median_abs_dev_pct(&self) -> A + where + usize: cast::From<A, Output = Result<usize, cast::Error>>, + { + let _100 = A::cast(100); + let median = self.percentiles().median(); + let mad = self.median_abs_dev(Some(median)); + + (mad / median) * _100 + } + + /// Returns the smallest element in the sample + /// + /// - Time: `O(length)` + pub fn min(&self) -> A { + let mut elems = self.iter(); + + match elems.next() { + Some(&elem) => elems.fold(elem, |a, &b| a.min(b)), + // NB `unreachable!` because `Sample` is guaranteed to have at least one data point + None => unreachable!(), + } + } + + /// Returns a "view" into the percentiles of the sample + /// + /// This "view" makes consecutive computations of percentiles much faster (`O(1)`) + /// + /// - Time: `O(N log N) where N = length` + /// - Memory: `O(length)` + pub fn percentiles(&self) -> Percentiles<A> + where + usize: cast::From<A, Output = Result<usize, cast::Error>>, + { + use std::cmp::Ordering; + + // NB This function assumes that there are no `NaN`s in the sample + fn cmp<T>(a: &T, b: &T) -> Ordering + where + T: PartialOrd, + { + match a.partial_cmp(b) { + Some(o) => o, + // Arbitrary way to handle NaNs that should never happen + None => Ordering::Equal, + } + } + + let mut v = self.to_vec().into_boxed_slice(); + v.par_sort_unstable_by(cmp); + + // NB :-1: to intra-crate privacy rules + unsafe { mem::transmute(v) } + } + + /// Returns the standard deviation of the sample + /// + /// The `mean` can be optionally passed along to speed up (2X) the computation + /// + /// - Time: `O(length)` + pub fn std_dev(&self, mean: Option<A>) -> A { + self.var(mean).sqrt() + } + + /// Returns the standard deviation as a percentage of the mean + /// + /// - Time: `O(length)` + pub fn std_dev_pct(&self) -> A { + let _100 = A::cast(100); + let mean = self.mean(); + let std_dev = self.std_dev(Some(mean)); + + (std_dev / mean) * _100 + } + + /// Returns the sum of all the elements of the sample + /// + /// - Time: `O(length)` + pub fn sum(&self) -> A { + crate::stats::sum(self) + } + + /// Returns the t score between these two samples + /// + /// - Time: `O(length)` + pub fn t(&self, other: &Sample<A>) -> A { + let (x_bar, y_bar) = (self.mean(), other.mean()); + let (s2_x, s2_y) = (self.var(Some(x_bar)), other.var(Some(y_bar))); + let n_x = A::cast(self.len()); + let n_y = A::cast(other.len()); + let num = x_bar - y_bar; + let den = (s2_x / n_x + s2_y / n_y).sqrt(); + + num / den + } + + /// Returns the variance of the sample + /// + /// The `mean` can be optionally passed along to speed up (2X) the computation + /// + /// - Time: `O(length)` + pub fn var(&self, mean: Option<A>) -> A { + use std::ops::Add; + + let mean = mean.unwrap_or_else(|| self.mean()); + let slice = self; + + let sum = slice + .iter() + .map(|&x| (x - mean).powi(2)) + .fold(A::cast(0), Add::add); + + sum / A::cast(slice.len() - 1) + } + + // TODO Remove the `T` parameter in favor of `S::Output` + /// Returns the bootstrap distributions of the parameters estimated by the 1-sample statistic + /// + /// - Multi-threaded + /// - Time: `O(nresamples)` + /// - Memory: `O(nresamples)` + pub fn bootstrap<T, S>(&self, nresamples: usize, statistic: S) -> T::Distributions + where + S: Fn(&Sample<A>) -> T + Sync, + T: Tuple + Send, + T::Distributions: Send, + T::Builder: Send, + { + (0..nresamples) + .into_par_iter() + .map_init( + || Resamples::new(self), + |resamples, _| statistic(resamples.next()), + ) + .fold( + || T::Builder::new(0), + |mut sub_distributions, sample| { + sub_distributions.push(sample); + sub_distributions + }, + ) + .reduce( + || T::Builder::new(0), + |mut a, mut b| { + a.extend(&mut b); + a + }, + ) + .complete() + } + + #[cfg(test)] + pub fn iqr(&self) -> A + where + usize: cast::From<A, Output = Result<usize, cast::Error>>, + { + self.percentiles().iqr() + } + + #[cfg(test)] + pub fn median(&self) -> A + where + usize: cast::From<A, Output = Result<usize, cast::Error>>, + { + self.percentiles().median() + } +} + +impl<A> ops::Deref for Sample<A> { + type Target = [A]; + + fn deref(&self) -> &[A] { + &self.0 + } +} |