// This Source Code Form is subject to the terms of the Mozilla Public // License, v. 2.0. If a copy of the MPL was not distributed with this // file, You can obtain one at https://mozilla.org/MPL/2.0/. use std::collections::HashMap; use once_cell::sync::OnceCell; use serde::{Deserialize, Serialize}; use super::{Bucketing, Histogram}; use crate::util::floating_point_context::FloatingPointContext; /// Create the possible ranges in an exponential distribution from `min` to `max` with /// `bucket_count` buckets. /// /// This algorithm calculates the bucket sizes using a natural log approach to get `bucket_count` number of buckets, /// exponentially spaced between `min` and `max` /// /// Bucket limits are the minimal bucket value. /// That means values in a bucket `i` are `bucket[i] <= value < bucket[i+1]`. /// It will always contain an underflow bucket (`< 1`). fn exponential_range(min: u64, max: u64, bucket_count: usize) -> Vec { // Set the FPU control flag to the required state within this function let _fpc = FloatingPointContext::new(); let log_max = (max as f64).ln(); let mut ranges = Vec::with_capacity(bucket_count); let mut current = min; if current == 0 { current = 1; } // undeflow bucket ranges.push(0); ranges.push(current); for i in 2..bucket_count { let log_current = (current as f64).ln(); let log_ratio = (log_max - log_current) / (bucket_count - i) as f64; let log_next = log_current + log_ratio; let next_value = log_next.exp().round() as u64; current = if next_value > current { next_value } else { current + 1 }; ranges.push(current); } ranges } /// An exponential bucketing algorithm. /// /// Buckets are pre-computed at instantiation with an exponential distribution from `min` to `max` /// and `bucket_count` buckets. #[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)] pub struct PrecomputedExponential { // Don't serialize the (potentially large) array of ranges, instead compute them on first // access. #[serde(skip)] bucket_ranges: OnceCell>, min: u64, max: u64, bucket_count: usize, } impl Bucketing for PrecomputedExponential { /// Get the bucket for the sample. /// /// This uses a binary search to locate the index `i` of the bucket such that: /// bucket[i] <= sample < bucket[i+1] fn sample_to_bucket_minimum(&self, sample: u64) -> u64 { let limit = match self.ranges().binary_search(&sample) { // Found an exact match to fit it in Ok(i) => i, // Sorted it fits after the bucket's limit, therefore it fits into the previous bucket Err(i) => i - 1, }; self.ranges()[limit] } fn ranges(&self) -> &[u64] { // Create the exponential range on first access. self.bucket_ranges .get_or_init(|| exponential_range(self.min, self.max, self.bucket_count)) } } impl Histogram { /// Creates a histogram with `count` exponential buckets in the range `min` to `max`. pub fn exponential( min: u64, max: u64, bucket_count: usize, ) -> Histogram { Histogram { values: HashMap::new(), count: 0, sum: 0, bucketing: PrecomputedExponential { bucket_ranges: OnceCell::new(), min, max, bucket_count, }, } } } #[cfg(test)] mod test { use super::*; const DEFAULT_BUCKET_COUNT: usize = 100; const DEFAULT_RANGE_MIN: u64 = 0; const DEFAULT_RANGE_MAX: u64 = 60_000; #[test] fn can_count() { let mut hist = Histogram::exponential(1, 500, 10); assert!(hist.is_empty()); for i in 1..=10 { hist.accumulate(i); } assert_eq!(10, hist.count()); assert_eq!(55, hist.sum()); } #[test] fn overflow_values_accumulate_in_the_last_bucket() { let mut hist = Histogram::exponential(DEFAULT_RANGE_MIN, DEFAULT_RANGE_MAX, DEFAULT_BUCKET_COUNT); hist.accumulate(DEFAULT_RANGE_MAX + 100); assert_eq!(1, hist.values[&DEFAULT_RANGE_MAX]); } #[test] fn short_exponential_buckets_are_correct() { let test_buckets = vec![0, 1, 2, 3, 5, 9, 16, 29, 54, 100]; assert_eq!(test_buckets, exponential_range(1, 100, 10)); // There's always a zero bucket, so we increase the lower limit. assert_eq!(test_buckets, exponential_range(0, 100, 10)); } #[test] fn default_exponential_buckets_are_correct() { // Hand calculated values using current default range 0 - 60000 and bucket count of 100. // NOTE: The final bucket, regardless of width, represents the overflow bucket to hold any // values beyond the maximum (in this case the maximum is 60000) let test_buckets = vec![ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 17, 19, 21, 23, 25, 28, 31, 34, 38, 42, 46, 51, 56, 62, 68, 75, 83, 92, 101, 111, 122, 135, 149, 164, 181, 200, 221, 244, 269, 297, 328, 362, 399, 440, 485, 535, 590, 651, 718, 792, 874, 964, 1064, 1174, 1295, 1429, 1577, 1740, 1920, 2118, 2337, 2579, 2846, 3140, 3464, 3822, 4217, 4653, 5134, 5665, 6250, 6896, 7609, 8395, 9262, 10219, 11275, 12440, 13726, 15144, 16709, 18436, 20341, 22443, 24762, 27321, 30144, 33259, 36696, 40488, 44672, 49288, 54381, 60000, ]; assert_eq!( test_buckets, exponential_range(DEFAULT_RANGE_MIN, DEFAULT_RANGE_MAX, DEFAULT_BUCKET_COUNT) ); } #[test] fn default_buckets_correctly_accumulate() { let mut hist = Histogram::exponential(DEFAULT_RANGE_MIN, DEFAULT_RANGE_MAX, DEFAULT_BUCKET_COUNT); for i in &[1, 10, 100, 1000, 10000] { hist.accumulate(*i); } assert_eq!(11111, hist.sum()); assert_eq!(5, hist.count()); assert_eq!(None, hist.values.get(&0)); // underflow is empty assert_eq!(1, hist.values[&1]); // bucket_ranges[1] = 1 assert_eq!(1, hist.values[&10]); // bucket_ranges[10] = 10 assert_eq!(1, hist.values[&92]); // bucket_ranges[33] = 92 assert_eq!(1, hist.values[&964]); // bucket_ranges[57] = 964 assert_eq!(1, hist.values[&9262]); // bucket_ranges[80] = 9262 } #[test] fn accumulate_large_numbers() { let mut hist = Histogram::exponential(1, 500, 10); hist.accumulate(u64::max_value()); hist.accumulate(u64::max_value()); assert_eq!(2, hist.count()); // Saturate before overflowing assert_eq!(u64::max_value(), hist.sum()); assert_eq!(2, hist.values[&500]); } }