diff options
Diffstat (limited to 'src/lib/stats-dist.c')
-rw-r--r-- | src/lib/stats-dist.c | 183 |
1 files changed, 183 insertions, 0 deletions
diff --git a/src/lib/stats-dist.c b/src/lib/stats-dist.c new file mode 100644 index 0000000..882bde1 --- /dev/null +++ b/src/lib/stats-dist.c @@ -0,0 +1,183 @@ +/* Copyright (c) 2015-2018 Dovecot authors, see the included COPYING file */ + +#include "lib.h" +#include "stats-dist.h" +#include "sort.h" + +/* In order to have a vaguely accurate 95th percentile, you need way + more than 20 in your subsample. */ +#define TIMING_DEFAULT_SUBSAMPLING_BUFFER (20*24) /* 20*24 fits in a page */ + +struct stats_dist { + unsigned int sample_count; + unsigned int count; + bool sorted; + uint64_t min; + uint64_t max; + uint64_t sum; + uint64_t samples[]; +}; + +struct stats_dist *stats_dist_init(void) +{ + return stats_dist_init_with_size(TIMING_DEFAULT_SUBSAMPLING_BUFFER); +} + +struct stats_dist *stats_dist_init_with_size(unsigned int sample_count) +{ + i_assert(sample_count > 0); + + struct stats_dist *stats = + i_malloc(sizeof(struct stats_dist) + + sizeof(uint64_t) * sample_count); + stats->sample_count = sample_count; + return stats; +} + +void stats_dist_deinit(struct stats_dist **_stats) +{ + i_free_and_null(*_stats); +} + +void stats_dist_reset(struct stats_dist *stats) +{ + unsigned int sample_count = stats->sample_count; + i_zero(stats); + stats->sample_count = sample_count; +} + +void stats_dist_add(struct stats_dist *stats, uint64_t value) +{ + if (stats->count < stats->sample_count) { + stats->samples[stats->count] = value; + if (stats->count == 0) + stats->min = stats->max = value; + } else { + unsigned int idx = i_rand_limit(stats->count); + if (idx < stats->sample_count) + stats->samples[idx] = value; + } + + stats->count++; + stats->sum += value; + if (stats->max < value) + stats->max = value; + if (stats->min > value) + stats->min = value; + stats->sorted = FALSE; +} + +unsigned int stats_dist_get_count(const struct stats_dist *stats) +{ + return stats->count; +} + +uint64_t stats_dist_get_sum(const struct stats_dist *stats) +{ + return stats->sum; +} + +uint64_t stats_dist_get_min(const struct stats_dist *stats) +{ + return stats->min; +} + +uint64_t stats_dist_get_max(const struct stats_dist *stats) +{ + return stats->max; +} + +double stats_dist_get_avg(const struct stats_dist *stats) +{ + if (stats->count == 0) + return 0; + + return (double)stats->sum / stats->count; +} + +static void stats_dist_ensure_sorted(struct stats_dist *stats) +{ + if (stats->sorted) + return; + + unsigned int count = (stats->count < stats->sample_count) + ? stats->count + : stats->sample_count; + i_qsort(stats->samples, count, sizeof(*stats->samples), + uint64_cmp); + stats->sorted = TRUE; +} + +uint64_t stats_dist_get_median(struct stats_dist *stats) +{ + if (stats->count == 0) + return 0; + /* cast-away const - reading requires sorting */ + stats_dist_ensure_sorted(stats); + unsigned int count = (stats->count < stats->sample_count) + ? stats->count + : stats->sample_count; + unsigned int idx1 = (count-1)/2, idx2 = count/2; + return (stats->samples[idx1] + stats->samples[idx2]) / 2; +} + +double stats_dist_get_variance(const struct stats_dist *stats) +{ + double sum = 0; + if (stats->count == 0) + return 0; + + double avg = stats_dist_get_avg(stats); + double count = (stats->count < stats->sample_count) + ? stats->count + : stats->sample_count; + + for(unsigned int i = 0; i < count; i++) { + sum += (stats->samples[i] - avg)*(stats->samples[i] - avg); + } + + return sum / count; +} + +/* This is independent of the stats framework, useful for any selection task */ +static unsigned int stats_dist_get_index(unsigned int range, double fraction) +{ + /* With out of range fractions, we can give the caller what + they probably want rather than just crashing. */ + if (fraction >= 1.) + return range - 1; + if (fraction <= 0.) + return 0; + + double idx_float = range * fraction; + unsigned int idx = idx_float; /* C defaults to rounding down */ + idx_float -= idx; + /* Exact boundaries belong to the open range below them. + As FP isn't exact, and ratios may be specified inexactly, + include a small amount of fuzz around the exact boundary. */ + if (idx_float < 1e-8*range) + idx--; + + return idx; +} + +uint64_t stats_dist_get_percentile(struct stats_dist *stats, double fraction) +{ + if (stats->count == 0) + return 0; + stats_dist_ensure_sorted(stats); + unsigned int count = (stats->count < stats->sample_count) + ? stats->count + : stats->sample_count; + unsigned int idx = stats_dist_get_index(count, fraction); + return stats->samples[idx]; +} + +const uint64_t *stats_dist_get_samples(const struct stats_dist *stats, + unsigned int *count_r) +{ + *count_r = (stats->count < stats->sample_count) + ? stats->count + : stats->sample_count; + return stats->samples; +} |