/*
 * hg64 - 64-bit histograms
 *
 * Written by Tony Finch
 *
 * Copyright (C) Internet Systems Consortium, Inc. ("ISC")
 *
 * SPDX-License-Identifier: MPL-2.0
 *
 * This Source Code Form is subject to the terms of the Mozilla Public
 * License, v. 2.0. If a copy of the MPL was not distributed with this
 * file, you can obtain one at https://mozilla.org/MPL/2.0/.
 */

#include <assert.h>
#include <stdatomic.h>
#include <stdbool.h>
#include <stddef.h>
#include <stdint.h>
#include <stdlib.h>

#include "hg64.h"

/* number of bins is same as number of bits in a value */
#define BINS 64

typedef atomic_uint_fast64_t counter;
typedef _Atomic(counter*) bin_ptr;

struct hg64 {
	unsigned sigbits;
	bin_ptr bin[BINS];
};

static inline counter* get_bin(hg64* hg, unsigned b) {
	/* key_to_new_counter() below has the matching store / release */
	return (atomic_load_explicit(&hg->bin[b], memory_order_acquire));
}

/*
 * when we only care about the histogram precision
 */
struct hg64p {
	unsigned sigbits;
};

#ifdef __has_attribute
#if __has_attribute(__transparent_union__)
#define TRANSPARENT __attribute__((__transparent_union__))
#endif
#endif

#ifdef TRANSPARENT
typedef union hg64u {
	hg64* hg;
	const struct hg64p* hp;
} hg64u TRANSPARENT;
#define hg64p(hu) ((hu).hp)
#else
typedef void* hg64u;
#define hg64p(hu) ((const struct hg64p*)(hu))
#endif

/*
 * The bins arrays have a static size for simplicity, but that means we
 * waste a little extra space that could be saved by omitting the
 * exponents that land in the denormal number bin. The following macros
 * calculate (at run time) the exact number of keys when we need to do
 * accurate bounds checks.
 */
#define DENORMALS(hp) ((hp)->sigbits - 1)
#define EXPONENTS(hp) (BINS - DENORMALS(hp))
#define MANTISSAS(hp) (1 << (hp)->sigbits)
#define KEYS(hp)      (EXPONENTS(hp) * MANTISSAS(hp))
#define BINSIZE(hp)   MANTISSAS(hp)

/**********************************************************************/

#define OUTARG(ptr, val) (void)(((ptr) != NULL) && (bool)(*(ptr) = (val)))

/**********************************************************************/

hg64* hg64_create(unsigned sigbits) {
	if (sigbits < 1 || 15 < sigbits) {
		return (NULL);
	}
	hg64* hg = malloc(sizeof(*hg));
	hg->sigbits = sigbits;
	/*
	 * it is probably portable to zero-initialize atomics, but the
	 * C standard says we shouldn't rely on it; this loop should
	 * optimize to memset() on most target systems
	 */
	for (unsigned b = 0; b < BINS; b++) {
		atomic_init(&hg->bin[b], NULL);
	}
	return (hg);
}

void hg64_destroy(hg64* hg) {
	for (unsigned b = 0; b < BINS; b++) {
		free(get_bin(hg, b));
	}
	*hg = (hg64){ 0 };
	free(hg);
}

/**********************************************************************/
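/*
 * A worked example of the key encoding below may help: with
 * sigbits == 2 we have BINSIZE == 4. The value 100 == 0b1100100 has
 * 57 leading zero bits, so exponent == 63 - 2 - 57 == 4 and
 * mantissa == 100 >> 4 == 6, giving key == (4 << 2) + 6 == 22.
 * Decoding key 22, key_to_minval() returns 6 << 4 == 96 and
 * key_to_maxval() returns 96 + 15 == 111, so every value in
 * [96, 111] lands in the same bucket. Small ("denormal") values
 * less than BINSIZE are their own keys and are counted exactly.
 */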
static inline uint64_t key_to_minval(hg64u hu, unsigned key) {
	unsigned binsize = BINSIZE(hg64p(hu));
	unsigned exponent = (key / binsize) - 1;
	uint64_t mantissa = (key % binsize) + binsize;
	return (key < binsize ? key : mantissa << exponent);
}

/*
 * don't shift by 64, and don't underflow exponent; instead,
 * reduce shift by 1 for each hazard and pre-shift UINT64_MAX
 */
static inline uint64_t key_to_maxval(hg64u hu, unsigned key) {
	unsigned binsize = BINSIZE(hg64p(hu));
	unsigned shift = 63 - (key / binsize);
	uint64_t range = UINT64_MAX / 4 >> shift;
	return (key_to_minval(hu, key) + range);
}

/*
 * This branchless conversion is due to Paul Khuong: see bin_down_of() in
 * https://pvk.ca/Blog/2015/06/27/linear-log-bucketing-fast-versatile-simple/
 */
static inline unsigned value_to_key(hg64u hu, uint64_t value) { /* fast path */
	const struct hg64p* hp = hg64p(hu);
	/* ensure that denormal numbers are all in the same bin */
	uint64_t binned = value | BINSIZE(hp);
	int clz = __builtin_clzll((unsigned long long)(binned));
	/* actually 1 less than the exponent except for denormals */
	unsigned exponent = 63 - hp->sigbits - clz;
	/* mantissa has leading bit set except for denormals */
	unsigned mantissa = value >> exponent;
	/* leading bit of mantissa adds one to exponent */
	return ((exponent << hp->sigbits) + mantissa);
}

static counter* key_to_new_counter(hg64* hg, unsigned key) { /* slow path */
	unsigned binsize = BINSIZE(hg);
	unsigned b = key / binsize;
	unsigned c = key % binsize;
	counter* old_bp = NULL;
	counter* new_bp = malloc(sizeof(counter) * binsize);
	/* see comment in hg64_create() above */
	for (unsigned i = 0; i < binsize; i++) {
		atomic_init(new_bp + i, 0);
	}
	bin_ptr* bpp = &hg->bin[b];
	if (atomic_compare_exchange_strong_explicit(bpp, &old_bp, new_bp,
						    memory_order_acq_rel,
						    memory_order_acquire)) {
		return (new_bp + c);
	} else {
		/* lost the race, so use the winner's counters */
		free(new_bp);
		return (old_bp + c);
	}
}

static inline counter* key_to_counter(hg64* hg, unsigned key) { /* fast path */
	unsigned binsize = BINSIZE(hg);
	unsigned b = key / binsize;
	unsigned c = key % binsize;
	counter* bp = get_bin(hg, b);
	return (bp == NULL ? NULL : bp + c);
}

static inline uint64_t get_key_count(hg64* hg, unsigned key) {
	counter* ctr = key_to_counter(hg, key);
	return (ctr == NULL ? 0
			    : atomic_load_explicit(ctr, memory_order_relaxed));
}
static inline void add_key_count(hg64* hg, unsigned key, uint64_t inc) {
	if (inc == 0) return;
	counter* ctr = key_to_counter(hg, key);
	ctr = ctr ? ctr : key_to_new_counter(hg, key);
	atomic_fetch_add_explicit(ctr, inc, memory_order_relaxed);
}

/**********************************************************************/

void hg64_inc(hg64* hg, uint64_t value) {
	add_key_count(hg, value_to_key(hg, value), 1);
}

bool hg64_get(hg64* hg, unsigned key, uint64_t* pmin, uint64_t* pmax,
	      uint64_t* pcount) {
	if (key < KEYS(hg)) {
		OUTARG(pmin, key_to_minval(hg, key));
		OUTARG(pmax, key_to_maxval(hg, key));
		OUTARG(pcount, get_key_count(hg, key));
		return (true);
	} else {
		return (false);
	}
}

unsigned hg64_next(hg64* hg, unsigned key) {
	key++;
	while (key < KEYS(hg) && (key & (BINSIZE(hg) - 1)) == 0 &&
	       key_to_counter(hg, key) == NULL) {
		key += BINSIZE(hg);
	}
	return (key);
}

/*
 * https://fanf2.user.srcf.net/hermes/doc/antiforgery/stats.pdf
 */
void hg64_mean_variance(hg64* hg, double* pmean, double* pvar) {
	double pop = 0.0;
	double mean = 0.0;
	double sigma = 0.0;
	uint64_t min, max, count;
	for (unsigned key = 0; hg64_get(hg, key, &min, &max, &count);
	     key = hg64_next(hg, key)) {
		double delta = (double)min / 2.0 + (double)max / 2.0 - mean;
		if (count != 0) { /* avoid division by zero */
			pop += count;
			mean += count * delta / pop;
			sigma += count * delta *
				 ((double)min / 2.0 + (double)max / 2.0 - mean);
		}
	}
	OUTARG(pmean, mean);
	OUTARG(pvar, sigma / pop);
}

/**********************************************************************/

void hg64_merge(hg64* target, hg64* source) {
	uint64_t count;
	for (unsigned skey = 0; hg64_get(source, skey, NULL, NULL, &count);
	     skey = hg64_next(source, skey)) {
		uint64_t svmin = key_to_minval(source, skey);
		uint64_t svmax = key_to_maxval(source, skey);
		unsigned tkmin = value_to_key(target, svmin);
		unsigned tkmax = value_to_key(target, svmax);
		unsigned keys = tkmax - tkmin + 1;
		/* is there a more cunning way to spread out the remainder? */
		uint64_t div = count / keys;
		uint64_t rem = count % keys;
		for (unsigned tkey = tkmin; tkey <= tkmax; tkey++) {
			/* the first rem target keys get one extra count */
			uint64_t inc = div + (uint64_t)(tkey - tkmin < rem);
			add_key_count(target, tkey, inc);
		}
	}
}

void hg64_diff(hg64* a, hg64* b, hg64* diff) {
	assert((a->sigbits == b->sigbits) && (b->sigbits == diff->sigbits));
	uint64_t count_a = 0;
	uint64_t count_b = 0;
	for (unsigned key = 0; hg64_get(a, key, NULL, NULL, &count_a); key++) {
		hg64_get(b, key, NULL, NULL, &count_b);
		add_key_count(diff, key, count_a - count_b);
	}
}

unsigned hg64_min_key(hg64* hg) {
	uint64_t pcount;
	for (unsigned key = 0; hg64_get(hg, key, NULL, NULL, &pcount);
	     key = hg64_next(hg, key)) {
		if (pcount > 0) return key;
	}
	return 0;
}

unsigned hg64_max_key(hg64* hg) {
	unsigned last_key = 0;
	uint64_t pcount;
	for (unsigned key = 0; hg64_get(hg, key, NULL, NULL, &pcount);
	     key = hg64_next(hg, key)) {
		if (pcount > 0) last_key = key;
	}
	return last_key;
}
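/*
 * A minimal usage sketch, kept out of normal builds behind a
 * hypothetical HG64_EXAMPLE guard (the guard name and the example are
 * not part of the library proper). It assumes the declarations in
 * hg64.h match the definitions above: record a million values, print
 * summary statistics, then walk the non-empty buckets.
 */
#ifdef HG64_EXAMPLE
#include <inttypes.h>
#include <stdio.h>

int main(void) {
	hg64* hg = hg64_create(5); /* 5 significant bits of precision */
	for (uint64_t v = 1; v <= 1000000; v++) {
		hg64_inc(hg, v);
	}

	double mean, var;
	hg64_mean_variance(hg, &mean, &var);
	printf("mean %.1f variance %.1f\n", mean, var);

	uint64_t min, max, count;
	for (unsigned key = 0; hg64_get(hg, key, &min, &max, &count);
	     key = hg64_next(hg, key)) {
		if (count > 0) {
			printf("[%" PRIu64 " .. %" PRIu64 "] %" PRIu64 "\n",
			       min, max, count);
		}
	}

	hg64_destroy(hg);
	return (0);
}
#endif /* HG64_EXAMPLE */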