diff options
Diffstat (limited to 'src/ext')
-rw-r--r-- | src/ext/hg64.c | 327 | ||||
-rw-r--r-- | src/ext/hg64.h | 88 | ||||
-rw-r--r-- | src/ext/parse_uri.c | 112 | ||||
-rw-r--r-- | src/ext/parse_uri.h | 41 |
4 files changed, 568 insertions, 0 deletions
diff --git a/src/ext/hg64.c b/src/ext/hg64.c new file mode 100644 index 0000000..9cd4b4f --- /dev/null +++ b/src/ext/hg64.c @@ -0,0 +1,327 @@ +/* + * hg64 - 64-bit histograms + * + * Written by Tony Finch <dot@dotat.at> <fanf@isc.org> + * + * Copyright (C) Internet Systems Consortium, Inc. ("ISC") + * + * SPDX-License-Identifier: MPL-2.0 + * + * This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, you can obtain one at https://mozilla.org/MPL/2.0/. + */ + +#include <assert.h> +#include <errno.h> +#include <stdatomic.h> +#include <stdbool.h> +#include <stdint.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> + +#include "hg64.h" + +/* number of bins is same as number of bits in a value */ +#define BINS 64 + +typedef atomic_uint_fast64_t counter; +typedef _Atomic(counter*) bin_ptr; + +struct hg64 { + unsigned sigbits; + bin_ptr bin[BINS]; +}; + +static inline counter* +get_bin(hg64* hg, unsigned b) +{ + /* key_to_new_counter() below has the matching store / release */ + return (atomic_load_explicit(&hg->bin[b], memory_order_acquire)); +} + +/* + * when we only care about the histogram precision + */ +struct hg64p { + unsigned sigbits; +}; + +#ifdef __has_attribute +#if __has_attribute(__transparent_union__) +#define TRANSPARENT __attribute__((__transparent_union__)) +#endif +#endif + +#ifdef TRANSPARENT + +typedef union hg64u { + hg64* hg; + const struct hg64p* hp; +} hg64u TRANSPARENT; + +#define hg64p(hu) ((hu).hp) +#else + +typedef void* hg64u; + +#define hg64p(hu) ((const struct hg64p*)(hu)) +#endif + +/* + * The bins arrays have a static size for simplicity, but that means We + * waste a little extra space that could be saved by omitting the + * exponents that land in the denormal number bin. The following macros + * calculate (at run time) the exact number of keys when we need to do + * accurate bounds checks. + */ +#define DENORMALS(hp) ((hp)->sigbits - 1) +#define EXPONENTS(hp) (BINS - DENORMALS(hp)) +#define MANTISSAS(hp) (1 << (hp)->sigbits) +#define KEYS(hp) (EXPONENTS(hp) * MANTISSAS(hp)) + +#define BINSIZE(hp) MANTISSAS(hp) + +/**********************************************************************/ + +#define OUTARG(ptr, val) (void)(((ptr) != NULL) && (bool)(*(ptr) = (val))) + +/**********************************************************************/ + +hg64* hg64_create(unsigned sigbits) +{ + if (sigbits < 1 || 15 < sigbits) { + return (NULL); + } + hg64* hg = malloc(sizeof(*hg)); + hg->sigbits = sigbits; + /* + * it is probably portable to zero-initialize atomics but the + * C standard says we shouldn't rely on it; but this loop + * should optimize to memset() on most target systems + */ + for (unsigned b = 0; b < BINS; b++) { + atomic_init(&hg->bin[b], NULL); + } + return (hg); +} + +void hg64_destroy(hg64* hg) +{ + for (unsigned b = 0; b < BINS; b++) { + free(get_bin(hg, b)); + } + *hg = (hg64) { 0 }; + free(hg); +} + +/**********************************************************************/ + +static inline uint64_t +key_to_minval(hg64u hu, unsigned key) +{ + unsigned binsize = BINSIZE(hg64p(hu)); + unsigned exponent = (key / binsize) - 1; + uint64_t mantissa = (key % binsize) + binsize; + return (key < binsize ? key : mantissa << exponent); +} + +/* + * don't shift by 64, and don't underflow exponent; instead, + * reduce shift by 1 for each hazard and pre-shift UINT64_MAX + */ +static inline uint64_t +key_to_maxval(hg64u hu, unsigned key) +{ + unsigned binsize = BINSIZE(hg64p(hu)); + unsigned shift = 63 - (key / binsize); + uint64_t range = UINT64_MAX / 4 >> shift; + return (key_to_minval(hu, key) + range); +} + +/* + * This branchless conversion is due to Paul Khuong: see bin_down_of() in + * https://pvk.ca/Blog/2015/06/27/linear-log-bucketing-fast-versatile-simple/ + */ +static inline unsigned +value_to_key(hg64u hu, uint64_t value) +{ + /* fast path */ + const struct hg64p* hp = hg64p(hu); + /* ensure that denormal numbers are all in the same bin */ + uint64_t binned = value | BINSIZE(hp); + int clz = __builtin_clzll((unsigned long long)(binned)); + /* actually 1 less than the exponent except for denormals */ + unsigned exponent = 63 - hp->sigbits - clz; + /* mantissa has leading bit set except for denormals */ + unsigned mantissa = value >> exponent; + /* leading bit of mantissa adds one to exponent */ + return ((exponent << hp->sigbits) + mantissa); +} + +static counter* +key_to_new_counter(hg64* hg, unsigned key) +{ + /* slow path */ + unsigned binsize = BINSIZE(hg); + unsigned b = key / binsize; + unsigned c = key % binsize; + counter* old_bp = NULL; + counter* new_bp = malloc(sizeof(counter) * binsize); + /* see comment in hg64_create() above */ + for (unsigned i = 0; i < binsize; i++) { + atomic_init(new_bp + i, 0); + } + bin_ptr* bpp = &hg->bin[b]; + if (atomic_compare_exchange_strong_explicit(bpp, &old_bp, new_bp, + memory_order_acq_rel, memory_order_acquire)) { + return (new_bp + c); + } else { + /* lost the race, so use the winner's counters */ + free(new_bp); + return (old_bp + c); + } +} + +static inline counter* +key_to_counter(hg64* hg, unsigned key) +{ + /* fast path */ + unsigned binsize = BINSIZE(hg); + unsigned b = key / binsize; + unsigned c = key % binsize; + counter* bp = get_bin(hg, b); + return (bp == NULL ? NULL : bp + c); +} + +static inline uint64_t +get_key_count(hg64* hg, unsigned key) +{ + counter* ctr = key_to_counter(hg, key); + return (ctr == NULL ? 0 : atomic_load_explicit(ctr, memory_order_relaxed)); +} + +static inline void +add_key_count(hg64* hg, unsigned key, uint64_t inc) +{ + if (inc == 0) + return; + counter* ctr = key_to_counter(hg, key); + ctr = ctr ? ctr : key_to_new_counter(hg, key); + atomic_fetch_add_explicit(ctr, inc, memory_order_relaxed); +} + +/**********************************************************************/ + +void hg64_inc(hg64* hg, uint64_t value) +{ + add_key_count(hg, value_to_key(hg, value), 1); +} + +bool hg64_get(hg64* hg, unsigned key, + uint64_t* pmin, uint64_t* pmax, uint64_t* pcount) +{ + if (key < KEYS(hg)) { + OUTARG(pmin, key_to_minval(hg, key)); + OUTARG(pmax, key_to_maxval(hg, key)); + OUTARG(pcount, get_key_count(hg, key)); + return (true); + } else { + return (false); + } +} + +unsigned +hg64_next(hg64* hg, unsigned key) +{ + key++; + while (key < KEYS(hg) && (key & (BINSIZE(hg) - 1)) == 0 && key_to_counter(hg, key) == NULL) { + key += BINSIZE(hg); + } + return (key); +} + +/* + * https://fanf2.user.srcf.net/hermes/doc/antiforgery/stats.pdf + */ +void hg64_mean_variance(hg64* hg, double* pmean, double* pvar) +{ + double pop = 0.0; + double mean = 0.0; + double sigma = 0.0; + uint64_t min, max, count; + for (unsigned key = 0; + hg64_get(hg, key, &min, &max, &count); + key = hg64_next(hg, key)) { + double delta = (double)min / 2.0 + (double)max / 2.0 - mean; + if (count != 0) { /* avoid division by zero */ + pop += count; + mean += count * delta / pop; + sigma += count * delta * (min + max - mean); + } + } + OUTARG(pmean, mean); + OUTARG(pvar, sigma / pop); +} + +/**********************************************************************/ + +void hg64_merge(hg64* target, hg64* source) +{ + uint64_t count; + for (unsigned skey = 0; + hg64_get(source, skey, NULL, NULL, &count); + skey = hg64_next(source, skey)) { + uint64_t svmin = key_to_minval(source, skey); + uint64_t svmax = key_to_maxval(source, skey); + unsigned tkmin = value_to_key(target, svmin); + unsigned tkmax = value_to_key(target, svmax); + unsigned keys = tkmax - tkmin + 1; + /* is there a more cunning way to spread out the remainder? */ + uint64_t div = count / keys; + uint64_t rem = count % keys; + for (unsigned tkey = tkmin; tkey <= tkmax; tkey++) { + uint64_t inc = div + (uint64_t)(tkey < rem); + add_key_count(target, tkey, inc); + } + } +} + +void hg64_diff(hg64* a, hg64* b, hg64* diff) +{ + assert((a->sigbits == b->sigbits) && (b->sigbits == diff->sigbits)); + uint64_t count_a = 0; + uint64_t count_b = 0; + for (unsigned key = 0; + hg64_get(a, key, NULL, NULL, &count_a); + key++) { + hg64_get(b, key, NULL, NULL, &count_b); + add_key_count(diff, key, count_a - count_b); + } +} + +unsigned hg64_min_key(hg64* hg) +{ + uint64_t pcount; + for (unsigned key = 0; + hg64_get(hg, key, NULL, NULL, &pcount); + key = hg64_next(hg, key)) { + if (pcount > 0) + return key; + } + return 0; +} + +unsigned hg64_max_key(hg64* hg) +{ + unsigned last_key = 0; + uint64_t pcount; + for (unsigned key = 0; + hg64_get(hg, key, NULL, NULL, &pcount); + key = hg64_next(hg, key)) { + if (pcount > 0) + last_key = key; + } + return last_key; +} diff --git a/src/ext/hg64.h b/src/ext/hg64.h new file mode 100644 index 0000000..21ec3d3 --- /dev/null +++ b/src/ext/hg64.h @@ -0,0 +1,88 @@ +/* + * hg64 - 64-bit histograms + * + * Written by Tony Finch <dot@dotat.at> <fanf@isc.org> + * + * Copyright (C) Internet Systems Consortium, Inc. ("ISC") + * + * SPDX-License-Identifier: MPL-2.0 + * + * This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, you can obtain one at https://mozilla.org/MPL/2.0/. + */ + +#ifndef HG64_H +#define HG64_H 1 + +typedef struct hg64 hg64; + +/* + * Allocate a new histogram. `sigbits` must be between 1 and 15 + * inclusive; it is the number of significant bits of each value + * to use when mapping values to buckets. + */ +hg64* hg64_create(unsigned sigbits); + +/* + * Free the memory used by a histogram + */ +void hg64_destroy(hg64* hg); + +/* + * Add 1 to the value's bucket + */ +void hg64_inc(hg64* hg, uint64_t value); + +/* + * Get information about a bucket. This can be used as an iterator, + * by initializing `key` to zero and incrementing by one or using + * `hg64_next()` until `hg64_get()` returns `false`. The number of + * iterations is a little less than `1 << (6 + sigbits)`. + * + * If `pmin` is non-NULL it is set to the bucket's minimum inclusive value. + * + * If `pmax` is non-NULL it is set to the bucket's maximum inclusive value. + * + * If `pcount` is non-NULL it is set to the bucket's counter, which + * can be zero. (Empty buckets are included in the iterator.) + */ +bool hg64_get(hg64* hg, unsigned key, + uint64_t* pmin, uint64_t* pmax, uint64_t* pcount); + +/* + * Skip to the next key, omitting groups of nonexistent buckets. + */ +unsigned hg64_next(hg64* hg, unsigned key); + +/* + * Get summary statistics about the histogram. + * + * If `pmean` is non-NULL it is set to the mean of the recorded data. + * + * If `pvar` is non-NULL it is set to the variance of the recorded + * data. The standard deviation is the square root of the variance. + */ +void hg64_mean_variance(hg64* hg, double* pmean, double* pvar); + +/* + * Increase the counts in `target` by the counts recorded in `source` + */ +void hg64_merge(hg64* target, hg64* source); + +/* + * diff = a - b + */ +void hg64_diff(hg64* a, hg64* b, hg64* diff); + +/* + * Get highest key with non-zero value. Returns 0 if all values are 0. + */ +unsigned hg64_max_key(hg64* hg); + +/* + * Get lowest key with non-zero value. Returns 0 if all values are 0. + */ +unsigned hg64_min_key(hg64* hg); + +#endif diff --git a/src/ext/parse_uri.c b/src/ext/parse_uri.c new file mode 100644 index 0000000..fa74c85 --- /dev/null +++ b/src/ext/parse_uri.c @@ -0,0 +1,112 @@ +#include "parse_uri.h" + +#include <string.h> + +// From: https://github.com/nghttp2/nghttp2/blob/master/examples/client.c +/* + * nghttp2 - HTTP/2 C Library + * + * Copyright (c) 2013 Tatsuhiro Tsujikawa + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE + * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + */ + +int parse_uri(struct URI* res, const char* uri) +{ + /* We only interested in https */ + size_t len, i, offset; + int ipv6addr = 0; + memset(res, 0, sizeof(struct URI)); + len = strlen(uri); + if (len < 9 || memcmp("https://", uri, 8) != 0) { + return -1; + } + offset = 8; + res->host = res->hostport = &uri[offset]; + res->hostlen = 0; + if (uri[offset] == '[') { + /* IPv6 literal address */ + ++offset; + ++res->host; + ipv6addr = 1; + for (i = offset; i < len; ++i) { + if (uri[i] == ']') { + res->hostlen = i - offset; + offset = i + 1; + break; + } + } + } else { + const char delims[] = ":/?#"; + for (i = offset; i < len; ++i) { + if (strchr(delims, uri[i]) != NULL) { + break; + } + } + res->hostlen = i - offset; + offset = i; + } + if (res->hostlen == 0) { + return -1; + } + /* Assuming https */ + res->port = 443; + if (offset < len) { + if (uri[offset] == ':') { + /* port */ + const char delims[] = "/?#"; + int port = 0; + ++offset; + for (i = offset; i < len; ++i) { + if (strchr(delims, uri[i]) != NULL) { + break; + } + if ('0' <= uri[i] && uri[i] <= '9') { + port *= 10; + port += uri[i] - '0'; + if (port > 65535) { + return -1; + } + } else { + return -1; + } + } + if (port == 0) { + return -1; + } + offset = i; + res->port = (uint16_t)port; + } + } + res->hostportlen = (size_t)(uri + offset + ipv6addr - res->host); + for (i = offset; i < len; ++i) { + if (uri[i] == '#') { + break; + } + } + if (i - offset == 0) { + res->path = "/"; + res->pathlen = 1; + } else { + res->path = &uri[offset]; + res->pathlen = i - offset; + } + return 0; +} diff --git a/src/ext/parse_uri.h b/src/ext/parse_uri.h new file mode 100644 index 0000000..bb69e79 --- /dev/null +++ b/src/ext/parse_uri.h @@ -0,0 +1,41 @@ +// From: https://github.com/nghttp2/nghttp2/blob/master/examples/client.c +/* + * nghttp2 - HTTP/2 C Library + * + * Copyright (c) 2013 Tatsuhiro Tsujikawa + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE + * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + */ + +#include <inttypes.h> +#include <stdlib.h> + +struct URI { + const char* host; + /* In this program, path contains query component as well. */ + const char* path; + size_t pathlen; + const char* hostport; + size_t hostlen; + size_t hostportlen; + uint16_t port; +}; + +int parse_uri(struct URI* res, const char* uri); |