diff options
author | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-03-09 13:19:48 +0000 |
---|---|---|
committer | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-03-09 13:20:02 +0000 |
commit | 58daab21cd043e1dc37024a7f99b396788372918 (patch) | |
tree | 96771e43bb69f7c1c2b0b4f7374cb74d7866d0cb /web/server/h2o/libh2o/deps/libgkc | |
parent | Releasing debian version 1.43.2-1. (diff) | |
download | netdata-58daab21cd043e1dc37024a7f99b396788372918.tar.xz netdata-58daab21cd043e1dc37024a7f99b396788372918.zip |
Merging upstream version 1.44.3.
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'web/server/h2o/libh2o/deps/libgkc')
-rw-r--r-- | web/server/h2o/libh2o/deps/libgkc/gkc.c | 439 | ||||
-rw-r--r-- | web/server/h2o/libh2o/deps/libgkc/gkc.h | 37 | ||||
-rw-r--r-- | web/server/h2o/libh2o/deps/libgkc/test.c | 141 |
3 files changed, 617 insertions, 0 deletions
diff --git a/web/server/h2o/libh2o/deps/libgkc/gkc.c b/web/server/h2o/libh2o/deps/libgkc/gkc.c new file mode 100644 index 000000000..2d4b61737 --- /dev/null +++ b/web/server/h2o/libh2o/deps/libgkc/gkc.c @@ -0,0 +1,439 @@ +/* + * Copyright (c) 2016 Fastly, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ + +/* + * A streaming quantile library. + * + * + * A `gkc_summary` structure is used to summarize observations + * within a given error range. Observations are inserted using + * `gkc_insert_value`, quantile queries can then be performed with + * `gkc_query` against the summary. + * Provided two summaries are using the same epsilon, they can be merged + * by calling `gkc_combine`. + * + * The algorithm guaranties a bounded memory usage to: + * (11/(2 x epsilon))*log(2 * epsilon * N) + * + * For epsilon = 0.01 and N = 2^64, this is only 10k max in the + * theoritical worse case. In practice, it's reliably using less: + * inserting random data gets us * ~100 max insertions for > 50 millions + * of entries. + * + * See www.cis.upenn.edu/~sanjeev/papers/sigmod01_quantiles.pdf for + * the paper describing this algorithm and data structure. + * + */ + +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <limits.h> +#include <inttypes.h> + +struct list { + struct list *prev, *next; +}; + +struct gkc_summary { + size_t nr_elems; + double epsilon; + uint64_t alloced; + uint64_t max_alloced; + struct list head; + struct freelist *fl; +}; + + +static inline int list_empty(struct list *l) +{ + return l->next == l; +} +static inline void list_init(struct list *n) +{ + n->next = n; + n->prev = n; +} + +static inline void list_del(struct list *n) +{ + n->next->prev = n->prev; + n->prev->next = n->next; +} + +static inline void list_add(struct list *l, struct list *n) +{ + n->next = l->next; + n->next->prev = n; + l->next = n; + n->prev = l; +} + +static inline void list_add_tail(struct list *l, struct list *n) +{ + list_add(l->prev, n); +} + +#undef offsetof +#ifdef __compiler_offsetof +#define offsetof(TYPE,MEMBER) __compiler_offsetof(TYPE,MEMBER) +#else +#define offsetof(TYPE, MEMBER) ((size_t) &((TYPE *)0)->MEMBER) +#endif + +#define container_of(ptr, type, member) ((type *)((char *)(ptr) - offsetof(type, member))) + +struct freelist { + struct freelist *next; +}; + +static uint64_t ullog2(uint64_t x) +{ + static const uint64_t debruijn_magic = 0x022fdd63cc95386dULL; + + static const uint64_t magic_table[] = { + 0, 1, 2, 53, 3, 7, 54, 27, 4, 38, 41, 8, 34, 55, 48, 28, + 62, 5, 39, 46, 44, 42, 22, 9, 24, 35, 59, 56, 49, 18, 29, 11, + 63, 52, 6, 26, 37, 40, 33, 47, 61, 45, 43, 21, 23, 58, 17, 10, + 51, 25, 36, 32, 60, 20, 57, 16, 50, 31, 19, 15, 30, 14, 13, 12, + }; + + x |= (x >> 1); + x |= (x >> 2); + x |= (x >> 4); + x |= (x >> 8); + x |= (x >> 16); + x |= (x >> 32); + return (magic_table[((x & ~(x>>1))*debruijn_magic)>>58]); +} + +struct gkc_tuple { + uint64_t value; + double g; + uint64_t delta; + struct list node; +}; +#define list_to_tuple(ln) (container_of((ln), struct gkc_tuple, node)) + + +void gkc_summary_init(struct gkc_summary *s, double epsilon) +{ + list_init(&s->head); + s->epsilon = epsilon; +} + +struct gkc_summary *gkc_summary_alloc(double epsilon) +{ + struct gkc_summary *s; + s = calloc(1, sizeof(*s)); + gkc_summary_init(s, epsilon); + return s; +} + +#include <assert.h> +/* debug only, checks a number of properties that s must satisfy at all times */ +void gkc_sanity_check(struct gkc_summary *s) +{ + uint64_t nr_elems, nr_alloced; + struct list *cur; + struct gkc_tuple *tcur; + + nr_elems = 0; + nr_alloced = 0; + cur = s->head.next; + while (cur != &s->head) { + tcur = list_to_tuple(cur); + cur = cur->next; + nr_elems += tcur->g; + nr_alloced++; + if (s->nr_elems > (1/s->epsilon)) { + /* there must be enough observations for this to become true */ + assert(tcur->g + tcur->delta <= (s->nr_elems * s->epsilon * 2)); + } + assert(nr_alloced <= s->alloced); + } + assert(nr_elems == s->nr_elems); + assert(nr_alloced == s->alloced); +} + +static struct gkc_tuple *gkc_alloc(struct gkc_summary *s) +{ + s->alloced++; + if (s->alloced > s->max_alloced) { + s->max_alloced = s->alloced; + } + + if (s->fl) { + void *ret; + ret = s->fl; + s->fl = s->fl->next; + return ret; + } + return malloc(sizeof(struct gkc_tuple)); +} + +static void gkc_free(struct gkc_summary *s, struct gkc_tuple *p) +{ + struct freelist *flp = (void *)p; + s->alloced--; + + flp->next = s->fl; + s->fl = flp; +} + +void gkc_summary_free(struct gkc_summary *s) +{ + struct freelist *fl; + struct list *cur; + + cur = s->head.next; + while (cur != &s->head) { + struct list *next; + next = cur->next; + gkc_free(s, list_to_tuple(cur)); + cur = next; + } + fl = s->fl; + while (fl) { + void *p; + p = fl; + fl = fl->next; + free(p); + } + free(s); +} + +uint64_t gkc_query(struct gkc_summary *s, double q) +{ + struct list *cur, *next; + int rank; + double gi; + double ne; + + rank = 0.5 + q * s->nr_elems; + ne = s->nr_elems * s->epsilon; + gi = 0; + if (list_empty(&s->head)) { + return 0; + } + + cur = s->head.next; + + while (1) { + struct gkc_tuple *tcur, *tnext; + + tcur = list_to_tuple(cur); + next = cur->next; + if (next == &s->head) { + return tcur->value; + } + tnext = list_to_tuple(next); + + gi += tcur->g; + if ((rank + ne) < (gi + tnext->g + tnext->delta)) { + if ((rank + ne) < (gi + tnext->g)) { + return tcur->value; + } + return tnext->value; + } + cur = next; + } +} + +static uint64_t band(struct gkc_summary *s, uint64_t delta) +{ + uint64_t diff; + + diff = 1 + (s->epsilon * s->nr_elems * 2) - delta; + + if (diff == 1) { + return 0; + } else { + return ullog2(diff)/ullog2(2); + } +} + +static void gkc_compress(struct gkc_summary *s) +{ + int max_compress; + struct list *cur, *prev; + struct gkc_tuple *tcur, *tprev; + uint64_t bi, b_plus_1; + + max_compress = 2 * s->epsilon * s->nr_elems; + if (s->nr_elems < 2) { + return; + } + + prev = s->head.prev; + cur = prev->prev; + + while (cur != &s->head) { + tcur = list_to_tuple(cur); + tprev = list_to_tuple(prev); + + b_plus_1 = band(s, tprev->delta); + bi = band(s, tcur->delta); + + if (bi <= b_plus_1 && ((tcur->g + tprev->g + tprev->delta) <= max_compress)) { + tprev->g += tcur->g; + list_del(cur); + gkc_free(s, tcur); + cur = prev->prev; + continue; + } + prev = cur; + cur = cur->prev; + } +} + +void gkc_insert_value(struct gkc_summary *s, double value) +{ + struct list *cur = NULL; + struct gkc_tuple *new, *tcur, *tnext = NULL; + + new = gkc_alloc(s); + memset(new, 0, sizeof(*new)); + new->value = value; + new->g = 1; + list_init(&new->node); + + + s->nr_elems++; + + + /* first insert */ + if (list_empty(&s->head)) { + list_add(&s->head, &new->node); + return; + } + + cur = s->head.next; + tcur = list_to_tuple(cur); + /* v < v0, new min */ + if (tcur->value > new->value) { + list_add(&s->head, &new->node); + goto out; + } + + double gi = 0; + while (cur->next != &s->head) { + tnext = list_to_tuple(cur->next); + tcur = list_to_tuple(cur); + + gi += tcur->g; + if (tcur->value <= new->value && new->value < tnext->value) { + /* INSERT "(v, 1, Δ)" into S between vi and vi+1; */ + new->delta = tcur->g + tcur->delta - 1; + list_add(cur, &new->node); + goto out; + } + cur = cur->next; + } + /* v > vs-1, new max */ + list_add_tail(&s->head, &new->node); +out: + if (s->nr_elems % (int)(1/(2*s->epsilon))) { + gkc_compress(s); + } +} + +void gkc_print_summary(struct gkc_summary *s) +{ + struct gkc_tuple *tcur; + struct list *cur; + + fprintf(stderr, "nr_elems: %zu, epsilon: %.02f, alloced: %" PRIu64 ", overfilled: %.02f, max_alloced: %" PRIu64 "\n", + s->nr_elems, s->epsilon, s->alloced, 2 * s->epsilon * s->nr_elems, s->max_alloced); + if (list_empty(&s->head)) { + fprintf(stderr, "Empty summary\n"); + return; + } + + cur = s->head.next; + while (cur != &s->head) { + tcur = list_to_tuple(cur); + fprintf(stderr, "(v: %" PRIu64 ", g: %.02f, d: %" PRIu64 ") ", tcur->value, tcur->g, tcur->delta); + cur = cur->next; + } + fprintf(stderr, "\n"); +} + +struct gkc_summary *gkc_combine(struct gkc_summary *s1, struct gkc_summary *s2) +{ + struct gkc_summary *snew; + struct list *cur1, *cur2; + struct gkc_tuple *tcur1, *tcur2, *tnew; + + if (s1->epsilon != s2->epsilon) { + return NULL; + } + snew = gkc_summary_alloc(s1->epsilon); + + cur1 = s1->head.next; + cur2 = s2->head.next; + while (cur1 != &s1->head && cur2 != &s2->head) { + tcur1 = list_to_tuple(cur1); + tcur2 = list_to_tuple(cur2); + + tnew = gkc_alloc(snew); + if (tcur1->value < tcur2->value) { + tnew->value = tcur1->value; + tnew->g = tcur1->g; + tnew->delta = tcur1->delta; + cur1 = cur1->next; + } else { + tnew->value = tcur2->value; + tnew->g = tcur2->g; + tnew->delta = tcur2->delta; + cur2 = cur2->next; + } + list_add_tail(&snew->head, &tnew->node); + snew->nr_elems += tnew->g; + } + while (cur1 != &s1->head) { + tcur1 = list_to_tuple(cur1); + + tnew = gkc_alloc(snew); + tnew->value = tcur1->value; + tnew->g = tcur1->g; + tnew->delta = tcur1->delta; + list_add_tail(&snew->head, &tnew->node); + snew->nr_elems += tnew->g; + cur1 = cur1->next; + } + while (cur2 != &s2->head) { + tcur2 = list_to_tuple(cur2); + + tnew = gkc_alloc(snew); + tnew->value = tcur2->value; + tnew->g = tcur2->g; + tnew->delta = tcur2->delta; + list_add_tail(&snew->head, &tnew->node); + snew->nr_elems += tnew->g; + cur2 = cur2->next; + } + snew->max_alloced = snew->alloced; + gkc_compress(snew); + + return snew; +} diff --git a/web/server/h2o/libh2o/deps/libgkc/gkc.h b/web/server/h2o/libh2o/deps/libgkc/gkc.h new file mode 100644 index 000000000..cbf0878f3 --- /dev/null +++ b/web/server/h2o/libh2o/deps/libgkc/gkc.h @@ -0,0 +1,37 @@ +/* + * Copyright (c) 2016 Fastly, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ + +#ifndef __GKC_H__ +#define __GKC_H__ + +struct gkc_summary; + +void gkc_print_summary(struct gkc_summary *s); +void gkc_insert_value(struct gkc_summary *s, double value); +unsigned long gkc_query(struct gkc_summary *s, double q); +void gkc_summary_free(struct gkc_summary *s); +struct gkc_summary *gkc_summary_alloc(double epsilon); +void gkc_summary_init(struct gkc_summary *s, double epsilon); +void gkc_sanity_check(struct gkc_summary *s); +struct gkc_summary *gkc_combine(struct gkc_summary *s1, struct gkc_summary *s2); + +#endif /* __GKC_H__ */ diff --git a/web/server/h2o/libh2o/deps/libgkc/test.c b/web/server/h2o/libh2o/deps/libgkc/test.c new file mode 100644 index 000000000..d0cc251bd --- /dev/null +++ b/web/server/h2o/libh2o/deps/libgkc/test.c @@ -0,0 +1,141 @@ +/* + * Copyright (c) 2016 Fastly, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ + +#include <stdio.h> +#include <stdlib.h> +#include <time.h> + +#include "gkc.h" + +void print_query(struct gkc_summary *s, double q) +{ + double v = gkc_query(s, q); + fprintf(stderr, "queried: %.02f, found: %.02f\n", q, v); +} + +int main(void) +{ +#define ARRAY_SIZE(x) (sizeof(x)/sizeof(x[0])) + unsigned int i; +#if 0 + double input[] = { + 3658, 3673, 3693, 3715, 3723, 3724, 3724, 3690, 3695, 3689, 3695, 3700, + 3690, 3699, 3699, 3701, 3704, 3704, 3714, 3707, 3698, 3701, 3697, 3697, + 3712, 3713, 3714, 3715, 3717, 3712, 3712, 3717, 3728, 3728, 3744, 3751, + 3764, 3751, 3798, 3802, 3800, 3824, 3810, 3824, 3811, 3802, 3811, 3801, + 3791, 3796, 3803, 3817, 3819, 3818, 3815, 3804, 3796, 3784, 3783, 3784, + 3774, 3776, 3776, 3764, 3763, 3806, 3819, 3835, 3825, 3786, 3795, 3795, + 3776, 3760, 3789, 3786, 3771, 3778, 3782, 3776, 3781, 3784, 3801, 3810, + 3815, 3792, 3764, 3770, 3746, 3741, 3746, 3756, 3755, 3775, 3776, 3773, + 3777, 3801, 3804, 3807 + }; +#else + double input[] = { 12, 6, 10, 1 }; +#endif + FILE *out; + struct gkc_summary *summary; + struct gkc_summary *s1, *s2, *snew; + + summary = gkc_summary_alloc(0.01); + print_query(summary, 0.1); + goto test_combine; + summary = gkc_summary_alloc(0.01); + +#if 0 + for (i = 0; i < ARRAY_SIZE(input); i++) { + gkc_insert_value(&summary, input[i]); + } + gkc_print_summary(&summary); + print_query(&summary, 0); + print_query(&summary, .25); + print_query(&summary, .5); + print_query(&summary, .75); + print_query(&summary, 1); + return 0; +#else + (void)input; +#endif + +#define tofile 0 + if (tofile) { + out = fopen("data", "w+"); + } + srandom(time(NULL)); + for (i = 0; i < 10 * 1000 * 1000; i++) { + long r = random() % 10000; + gkc_insert_value(summary, r); + if (tofile) { + fprintf(out, "%ld\n", r); + } + } + if (tofile) { + fclose(out); + } + gkc_print_summary(summary); + print_query(summary, .02); + print_query(summary, .1); + print_query(summary, .25); + print_query(summary, .5); + print_query(summary, .75); + print_query(summary, .82); + print_query(summary, .88); + print_query(summary, .86); + print_query(summary, .99); + + gkc_sanity_check(summary); + + gkc_summary_free(summary); + +test_combine: + s1 = gkc_summary_alloc(0.01); + s2 = gkc_summary_alloc(0.01); + + for (i = 0; i < 1 * 10 * 1000; i++) { + long r = random() % 10000; + gkc_insert_value(s1, r); + } +#if 0 + for (i = 0; i < 1 * 1 * 1000; i++) { + gkc_insert_value(s2, 111); + } +#endif + snew = gkc_combine(s1, s2); + gkc_summary_free(s1); + gkc_summary_free(s2); + + gkc_print_summary(snew); + print_query(snew, .02); + print_query(snew, .1); + print_query(snew, .25); + print_query(snew, .5); + print_query(snew, .75); + print_query(snew, .82); + print_query(snew, .88); + print_query(snew, .86); + print_query(snew, .99); + + gkc_sanity_check(snew); + + gkc_summary_free(snew); + + return 0; +} |