summaryrefslogtreecommitdiffstats
path: root/web/server/h2o/libh2o/deps/libgkc/gkc.c
diff options
context:
space:
mode:
Diffstat (limited to '')
-rw-r--r--web/server/h2o/libh2o/deps/libgkc/gkc.c439
1 files changed, 439 insertions, 0 deletions
diff --git a/web/server/h2o/libh2o/deps/libgkc/gkc.c b/web/server/h2o/libh2o/deps/libgkc/gkc.c
new file mode 100644
index 00000000..2d4b6173
--- /dev/null
+++ b/web/server/h2o/libh2o/deps/libgkc/gkc.c
@@ -0,0 +1,439 @@
+/*
+ * Copyright (c) 2016 Fastly, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+/*
+ * A streaming quantile library.
+ *
+ *
+ * A `gkc_summary` structure is used to summarize observations
+ * within a given error range. Observations are inserted using
+ * `gkc_insert_value`, quantile queries can then be performed with
+ * `gkc_query` against the summary.
+ * Provided two summaries are using the same epsilon, they can be merged
+ * by calling `gkc_combine`.
+ *
+ * The algorithm guaranties a bounded memory usage to:
+ * (11/(2 x epsilon))*log(2 * epsilon * N)
+ *
+ * For epsilon = 0.01 and N = 2^64, this is only 10k max in the
+ * theoritical worse case. In practice, it's reliably using less:
+ * inserting random data gets us * ~100 max insertions for > 50 millions
+ * of entries.
+ *
+ * See www.cis.upenn.edu/~sanjeev/papers/sigmod01_quantiles.pdf for
+ * the paper describing this algorithm and data structure.
+ *
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <limits.h>
+#include <inttypes.h>
+
+struct list {
+ struct list *prev, *next;
+};
+
+struct gkc_summary {
+ size_t nr_elems;
+ double epsilon;
+ uint64_t alloced;
+ uint64_t max_alloced;
+ struct list head;
+ struct freelist *fl;
+};
+
+
+static inline int list_empty(struct list *l)
+{
+ return l->next == l;
+}
+static inline void list_init(struct list *n)
+{
+ n->next = n;
+ n->prev = n;
+}
+
+static inline void list_del(struct list *n)
+{
+ n->next->prev = n->prev;
+ n->prev->next = n->next;
+}
+
+static inline void list_add(struct list *l, struct list *n)
+{
+ n->next = l->next;
+ n->next->prev = n;
+ l->next = n;
+ n->prev = l;
+}
+
+static inline void list_add_tail(struct list *l, struct list *n)
+{
+ list_add(l->prev, n);
+}
+
+#undef offsetof
+#ifdef __compiler_offsetof
+#define offsetof(TYPE,MEMBER) __compiler_offsetof(TYPE,MEMBER)
+#else
+#define offsetof(TYPE, MEMBER) ((size_t) &((TYPE *)0)->MEMBER)
+#endif
+
+#define container_of(ptr, type, member) ((type *)((char *)(ptr) - offsetof(type, member)))
+
+struct freelist {
+ struct freelist *next;
+};
+
+static uint64_t ullog2(uint64_t x)
+{
+ static const uint64_t debruijn_magic = 0x022fdd63cc95386dULL;
+
+ static const uint64_t magic_table[] = {
+ 0, 1, 2, 53, 3, 7, 54, 27, 4, 38, 41, 8, 34, 55, 48, 28,
+ 62, 5, 39, 46, 44, 42, 22, 9, 24, 35, 59, 56, 49, 18, 29, 11,
+ 63, 52, 6, 26, 37, 40, 33, 47, 61, 45, 43, 21, 23, 58, 17, 10,
+ 51, 25, 36, 32, 60, 20, 57, 16, 50, 31, 19, 15, 30, 14, 13, 12,
+ };
+
+ x |= (x >> 1);
+ x |= (x >> 2);
+ x |= (x >> 4);
+ x |= (x >> 8);
+ x |= (x >> 16);
+ x |= (x >> 32);
+ return (magic_table[((x & ~(x>>1))*debruijn_magic)>>58]);
+}
+
+struct gkc_tuple {
+ uint64_t value;
+ double g;
+ uint64_t delta;
+ struct list node;
+};
+#define list_to_tuple(ln) (container_of((ln), struct gkc_tuple, node))
+
+
+void gkc_summary_init(struct gkc_summary *s, double epsilon)
+{
+ list_init(&s->head);
+ s->epsilon = epsilon;
+}
+
+struct gkc_summary *gkc_summary_alloc(double epsilon)
+{
+ struct gkc_summary *s;
+ s = calloc(1, sizeof(*s));
+ gkc_summary_init(s, epsilon);
+ return s;
+}
+
+#include <assert.h>
+/* debug only, checks a number of properties that s must satisfy at all times */
+void gkc_sanity_check(struct gkc_summary *s)
+{
+ uint64_t nr_elems, nr_alloced;
+ struct list *cur;
+ struct gkc_tuple *tcur;
+
+ nr_elems = 0;
+ nr_alloced = 0;
+ cur = s->head.next;
+ while (cur != &s->head) {
+ tcur = list_to_tuple(cur);
+ cur = cur->next;
+ nr_elems += tcur->g;
+ nr_alloced++;
+ if (s->nr_elems > (1/s->epsilon)) {
+ /* there must be enough observations for this to become true */
+ assert(tcur->g + tcur->delta <= (s->nr_elems * s->epsilon * 2));
+ }
+ assert(nr_alloced <= s->alloced);
+ }
+ assert(nr_elems == s->nr_elems);
+ assert(nr_alloced == s->alloced);
+}
+
+static struct gkc_tuple *gkc_alloc(struct gkc_summary *s)
+{
+ s->alloced++;
+ if (s->alloced > s->max_alloced) {
+ s->max_alloced = s->alloced;
+ }
+
+ if (s->fl) {
+ void *ret;
+ ret = s->fl;
+ s->fl = s->fl->next;
+ return ret;
+ }
+ return malloc(sizeof(struct gkc_tuple));
+}
+
+static void gkc_free(struct gkc_summary *s, struct gkc_tuple *p)
+{
+ struct freelist *flp = (void *)p;
+ s->alloced--;
+
+ flp->next = s->fl;
+ s->fl = flp;
+}
+
+void gkc_summary_free(struct gkc_summary *s)
+{
+ struct freelist *fl;
+ struct list *cur;
+
+ cur = s->head.next;
+ while (cur != &s->head) {
+ struct list *next;
+ next = cur->next;
+ gkc_free(s, list_to_tuple(cur));
+ cur = next;
+ }
+ fl = s->fl;
+ while (fl) {
+ void *p;
+ p = fl;
+ fl = fl->next;
+ free(p);
+ }
+ free(s);
+}
+
+uint64_t gkc_query(struct gkc_summary *s, double q)
+{
+ struct list *cur, *next;
+ int rank;
+ double gi;
+ double ne;
+
+ rank = 0.5 + q * s->nr_elems;
+ ne = s->nr_elems * s->epsilon;
+ gi = 0;
+ if (list_empty(&s->head)) {
+ return 0;
+ }
+
+ cur = s->head.next;
+
+ while (1) {
+ struct gkc_tuple *tcur, *tnext;
+
+ tcur = list_to_tuple(cur);
+ next = cur->next;
+ if (next == &s->head) {
+ return tcur->value;
+ }
+ tnext = list_to_tuple(next);
+
+ gi += tcur->g;
+ if ((rank + ne) < (gi + tnext->g + tnext->delta)) {
+ if ((rank + ne) < (gi + tnext->g)) {
+ return tcur->value;
+ }
+ return tnext->value;
+ }
+ cur = next;
+ }
+}
+
+static uint64_t band(struct gkc_summary *s, uint64_t delta)
+{
+ uint64_t diff;
+
+ diff = 1 + (s->epsilon * s->nr_elems * 2) - delta;
+
+ if (diff == 1) {
+ return 0;
+ } else {
+ return ullog2(diff)/ullog2(2);
+ }
+}
+
+static void gkc_compress(struct gkc_summary *s)
+{
+ int max_compress;
+ struct list *cur, *prev;
+ struct gkc_tuple *tcur, *tprev;
+ uint64_t bi, b_plus_1;
+
+ max_compress = 2 * s->epsilon * s->nr_elems;
+ if (s->nr_elems < 2) {
+ return;
+ }
+
+ prev = s->head.prev;
+ cur = prev->prev;
+
+ while (cur != &s->head) {
+ tcur = list_to_tuple(cur);
+ tprev = list_to_tuple(prev);
+
+ b_plus_1 = band(s, tprev->delta);
+ bi = band(s, tcur->delta);
+
+ if (bi <= b_plus_1 && ((tcur->g + tprev->g + tprev->delta) <= max_compress)) {
+ tprev->g += tcur->g;
+ list_del(cur);
+ gkc_free(s, tcur);
+ cur = prev->prev;
+ continue;
+ }
+ prev = cur;
+ cur = cur->prev;
+ }
+}
+
+void gkc_insert_value(struct gkc_summary *s, double value)
+{
+ struct list *cur = NULL;
+ struct gkc_tuple *new, *tcur, *tnext = NULL;
+
+ new = gkc_alloc(s);
+ memset(new, 0, sizeof(*new));
+ new->value = value;
+ new->g = 1;
+ list_init(&new->node);
+
+
+ s->nr_elems++;
+
+
+ /* first insert */
+ if (list_empty(&s->head)) {
+ list_add(&s->head, &new->node);
+ return;
+ }
+
+ cur = s->head.next;
+ tcur = list_to_tuple(cur);
+ /* v < v0, new min */
+ if (tcur->value > new->value) {
+ list_add(&s->head, &new->node);
+ goto out;
+ }
+
+ double gi = 0;
+ while (cur->next != &s->head) {
+ tnext = list_to_tuple(cur->next);
+ tcur = list_to_tuple(cur);
+
+ gi += tcur->g;
+ if (tcur->value <= new->value && new->value < tnext->value) {
+ /* INSERT "(v, 1, Δ)" into S between vi and vi+1; */
+ new->delta = tcur->g + tcur->delta - 1;
+ list_add(cur, &new->node);
+ goto out;
+ }
+ cur = cur->next;
+ }
+ /* v > vs-1, new max */
+ list_add_tail(&s->head, &new->node);
+out:
+ if (s->nr_elems % (int)(1/(2*s->epsilon))) {
+ gkc_compress(s);
+ }
+}
+
+void gkc_print_summary(struct gkc_summary *s)
+{
+ struct gkc_tuple *tcur;
+ struct list *cur;
+
+ fprintf(stderr, "nr_elems: %zu, epsilon: %.02f, alloced: %" PRIu64 ", overfilled: %.02f, max_alloced: %" PRIu64 "\n",
+ s->nr_elems, s->epsilon, s->alloced, 2 * s->epsilon * s->nr_elems, s->max_alloced);
+ if (list_empty(&s->head)) {
+ fprintf(stderr, "Empty summary\n");
+ return;
+ }
+
+ cur = s->head.next;
+ while (cur != &s->head) {
+ tcur = list_to_tuple(cur);
+ fprintf(stderr, "(v: %" PRIu64 ", g: %.02f, d: %" PRIu64 ") ", tcur->value, tcur->g, tcur->delta);
+ cur = cur->next;
+ }
+ fprintf(stderr, "\n");
+}
+
+struct gkc_summary *gkc_combine(struct gkc_summary *s1, struct gkc_summary *s2)
+{
+ struct gkc_summary *snew;
+ struct list *cur1, *cur2;
+ struct gkc_tuple *tcur1, *tcur2, *tnew;
+
+ if (s1->epsilon != s2->epsilon) {
+ return NULL;
+ }
+ snew = gkc_summary_alloc(s1->epsilon);
+
+ cur1 = s1->head.next;
+ cur2 = s2->head.next;
+ while (cur1 != &s1->head && cur2 != &s2->head) {
+ tcur1 = list_to_tuple(cur1);
+ tcur2 = list_to_tuple(cur2);
+
+ tnew = gkc_alloc(snew);
+ if (tcur1->value < tcur2->value) {
+ tnew->value = tcur1->value;
+ tnew->g = tcur1->g;
+ tnew->delta = tcur1->delta;
+ cur1 = cur1->next;
+ } else {
+ tnew->value = tcur2->value;
+ tnew->g = tcur2->g;
+ tnew->delta = tcur2->delta;
+ cur2 = cur2->next;
+ }
+ list_add_tail(&snew->head, &tnew->node);
+ snew->nr_elems += tnew->g;
+ }
+ while (cur1 != &s1->head) {
+ tcur1 = list_to_tuple(cur1);
+
+ tnew = gkc_alloc(snew);
+ tnew->value = tcur1->value;
+ tnew->g = tcur1->g;
+ tnew->delta = tcur1->delta;
+ list_add_tail(&snew->head, &tnew->node);
+ snew->nr_elems += tnew->g;
+ cur1 = cur1->next;
+ }
+ while (cur2 != &s2->head) {
+ tcur2 = list_to_tuple(cur2);
+
+ tnew = gkc_alloc(snew);
+ tnew->value = tcur2->value;
+ tnew->g = tcur2->g;
+ tnew->delta = tcur2->delta;
+ list_add_tail(&snew->head, &tnew->node);
+ snew->nr_elems += tnew->g;
+ cur2 = cur2->next;
+ }
+ snew->max_alloced = snew->alloced;
+ gkc_compress(snew);
+
+ return snew;
+}