diff options
author | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-03-09 13:19:48 +0000 |
---|---|---|
committer | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-03-09 13:20:02 +0000 |
commit | 58daab21cd043e1dc37024a7f99b396788372918 (patch) | |
tree | 96771e43bb69f7c1c2b0b4f7374cb74d7866d0cb /web/server/h2o/libh2o/deps/golombset | |
parent | Releasing debian version 1.43.2-1. (diff) | |
download | netdata-58daab21cd043e1dc37024a7f99b396788372918.tar.xz netdata-58daab21cd043e1dc37024a7f99b396788372918.zip |
Merging upstream version 1.44.3.
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'web/server/h2o/libh2o/deps/golombset')
-rw-r--r-- | web/server/h2o/libh2o/deps/golombset/README.md | 26 | ||||
-rw-r--r-- | web/server/h2o/libh2o/deps/golombset/golombset.h | 162 | ||||
-rw-r--r-- | web/server/h2o/libh2o/deps/golombset/test.c | 58 |
3 files changed, 246 insertions, 0 deletions
diff --git a/web/server/h2o/libh2o/deps/golombset/README.md b/web/server/h2o/libh2o/deps/golombset/README.md new file mode 100644 index 000000000..5bebfb774 --- /dev/null +++ b/web/server/h2o/libh2o/deps/golombset/README.md @@ -0,0 +1,26 @@ +golombset +=== + +Golombset is a pure-C, header-file-only implementation of Golomb coded set, which is an compressed form of [Bloom filter](https://en.wikipedia.org/Bloom_filter). + +It is compresses every zero-range of Bloom filter (e.g. `0000...1`) using [Golomb coding](https://en.wikipedia.org/wiki/Golomb_coding). +Please refer to [Golomb-coded sets: smaller than Bloom filters](http://giovanni.bajo.it/post/47119962313/golomb-coded-sets-smaller-than-bloom-filters) for more information about the algorithm. + +API +--- + +__`int golombset_encode(unsigned fixed_bits, const unsigned *keys, size_t num_keys, void *buf, size_t *bufsize);`__ + +The function encodes an pre-sorted array of keys into given buffer. + +The function returns zero if successful, or -1 if otherwise (e.g. the size of the buffer is not sufficient). +`bufsize` is an input-output parameter. +Upon calling the function the value of the pointer must specify the size of the buffer being supplied. +When the function returns successfully, the value is updated the length of the bytes actually used to store the encoded data. + +__`int golombset_decode(unsigned fixed_bits, const void *buf, size_t bufsize, unsigned *keys, size_t *num_keys);`__ + +The function decodes the compressed data into an array of keys. + +The function returns zero if successful, or -1 if the size of the `keys` buffer is too small (specified by `*num_keys` when the function is being called). +Upon successful return, the number of keys decoded will be stored in `*num_keys`. diff --git a/web/server/h2o/libh2o/deps/golombset/golombset.h b/web/server/h2o/libh2o/deps/golombset/golombset.h new file mode 100644 index 000000000..207f0d61c --- /dev/null +++ b/web/server/h2o/libh2o/deps/golombset/golombset.h @@ -0,0 +1,162 @@ +/* + * Copyright (c) 2015 Kazuho Oku + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ +#ifndef GOLOMBSET_H +#define GOLOMBSET_H + +#include <inttypes.h> + +struct st_golombset_encode_t { + unsigned char *dst; + unsigned char *dst_max; + unsigned dst_shift; +}; + +struct st_golombset_decode_t { + const unsigned char *src; + const unsigned char *src_max; + unsigned src_shift; +}; + +static int golombset_encode_bit(struct st_golombset_encode_t *ctx, int bit) +{ + if (ctx->dst_shift == 0) { + if (++ctx->dst == ctx->dst_max) + return -1; + *ctx->dst = 0; + ctx->dst_shift = 8; + } + --ctx->dst_shift; + if (bit) + *ctx->dst |= 1 << ctx->dst_shift; + return 0; +} + +static int golombset_encode_bits(struct st_golombset_encode_t *ctx, unsigned bits, uint64_t value) +{ + if (bits != 0) { + do { + --bits; + if (golombset_encode_bit(ctx, (value >> bits) & 1) != 0) + return -1; + } while (bits != 0); + } + return 0; +} + +static int golombset_decode_bit(struct st_golombset_decode_t *ctx) +{ + if (ctx->src_shift == 0) { + if (++ctx->src == ctx->src_max) + return -1; + ctx->src_shift = 8; + } + return (*ctx->src >> --ctx->src_shift) & 1; +} + +static int golombset_decode_bits(struct st_golombset_decode_t *ctx, unsigned bits, uint64_t *value) +{ + int bit; + + *value = 0; + for (; bits != 0; --bits) { + if ((bit = golombset_decode_bit(ctx)) == -1) + return -1; + *value = (*value * 2) + bit; + } + + return 0; +} + +static int golombset_encode_value(struct st_golombset_encode_t *ctx, unsigned fixed_bits, uint64_t value) +{ + /* emit quontient */ + uint64_t unary_bits = value >> fixed_bits; + for (; unary_bits != 0; --unary_bits) + if (golombset_encode_bit(ctx, 0) != 0) + return -1; + if (golombset_encode_bit(ctx, 1) != 0) + return -1; + /* emit remainder */ + return golombset_encode_bits(ctx, fixed_bits, value); +} + +static int golombset_decode_value(struct st_golombset_decode_t *ctx, unsigned fixed_bits, uint64_t *value) +{ + uint64_t q; + int bit; + + /* decode quontient */ + for (q = 0; ; ++q) { + if ((bit = golombset_decode_bit(ctx)) == -1) + return -1; + if (bit) + break; + } + /* decode remainder */ + if (golombset_decode_bits(ctx, fixed_bits, value) == -1) + return -1; + /* merge q and r */ + *value += q << fixed_bits; + + return 0; +} + +static int golombset_encode(unsigned fixed_bits, uint64_t *keys, size_t num_keys, void *buf, size_t *bufsize) +{ + struct st_golombset_encode_t ctx = {(unsigned char *)buf - 1, (unsigned char *)buf + *bufsize}; + size_t i; + uint64_t next_min = 0; + + for (i = 0; i != num_keys; ++i) { + if (golombset_encode_value(&ctx, fixed_bits, keys[i] - next_min) != 0) + return -1; + next_min = keys[i] + 1; + } + + *bufsize = ctx.dst + 1 - (unsigned char *)buf; + + return 0; +} + +static int golombset_decode(unsigned fixed_bits, const void *buf, size_t bufsize, uint64_t *keys, size_t *num_keys) +{ + struct st_golombset_decode_t ctx = {buf, (unsigned char *)buf + bufsize, 8}; + size_t index = 0; + uint64_t next_min = 0; + + while (1) { + uint64_t value; + if (golombset_decode_value(&ctx, fixed_bits, &value) != 0) + break; + if (index == *num_keys) { + /* not enough space */ + return -1; + } + value += next_min; + keys[index++] = value; + next_min = value + 1; + } + *num_keys = index; + return 0; +} + +#endif diff --git a/web/server/h2o/libh2o/deps/golombset/test.c b/web/server/h2o/libh2o/deps/golombset/test.c new file mode 100644 index 000000000..42a8ac8f4 --- /dev/null +++ b/web/server/h2o/libh2o/deps/golombset/test.c @@ -0,0 +1,58 @@ +/* + * Copyright (c) 2015 Kazuho Oku + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ +#include <inttypes.h> +#include <stdio.h> +#include <string.h> +#include "golombset.h" + +int main(int argc, char **argv) +{ + uint64_t keys[] = {151, 192, 208, 269, 461, 512, 526, 591, 662, 806, 831, 866, 890, + 997, 1005, 1017, 1134, 1207, 1231, 1327, 1378, 1393, 1418, 1525, 1627, 1630}; + const size_t num_keys = sizeof(keys) / sizeof(keys[0]); + unsigned char buf[1024]; + size_t bufsize = sizeof(buf); + + if (golombset_encode(6, keys, num_keys, buf, &bufsize) != 0) { + fprintf(stderr, "golombset_encode failed\n"); + return 111; + } + printf("encoded %zu entries into %zu bytes\n", num_keys, bufsize); + + uint64_t decoded_keys[num_keys]; + size_t num_decoded_keys = num_keys; + if (golombset_decode(6, buf, bufsize, decoded_keys, &num_decoded_keys) != 0) { + fprintf(stderr, "golombset_decode failed\n"); + return 111; + } + + if (num_decoded_keys != num_keys) { + fprintf(stderr, "unexpected number of outputs\n"); + return 111; + } + if (memcmp(keys, decoded_keys, sizeof(keys[0]) * num_keys) != 0) { + fprintf(stderr, "output mismatch\n"); + return 111; + } + + return 0; +} |