summaryrefslogtreecommitdiffstats
path: root/fluent-bit/lib/avro/src/value-hash.c
diff options
context:
space:
mode:
authorDaniel Baumann <daniel.baumann@progress-linux.org>2024-04-19 02:57:58 +0000
committerDaniel Baumann <daniel.baumann@progress-linux.org>2024-04-19 02:57:58 +0000
commitbe1c7e50e1e8809ea56f2c9d472eccd8ffd73a97 (patch)
tree9754ff1ca740f6346cf8483ec915d4054bc5da2d /fluent-bit/lib/avro/src/value-hash.c
parentInitial commit. (diff)
downloadnetdata-be1c7e50e1e8809ea56f2c9d472eccd8ffd73a97.tar.xz
netdata-be1c7e50e1e8809ea56f2c9d472eccd8ffd73a97.zip
Adding upstream version 1.44.3.upstream/1.44.3upstream
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'fluent-bit/lib/avro/src/value-hash.c')
-rw-r--r--fluent-bit/lib/avro/src/value-hash.c294
1 files changed, 294 insertions, 0 deletions
diff --git a/fluent-bit/lib/avro/src/value-hash.c b/fluent-bit/lib/avro/src/value-hash.c
new file mode 100644
index 00000000..c717924b
--- /dev/null
+++ b/fluent-bit/lib/avro/src/value-hash.c
@@ -0,0 +1,294 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to you under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * https://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
+ * implied. See the License for the specific language governing
+ * permissions and limitations under the License.
+ */
+
+#include <avro/platform.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include "avro/allocation.h"
+#include "avro/data.h"
+#include "avro/errors.h"
+#include "avro/value.h"
+#include "avro_private.h"
+
+#define check_return(retval, call) \
+ do { \
+ int rval = call; \
+ if (rval != 0) { return (retval); } \
+ } while (0)
+
+/*
+ * We currently use MurmurHash3 [1], which is public domain, as our hash
+ * implementation.
+ *
+ * [1] https://code.google.com/p/smhasher/
+ */
+
+/* Our seed is the MurmurHash3 of the string "avro.value" */
+#define SEED 0xaf4c78df
+
+#define ROTL32(a,b) (((a) << ((b) & 0x1f)) | ((a) >> (32 - ((b) & 0x1f))))
+
+static inline uint32_t
+fmix(uint32_t h)
+{
+ h ^= h >> 16;
+ h *= 0x85ebca6b;
+ h ^= h >> 13;
+ h *= 0xc2b2ae35;
+ h ^= h >> 16;
+ return h;
+}
+
+static const uint32_t c1 = 0xcc9e2d51;
+static const uint32_t c2 = 0x1b873593;
+
+static inline uint32_t
+add_hash(uint32_t start, uint32_t current)
+{
+ current *= c1;
+ current = ROTL32(current, 15);
+ current *= c2;
+
+ start ^= current;
+ start = ROTL32(start, 13);
+ start = start * 5 + 0xe6546b64;
+
+ return start;
+}
+
+static inline uint32_t
+hash_buffer(uint32_t start, const void *src, size_t len)
+{
+ const uint8_t *data = (const uint8_t *) src;
+ const int nblocks = len / 4;
+
+ uint32_t h1 = start;
+
+ //----------
+ // body
+
+ const uint32_t *blocks = (const uint32_t *) (data + nblocks*4);
+ int i;
+
+ for (i = -nblocks; i != 0; i++) {
+ uint32_t k1 = blocks[i];
+
+ k1 *= c1;
+ k1 = ROTL32(k1,15);
+ k1 *= c2;
+
+ h1 ^= k1;
+ h1 = ROTL32(h1,13);
+ h1 = h1*5+0xe6546b64;
+ }
+
+ //----------
+ // tail
+
+ const uint8_t *tail = (const uint8_t *) (data + nblocks*4);
+
+ uint32_t k1 = 0;
+
+ switch (len & 3)
+ {
+ case 3: k1 ^= tail[2] << 16;
+ case 2: k1 ^= tail[1] << 8;
+ case 1: k1 ^= tail[0];
+ k1 *= c1; k1 = ROTL32(k1,15); k1 *= c2; h1 ^= k1;
+ };
+
+ //----------
+ // finalization
+
+ h1 ^= len;
+ return h1;
+}
+
+static uint32_t
+avro_value_hash_fast(avro_value_t *value, uint32_t start)
+{
+ avro_type_t type = avro_value_get_type(value);
+
+ switch (type) {
+ case AVRO_BOOLEAN:
+ {
+ int v;
+ check_return(0, avro_value_get_boolean(value, &v));
+ return add_hash(start, v);
+ }
+
+ case AVRO_BYTES:
+ {
+ const void *buf;
+ size_t size;
+ check_return(0, avro_value_get_bytes(value, &buf, &size));
+ return hash_buffer(start, buf, size);
+ }
+
+ case AVRO_DOUBLE:
+ {
+ union {
+ double d;
+ uint32_t u32[2];
+ } v;
+ check_return(0, avro_value_get_double(value, &v.d));
+ return add_hash(add_hash(start, v.u32[0]), v.u32[1]);
+ }
+
+ case AVRO_FLOAT:
+ {
+ union {
+ float f;
+ uint32_t u32;
+ } v;
+ check_return(0, avro_value_get_float(value, &v.f));
+ return add_hash(start, v.u32);
+ }
+
+ case AVRO_INT32:
+ {
+ int32_t v;
+ check_return(0, avro_value_get_int(value, &v));
+ return add_hash(start, v);
+ }
+
+ case AVRO_INT64:
+ {
+ union {
+ int64_t u64;
+ uint32_t u32[2];
+ } v;
+ check_return(0, avro_value_get_long(value, &v.u64));
+ return add_hash(add_hash(start, v.u32[0]), v.u32[1]);
+ }
+
+ case AVRO_NULL:
+ {
+ check_return(0, avro_value_get_null(value));
+ return add_hash(start, 0);
+ }
+
+ case AVRO_STRING:
+ {
+ const char *buf;
+ size_t size;
+ check_return(0, avro_value_get_string(value, &buf, &size));
+ return hash_buffer(start, buf, size);
+ }
+
+ case AVRO_ARRAY:
+ {
+ size_t count;
+ size_t i;
+ check_return(0, avro_value_get_size(value, &count));
+
+ for (i = 0; i < count; i++) {
+ avro_value_t child;
+ check_return(0, avro_value_get_by_index
+ (value, i, &child, NULL));
+ start = avro_value_hash_fast(&child, start);
+ }
+
+ start ^= count;
+ return start;
+ }
+
+ case AVRO_ENUM:
+ {
+ int v;
+ check_return(0, avro_value_get_enum(value, &v));
+ return add_hash(start, v);
+ }
+
+ case AVRO_FIXED:
+ {
+ const void *buf;
+ size_t size;
+ check_return(0, avro_value_get_fixed(value, &buf, &size));
+ return hash_buffer(start, buf, size);
+ }
+
+ case AVRO_MAP:
+ {
+ size_t count;
+ size_t i;
+ check_return(0, avro_value_get_size(value, &count));
+
+ /*
+ * The hash for a map must be built up without
+ * taking into account the order of the elements
+ */
+ uint32_t map_hash = 0;
+ for (i = 0; i < count; i++) {
+ avro_value_t child;
+ const char *key;
+ check_return(0, avro_value_get_by_index
+ (value, i, &child, &key));
+
+ uint32_t element = SEED;
+ element = hash_buffer(element, key, strlen(key));
+ element = avro_value_hash_fast(&child, element);
+ element = fmix(element);
+
+ map_hash ^= element;
+ }
+ map_hash ^= count;
+
+ return add_hash(start, map_hash);
+ }
+
+ case AVRO_RECORD:
+ {
+ size_t count;
+ size_t i;
+ check_return(0, avro_value_get_size(value, &count));
+
+ for (i = 0; i < count; i++) {
+ avro_value_t child;
+ check_return(0, avro_value_get_by_index
+ (value, i, &child, NULL));
+ start = avro_value_hash_fast(&child, start);
+ }
+
+ start ^= count;
+ return start;
+ }
+
+ case AVRO_UNION:
+ {
+ int disc;
+ avro_value_t branch;
+ check_return(0, avro_value_get_discriminant(value, &disc));
+ check_return(0, avro_value_get_current_branch(value, &branch));
+
+ start = add_hash(start, disc);
+ start = avro_value_hash_fast(&branch, start);
+ return start;
+ }
+
+ default:
+ return 0;
+ }
+}
+
+uint32_t
+avro_value_hash(avro_value_t *value)
+{
+ uint32_t hash = avro_value_hash_fast(value, SEED);
+ return (hash == 0)? hash: fmix(hash);
+}