diff options
Diffstat (limited to 'lib/generic')
-rw-r--r-- | lib/generic/README.rst | 60 | ||||
-rw-r--r-- | lib/generic/array.h | 166 | ||||
-rw-r--r-- | lib/generic/lru.c | 236 | ||||
-rw-r--r-- | lib/generic/lru.h | 249 | ||||
-rw-r--r-- | lib/generic/map.c | 354 | ||||
-rw-r--r-- | lib/generic/map.h | 115 | ||||
-rw-r--r-- | lib/generic/pack.h | 249 | ||||
-rw-r--r-- | lib/generic/queue.c | 124 | ||||
-rw-r--r-- | lib/generic/queue.h | 260 | ||||
-rw-r--r-- | lib/generic/set.h | 105 | ||||
-rw-r--r-- | lib/generic/trie.c | 912 | ||||
-rw-r--r-- | lib/generic/trie.h | 150 |
12 files changed, 2980 insertions, 0 deletions
diff --git a/lib/generic/README.rst b/lib/generic/README.rst new file mode 100644 index 0000000..7adff86 --- /dev/null +++ b/lib/generic/README.rst @@ -0,0 +1,60 @@ +Generics library +---------------- + +This small collection of "generics" was born out of frustration that I couldn't find no +such thing for C. It's either bloated, has poor interface, null-checking is absent or +doesn't allow custom allocation scheme. BSD-licensed (or compatible) code is allowed here, +as long as it comes with a test case in `tests/test_generics.c`. + +* array_ - a set of simple macros to make working with dynamic arrays easier. +* queue_ - a FIFO + LIFO queue. +* map_ - a `Crit-bit tree`_ key-value map implementation (public domain) that comes with tests. +* set_ - set abstraction implemented on top of ``map`` (unused now). +* pack_ - length-prefixed list of objects (i.e. array-list). +* lru_ - LRU-like hash table +* trie_ - a trie-based key-value map, taken from knot-dns + +array +~~~~~ + +.. doxygenfile:: array.h + :project: libkres + +queue +~~~~~ + +.. doxygenfile:: queue.h + :project: libkres + +map +~~~ + +.. doxygenfile:: map.h + :project: libkres + +set +~~~ + +.. doxygenfile:: set.h + :project: libkres + +pack +~~~~ + +.. doxygenfile:: pack.h + :project: libkres + +lru +~~~ + +.. doxygenfile:: lru.h + :project: libkres + +trie +~~~~ + +.. doxygenfile:: trie.h + :project: libkres + + +.. _`Crit-bit tree`: https://cr.yp.to/critbit.html diff --git a/lib/generic/array.h b/lib/generic/array.h new file mode 100644 index 0000000..ece4dd1 --- /dev/null +++ b/lib/generic/array.h @@ -0,0 +1,166 @@ +/* Copyright (C) 2015-2017 CZ.NIC, z.s.p.o. <knot-dns@labs.nic.cz> + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see <https://www.gnu.org/licenses/>. + */ + +/** + * + * @file array.h + * @brief A set of simple macros to make working with dynamic arrays easier. + * + * @note The C has no generics, so it is implemented mostly using macros. + * Be aware of that, as direct usage of the macros in the evaluating macros + * may lead to different expectations: + * + * @code{.c} + * MIN(array_push(arr, val), other) + * @endcode + * + * May evaluate the code twice, leading to unexpected behaviour. + * This is a price to pay for the absence of proper generics. + * + * # Example usage: + * + * @code{.c} + * array_t(const char*) arr; + * array_init(arr); + * + * // Reserve memory in advance + * if (array_reserve(arr, 2) < 0) { + * return ENOMEM; + * } + * + * // Already reserved, cannot fail + * array_push(arr, "princess"); + * array_push(arr, "leia"); + * + * // Not reserved, may fail + * if (array_push(arr, "han") < 0) { + * return ENOMEM; + * } + * + * // It does not hide what it really is + * for (size_t i = 0; i < arr.len; ++i) { + * printf("%s\n", arr.at[i]); + * } + * + * // Random delete + * array_del(arr, 0); + * @endcode + * \addtogroup generics + * @{ + */ + +#pragma once +#include <stdlib.h> + +/** Simplified Qt containers growth strategy. */ +static inline size_t array_next_count(size_t want) +{ + if (want < 2048) { + return (want < 20) ? want + 4 : want * 2; + } else { + return want + 2048; + } +} + +/** @internal Incremental memory reservation */ +static inline int array_std_reserve(void *baton, char **mem, size_t elm_size, size_t want, size_t *have) +{ + if (*have >= want) { + return 0; + } + /* Simplified Qt containers growth strategy */ + size_t next_size = array_next_count(want); + void *mem_new = realloc(*mem, next_size * elm_size); + if (mem_new != NULL) { + *mem = mem_new; + *have = next_size; + return 0; + } + return -1; +} + +/** @internal Wrapper for stdlib free. */ +static inline void array_std_free(void *baton, void *p) +{ + free(p); +} + +/** Declare an array structure. */ +#define array_t(type) struct {type * at; size_t len; size_t cap; } + +/** Zero-initialize the array. */ +#define array_init(array) ((array).at = NULL, (array).len = (array).cap = 0) + +/** Free and zero-initialize the array (plain malloc/free). */ +#define array_clear(array) \ + array_clear_mm(array, array_std_free, NULL) + +/** Make the array empty and free pointed-to memory. + * Mempool usage: pass mm_free and a knot_mm_t* . */ +#define array_clear_mm(array, free, baton) \ + (free)((baton), (array).at), array_init(array) + +/** Reserve capacity for at least n elements. + * @return 0 if success, <0 on failure */ +#define array_reserve(array, n) \ + array_reserve_mm(array, n, array_std_reserve, NULL) + +/** Reserve capacity for at least n elements. + * Mempool usage: pass kr_memreserve and a knot_mm_t* . + * @return 0 if success, <0 on failure */ +#define array_reserve_mm(array, n, reserve, baton) \ + (reserve)((baton), (char **) &(array).at, sizeof((array).at[0]), (n), &(array).cap) + +/** + * Push value at the end of the array, resize it if necessary. + * Mempool usage: pass kr_memreserve and a knot_mm_t* . + * @note May fail if the capacity is not reserved. + * @return element index on success, <0 on failure + */ +#define array_push_mm(array, val, reserve, baton) \ + (int)((array).len < (array).cap ? ((array).at[(array).len] = val, (array).len++) \ + : (array_reserve_mm(array, ((array).cap + 1), reserve, baton) < 0 ? -1 \ + : ((array).at[(array).len] = val, (array).len++))) + +/** + * Push value at the end of the array, resize it if necessary (plain malloc/free). + * @note May fail if the capacity is not reserved. + * @return element index on success, <0 on failure + */ +#define array_push(array, val) \ + array_push_mm(array, val, array_std_reserve, NULL) + +/** + * Pop value from the end of the array. + */ +#define array_pop(array) \ + (array).len -= 1 + +/** + * Remove value at given index. + * @return 0 on success, <0 on failure + */ +#define array_del(array, i) \ + (int)((i) < (array).len ? ((array).len -= 1,(array).at[i] = (array).at[(array).len], 0) : -1) + +/** + * Return last element of the array. + * @warning Undefined if the array is empty. + */ +#define array_tail(array) \ + (array).at[(array).len - 1] + +/** @} */ diff --git a/lib/generic/lru.c b/lib/generic/lru.c new file mode 100644 index 0000000..12bba4e --- /dev/null +++ b/lib/generic/lru.c @@ -0,0 +1,236 @@ +/* Copyright (C) 2016-2018 CZ.NIC, z.s.p.o. <knot-dns@labs.nic.cz> + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see <https://www.gnu.org/licenses/>. + */ + +#include "lib/generic/lru.h" +#include "contrib/murmurhash3/murmurhash3.h" + +typedef struct lru_group lru_group_t; + +struct lru_item { + uint16_t key_len, val_len; /**< Two bytes should be enough for our purposes. */ + char data[]; + /**< Place for both key and value. + * + * We use "char" to satisfy the C99+ aliasing rules. + * See C99 section 6.5 Expressions, paragraph 7. + * Any type can be accessed through char-pointer, + * so we can use a common struct definition + * for all types being held. + */ +}; + +/** @internal Compute offset of value in struct lru_item. */ +static uint val_offset(uint key_len) +{ + uint key_end = offsetof(struct lru_item, data) + key_len; + // align it to the closest multiple of four + return round_power(key_end, 2); +} + +/** @internal Return pointer to value in an item. */ +static void * item_val(struct lru_item *it) +{ + return it->data + val_offset(it->key_len) - offsetof(struct lru_item, data); +} + +/** @internal Compute the size of an item. ATM we don't align/pad the end of it. */ +static uint item_size(uint key_len, uint val_len) +{ + return val_offset(key_len) + val_len; +} + +/** @internal Free each item. */ +KR_EXPORT void lru_free_items_impl(struct lru *lru) +{ + assert(lru); + for (size_t i = 0; i < (1 << (size_t)lru->log_groups); ++i) { + lru_group_t *g = &lru->groups[i]; + for (int j = 0; j < LRU_ASSOC; ++j) + mm_free(lru->mm, g->items[j]); + } +} + +/** @internal See lru_apply. */ +KR_EXPORT void lru_apply_impl(struct lru *lru, lru_apply_fun f, void *baton) +{ + bool ok = lru && f; + if (!ok) { + assert(false); + return; + } + for (size_t i = 0; i < (1 << (size_t)lru->log_groups); ++i) { + lru_group_t *g = &lru->groups[i]; + for (uint j = 0; j < LRU_ASSOC; ++j) { + struct lru_item *it = g->items[j]; + if (!it) + continue; + enum lru_apply_do ret = + f(it->data, it->key_len, item_val(it), baton); + switch(ret) { + case LRU_APPLY_DO_EVICT: // evict + mm_free(lru->mm, it); + g->items[j] = NULL; + g->counts[j] = 0; + g->hashes[j] = 0; + break; + default: + assert(ret == LRU_APPLY_DO_NOTHING); + } + } + } +} + +/** @internal See lru_create. */ +KR_EXPORT struct lru * lru_create_impl(uint max_slots, knot_mm_t *mm_array, knot_mm_t *mm) +{ + assert(max_slots); + if (!max_slots) + return NULL; + // let lru->log_groups = ceil(log2(max_slots / (float) assoc)) + // without trying for efficiency + uint group_count = (max_slots - 1) / LRU_ASSOC + 1; + uint log_groups = 0; + for (uint s = group_count - 1; s; s /= 2) + ++log_groups; + group_count = 1 << log_groups; + assert(max_slots <= group_count * LRU_ASSOC && group_count * LRU_ASSOC < 2 * max_slots); + + size_t size = offsetof(struct lru, groups[group_count]); + struct lru *lru = mm_alloc(mm_array, size); + if (unlikely(lru == NULL)) + return NULL; + *lru = (struct lru){ + .mm = mm, + .mm_array = mm_array, + .log_groups = log_groups, + }; + // zeros are a good init + memset(lru->groups, 0, size - offsetof(struct lru, groups)); + return lru; +} + +/** @internal Decrement all counters within a group. */ +static void group_dec_counts(lru_group_t *g) { + g->counts[LRU_TRACKED] = LRU_TRACKED; + for (uint i = 0; i < LRU_TRACKED + 1; ++i) + if (likely(g->counts[i])) + --g->counts[i]; +} + +/** @internal Increment a counter within a group. */ +static void group_inc_count(lru_group_t *g, int i) { + if (likely(++(g->counts[i]))) + return; + g->counts[i] = -1; + // We could've decreased or halved all of them, but let's keep the max. +} + +/** @internal Implementation of both getting and insertion. + * Note: val_len is only meaningful if do_insert. + * *is_new is only meaningful when return value isn't NULL, contains + * true when returned lru entry has been allocated right now + * if return value is NULL, *is_new remains untouched. + */ +KR_EXPORT void * lru_get_impl(struct lru *lru, const char *key, uint key_len, + uint val_len, bool do_insert, bool *is_new) +{ + bool ok = lru && (key || !key_len) && key_len <= UINT16_MAX + && (!do_insert || val_len <= UINT16_MAX); + if (!ok) { + assert(false); + return NULL; // reasonable fallback when not debugging + } + bool is_new_entry = false; + // find the right group + uint32_t khash = hash(key, key_len); + uint16_t khash_top = khash >> 16; + lru_group_t *g = &lru->groups[khash & ((1 << lru->log_groups) - 1)]; + struct lru_item *it = NULL; + uint i; + // scan the *stored* elements in the group + for (i = 0; i < LRU_ASSOC; ++i) { + if (g->hashes[i] == khash_top) { + it = g->items[i]; + if (likely(it && it->key_len == key_len + && (key_len == 0 || memcmp(it->data, key, key_len) == 0))) { + /* Found a key, but trying to insert a value larger than available + * space in the allocated slot, so the entry must be resized to fit. */ + if (unlikely(do_insert && val_len > it->val_len)) { + goto insert; + } else { + goto found; // to reduce huge nesting depth + } + } + } + } + // key not found; first try an empty/counted-out place to insert + if (do_insert) + for (i = 0; i < LRU_ASSOC; ++i) + if (g->items[i] == NULL || g->counts[i] == 0) + goto insert; + // check if we track key's count at least + for (i = LRU_ASSOC; i < LRU_TRACKED; ++i) { + if (g->hashes[i] == khash_top) { + group_inc_count(g, i); + if (!do_insert) + return NULL; + // check if we trumped some stored key + for (uint j = 0; j < LRU_ASSOC; ++j) + if (unlikely(g->counts[i] > g->counts[j])) { + // evict key j, i.e. swap with i + --g->counts[i]; // we increment it below + SWAP(g->counts[i], g->counts[j]); + SWAP(g->hashes[i], g->hashes[j]); + i = j; + goto insert; + } + return NULL; + } + } + // not found at all: decrement all counts but only on every LRU_TRACKED occasion + if (g->counts[LRU_TRACKED]) + --g->counts[LRU_TRACKED]; + else + group_dec_counts(g); + return NULL; +insert: // insert into position i (incl. key) + assert(i < LRU_ASSOC); + g->hashes[i] = khash_top; + it = g->items[i]; + uint new_size = item_size(key_len, val_len); + if (it == NULL || new_size != item_size(it->key_len, it->val_len)) { + // (re)allocate + mm_free(lru->mm, it); + it = g->items[i] = mm_alloc(lru->mm, new_size); + if (it == NULL) + return NULL; + } + it->key_len = key_len; + it->val_len = val_len; + if (key_len > 0) { + memcpy(it->data, key, key_len); + } + memset(item_val(it), 0, val_len); // clear the value + is_new_entry = true; +found: // key and hash OK on g->items[i]; now update stamps + assert(i < LRU_ASSOC); + group_inc_count(g, i); + if (is_new) { + *is_new = is_new_entry; + } + return item_val(g->items[i]); +} + diff --git a/lib/generic/lru.h b/lib/generic/lru.h new file mode 100644 index 0000000..b5c9bcd --- /dev/null +++ b/lib/generic/lru.h @@ -0,0 +1,249 @@ +/* Copyright (C) 2016-2018 CZ.NIC, z.s.p.o. <knot-dns@labs.nic.cz> + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see <https://www.gnu.org/licenses/>. + */ +/** + * @file lru.h + * @brief A lossy cache. + * + * @note The implementation tries to keep frequent keys and avoid others, + * even if "used recently", so it may refuse to store it on lru_get_new(). + * It uses hashing to split the problem pseudo-randomly into smaller groups, + * and within each it tries to approximate relative usage counts of several + * most frequent keys/hashes. This tracking is done for *more* keys than + * those that are actually stored. + * + * Example usage: + * @code{.c} + * // Define new LRU type + * typedef lru_t(int) lru_int_t; + * + * // Create LRU + * lru_int_t *lru; + * lru_create(&lru, 5, NULL, NULL); + * + * // Insert some values + * int *pi = lru_get_new(lru, "luke", strlen("luke"), NULL); + * if (pi) + * *pi = 42; + * pi = lru_get_new(lru, "leia", strlen("leia"), NULL); + * if (pi) + * *pi = 24; + * + * // Retrieve values + * int *ret = lru_get_try(lru, "luke", strlen("luke"), NULL); + * if (!ret) printf("luke dropped out!\n"); + * else printf("luke's number is %d\n", *ret); + * + * char *enemies[] = {"goro", "raiden", "subzero", "scorpion"}; + * for (int i = 0; i < 4; ++i) { + * int *val = lru_get_new(lru, enemies[i], strlen(enemies[i]), NULL); + * if (val) + * *val = i; + * } + * + * // We're done + * lru_free(lru); + * @endcode + * + * \addtogroup generics + * @{ + */ + +#pragma once + +#include <assert.h> +#include <stdint.h> +#include <stddef.h> + +#include "contrib/ucw/lib.h" +#include "lib/utils.h" +#include "libknot/mm_ctx.h" + +/* ================================ Interface ================================ */ + +/** @brief The type for LRU, parametrized by value type. */ +#define lru_t(type) \ + union { \ + type *pdata_t; /* only the *type* information is used */ \ + struct lru lru; \ + } + +/** + * @brief Allocate and initialize an LRU with default associativity. + * + * The real limit on the number of slots can be a bit larger but less than double. + * + * @param ptable pointer to a pointer to the LRU + * @param max_slots number of slots + * @param mm_ctx_array memory context to use for the huge array, NULL for default + * @param mm_ctx memory context to use for individual key-value pairs, NULL for default + * + * @note The pointers to memory contexts need to remain valid + * during the whole life of the structure (or be NULL). + */ +#define lru_create(ptable, max_slots, mm_ctx_array, mm_ctx) do { \ + (void)(((__typeof__((*(ptable))->pdata_t))0) == (void *)0); /* typecheck lru_t */ \ + *(ptable) = (__typeof__(*(ptable))) \ + lru_create_impl((max_slots), (mm_ctx_array), (mm_ctx)); \ + } while (false) + +/** @brief Free an LRU created by lru_create (it can be NULL). */ +#define lru_free(table) \ + lru_free_impl(&(table)->lru) + +/** @brief Reset an LRU to the empty state (but preserve any settings). */ +#define lru_reset(table) \ + lru_reset_impl(&(table)->lru) + +/** + * @brief Find key in the LRU and return pointer to the corresponding value. + * + * @param table pointer to LRU + * @param key_ lookup key + * @param len_ key length + * @return pointer to data or NULL if not found + */ +#define lru_get_try(table, key_, len_) \ + (__typeof__((table)->pdata_t)) \ + lru_get_impl(&(table)->lru, (key_), (len_), -1, false, NULL) + +/** + * @brief Return pointer to value, inserting if needed (zeroed). + * + * @param table pointer to LRU + * @param key_ lookup key + * @param len_ key lengthkeys + * @param is_new pointer to bool to store result of operation + * (true if entry is newly added, false otherwise; can be NULL). + * @return pointer to data or NULL (can be even if memory could be allocated!) + */ +#define lru_get_new(table, key_, len_, is_new) \ + (__typeof__((table)->pdata_t)) \ + lru_get_impl(&(table)->lru, (key_), (len_), \ + sizeof(*(table)->pdata_t), true, is_new) + +/** + * @brief Apply a function to every item in LRU. + * + * @param table pointer to LRU + * @param function enum lru_apply_do (*function)(const char *key, uint len, val_type *val, void *baton) + * See enum lru_apply_do for the return type meanings. + * @param baton extra pointer passed to each function invocation + */ +#define lru_apply(table, function, baton) do { \ + lru_apply_fun_g(fun_dummy, __typeof__(*(table)->pdata_t)) = 0; \ + (void)(fun_dummy == (function)); /* produce a warning with incompatible function type */ \ + lru_apply_impl(&(table)->lru, (lru_apply_fun)(function), (baton)); \ + } while (false) + +/** @brief Possible actions to do with an element. */ +enum lru_apply_do { + LRU_APPLY_DO_NOTHING, + LRU_APPLY_DO_EVICT, + /* maybe more in future*/ +}; + +/** + * @brief Return the real capacity - maximum number of keys holdable within. + * + * @param table pointer to LRU + */ +#define lru_capacity(table) lru_capacity_impl(&(table)->lru) + + +/** @brief Round the value up to a multiple of (1 << power). */ +static inline uint round_power(uint size, uint power) +{ + uint res = ((size - 1) & ~((1 << power) - 1)) + (1 << power); + assert(__builtin_ctz(res) >= power); + assert(size <= res && res < size + (1 << power)); + return res; +} + + +/* ======================== Inlined part of implementation ======================== */ +/** @cond internal */ + +#define lru_apply_fun_g(name, val_type) \ + enum lru_apply_do (*(name))(const char *key, uint len, val_type *val, void *baton) +typedef lru_apply_fun_g(lru_apply_fun, void); + +#if __GNUC__ >= 4 + #define CACHE_ALIGNED __attribute__((aligned(64))) +#else + #define CACHE_ALIGNED +#endif + +struct lru; +void lru_free_items_impl(struct lru *lru); +struct lru * lru_create_impl(uint max_slots, knot_mm_t *mm_array, knot_mm_t *mm); +void * lru_get_impl(struct lru *lru, const char *key, uint key_len, + uint val_len, bool do_insert, bool *is_new); +void lru_apply_impl(struct lru *lru, lru_apply_fun f, void *baton); + +struct lru_item; + +#if SIZE_MAX > (1 << 32) + /** @internal The number of keys stored within each group. */ + #define LRU_ASSOC 3 +#else + #define LRU_ASSOC 4 +#endif +/** @internal The number of hashes tracked within each group: 10-1 or 12-1. */ +#define LRU_TRACKED ((64 - sizeof(size_t) * LRU_ASSOC) / 4 - 1) + +struct lru_group { + uint16_t counts[LRU_TRACKED+1]; /*!< Occurrence counters; the last one is special. */ + uint16_t hashes[LRU_TRACKED+1]; /*!< Top halves of hashes; the last one is unused. */ + struct lru_item *items[LRU_ASSOC]; /*!< The full items. */ +} CACHE_ALIGNED; + +/* The sizes are chosen so lru_group just fits into a single x86 cache line. */ +static_assert(64 == sizeof(struct lru_group) + && 64 == LRU_ASSOC * sizeof(void*) + (LRU_TRACKED+1) * 4, + "bad sizing for your sizeof(void*)"); + +struct lru { + struct knot_mm *mm, /**< Memory context to use for keys. */ + *mm_array; /**< Memory context to use for this structure itself. */ + uint log_groups; /**< Logarithm of the number of LRU groups. */ + struct lru_group groups[] CACHE_ALIGNED; /**< The groups of items. */ +}; + +/** @internal See lru_free. */ +static inline void lru_free_impl(struct lru *lru) +{ + if (!lru) + return; + lru_free_items_impl(lru); + mm_free(lru->mm_array, lru); +} + +/** @internal See lru_reset. */ +static inline void lru_reset_impl(struct lru *lru) +{ + lru_free_items_impl(lru); + memset(lru->groups, 0, sizeof(lru->groups[0]) * (1 << lru->log_groups)); +} + +/** @internal See lru_capacity. */ +static inline uint lru_capacity_impl(struct lru *lru) +{ + assert(lru); + return (1 << lru->log_groups) * LRU_ASSOC; +} + +/** @endcond */ +/** @} (addtogroup generics) */ diff --git a/lib/generic/map.c b/lib/generic/map.c new file mode 100644 index 0000000..3e06520 --- /dev/null +++ b/lib/generic/map.c @@ -0,0 +1,354 @@ +/* + * critbit89 - A crit-bit tree implementation for strings in C89 + * Written by Jonas Gehring <jonas@jgehring.net> + * Implemented key-value storing by Marek Vavrusa <marek.vavrusa@nic.cz> + * + * The code makes the assumption that malloc returns pointers aligned at at + * least a two-byte boundary. Since the C standard requires that malloc return + * pointers that can store any type, there are no commonly-used toolchains for + * which this assumption is false. + * + * See https://github.com/agl/critbit/blob/master/critbit.pdf for reference. + */ + +#include <errno.h> +#include <string.h> +#include <stdlib.h> + +#include "map.h" +#include "lib/utils.h" + + /* Exports */ +#if defined _WIN32 || defined __CYGWIN__ + #define EXPORT __attribute__ ((dllexport)) +#else + #define EXPORT __attribute__ ((visibility ("default"))) +#endif + +#ifdef _MSC_VER /* MSVC */ + typedef unsigned __int8 uint8_t; + typedef unsigned __int32 uint32_t; + #ifdef _WIN64 + typedef signed __int64 intptr_t; + #else + typedef _W64 signed int intptr_t; + #endif +#else /* Not MSVC */ + #include <stdint.h> +#endif + +typedef struct { + void* value; + uint8_t key[]; +} cb_data_t; + +typedef struct { + void *child[2]; + uint32_t byte; + uint8_t otherbits; +} cb_node_t; + +/* Return true if ptr is internal node. */ +static inline int ref_is_internal(const uint8_t *p) +{ + return 1 & (intptr_t)p; +} + +/* Get internal node. */ +static inline cb_node_t *ref_get_internal(uint8_t *p) +{ + return (cb_node_t *)(p - 1); +} + +/* Static helper functions */ +static void cbt_traverse_delete(map_t *map, void *top) +{ + uint8_t *p = top; + if (ref_is_internal(p)) { + cb_node_t *q = ref_get_internal(p); + cbt_traverse_delete(map, q->child[0]); + cbt_traverse_delete(map, q->child[1]); + mm_free(map->pool, q); + } else { + mm_free(map->pool, p); + } +} + +static int cbt_traverse_prefixed(void *top, + int (*callback)(const char *, void *, void *), void *baton) +{ + uint8_t *p = top; + cb_data_t *x = (cb_data_t *)top; + + if (ref_is_internal(p)) { + cb_node_t *q = ref_get_internal(p); + int ret = 0; + + ret = cbt_traverse_prefixed(q->child[0], callback, baton); + if (ret != 0) { + return ret; + } + ret = cbt_traverse_prefixed(q->child[1], callback, baton); + if (ret != 0) { + return ret; + } + return 0; + } + + return (callback)((const char *)x->key, x->value, baton); +} + +static cb_data_t *cbt_make_data(map_t *map, const uint8_t *str, size_t len, void *value) +{ + cb_data_t *x = mm_alloc(map->pool, sizeof(cb_data_t) + len); + if (x != NULL) { + x->value = value; + memcpy(x->key, str, len); + } + return x; +} + +/*! Like map_contains, but also set the value, if passed and found. */ +static int cbt_get(map_t *map, const char *str, void **value) +{ + const uint8_t *ubytes = (void *)str; + const size_t ulen = strlen(str); + uint8_t *p = map->root; + cb_data_t *x = NULL; + + if (p == NULL) { + return 0; + } + + while (ref_is_internal(p)) { + cb_node_t *q = ref_get_internal(p); + uint8_t c = 0; + int direction; + + if (q->byte < ulen) { + c = ubytes[q->byte]; + } + direction = (1 + (q->otherbits | c)) >> 8; + + p = q->child[direction]; + } + + x = (cb_data_t *)p; + if (strcmp(str, (const char *)x->key) == 0) { + if (value != NULL) { + *value = x->value; + } + return 1; + } + + return 0; +} + +/*! Returns non-zero if map contains str */ +EXPORT int map_contains(map_t *map, const char *str) +{ + return cbt_get(map, str, NULL); +} + +EXPORT void *map_get(map_t *map, const char *str) +{ + void *v = NULL; + cbt_get(map, str, &v); + return v; +} + +EXPORT int map_set(map_t *map, const char *str, void *val) +{ + const uint8_t *const ubytes = (void *)str; + const size_t ulen = strlen(str); + uint8_t *p = map->root; + uint8_t c = 0, *x = NULL; + uint32_t newbyte = 0; + uint32_t newotherbits = 0; + int direction = 0, newdirection = 0; + cb_node_t *newnode = NULL; + cb_data_t *data = NULL; + void **wherep = NULL; + + if (p == NULL) { + map->root = cbt_make_data(map, (const uint8_t *)str, ulen + 1, val); + if (map->root == NULL) { + return ENOMEM; + } + return 0; + } + + while (ref_is_internal(p)) { + cb_node_t *q = ref_get_internal(p); + c = 0; + if (q->byte < ulen) { + c = ubytes[q->byte]; + } + direction = (1 + (q->otherbits | c)) >> 8; + + p = q->child[direction]; + } + + data = (cb_data_t *)p; + for (newbyte = 0; newbyte < ulen; ++newbyte) { + if (data->key[newbyte] != ubytes[newbyte]) { + newotherbits = data->key[newbyte] ^ ubytes[newbyte]; + goto different_byte_found; + } + } + + if (data->key[newbyte] != 0) { + newotherbits = data->key[newbyte]; + goto different_byte_found; + } + data->value = val; + return 1; + +different_byte_found: + newotherbits |= newotherbits >> 1; + newotherbits |= newotherbits >> 2; + newotherbits |= newotherbits >> 4; + newotherbits = (newotherbits & ~(newotherbits >> 1)) ^ 255; + c = data->key[newbyte]; + newdirection = (1 + (newotherbits | c)) >> 8; + + newnode = mm_alloc(map->pool, sizeof(cb_node_t)); + if (newnode == NULL) { + return ENOMEM; + } + + x = (uint8_t *)cbt_make_data(map, ubytes, ulen + 1, val); + if (x == NULL) { + mm_free(map->pool, newnode); + return ENOMEM; + } + + newnode->byte = newbyte; + newnode->otherbits = newotherbits; + newnode->child[1 - newdirection] = x; + + /* Insert into map */ + wherep = &map->root; + for (;;) { + cb_node_t *q; + p = *wherep; + if (!ref_is_internal(p)) { + break; + } + + q = ref_get_internal(p); + if (q->byte > newbyte) { + break; + } + if (q->byte == newbyte && q->otherbits > newotherbits) { + break; + } + + c = 0; + if (q->byte < ulen) { + c = ubytes[q->byte]; + } + direction = (1 + (q->otherbits | c)) >> 8; + wherep = q->child + direction; + } + + newnode->child[newdirection] = *wherep; + *wherep = (void *)(1 + (char *)newnode); + return 0; +} + +/*! Deletes str from the map, returns 0 on success */ +EXPORT int map_del(map_t *map, const char *str) +{ + const uint8_t *ubytes = (void *)str; + const size_t ulen = strlen(str); + uint8_t *p = map->root; + void **wherep = NULL, **whereq = NULL; + cb_node_t *q = NULL; + cb_data_t *data = NULL; + int direction = 0; + + if (map->root == NULL) { + return 1; + } + wherep = &map->root; + + while (ref_is_internal(p)) { + uint8_t c = 0; + whereq = wherep; + q = ref_get_internal(p); + + if (q->byte < ulen) { + c = ubytes[q->byte]; + } + direction = (1 + (q->otherbits | c)) >> 8; + wherep = q->child + direction; + p = *wherep; + } + + data = (cb_data_t *)p; + if (strcmp(str, (const char *)data->key) != 0) { + return 1; + } + mm_free(map->pool, p); + + if (!whereq) { + map->root = NULL; + return 0; + } + + *whereq = q->child[1 - direction]; + mm_free(map->pool, q); + return 0; +} + +/*! Clears the given map */ +EXPORT void map_clear(map_t *map) +{ + if (map->root) { + cbt_traverse_delete(map, map->root); + } + map->root = NULL; +} + +/*! Calls callback for all strings in map with the given prefix */ +EXPORT int map_walk_prefixed(map_t *map, const char *prefix, + int (*callback)(const char *, void *, void *), void *baton) +{ + if (!map) { + return 0; + } + + const uint8_t *ubytes = (void *)prefix; + const size_t ulen = strlen(prefix); + uint8_t *p = map->root; + uint8_t *top = p; + cb_data_t *data = NULL; + + if (p == NULL) { + return 0; + } + + while (ref_is_internal(p)) { + cb_node_t *q = ref_get_internal(p); + uint8_t c = 0; + int direction; + + if (q->byte < ulen) { + c = ubytes[q->byte]; + } + direction = (1 + (q->otherbits | c)) >> 8; + + p = q->child[direction]; + if (q->byte < ulen) { + top = p; + } + } + + data = (cb_data_t *)p; + if (strlen((const char *)data->key) < ulen || memcmp(data->key, prefix, ulen) != 0) { + return 0; /* No strings match */ + } + + return cbt_traverse_prefixed(top, callback, baton); +} diff --git a/lib/generic/map.h b/lib/generic/map.h new file mode 100644 index 0000000..73ce4c0 --- /dev/null +++ b/lib/generic/map.h @@ -0,0 +1,115 @@ +/* + * critbit89 - A crit-bit map implementation for strings in C89 + * Written by Jonas Gehring <jonas@jgehring.net> + */ + +/** + * @file map.h + * @brief A Crit-bit tree key-value map implementation. + * + * @warning If the user provides a custom allocator, it must return addresses aligned to 2B boundary. + * + * # Example usage: + * + * @code{.c} + * map_t map = map_make(NULL); + * + * // Custom allocator (optional) + * map.malloc = &mymalloc; + * map.baton = &mymalloc_context; + * + * // Insert k-v pairs + * int values = { 42, 53, 64 }; + * if (map_set(&map, "princess", &values[0]) != 0 || + * map_set(&map, "prince", &values[1]) != 0 || + * map_set(&map, "leia", &values[2]) != 0) { + * fail(); + * } + * + * // Test membership + * if (map_contains(&map, "leia")) { + * success(); + * } + * + * // Prefix search + * int i = 0; + * int count(const char *k, void *v, void *ext) { (*(int *)ext)++; return 0; } + * if (map_walk_prefixed(map, "princ", count, &i) == 0) { + * printf("%d matches\n", i); + * } + * + * // Delete + * if (map_del(&map, "badkey") != 0) { + * fail(); // No such key + * } + * + * // Clear the map + * map_clear(&map); + * @endcode + * + * \addtogroup generics + * @{ + */ + +#pragma once + +#include <stddef.h> + +#ifdef __cplusplus +extern "C" { +#endif + +struct knot_mm; /* avoid the unnecessary include */ + +/** Main data structure */ +typedef struct { + void *root; + struct knot_mm *pool; +} map_t; + +/** Creates an new empty critbit map. Pass NULL for malloc+free. */ +static inline map_t map_make(struct knot_mm *pool) +{ + return (map_t){ .root = NULL, .pool = pool }; +} + +/** Returns non-zero if map contains str */ +int map_contains(map_t *map, const char *str); + +/** Returns value if map contains str. Note: NULL may mean two different things. */ +void *map_get(map_t *map, const char *str); + +/** Inserts str into map. Returns 0 if new, 1 if replaced, or ENOMEM. */ +int map_set(map_t *map, const char *str, void *val); + +/** Deletes str from the map, returns 0 on suceess */ +int map_del(map_t *map, const char *str); + +/** Clears the given map */ +void map_clear(map_t *map); + +/** + * Calls callback for all strings in map + * See @fn map_walk_prefixed() for documentation on parameters. + */ +#define map_walk(map, callback, baton) \ + map_walk_prefixed((map), "", (callback), (baton)) + +/** + * Calls callback for all strings in map with the given prefix. + * Returns value immediately if a callback returns nonzero. + * + * @param map + * @param prefix required string prefix (empty => all strings) + * @param callback callback parameters are (key, value, baton) + * @param baton passed uservalue + */ +int map_walk_prefixed(map_t *map, const char *prefix, + int (*callback)(const char *, void *, void *), void *baton); + + +#ifdef __cplusplus +} +#endif + +/** @} */ diff --git a/lib/generic/pack.h b/lib/generic/pack.h new file mode 100644 index 0000000..dc7a975 --- /dev/null +++ b/lib/generic/pack.h @@ -0,0 +1,249 @@ +/* Copyright (C) 2015-2017 CZ.NIC, z.s.p.o. <knot-dns@labs.nic.cz> + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see <https://www.gnu.org/licenses/>. + */ + +/** + * @file pack.h + * @brief A length-prefixed list of objects, also an array list. + * + * Each object is prefixed by item length, unlike array this structure + * permits variable-length data. It is also equivallent to forward-only list + * backed by an array. + * + * @note Maximum object size is 2^16 bytes, see ::pack_objlen_t + * @TODO If some mistake happens somewhere, the access may end up in an infinite loop. + * (equality comparison on pointers) + * + * # Example usage: + * + * @code{.c} + * pack_t pack; + * pack_init(pack); + * + * // Reserve 2 objects, 6 bytes total + * pack_reserve(pack, 2, 4 + 2); + * + * // Push 2 objects + * pack_obj_push(pack, U8("jedi"), 4) + * pack_obj_push(pack, U8("\xbe\xef"), 2); + * + * // Iterate length-value pairs + * uint8_t *it = pack_head(pack); + * while (it != pack_tail(pack)) { + * uint8_t *val = pack_obj_val(it); + * it = pack_obj_next(it); + * } + * + * // Remove object + * pack_obj_del(pack, U8("jedi"), 4); + * + * pack_clear(pack); + * @endcode + * + * \addtogroup generics + * @{ + */ + +#pragma once + +#include <stdint.h> +#include <string.h> +#include "array.h" + +#ifdef __cplusplus +extern "C" { +#endif + +/** Packed object length type. */ +typedef uint16_t pack_objlen_t; + +/** Pack is defined as an array of bytes */ +typedef array_t(uint8_t) pack_t; + +/** Zero-initialize the pack. */ +#define pack_init(pack) \ + array_init(pack) + +/** Make the pack empty and free pointed-to memory (plain malloc/free). */ +#define pack_clear(pack) \ + array_clear(pack) + +/** Make the pack empty and free pointed-to memory. + * Mempool usage: pass mm_free and a knot_mm_t* . */ +#define pack_clear_mm(pack, free, baton) \ + array_clear_mm((pack), (free), (baton)) + +/** Reserve space for *additional* objects in the pack (plain malloc/free). + * @return 0 if success, <0 on failure */ +#define pack_reserve(pack, objs_count, objs_len) \ + pack_reserve_mm((pack), (objs_count), (objs_len), array_std_reserve, NULL) + +/** Reserve space for *additional* objects in the pack. + * Mempool usage: pass kr_memreserve and a knot_mm_t* . + * @return 0 if success, <0 on failure */ +#define pack_reserve_mm(pack, objs_count, objs_len, reserve, baton) \ + array_reserve_mm((pack), (pack).len + (sizeof(pack_objlen_t)*(objs_count) + (objs_len)), (reserve), (baton)) + +/** Return pointer to first packed object. + * + * Recommended way to iterate: + * for (uint8_t *it = pack_head(pack); it != pack_tail(pack); it = pack_obj_next(it)) + */ +#define pack_head(pack) \ + ((pack).len > 0 ? &((pack).at[0]) : NULL) + +/** Return pack end pointer. */ +#define pack_tail(pack) \ + ((pack).len > 0 ? &((pack).at[(pack).len]) : NULL) + +/** Return packed object length. */ +static inline pack_objlen_t pack_obj_len(uint8_t *it) +{ + pack_objlen_t len = 0; + if (it != NULL) + memcpy(&len, it, sizeof(len)); + return len; +} + +/** Return packed object value. */ +static inline uint8_t *pack_obj_val(uint8_t *it) +{ + if (it == NULL) { + assert(it); + return NULL; + } + return it + sizeof(pack_objlen_t); +} + +/** Return pointer to next packed object. */ +static inline uint8_t *pack_obj_next(uint8_t *it) +{ + if (it == NULL) { + assert(it); + return NULL; + } + return pack_obj_val(it) + pack_obj_len(it); +} + +/** Return pointer to the last packed object. */ +static inline uint8_t *pack_last(pack_t pack) +{ + if (pack.len == 0) { + return NULL; + } + uint8_t *it = pack_head(pack); + uint8_t *tail = pack_tail(pack); + while (true) { + uint8_t *next = pack_obj_next(it); + if (next == tail) { + return it; + } + it = next; + } +} + +/** Push object to the end of the pack + * @return 0 on success, negative number on failure + */ +static inline int pack_obj_push(pack_t *pack, const uint8_t *obj, pack_objlen_t len) +{ + if (pack == NULL || obj == NULL) { + assert(false); + return kr_error(EINVAL); + } + size_t packed_len = len + sizeof(len); + if (pack->len + packed_len > pack->cap) { + return kr_error(ENOSPC); + } + + uint8_t *endp = pack->at + pack->len; + memcpy(endp, (char *)&len, sizeof(len)); + memcpy(endp + sizeof(len), obj, len); + pack->len += packed_len; + return 0; +} + +/** Returns a pointer to packed object. + * @return pointer to packed object or NULL + */ +static inline uint8_t *pack_obj_find(pack_t *pack, const uint8_t *obj, pack_objlen_t len) +{ + if (pack == NULL || obj == NULL) { + assert(obj != NULL); + return NULL; + } + uint8_t *endp = pack_tail(*pack); + uint8_t *it = pack_head(*pack); + while (it != endp) { + uint8_t *val = pack_obj_val(it); + if (pack_obj_len(it) == len && memcmp(obj, val, len) == 0) { + return it; + } + it = pack_obj_next(it); + } + return NULL; +} + +/** Delete object from the pack + * @return 0 on success, negative number on failure + */ +static inline int pack_obj_del(pack_t *pack, const uint8_t *obj, pack_objlen_t len) +{ + if (pack == NULL || obj == NULL) { + assert(obj != NULL); + return kr_error(EINVAL); + } + uint8_t *endp = pack_tail(*pack); + uint8_t *it = pack_obj_find(pack, obj, len); + if (it) { + size_t packed_len = len + sizeof(len); + memmove(it, it + packed_len, endp - it - packed_len); + pack->len -= packed_len; + return 0; + } + return -1; +} + +/** Clone a pack, replacing destination pack; (*dst == NULL) is valid input. + * @return kr_error(ENOMEM) on allocation failure. */ +static inline int pack_clone(pack_t **dst, const pack_t *src, knot_mm_t *pool) +{ + if (!dst || !src) { + assert(false); + return kr_error(EINVAL); + } + /* Get a valid pack_t. */ + if (!*dst) { + *dst = mm_alloc(pool, sizeof(pack_t)); + if (!*dst) return kr_error(ENOMEM); + pack_init(**dst); + /* Clone data only if needed */ + if (src->len == 0) return kr_ok(); + } + /* Replace the contents of the pack_t. */ + int ret = array_reserve_mm(**dst, src->len, kr_memreserve, pool); + if (ret < 0) { + return kr_error(ENOMEM); + } + memcpy((*dst)->at, src->at, src->len); + (*dst)->len = src->len; + return kr_ok(); +} + +#ifdef __cplusplus +} +#endif + +/** @} */ diff --git a/lib/generic/queue.c b/lib/generic/queue.c new file mode 100644 index 0000000..45657c7 --- /dev/null +++ b/lib/generic/queue.c @@ -0,0 +1,124 @@ +/* Copyright (C) 2018 CZ.NIC, z.s.p.o. <knot-dns@labs.nic.cz> + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see <http://www.gnu.org/licenses/>. + */ + +#include "lib/generic/queue.h" +#include <string.h> + +KR_EXPORT void queue_init_impl(struct queue *q, size_t item_size) +{ + q->len = 0; + q->item_size = item_size; + q->head = q->tail = NULL; + /* Take 128 B (two x86 cache lines), except a small margin + * that the allocator can use for its overhead. + * Normally (64-bit pointers) this means 16 B header + 13*8 B data. */ + q->chunk_cap = (128 - offsetof(struct queue_chunk, data) + - sizeof(size_t) + ) / item_size; + if (!q->chunk_cap) q->chunk_cap = 1; /* item_size big enough by itself */ +} + +KR_EXPORT void queue_deinit_impl(struct queue *q) +{ + assert(q); + struct queue_chunk *p = q->head; + while (p != NULL) { + struct queue_chunk *pf = p; + p = p->next; + free(pf); + } +#ifndef NDEBUG + memset(q, 0, sizeof(*q)); +#endif +} + +static struct queue_chunk * queue_chunk_new(const struct queue *q) +{ + struct queue_chunk *c = malloc(offsetof(struct queue_chunk, data) + + q->chunk_cap * q->item_size); + if (unlikely(!c)) abort(); // simplify stuff + memset(c, 0, offsetof(struct queue_chunk, data)); + c->cap = q->chunk_cap; + /* ->begin and ->end are zero, i.e. we optimize for _push + * and not _push_head, by default. */ + return c; +} + +/* Return pointer to the space for the new element. */ +KR_EXPORT void * queue_push_impl(struct queue *q) +{ + assert(q); + struct queue_chunk *t = q->tail; // shorthand + if (unlikely(!t)) { + assert(!q->head && !q->len); + q->head = q->tail = t = queue_chunk_new(q); + } else + if (t->end == t->cap) { + if (t->begin * 2 >= t->cap) { + /* Utilization is below 50%, so let's shift (no overlap). */ + memcpy(t->data, t->data + t->begin * q->item_size, + (t->end - t->begin) * q->item_size); + t->end -= t->begin; + t->begin = 0; + } else { + /* Let's grow the tail by another chunk. */ + assert(!t->next); + t->next = queue_chunk_new(q); + t = q->tail = t->next; + } + } + assert(t->end < t->cap); + ++(q->len); + ++(t->end); + return t->data + q->item_size * (t->end - 1); +} + +/* Return pointer to the space for the new element. */ +KR_EXPORT void * queue_push_head_impl(struct queue *q) +{ + /* When we have choice, we optimize for further _push_head, + * i.e. when shifting or allocating a chunk, + * we store items on the tail-end of the chunk. */ + assert(q); + struct queue_chunk *h = q->head; // shorthand + if (unlikely(!h)) { + assert(!q->tail && !q->len); + h = q->head = q->tail = queue_chunk_new(q); + h->begin = h->end = h->cap; + } else + if (h->begin == 0) { + if (h->end * 2 <= h->cap) { + /* Utilization is below 50%, so let's shift (no overlap). + * Computations here are simplified due to h->begin == 0. */ + const int cnt = h->end; + memcpy(h->data + (h->cap - cnt) * q->item_size, h->data, + cnt * q->item_size); + h->begin = h->cap - cnt; + h->end = h->cap; + } else { + /* Let's grow the head by another chunk. */ + h = queue_chunk_new(q); + h->next = q->head; + q->head = h; + h->begin = h->end = h->cap; + } + } + assert(h->begin > 0); + --(h->begin); + ++(q->len); + return h->data + q->item_size * h->begin; +} + diff --git a/lib/generic/queue.h b/lib/generic/queue.h new file mode 100644 index 0000000..755e759 --- /dev/null +++ b/lib/generic/queue.h @@ -0,0 +1,260 @@ +/* Copyright (C) 2018 CZ.NIC, z.s.p.o. <knot-dns@labs.nic.cz> + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see <http://www.gnu.org/licenses/>. + */ +/** + * @file queue.h + * @brief A queue, usable for FIFO and LIFO simultaneously. + * + * Both the head and tail of the queue can be accessed and pushed to, + * but only the head can be popped from. + * + * @note The implementation uses a singly linked list of blocks + * where each block stores an array of values (for better efficiency). + * + * Example usage: + * @code{.c} + // define new queue type, and init a new queue instance + typedef queue_t(int) queue_int_t; + queue_int_t q; + queue_init(q); + // do some operations + queue_push(q, 1); + queue_push(q, 2); + queue_push(q, 3); + queue_push(q, 4); + queue_pop(q); + assert(queue_head(q) == 2); + assert(queue_tail(q) == 4); + + // you may iterate + typedef queue_it_t(int) queue_it_int_t; + for (queue_it_int_t it = queue_it_begin(q); !queue_it_finished(it); + queue_it_next(it)) { + ++queue_it_val(it); + } + assert(queue_tail(q) == 5); + + queue_push_head(q, 0); + ++queue_tail(q); + assert(queue_tail(q) == 6); + // free it up + queue_deinit(q); + + // you may use dynamic allocation for the type itself + queue_int_t *qm = malloc(sizeof(queue_int_t)); + queue_init(*qm); + queue_deinit(*qm); + free(qm); + * @endcode + * + * \addtogroup generics + * @{ + */ + +#pragma once + +#include "lib/defines.h" +#include "contrib/ucw/lib.h" +#include <assert.h> +#include <stdbool.h> +#include <stddef.h> +#include <stdint.h> +#include <stdlib.h> + +/** @brief The type for queue, parametrized by value type. */ +#define queue_t(type) \ + union { \ + type *pdata_t; /* only the *type* information is used */ \ + struct queue queue; \ + } + +/** @brief Initialize a queue. You can malloc() it the usual way. */ +#define queue_init(q) do { \ + (void)(((__typeof__(((q).pdata_t)))0) == (void *)0); /* typecheck queue_t */ \ + queue_init_impl(&(q).queue, sizeof(*(q).pdata_t)); \ + } while (false) + +/** @brief De-initialize a queue: make it invalid and free any inner allocations. */ +#define queue_deinit(q) \ + queue_deinit_impl(&(q).queue) + +/** @brief Push data to queue's tail. (Type-safe version; use _impl() otherwise.) */ +#define queue_push(q, data) \ + *((__typeof__((q).pdata_t)) queue_push_impl(&(q).queue)) = data + +/** @brief Push data to queue's head. (Type-safe version; use _impl() otherwise.) */ +#define queue_push_head(q, data) \ + *((__typeof__((q).pdata_t)) queue_push_head_impl(&(q).queue)) = data + +/** @brief Remove the element at the head. + * The queue must not be empty. */ +#define queue_pop(q) \ + queue_pop_impl(&(q).queue) + +/** @brief Return a "reference" to the element at the head (it's an L-value). + * The queue must not be empty. */ +#define queue_head(q) \ + ( *(__typeof__((q).pdata_t)) queue_head_impl(&(q).queue) ) + +/** @brief Return a "reference" to the element at the tail (it's an L-value). + * The queue must not be empty. */ +#define queue_tail(q) \ + ( *(__typeof__((q).pdata_t)) queue_tail_impl(&(q).queue) ) + +/** @brief Return the number of elements in the queue (very efficient). */ +#define queue_len(q) \ + ((const size_t)(q).queue.len) + + +/** @brief Type for queue iterator, parametrized by value type. + * It's a simple structure that owns no other resources. + * You may NOT use it after doing any push or pop (without _begin again). */ +#define queue_it_t(type) \ + union { \ + type *pdata_t; /* only the *type* information is used */ \ + struct queue_it iter; \ + } + +/** @brief Initialize a queue iterator at the head of the queue. + * If you use this in assignment (instead of initialization), + * you will unfortunately need to add corresponding type-cast in front. + * Beware: there's no type-check between queue and iterator! */ +#define queue_it_begin(q) \ + { .iter = queue_it_begin_impl(&(q).queue) } + +/** @brief Return a "reference" to the current element (it's an L-value) . */ +#define queue_it_val(it) \ + ( *(__typeof__((it).pdata_t)) queue_it_val_impl(&(it).iter) ) + +/** @brief Test if the iterator has gone past the last element. + * If it has, you may not use _val or _next. */ +#define queue_it_finished(it) \ + queue_it_finished_impl(&(it).iter) + +/** @brief Advance the iterator to the next element. */ +#define queue_it_next(it) \ + queue_it_next_impl(&(it).iter) + + + +/* ====================== Internal for the implementation ================== */ +/** @cond internal */ + +struct queue; +/* Non-inline functions are exported to be usable from daemon. */ +void queue_init_impl(struct queue *q, size_t item_size); +void queue_deinit_impl(struct queue *q); +void * queue_push_impl(struct queue *q); +void * queue_push_head_impl(struct queue *q); + +struct queue_chunk; +struct queue { + size_t len; + uint16_t chunk_cap, item_size; + struct queue_chunk *head, *tail; +}; + +struct queue_chunk { + struct queue_chunk *next; /*< head -> ... -> tail */ + int16_t begin, end, cap, pad_; /*< indices: zero is closest to head */ + /*< We could fit into uint8_t for example, but the choice of (3+1)*2 bytes + * is a compromise between wasting space and getting a good alignment. + * In particular, queue_t(type*) will store the pointers on addresses + * aligned to the pointer size, in both 64-bit and 32-bit platforms. + */ + char data[]; + /**< The item data. We use "char" to satisfy the C99+ aliasing rules. + * See C99 section 6.5 Expressions, paragraph 7. + * Any type can be accessed through char-pointer, + * so we can use a common struct definition + * for all types being held. + */ +}; + +static inline void * queue_head_impl(const struct queue *q) +{ + assert(q); + struct queue_chunk *h = q->head; + if (unlikely(!h)) + return NULL; + assert(h->end > h->begin); + return h->data + h->begin * q->item_size; +} + +static inline void * queue_tail_impl(const struct queue *q) +{ + assert(q); + struct queue_chunk *t = q->tail; + if (unlikely(!t)) + return NULL; + assert(t->end > t->begin); + return t->data + (t->end - 1) * q->item_size; +} + +static inline void queue_pop_impl(struct queue *q) +{ + assert(q); + struct queue_chunk *h = q->head; + assert(h && h->end > h->begin); + if (h->end - h->begin == 1) { + /* removing the last element in the chunk */ + q->head = h->next; + free(h); + } else { + ++(h->begin); + } + --(q->len); +} + + +struct queue_it { + struct queue_chunk *chunk; + int16_t pos, item_size; +}; + +static inline struct queue_it queue_it_begin_impl(struct queue *q) +{ + assert(q); + return (struct queue_it){ + .chunk = q->head, + .pos = q->head ? q->head->begin : -1, + .item_size = q->item_size, + }; +} + +static inline bool queue_it_finished_impl(struct queue_it *it) +{ + return it->chunk == NULL || it->pos >= it->chunk->end; +} + +static inline void * queue_it_val_impl(struct queue_it *it) +{ + assert(!queue_it_finished_impl(it)); + return it->chunk->data + it->pos * it->item_size; +} + +static inline void queue_it_next_impl(struct queue_it *it) +{ + assert(!queue_it_finished_impl(it)); + ++(it->pos); + if (it->pos < it->chunk->end) + return; + it->chunk = it->chunk->next; + it->pos = it->chunk ? it->chunk->begin : -1; +} + +/** @endcond (internal) */ +/** @} (addtogroup generics) */ + diff --git a/lib/generic/set.h b/lib/generic/set.h new file mode 100644 index 0000000..332c1aa --- /dev/null +++ b/lib/generic/set.h @@ -0,0 +1,105 @@ +/* Copyright (C) 2015-2017 CZ.NIC, z.s.p.o. <knot-dns@labs.nic.cz> + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see <https://www.gnu.org/licenses/>. + */ + +/** + * @file set.h + * @brief A set abstraction implemented on top of map. + * + * @note The API is based on map.h, see it for more examples. + * + * # Example usage: + * + * @code{.c} + * set_t set = set_make(NULL); + * + * // Insert keys + * if (set_add(&set, "princess") != 0 || + * set_add(&set, "prince") != 0 || + * set_add(&set, "leia") != 0) { + * fail(); + * } + * + * // Test membership + * if (set_contains(&set, "leia")) { + * success(); + * } + * + * // Prefix search + * int i = 0; + * int count(const char *s, void *n) { (*(int *)n)++; return 0; } + * if (set_walk_prefixed(set, "princ", count, &i) == 0) { + * printf("%d matches\n", i); + * } + * + * // Delete + * if (set_del(&set, "badkey") != 0) { + * fail(); // No such key + * } + * + * // Clear the set + * set_clear(&set); + * @endcode + * + * \addtogroup generics + * @{ + */ + +#pragma once + +#include <stddef.h> +#include "map.h" + +#ifdef __cplusplus +extern "C" { +#endif + +typedef map_t set_t; +typedef int (set_walk_cb)(const char *, void *); + +/*! Creates an new, empty critbit set */ +#define set_make \ + map_make + +/*! Returns non-zero if set contains str */ +#define set_contains(set, str) \ + map_contains((set), (str)) + +/*! Inserts str into set. Returns 0 if new, 1 if already present, or ENOMEM. */ +#define set_add(set, str) \ + map_set((set), (str), (void *)1) + +/*! Deletes str from the set, returns 0 on suceess */ +#define set_del(set, str) \ + map_del((set), (str)) + +/*! Clears the given set */ +#define set_clear(set) \ + map_clear(set) + +/*! Calls callback for all strings in map */ +#define set_walk(set, callback, baton) \ + map_walk_prefixed((set), "", (callback), (baton)) + +/*! Calls callback for all strings in set with the given prefix */ +#define set_walk_prefixed(set, prefix, callback, baton) \ + map_walk_prefixed((set), (prefix), (callback), (baton)) + + +#ifdef __cplusplus +} +#endif + +/** @} */ diff --git a/lib/generic/trie.c b/lib/generic/trie.c new file mode 100644 index 0000000..0009eef --- /dev/null +++ b/lib/generic/trie.c @@ -0,0 +1,912 @@ +/* Copyright (C) 2016-2018 CZ.NIC, z.s.p.o. <knot-dns@labs.nic.cz> + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see <http://www.gnu.org/licenses/>. + + The code originated from https://github.com/fanf2/qp/blob/master/qp.c + at revision 5f6d93753. + */ + +#include <assert.h> +#include <stdlib.h> +#include <string.h> + +#include "lib/generic/trie.h" +#include "lib/utils.h" +#include "contrib/ucw/lib.h" + +#if defined(__i386) || defined(__x86_64) || defined(_M_IX86) \ + || (defined(__BYTE_ORDER__) && defined(__ORDER_LITTLE_ENDIAN) \ + && __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__) + + /*! + * \brief Use a pointer alignment hack to save memory. + * + * When on, isbranch() relies on the fact that in leaf_t the first pointer + * is aligned on multiple of 4 bytes and that the flags bitfield is + * overlaid over the lowest two bits of that pointer. + * Neither is really guaranteed by the C standards; the second part should + * be OK with x86_64 ABI and most likely any other little-endian platform. + * It would be possible to manipulate the right bits portably, but it would + * complicate the code nontrivially. C++ doesn't even guarantee type-punning. + * In debug mode we check this works OK when creating a new trie instance. + */ + #define FLAGS_HACK 1 +#else + #define FLAGS_HACK 0 +#endif + +typedef unsigned char byte; +#ifndef uint +typedef unsigned int uint; +#define uint uint +#endif +typedef uint bitmap_t; /*! Bit-maps, using the range of 1<<0 to 1<<16 (inclusive). */ + +typedef struct { + uint32_t len; // 32 bits are enough for key lengths; probably even 16 bits would be. + char chars[]; +} tkey_t; + +/*! \brief Leaf of trie. */ +typedef struct { + #if !FLAGS_HACK + byte flags; + #endif + tkey_t *key; /*!< The pointer must be aligned to 4-byte multiples! */ + trie_val_t val; +} leaf_t; + +/*! \brief A trie node is either leaf_t or branch_t. */ +typedef union node node_t; + +/*! + * \brief Branch node of trie. + * + * - The flags distinguish whether the node is a leaf_t (0), or a branch + * testing the more-important nibble (1) or the less-important one (2). + * - It stores the index of the byte that the node tests. The combined + * value (index*4 + flags) increases in branch nodes as you go deeper + * into the trie. All the keys below a branch are identical up to the + * nibble identified by the branch. Indices have to be stored because + * we skip any branch nodes that would have a single child. + * (Consequently, the skipped parts of key have to be validated in a leaf.) + * - The bitmap indicates which subtries are present. The present child nodes + * are stored in the twigs array (with no holes between them). + * - To simplify storing keys that are prefixes of each other, the end-of-string + * position is treated as another nibble value, ordered before all others. + * That affects the bitmap and twigs fields. + * + * \note The branch nodes are never allocated individually, but they are + * always part of either the root node or the twigs array of the parent. + */ +typedef struct { + #if FLAGS_HACK + uint32_t flags : 2, + bitmap : 17; /*!< The first bitmap bit is for end-of-string child. */ + #else + byte flags; + uint32_t bitmap; + #endif + uint32_t index; + node_t *twigs; +} branch_t; + +union node { + leaf_t leaf; + branch_t branch; +}; + +struct trie { + node_t root; // undefined when weight == 0, see empty_root() + size_t weight; + knot_mm_t mm; +}; + +/*! \brief Make the root node empty (debug-only). */ +static inline void empty_root(node_t *root) { +#ifndef NDEBUG + *root = (node_t){ .branch = { + .flags = 3, // invalid value that fits + .bitmap = 0, + .index = -1, + .twigs = NULL + } }; +#endif +} + +/*! \brief Check that unportable code works OK (debug-only). */ +static void assert_portability(void) { +#if FLAGS_HACK + assert(((union node){ .leaf = { + .key = (tkey_t *)(((uint8_t *)NULL) + 1), + .val = NULL + } }).branch.flags == 1); +#endif +} + +/*! \brief Propagate error codes. */ +#define ERR_RETURN(x) \ + do { \ + int err_code_ = x; \ + if (unlikely(err_code_ != KNOT_EOK)) \ + return err_code_; \ + } while (false) + +/*! + * \brief Count the number of set bits. + * + * \TODO This implementation may be relatively slow on some HW. + */ +static uint bitmap_weight(bitmap_t w) +{ + assert((w & ~((1 << 17) - 1)) == 0); // using the least-important 17 bits + return __builtin_popcount(w); +} + +/*! \brief Only keep the lowest bit in the bitmap (least significant -> twigs[0]). */ +static bitmap_t bitmap_lowest_bit(bitmap_t w) +{ + assert((w & ~((1 << 17) - 1)) == 0); // using the least-important 17 bits + return 1 << __builtin_ctz(w); +} + +/*! \brief Test flags to determine type of this node. */ +static bool isbranch(const node_t *t) +{ + uint f = t->branch.flags; + assert(f <= 2); + return f != 0; +} + +/*! \brief Make a bitmask for testing a branch bitmap. */ +static bitmap_t nibbit(byte k, uint flags) +{ + uint shift = (2 - flags) << 2; + uint nibble = (k >> shift) & 0xf; + return 1 << (nibble + 1/*because of prefix keys*/); +} + +/*! \brief Extract a nibble from a key and turn it into a bitmask. */ +static bitmap_t twigbit(const node_t *t, const char *key, uint32_t len) +{ + assert(isbranch(t)); + uint i = t->branch.index; + + if (i >= len) + return 1 << 0; // leaf position + + return nibbit((byte)key[i], t->branch.flags); +} + +/*! \brief Test if a branch node has a child indicated by a bitmask. */ +static bool hastwig(const node_t *t, bitmap_t bit) +{ + assert(isbranch(t)); + return t->branch.bitmap & bit; +} + +/*! \brief Compute offset of an existing child in a branch node. */ +static uint twigoff(const node_t *t, bitmap_t b) +{ + assert(isbranch(t)); + return bitmap_weight(t->branch.bitmap & (b - 1)); +} + +/*! \brief Get pointer to a particular child of a branch node. */ +static node_t* twig(node_t *t, uint i) +{ + assert(isbranch(t)); + return &t->branch.twigs[i]; +} + +/*! + * \brief For a branch nod, compute offset of a child and child count. + * + * Having this separate might be meaningful for performance optimization. + */ +#define TWIGOFFMAX(off, max, t, b) do { \ + off = twigoff(t, b); \ + max = bitmap_weight(t->branch.bitmap); \ + } while(0) + +/*! \brief Simple string comparator. */ +static int key_cmp(const char *k1, uint32_t k1_len, const char *k2, uint32_t k2_len) +{ + int ret = memcmp(k1, k2, MIN(k1_len, k2_len)); + if (ret != 0) { + return ret; + } + + /* Key string is equal, compare lengths. */ + if (k1_len == k2_len) { + return 0; + } else if (k1_len < k2_len) { + return -1; + } else { + return 1; + } +} + +trie_t* trie_create(knot_mm_t *mm) +{ + assert_portability(); + trie_t *trie = mm_alloc(mm, sizeof(trie_t)); + if (trie != NULL) { + empty_root(&trie->root); + trie->weight = 0; + if (mm != NULL) + trie->mm = *mm; + else + mm_ctx_init(&trie->mm); + } + return trie; +} + +/*! \brief Free anything under the trie node, except for the passed pointer itself. */ +static void clear_trie(node_t *trie, knot_mm_t *mm) +{ + if (!isbranch(trie)) { + mm_free(mm, trie->leaf.key); + } else { + branch_t *b = &trie->branch; + int len = bitmap_weight(b->bitmap); + for (int i = 0; i < len; ++i) + clear_trie(b->twigs + i, mm); + mm_free(mm, b->twigs); + } +} + +void trie_free(trie_t *tbl) +{ + if (tbl == NULL) + return; + if (tbl->weight) + clear_trie(&tbl->root, &tbl->mm); + mm_free(&tbl->mm, tbl); +} + +void trie_clear(trie_t *tbl) +{ + assert(tbl); + if (!tbl->weight) + return; + clear_trie(&tbl->root, &tbl->mm); + empty_root(&tbl->root); + tbl->weight = 0; +} + +size_t trie_weight(const trie_t *tbl) +{ + assert(tbl); + return tbl->weight; +} + +struct found { + leaf_t *l; /**< the found leaf (NULL if not found) */ + branch_t *p; /**< the leaf's parent (if exists) */ + bitmap_t b; /**< bit-mask with a single bit marking l under p */ +}; +/** Search trie for an item with the given key (equality only). */ +static struct found find_equal(trie_t *tbl, const char *key, uint32_t len) +{ + assert(tbl); + struct found ret0; + memset(&ret0, 0, sizeof(ret0)); + if (!tbl->weight) + return ret0; + /* Current node and parent while descending (returned values basically). */ + node_t *t = &tbl->root; + branch_t *p = NULL; + bitmap_t b = 0; + while (isbranch(t)) { + __builtin_prefetch(t->branch.twigs); + b = twigbit(t, key, len); + if (!hastwig(t, b)) + return ret0; + p = &t->branch; + t = twig(t, twigoff(t, b)); + } + if (key_cmp(key, len, t->leaf.key->chars, t->leaf.key->len) != 0) + return ret0; + return (struct found) { + .l = &t->leaf, + .p = p, + .b = b, + }; +} +/** Find item with the first key (lexicographical order). */ +static struct found find_first(trie_t *tbl) +{ + assert(tbl); + if (!tbl->weight) { + struct found ret0; + memset(&ret0, 0, sizeof(ret0)); + return ret0; + } + /* Current node and parent while descending (returned values basically). */ + node_t *t = &tbl->root; + branch_t *p = NULL; + while (isbranch(t)) { + p = &t->branch; + t = &p->twigs[0]; + } + return (struct found) { + .l = &t->leaf, + .p = p, + .b = p ? bitmap_lowest_bit(p->bitmap) : 0, + }; +} + +trie_val_t* trie_get_try(trie_t *tbl, const char *key, uint32_t len) +{ + struct found found = find_equal(tbl, key, len); + return found.l ? &found.l->val : NULL; +} + +trie_val_t* trie_get_first(trie_t *tbl, char **key, uint32_t *len) +{ + struct found found = find_first(tbl); + if (!found.l) + return NULL; + if (key) + *key = found.l->key->chars; + if (len) + *len = found.l->key->len; + return &found.l->val; +} + +/** Delete the found element (if any) and return value (unless NULL is passed) */ +static int del_found(trie_t *tbl, struct found found, trie_val_t *val) +{ + if (!found.l) + return KNOT_ENOENT; + mm_free(&tbl->mm, found.l->key); + if (val != NULL) + *val = found.l->val; // we return trie_val_t directly when deleting + --tbl->weight; + branch_t * const p = found.p; // short-hand + if (unlikely(!p)) { // whole trie was a single leaf + assert(tbl->weight == 0); + empty_root(&tbl->root); + return KNOT_EOK; + } + // remove leaf t as child of p; get child index via pointer arithmetic + int ci = ((union node *)found.l) - p->twigs, + cc = bitmap_weight(p->bitmap); // child count + assert(ci >= 0 && ci < cc); + + if (cc == 2) { // collapse binary node p: move the other child to this node + node_t *twigs = p->twigs; + (*(union node *)p) = twigs[1 - ci]; // it might be a leaf or branch + mm_free(&tbl->mm, twigs); + return KNOT_EOK; + } + memmove(p->twigs + ci, p->twigs + ci + 1, sizeof(node_t) * (cc - ci - 1)); + p->bitmap &= ~found.b; + node_t *twigs = mm_realloc(&tbl->mm, p->twigs, sizeof(node_t) * (cc - 1), + sizeof(node_t) * cc); + if (likely(twigs != NULL)) + p->twigs = twigs; + /* We can ignore mm_realloc failure, only beware that next time + * the prev_size passed to it wouldn't be correct; TODO? */ + return KNOT_EOK; +} + +int trie_del(trie_t *tbl, const char *key, uint32_t len, trie_val_t *val) +{ + struct found found = find_equal(tbl, key, len); + return del_found(tbl, found, val); +} + +int trie_del_first(trie_t *tbl, char *key, uint32_t *len, trie_val_t *val) +{ + struct found found = find_first(tbl); + if (!found.l) + return KNOT_ENOENT; + if (key) { + if (!len) + return KNOT_EINVAL; + if (*len < found.l->key->len) + return kr_error(ENOSPC); + memcpy(key, found.l->key->chars, found.l->key->len); + } + if (len) { // makes sense even with key == NULL + *len = found.l->key->len; + } + return del_found(tbl, found, val); +} + +/*! + * \brief Stack of nodes, storing a path down a trie. + * + * The structure also serves directly as the public trie_it_t type, + * in which case it always points to the current leaf, unless we've finished + * (i.e. it->len == 0). + */ +typedef struct trie_it { + node_t* *stack; /*!< The stack; malloc is used directly instead of mm. */ + uint32_t len; /*!< Current length of the stack. */ + uint32_t alen; /*!< Allocated/available length of the stack. */ + /*! \brief Initial storage for \a stack; it should fit in many use cases. */ + node_t* stack_init[60]; +} nstack_t; + +/*! \brief Create a node stack containing just the root (or empty). */ +static void ns_init(nstack_t *ns, trie_t *tbl) +{ + assert(tbl); + ns->stack = ns->stack_init; + ns->alen = sizeof(ns->stack_init) / sizeof(ns->stack_init[0]); + if (tbl->weight) { + ns->len = 1; + ns->stack[0] = &tbl->root; + } else { + ns->len = 0; + } +} + +/*! \brief Free inside of the stack, i.e. not the passed pointer itself. */ +static void ns_cleanup(nstack_t *ns) +{ + assert(ns && ns->stack); + if (likely(ns->stack == ns->stack_init)) + return; + free(ns->stack); + #ifndef NDEBUG + ns->stack = NULL; + ns->alen = 0; + #endif +} + +/*! \brief Allocate more space for the stack. */ +static int ns_longer_alloc(nstack_t *ns) +{ + ns->alen *= 2; + size_t new_size = sizeof(nstack_t) + ns->alen * sizeof(node_t *); + node_t **st; + if (ns->stack == ns->stack_init) { + st = malloc(new_size); + if (st != NULL) + memcpy(st, ns->stack, ns->len * sizeof(node_t *)); + } else { + st = realloc(ns->stack, new_size); + } + if (st == NULL) + return KNOT_ENOMEM; + ns->stack = st; + return KNOT_EOK; +} + +/*! \brief Ensure the node stack can be extended by one. */ +static inline int ns_longer(nstack_t *ns) +{ + // get a longer stack if needed + if (likely(ns->len < ns->alen)) + return KNOT_EOK; + return ns_longer_alloc(ns); // hand-split the part suitable for inlining +} + +/*! + * \brief Find the "branching point" as if searching for a key. + * + * The whole path to the point is kept on the passed stack; + * always at least the root will remain on the top of it. + * Beware: the precise semantics of this function is rather tricky. + * The top of the stack will contain: the corresponding leaf if exact match is found; + * or the immediate node below a branching-point-on-edge or the branching-point itself. + * + * \param info Set position of the point of first mismatch (in index and flags). + * \param first Set the value of the first non-matching character (from trie), + * optionally; end-of-string character has value -256 (that's why it's int). + * Note: the character is converted to *unsigned* char (i.e. 0..255), + * as that's the ordering used in the trie. + * + * \return KNOT_EOK or KNOT_ENOMEM. + */ +static int ns_find_branch(nstack_t *ns, const char *key, uint32_t len, + branch_t *info, int *first) +{ + assert(ns && ns->len && info); + // First find some leaf with longest matching prefix. + while (isbranch(ns->stack[ns->len - 1])) { + ERR_RETURN(ns_longer(ns)); + node_t *t = ns->stack[ns->len - 1]; + __builtin_prefetch(t->branch.twigs); + bitmap_t b = twigbit(t, key, len); + // Even if our key is missing from this branch we need to + // keep iterating down to a leaf. It doesn't matter which + // twig we choose since the keys are all the same up to this + // index. Note that blindly using twigoff(t, b) can cause + // an out-of-bounds index if it equals twigmax(t). + uint i = hastwig(t, b) ? twigoff(t, b) : 0; + ns->stack[ns->len++] = twig(t, i); + } + tkey_t *lkey = ns->stack[ns->len-1]->leaf.key; + // Find index of the first char that differs. + uint32_t index = 0; + while (index < MIN(len,lkey->len)) { + if (key[index] != lkey->chars[index]) + break; + else + ++index; + } + info->index = index; + if (first) + *first = lkey->len > index ? (unsigned char)lkey->chars[index] : -256; + // Find flags: which half-byte has matched. + uint flags; + if (index == len && len == lkey->len) { // found equivalent key + info->flags = flags = 0; + goto success; + } + if (likely(index < MIN(len,lkey->len))) { + byte k2 = (byte)lkey->chars[index]; + byte k1 = (byte)key[index]; + flags = ((k1 ^ k2) & 0xf0) ? 1 : 2; + } else { // one is prefix of another + flags = 1; + } + info->flags = flags; + // now go up the trie from the current leaf + branch_t *t; + do { + if (unlikely(ns->len == 1)) + goto success; // only the root stays on the stack + t = (branch_t*)ns->stack[ns->len - 2]; + if (t->index < index || (t->index == index && t->flags < flags)) + goto success; + --ns->len; + } while (true); +success: + #ifndef NDEBUG // invariants on successful return + assert(ns->len); + if (isbranch(ns->stack[ns->len - 1])) { + t = &ns->stack[ns->len - 1]->branch; + assert(t->index > index || (t->index == index && t->flags >= flags)); + } + if (ns->len > 1) { + t = &ns->stack[ns->len - 2]->branch; + assert(t->index < index || (t->index == index + && (t->flags < flags || (t->flags == 1 && flags == 0)))); + } + #endif + return KNOT_EOK; +} + +/*! + * \brief Advance the node stack to the last leaf in the subtree. + * + * \return KNOT_EOK or KNOT_ENOMEM. + */ +static int ns_last_leaf(nstack_t *ns) +{ + assert(ns); + do { + ERR_RETURN(ns_longer(ns)); + node_t *t = ns->stack[ns->len - 1]; + if (!isbranch(t)) + return KNOT_EOK; + int lasti = bitmap_weight(t->branch.bitmap) - 1; + assert(lasti >= 0); + ns->stack[ns->len++] = twig(t, lasti); + } while (true); +} + +/*! + * \brief Advance the node stack to the first leaf in the subtree. + * + * \return KNOT_EOK or KNOT_ENOMEM. + */ +static int ns_first_leaf(nstack_t *ns) +{ + assert(ns && ns->len); + do { + ERR_RETURN(ns_longer(ns)); + node_t *t = ns->stack[ns->len - 1]; + if (!isbranch(t)) + return KNOT_EOK; + ns->stack[ns->len++] = twig(t, 0); + } while (true); +} + +/*! + * \brief Advance the node stack to the leaf that is previous to the current node. + * + * \note Prefix leaf under the current node DOES count (if present; perhaps questionable). + * \return KNOT_EOK on success, KNOT_ENOENT on not-found, or possibly KNOT_ENOMEM. + */ +static int ns_prev_leaf(nstack_t *ns) +{ + assert(ns && ns->len > 0); + + node_t *t = ns->stack[ns->len - 1]; + if (hastwig(t, 1 << 0)) { // the prefix leaf + t = twig(t, 0); + ERR_RETURN(ns_longer(ns)); + ns->stack[ns->len++] = t; + return KNOT_EOK; + } + + do { + if (ns->len < 2) + return KNOT_ENOENT; // root without empty key has no previous leaf + t = ns->stack[ns->len - 1]; + node_t *p = ns->stack[ns->len - 2]; + int pindex = t - p->branch.twigs; // index in parent via pointer arithmetic + assert(pindex >= 0 && pindex <= 16); + if (pindex > 0) { // t isn't the first child -> go down the previous one + ns->stack[ns->len - 1] = twig(p, pindex - 1); + return ns_last_leaf(ns); + } + // we've got to go up again + --ns->len; + } while (true); +} + +/*! + * \brief Advance the node stack to the leaf that is successor to the current node. + * + * \note Prefix leaf or anything else under the current node DOES count. + * \return KNOT_EOK on success, KNOT_ENOENT on not-found, or possibly KNOT_ENOMEM. + */ +static int ns_next_leaf(nstack_t *ns) +{ + assert(ns && ns->len > 0); + + node_t *t = ns->stack[ns->len - 1]; + if (isbranch(t)) + return ns_first_leaf(ns); + do { + if (ns->len < 2) + return KNOT_ENOENT; // not found, as no more parent is available + t = ns->stack[ns->len - 1]; + node_t *p = ns->stack[ns->len - 2]; + int pindex = t - p->branch.twigs; // index in parent via pointer arithmetic + assert(pindex >= 0 && pindex <= 16); + int pcount = bitmap_weight(p->branch.bitmap); + if (pindex + 1 < pcount) { // t isn't the last child -> go down the next one + ns->stack[ns->len - 1] = twig(p, pindex + 1); + return ns_first_leaf(ns); + } + // we've got to go up again + --ns->len; + } while (true); +} + +int trie_get_leq(trie_t *tbl, const char *key, uint32_t len, trie_val_t **val) +{ + assert(tbl && val); + *val = NULL; // so on failure we can just return; + if (tbl->weight == 0) + return KNOT_ENOENT; + { // Intentionally un-indented; until end of function, to bound cleanup attr. + // First find a key with longest-matching prefix + __attribute__((cleanup(ns_cleanup))) + nstack_t ns_local; + ns_init(&ns_local, tbl); + nstack_t *ns = &ns_local; + branch_t bp; + int un_leaf; // first unmatched character in the leaf + ERR_RETURN(ns_find_branch(ns, key, len, &bp, &un_leaf)); + int un_key = bp.index < len ? (unsigned char)key[bp.index] : -256; + node_t *t = ns->stack[ns->len - 1]; + if (bp.flags == 0) { // found exact match + *val = &t->leaf.val; + return KNOT_EOK; + } + // Get t: the last node on matching path + if (isbranch(t) && t->branch.index == bp.index && t->branch.flags == bp.flags) { + // t is OK + } else { + // the top of the stack was the first unmatched node -> step up + if (ns->len == 1) { + // root was unmatched already + if (un_key < un_leaf) + return KNOT_ENOENT; + ERR_RETURN(ns_last_leaf(ns)); + goto success; + } + --ns->len; + t = ns->stack[ns->len - 1]; + } + // Now we re-do the first "non-matching" step in the trie + // but try the previous child if key was less (it may not exist) + bitmap_t b = twigbit(t, key, len); + int i = hastwig(t, b) + ? twigoff(t, b) - (un_key < un_leaf) + : twigoff(t, b) - 1 /*twigoff returns successor when !hastwig*/; + if (i >= 0) { + ERR_RETURN(ns_longer(ns)); + ns->stack[ns->len++] = twig(t, i); + ERR_RETURN(ns_last_leaf(ns)); + } else { + ERR_RETURN(ns_prev_leaf(ns)); + } +success: + assert(!isbranch(ns->stack[ns->len - 1])); + *val = &ns->stack[ns->len - 1]->leaf.val; + return 1; + } +} + +/*! \brief Initialize a new leaf, copying the key, and returning failure code. */ +static int mk_leaf(node_t *leaf, const char *key, uint32_t len, knot_mm_t *mm) +{ + tkey_t *k = mm_alloc(mm, sizeof(tkey_t) + len); + #if FLAGS_HACK + assert(((uintptr_t)k) % 4 == 0); // we need an aligned pointer + #endif + if (unlikely(!k)) + return KNOT_ENOMEM; + k->len = len; + memcpy(k->chars, key, len); + leaf->leaf = (leaf_t){ + #if !FLAGS_HACK + .flags = 0, + #endif + .val = NULL, + .key = k + }; + return KNOT_EOK; +} + +trie_val_t* trie_get_ins(trie_t *tbl, const char *key, uint32_t len) +{ + assert(tbl); + // First leaf in an empty tbl? + if (unlikely(!tbl->weight)) { + if (unlikely(mk_leaf(&tbl->root, key, len, &tbl->mm))) + return NULL; + ++tbl->weight; + return &tbl->root.leaf.val; + } + { // Intentionally un-indented; until end of function, to bound cleanup attr. + // Find the branching-point + __attribute__((cleanup(ns_cleanup))) + nstack_t ns_local; + ns_init(&ns_local, tbl); + nstack_t *ns = &ns_local; + branch_t bp; // branch-point: index and flags signifying the longest common prefix + int k2; // the first unmatched character in the leaf + if (unlikely(ns_find_branch(ns, key, len, &bp, &k2))) + return NULL; + node_t *t = ns->stack[ns->len - 1]; + if (bp.flags == 0) // the same key was already present + return &t->leaf.val; + node_t leaf; + if (unlikely(mk_leaf(&leaf, key, len, &tbl->mm))) + return NULL; + + if (isbranch(t) && bp.index == t->branch.index && bp.flags == t->branch.flags) { + // The node t needs a new leaf child. + bitmap_t b1 = twigbit(t, key, len); + assert(!hastwig(t, b1)); + uint s, m; TWIGOFFMAX(s, m, t, b1); // new child position and original child count + node_t *twigs = mm_realloc(&tbl->mm, t->branch.twigs, + sizeof(node_t) * (m + 1), sizeof(node_t) * m); + if (unlikely(!twigs)) + goto err_leaf; + memmove(twigs + s + 1, twigs + s, sizeof(node_t) * (m - s)); + twigs[s] = leaf; + t->branch.twigs = twigs; + t->branch.bitmap |= b1; + ++tbl->weight; + return &twigs[s].leaf.val; + } else { + // We need to insert a new binary branch with leaf at *t. + // Note: it works the same for the case where we insert above root t. + #ifndef NDEBUG + if (ns->len > 1) { + node_t *pt = ns->stack[ns->len - 2]; + assert(hastwig(pt, twigbit(pt, key, len))); + } + #endif + node_t *twigs = mm_alloc(&tbl->mm, sizeof(node_t) * 2); + if (unlikely(!twigs)) + goto err_leaf; + node_t t2 = *t; // Save before overwriting t. + t->branch.flags = bp.flags; + t->branch.index = bp.index; + t->branch.twigs = twigs; + bitmap_t b1 = twigbit(t, key, len); + bitmap_t b2 = unlikely(k2 == -256) ? (1 << 0) : nibbit(k2, bp.flags); + t->branch.bitmap = b1 | b2; + *twig(t, twigoff(t, b1)) = leaf; + *twig(t, twigoff(t, b2)) = t2; + ++tbl->weight; + return &twig(t, twigoff(t, b1))->leaf.val; + }; +err_leaf: + mm_free(&tbl->mm, leaf.leaf.key); + return NULL; + } +} + +/*! \brief Apply a function to every trie_val_t*, in order; a recursive solution. */ +static int apply_trie(node_t *t, int (*f)(trie_val_t *, void *), void *d) +{ + assert(t); + if (!isbranch(t)) + return f(&t->leaf.val, d); + int child_count = bitmap_weight(t->branch.bitmap); + for (int i = 0; i < child_count; ++i) + ERR_RETURN(apply_trie(twig(t, i), f, d)); + return KNOT_EOK; +} + +int trie_apply(trie_t *tbl, int (*f)(trie_val_t *, void *), void *d) +{ + assert(tbl && f); + if (!tbl->weight) + return KNOT_EOK; + return apply_trie(&tbl->root, f, d); +} + +/* These are all thin wrappers around static Tns* functions. */ +trie_it_t* trie_it_begin(trie_t *tbl) +{ + assert(tbl); + trie_it_t *it = malloc(sizeof(nstack_t)); + if (!it) + return NULL; + ns_init(it, tbl); + if (it->len == 0) // empty tbl + return it; + if (ns_first_leaf(it)) { + ns_cleanup(it); + free(it); + return NULL; + } + return it; +} + +void trie_it_next(trie_it_t *it) +{ + assert(it && it->len); + if (ns_next_leaf(it) != KNOT_EOK) + it->len = 0; +} + +bool trie_it_finished(trie_it_t *it) +{ + assert(it); + return it->len == 0; +} + +void trie_it_free(trie_it_t *it) +{ + if (!it) + return; + ns_cleanup(it); + free(it); +} + +const char* trie_it_key(trie_it_t *it, size_t *len) +{ + assert(it && it->len); + node_t *t = it->stack[it->len - 1]; + assert(!isbranch(t)); + tkey_t *key = t->leaf.key; + if (len) + *len = key->len; + return key->chars; +} + +trie_val_t* trie_it_val(trie_it_t *it) +{ + assert(it && it->len); + node_t *t = it->stack[it->len - 1]; + assert(!isbranch(t)); + return &t->leaf.val; +} diff --git a/lib/generic/trie.h b/lib/generic/trie.h new file mode 100644 index 0000000..0550e95 --- /dev/null +++ b/lib/generic/trie.h @@ -0,0 +1,150 @@ +/* Copyright (C) 2017-2018 CZ.NIC, z.s.p.o. <knot-dns@labs.nic.cz> + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see <http://www.gnu.org/licenses/>. + */ + +#pragma once + +#include <stdbool.h> +#include <stdint.h> + +#include <libknot/mm_ctx.h> +#include "lib/defines.h" + +/*! + * \brief Native API of QP-tries: + * + * - keys are char strings, not necessarily zero-terminated, + * the structure copies the contents of the passed keys + * - values are void* pointers, typically you get an ephemeral pointer to it + * - key lengths are limited by 2^32-1 ATM + * + * XXX EDITORS: trie.{h,c} are synced from + * https://gitlab.labs.nic.cz/knot/knot-dns/tree/68352fc969/src/contrib/qp-trie + * only with tiny adjustments, mostly #includes and KR_EXPORT. + */ + +/*! \brief Element value. */ +typedef void* trie_val_t; + +/*! \brief Opaque structure holding a QP-trie. */ +typedef struct trie trie_t; + +/*! \brief Opaque type for holding a QP-trie iterator. */ +typedef struct trie_it trie_it_t; + +/*! \brief Create a trie instance. Pass NULL to use malloc+free. */ +KR_EXPORT +trie_t* trie_create(knot_mm_t *mm); + +/*! \brief Free a trie instance. */ +KR_EXPORT +void trie_free(trie_t *tbl); + +/*! \brief Clear a trie instance (make it empty). */ +KR_EXPORT +void trie_clear(trie_t *tbl); + +/*! \brief Return the number of keys in the trie. */ +KR_EXPORT +size_t trie_weight(const trie_t *tbl); + +/*! \brief Search the trie, returning NULL on failure. */ +KR_EXPORT +trie_val_t* trie_get_try(trie_t *tbl, const char *key, uint32_t len); + +/*! + * \brief Return pointer to the minimum. Optionally with key and its length. */ +KR_EXPORT +trie_val_t* trie_get_first(trie_t *tbl, char **key, uint32_t *len); + +/*! \brief Search the trie, inserting NULL trie_val_t on failure. */ +KR_EXPORT +trie_val_t* trie_get_ins(trie_t *tbl, const char *key, uint32_t len); + +/*! + * \brief Search for less-or-equal element. + * + * \param tbl Trie. + * \param key Searched key. + * \param len Key length. + * \param val Must be valid; it will be set to NULL if not found or errored. + * \return KNOT_EOK for exact match, 1 for previous, KNOT_ENOENT for not-found, + * or KNOT_E*. + */ +KR_EXPORT +int trie_get_leq(trie_t *tbl, const char *key, uint32_t len, trie_val_t **val); + +/*! + * \brief Apply a function to every trie_val_t, in order. + * + * \param d Parameter passed as the second argument to f(). + * \return First nonzero from f() or zero (i.e. KNOT_EOK). + */ +int trie_apply(trie_t *tbl, int (*f)(trie_val_t *, void *), void *d); + +/*! + * \brief Remove an item, returning KNOT_EOK if succeeded or KNOT_ENOENT if not found. + * + * If val!=NULL and deletion succeeded, the deleted value is set. + */ +KR_EXPORT +int trie_del(trie_t *tbl, const char *key, uint32_t len, trie_val_t *val); + +/*! + * \brief Remove the first item, returning KNOT_EOK on success. + * + * You may optionally get the key and/or value. + * The key is copied, so you need to pass sufficient len, + * otherwise kr_error(ENOSPC) is returned. + */ +KR_EXPORT +int trie_del_first(trie_t *tbl, char *key, uint32_t *len, trie_val_t *val); + +/*! \brief Create a new iterator pointing to the first element (if any). */ +KR_EXPORT +trie_it_t* trie_it_begin(trie_t *tbl); + +/*! + * \brief Advance the iterator to the next element. + * + * Iteration is in ascending lexicographical order. + * In particular, the empty string would be considered as the very first. + * + * \note You may not use this function if the trie's key-set has been modified + * during the lifetime of the iterator (modifying values only is OK). + */ +KR_EXPORT +void trie_it_next(trie_it_t *it); + +/*! \brief Test if the iterator has gone past the last element. */ +KR_EXPORT +bool trie_it_finished(trie_it_t *it); + +/*! \brief Free any resources of the iterator. It's OK to call it on NULL. */ +KR_EXPORT +void trie_it_free(trie_it_t *it); + +/*! + * \brief Return pointer to the key of the current element. + * + * \note The optional len is uint32_t internally but size_t is better for our usage, + * as it is without an additional type conversion. + */ +KR_EXPORT +const char* trie_it_key(trie_it_t *it, size_t *len); + +/*! \brief Return pointer to the value of the current element (writable). */ +KR_EXPORT +trie_val_t* trie_it_val(trie_it_t *it); |