diff options
Diffstat (limited to '')
-rw-r--r-- | src/contrib/qp-trie/trie.c | 1451 | ||||
-rw-r--r-- | src/contrib/qp-trie/trie.h | 280 |
2 files changed, 1731 insertions, 0 deletions
diff --git a/src/contrib/qp-trie/trie.c b/src/contrib/qp-trie/trie.c new file mode 100644 index 0000000..30dcb42 --- /dev/null +++ b/src/contrib/qp-trie/trie.c @@ -0,0 +1,1451 @@ +/* Copyright (C) 2019 CZ.NIC, z.s.p.o. <knot-dns@labs.nic.cz> + Copyright (C) 2018 Tony Finch <dot@dotat.at> + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see <https://www.gnu.org/licenses/>. + + The code originated from https://github.com/fanf2/qp/blob/master/qp.c + at revision 5f6d93753. + */ + +#include <assert.h> +#include <limits.h> +#include <stdlib.h> +#include <string.h> + +#include "contrib/qp-trie/trie.h" +#include "contrib/macros.h" +#include "contrib/mempattern.h" +#include "libknot/errcode.h" + +typedef unsigned int uint; +typedef uint64_t trie_index_t; /*!< nibble index into a key */ +typedef uint64_t word; /*!< A type-punned word */ +typedef uint bitmap_t; /*!< Bit-maps, using the range of 1<<0 to 1<<16 (inclusive). */ + +typedef char static_assert_pointer_fits_in_word + [sizeof(word) >= sizeof(uintptr_t) ? 1 : -1]; + +#define KEYLENBITS 31 + +/*! \brief trie keys have lengths + * + * 32 bits are enough for key lengths; probably even 16 bits would be. + * However, a 32 bit length means the alignment will be a multiple of + * 4, allowing us to stash the COW and BRANCH flags in the bottom bits + * of a pointer to a key. + * + * We need to steal a couple of bits from the length to keep the COW + * state of key allocations. + */ +typedef struct { + uint32_t cow:1, len:KEYLENBITS; + trie_key_t chars[]; +} tkey_t; + +/*! \brief A trie node is a pair of words. + * + * Each word is type-punned, depending on whether this is a branch + * node or a leaf node. We'll define some accessor functions to wrap + * this up into something reasonably safe. + * + * We aren't using a union to avoid problems with strict aliasing, and + * we aren't using bitfields because we want to control exactly which + * bits in the word are used by each field (in particular the flags). + * + * Branch nodes are never allocated individually: they are always part + * of either the root node or the twigs array of their parent branch. + * + * In a branch: + * + * `i` contains flags, bitmap, and index, explained in more detail below. + * + * `p` is a pointer to the "twigs", an array of child nodes. + * + * In a leaf: + * + * `i` is cast from a pointer to a tkey_t, with flags in the bottom bits. + * + * `p` is a trie_val_t. + */ +typedef struct node { + word i; + void *p; +} node_t; + +struct trie { + node_t root; // undefined when weight == 0, see empty_root() + size_t weight; + knot_mm_t mm; +}; + +/*! \brief size (in bits) of nibble (half-byte) indexes into keys + * + * The bottom bit is clear for the upper nibble, and set for the lower + * nibble, big-endian style, since the tree has to be in lexicographic + * order. The index increases from one branch node to the next as you + * go deeper into the trie. All the keys below a branch are identical + * up to the nibble identified by the branch. + * + * (see also tkey_t.len above) + */ +#define TWIDTH_INDEX 33 + +/*! \brief exclusive limit on indexes */ +#define TMAX_INDEX (BIG1 << TWIDTH_INDEX) + +/*! \brief size (in bits) of branch bitmap + * + * The bitmap indicates which subtries are present. The present child + * nodes are stored in the twigs array (with no holes between them). + * + * To simplify storing keys that are prefixes of each other, the + * end-of-string position is treated as an extra nibble value, ordered + * before all others. So there are 16 possible real nibble values, + * plus one value for nibbles past the end of the key. + */ +#define TWIDTH_BMP 17 + +/* + * We're constructing the layout of the branch `i` field in a careful + * way to avoid mistakes, getting the compiler to calculate values + * rather than typing them in by hand. + */ +enum { + TSHIFT_BRANCH = 0, + TSHIFT_COW, + TSHIFT_BMP, + TOP_BMP = TSHIFT_BMP + TWIDTH_BMP, + TSHIFT_INDEX = TOP_BMP, + TOP_INDEX = TSHIFT_INDEX + TWIDTH_INDEX, +}; + +typedef char static_assert_fields_fit_in_word + [TOP_INDEX <= sizeof(word) * CHAR_BIT ? 1 : -1]; + +typedef char static_assert_bmp_fits + [TOP_BMP <= sizeof(bitmap_t) * CHAR_BIT ? 1 : -1]; + +#define BIG1 ((word)1) +#define TMASK(width, shift) (((BIG1 << (width)) - BIG1) << (shift)) + +/*! \brief is this node a branch or a leaf? */ +#define TFLAG_BRANCH (BIG1 << TSHIFT_BRANCH) + +/*! \brief copy-on-write flag, used in both leaves and branches */ +#define TFLAG_COW (BIG1 << TSHIFT_COW) + +/*! \brief for extracting pointer to key */ +#define TMASK_LEAF (~(word)(TFLAG_BRANCH | TFLAG_COW)) + +/*! \brief mask for extracting nibble index */ +#define TMASK_INDEX TMASK(TWIDTH_INDEX, TSHIFT_INDEX) + +/*! \brief mask for extracting bitmap */ +#define TMASK_BMP TMASK(TWIDTH_BMP, TSHIFT_BMP) + +/*! \brief bitmap entry for NOBYTE */ +#define BMP_NOBYTE (BIG1 << TSHIFT_BMP) + +/*! \brief Initialize a new leaf, copying the key, and returning failure code. */ +static int mkleaf(node_t *leaf, const trie_key_t *key, uint32_t len, knot_mm_t *mm) +{ + if (unlikely((word)len > (BIG1 << KEYLENBITS))) + return KNOT_ENOMEM; + tkey_t *lkey = mm_alloc(mm, sizeof(tkey_t) + len); + if (unlikely(!lkey)) + return KNOT_ENOMEM; + lkey->cow = 0; + lkey->len = len; + memcpy(lkey->chars, key, len); + word i = (uintptr_t)lkey; + assert((i & TFLAG_BRANCH) == 0); + *leaf = (node_t){ .i = i, .p = NULL }; + return KNOT_EOK; +} + +/*! \brief construct a branch node */ +static node_t mkbranch(trie_index_t index, bitmap_t bmp, node_t *twigs) +{ + word i = TFLAG_BRANCH | bmp + | (index << TSHIFT_INDEX); + assert(index < TMAX_INDEX); + assert((bmp & ~TMASK_BMP) == 0); + return (node_t){ .i = i, .p = twigs }; +} + +/*! \brief Make an empty root node. */ +static node_t empty_root(void) +{ + return mkbranch(TMAX_INDEX-1, 0, NULL); +} + +/*! \brief Propagate error codes. */ +#define ERR_RETURN(x) \ + do { \ + int err_code_ = x; \ + if (unlikely(err_code_ != KNOT_EOK)) \ + return err_code_; \ + } while (false) + + +/*! \brief Test flags to determine type of this node. */ +static bool isbranch(const node_t *t) +{ + return t->i & TFLAG_BRANCH; +} + +static tkey_t *tkey(const node_t *t) +{ + assert(!isbranch(t)); + return (tkey_t *)(uintptr_t)(t->i & TMASK_LEAF); +} + +static trie_val_t *tvalp(node_t *t) +{ + assert(!isbranch(t)); + return &t->p; +} + +/*! \brief Given a branch node, return the index of the corresponding nibble in the key. */ +static trie_index_t branch_index(const node_t *t) +{ + assert(isbranch(t)); + return (t->i & TMASK_INDEX) >> TSHIFT_INDEX; +} + +static bitmap_t branch_bmp(const node_t *t) +{ + assert(isbranch(t)); + return (t->i & TMASK_BMP); +} + +/*! + * \brief Count the number of set bits. + * + * \TODO This implementation may be relatively slow on some HW. + */ +static uint branch_weight(const node_t *t) +{ + assert(isbranch(t)); + uint n = __builtin_popcount(t->i & TMASK_BMP); + assert(n > 1 && n <= TWIDTH_BMP); + return n; +} + +/*! \brief Compute offset of an existing child in a branch node. */ +static uint twigoff(const node_t *t, bitmap_t bit) +{ + assert(isbranch(t)); + assert(__builtin_popcount(bit) == 1); + return __builtin_popcount(t->i & TMASK_BMP & (bit - 1)); +} + +/*! \brief Extract a nibble from a key and turn it into a bitmask. */ +static bitmap_t keybit(trie_index_t ni, const trie_key_t *key, uint32_t len) +{ + trie_index_t bytei = ni >> 1; + + if (bytei >= len) + return BMP_NOBYTE; + + uint8_t ki = (uint8_t)key[bytei]; + uint nibble = (ni & 1) ? (ki & 0xf) : (ki >> 4); + + // skip one for NOBYTE nibbles after the end of the key + return BIG1 << (nibble + 1 + TSHIFT_BMP); +} + +/*! \brief Extract a nibble from a key and turn it into a bitmask. */ +static bitmap_t twigbit(const node_t *t, const trie_key_t *key, uint32_t len) +{ + assert(isbranch(t)); + return keybit(branch_index(t), key, len); +} + +/*! \brief Test if a branch node has a child indicated by a bitmask. */ +static bool hastwig(const node_t *t, bitmap_t bit) +{ + assert(isbranch(t)); + assert((bit & ~TMASK_BMP) == 0); + assert(__builtin_popcount(bit) == 1); + return t->i & bit; +} + +/*! \brief Get pointer to packed array of child nodes. */ +static node_t* twigs(node_t *t) +{ + assert(isbranch(t)); + return t->p; +} + +/*! \brief Get pointer to a particular child of a branch node. */ +static node_t* twig(node_t *t, uint i) +{ + assert(i < branch_weight(t)); + return twigs(t) + i; +} + +/*! \brief Get twig number of a child node TODO: better description. */ +static uint twig_number(node_t *child, node_t *parent) +{ + // twig array index using pointer arithmetic + ptrdiff_t num = child - twigs(parent); + assert(num >= 0 && num < branch_weight(parent)); + return (uint)num; +} + +/*! \brief Simple string comparator. */ +static int key_cmp(const trie_key_t *k1, uint32_t k1_len, + const trie_key_t *k2, uint32_t k2_len) +{ + int ret = memcmp(k1, k2, MIN(k1_len, k2_len)); + if (ret != 0) { + return ret; + } + + /* Key string is equal, compare lengths. */ + if (k1_len == k2_len) { + return 0; + } else if (k1_len < k2_len) { + return -1; + } else { + return 1; + } +} + +trie_t* trie_create(knot_mm_t *mm) +{ + trie_t *trie = mm_alloc(mm, sizeof(trie_t)); + if (trie != NULL) { + trie->root = empty_root(); + trie->weight = 0; + if (mm != NULL) + trie->mm = *mm; + else + mm_ctx_init(&trie->mm); + } + return trie; +} + +/*! \brief Free anything under the trie node, except for the passed pointer itself. */ +static void clear_trie(node_t *trie, knot_mm_t *mm) +{ + if (!isbranch(trie)) { + mm_free(mm, tkey(trie)); + } else { + uint n = branch_weight(trie); + for (uint i = 0; i < n; ++i) + clear_trie(twig(trie, i), mm); + mm_free(mm, twigs(trie)); + } +} + +void trie_free(trie_t *tbl) +{ + if (tbl == NULL) + return; + if (tbl->weight) + clear_trie(&tbl->root, &tbl->mm); + mm_free(&tbl->mm, tbl); +} + +void trie_clear(trie_t *tbl) +{ + assert(tbl); + if (!tbl->weight) + return; + clear_trie(&tbl->root, &tbl->mm); + tbl->root = empty_root(); + tbl->weight = 0; +} + +static bool dup_trie(node_t *copy, const node_t *orig, trie_dup_cb dup_cb, knot_mm_t *mm) +{ + if (isbranch(orig)) { + uint n = branch_weight(orig); + node_t *cotw = mm_alloc(mm, n * sizeof(*cotw)); + if (cotw == NULL) { + return NULL; + } + const node_t *ortw = twigs((node_t *)orig); + for (uint i = 0; i < n; ++i) { + if (!dup_trie(cotw + i, ortw + i, dup_cb, mm)) { + while (i-- > 0) { + clear_trie(cotw + i, mm); + } + mm_free(mm, cotw); + return false; + } + } + *copy = mkbranch(branch_index(orig), branch_bmp(orig), cotw); + } else { + tkey_t *key = tkey(orig); + if (mkleaf(copy, key->chars, key->len, mm) != KNOT_EOK) { + return false; + } + if ((copy->p = dup_cb(orig->p, mm)) == NULL) { + mm_free(mm, tkey(copy)); + return false; + } + } + return true; +} + +trie_t* trie_dup(const trie_t *orig, trie_dup_cb dup_cb, knot_mm_t *mm) +{ + if (orig == NULL) { + return NULL; + } + trie_t *copy = mm_alloc(mm, sizeof(*copy)); + if (copy == NULL) { + return NULL; + } + copy->weight = orig->weight; + if (mm != NULL) { + copy->mm = *mm; + } else { + mm_ctx_init(©->mm); + } + if (copy->weight) { + if (!dup_trie(©->root, &orig->root, dup_cb, mm)) { + mm_free(mm, copy); + return NULL; + } + } + return copy; +} + +size_t trie_weight(const trie_t *tbl) +{ + assert(tbl); + return tbl->weight; +} + +trie_val_t* trie_get_try(trie_t *tbl, const trie_key_t *key, uint32_t len) +{ + assert(tbl); + if (!tbl->weight) + return NULL; + node_t *t = &tbl->root; + while (isbranch(t)) { + __builtin_prefetch(twigs(t)); + bitmap_t b = twigbit(t, key, len); + if (!hastwig(t, b)) + return NULL; + t = twig(t, twigoff(t, b)); + } + tkey_t *lkey = tkey(t); + if (key_cmp(key, len, lkey->chars, lkey->len) != 0) + return NULL; + return tvalp(t); +} + +/* Optimization: the approach isn't ideal, as e.g. walking through the prefix + * is duplicated and we explicitly construct the wildcard key. Still, it's close + * to optimum which would be significantly more complicated and error-prone to write. */ +trie_val_t* trie_get_try_wildcard(trie_t *tbl, const trie_key_t *key, uint32_t len) +{ + assert(tbl); + if (!tbl->weight) + return NULL; + // Find leaf sharing the longest common prefix; see ns_find_branch() for explanation. + node_t *t = &tbl->root; + while (isbranch(t)) { + __builtin_prefetch(twigs(t)); + bitmap_t b = twigbit(t, key, len); + uint i = hastwig(t, b) ? twigoff(t, b) : 0; + t = twig(t, i); + } + const tkey_t * const lcp_key = tkey(t); + + // Find the last matching zero byte or -1 (source of synthesis) + int i_lmz = -1; + for (int i = 0; i < len && i < lcp_key->len && key[i] == lcp_key->chars[i]; ++i) { + if (key[i] == '\0' && i < len - 1) // do not count the terminating zero + i_lmz = i; + // Shortcut: we may have found an exact match. + if (i == len - 1 && len == lcp_key->len) + return tvalp(t); + } + if (len == 0) // The empty name needs separate handling. + return lcp_key->len == 0 ? tvalp(t) : NULL; + + // Construct the key of the wildcard we need and look it up. + const int wild_len = i_lmz + 3; + uint8_t wild_key[wild_len]; + memcpy(wild_key, key, wild_len - 2); + wild_key[wild_len - 2] = '*'; + wild_key[wild_len - 1] = '\0'; // LF is always 0-terminated ATM + return trie_get_try(tbl, wild_key, wild_len); +} + +/*! \brief Delete leaf t with parent p; b is the bit for t under p. + * Optionally return the deleted value via val. The function can't fail. */ +static void del_found(trie_t *tbl, node_t *t, node_t *p, bitmap_t b, trie_val_t *val) +{ + assert(!tkey(t)->cow); + mm_free(&tbl->mm, tkey(t)); + if (val != NULL) + *val = *tvalp(t); // we return trie_val_t directly when deleting + --tbl->weight; + if (unlikely(!p)) { // whole trie was a single leaf + assert(tbl->weight == 0); + tbl->root = empty_root(); + return; + } + // remove leaf t as child of p + node_t *tp = twigs(p); + uint ci = twig_number(t, p); + uint cc = branch_weight(p); // child count + + if (cc == 2) { + // collapse binary node p: move the other child to the parent + *p = tp[1 - ci]; + mm_free(&tbl->mm, tp); + return; + } + memmove(tp + ci, tp + ci + 1, sizeof(node_t) * (cc - ci - 1)); + p->i &= ~b; + node_t *newt = mm_realloc(&tbl->mm, tp, sizeof(node_t) * (cc - 1), + sizeof(node_t) * cc); + if (likely(newt != NULL)) + p->p = newt; + // We can ignore mm_realloc failure because an oversized twig + // array is OK - only beware that next time the prev_size + // passed to mm_realloc will not be correct; TODO? +} + +int trie_del(trie_t *tbl, const trie_key_t *key, uint32_t len, trie_val_t *val) +{ + assert(tbl); + if (!tbl->weight) + return KNOT_ENOENT; + node_t *t = &tbl->root; // current and parent node + node_t *p = NULL; + bitmap_t b = 0; + while (isbranch(t)) { + __builtin_prefetch(twigs(t)); + b = twigbit(t, key, len); + if (!hastwig(t, b)) + return KNOT_ENOENT; + p = t; + t = twig(t, twigoff(t, b)); + } + tkey_t *lkey = tkey(t); + if (key_cmp(key, len, lkey->chars, lkey->len) != 0) + return KNOT_ENOENT; + del_found(tbl, t, p, b, val); + return KNOT_EOK; +} + +/*! + * \brief Stack of nodes, storing a path down a trie. + * + * The structure also serves directly as the public trie_it_t type, + * in which case it always points to the current leaf, unless we've finished + * (i.e. it->len == 0). + * stack[0] is always a valid pointer to the root -> ns_gettrie() + */ +typedef struct trie_it { + node_t* *stack; /*!< The stack; malloc is used directly instead of mm. */ + uint32_t len; /*!< Current length of the stack. */ + uint32_t alen; /*!< Allocated/available length of the stack. */ + /*! \brief Initial storage for \a stack; it should fit in most use cases. */ + node_t* stack_init[250]; +} nstack_t; + +/*! \brief Create a node stack containing just the root (or empty). */ +static void ns_init(nstack_t *ns, trie_t *tbl) +{ + assert(tbl); + ns->stack = ns->stack_init; + ns->alen = sizeof(ns->stack_init) / sizeof(ns->stack_init[0]); + ns->stack[0] = &tbl->root; + ns->len = (tbl->weight > 0); +} + +static inline trie_t * ns_gettrie(nstack_t *ns) +{ + assert(ns && ns->stack && ns->stack[0]); + return (struct trie *)ns->stack[0]; +} + +/*! \brief Free inside of the stack, i.e. not the passed pointer itself. */ +static void ns_cleanup(nstack_t *ns) +{ + assert(ns && ns->stack); + if (likely(ns->stack == ns->stack_init)) + return; + free(ns->stack); + #ifndef NDEBUG + ns->stack = NULL; + ns->alen = 0; + #endif +} + +/*! \brief Allocate more space for the stack. */ +static int ns_longer_alloc(nstack_t *ns) +{ + ns->alen *= 2; + size_t new_size = ns->alen * sizeof(node_t *); + node_t **st; + if (ns->stack == ns->stack_init) { + st = malloc(new_size); + if (st != NULL) + memcpy(st, ns->stack, ns->len * sizeof(node_t *)); + } else { + st = realloc(ns->stack, new_size); + } + if (st == NULL) + return KNOT_ENOMEM; + ns->stack = st; + return KNOT_EOK; +} + +/*! \brief Ensure the node stack can be extended by one. */ +static inline int ns_longer(nstack_t *ns) +{ + // get a longer stack if needed + if (likely(ns->len < ns->alen)) + return KNOT_EOK; + return ns_longer_alloc(ns); // hand-split the part suitable for inlining +} + +/*! + * \brief Find the "branching point" as if searching for a key. + * + * The whole path to the point is kept on the passed stack; + * always at least the root will remain on the top of it. + * Beware: the precise semantics of this function is rather tricky. + * The top of the stack will contain: the corresponding leaf if exact + * match is found; or the immediate node below a + * branching-point-on-edge or the branching-point itself. + * + * \param idiff Set the index of first differing nibble, or TMAX_INDEX for an exact match + * \param tbit Set the bit of the closest leaf's nibble at index idiff + * \param kbit Set the bit of the key's nibble at index idiff + * + * \return KNOT_EOK or KNOT_ENOMEM. + */ +static int ns_find_branch(nstack_t *ns, const trie_key_t *key, uint32_t len, + trie_index_t *idiff, bitmap_t *tbit, bitmap_t *kbit) +{ + assert(ns && ns->len && idiff); + // First find some leaf with longest matching prefix. + while (isbranch(ns->stack[ns->len - 1])) { + ERR_RETURN(ns_longer(ns)); + node_t *t = ns->stack[ns->len - 1]; + __builtin_prefetch(twigs(t)); + bitmap_t b = twigbit(t, key, len); + // Even if our key is missing from this branch we need to + // keep iterating down to a leaf. It doesn't matter which + // twig we choose since the keys are all the same up to this + // index. Note that blindly using twigoff(t, b) can cause + // an out-of-bounds index if it equals twigmax(t). + uint i = hastwig(t, b) ? twigoff(t, b) : 0; + ns->stack[ns->len++] = twig(t, i); + } + tkey_t *lkey = tkey(ns->stack[ns->len-1]); + // Find index of the first char that differs. + size_t bytei = 0; + uint32_t klen = lkey->len; + for (bytei = 0; bytei < MIN(len,klen); bytei++) { + if (key[bytei] != lkey->chars[bytei]) + break; + } + // Find which half-byte has matched. + trie_index_t index = bytei << 1; + if (bytei == len && len == lkey->len) { // found equivalent key + index = TMAX_INDEX; + goto success; + } + if (likely(bytei < MIN(len,klen))) { + uint8_t k2 = (uint8_t)lkey->chars[bytei]; + uint8_t k1 = (uint8_t)key[bytei]; + if (((k1 ^ k2) & 0xf0) == 0) + index += 1; + } + // now go up the trie from the current leaf + node_t *t; + do { + if (unlikely(ns->len == 1)) + goto success; // only the root stays on the stack + t = ns->stack[ns->len - 2]; + if (branch_index(t) < index) + goto success; + --ns->len; + } while (true); +success: + #ifndef NDEBUG // invariants on successful return + assert(ns->len); + if (isbranch(ns->stack[ns->len - 1])) { + t = ns->stack[ns->len - 1]; + assert(branch_index(t) >= index); + } + if (ns->len > 1) { + t = ns->stack[ns->len - 2]; + assert(branch_index(t) < index || index == TMAX_INDEX); + } + #endif + *idiff = index; + *tbit = keybit(index, lkey->chars, lkey->len); + *kbit = keybit(index, key, len); + return KNOT_EOK; +} + +/*! + * \brief Advance the node stack to the last leaf in the subtree. + * + * \return KNOT_EOK or KNOT_ENOMEM. + */ +static int ns_last_leaf(nstack_t *ns) +{ + assert(ns); + do { + ERR_RETURN(ns_longer(ns)); + node_t *t = ns->stack[ns->len - 1]; + if (!isbranch(t)) + return KNOT_EOK; + uint lasti = branch_weight(t) - 1; + ns->stack[ns->len++] = twig(t, lasti); + } while (true); +} + +/*! + * \brief Advance the node stack to the first leaf in the subtree. + * + * \return KNOT_EOK or KNOT_ENOMEM. + */ +static int ns_first_leaf(nstack_t *ns) +{ + assert(ns && ns->len); + do { + ERR_RETURN(ns_longer(ns)); + node_t *t = ns->stack[ns->len - 1]; + if (!isbranch(t)) + return KNOT_EOK; + ns->stack[ns->len++] = twig(t, 0); + } while (true); +} + +/*! + * \brief Advance the node stack to the leaf that is previous to the current node. + * + * \note Prefix leaf under the current node DOES count (if present; perhaps questionable). + * \return KNOT_EOK on success, KNOT_ENOENT on not-found, or possibly KNOT_ENOMEM. + */ +static int ns_prev_leaf(nstack_t *ns) +{ + assert(ns && ns->len > 0); + + node_t *t = ns->stack[ns->len - 1]; + // Beware: BMP_NOBYTE child is ordered *before* its parent. + if (isbranch(t) && hastwig(t, BMP_NOBYTE)) { + ERR_RETURN(ns_longer(ns)); + ns->stack[ns->len++] = twig(t, 0); + return KNOT_EOK; + } + + for (; ns->len >= 2; --ns->len) { + t = ns->stack[ns->len - 1]; + node_t *p = ns->stack[ns->len - 2]; + uint ci = twig_number(t, p); + if (ci == 0) // we've got to go up again + continue; + // t isn't the first child -> go down the previous one + ns->stack[ns->len - 1] = twig(p, ci - 1); + return ns_last_leaf(ns); + } + return KNOT_ENOENT; // root without empty key has no previous leaf +} + +/*! + * \brief Advance the node stack to the leaf that is successor to the current node. + * + * \param skip_prefixed skip any nodes whose key is a prefix of the current one. + * If false, prefix leaf or anything else under the current node DOES count. + * \return KNOT_EOK on success, KNOT_ENOENT on not-found, or possibly KNOT_ENOMEM. + */ +static int ns_next_leaf(nstack_t *ns, const bool skip_pefixed) +{ + assert(ns && ns->len > 0); + + node_t *t = ns->stack[ns->len - 1]; + if (!skip_pefixed && isbranch(t)) + return ns_first_leaf(ns); + for (; ns->len >= 2; --ns->len) { + t = ns->stack[ns->len - 1]; + node_t *p = ns->stack[ns->len - 2]; + uint ci = twig_number(t, p); + if (skip_pefixed && ci == 0 && hastwig(t, BMP_NOBYTE)) { + // Keys in the subtree of p are suffixes of the key of t, + // so we've got to go one level higher + // (this can't happen more than once) + continue; + } + uint cc = branch_weight(p); + assert(ci + 1 <= cc); + if (ci + 1 == cc) { + // t is the last child of p, so we need to keep climbing + continue; + } + // go down the next child of p + ns->stack[ns->len - 1] = twig(p, ci + 1); + return ns_first_leaf(ns); + } + return KNOT_ENOENT; // not found, as no more parent is available +} + +/*! \brief Advance the node stack to leaf with longest prefix of the current key. */ +static int ns_prefix(nstack_t *ns) +{ + assert(ns && ns->len > 0); + const node_t *start = ns->stack[ns->len - 1]; + // Walk up the trie until we find a BMP_NOBYTE child. + while (--ns->len > 0) { + node_t *p = ns->stack[ns->len - 1]; + if (!hastwig(p, BMP_NOBYTE)) + continue; + node_t *end = twig(p, 0); + // In case we started in a BMP_NOBYTE leaf, the first step up + // did NOT shorten the key and we would get back into the same + // node again. + if (end == start) + continue; + ns->stack[ns->len++] = end; + return KNOT_EOK; + } + return KNOT_ENOENT; // not found, as no more parent is available +} + +/*! \brief less-or-equal search. + * + * \return KNOT_EOK for exact match, 1 for previous, KNOT_ENOENT for not-found, + * or KNOT_E*. + */ +static int ns_get_leq(nstack_t *ns, const trie_key_t *key, uint32_t len) +{ + // First find the key with longest-matching prefix + trie_index_t idiff; + bitmap_t tbit, kbit; + ERR_RETURN(ns_find_branch(ns, key, len, &idiff, &tbit, &kbit)); + node_t *t = ns->stack[ns->len - 1]; + if (idiff == TMAX_INDEX) // found exact match + return KNOT_EOK; + // Get t: the last node on matching path + bitmap_t b; + if (isbranch(t) && branch_index(t) == idiff) { + // t is OK + b = kbit; + } else { + // the top of the stack was the first unmatched node -> step up + if (ns->len == 1) { + // root was unmatched already + if (kbit < tbit) + return KNOT_ENOENT; + ERR_RETURN(ns_last_leaf(ns)); + return 1; + } + --ns->len; + t = ns->stack[ns->len - 1]; + b = twigbit(t, key, len); + } + // Now we re-do the first "non-matching" step in the trie + // but try the previous child if key was less (it may not exist) + int i = hastwig(t, b) + ? (int)twigoff(t, b) - (kbit < tbit) + : (int)twigoff(t, b) - 1 /* twigoff returns successor when !hastwig */; + if (i >= 0) { + ERR_RETURN(ns_longer(ns)); + ns->stack[ns->len++] = twig(t, i); + ERR_RETURN(ns_last_leaf(ns)); + } else { + ERR_RETURN(ns_prev_leaf(ns)); + } + return 1; +} + +int trie_get_leq(trie_t *tbl, const trie_key_t *key, uint32_t len, trie_val_t **val) +{ + assert(tbl && val); + if (tbl->weight == 0) { + if (val) *val = NULL; + return KNOT_ENOENT; + } + // We try to do without malloc. + nstack_t ns_local; + ns_init(&ns_local, tbl); + nstack_t *ns = &ns_local; + + int ret = ns_get_leq(ns, key, len); + if (ret == KNOT_EOK || ret == 1) { + assert(!isbranch(ns->stack[ns->len - 1])); + if (val) *val = tvalp(ns->stack[ns->len - 1]); + } else { + if (val) *val = NULL; + } + ns_cleanup(ns); + return ret; +} + +int trie_it_get_leq(trie_it_t *it, const trie_key_t *key, uint32_t len) +{ + assert(it && it->stack[0] && it->alen); + const trie_t *tbl = ns_gettrie(it); + if (tbl->weight == 0) { + it->len = 0; + return KNOT_ENOENT; + } + it->len = 1; + int ret = ns_get_leq(it, key, len); + if (ret == KNOT_EOK || ret == 1) { + assert(trie_it_key(it, NULL)); + } else { + it->len = 0; + } + return ret; +} + +/* see below */ +static int cow_pushdown(trie_cow_t *cow, nstack_t *ns); + +/*! \brief implementation of trie_get_ins() and trie_get_cow() */ +static trie_val_t* cow_get_ins(trie_cow_t *cow, trie_t *tbl, + const trie_key_t *key, uint32_t len) +{ + assert(tbl); + // First leaf in an empty tbl? + if (unlikely(!tbl->weight)) { + if (unlikely(mkleaf(&tbl->root, key, len, &tbl->mm))) + return NULL; + ++tbl->weight; + return tvalp(&tbl->root); + } + { // Intentionally un-indented; until end of function, to bound cleanup attr. + // Find the branching-point + __attribute__((cleanup(ns_cleanup))) + nstack_t ns_local; + ns_init(&ns_local, tbl); + nstack_t *ns = &ns_local; + trie_index_t idiff; + bitmap_t tbit, kbit; + if (unlikely(ns_find_branch(ns, key, len, &idiff, &tbit, &kbit))) + return NULL; + if (unlikely(cow && cow_pushdown(cow, ns) != KNOT_EOK)) + return NULL; + node_t *t = ns->stack[ns->len - 1]; + if (idiff == TMAX_INDEX) // the same key was already present + return tvalp(t); + node_t leaf, *leafp; + if (unlikely(mkleaf(&leaf, key, len, &tbl->mm))) + return NULL; + + if (isbranch(t) && branch_index(t) == idiff) { + // The node t needs a new leaf child. + assert(!hastwig(t, kbit)); + // new child position and original child count + uint s = twigoff(t, kbit); + uint m = branch_weight(t); + node_t *nt = mm_realloc(&tbl->mm, twigs(t), + sizeof(node_t) * (m + 1), sizeof(node_t) * m); + if (unlikely(!nt)) + goto err_leaf; + memmove(nt + s + 1, nt + s, sizeof(node_t) * (m - s)); + leafp = nt + s; + *t = mkbranch(idiff, branch_bmp(t) | kbit, nt); + } else { + // We need to insert a new binary branch with leaf at *t. + // Note: it works the same for the case where we insert above root t. + #ifndef NDEBUG + if (ns->len > 1) { + node_t *pt = ns->stack[ns->len - 2]; + assert(hastwig(pt, twigbit(pt, key, len))); + } + #endif + node_t *nt = mm_alloc(&tbl->mm, sizeof(node_t) * 2); + if (unlikely(!nt)) + goto err_leaf; + node_t t2 = *t; // Save before overwriting t. + *t = mkbranch(idiff, tbit | kbit, nt); + *twig(t, twigoff(t, tbit)) = t2; + leafp = twig(t, twigoff(t, kbit)); + }; + *leafp = leaf; + ++tbl->weight; + return tvalp(leafp); +err_leaf: + mm_free(&tbl->mm, tkey(&leaf)); + return NULL; + } +} + +trie_val_t* trie_get_ins(trie_t *tbl, const trie_key_t *key, uint32_t len) +{ + return cow_get_ins(NULL, tbl, key, len); +} + +/*! \brief Apply a function to every trie_val_t*, in order; a recursive solution. */ +static int apply_nodes(node_t *t, int (*f)(trie_val_t *, void *), void *d) +{ + assert(t); + if (!isbranch(t)) + return f(tvalp(t), d); + uint n = branch_weight(t); + for (uint i = 0; i < n; ++i) + ERR_RETURN(apply_nodes(twig(t, i), f, d)); + return KNOT_EOK; +} + +int trie_apply(trie_t *tbl, int (*f)(trie_val_t *, void *), void *d) +{ + assert(tbl && f); + if (!tbl->weight) + return KNOT_EOK; + return apply_nodes(&tbl->root, f, d); +} + +/* These are all thin wrappers around static Tns* functions. */ +trie_it_t* trie_it_begin(trie_t *tbl) +{ + assert(tbl); + trie_it_t *it = malloc(sizeof(nstack_t)); + if (!it) + return NULL; + ns_init(it, tbl); + if (it->len == 0) // empty tbl + return it; + if (ns_first_leaf(it)) { + ns_cleanup(it); + free(it); + return NULL; + } + return it; +} + +bool trie_it_finished(trie_it_t *it) +{ + assert(it); + return it->len == 0; +} + +void trie_it_free(trie_it_t *it) +{ + if (!it) + return; + ns_cleanup(it); + free(it); +} + +trie_it_t *trie_it_clone(const trie_it_t *it) +{ + if (!it) // TODO: or should that be an assertion? + return NULL; + trie_it_t *it2 = malloc(sizeof(nstack_t)); + if (!it2) + return NULL; + it2->len = it->len; + it2->alen = it->alen; // we _might_ change it in the rare malloc case, but... + if (likely(it->stack == it->stack_init)) { + it2->stack = it2->stack_init; + assert(it->alen == sizeof(it->stack_init) / sizeof(it->stack_init[0])); + } else { + it2->stack = malloc(it2->alen * sizeof(it2->stack[0])); + if (!it2->stack) { + free(it2); + return NULL; + } + } + memcpy(it2->stack, it->stack, it->len * sizeof(it->stack[0])); + return it2; +} + +const trie_key_t* trie_it_key(trie_it_t *it, size_t *len) +{ + assert(it && it->len); + node_t *t = it->stack[it->len - 1]; + assert(!isbranch(t)); + tkey_t *key = tkey(t); + if (len) + *len = key->len; + return key->chars; +} + +trie_val_t* trie_it_val(trie_it_t *it) +{ + assert(it && it->len); + node_t *t = it->stack[it->len - 1]; + assert(!isbranch(t)); + return tvalp(t); +} + +void trie_it_next(trie_it_t *it) +{ + assert(it && it->len); + if (ns_next_leaf(it, false) != KNOT_EOK) + it->len = 0; +} + +void trie_it_next_loop(trie_it_t *it) +{ + assert(it && it->len); + int ret = ns_next_leaf(it, false); + if (ret == KNOT_ENOENT) { + it->len = 1; + ret = ns_first_leaf(it); + } + if (ret) + it->len = 0; +} + +void trie_it_next_nosuffix(trie_it_t *it) +{ + assert(it && it->len); + if (ns_next_leaf(it, true) != KNOT_EOK) + it->len = 0; +} + +void trie_it_prev(trie_it_t *it) +{ + assert(it && it->len); + if (ns_prev_leaf(it) != KNOT_EOK) + it->len = 0; +} + +void trie_it_prev_loop(trie_it_t *it) +{ + assert(it && it->len); + int ret = ns_prev_leaf(it); + if (ret == KNOT_ENOENT) { + it->len = 1; + ret = ns_last_leaf(it); + } + if (ret) + it->len = 0; +} + +void trie_it_parent(trie_it_t *it) +{ + assert(it && it->len); + if (ns_prefix(it)) + it->len = 0; +} + +void trie_it_del(trie_it_t *it) +{ + assert(it && it->len); + if (it->len == 0) + return; + node_t *t = it->stack[it->len - 1]; + assert(!isbranch(t)); + bitmap_t b; // del_found() needs to know which bit to zero in the bitmap + node_t *p; + if (it->len == 1) { // deleting the root + p = NULL; + b = 0; // unused + } else { + p = it->stack[it->len - 2]; + assert(isbranch(p)); + size_t len; + const trie_key_t *key = trie_it_key(it, &len); + b = twigbit(p, key, len); + } + // We could trie_it_{next,prev,...}(it) now, in case we wanted that semantics. + it->len = 0; + del_found(ns_gettrie(it), t, p, b, NULL); +} + + +/*!\file + * + * \section About copy-on-write + * + * In these notes I'll use the term "object" to refer to either the + * twig array of a branch, or the application's data that is referred + * to by a leaf's trie_val_t pointer. Note that for COW we don't care + * about trie node_t structs themselves, but the objects that they + * point to. + * + * \subsection COW states + * + * During a COW transaction an object can be in one of three states: + * shared, only in the old trie, or only in the new trie. When a + * transaction is rolled back, the only-new objects are freed; when a + * transaction is committed the new trie takes the place of the old + * one and only-old objects are freed. + * + * \subsection branch marks and regions + * + * A branch object can be marked by setting the COW flag in the first + * element of its twig array. Marked branches partition the trie into + * regions; an object's state depends on its region. + * + * The unmarked branch objects between a trie's root and the marked + * branches (excluding the marked branches themselves) is exclusively + * owned: either old-only (if you started from the old root) or + * new-only (if you started from the new root). + * + * Marked branch objects, and all objects reachable from marked branch + * objects, are in the shared region accessible from both old and new + * roots. All branch objects below a marked branch must be unmarked. + * (That is, there is at most one marked branch object on any path + * from the root of a trie.) + * + * Branch nodes in the new-only region can be modified in place, in + * the same way as an original qp trie. Branch nodes in the old-only + * or shared regions must not be modified. + * + * \subsection app object states + * + * The app objects reachable from the new-only and old-only regions + * explicitly record their state in a way determined by the + * application. (These app objects are reachable from the old and new + * roots by traversing only unmarked branch objects.) + * + * The app objects reachable from marked branch objects are implicitly + * shared, but their state field has an indeterminate value. If an app + * object was previously touched by a rolled-back transaction it may + * be marked shared or old-only; if it was previously touched by a + * committed transaction it may be marked shared or new-only. + * + * \subsection key states + * + * The memory allocated for tkey_t objects also needs to track its + * sharing state. They have a "cow" flag to mark when they are shared. + * Keys are relatively lazily copied (to make them exclusive) when + * their leaf node is touched by a COW mutation. + * + * [An alternative technique might be to copy them more eagerly, in + * cow_pushdown(), which would avoid the need for a flag bit at the + * cost of more allocator churn in a transaction.] + * + * \subsection outside COW + * + * When a COW transaction is not in progress, there are no marked + * branch objects, so everything is exclusively owned. When a COW + * transaction is finished (committed or rolled back), the branch + * marks are removed. Since they are in the shared region, this branch + * cleanup is visible to both old and new tries. + * + * However the state of app objects is not clean between COW + * transactions. When a COW transaction is committed, we traverse the + * old-only region to find old-only app objects that should be freed + * (and vice versa for rollback). In general, there will be app + * objects that are only reachable from the new-only region, and that + * have a mixture of shared and new states. + */ + +/*! \brief Trie copy-on-write state */ +struct trie_cow { + trie_t *old; + trie_t *new; + trie_cb *mark_shared; + void *d; +}; + +/*! \brief is this a marked branch object */ +static bool cow_marked(node_t *t) +{ + return isbranch(t) && (twigs(t)->i & TFLAG_COW); +} + +/*! \brief is this a leaf with a marked key */ +static bool cow_key(node_t *t) +{ + return !isbranch(t) && tkey(t)->cow; +} + +/*! \brief remove mark from a branch object */ +static void clear_cow(node_t *t) +{ + assert(isbranch(t)); + twigs(t)->i &= ~TFLAG_COW; +} + +/*! \brief mark a node as shared + * + * For branches this marks the twig array (in COW terminology, the + * branch object); for leaves it uses the callback to mark the app + * object. + */ +static void mark_cow(trie_cow_t *cow, node_t *t) +{ + if (isbranch(t)) { + node_t *object = twigs(t); + object->i |= TFLAG_COW; + } else { + tkey_t *lkey = tkey(t); + lkey->cow = 1; + if (cow->mark_shared != NULL) { + trie_val_t *valp = tvalp(t); + cow->mark_shared(*valp, lkey->chars, lkey->len, cow->d); + } + } +} + +/*! \brief push exclusive COW region down one node */ +static int cow_pushdown_one(trie_cow_t *cow, node_t *t) +{ + uint cc = branch_weight(t); + node_t *nt = mm_alloc(&cow->new->mm, sizeof(node_t) * cc); + if (nt == NULL) + return KNOT_ENOMEM; + /* mark all the children */ + for (uint ci = 0; ci < cc; ++ci) + mark_cow(cow, twig(t, ci)); + /* this node must be unmarked in both old and new versions */ + clear_cow(t); + t->p = memcpy(nt, twigs(t), sizeof(node_t) * cc); + return KNOT_EOK; +} + +/*! \brief push exclusive COW region to cover a whole node stack */ +static int cow_pushdown(trie_cow_t *cow, nstack_t *ns) +{ + node_t *new_twigs = NULL; + node_t *old_twigs = NULL; + for (uint i = 0; i < ns->len; i++) { + /* if we did a pushdown on the previous iteration, we + need to update this stack entry so it points into + the parent's new twigs instead of the old ones */ + if (new_twigs != old_twigs) + ns->stack[i] = new_twigs + (ns->stack[i] - old_twigs); + if (cow_marked(ns->stack[i])) { + old_twigs = twigs(ns->stack[i]); + if (cow_pushdown_one(cow, ns->stack[i])) + return KNOT_ENOMEM; + new_twigs = twigs(ns->stack[i]); + } else { + new_twigs = NULL; + old_twigs = NULL; + /* ensure key is exclusively owned */ + if (cow_key(ns->stack[i])) { + node_t oleaf = *ns->stack[i]; + tkey_t *okey = tkey(&oleaf); + if(mkleaf(ns->stack[i], okey->chars, okey->len, + &cow->new->mm)) + return KNOT_ENOMEM; + ns->stack[i]->p = oleaf.p; + okey->cow = 0; + } + } + } + return KNOT_EOK; +} + +trie_cow_t* trie_cow(trie_t *old, trie_cb *mark_shared, void *d) +{ + knot_mm_t *mm = &old->mm; + trie_t *new = mm_alloc(mm, sizeof(trie_t)); + trie_cow_t *cow = mm_alloc(mm, sizeof(trie_cow_t)); + if (new == NULL || cow == NULL) { + mm_free(mm, new); + mm_free(mm, cow); + return NULL; + } + new->mm = old->mm; + new->root = old->root; + new->weight = old->weight; + cow->old = old; + cow->new = new; + cow->mark_shared = mark_shared; + cow->d = d; + if (old->weight) + mark_cow(cow, &old->root); + return cow; +} + +trie_t* trie_cow_new(trie_cow_t *cow) +{ + assert(cow != NULL); + return cow->new; +} + +trie_val_t* trie_get_cow(trie_cow_t *cow, const trie_key_t *key, uint32_t len) +{ + return cow_get_ins(cow, cow->new, key, len); +} + +int trie_del_cow(trie_cow_t *cow, const trie_key_t *key, uint32_t len, trie_val_t *val) +{ + trie_t *tbl = cow->new; + if (unlikely(!tbl->weight)) + return KNOT_ENOENT; + { // Intentionally un-indented; until end of function, to bound cleanup attr. + // Find the branching-point + __attribute__((cleanup(ns_cleanup))) + nstack_t ns_local; + ns_init(&ns_local, tbl); + nstack_t *ns = &ns_local; + trie_index_t idiff; + bitmap_t tbit, kbit; + ERR_RETURN(ns_find_branch(ns, key, len, &idiff, &tbit, &kbit)); + if (idiff != TMAX_INDEX) + return KNOT_ENOENT; + ERR_RETURN(cow_pushdown(cow, ns)); + node_t *t = ns->stack[ns->len - 1]; + node_t *p = ns->len >= 2 ? ns->stack[ns->len - 2] : NULL; + del_found(tbl, t, p, p ? twigbit(p, key, len) : 0, val); + } + return KNOT_EOK; +} + +/*! \brief clean up after a COW transaction, recursively */ +static void cow_cleanup(trie_cow_t *cow, node_t *t, trie_cb *cb, void *d) +{ + if (cow_marked(t)) { + // we have hit the shared region, so just reset the mark + clear_cow(t); + return; + } else if (isbranch(t)) { + // traverse and free the exclusive region + uint cc = branch_weight(t); + for (uint ci = 0; ci < cc; ++ci) + cow_cleanup(cow, twig(t, ci), cb, d); + mm_free(&cow->new->mm, twigs(t)); + return; + } else { + // application must decide how to clean up its values + tkey_t *lkey = tkey(t); + if (cb != NULL) { + trie_val_t *valp = tvalp(t); + cb(*valp, lkey->chars, lkey->len, d); + } + // clean up exclusively-owned keys + if (lkey->cow) + lkey->cow = 0; + else + mm_free(&cow->new->mm, lkey); + return; + } +} + +trie_t* trie_cow_commit(trie_cow_t *cow, trie_cb *cb, void *d) +{ + trie_t *ret = cow->new; + if (cow->old->weight) + cow_cleanup(cow, &cow->old->root, cb, d); + mm_free(&ret->mm, cow->old); + mm_free(&ret->mm, cow); + return ret; +} + +trie_t* trie_cow_rollback(trie_cow_t *cow, trie_cb *cb, void *d) +{ + trie_t *ret = cow->old; + if (cow->new->weight) + cow_cleanup(cow, &cow->new->root, cb, d); + mm_free(&ret->mm, cow->new); + mm_free(&ret->mm, cow); + return ret; +} diff --git a/src/contrib/qp-trie/trie.h b/src/contrib/qp-trie/trie.h new file mode 100644 index 0000000..d479d3e --- /dev/null +++ b/src/contrib/qp-trie/trie.h @@ -0,0 +1,280 @@ +/* Copyright (C) 2019 CZ.NIC, z.s.p.o. <knot-dns@labs.nic.cz> + Copyright (C) 2018 Tony Finch <dot@dotat.at> + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see <https://www.gnu.org/licenses/>. + */ + +#pragma once + +#include <stdbool.h> +#include <stdint.h> + +#include "libknot/mm_ctx.h" + +/*! + * \brief Native API of QP-tries: + * + * - keys are uint8_t strings, not necessarily zero-terminated, + * the structure copies the contents of the passed keys + * - values are void* pointers, typically you get an ephemeral pointer to it + * - key lengths are limited by 2^32-1 ATM + */ + +/*! \brief Element value. */ +typedef void* trie_val_t; +/*! \brief Key for indexing tries. Sign could be flipped easily. */ +typedef uint8_t trie_key_t; + +/*! \brief Opaque structure holding a QP-trie. */ +typedef struct trie trie_t; + +/*! \brief Opaque type for holding a QP-trie iterator. */ +typedef struct trie_it trie_it_t; + +/*! \brief Callback for cloning trie values. */ +typedef trie_val_t (*trie_dup_cb)(const trie_val_t val, knot_mm_t *mm); + +/*! \brief Callback for performing actions on a trie leaf + * + * Used during copy-on-write transactions + * + * \param val The value of the element to be altered + * \param key The key of the element to be altered + * \param len The length of key + * \param d Additional user data + */ +typedef void trie_cb(trie_val_t val, const trie_key_t *key, size_t len, void *d); + +/*! \brief Opaque type for holding the copy-on-write state for a QP-trie. */ +typedef struct trie_cow trie_cow_t; + +/*! \brief Create a trie instance. */ +trie_t* trie_create(knot_mm_t *mm); + +/*! \brief Free a trie instance. */ +void trie_free(trie_t *tbl); + +/*! \brief Clear a trie instance (make it empty). */ +void trie_clear(trie_t *tbl); + +/*! \brief Create a clone of existing trie. */ +trie_t* trie_dup(const trie_t *orig, trie_dup_cb dup_cb, knot_mm_t *mm); + +/*! \brief Return the number of keys in the trie. */ +size_t trie_weight(const trie_t *tbl); + +/*! \brief Search the trie, returning NULL on failure. */ +trie_val_t* trie_get_try(trie_t *tbl, const trie_key_t *key, uint32_t len); + +/*! \brief Search the trie including DNS wildcard semantics, returning NULL on failure. + * + * \note We assume the key is in knot_dname_lf() format, i.e. labels are ordered + * from root to leaf and separated by zero bytes (and no other zeros are allowed). + * \note Beware that DNS wildcard matching is not exactly what normal people would expect. + */ +trie_val_t* trie_get_try_wildcard(trie_t *tbl, const trie_key_t *key, uint32_t len); + +/*! \brief Search the trie, inserting NULL trie_val_t on failure. */ +trie_val_t* trie_get_ins(trie_t *tbl, const trie_key_t *key, uint32_t len); + +/*! + * \brief Search for less-or-equal element. + * + * \param tbl Trie. + * \param key Searched key. + * \param len Key length. + * \param val (optional) Value found; it will be set to NULL if not found or errored. + * \return KNOT_EOK for exact match, 1 for previous, KNOT_ENOENT for not-found, + * or KNOT_E*. + */ +int trie_get_leq(trie_t *tbl, const trie_key_t *key, uint32_t len, trie_val_t **val); + +/*! + * \brief Apply a function to every trie_val_t, in order. + * + * \return KNOT_EOK if success or KNOT_E* if error. + */ +int trie_apply(trie_t *tbl, int (*f)(trie_val_t *, void *), void *d); + +/*! + * \brief Remove an item, returning KNOT_EOK if succeeded or KNOT_ENOENT if not found. + * + * If val!=NULL and deletion succeeded, the deleted value is set. + */ +int trie_del(trie_t *tbl, const trie_key_t *key, uint32_t len, trie_val_t *val); + + +/*! \brief Create a new iterator pointing to the first element (if any). + * + * trie_it_* functions deal with these iterators capable of walking and jumping + * over the trie. Note that any modification to key-set stored by the trie + * will in general invalidate all iterators and you will need to begin anew. + * (It won't be detected - you may end up reading freed memory, etc.) + */ +trie_it_t* trie_it_begin(trie_t *tbl); + +/*! \brief Test if the iterator has gone "past the end" (and points nowhere). */ +bool trie_it_finished(trie_it_t *it); + +/*! \brief Free any resources of the iterator. It's OK to call it on NULL. */ +void trie_it_free(trie_it_t *it); + +/*! \brief Copy the iterator. See the warning in trie_it_begin(). */ +trie_it_t *trie_it_clone(const trie_it_t *it); + +/*! + * \brief Return pointer to the key of the current element. + * + * \note The len is uint32_t internally but size_t is better for our usage + * as it is without an additional type conversion. + */ +const trie_key_t* trie_it_key(trie_it_t *it, size_t *len); + +/*! \brief Return pointer to the value of the current element (writable). */ +trie_val_t* trie_it_val(trie_it_t *it); + +/*! + * \brief Advance the iterator to the next element. + * + * Iteration is in ascending lexicographical order. + * In particular, the empty string would be considered as the very first. + * + * \TODO: in most iterator operations, ENOMEM is very unlikely + * but it leads to a _finished() iterator (silently). + * Perhaps the functions should simply return KNOT_E* + */ +void trie_it_next(trie_it_t *it); +/*! \brief Advance the iterator to the previous element. See trie_it_next(). */ +void trie_it_prev(trie_it_t *it); + +/*! \brief Advance iterator to the next element, looping to first after last. */ +void trie_it_next_loop(trie_it_t *it); +/*! \brief Advance iterator to the previous element, looping to last after first. */ +void trie_it_prev_loop(trie_it_t *it); + +/*! \brief Advance iterator to the next element while ignoring the subtree. + * + * \note Another formulation: skip keys that are prefixed by the current key. + * \TODO: name, maybe _unprefixed? The thing is that in the "subtree" meaning + * doesn't correspond to how the pointers go in the implementation, + * but we may not care much for implementation in the API... + */ +void trie_it_next_nosub(trie_it_t *it); + +/*! \brief Advance iterator to the longest prefix of the current key. + * + * \TODO: name, maybe _prefix? Arguments similar to _nosub vs. _unprefixed. + */ +void trie_it_parent(trie_it_t *it); + +/*! \brief trie_get_leq() but with an iterator. */ +int trie_it_get_leq(trie_it_t *it, const trie_key_t *key, uint32_t len); + +/*! \brief Remove the current element. The iterator will get trie_it_finished() */ +void trie_it_del(trie_it_t *it); + + +/*! \brief Start a COW transaction + * + * A copy-on-write transaction starts by obtaining a write lock (in + * your application code) followed by a call to trie_cow(). This + * creates a shared clone of the trie and saves both old and new roots + * in the COW context. + * + * During the COW transaction, you call trie_cow_ins() or + * trie_cow_del() as necessary. These calls ensure that the relevant + * parts of the (new) trie are copied so that they can be modified + * freely. + * + * Your trie_val_t objects must be able to distinguish their + * reachability, either shared, or old-only, or new-only. Before a COW + * transaction the reachability of your objects is indeterminate. + * During a transaction, any trie_val_t objects that might be affected + * (because they are adjacent to a trie_get_cow() or trie_del_cow()) + * are first marked as shared using the callback you pass to + * trie_cow(). + * + * When the transaction is complete, to commit, call trie_cow_new() to + * get the new root, swap the old and new trie roots (e.g. with + * rcu_xchg_pointer()), wait for readers to finish with the old trie + * (e.g. using synchronize_rcu()), then call trie_cow_commit(). For a + * rollback, you can just call trie_cow_rollback() without waiting + * since that doesn't conflict with readers. After trie_cow_commit() + * or trie_cow_rollback() have finished, you can release your write + * lock. + * + * Concurrent reading of the old trie is allowed during a transaction + * provided that it is known when all readers have finished with the + * old version, e.g. using rcu_read_lock() and rcu_read_unlock(). + * There must be only one write transaction at a time. + * + * \param old the old trie + * \param mark_shared callback to mark a leaf as shared (can be NULL) + * \param d extra data for the callback + * \return a pointer to a COW context, + * or NULL if there was a failure + */ +trie_cow_t* trie_cow(trie_t *old, trie_cb *mark_shared, void *d); + +/*! \brief get the new trie from a COW context */ +trie_t* trie_cow_new(trie_cow_t *cow); + +/*! \brief variant of trie_get_ins() for use during COW transactions + * + * As necessary, this copies path from the root of the trie to the + * leaf, so that it is no longer shared. Any leaves adjacent to this + * path are marked as shared using the mark_shared callback passed to + * trie_cow(). + * + * It is your responsibility to COW your trie_val_t objects. If you copy an + * object you must change the original's reachability from shared to old-only. + * New objects (including copies) must have new-only reachability. + */ +trie_val_t* trie_get_cow(trie_cow_t *cow, const trie_key_t *key, uint32_t len); + +/*! + * \brief variant of trie_del() for use during COW transactions + * + * The mark_shared callback is invoked as necessary, in the same way + * as trie_get_cow(). + * + * Returns KNOT_EOK if the key was removed or KNOT_ENOENT if not found. + * If val!=NULL and deletion succeeded, the *val is set to the deleted + * value pointer. + */ +int trie_del_cow(trie_cow_t *cow, const trie_key_t *key, uint32_t len, trie_val_t *val); + +/*! \brief clean up the old trie after committing a COW transaction + * + * Your callback is invoked for any trie_val_t objects that might need + * cleaning up; you must free any objects you have marked as old-only + * and retain objects with shared reachability. + * + * \note The callback can be NULL. + * + * The cow object is free()d, and the new trie root is returned. + */ +trie_t* trie_cow_commit(trie_cow_t *cow, trie_cb *cb, void *d); + +/*! \brief clean up the new trie after rolling back a COW transaction + * + * Your callback is invoked for any trie_val_t objects that might need + * cleaning up; you must free any objects you have marked as new-only + * and retain objects with shared reachability. + * + * \note The callback can be NULL. + * + * The cow object is free()d, and the old trie root is returned. + */ +trie_t* trie_cow_rollback(trie_cow_t *cow, trie_cb *cb, void *d); |