summaryrefslogtreecommitdiffstats
path: root/src/contrib/qp-trie
diff options
context:
space:
mode:
authorDaniel Baumann <daniel.baumann@progress-linux.org>2024-04-27 10:36:22 +0000
committerDaniel Baumann <daniel.baumann@progress-linux.org>2024-04-27 10:36:22 +0000
commitb88bb292821fd7742604ec4e280acebd9a049f62 (patch)
tree625e4e19e6619f7481e5a8103f876520950769f6 /src/contrib/qp-trie
parentInitial commit. (diff)
downloadknot-b88bb292821fd7742604ec4e280acebd9a049f62.tar.xz
knot-b88bb292821fd7742604ec4e280acebd9a049f62.zip
Adding upstream version 3.0.5.upstream/3.0.5upstream
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'src/contrib/qp-trie')
-rw-r--r--src/contrib/qp-trie/trie.c1451
-rw-r--r--src/contrib/qp-trie/trie.h280
2 files changed, 1731 insertions, 0 deletions
diff --git a/src/contrib/qp-trie/trie.c b/src/contrib/qp-trie/trie.c
new file mode 100644
index 0000000..c5ee897
--- /dev/null
+++ b/src/contrib/qp-trie/trie.c
@@ -0,0 +1,1451 @@
+/* Copyright (C) 2019 CZ.NIC, z.s.p.o. <knot-dns@labs.nic.cz>
+ Copyright (C) 2018 Tony Finch <dot@dotat.at>
+
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program. If not, see <https://www.gnu.org/licenses/>.
+
+ The code originated from https://github.com/fanf2/qp/blob/master/qp.c
+ at revision 5f6d93753.
+ */
+
+#include <assert.h>
+#include <limits.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include "contrib/qp-trie/trie.h"
+#include "contrib/macros.h"
+#include "contrib/mempattern.h"
+#include "libknot/errcode.h"
+
+typedef unsigned int uint;
+typedef uint64_t index_t; /*!< nibble index into a key */
+typedef uint64_t word; /*!< A type-punned word */
+typedef uint bitmap_t; /*!< Bit-maps, using the range of 1<<0 to 1<<16 (inclusive). */
+
+typedef char static_assert_pointer_fits_in_word
+ [sizeof(word) >= sizeof(uintptr_t) ? 1 : -1];
+
+#define KEYLENBITS 31
+
+/*! \brief trie keys have lengths
+ *
+ * 32 bits are enough for key lengths; probably even 16 bits would be.
+ * However, a 32 bit length means the alignment will be a multiple of
+ * 4, allowing us to stash the COW and BRANCH flags in the bottom bits
+ * of a pointer to a key.
+ *
+ * We need to steal a couple of bits from the length to keep the COW
+ * state of key allocations.
+ */
+typedef struct {
+ uint32_t cow:1, len:KEYLENBITS;
+ trie_key_t chars[];
+} tkey_t;
+
+/*! \brief A trie node is a pair of words.
+ *
+ * Each word is type-punned, depending on whether this is a branch
+ * node or a leaf node. We'll define some accessor functions to wrap
+ * this up into something reasonably safe.
+ *
+ * We aren't using a union to avoid problems with strict aliasing, and
+ * we aren't using bitfields because we want to control exactly which
+ * bits in the word are used by each field (in particular the flags).
+ *
+ * Branch nodes are never allocated individually: they are always part
+ * of either the root node or the twigs array of their parent branch.
+ *
+ * In a branch:
+ *
+ * `i` contains flags, bitmap, and index, explained in more detail below.
+ *
+ * `p` is a pointer to the "twigs", an array of child nodes.
+ *
+ * In a leaf:
+ *
+ * `i` is cast from a pointer to a tkey_t, with flags in the bottom bits.
+ *
+ * `p` is a trie_val_t.
+ */
+typedef struct node {
+ word i;
+ void *p;
+} node_t;
+
+struct trie {
+ node_t root; // undefined when weight == 0, see empty_root()
+ size_t weight;
+ knot_mm_t mm;
+};
+
+/*! \brief size (in bits) of nibble (half-byte) indexes into keys
+ *
+ * The bottom bit is clear for the upper nibble, and set for the lower
+ * nibble, big-endian style, since the tree has to be in lexicographic
+ * order. The index increases from one branch node to the next as you
+ * go deeper into the trie. All the keys below a branch are identical
+ * up to the nibble identified by the branch.
+ *
+ * (see also tkey_t.len above)
+ */
+#define TWIDTH_INDEX 33
+
+/*! \brief exclusive limit on indexes */
+#define TMAX_INDEX (BIG1 << TWIDTH_INDEX)
+
+/*! \brief size (in bits) of branch bitmap
+ *
+ * The bitmap indicates which subtries are present. The present child
+ * nodes are stored in the twigs array (with no holes between them).
+ *
+ * To simplify storing keys that are prefixes of each other, the
+ * end-of-string position is treated as an extra nibble value, ordered
+ * before all others. So there are 16 possible real nibble values,
+ * plus one value for nibbles past the end of the key.
+ */
+#define TWIDTH_BMP 17
+
+/*
+ * We're constructing the layout of the branch `i` field in a careful
+ * way to avoid mistakes, getting the compiler to calculate values
+ * rather than typing them in by hand.
+ */
+enum {
+ TSHIFT_BRANCH = 0,
+ TSHIFT_COW,
+ TSHIFT_BMP,
+ TOP_BMP = TSHIFT_BMP + TWIDTH_BMP,
+ TSHIFT_INDEX = TOP_BMP,
+ TOP_INDEX = TSHIFT_INDEX + TWIDTH_INDEX,
+};
+
+typedef char static_assert_fields_fit_in_word
+ [TOP_INDEX <= sizeof(word) * CHAR_BIT ? 1 : -1];
+
+typedef char static_assert_bmp_fits
+ [TOP_BMP <= sizeof(bitmap_t) * CHAR_BIT ? 1 : -1];
+
+#define BIG1 ((word)1)
+#define TMASK(width, shift) (((BIG1 << (width)) - BIG1) << (shift))
+
+/*! \brief is this node a branch or a leaf? */
+#define TFLAG_BRANCH (BIG1 << TSHIFT_BRANCH)
+
+/*! \brief copy-on-write flag, used in both leaves and branches */
+#define TFLAG_COW (BIG1 << TSHIFT_COW)
+
+/*! \brief for extracting pointer to key */
+#define TMASK_LEAF (~(word)(TFLAG_BRANCH | TFLAG_COW))
+
+/*! \brief mask for extracting nibble index */
+#define TMASK_INDEX TMASK(TWIDTH_INDEX, TSHIFT_INDEX)
+
+/*! \brief mask for extracting bitmap */
+#define TMASK_BMP TMASK(TWIDTH_BMP, TSHIFT_BMP)
+
+/*! \brief bitmap entry for NOBYTE */
+#define BMP_NOBYTE (BIG1 << TSHIFT_BMP)
+
+/*! \brief Initialize a new leaf, copying the key, and returning failure code. */
+static int mkleaf(node_t *leaf, const trie_key_t *key, uint32_t len, knot_mm_t *mm)
+{
+ if (unlikely((word)len > (BIG1 << KEYLENBITS)))
+ return KNOT_ENOMEM;
+ tkey_t *lkey = mm_alloc(mm, sizeof(tkey_t) + len);
+ if (unlikely(!lkey))
+ return KNOT_ENOMEM;
+ lkey->cow = 0;
+ lkey->len = len;
+ memcpy(lkey->chars, key, len);
+ word i = (uintptr_t)lkey;
+ assert((i & TFLAG_BRANCH) == 0);
+ *leaf = (node_t){ .i = i, .p = NULL };
+ return KNOT_EOK;
+}
+
+/*! \brief construct a branch node */
+static node_t mkbranch(index_t index, bitmap_t bmp, node_t *twigs)
+{
+ word i = TFLAG_BRANCH | bmp
+ | (index << TSHIFT_INDEX);
+ assert(index < TMAX_INDEX);
+ assert((bmp & ~TMASK_BMP) == 0);
+ return (node_t){ .i = i, .p = twigs };
+}
+
+/*! \brief Make an empty root node. */
+static node_t empty_root(void)
+{
+ return mkbranch(TMAX_INDEX-1, 0, NULL);
+}
+
+/*! \brief Propagate error codes. */
+#define ERR_RETURN(x) \
+ do { \
+ int err_code_ = x; \
+ if (unlikely(err_code_ != KNOT_EOK)) \
+ return err_code_; \
+ } while (false)
+
+
+/*! \brief Test flags to determine type of this node. */
+static bool isbranch(const node_t *t)
+{
+ return t->i & TFLAG_BRANCH;
+}
+
+static tkey_t *tkey(const node_t *t)
+{
+ assert(!isbranch(t));
+ return (tkey_t *)(uintptr_t)(t->i & TMASK_LEAF);
+}
+
+static trie_val_t *tvalp(node_t *t)
+{
+ assert(!isbranch(t));
+ return &t->p;
+}
+
+/*! \brief Given a branch node, return the index of the corresponding nibble in the key. */
+static index_t branch_index(const node_t *t)
+{
+ assert(isbranch(t));
+ return (t->i & TMASK_INDEX) >> TSHIFT_INDEX;
+}
+
+static bitmap_t branch_bmp(const node_t *t)
+{
+ assert(isbranch(t));
+ return (t->i & TMASK_BMP);
+}
+
+/*!
+ * \brief Count the number of set bits.
+ *
+ * \TODO This implementation may be relatively slow on some HW.
+ */
+static uint branch_weight(const node_t *t)
+{
+ assert(isbranch(t));
+ uint n = __builtin_popcount(t->i & TMASK_BMP);
+ assert(n > 1 && n <= TWIDTH_BMP);
+ return n;
+}
+
+/*! \brief Compute offset of an existing child in a branch node. */
+static uint twigoff(const node_t *t, bitmap_t bit)
+{
+ assert(isbranch(t));
+ assert(__builtin_popcount(bit) == 1);
+ return __builtin_popcount(t->i & TMASK_BMP & (bit - 1));
+}
+
+/*! \brief Extract a nibble from a key and turn it into a bitmask. */
+static bitmap_t keybit(index_t ni, const trie_key_t *key, uint32_t len)
+{
+ index_t bytei = ni >> 1;
+
+ if (bytei >= len)
+ return BMP_NOBYTE;
+
+ uint8_t ki = (uint8_t)key[bytei];
+ uint nibble = (ni & 1) ? (ki & 0xf) : (ki >> 4);
+
+ // skip one for NOBYTE nibbles after the end of the key
+ return BIG1 << (nibble + 1 + TSHIFT_BMP);
+}
+
+/*! \brief Extract a nibble from a key and turn it into a bitmask. */
+static bitmap_t twigbit(const node_t *t, const trie_key_t *key, uint32_t len)
+{
+ assert(isbranch(t));
+ return keybit(branch_index(t), key, len);
+}
+
+/*! \brief Test if a branch node has a child indicated by a bitmask. */
+static bool hastwig(const node_t *t, bitmap_t bit)
+{
+ assert(isbranch(t));
+ assert((bit & ~TMASK_BMP) == 0);
+ assert(__builtin_popcount(bit) == 1);
+ return t->i & bit;
+}
+
+/*! \brief Get pointer to packed array of child nodes. */
+static node_t* twigs(node_t *t)
+{
+ assert(isbranch(t));
+ return t->p;
+}
+
+/*! \brief Get pointer to a particular child of a branch node. */
+static node_t* twig(node_t *t, uint i)
+{
+ assert(i < branch_weight(t));
+ return twigs(t) + i;
+}
+
+/*! \brief Get twig number of a child node TODO: better description. */
+static uint twig_number(node_t *child, node_t *parent)
+{
+ // twig array index using pointer arithmetic
+ ptrdiff_t num = child - twigs(parent);
+ assert(num >= 0 && num < branch_weight(parent));
+ return (uint)num;
+}
+
+/*! \brief Simple string comparator. */
+static int key_cmp(const trie_key_t *k1, uint32_t k1_len,
+ const trie_key_t *k2, uint32_t k2_len)
+{
+ int ret = memcmp(k1, k2, MIN(k1_len, k2_len));
+ if (ret != 0) {
+ return ret;
+ }
+
+ /* Key string is equal, compare lengths. */
+ if (k1_len == k2_len) {
+ return 0;
+ } else if (k1_len < k2_len) {
+ return -1;
+ } else {
+ return 1;
+ }
+}
+
+trie_t* trie_create(knot_mm_t *mm)
+{
+ trie_t *trie = mm_alloc(mm, sizeof(trie_t));
+ if (trie != NULL) {
+ trie->root = empty_root();
+ trie->weight = 0;
+ if (mm != NULL)
+ trie->mm = *mm;
+ else
+ mm_ctx_init(&trie->mm);
+ }
+ return trie;
+}
+
+/*! \brief Free anything under the trie node, except for the passed pointer itself. */
+static void clear_trie(node_t *trie, knot_mm_t *mm)
+{
+ if (!isbranch(trie)) {
+ mm_free(mm, tkey(trie));
+ } else {
+ uint n = branch_weight(trie);
+ for (uint i = 0; i < n; ++i)
+ clear_trie(twig(trie, i), mm);
+ mm_free(mm, twigs(trie));
+ }
+}
+
+void trie_free(trie_t *tbl)
+{
+ if (tbl == NULL)
+ return;
+ if (tbl->weight)
+ clear_trie(&tbl->root, &tbl->mm);
+ mm_free(&tbl->mm, tbl);
+}
+
+void trie_clear(trie_t *tbl)
+{
+ assert(tbl);
+ if (!tbl->weight)
+ return;
+ clear_trie(&tbl->root, &tbl->mm);
+ tbl->root = empty_root();
+ tbl->weight = 0;
+}
+
+static bool dup_trie(node_t *copy, const node_t *orig, trie_dup_cb dup_cb, knot_mm_t *mm)
+{
+ if (isbranch(orig)) {
+ uint n = branch_weight(orig);
+ node_t *cotw = mm_alloc(mm, n * sizeof(*cotw));
+ if (cotw == NULL) {
+ return NULL;
+ }
+ const node_t *ortw = twigs((node_t *)orig);
+ for (uint i = 0; i < n; ++i) {
+ if (!dup_trie(cotw + i, ortw + i, dup_cb, mm)) {
+ while (i-- > 0) {
+ clear_trie(cotw + i, mm);
+ }
+ mm_free(mm, cotw);
+ return false;
+ }
+ }
+ *copy = mkbranch(branch_index(orig), branch_bmp(orig), cotw);
+ } else {
+ tkey_t *key = tkey(orig);
+ if (mkleaf(copy, key->chars, key->len, mm) != KNOT_EOK) {
+ return false;
+ }
+ if ((copy->p = dup_cb(orig->p, mm)) == NULL) {
+ mm_free(mm, tkey(copy));
+ return false;
+ }
+ }
+ return true;
+}
+
+trie_t* trie_dup(const trie_t *orig, trie_dup_cb dup_cb, knot_mm_t *mm)
+{
+ if (orig == NULL) {
+ return NULL;
+ }
+ trie_t *copy = mm_alloc(mm, sizeof(*copy));
+ if (copy == NULL) {
+ return NULL;
+ }
+ copy->weight = orig->weight;
+ if (mm != NULL) {
+ copy->mm = *mm;
+ } else {
+ mm_ctx_init(&copy->mm);
+ }
+ if (copy->weight) {
+ if (!dup_trie(&copy->root, &orig->root, dup_cb, mm)) {
+ mm_free(mm, copy);
+ return NULL;
+ }
+ }
+ return copy;
+}
+
+size_t trie_weight(const trie_t *tbl)
+{
+ assert(tbl);
+ return tbl->weight;
+}
+
+trie_val_t* trie_get_try(trie_t *tbl, const trie_key_t *key, uint32_t len)
+{
+ assert(tbl);
+ if (!tbl->weight)
+ return NULL;
+ node_t *t = &tbl->root;
+ while (isbranch(t)) {
+ __builtin_prefetch(twigs(t));
+ bitmap_t b = twigbit(t, key, len);
+ if (!hastwig(t, b))
+ return NULL;
+ t = twig(t, twigoff(t, b));
+ }
+ tkey_t *lkey = tkey(t);
+ if (key_cmp(key, len, lkey->chars, lkey->len) != 0)
+ return NULL;
+ return tvalp(t);
+}
+
+/* Optimization: the approach isn't ideal, as e.g. walking through the prefix
+ * is duplicated and we explicitly construct the wildcard key. Still, it's close
+ * to optimum which would be significantly more complicated and error-prone to write. */
+trie_val_t* trie_get_try_wildcard(trie_t *tbl, const trie_key_t *key, uint32_t len)
+{
+ assert(tbl);
+ if (!tbl->weight)
+ return NULL;
+ // Find leaf sharing the longest common prefix; see ns_find_branch() for explanation.
+ node_t *t = &tbl->root;
+ while (isbranch(t)) {
+ __builtin_prefetch(twigs(t));
+ bitmap_t b = twigbit(t, key, len);
+ uint i = hastwig(t, b) ? twigoff(t, b) : 0;
+ t = twig(t, i);
+ }
+ const tkey_t * const lcp_key = tkey(t);
+
+ // Find the last matching zero byte or -1 (source of synthesis)
+ int i_lmz = -1;
+ for (int i = 0; i < len && i < lcp_key->len && key[i] == lcp_key->chars[i]; ++i) {
+ if (key[i] == '\0' && i < len - 1) // do not count the terminating zero
+ i_lmz = i;
+ // Shortcut: we may have found an exact match.
+ if (i == len - 1 && len == lcp_key->len)
+ return tvalp(t);
+ }
+ if (len == 0) // The empty name needs separate handling.
+ return lcp_key->len == 0 ? tvalp(t) : NULL;
+
+ // Construct the key of the wildcard we need and look it up.
+ const int wild_len = i_lmz + 3;
+ uint8_t wild_key[wild_len];
+ memcpy(wild_key, key, wild_len - 2);
+ wild_key[wild_len - 2] = '*';
+ wild_key[wild_len - 1] = '\0'; // LF is always 0-terminated ATM
+ return trie_get_try(tbl, wild_key, wild_len);
+}
+
+/*! \brief Delete leaf t with parent p; b is the bit for t under p.
+ * Optionally return the deleted value via val. The function can't fail. */
+static void del_found(trie_t *tbl, node_t *t, node_t *p, bitmap_t b, trie_val_t *val)
+{
+ assert(!tkey(t)->cow);
+ mm_free(&tbl->mm, tkey(t));
+ if (val != NULL)
+ *val = *tvalp(t); // we return trie_val_t directly when deleting
+ --tbl->weight;
+ if (unlikely(!p)) { // whole trie was a single leaf
+ assert(tbl->weight == 0);
+ tbl->root = empty_root();
+ return;
+ }
+ // remove leaf t as child of p
+ node_t *tp = twigs(p);
+ uint ci = twig_number(t, p);
+ uint cc = branch_weight(p); // child count
+
+ if (cc == 2) {
+ // collapse binary node p: move the other child to the parent
+ *p = tp[1 - ci];
+ mm_free(&tbl->mm, tp);
+ return;
+ }
+ memmove(tp + ci, tp + ci + 1, sizeof(node_t) * (cc - ci - 1));
+ p->i &= ~b;
+ node_t *newt = mm_realloc(&tbl->mm, tp, sizeof(node_t) * (cc - 1),
+ sizeof(node_t) * cc);
+ if (likely(newt != NULL))
+ p->p = newt;
+ // We can ignore mm_realloc failure because an oversized twig
+ // array is OK - only beware that next time the prev_size
+ // passed to mm_realloc will not be correct; TODO?
+}
+
+int trie_del(trie_t *tbl, const trie_key_t *key, uint32_t len, trie_val_t *val)
+{
+ assert(tbl);
+ if (!tbl->weight)
+ return KNOT_ENOENT;
+ node_t *t = &tbl->root; // current and parent node
+ node_t *p = NULL;
+ bitmap_t b = 0;
+ while (isbranch(t)) {
+ __builtin_prefetch(twigs(t));
+ b = twigbit(t, key, len);
+ if (!hastwig(t, b))
+ return KNOT_ENOENT;
+ p = t;
+ t = twig(t, twigoff(t, b));
+ }
+ tkey_t *lkey = tkey(t);
+ if (key_cmp(key, len, lkey->chars, lkey->len) != 0)
+ return KNOT_ENOENT;
+ del_found(tbl, t, p, b, val);
+ return KNOT_EOK;
+}
+
+/*!
+ * \brief Stack of nodes, storing a path down a trie.
+ *
+ * The structure also serves directly as the public trie_it_t type,
+ * in which case it always points to the current leaf, unless we've finished
+ * (i.e. it->len == 0).
+ * stack[0] is always a valid pointer to the root -> ns_gettrie()
+ */
+typedef struct trie_it {
+ node_t* *stack; /*!< The stack; malloc is used directly instead of mm. */
+ uint32_t len; /*!< Current length of the stack. */
+ uint32_t alen; /*!< Allocated/available length of the stack. */
+ /*! \brief Initial storage for \a stack; it should fit in most use cases. */
+ node_t* stack_init[250];
+} nstack_t;
+
+/*! \brief Create a node stack containing just the root (or empty). */
+static void ns_init(nstack_t *ns, trie_t *tbl)
+{
+ assert(tbl);
+ ns->stack = ns->stack_init;
+ ns->alen = sizeof(ns->stack_init) / sizeof(ns->stack_init[0]);
+ ns->stack[0] = &tbl->root;
+ ns->len = (tbl->weight > 0);
+}
+
+static inline trie_t * ns_gettrie(nstack_t *ns)
+{
+ assert(ns && ns->stack && ns->stack[0]);
+ return (struct trie *)ns->stack[0];
+}
+
+/*! \brief Free inside of the stack, i.e. not the passed pointer itself. */
+static void ns_cleanup(nstack_t *ns)
+{
+ assert(ns && ns->stack);
+ if (likely(ns->stack == ns->stack_init))
+ return;
+ free(ns->stack);
+ #ifndef NDEBUG
+ ns->stack = NULL;
+ ns->alen = 0;
+ #endif
+}
+
+/*! \brief Allocate more space for the stack. */
+static int ns_longer_alloc(nstack_t *ns)
+{
+ ns->alen *= 2;
+ size_t new_size = ns->alen * sizeof(node_t *);
+ node_t **st;
+ if (ns->stack == ns->stack_init) {
+ st = malloc(new_size);
+ if (st != NULL)
+ memcpy(st, ns->stack, ns->len * sizeof(node_t *));
+ } else {
+ st = realloc(ns->stack, new_size);
+ }
+ if (st == NULL)
+ return KNOT_ENOMEM;
+ ns->stack = st;
+ return KNOT_EOK;
+}
+
+/*! \brief Ensure the node stack can be extended by one. */
+static inline int ns_longer(nstack_t *ns)
+{
+ // get a longer stack if needed
+ if (likely(ns->len < ns->alen))
+ return KNOT_EOK;
+ return ns_longer_alloc(ns); // hand-split the part suitable for inlining
+}
+
+/*!
+ * \brief Find the "branching point" as if searching for a key.
+ *
+ * The whole path to the point is kept on the passed stack;
+ * always at least the root will remain on the top of it.
+ * Beware: the precise semantics of this function is rather tricky.
+ * The top of the stack will contain: the corresponding leaf if exact
+ * match is found; or the immediate node below a
+ * branching-point-on-edge or the branching-point itself.
+ *
+ * \param idiff Set the index of first differing nibble, or TMAX_INDEX for an exact match
+ * \param tbit Set the bit of the closest leaf's nibble at index idiff
+ * \param kbit Set the bit of the key's nibble at index idiff
+ *
+ * \return KNOT_EOK or KNOT_ENOMEM.
+ */
+static int ns_find_branch(nstack_t *ns, const trie_key_t *key, uint32_t len,
+ index_t *idiff, bitmap_t *tbit, bitmap_t *kbit)
+{
+ assert(ns && ns->len && idiff);
+ // First find some leaf with longest matching prefix.
+ while (isbranch(ns->stack[ns->len - 1])) {
+ ERR_RETURN(ns_longer(ns));
+ node_t *t = ns->stack[ns->len - 1];
+ __builtin_prefetch(twigs(t));
+ bitmap_t b = twigbit(t, key, len);
+ // Even if our key is missing from this branch we need to
+ // keep iterating down to a leaf. It doesn't matter which
+ // twig we choose since the keys are all the same up to this
+ // index. Note that blindly using twigoff(t, b) can cause
+ // an out-of-bounds index if it equals twigmax(t).
+ uint i = hastwig(t, b) ? twigoff(t, b) : 0;
+ ns->stack[ns->len++] = twig(t, i);
+ }
+ tkey_t *lkey = tkey(ns->stack[ns->len-1]);
+ // Find index of the first char that differs.
+ size_t bytei = 0;
+ uint32_t klen = lkey->len;
+ for (bytei = 0; bytei < MIN(len,klen); bytei++) {
+ if (key[bytei] != lkey->chars[bytei])
+ break;
+ }
+ // Find which half-byte has matched.
+ index_t index = bytei << 1;
+ if (bytei == len && len == lkey->len) { // found equivalent key
+ index = TMAX_INDEX;
+ goto success;
+ }
+ if (likely(bytei < MIN(len,klen))) {
+ uint8_t k2 = (uint8_t)lkey->chars[bytei];
+ uint8_t k1 = (uint8_t)key[bytei];
+ if (((k1 ^ k2) & 0xf0) == 0)
+ index += 1;
+ }
+ // now go up the trie from the current leaf
+ node_t *t;
+ do {
+ if (unlikely(ns->len == 1))
+ goto success; // only the root stays on the stack
+ t = ns->stack[ns->len - 2];
+ if (branch_index(t) < index)
+ goto success;
+ --ns->len;
+ } while (true);
+success:
+ #ifndef NDEBUG // invariants on successful return
+ assert(ns->len);
+ if (isbranch(ns->stack[ns->len - 1])) {
+ t = ns->stack[ns->len - 1];
+ assert(branch_index(t) >= index);
+ }
+ if (ns->len > 1) {
+ t = ns->stack[ns->len - 2];
+ assert(branch_index(t) < index || index == TMAX_INDEX);
+ }
+ #endif
+ *idiff = index;
+ *tbit = keybit(index, lkey->chars, lkey->len);
+ *kbit = keybit(index, key, len);
+ return KNOT_EOK;
+}
+
+/*!
+ * \brief Advance the node stack to the last leaf in the subtree.
+ *
+ * \return KNOT_EOK or KNOT_ENOMEM.
+ */
+static int ns_last_leaf(nstack_t *ns)
+{
+ assert(ns);
+ do {
+ ERR_RETURN(ns_longer(ns));
+ node_t *t = ns->stack[ns->len - 1];
+ if (!isbranch(t))
+ return KNOT_EOK;
+ uint lasti = branch_weight(t) - 1;
+ ns->stack[ns->len++] = twig(t, lasti);
+ } while (true);
+}
+
+/*!
+ * \brief Advance the node stack to the first leaf in the subtree.
+ *
+ * \return KNOT_EOK or KNOT_ENOMEM.
+ */
+static int ns_first_leaf(nstack_t *ns)
+{
+ assert(ns && ns->len);
+ do {
+ ERR_RETURN(ns_longer(ns));
+ node_t *t = ns->stack[ns->len - 1];
+ if (!isbranch(t))
+ return KNOT_EOK;
+ ns->stack[ns->len++] = twig(t, 0);
+ } while (true);
+}
+
+/*!
+ * \brief Advance the node stack to the leaf that is previous to the current node.
+ *
+ * \note Prefix leaf under the current node DOES count (if present; perhaps questionable).
+ * \return KNOT_EOK on success, KNOT_ENOENT on not-found, or possibly KNOT_ENOMEM.
+ */
+static int ns_prev_leaf(nstack_t *ns)
+{
+ assert(ns && ns->len > 0);
+
+ node_t *t = ns->stack[ns->len - 1];
+ // Beware: BMP_NOBYTE child is ordered *before* its parent.
+ if (isbranch(t) && hastwig(t, BMP_NOBYTE)) {
+ ERR_RETURN(ns_longer(ns));
+ ns->stack[ns->len++] = twig(t, 0);
+ return KNOT_EOK;
+ }
+
+ for (; ns->len >= 2; --ns->len) {
+ t = ns->stack[ns->len - 1];
+ node_t *p = ns->stack[ns->len - 2];
+ uint ci = twig_number(t, p);
+ if (ci == 0) // we've got to go up again
+ continue;
+ // t isn't the first child -> go down the previous one
+ ns->stack[ns->len - 1] = twig(p, ci - 1);
+ return ns_last_leaf(ns);
+ }
+ return KNOT_ENOENT; // root without empty key has no previous leaf
+}
+
+/*!
+ * \brief Advance the node stack to the leaf that is successor to the current node.
+ *
+ * \param skip_prefixed skip any nodes whose key is a prefix of the current one.
+ * If false, prefix leaf or anything else under the current node DOES count.
+ * \return KNOT_EOK on success, KNOT_ENOENT on not-found, or possibly KNOT_ENOMEM.
+ */
+static int ns_next_leaf(nstack_t *ns, const bool skip_pefixed)
+{
+ assert(ns && ns->len > 0);
+
+ node_t *t = ns->stack[ns->len - 1];
+ if (!skip_pefixed && isbranch(t))
+ return ns_first_leaf(ns);
+ for (; ns->len >= 2; --ns->len) {
+ t = ns->stack[ns->len - 1];
+ node_t *p = ns->stack[ns->len - 2];
+ uint ci = twig_number(t, p);
+ if (skip_pefixed && ci == 0 && hastwig(t, BMP_NOBYTE)) {
+ // Keys in the subtree of p are suffixes of the key of t,
+ // so we've got to go one level higher
+ // (this can't happen more than once)
+ continue;
+ }
+ uint cc = branch_weight(p);
+ assert(ci + 1 <= cc);
+ if (ci + 1 == cc) {
+ // t is the last child of p, so we need to keep climbing
+ continue;
+ }
+ // go down the next child of p
+ ns->stack[ns->len - 1] = twig(p, ci + 1);
+ return ns_first_leaf(ns);
+ }
+ return KNOT_ENOENT; // not found, as no more parent is available
+}
+
+/*! \brief Advance the node stack to leaf with longest prefix of the current key. */
+static int ns_prefix(nstack_t *ns)
+{
+ assert(ns && ns->len > 0);
+ const node_t *start = ns->stack[ns->len - 1];
+ // Walk up the trie until we find a BMP_NOBYTE child.
+ while (--ns->len > 0) {
+ node_t *p = ns->stack[ns->len - 1];
+ if (!hastwig(p, BMP_NOBYTE))
+ continue;
+ node_t *end = twig(p, 0);
+ // In case we started in a BMP_NOBYTE leaf, the first step up
+ // did NOT shorten the key and we would get back into the same
+ // node again.
+ if (end == start)
+ continue;
+ ns->stack[ns->len++] = end;
+ return KNOT_EOK;
+ }
+ return KNOT_ENOENT; // not found, as no more parent is available
+}
+
+/*! \brief less-or-equal search.
+ *
+ * \return KNOT_EOK for exact match, 1 for previous, KNOT_ENOENT for not-found,
+ * or KNOT_E*.
+ */
+static int ns_get_leq(nstack_t *ns, const trie_key_t *key, uint32_t len)
+{
+ // First find the key with longest-matching prefix
+ index_t idiff;
+ bitmap_t tbit, kbit;
+ ERR_RETURN(ns_find_branch(ns, key, len, &idiff, &tbit, &kbit));
+ node_t *t = ns->stack[ns->len - 1];
+ if (idiff == TMAX_INDEX) // found exact match
+ return KNOT_EOK;
+ // Get t: the last node on matching path
+ bitmap_t b;
+ if (isbranch(t) && branch_index(t) == idiff) {
+ // t is OK
+ b = kbit;
+ } else {
+ // the top of the stack was the first unmatched node -> step up
+ if (ns->len == 1) {
+ // root was unmatched already
+ if (kbit < tbit)
+ return KNOT_ENOENT;
+ ERR_RETURN(ns_last_leaf(ns));
+ return 1;
+ }
+ --ns->len;
+ t = ns->stack[ns->len - 1];
+ b = twigbit(t, key, len);
+ }
+ // Now we re-do the first "non-matching" step in the trie
+ // but try the previous child if key was less (it may not exist)
+ int i = hastwig(t, b)
+ ? (int)twigoff(t, b) - (kbit < tbit)
+ : (int)twigoff(t, b) - 1 /* twigoff returns successor when !hastwig */;
+ if (i >= 0) {
+ ERR_RETURN(ns_longer(ns));
+ ns->stack[ns->len++] = twig(t, i);
+ ERR_RETURN(ns_last_leaf(ns));
+ } else {
+ ERR_RETURN(ns_prev_leaf(ns));
+ }
+ return 1;
+}
+
+int trie_get_leq(trie_t *tbl, const trie_key_t *key, uint32_t len, trie_val_t **val)
+{
+ assert(tbl && val);
+ if (tbl->weight == 0) {
+ if (val) *val = NULL;
+ return KNOT_ENOENT;
+ }
+ // We try to do without malloc.
+ nstack_t ns_local;
+ ns_init(&ns_local, tbl);
+ nstack_t *ns = &ns_local;
+
+ int ret = ns_get_leq(ns, key, len);
+ if (ret == KNOT_EOK || ret == 1) {
+ assert(!isbranch(ns->stack[ns->len - 1]));
+ if (val) *val = tvalp(ns->stack[ns->len - 1]);
+ } else {
+ if (val) *val = NULL;
+ }
+ ns_cleanup(ns);
+ return ret;
+}
+
+int trie_it_get_leq(trie_it_t *it, const trie_key_t *key, uint32_t len)
+{
+ assert(it && it->stack[0] && it->alen);
+ const trie_t *tbl = ns_gettrie(it);
+ if (tbl->weight == 0) {
+ it->len = 0;
+ return KNOT_ENOENT;
+ }
+ it->len = 1;
+ int ret = ns_get_leq(it, key, len);
+ if (ret == KNOT_EOK || ret == 1) {
+ assert(trie_it_key(it, NULL));
+ } else {
+ it->len = 0;
+ }
+ return ret;
+}
+
+/* see below */
+static int cow_pushdown(trie_cow_t *cow, nstack_t *ns);
+
+/*! \brief implementation of trie_get_ins() and trie_get_cow() */
+static trie_val_t* cow_get_ins(trie_cow_t *cow, trie_t *tbl,
+ const trie_key_t *key, uint32_t len)
+{
+ assert(tbl);
+ // First leaf in an empty tbl?
+ if (unlikely(!tbl->weight)) {
+ if (unlikely(mkleaf(&tbl->root, key, len, &tbl->mm)))
+ return NULL;
+ ++tbl->weight;
+ return tvalp(&tbl->root);
+ }
+ { // Intentionally un-indented; until end of function, to bound cleanup attr.
+ // Find the branching-point
+ __attribute__((cleanup(ns_cleanup)))
+ nstack_t ns_local;
+ ns_init(&ns_local, tbl);
+ nstack_t *ns = &ns_local;
+ index_t idiff;
+ bitmap_t tbit, kbit;
+ if (unlikely(ns_find_branch(ns, key, len, &idiff, &tbit, &kbit)))
+ return NULL;
+ if (unlikely(cow && cow_pushdown(cow, ns) != KNOT_EOK))
+ return NULL;
+ node_t *t = ns->stack[ns->len - 1];
+ if (idiff == TMAX_INDEX) // the same key was already present
+ return tvalp(t);
+ node_t leaf, *leafp;
+ if (unlikely(mkleaf(&leaf, key, len, &tbl->mm)))
+ return NULL;
+
+ if (isbranch(t) && branch_index(t) == idiff) {
+ // The node t needs a new leaf child.
+ assert(!hastwig(t, kbit));
+ // new child position and original child count
+ uint s = twigoff(t, kbit);
+ uint m = branch_weight(t);
+ node_t *nt = mm_realloc(&tbl->mm, twigs(t),
+ sizeof(node_t) * (m + 1), sizeof(node_t) * m);
+ if (unlikely(!nt))
+ goto err_leaf;
+ memmove(nt + s + 1, nt + s, sizeof(node_t) * (m - s));
+ leafp = nt + s;
+ *t = mkbranch(idiff, branch_bmp(t) | kbit, nt);
+ } else {
+ // We need to insert a new binary branch with leaf at *t.
+ // Note: it works the same for the case where we insert above root t.
+ #ifndef NDEBUG
+ if (ns->len > 1) {
+ node_t *pt = ns->stack[ns->len - 2];
+ assert(hastwig(pt, twigbit(pt, key, len)));
+ }
+ #endif
+ node_t *nt = mm_alloc(&tbl->mm, sizeof(node_t) * 2);
+ if (unlikely(!nt))
+ goto err_leaf;
+ node_t t2 = *t; // Save before overwriting t.
+ *t = mkbranch(idiff, tbit | kbit, nt);
+ *twig(t, twigoff(t, tbit)) = t2;
+ leafp = twig(t, twigoff(t, kbit));
+ };
+ *leafp = leaf;
+ ++tbl->weight;
+ return tvalp(leafp);
+err_leaf:
+ mm_free(&tbl->mm, tkey(&leaf));
+ return NULL;
+ }
+}
+
+trie_val_t* trie_get_ins(trie_t *tbl, const trie_key_t *key, uint32_t len)
+{
+ return cow_get_ins(NULL, tbl, key, len);
+}
+
+/*! \brief Apply a function to every trie_val_t*, in order; a recursive solution. */
+static int apply_nodes(node_t *t, int (*f)(trie_val_t *, void *), void *d)
+{
+ assert(t);
+ if (!isbranch(t))
+ return f(tvalp(t), d);
+ uint n = branch_weight(t);
+ for (uint i = 0; i < n; ++i)
+ ERR_RETURN(apply_nodes(twig(t, i), f, d));
+ return KNOT_EOK;
+}
+
+int trie_apply(trie_t *tbl, int (*f)(trie_val_t *, void *), void *d)
+{
+ assert(tbl && f);
+ if (!tbl->weight)
+ return KNOT_EOK;
+ return apply_nodes(&tbl->root, f, d);
+}
+
+/* These are all thin wrappers around static Tns* functions. */
+trie_it_t* trie_it_begin(trie_t *tbl)
+{
+ assert(tbl);
+ trie_it_t *it = malloc(sizeof(nstack_t));
+ if (!it)
+ return NULL;
+ ns_init(it, tbl);
+ if (it->len == 0) // empty tbl
+ return it;
+ if (ns_first_leaf(it)) {
+ ns_cleanup(it);
+ free(it);
+ return NULL;
+ }
+ return it;
+}
+
+bool trie_it_finished(trie_it_t *it)
+{
+ assert(it);
+ return it->len == 0;
+}
+
+void trie_it_free(trie_it_t *it)
+{
+ if (!it)
+ return;
+ ns_cleanup(it);
+ free(it);
+}
+
+trie_it_t *trie_it_clone(const trie_it_t *it)
+{
+ if (!it) // TODO: or should that be an assertion?
+ return NULL;
+ trie_it_t *it2 = malloc(sizeof(nstack_t));
+ if (!it2)
+ return NULL;
+ it2->len = it->len;
+ it2->alen = it->alen; // we _might_ change it in the rare malloc case, but...
+ if (likely(it->stack == it->stack_init)) {
+ it2->stack = it2->stack_init;
+ assert(it->alen == sizeof(it->stack_init) / sizeof(it->stack_init[0]));
+ } else {
+ it2->stack = malloc(it2->alen * sizeof(it2->stack[0]));
+ if (!it2->stack) {
+ free(it2);
+ return NULL;
+ }
+ }
+ memcpy(it2->stack, it->stack, it->len * sizeof(it->stack[0]));
+ return it2;
+}
+
+const trie_key_t* trie_it_key(trie_it_t *it, size_t *len)
+{
+ assert(it && it->len);
+ node_t *t = it->stack[it->len - 1];
+ assert(!isbranch(t));
+ tkey_t *key = tkey(t);
+ if (len)
+ *len = key->len;
+ return key->chars;
+}
+
+trie_val_t* trie_it_val(trie_it_t *it)
+{
+ assert(it && it->len);
+ node_t *t = it->stack[it->len - 1];
+ assert(!isbranch(t));
+ return tvalp(t);
+}
+
+void trie_it_next(trie_it_t *it)
+{
+ assert(it && it->len);
+ if (ns_next_leaf(it, false) != KNOT_EOK)
+ it->len = 0;
+}
+
+void trie_it_next_loop(trie_it_t *it)
+{
+ assert(it && it->len);
+ int ret = ns_next_leaf(it, false);
+ if (ret == KNOT_ENOENT) {
+ it->len = 1;
+ ret = ns_first_leaf(it);
+ }
+ if (ret)
+ it->len = 0;
+}
+
+void trie_it_next_nosuffix(trie_it_t *it)
+{
+ assert(it && it->len);
+ if (ns_next_leaf(it, true) != KNOT_EOK)
+ it->len = 0;
+}
+
+void trie_it_prev(trie_it_t *it)
+{
+ assert(it && it->len);
+ if (ns_prev_leaf(it) != KNOT_EOK)
+ it->len = 0;
+}
+
+void trie_it_prev_loop(trie_it_t *it)
+{
+ assert(it && it->len);
+ int ret = ns_prev_leaf(it);
+ if (ret == KNOT_ENOENT) {
+ it->len = 1;
+ ret = ns_last_leaf(it);
+ }
+ if (ret)
+ it->len = 0;
+}
+
+void trie_it_parent(trie_it_t *it)
+{
+ assert(it && it->len);
+ if (ns_prefix(it))
+ it->len = 0;
+}
+
+void trie_it_del(trie_it_t *it)
+{
+ assert(it && it->len);
+ if (it->len == 0)
+ return;
+ node_t *t = it->stack[it->len - 1];
+ assert(!isbranch(t));
+ bitmap_t b; // del_found() needs to know which bit to zero in the bitmap
+ node_t *p;
+ if (it->len == 1) { // deleting the root
+ p = NULL;
+ b = 0; // unused
+ } else {
+ p = it->stack[it->len - 2];
+ assert(isbranch(p));
+ size_t len;
+ const trie_key_t *key = trie_it_key(it, &len);
+ b = twigbit(p, key, len);
+ }
+ // We could trie_it_{next,prev,...}(it) now, in case we wanted that semantics.
+ it->len = 0;
+ del_found(ns_gettrie(it), t, p, b, NULL);
+}
+
+
+/*!\file
+ *
+ * \section About copy-on-write
+ *
+ * In these notes I'll use the term "object" to refer to either the
+ * twig array of a branch, or the application's data that is referred
+ * to by a leaf's trie_val_t pointer. Note that for COW we don't care
+ * about trie node_t structs themselves, but the objects that they
+ * point to.
+ *
+ * \subsection COW states
+ *
+ * During a COW transaction an object can be in one of three states:
+ * shared, only in the old trie, or only in the new trie. When a
+ * transaction is rolled back, the only-new objects are freed; when a
+ * transaction is committed the new trie takes the place of the old
+ * one and only-old objects are freed.
+ *
+ * \subsection branch marks and regions
+ *
+ * A branch object can be marked by setting the COW flag in the first
+ * element of its twig array. Marked branches partition the trie into
+ * regions; an object's state depends on its region.
+ *
+ * The unmarked branch objects between a trie's root and the marked
+ * branches (excluding the marked branches themselves) is exclusively
+ * owned: either old-only (if you started from the old root) or
+ * new-only (if you started from the new root).
+ *
+ * Marked branch objects, and all objects reachable from marked branch
+ * objects, are in the shared region accessible from both old and new
+ * roots. All branch objects below a marked branch must be unmarked.
+ * (That is, there is at most one marked branch object on any path
+ * from the root of a trie.)
+ *
+ * Branch nodes in the new-only region can be modified in place, in
+ * the same way as an original qp trie. Branch nodes in the old-only
+ * or shared regions must not be modified.
+ *
+ * \subsection app object states
+ *
+ * The app objects reachable from the new-only and old-only regions
+ * explicitly record their state in a way determined by the
+ * application. (These app objects are reachable from the old and new
+ * roots by traversing only unmarked branch objects.)
+ *
+ * The app objects reachable from marked branch objects are implicitly
+ * shared, but their state field has an indeterminate value. If an app
+ * object was previously touched by a rolled-back transaction it may
+ * be marked shared or old-only; if it was previously touched by a
+ * committed transaction it may be marked shared or new-only.
+ *
+ * \subsection key states
+ *
+ * The memory allocated for tkey_t objects also needs to track its
+ * sharing state. They have a "cow" flag to mark when they are shared.
+ * Keys are relatively lazily copied (to make them exclusive) when
+ * their leaf node is touched by a COW mutation.
+ *
+ * [An alternative technique might be to copy them more eagerly, in
+ * cow_pushdown(), which would avoid the need for a flag bit at the
+ * cost of more allocator churn in a transaction.]
+ *
+ * \subsection outside COW
+ *
+ * When a COW transaction is not in progress, there are no marked
+ * branch objects, so everything is exclusively owned. When a COW
+ * transaction is finished (committed or rolled back), the branch
+ * marks are removed. Since they are in the shared region, this branch
+ * cleanup is visible to both old and new tries.
+ *
+ * However the state of app objects is not clean between COW
+ * transactions. When a COW transaction is committed, we traverse the
+ * old-only region to find old-only app objects that should be freed
+ * (and vice versa for rollback). In general, there will be app
+ * objects that are only reachable from the new-only region, and that
+ * have a mixture of shared and new states.
+ */
+
+/*! \brief Trie copy-on-write state */
+struct trie_cow {
+ trie_t *old;
+ trie_t *new;
+ trie_cb *mark_shared;
+ void *d;
+};
+
+/*! \brief is this a marked branch object */
+static bool cow_marked(node_t *t)
+{
+ return isbranch(t) && (twigs(t)->i & TFLAG_COW);
+}
+
+/*! \brief is this a leaf with a marked key */
+static bool cow_key(node_t *t)
+{
+ return !isbranch(t) && tkey(t)->cow;
+}
+
+/*! \brief remove mark from a branch object */
+static void clear_cow(node_t *t)
+{
+ assert(isbranch(t));
+ twigs(t)->i &= ~TFLAG_COW;
+}
+
+/*! \brief mark a node as shared
+ *
+ * For branches this marks the twig array (in COW terminology, the
+ * branch object); for leaves it uses the callback to mark the app
+ * object.
+ */
+static void mark_cow(trie_cow_t *cow, node_t *t)
+{
+ if (isbranch(t)) {
+ node_t *object = twigs(t);
+ object->i |= TFLAG_COW;
+ } else {
+ tkey_t *lkey = tkey(t);
+ lkey->cow = 1;
+ if (cow->mark_shared != NULL) {
+ trie_val_t *valp = tvalp(t);
+ cow->mark_shared(*valp, lkey->chars, lkey->len, cow->d);
+ }
+ }
+}
+
+/*! \brief push exclusive COW region down one node */
+static int cow_pushdown_one(trie_cow_t *cow, node_t *t)
+{
+ uint cc = branch_weight(t);
+ node_t *nt = mm_alloc(&cow->new->mm, sizeof(node_t) * cc);
+ if (nt == NULL)
+ return KNOT_ENOMEM;
+ /* mark all the children */
+ for (uint ci = 0; ci < cc; ++ci)
+ mark_cow(cow, twig(t, ci));
+ /* this node must be unmarked in both old and new versions */
+ clear_cow(t);
+ t->p = memcpy(nt, twigs(t), sizeof(node_t) * cc);
+ return KNOT_EOK;
+}
+
+/*! \brief push exclusive COW region to cover a whole node stack */
+static int cow_pushdown(trie_cow_t *cow, nstack_t *ns)
+{
+ node_t *new_twigs = NULL;
+ node_t *old_twigs = NULL;
+ for (uint i = 0; i < ns->len; i++) {
+ /* if we did a pushdown on the previous iteration, we
+ need to update this stack entry so it points into
+ the parent's new twigs instead of the old ones */
+ if (new_twigs != old_twigs)
+ ns->stack[i] = new_twigs + (ns->stack[i] - old_twigs);
+ if (cow_marked(ns->stack[i])) {
+ old_twigs = twigs(ns->stack[i]);
+ if (cow_pushdown_one(cow, ns->stack[i]))
+ return KNOT_ENOMEM;
+ new_twigs = twigs(ns->stack[i]);
+ } else {
+ new_twigs = NULL;
+ old_twigs = NULL;
+ /* ensure key is exclusively owned */
+ if (cow_key(ns->stack[i])) {
+ node_t oleaf = *ns->stack[i];
+ tkey_t *okey = tkey(&oleaf);
+ if(mkleaf(ns->stack[i], okey->chars, okey->len,
+ &cow->new->mm))
+ return KNOT_ENOMEM;
+ ns->stack[i]->p = oleaf.p;
+ okey->cow = 0;
+ }
+ }
+ }
+ return KNOT_EOK;
+}
+
+trie_cow_t* trie_cow(trie_t *old, trie_cb *mark_shared, void *d)
+{
+ knot_mm_t *mm = &old->mm;
+ trie_t *new = mm_alloc(mm, sizeof(trie_t));
+ trie_cow_t *cow = mm_alloc(mm, sizeof(trie_cow_t));
+ if (new == NULL || cow == NULL) {
+ mm_free(mm, new);
+ mm_free(mm, cow);
+ return NULL;
+ }
+ new->mm = old->mm;
+ new->root = old->root;
+ new->weight = old->weight;
+ cow->old = old;
+ cow->new = new;
+ cow->mark_shared = mark_shared;
+ cow->d = d;
+ if (old->weight)
+ mark_cow(cow, &old->root);
+ return cow;
+}
+
+trie_t* trie_cow_new(trie_cow_t *cow)
+{
+ assert(cow != NULL);
+ return cow->new;
+}
+
+trie_val_t* trie_get_cow(trie_cow_t *cow, const trie_key_t *key, uint32_t len)
+{
+ return cow_get_ins(cow, cow->new, key, len);
+}
+
+int trie_del_cow(trie_cow_t *cow, const trie_key_t *key, uint32_t len, trie_val_t *val)
+{
+ trie_t *tbl = cow->new;
+ if (unlikely(!tbl->weight))
+ return KNOT_ENOENT;
+ { // Intentionally un-indented; until end of function, to bound cleanup attr.
+ // Find the branching-point
+ __attribute__((cleanup(ns_cleanup)))
+ nstack_t ns_local;
+ ns_init(&ns_local, tbl);
+ nstack_t *ns = &ns_local;
+ index_t idiff;
+ bitmap_t tbit, kbit;
+ ERR_RETURN(ns_find_branch(ns, key, len, &idiff, &tbit, &kbit));
+ if (idiff != TMAX_INDEX)
+ return KNOT_ENOENT;
+ ERR_RETURN(cow_pushdown(cow, ns));
+ node_t *t = ns->stack[ns->len - 1];
+ node_t *p = ns->len >= 2 ? ns->stack[ns->len - 2] : NULL;
+ del_found(tbl, t, p, p ? twigbit(p, key, len) : 0, val);
+ }
+ return KNOT_EOK;
+}
+
+/*! \brief clean up after a COW transaction, recursively */
+static void cow_cleanup(trie_cow_t *cow, node_t *t, trie_cb *cb, void *d)
+{
+ if (cow_marked(t)) {
+ // we have hit the shared region, so just reset the mark
+ clear_cow(t);
+ return;
+ } else if (isbranch(t)) {
+ // traverse and free the exclusive region
+ uint cc = branch_weight(t);
+ for (uint ci = 0; ci < cc; ++ci)
+ cow_cleanup(cow, twig(t, ci), cb, d);
+ mm_free(&cow->new->mm, twigs(t));
+ return;
+ } else {
+ // application must decide how to clean up its values
+ tkey_t *lkey = tkey(t);
+ if (cb != NULL) {
+ trie_val_t *valp = tvalp(t);
+ cb(*valp, lkey->chars, lkey->len, d);
+ }
+ // clean up exclusively-owned keys
+ if (lkey->cow)
+ lkey->cow = 0;
+ else
+ mm_free(&cow->new->mm, lkey);
+ return;
+ }
+}
+
+trie_t* trie_cow_commit(trie_cow_t *cow, trie_cb *cb, void *d)
+{
+ trie_t *ret = cow->new;
+ if (cow->old->weight)
+ cow_cleanup(cow, &cow->old->root, cb, d);
+ mm_free(&ret->mm, cow->old);
+ mm_free(&ret->mm, cow);
+ return ret;
+}
+
+trie_t* trie_cow_rollback(trie_cow_t *cow, trie_cb *cb, void *d)
+{
+ trie_t *ret = cow->old;
+ if (cow->new->weight)
+ cow_cleanup(cow, &cow->new->root, cb, d);
+ mm_free(&ret->mm, cow->new);
+ mm_free(&ret->mm, cow);
+ return ret;
+}
diff --git a/src/contrib/qp-trie/trie.h b/src/contrib/qp-trie/trie.h
new file mode 100644
index 0000000..14fb1a2
--- /dev/null
+++ b/src/contrib/qp-trie/trie.h
@@ -0,0 +1,280 @@
+/* Copyright (C) 2019 CZ.NIC, z.s.p.o. <knot-dns@labs.nic.cz>
+ Copyright (C) 2018 Tony Finch <dot@dotat.at>
+
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program. If not, see <https://www.gnu.org/licenses/>.
+ */
+
+#pragma once
+
+#include <stdbool.h>
+#include <stdint.h>
+
+#include "libknot/mm_ctx.h"
+
+/*!
+ * \brief Native API of QP-tries:
+ *
+ * - keys are uint8_t strings, not necessarily zero-terminated,
+ * the structure copies the contents of the passed keys
+ * - values are void* pointers, typically you get an ephemeral pointer to it
+ * - key lengths are limited by 2^32-1 ATM
+ */
+
+/*! \brief Element value. */
+typedef void* trie_val_t;
+/*! \brief Key for indexing tries. Sign could be flipped easily. */
+typedef uint8_t trie_key_t;
+
+/*! \brief Opaque structure holding a QP-trie. */
+typedef struct trie trie_t;
+
+/*! \brief Opaque type for holding a QP-trie iterator. */
+typedef struct trie_it trie_it_t;
+
+/*! \brief Callback for cloning trie values. */
+typedef trie_val_t (*trie_dup_cb)(const trie_val_t val, knot_mm_t *mm);
+
+/*! \brief Callback for performing actions on a trie leaf
+ *
+ * Used during copy-on-write transactions
+ *
+ * \param val The value of the element to be altered
+ * \param key The key of the element to be altered
+ * \param len The length of key
+ * \param d Additional user data
+ */
+typedef void trie_cb(trie_val_t val, const trie_key_t *key, size_t len, void *d);
+
+/*! \brief Opaque type for holding the copy-on-write state for a QP-trie. */
+typedef struct trie_cow trie_cow_t;
+
+/*! \brief Create a trie instance. */
+trie_t* trie_create(knot_mm_t *mm);
+
+/*! \brief Free a trie instance. */
+void trie_free(trie_t *tbl);
+
+/*! \brief Clear a trie instance (make it empty). */
+void trie_clear(trie_t *tbl);
+
+/*! \brief Create a clone of existing trie. */
+trie_t* trie_dup(const trie_t *orig, trie_dup_cb dup_cb, knot_mm_t *mm);
+
+/*! \brief Return the number of keys in the trie. */
+size_t trie_weight(const trie_t *tbl);
+
+/*! \brief Search the trie, returning NULL on failure. */
+trie_val_t* trie_get_try(trie_t *tbl, const trie_key_t *key, uint32_t len);
+
+/*! \brief Search the trie including DNS wildcard semantics, returning NULL on failure.
+ *
+ * \note We assume the key is in knot_dname_lf() format, i.e. labels are ordered
+ * from root to leaf and separated by zero bytes (and no other zeros are allowed).
+ * \note Beware that DNS wildcard matching is not exactly what normal people would expect.
+ */
+trie_val_t* trie_get_try_wildcard(trie_t *tbl, const trie_key_t *key, uint32_t len);
+
+/*! \brief Search the trie, inserting NULL trie_val_t on failure. */
+trie_val_t* trie_get_ins(trie_t *tbl, const trie_key_t *key, uint32_t len);
+
+/*!
+ * \brief Search for less-or-equal element.
+ *
+ * \param tbl Trie.
+ * \param key Searched key.
+ * \param len Key length.
+ * \param val (optional) Value found; it will be set to NULL if not found or errored.
+ * \return KNOT_EOK for exact match, 1 for previous, KNOT_ENOENT for not-found,
+ * or KNOT_E*.
+ */
+int trie_get_leq(trie_t *tbl, const trie_key_t *key, uint32_t len, trie_val_t **val);
+
+/*!
+ * \brief Apply a function to every trie_val_t, in order.
+ *
+ * \return KNOT_EOK if success or KNOT_E* if error.
+ */
+int trie_apply(trie_t *tbl, int (*f)(trie_val_t *, void *), void *d);
+
+/*!
+ * \brief Remove an item, returning KNOT_EOK if succeeded or KNOT_ENOENT if not found.
+ *
+ * If val!=NULL and deletion succeeded, the deleted value is set.
+ */
+int trie_del(trie_t *tbl, const trie_key_t *key, uint32_t len, trie_val_t *val);
+
+
+/*! \brief Create a new iterator pointing to the first element (if any).
+ *
+ * trie_it_* functions deal with these iterators capable of walking and jumping
+ * over the trie. Note that any modification to key-set stored by the trie
+ * will in general invalidate all iterators and you will need to begin anew.
+ * (It won't be detected - you may end up reading freed memory, etc.)
+ */
+trie_it_t* trie_it_begin(trie_t *tbl);
+
+/*! \brief Test if the iterator has gone "past the end" (and points nowhere). */
+bool trie_it_finished(trie_it_t *it);
+
+/*! \brief Free any resources of the iterator. It's OK to call it on NULL. */
+void trie_it_free(trie_it_t *it);
+
+/*! \brief Copy the iterator. See the warning in trie_it_begin(). */
+trie_it_t *trie_it_clone(const trie_it_t *it);
+
+/*!
+ * \brief Return pointer to the key of the current element.
+ *
+ * \note The len is uint32_t internally but size_t is better for our usage
+ * as it is without an additional type conversion.
+ */
+const trie_key_t* trie_it_key(trie_it_t *it, size_t *len);
+
+/*! \brief Return pointer to the value of the current element (writable). */
+trie_val_t* trie_it_val(trie_it_t *it);
+
+/*!
+ * \brief Advance the iterator to the next element.
+ *
+ * Iteration is in ascending lexicographical order.
+ * In particular, the empty string would be considered as the very first.
+ *
+ * \TODO: in most iterator operations, ENOMEM is very unlikely
+ * but it leads to a _finished() iterator (silently).
+ * Perhaps the functions should simply return KNOT_E*
+ */
+void trie_it_next(trie_it_t *it);
+/*! \brief Advance the iterator to the previous element. See trie_it_next(). */
+void trie_it_prev(trie_it_t *it);
+
+/*! \brief Advance iterator to the next element, looping to first after last. */
+void trie_it_next_loop(trie_it_t *it);
+/*! \brief Advance iterator to the previous element, looping to last after first. */
+void trie_it_prev_loop(trie_it_t *it);
+
+/*! \brief Advance iterator to the next element while ignoring the subtree.
+ *
+ * \note Another formulation: skip keys that are prefixed by the current key.
+ * \TODO: name, maybe _noprefixed? The thing is that in the "subtree" meaning
+ * doesn't correspond to how the pointers go in the implementation,
+ * but we may not care much for implementation in the API...
+ */
+void trie_it_next_nosub(trie_it_t *it);
+
+/*! \brief Advance iterator to the longest prefix of the current key.
+ *
+ * \TODO: name, maybe _prefix? Arguments similar to _nosub vs. _noprefixed.
+ */
+void trie_it_parent(trie_it_t *it);
+
+/*! \brief trie_get_leq() but with an iterator. */
+int trie_it_get_leq(trie_it_t *it, const trie_key_t *key, uint32_t len);
+
+/*! \brief Remove the current element. The iterator will get trie_it_finished() */
+void trie_it_del(trie_it_t *it);
+
+
+/*! \brief Start a COW transaction
+ *
+ * A copy-on-write transaction starts by obtaining a write lock (in
+ * your application code) followed by a call to trie_cow(). This
+ * creates a shared clone of the trie and saves both old and new roots
+ * in the COW context.
+ *
+ * During the COW transaction, you call trie_cow_ins() or
+ * trie_cow_del() as necessary. These calls ensure that the relevant
+ * parts of the (new) trie are copied so that they can be modified
+ * freely.
+ *
+ * Your trie_val_t objects must be able to distinguish their
+ * reachability, either shared, or old-only, or new-only. Before a COW
+ * transaction the reachability of your objects is indeterminate.
+ * During a transaction, any trie_val_t objects that might be affected
+ * (because they are adjacent to a trie_get_cow() or trie_del_cow())
+ * are first marked as shared using the callback you pass to
+ * trie_cow().
+ *
+ * When the transaction is complete, to commit, call trie_cow_new() to
+ * get the new root, swap the old and new trie roots (e.g. with
+ * rcu_xchg_pointer()), wait for readers to finish with the old trie
+ * (e.g. using synchronize_rcu()), then call trie_cow_commit(). For a
+ * rollback, you can just call trie_cow_rollback() without waiting
+ * since that doesn't conflict with readers. After trie_cow_commit()
+ * or trie_cow_rollback() have finished, you can release your write
+ * lock.
+ *
+ * Concurrent reading of the old trie is allowed during a transaction
+ * provided that it is known when all readers have finished with the
+ * old version, e.g. using rcu_read_lock() and rcu_read_unlock().
+ * There must be only one write transaction at a time.
+ *
+ * \param old the old trie
+ * \param mark_shared callback to mark a leaf as shared (can be NULL)
+ * \param d extra data for the callback
+ * \return a pointer to a COW context,
+ * or NULL if there was a failure
+ */
+trie_cow_t* trie_cow(trie_t *old, trie_cb *mark_shared, void *d);
+
+/*! \brief get the new trie from a COW context */
+trie_t* trie_cow_new(trie_cow_t *cow);
+
+/*! \brief variant of trie_get_ins() for use during COW transactions
+ *
+ * As necessary, this copies path from the root of the trie to the
+ * leaf, so that it is no longer shared. Any leaves adjacent to this
+ * path are marked as shared using the mark_shared callback passed to
+ * trie_cow().
+ *
+ * It is your responsibility to COW your trie_val_t objects. If you copy an
+ * object you must change the original's reachability from shared to old-only.
+ * New objects (including copies) must have new-only reachability.
+ */
+trie_val_t* trie_get_cow(trie_cow_t *cow, const trie_key_t *key, uint32_t len);
+
+/*!
+ * \brief variant of trie_del() for use during COW transactions
+ *
+ * The mark_shared callback is invoked as necessary, in the same way
+ * as trie_get_cow().
+ *
+ * Returns KNOT_EOK if the key was removed or KNOT_ENOENT if not found.
+ * If val!=NULL and deletion succeeded, the *val is set to the deleted
+ * value pointer.
+ */
+int trie_del_cow(trie_cow_t *cow, const trie_key_t *key, uint32_t len, trie_val_t *val);
+
+/*! \brief clean up the old trie after committing a COW transaction
+ *
+ * Your callback is invoked for any trie_val_t objects that might need
+ * cleaning up; you must free any objects you have marked as old-only
+ * and retain objects with shared reachability.
+ *
+ * \note The callback can be NULL.
+ *
+ * The cow object is free()d, and the new trie root is returned.
+ */
+trie_t* trie_cow_commit(trie_cow_t *cow, trie_cb *cb, void *d);
+
+/*! \brief clean up the new trie after rolling back a COW transaction
+ *
+ * Your callback is invoked for any trie_val_t objects that might need
+ * cleaning up; you must free any objects you have marked as new-only
+ * and retain objects with shared reachability.
+ *
+ * \note The callback can be NULL.
+ *
+ * The cow object is free()d, and the old trie root is returned.
+ */
+trie_t* trie_cow_rollback(trie_cow_t *cow, trie_cb *cb, void *d);