/* src/contrib/qp-trie/trie.h */

/*  Copyright (C) 2019 CZ.NIC, z.s.p.o. <knot-dns@labs.nic.cz>
    Copyright (C) 2018 Tony Finch <dot@dotat.at>

    This program is free software: you can redistribute it and/or modify
    it under the terms of the GNU General Public License as published by
    the Free Software Foundation, either version 3 of the License, or
    (at your option) any later version.

    This program is distributed in the hope that it will be useful,
    but WITHOUT ANY WARRANTY; without even the implied warranty of
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
    GNU General Public License for more details.

    You should have received a copy of the GNU General Public License
    along with this program.  If not, see <https://www.gnu.org/licenses/>.
 */

#pragma once

#include <stdbool.h>
#include <stdint.h>

#include "libknot/mm_ctx.h"

/*!
 * \brief Native API of QP-tries:
 *
 * - keys are uint8_t strings, not necessarily zero-terminated,
 *   the structure copies the contents of the passed keys
 * - values are void* pointers; the API typically hands you an ephemeral
 *   pointer to the stored value
 * - key lengths are currently limited to 2^32-1 bytes (lengths are passed as uint32_t)
 */

/*! \brief Element value: an untyped pointer stored by the trie for each key. */
typedef void* trie_val_t;
/*! \brief Key for indexing tries.  Sign could be flipped easily.
 *
 * Keys are passed as a pointer to trie_key_t together with an explicit length,
 * so they need not be zero-terminated.
 */
typedef uint8_t trie_key_t;

/*! \brief Opaque structure holding a QP-trie.
 *
 * Only declared in this header; use it through pointers returned by
 * functions such as trie_create(), trie_dup() or trie_cow_new().
 */
typedef struct trie trie_t;

/*! \brief Opaque type for holding a QP-trie iterator.
 *
 * Obtained from trie_it_begin() or trie_it_clone() and released by trie_it_free().
 */
typedef struct trie_it trie_it_t;

/*! \brief Callback for cloning trie values; passed to trie_dup() as dup_cb.
 *
 * \param val  The value to clone.
 * \param mm   Memory context (presumably the one given to trie_dup() -- confirm
 *             in the implementation).
 * \return The value to be used in the clone.
 */
typedef trie_val_t (*trie_dup_cb)(const trie_val_t val, knot_mm_t *mm);

/*! \brief Callback for performing actions on a trie leaf
 *
 * Used during copy-on-write transactions.
 *
 * \note This is a function type, not a pointer type; the API takes
 *       callbacks as `trie_cb *`.
 *
 * \param val	The value of the element to be altered
 * \param key	The key of the element to be altered
 * \param len	The length of key
 * \param d	Additional user data
 */
typedef void trie_cb(trie_val_t val, const trie_key_t *key, size_t len, void *d);

/*! \brief Opaque type for holding the copy-on-write state for a QP-trie.
 *
 * Created by trie_cow(); free()d by trie_cow_commit() or trie_cow_rollback().
 */
typedef struct trie_cow trie_cow_t;

/*!
 * \brief Create a trie instance.
 *
 * \param mm  Memory context for the trie.
 *            NOTE(review): whether NULL is accepted (e.g. to select a default
 *            allocator) is not visible in this header -- confirm.
 * \return The new trie (presumably NULL on allocation failure -- confirm).
 */
trie_t* trie_create(knot_mm_t *mm);

/*!
 * \brief Free a trie instance.
 *
 * \param tbl  Trie to free.
 *
 * NOTE(review): the values are opaque void* pointers, so presumably the
 * objects they point to are not released here -- confirm in the implementation.
 */
void trie_free(trie_t *tbl);

/*!
 * \brief Clear a trie instance (make it empty).
 *
 * \param tbl  Trie to empty.
 */
void trie_clear(trie_t *tbl);

/*!
 * \brief Create a clone of existing trie.
 *
 * \param orig    Trie to clone.
 * \param dup_cb  Callback for cloning the trie values.
 * \param mm      Memory context.
 * \return The clone (presumably NULL on failure -- confirm).
 */
trie_t* trie_dup(const trie_t *orig, trie_dup_cb dup_cb, knot_mm_t *mm);

/*!
 * \brief Return the number of keys in the trie.
 *
 * \param tbl  Trie.
 * \return Number of keys.
 */
size_t trie_weight(const trie_t *tbl);

/*!
 * \brief Search the trie, returning NULL on failure.
 *
 * \param tbl  Trie.
 * \param key  Searched key.
 * \param len  Key length.
 * \return Pointer to the value stored under the key, or NULL on failure.
 */
trie_val_t* trie_get_try(trie_t *tbl, const trie_key_t *key, uint32_t len);

/*! \brief Search the trie including DNS wildcard semantics, returning NULL on failure.
 *
 * \note We assume the key is in knot_dname_lf() format, i.e. labels are ordered
 *   from root to leaf and separated by zero bytes (and no other zeros are allowed).
 * \note Beware that DNS wildcard matching is not exactly what normal people would expect.
 *
 * \param tbl  Trie.
 * \param key  Searched key.
 * \param len  Key length.
 * \return Pointer to the matching value, or NULL on failure.
 */
trie_val_t* trie_get_try_wildcard(trie_t *tbl, const trie_key_t *key, uint32_t len);

/*!
 * \brief Search the trie, inserting NULL trie_val_t on failure.
 *
 * \param tbl  Trie.
 * \param key  Searched key.
 * \param len  Key length.
 * \return Pointer to the value stored under the key; if the key was not
 *         present, it is inserted with a NULL value first.
 *         NOTE(review): presumably NULL is returned when the insertion itself
 *         fails (e.g. out of memory) -- confirm.
 */
trie_val_t* trie_get_ins(trie_t *tbl, const trie_key_t *key, uint32_t len);

/*!
 * \brief Search for less-or-equal element.
 *
 * \param tbl  Trie.
 * \param key  Searched key.
 * \param len  Key length.
 * \param val  (optional) If not NULL, *val receives a pointer to the value found;
 *             it will be set to NULL if not found or errored.
 * \return KNOT_EOK for exact match, 1 for previous, KNOT_ENOENT for not-found,
 *         or KNOT_E*.
 */
int trie_get_leq(trie_t *tbl, const trie_key_t *key, uint32_t len, trie_val_t **val);

/*!
 * \brief Apply a function to every trie_val_t, in order.
 *
 * \param tbl  Trie.
 * \param f    Function to apply; it takes a pointer to the value and a void*
 *             (presumably \a d -- confirm).
 * \param d    User data for \a f.
 * \return KNOT_EOK if success or KNOT_E* if error.
 */
int trie_apply(trie_t *tbl, int (*f)(trie_val_t *, void *), void *d);

/*!
 * \brief Remove an item.
 *
 * \param tbl  Trie.
 * \param key  Key to remove.
 * \param len  Key length.
 * \param val  (optional) If val!=NULL and deletion succeeded, *val is set
 *             to the deleted value.
 * \return KNOT_EOK if succeeded or KNOT_ENOENT if not found.
 */
int trie_del(trie_t *tbl, const trie_key_t *key, uint32_t len, trie_val_t *val);


/*! \brief Create a new iterator pointing to the first element (if any).
 *
 * trie_it_* functions deal with these iterators capable of walking and jumping
 * over the trie.  Note that any modification to key-set stored by the trie
 * will in general invalidate all iterators and you will need to begin anew.
 * (It won't be detected - you may end up reading freed memory, etc.)
 *
 * \param tbl  Trie to iterate over.
 * \return The new iterator, to be released by trie_it_free()
 *         (presumably NULL on allocation failure -- confirm).
 */
trie_it_t* trie_it_begin(trie_t *tbl);

/*!
 * \brief Test if the iterator has gone "past the end" (and points nowhere).
 *
 * \param it  Iterator.
 * \return True if the iterator points nowhere.
 */
bool trie_it_finished(trie_it_t *it);

/*!
 * \brief Free any resources of the iterator.
 *
 * \param it  Iterator to free; it's OK to pass NULL.
 */
void trie_it_free(trie_it_t *it);

/*!
 * \brief Copy the iterator.  See the warning in trie_it_begin().
 *
 * \param it  Iterator to copy.
 * \return The new iterator (presumably NULL on failure -- confirm).
 */
trie_it_t *trie_it_clone(const trie_it_t *it);

/*!
 * \brief Return pointer to the key of the current element.
 *
 * \note The len is uint32_t internally but size_t is better for our usage
 *       as it is without an additional type conversion.
 *
 * \param it   Iterator.
 * \param len  Output: length of the returned key.
 *             NOTE(review): whether NULL is accepted is not visible here -- confirm.
 * \return Pointer to the key.
 */
const trie_key_t* trie_it_key(trie_it_t *it, size_t *len);

/*!
 * \brief Return pointer to the value of the current element (writable).
 *
 * \param it  Iterator.
 * \return Pointer to the value.
 */
trie_val_t* trie_it_val(trie_it_t *it);

/*!
 * \brief Advance the iterator to the next element.
 *
 * Iteration is in ascending lexicographical order.
 * In particular, the empty string would be considered as the very first.
 *
 * \todo In most iterator operations, ENOMEM is very unlikely
 * but it leads to a _finished() iterator (silently).
 * Perhaps the functions should simply return KNOT_E*
 *
 * \param it  Iterator.
 */
void trie_it_next(trie_it_t *it);
/*!
 * \brief Advance the iterator to the previous element.  See trie_it_next().
 *
 * \param it  Iterator.
 */
void trie_it_prev(trie_it_t *it);

/*!
 * \brief Advance iterator to the next element, looping to first after last.
 *
 * \param it  Iterator.
 */
void trie_it_next_loop(trie_it_t *it);
/*!
 * \brief Advance iterator to the previous element, looping to last after first.
 *
 * \param it  Iterator.
 */
void trie_it_prev_loop(trie_it_t *it);

/*! \brief Advance iterator to the next element while ignoring the subtree.
 *
 * \note Another formulation: skip keys that are prefixed by the current key.
 * \todo Name, maybe _unprefixed?  The thing is that the "subtree" meaning
 * doesn't correspond to how the pointers go in the implementation,
 * but we may not care much for implementation in the API...
 *
 * \param it  Iterator.
 */
void trie_it_next_nosub(trie_it_t *it);

/*! \brief Advance iterator to the longest prefix of the current key.
 *
 * \todo Name, maybe _prefix?  Arguments similar to _nosub vs. _unprefixed.
 *
 * \param it  Iterator.
 *
 * NOTE(review): the behaviour when no stored key is a prefix of the current
 * one is not specified here (presumably the iterator becomes finished) -- confirm.
 */
void trie_it_parent(trie_it_t *it);

/*!
 * \brief trie_get_leq() but with an iterator.
 *
 * \param it   Iterator (presumably repositioned to the element found -- confirm).
 * \param key  Searched key.
 * \param len  Key length.
 * \return See trie_get_leq().
 */
int trie_it_get_leq(trie_it_t *it, const trie_key_t *key, uint32_t len);

/*!
 * \brief Remove the current element.
 *
 * Afterwards the iterator is in the trie_it_finished() state.
 *
 * \param it  Iterator pointing to the element to remove.
 */
void trie_it_del(trie_it_t *it);


/*! \brief Start a COW transaction
 *
 * A copy-on-write transaction starts by obtaining a write lock (in
 * your application code) followed by a call to trie_cow(). This
 * creates a shared clone of the trie and saves both old and new roots
 * in the COW context.
 *
 * During the COW transaction, you call trie_get_cow() or
 * trie_del_cow() as necessary. These calls ensure that the relevant
 * parts of the (new) trie are copied so that they can be modified
 * freely.
 *
 * Your trie_val_t objects must be able to distinguish their
 * reachability, either shared, or old-only, or new-only. Before a COW
 * transaction the reachability of your objects is indeterminate.
 * During a transaction, any trie_val_t objects that might be affected
 * (because they are adjacent to a trie_get_cow() or trie_del_cow())
 * are first marked as shared using the callback you pass to
 * trie_cow().
 *
 * When the transaction is complete, to commit, call trie_cow_new() to
 * get the new root, swap the old and new trie roots (e.g. with
 * rcu_xchg_pointer()), wait for readers to finish with the old trie
 * (e.g. using synchronize_rcu()), then call trie_cow_commit(). For a
 * rollback, you can just call trie_cow_rollback() without waiting
 * since that doesn't conflict with readers. After trie_cow_commit()
 * or trie_cow_rollback() have finished, you can release your write
 * lock.
 *
 * Concurrent reading of the old trie is allowed during a transaction
 * provided that it is known when all readers have finished with the
 * old version, e.g. using rcu_read_lock() and rcu_read_unlock().
 * There must be only one write transaction at a time.
 *
 * \param old		the old trie
 * \param mark_shared	callback to mark a leaf as shared (can be NULL)
 * \param d		extra data for the callback
 * \return		a pointer to a COW context,
 *			or NULL if there was a failure
 */
trie_cow_t* trie_cow(trie_t *old, trie_cb *mark_shared, void *d);

/*!
 * \brief Get the new trie from a COW context.
 *
 * \param cow  COW context created by trie_cow().
 * \return The new trie.
 */
trie_t* trie_cow_new(trie_cow_t *cow);

/*! \brief Variant of trie_get_ins() for use during COW transactions
 *
 * As necessary, this copies the path from the root of the trie to the
 * leaf, so that it is no longer shared. Any leaves adjacent to this
 * path are marked as shared using the mark_shared callback passed to
 * trie_cow().
 *
 * It is your responsibility to COW your trie_val_t objects. If you copy an
 * object you must change the original's reachability from shared to old-only.
 * New objects (including copies) must have new-only reachability.
 *
 * \param cow  COW context created by trie_cow().
 * \param key  Searched key.
 * \param len  Key length.
 * \return Pointer to the value, as for trie_get_ins().
 */
trie_val_t* trie_get_cow(trie_cow_t *cow, const trie_key_t *key, uint32_t len);

/*!
 * \brief Variant of trie_del() for use during COW transactions
 *
 * The mark_shared callback is invoked as necessary, in the same way
 * as trie_get_cow().
 *
 * \param cow  COW context created by trie_cow().
 * \param key  Key to remove.
 * \param len  Key length.
 * \param val  (optional) If val!=NULL and deletion succeeded, the *val is set
 *             to the deleted value pointer.
 * \return KNOT_EOK if the key was removed or KNOT_ENOENT if not found.
 */
int trie_del_cow(trie_cow_t *cow, const trie_key_t *key, uint32_t len, trie_val_t *val);

/*! \brief Clean up the old trie after committing a COW transaction
 *
 * Your callback is invoked for any trie_val_t objects that might need
 * cleaning up; you must free any objects you have marked as old-only
 * and retain objects with shared reachability.
 *
 * \note The callback can be NULL.
 *
 * \param cow  COW context; it is free()d by this call.
 * \param cb   Clean-up callback (can be NULL).
 * \param d    Extra data for the callback.
 * \return The new trie root.
 */
trie_t* trie_cow_commit(trie_cow_t *cow, trie_cb *cb, void *d);

/*! \brief Clean up the new trie after rolling back a COW transaction
 *
 * Your callback is invoked for any trie_val_t objects that might need
 * cleaning up; you must free any objects you have marked as new-only
 * and retain objects with shared reachability.
 *
 * \note The callback can be NULL.
 *
 * \param cow  COW context; it is free()d by this call.
 * \param cb   Clean-up callback (can be NULL).
 * \param d    Extra data for the callback.
 * \return The old trie root.
 */
trie_t* trie_cow_rollback(trie_cow_t *cow, trie_cb *cb, void *d);