Diffstat
-rw-r--r--  lib/README.rst  314
-rw-r--r--  lib/cache/api.c  889
-rw-r--r--  lib/cache/api.h  201
-rw-r--r--  lib/cache/cdb_api.h  68
-rw-r--r--  lib/cache/cdb_lmdb.c  710
-rw-r--r--  lib/cache/cdb_lmdb.h  23
-rw-r--r--  lib/cache/entry_list.c  293
-rw-r--r--  lib/cache/entry_pkt.c  224
-rw-r--r--  lib/cache/entry_rr.c  146
-rw-r--r--  lib/cache/impl.h  412
-rw-r--r--  lib/cache/knot_pkt.c  106
-rw-r--r--  lib/cache/nsec1.c  511
-rw-r--r--  lib/cache/nsec3.c  501
-rw-r--r--  lib/cache/peek.c  724
-rw-r--r--  lib/cookies/alg_containers.c  72
-rw-r--r--  lib/cookies/alg_containers.h  49
-rw-r--r--  lib/cookies/alg_sha.c  122
-rw-r--r--  lib/cookies/alg_sha.h  30
-rw-r--r--  lib/cookies/control.h  49
-rw-r--r--  lib/cookies/helper.c  296
-rw-r--r--  lib/cookies/helper.h  86
-rw-r--r--  lib/cookies/lru_cache.c  70
-rw-r--r--  lib/cookies/lru_cache.h  69
-rw-r--r--  lib/cookies/nonce.c  34
-rw-r--r--  lib/cookies/nonce.h  43
-rw-r--r--  lib/defines.h  102
-rw-r--r--  lib/dnssec.c  473
-rw-r--r--  lib/dnssec.h  152
-rw-r--r--  lib/dnssec/nsec.c  540
-rw-r--r--  lib/dnssec/nsec.h  105
-rw-r--r--  lib/dnssec/nsec3.c  776
-rw-r--r--  lib/dnssec/nsec3.h  82
-rw-r--r--  lib/dnssec/signature.c  307
-rw-r--r--  lib/dnssec/signature.h  42
-rw-r--r--  lib/dnssec/ta.c  185
-rw-r--r--  lib/dnssec/ta.h  87
-rw-r--r--  lib/generic/README.rst  60
-rw-r--r--  lib/generic/array.h  166
-rw-r--r--  lib/generic/lru.c  236
-rw-r--r--  lib/generic/lru.h  249
-rw-r--r--  lib/generic/map.c  354
-rw-r--r--  lib/generic/map.h  115
-rw-r--r--  lib/generic/pack.h  249
-rw-r--r--  lib/generic/queue.c  124
-rw-r--r--  lib/generic/queue.h  260
-rw-r--r--  lib/generic/set.h  105
-rw-r--r--  lib/generic/trie.c  912
-rw-r--r--  lib/generic/trie.h  150
-rw-r--r--  lib/layer.h  107
-rw-r--r--  lib/layer/cache.c  31
-rw-r--r--  lib/layer/iterate.c  1135
-rw-r--r--  lib/layer/iterate.h  36
-rw-r--r--  lib/layer/validate.c  1133
-rw-r--r--  lib/lib.mk  110
-rw-r--r--  lib/module.c  170
-rw-r--r--  lib/module.h  111
-rw-r--r--  lib/nsrep.c  563
-rw-r--r--  lib/nsrep.h  188
-rw-r--r--  lib/resolve.c  1648
-rw-r--r--  lib/resolve.h  332
-rw-r--r--  lib/rplan.c  308
-rw-r--r--  lib/rplan.h  221
-rw-r--r--  lib/utils.c  1084
-rw-r--r--  lib/utils.h  495
-rw-r--r--  lib/zonecut.c  597
-rw-r--r--  lib/zonecut.h  173
66 files changed, 20315 insertions, 0 deletions
diff --git a/lib/README.rst b/lib/README.rst
new file mode 100644
index 0000000..2905676
--- /dev/null
+++ b/lib/README.rst
@@ -0,0 +1,314 @@
+*********************
+Knot Resolver library
+*********************
+
+Requirements
+============
+
+* libknot_ 2.0 (Knot DNS high-performance DNS library)
+
+For users
+=========
+
+The library as described provides basic services for name resolution, which should cover most
+use cases; examples are in the :ref:`resolve API <lib_api_rplan>` documentation.
+
+.. tip:: If you're migrating from ``getaddrinfo()``, see the *"synchronous"* API; the library also offers an iterative API that you can plug into your event loop, for example.
+
+.. _lib-layers:
+
+For developers
+==============
+
+The resolution process starts with the functions in :ref:`resolve.c <lib_api_rplan>`, which are responsible for:
+
+* reacting to state machine state (i.e. calling consume layers if we have an answer ready)
+* interacting with the library user (i.e. asking caller for I/O, accepting queries)
+* fetching assets needed by layers (i.e. zone cut)
+
+This is the *driver*. The driver is not meant to know *"how"* the query resolves, but rather *"when"* to execute *"what"*.
+
+.. image:: ../doc/resolution.png
+   :align: center
+
+On the other side are *layers*. They are responsible for dissecting the packets and informing the driver about the results. For example, a *produce* layer generates a query, while a *consume* layer validates an answer.
+
+.. tip:: Layers are executed asynchronously by the driver. If you need some asset beforehand, you can signal the driver by returning a state or setting flags on the current query. For example, setting the ``AWAIT_CUT`` flag forces the driver to fetch zone cut information before the packet is consumed; setting the ``RESOLVED`` flag makes it pop the query after the current set of layers is finished; returning the ``FAIL`` state makes it fail the current query.
+
+Layers can also change the course of resolution, for example by appending additional queries.
+
+.. code-block:: lua
+
+	consume = function (state, req, answer)
+		answer = kres.pkt_t(answer)
+		if answer:qtype() == kres.type.NS then
+			req = kres.request_t(req)
+			local qry = req:push(answer:qname(), kres.type.SOA, kres.class.IN)
+			qry.flags.AWAIT_CUT = true
+		end
+		return state
+	end
+
+This **doesn't** block the currently processed query, and the newly created sub-request will start as soon as the driver finishes processing the current one. In some cases you might need to issue a sub-request and process it **before** continuing with the current one, e.g. a validator may need a DNSKEY before it can validate signatures. In this case, layers can yield and resume afterwards.
+
+.. code-block:: lua
+
+	consume = function (state, req, answer)
+		answer = kres.pkt_t(answer)
+		if state == kres.YIELD then
+			print('continuing yielded layer')
+			return kres.DONE
+		else
+			if answer:qtype() == kres.type.NS then
+				req = kres.request_t(req)
+				local qry = req:push(answer:qname(), kres.type.SOA, kres.class.IN)
+				qry.flags.AWAIT_CUT = true
+				print('planned SOA query, yielding')
+				return kres.YIELD
+			end
+			return state
+		end
+	end
+
+The ``YIELD`` state is a bit special. When a layer returns it, it interrupts the current walk through the layers. When the layer receives it,
+it means that it yielded before and is now being resumed. This is useful in situations where you need a sub-request to determine whether the current answer is valid or not.
+
+Writing layers
+==============
+
+.. warning:: FIXME: this dev-docs section is outdated! Better see comments in files instead, for now.
+
+The resolver :ref:`library <lib_index>` leverages the processing API from libknot to separate packet-processing code into layers.
+
+.. note:: This is only a crash course in the library internals; see the resolver :ref:`library <lib_index>` documentation for a complete overview of the services.
+
+The library offers the following services:
+
+- :ref:`Cache <lib_api_cache>` - MVCC cache interface for retrieving/storing resource records.
+- :ref:`Resolution plan <lib_api_rplan>` - Query resolution plan, a list of partial queries (with hierarchy) sent in order to satisfy the original query. This contains information about the queries, nameserver choice, timing information, answer and its class.
+- :ref:`Nameservers <lib_api_nameservers>` - Reputation database of nameservers, which serves as an aid for nameserver choice.
+
+A processing layer is going to be called by the query resolution driver for each query,
+so you're going to work with :ref:`struct kr_request <lib_api_rplan>` as your per-query context.
+This structure contains pointers to resolution context, resolution plan and also the final answer.
+
+.. code-block:: c
+
+	int consume(kr_layer_t *ctx, knot_pkt_t *pkt)
+	{
+		struct kr_request *req = ctx->req;
+		struct kr_query *qry = req->current_query;
+		/* Inspect req/qry here, then pass the state through. */
+		return ctx->state;
+	}
+
+This is only passive processing of the incoming answer. If you want to change the course of resolution, say satisfy a query from a local cache before the library issues a query to the nameserver, you can use states (see the :ref:`Static hints <mod-hints>` module for an example).
+
+.. code-block:: c
+
+	int produce(kr_layer_t *ctx, knot_pkt_t *pkt)
+	{
+		struct kr_request *req = ctx->req;
+		struct kr_query *qry = req->current_query;
+
+		/* Query can be satisfied locally. */
+		if (can_satisfy(qry)) {
+			/* This flag makes the resolver move the query
+			 * to the "resolved" list. */
+			qry->flags.RESOLVED = true;
+			return KR_STATE_DONE;
+		}
+
+		/* Pass-through. */
+		return ctx->state;
+	}
+
+It is possible not only to act during query resolution, but also to view the complete resolution plan afterwards. This is useful for analysis-type tasks, or *"per-answer"* hooks.
+
+.. code-block:: c
+
+	int finish(kr_layer_t *ctx)
+	{
+		struct kr_request *req = ctx->req;
+		struct kr_rplan *rplan = req->rplan;
+
+		/* Print the query sequence with start time. */
+		char qname_str[KNOT_DNAME_MAXLEN];
+		struct kr_query *qry = NULL;
+		WALK_LIST(qry, rplan->resolved) {
+			knot_dname_to_str(qname_str, qry->sname, sizeof(qname_str));
+			printf("%s at %u\n", qname_str, qry->timestamp);
+		}
+
+		return ctx->state;
+	}
+
+APIs in Lua
+===========
+
+The APIs in the Lua world try to mirror the C APIs using LuaJIT FFI, with several differences and enhancements.
+There is no comprehensive guide on the API yet, but you can have a look at the bindings_ file.
+
+Elementary types and constants
+------------------------------
+
+* States are directly in the ``kres`` table, e.g. ``kres.YIELD, kres.CONSUME, kres.PRODUCE, kres.DONE, kres.FAIL``.
+* DNS classes are in the ``kres.class`` table, e.g. ``kres.class.IN`` for the Internet class.
+* DNS types are in the ``kres.type`` table, e.g. ``kres.type.AAAA`` for the AAAA type.
+* DNS rcodes are in the ``kres.rcode`` table, e.g. ``kres.rcode.NOERROR``.
+* Packet sections (QUESTION, ANSWER, AUTHORITY, ADDITIONAL) are in the ``kres.section`` table.
+
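+For instance, a layer can compare packet fields against these constants directly. A minimal
+sketch (assuming ``pkt`` is already a ``kres.pkt_t``, as in the layer examples below):
+
+.. code-block:: lua
+
+	if pkt:qclass() == kres.class.IN and pkt:qtype() == kres.type.AAAA then
+		print('IPv6 address query, ok:', pkt:rcode() == kres.rcode.NOERROR)
+	end
+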
+Working with domain names
+-------------------------
+
+The internal API usually works with domain names in label (wire) format; you can convert between the text and wire representations freely.
+
+.. code-block:: lua
+
+	local dname = kres.str2dname('business.se')
+	local strname = kres.dname2str(dname)
+
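+A name in wire format carries a length byte before each label, so a Lua string like
+``'\7blocked'`` (used in later examples) denotes the single-label name *blocked*.
+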
+Working with resource records
+-----------------------------
+
+Resource records are stored as tables.
+
+.. code-block:: lua
+
+	local rr = { owner = kres.str2dname('owner'),
+	             ttl = 0,
+	             class = kres.class.IN,
+	             type = kres.type.CNAME,
+	             rdata = kres.str2dname('someplace') }
+	print(kres.rr2str(rr))
+
+RRSets in a packet can be accessed using FFI, and you can easily fetch single records.
+
+.. code-block:: lua
+
+	local rrset = { ... }
+	local rr = rrset:get(0) -- Return first RR
+	print(kres.dname2str(rr:owner()))
+	print(rr:ttl())
+	print(kres.rr2str(rr))
+
+Working with packets
+--------------------
+
+The packet is the data structure that you're going to see in layers very often. A packet consists of a header and four sections: QUESTION, ANSWER, AUTHORITY, ADDITIONAL. The first section is special: it contains the query name, type, and class, while the remaining sections contain RRSets.
+
+First you need to convert it to a type known to FFI and check basic properties. Let's start with a snippet of a *consume* layer.
+
+.. code-block:: lua
+
+	consume = function (state, req, pkt)
+		pkt = kres.pkt_t(pkt)
+		print('rcode:', pkt:rcode())
+		print('query:', kres.dname2str(pkt:qname()), pkt:qclass(), pkt:qtype())
+		if pkt:rcode() ~= kres.rcode.NOERROR then
+			print('error response')
+		end
+		return state
+	end
+
+You can enumerate records in the sections.
+
+.. code-block:: lua
+
+	local records = pkt:section(kres.section.ANSWER)
+	for i = 1, #records do
+		local rr = records[i]
+		if rr.type == kres.type.AAAA then
+			print(kres.rr2str(rr))
+		end
+	end
+
+During *produce* or *begin*, you might want to write to the packet. Keep in mind that you have to write packet sections in sequence;
+e.g. you can't write to ANSWER after writing AUTHORITY. It's like stages that you can't go back through.
+
+.. code-block:: lua
+
+	pkt:rcode(kres.rcode.NXDOMAIN)
+	-- Clear answer and write QUESTION
+	pkt:recycle()
+	pkt:question('\7blocked', kres.class.IN, kres.type.SOA)
+	-- Start writing data
+	pkt:begin(kres.section.ANSWER)
+	-- Nothing in answer
+	pkt:begin(kres.section.AUTHORITY)
+	local soa = { owner = '\7blocked', ttl = 900, class = kres.class.IN, type = kres.type.SOA, rdata = '...' }
+	pkt:put(soa.owner, soa.ttl, soa.class, soa.type, soa.rdata)
+
+Working with requests
+---------------------
+
+The request holds information about the currently processed query, enabled options, cache, and other extra data.
+You primarily need to retrieve the currently processed query.
+
+.. code-block:: lua
+
+	consume = function (state, req, pkt)
+		req = kres.request_t(req)
+		print(req.options)
+		print(req.state)
+
+		-- Print information about current query
+		local current = req:current()
+		print(kres.dname2str(current.owner))
+		print(current.stype, current.sclass, current.id, current.flags)
+	end
+
+In layers that either begin or finalize, you can access the list of resolved queries, e.g. fetch the last resolved one.
+
+.. code-block:: lua
+
+	local last = req:resolved()
+	print(last.stype)
+
+As described in the layers section, you can not only retrieve information about the current query, but also push new ones or pop old ones.
+
+.. code-block:: lua
+
+ -- Push new query
+ local qry = req:push(pkt:qname(), kres.type.SOA, kres.class.IN)
+ qry.flags.AWAIT_CUT = true
+
+ -- Pop the query, this will erase it from resolution plan
+ req:pop(qry)
+
+
+.. _libknot: https://gitlab.labs.nic.cz/knot/knot-dns/tree/master/src/libknot
+.. _bindings: https://gitlab.labs.nic.cz/knot/knot-resolver/blob/master/daemon/lua/kres.lua
+
+
+Significant Lua API changes
+---------------------------
+
+Incompatible changes since 3.0.0
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+In the main ``kres.*`` Lua binding, the only change was in struct knot_rrset_t:
+
+- the constructor now accepts TTL as an additional parameter (defaulting to zero)
+- add_rdata() doesn't accept TTL anymore (and will throw an error if one is passed)
+
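+A minimal migration sketch derived from the two points above (``owner`` and ``rdata``
+are hypothetical, pre-built values):
+
+.. code-block:: lua
+
+	-- Before 3.0.0, the TTL was passed to add_rdata(); since 3.0.0 it
+	-- belongs to the constructor (and defaults to zero).
+	local rr = kres.rrset(owner, kres.type.A, kres.class.IN, 300)
+	rr:add_rdata(rdata, #rdata)
+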
+In case you used knot_* functions and structures bound to Lua:
+
+- knot_dname_is_sub(a, b): knot_dname_in_bailiwick(a, b) > 0
+- knot_rdata_rdlen(): knot_rdataset_at().len
+- knot_rdata_data(): knot_rdataset_at().data
+- knot_rdata_array_size(): offsetof(struct knot_data_t, data) + knot_rdataset_at().len
+- struct knot_rdataset: field names were renamed to .count and .rdata
+- some functions got inlined from headers, but you can use their kr_* clones:
+ kr_rrsig_sig_inception(), kr_rrsig_sig_expiration(), kr_rrsig_type_covered().
+ Note that these functions now accept knot_rdata_t* instead of a pair
+ knot_rdataset_t* and size_t - you can use knot_rdataset_at() for that.
+
+- knot_rrset_add_rdata() doesn't take a TTL parameter anymore
+- knot_rrset_init_empty() was inlined, but in Lua you can use the constructor
+- knot_rrset_ttl() was inlined, but in Lua you can use the :ttl() method instead
+
+- knot_pkt_qname(), _qtype(), _qclass(), _rr(), _section() were inlined,
+  but in Lua you can use methods instead, e.g. myPacket:qname()
+- knot_pkt_free() takes knot_pkt_t* instead of knot_pkt_t**, but from Lua
+  you probably didn't want to use that anyway; the constructor ensures garbage collection.
+
+
+.. |---| unicode:: U+02014 .. em dash
diff --git a/lib/cache/api.c b/lib/cache/api.c
new file mode 100644
index 0000000..c0591d6
--- /dev/null
+++ b/lib/cache/api.c
@@ -0,0 +1,889 @@
+/* Copyright (C) 2014-2017 CZ.NIC, z.s.p.o. <knot-dns@labs.nic.cz>
+
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program. If not, see <https://www.gnu.org/licenses/>.
+ */
+
+#include <assert.h>
+#include <errno.h>
+#include <limits.h>
+#include <sys/stat.h>
+#include <sys/time.h>
+#include <time.h>
+#include <unistd.h>
+
+#include <libknot/descriptor.h>
+#include <libknot/dname.h>
+#include <libknot/errcode.h>
+#include <libknot/rrtype/rrsig.h>
+
+#include "contrib/cleanup.h"
+#include "contrib/ucw/lib.h"
+#include "lib/cache/api.h"
+#include "lib/cache/cdb_lmdb.h"
+#include "lib/defines.h"
+#include "lib/generic/trie.h"
+#include "lib/resolve.h"
+#include "lib/rplan.h"
+#include "lib/utils.h"
+
+#include "lib/cache/impl.h"
+
+/* TODO:
+ * - Reconsider when RRSIGs are put in and retrieved from the cache.
+ * Currently it's always done, which _might_ be spurious, depending
+ * on how kresd will use the returned result.
+ * There's also the "problem" that kresd ATM does _not_ ask upstream
+ * with DO bit in some cases.
+ */
+
+
+/** Cache version */
+static const uint16_t CACHE_VERSION = 5;
+/** Key size */
+#define KEY_HSIZE (sizeof(uint8_t) + sizeof(uint16_t))
+#define KEY_SIZE (KEY_HSIZE + KNOT_DNAME_MAXLEN)
+
+
+/** @internal Forward declarations of the implementation details
+ * \param has_optout[out] Set *has_optout = true when encountering an opt-out NSEC3 (optional). */
+static ssize_t stash_rrset(struct kr_cache *cache, const struct kr_query *qry,
+ const knot_rrset_t *rr, const knot_rrset_t *rr_sigs, uint32_t timestamp,
+ uint8_t rank, trie_t *nsec_pmap, bool *has_optout);
+/** Preliminary checks before stash_rrset(). Don't call stash_rrset() if this returns <= 0. */
+static int stash_rrset_precond(const knot_rrset_t *rr, const struct kr_query *qry/*logs*/);
+
+/** @internal Removes all records from cache. */
+static inline int cache_clear(struct kr_cache *cache)
+{
+ cache->stats.delete += 1;
+ return cache_op(cache, clear);
+}
+
+/** @internal Open cache db transaction and check internal data version. */
+static int assert_right_version(struct kr_cache *cache)
+{
+ /* Check cache ABI version. */
+ /* CACHE_KEY_DEF: to avoid collisions with kr_cache_match(). */
+ uint8_t key_str[4] = "VERS";
+ knot_db_val_t key = { .data = key_str, .len = sizeof(key_str) };
+ knot_db_val_t val = { NULL, 0 };
+ int ret = cache_op(cache, read, &key, &val, 1);
+ if (ret == 0 && val.len == sizeof(CACHE_VERSION)
+ && memcmp(val.data, &CACHE_VERSION, sizeof(CACHE_VERSION)) == 0) {
+ ret = kr_error(EEXIST);
+ } else {
+ int oldret = ret;
+ /* Version doesn't match. Recreate cache and write version key. */
+ ret = cache_op(cache, count);
+ if (ret != 0) { /* Non-empty cache, purge it. */
+ kr_log_info("[ ][cach] incompatible cache database detected, purging\n");
+ if (oldret) {
+ kr_log_verbose("bad ret: %d\n", oldret);
+ } else if (val.len != sizeof(CACHE_VERSION)) {
+ kr_log_verbose("bad length: %d\n", (int)val.len);
+ } else {
+ uint16_t ver;
+ memcpy(&ver, val.data, sizeof(ver));
+ kr_log_verbose("bad version: %d\n", (int)ver);
+ }
+ ret = cache_clear(cache);
+ }
+ /* Either purged or empty. */
+ if (ret == 0) {
+ /* Key/Val is invalidated by cache purge, recreate it */
+ val.data = /*const-cast*/(void *)&CACHE_VERSION;
+ val.len = sizeof(CACHE_VERSION);
+ ret = cache_op(cache, write, &key, &val, 1);
+ }
+ }
+ kr_cache_sync(cache);
+ return ret;
+}
+
+int kr_cache_open(struct kr_cache *cache, const struct kr_cdb_api *api, struct kr_cdb_opts *opts, knot_mm_t *mm)
+{
+ if (!cache) {
+ return kr_error(EINVAL);
+ }
+ /* Open cache */
+ if (!api) {
+ api = kr_cdb_lmdb();
+ }
+ cache->api = api;
+ int ret = cache->api->open(&cache->db, opts, mm);
+ if (ret != 0) {
+ return ret;
+ }
+ memset(&cache->stats, 0, sizeof(cache->stats));
+ cache->ttl_min = KR_CACHE_DEFAULT_TTL_MIN;
+ cache->ttl_max = KR_CACHE_DEFAULT_TTL_MAX;
+ /* Check cache ABI version */
+ kr_cache_make_checkpoint(cache);
+ (void)assert_right_version(cache);
+
+ char *fpath;
+ ret = asprintf(&fpath, "%s/data.mdb", opts->path);
+ if (ret > 0) {
+ kr_cache_emergency_file_to_remove = fpath;
+ } else {
+ assert(false); /* non-critical, but still */
+ }
+ return 0;
+}
+
+const char *kr_cache_emergency_file_to_remove = NULL;
+
+
+#define cache_isvalid(cache) ((cache) && (cache)->api && (cache)->db)
+
+void kr_cache_close(struct kr_cache *cache)
+{
+ if (cache_isvalid(cache)) {
+ cache_op(cache, close);
+ cache->db = NULL;
+ }
+ free(/*const-cast*/(char*)kr_cache_emergency_file_to_remove);
+ kr_cache_emergency_file_to_remove = NULL;
+}
+
+int kr_cache_sync(struct kr_cache *cache)
+{
+ if (!cache_isvalid(cache)) {
+ return kr_error(EINVAL);
+ }
+ if (cache->api->sync) {
+ return cache_op(cache, sync);
+ }
+ return kr_ok();
+}
+
+int kr_cache_insert_rr(struct kr_cache *cache, const knot_rrset_t *rr, const knot_rrset_t *rrsig, uint8_t rank, uint32_t timestamp)
+{
+ int err = stash_rrset_precond(rr, NULL);
+ if (err <= 0) {
+ return kr_ok();
+ }
+ ssize_t written = stash_rrset(cache, NULL, rr, rrsig, timestamp, rank, NULL, NULL);
+	/* Zone's NSEC* params aren't updated, but that's probably OK
+	 * for kr_cache_insert_rr() */
+ if (written >= 0) {
+ return kr_ok();
+ }
+
+ return (int) written;
+}
+
+int kr_cache_clear(struct kr_cache *cache)
+{
+ if (!cache_isvalid(cache)) {
+ return kr_error(EINVAL);
+ }
+ int ret = cache_clear(cache);
+ if (ret == 0) {
+ kr_cache_make_checkpoint(cache);
+ ret = assert_right_version(cache);
+ }
+ return ret;
+}
+
+/* When going stricter, BEWARE of breaking entry_h_consistent_NSEC() */
+struct entry_h * entry_h_consistent(knot_db_val_t data, uint16_t type)
+{
+ (void) type; /* unused, for now */
+ if (!data.data) return NULL;
+ /* Length checks. */
+ if (data.len < offsetof(struct entry_h, data))
+ return NULL;
+ const struct entry_h *eh = data.data;
+ if (eh->is_packet) {
+ uint16_t pkt_len;
+ if (data.len < offsetof(struct entry_h, data) + sizeof(pkt_len)) {
+ return NULL;
+ }
+ memcpy(&pkt_len, eh->data, sizeof(pkt_len));
+ if (data.len < offsetof(struct entry_h, data) + sizeof(pkt_len)
+ + pkt_len) {
+ return NULL;
+ }
+ }
+
+ bool ok = true;
+ ok = ok && kr_rank_check(eh->rank);
+ ok = ok && (!kr_rank_test(eh->rank, KR_RANK_BOGUS)
+ || eh->is_packet);
+ ok = ok && (eh->is_packet || !eh->has_optout);
+
+ return ok ? /*const-cast*/(struct entry_h *)eh : NULL;
+}
+
+int32_t get_new_ttl(const struct entry_h *entry, const struct kr_query *qry,
+ const knot_dname_t *owner, uint16_t type, uint32_t now)
+{
+ int32_t diff = now - entry->time;
+ if (diff < 0) {
+ /* We may have obtained the record *after* the request started. */
+ diff = 0;
+ }
+ int32_t res = entry->ttl - diff;
+ if (res < 0 && owner && qry && qry->stale_cb) {
+ /* Stale-serving decision, delegated to a callback. */
+ int res_stale = qry->stale_cb(res, owner, type, qry);
+ if (res_stale >= 0)
+ return res_stale;
+ }
+ return res;
+}
+
+int32_t kr_cache_ttl(const struct kr_cache_p *peek, const struct kr_query *qry,
+ const knot_dname_t *name, uint16_t type)
+{
+ const struct entry_h *eh = peek->raw_data;
+ return get_new_ttl(eh, qry, name, type, qry->timestamp.tv_sec);
+}
+
+/** Check that no label contains a zero character, incl. a log trace.
+ *
+ * We refuse to work with those, as LF and our cache keys might become ambiguous.
+ * Assuming uncompressed name, as usual.
+ * CACHE_KEY_DEF
+ */
+static bool check_dname_for_lf(const knot_dname_t *n, const struct kr_query *qry/*logging*/)
+{
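+	/* Note: the two sizes match exactly iff no label contains a zero byte,
+	 * as strlen() stops at the first zero while knot_dname_size() counts
+	 * the whole wire form incl. the terminating root byte. */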
+ const bool ret = knot_dname_size(n) == strlen((const char *)n) + 1;
+ if (!ret) { WITH_VERBOSE(qry) {
+ auto_free char *n_str = kr_dname_text(n);
+ VERBOSE_MSG(qry, "=> skipping zero-containing name %s\n", n_str);
+ } }
+ return ret;
+}
+
+/** Return false on types to be ignored. Meant both for sname and direct cache requests. */
+static bool check_rrtype(uint16_t type, const struct kr_query *qry/*logging*/)
+{
+ const bool ret = !knot_rrtype_is_metatype(type)
+ && type != KNOT_RRTYPE_RRSIG;
+ if (!ret) { WITH_VERBOSE(qry) {
+ auto_free char *type_str = kr_rrtype_text(type);
+ VERBOSE_MSG(qry, "=> skipping RR type %s\n", type_str);
+ } }
+ return ret;
+}
+
+/** Like key_exact_type() but omits a couple checks not holding for pkt cache. */
+knot_db_val_t key_exact_type_maypkt(struct key *k, uint16_t type)
+{
+ assert(check_rrtype(type, NULL));
+ switch (type) {
+ case KNOT_RRTYPE_RRSIG: /* no RRSIG query caching, at least for now */
+ assert(false);
+ return (knot_db_val_t){ NULL, 0 };
+ /* xNAME lumped into NS. */
+ case KNOT_RRTYPE_CNAME:
+ case KNOT_RRTYPE_DNAME:
+ type = KNOT_RRTYPE_NS;
+ default:
+ break;
+ }
+
+ int name_len = k->buf[0];
+ k->buf[name_len + 1] = 0; /* make sure different names can never match */
+ k->buf[name_len + 2] = 'E'; /* tag for exact name+type matches */
+ memcpy(k->buf + name_len + 3, &type, 2);
+ k->type = type;
+ /* CACHE_KEY_DEF: key == dname_lf + '\0' + 'E' + RRTYPE */
+ return (knot_db_val_t){ k->buf + 1, name_len + 4 };
+}
+
+
+/** The inner part of cache_peek(); implementation separated into ./peek.c */
+int peek_nosync(kr_layer_t *ctx, knot_pkt_t *pkt);
+/** Function for the .produce phase. */
+int cache_peek(kr_layer_t *ctx, knot_pkt_t *pkt)
+{
+ struct kr_request *req = ctx->req;
+ struct kr_query *qry = req->current_query;
+ /* We first check various exit-conditions and then call the _real function. */
+
+ if (!kr_cache_is_open(&req->ctx->cache)
+ || ctx->state & (KR_STATE_FAIL|KR_STATE_DONE) || qry->flags.NO_CACHE
+ || (qry->flags.CACHE_TRIED && !qry->stale_cb)
+ || !check_rrtype(qry->stype, qry) /* LATER: some other behavior for some of these? */
+ || qry->sclass != KNOT_CLASS_IN) {
+ return ctx->state; /* Already resolved/failed or already tried, etc. */
+ }
+ /* ATM cache only peeks for qry->sname and that would be useless
+ * to repeat on every iteration, so disable it from now on.
+ * LATER(optim.): assist with more precise QNAME minimization. */
+ qry->flags.CACHE_TRIED = true;
+
+ if (qry->stype == KNOT_RRTYPE_NSEC) {
+ VERBOSE_MSG(qry, "=> skipping stype NSEC\n");
+ return ctx->state;
+ }
+ if (!check_dname_for_lf(qry->sname, qry)) {
+ return ctx->state;
+ }
+
+ int ret = peek_nosync(ctx, pkt);
+ kr_cache_sync(&req->ctx->cache);
+ return ret;
+}
+
+
+
+/** It's simply the loop body, taken out to decrease indentation. \return error code. */
+static int stash_rrarray_entry(ranked_rr_array_t *arr, int arr_i,
+ const struct kr_query *qry, struct kr_cache *cache,
+ int *unauth_cnt, trie_t *nsec_pmap, bool *has_optout);
+/** Stash a single nsec_p. \return 0 (errors are ignored). */
+static int stash_nsec_p(const knot_dname_t *dname, const char *nsec_p_v,
+ struct kr_request *req);
+
+/** The whole .consume phase for the cache module. */
+int cache_stash(kr_layer_t *ctx, knot_pkt_t *pkt)
+{
+ struct kr_request *req = ctx->req;
+ struct kr_query *qry = req->current_query;
+ struct kr_cache *cache = &req->ctx->cache;
+
+ /* Note: we cache even in KR_STATE_FAIL. For example,
+ * BOGUS answer can go to +cd cache even without +cd request. */
+ if (!kr_cache_is_open(cache) || !qry
+ || qry->flags.CACHED || !check_rrtype(knot_pkt_qtype(pkt), qry)
+ || qry->sclass != KNOT_CLASS_IN) {
+ return ctx->state;
+ }
+ /* Do not cache truncated answers, at least for now. LATER */
+ if (knot_wire_get_tc(pkt->wire)) {
+ return ctx->state;
+ }
+ /* Stash individual records. */
+ ranked_rr_array_t *selected[] = kr_request_selected(req);
+ int unauth_cnt = 0;
+ trie_t *nsec_pmap = trie_create(&req->pool);
+ if (!nsec_pmap) {
+ assert(!ENOMEM);
+ goto finally;
+ }
+ bool has_optout = false;
+ /* ^^ DNSSEC_OPTOUT is not fired in cases like `com. A`,
+ * but currently we don't stash separate NSEC3 proving that. */
+ for (int psec = KNOT_ANSWER; psec <= KNOT_ADDITIONAL; ++psec) {
+ ranked_rr_array_t *arr = selected[psec];
+ /* uncached entries are located at the end */
+ for (ssize_t i = arr->len - 1; i >= 0; --i) {
+ ranked_rr_array_entry_t *entry = arr->at[i];
+ if (entry->qry_uid != qry->uid) {
+ continue;
+ /* TODO: probably safe to break but maybe not worth it */
+ }
+ int ret = stash_rrarray_entry(arr, i, qry, cache, &unauth_cnt,
+ nsec_pmap, &has_optout);
+ if (ret) {
+ VERBOSE_MSG(qry, "=> stashing RRs errored out\n");
+ goto finally;
+ }
+ /* LATER(optim.): maybe filter out some type-rank combinations
+ * that won't be useful as separate RRsets. */
+ }
+ }
+
+ trie_it_t *it;
+ for (it = trie_it_begin(nsec_pmap); !trie_it_finished(it); trie_it_next(it)) {
+ stash_nsec_p((const knot_dname_t *)trie_it_key(it, NULL),
+ (const char *)*trie_it_val(it), req);
+ }
+ trie_it_free(it);
+ /* LATER(optim.): typically we also have corresponding NS record in the list,
+ * so we might save a cache operation. */
+
+ stash_pkt(pkt, qry, req, has_optout);
+
+finally:
+	if (unauth_cnt) {
+		VERBOSE_MSG(qry, "=> stashed also %d nonauth RRsets\n", unauth_cnt);
+	}
+ kr_cache_sync(cache);
+ return ctx->state; /* we ignore cache-stashing errors */
+}
+
+/** Preliminary checks before stash_rrset(). Don't call stash_rrset() if this returns <= 0. */
+static int stash_rrset_precond(const knot_rrset_t *rr, const struct kr_query *qry/*logs*/)
+{
+ if (!rr || rr->rclass != KNOT_CLASS_IN) {
+ assert(!EINVAL);
+ return kr_error(EINVAL);
+ }
+ if (!check_rrtype(rr->type, qry)) {
+ return kr_ok();
+ }
+ if (!check_dname_for_lf(rr->owner, qry)) {
+ return kr_ok();
+ }
+ return 1/*proceed*/;
+}
+
+static ssize_t stash_rrset(struct kr_cache *cache, const struct kr_query *qry,
+ const knot_rrset_t *rr, const knot_rrset_t *rr_sigs, uint32_t timestamp,
+ uint8_t rank, trie_t *nsec_pmap, bool *has_optout)
+{
+ assert(stash_rrset_precond(rr, qry) > 0);
+ if (!cache) {
+ assert(!EINVAL);
+ return kr_error(EINVAL);
+ }
+
+ const int wild_labels = rr_sigs == NULL ? 0 :
+ knot_dname_labels(rr->owner, NULL) - knot_rrsig_labels(rr_sigs->rrs.rdata);
+ if (wild_labels < 0) {
+ return kr_ok();
+ }
+ const knot_dname_t *encloser = rr->owner; /**< the closest encloser name */
+ for (int i = 0; i < wild_labels; ++i) {
+ encloser = knot_wire_next_label(encloser, NULL);
+ }
+ int ret = 0;
+
+ /* Construct the key under which RRs will be stored,
+ * and add corresponding nsec_pmap item (if necessary). */
+ struct key k_storage, *k = &k_storage;
+ knot_db_val_t key;
+ switch (rr->type) {
+ case KNOT_RRTYPE_NSEC3:
+ /* Skip "suspicious" or opt-out NSEC3 sets. */
+ if (rr->rrs.count != 1) return kr_ok();
+ if (KNOT_NSEC3_FLAG_OPT_OUT & knot_nsec3_flags(rr->rrs.rdata)) {
+ if (has_optout) *has_optout = true;
+ return kr_ok();
+ }
+ /* fall through */
+ case KNOT_RRTYPE_NSEC:
+ if (!kr_rank_test(rank, KR_RANK_SECURE)) {
+ /* Skip any NSEC*s that aren't validated. */
+ return kr_ok();
+ }
+ if (!rr_sigs || !rr_sigs->rrs.count || !rr_sigs->rrs.rdata) {
+ assert(!EINVAL);
+ return kr_error(EINVAL);
+ }
+ const knot_dname_t *signer = knot_rrsig_signer_name(rr_sigs->rrs.rdata);
+ const int signer_size = knot_dname_size(signer);
+ k->zlf_len = signer_size - 1;
+
+ void **npp = nsec_pmap == NULL ? NULL
+ : trie_get_ins(nsec_pmap, (const char *)signer, signer_size);
+ assert(!nsec_pmap || (npp && ENOMEM));
+ if (rr->type == KNOT_RRTYPE_NSEC) {
+ key = key_NSEC1(k, encloser, wild_labels);
+ break;
+ }
+
+ assert(rr->type == KNOT_RRTYPE_NSEC3);
+ const knot_rdata_t * const rdata = rr->rrs.rdata;
+ if (rdata->len <= 4) return kr_error(EILSEQ); /*< data from outside; less trust */
+ const int np_dlen = nsec_p_rdlen(rdata->data);
+ if (np_dlen > rdata->len) return kr_error(EILSEQ);
+ key = key_NSEC3(k, encloser, nsec_p_mkHash(rdata->data));
+ if (npp && !*npp) {
+ *npp = mm_alloc(&qry->request->pool, np_dlen);
+ if (!*npp) {
+ assert(!ENOMEM);
+ break;
+ }
+ memcpy(*npp, rdata->data, np_dlen);
+ }
+ break;
+ default:
+ ret = kr_dname_lf(k->buf, encloser, wild_labels);
+ if (ret) {
+ assert(!ret);
+ return kr_error(ret);
+ }
+ key = key_exact_type(k, rr->type);
+ }
+
+ /* Compute materialized sizes of the new data. */
+ const knot_rdataset_t *rds_sigs = rr_sigs ? &rr_sigs->rrs : NULL;
+ const int rr_ssize = rdataset_dematerialize_size(&rr->rrs);
+ assert(rr_ssize == to_even(rr_ssize));
+ knot_db_val_t val_new_entry = {
+ .data = NULL,
+ .len = offsetof(struct entry_h, data) + rr_ssize
+ + rdataset_dematerialize_size(rds_sigs),
+ };
+
+ /* Prepare raw memory for the new entry. */
+ ret = entry_h_splice(&val_new_entry, rank, key, k->type, rr->type,
+ rr->owner, qry, cache, timestamp);
+ if (ret) return kr_ok(); /* some aren't really errors */
+ assert(val_new_entry.data);
+
+ const uint32_t ttl = rr->ttl;
+ /* FIXME: consider TTLs and expirations of RRSIGs as well, just in case. */
+
+ /* Write the entry itself. */
+ struct entry_h *eh = val_new_entry.data;
+ memset(eh, 0, offsetof(struct entry_h, data));
+ eh->time = timestamp;
+ eh->ttl = MAX(MIN(ttl, cache->ttl_max), cache->ttl_min);
+ eh->rank = rank;
+ if (rdataset_dematerialize(&rr->rrs, eh->data)
+ || rdataset_dematerialize(rds_sigs, eh->data + rr_ssize)) {
+ /* minimize the damage from incomplete write; TODO: better */
+ eh->time = 0;
+ eh->ttl = 0;
+ eh->rank = 0;
+ assert(false);
+ }
+ assert(entry_h_consistent(val_new_entry, rr->type));
+
+ #if 0 /* Occasionally useful when debugging some kinds of changes. */
+ {
+ kr_cache_sync(cache);
+ knot_db_val_t val = { NULL, 0 };
+ ret = cache_op(cache, read, &key, &val, 1);
+ if (ret != kr_error(ENOENT)) { // ENOENT might happen in some edge case, I guess
+ assert(!ret);
+ entry_list_t el;
+ entry_list_parse(val, el);
+ }
+ }
+ #endif
+
+ /* Update metrics */
+ cache->stats.insert += 1;
+
+ /* Verbose-log some not-too-common cases. */
+ WITH_VERBOSE(qry) { if (kr_rank_test(rank, KR_RANK_AUTH)
+ || rr->type == KNOT_RRTYPE_NS) {
+ auto_free char *type_str = kr_rrtype_text(rr->type),
+ *encl_str = kr_dname_text(encloser);
+ VERBOSE_MSG(qry, "=> stashed %s%s %s, rank 0%.2o, "
+ "%d B total, incl. %d RRSIGs\n",
+ (wild_labels ? "*." : ""), encl_str, type_str, rank,
+ (int)val_new_entry.len, (rr_sigs ? rr_sigs->rrs.count : 0)
+ );
+ } }
+
+ return (ssize_t) val_new_entry.len;
+}
+
+static int stash_rrarray_entry(ranked_rr_array_t *arr, int arr_i,
+ const struct kr_query *qry, struct kr_cache *cache,
+ int *unauth_cnt, trie_t *nsec_pmap, bool *has_optout)
+{
+ ranked_rr_array_entry_t *entry = arr->at[arr_i];
+ if (entry->cached) {
+ return kr_ok();
+ }
+ const knot_rrset_t *rr = entry->rr;
+ if (rr->type == KNOT_RRTYPE_RRSIG) {
+ return kr_ok(); /* reduce verbose logging from the following call */
+ }
+ int ret = stash_rrset_precond(rr, qry);
+ if (ret <= 0) {
+ return ret;
+ }
+
+ /* Try to find corresponding signatures, always. LATER(optim.): speed. */
+ ranked_rr_array_entry_t *entry_rrsigs = NULL;
+ const knot_rrset_t *rr_sigs = NULL;
+ for (ssize_t j = arr->len - 1; j >= 0; --j) {
+ /* TODO: ATM we assume that some properties are the same
+ * for all RRSIGs in the set (esp. label count). */
+ ranked_rr_array_entry_t *e = arr->at[j];
+ bool ok = e->qry_uid == qry->uid && !e->cached
+ && e->rr->type == KNOT_RRTYPE_RRSIG
+ && knot_rrsig_type_covered(e->rr->rrs.rdata) == rr->type
+ && knot_dname_is_equal(rr->owner, e->rr->owner);
+ if (!ok) continue;
+ entry_rrsigs = e;
+ rr_sigs = e->rr;
+ break;
+ }
+
+ ssize_t written = stash_rrset(cache, qry, rr, rr_sigs, qry->timestamp.tv_sec,
+ entry->rank, nsec_pmap, has_optout);
+ if (written < 0) {
+ kr_log_error("[%05u.%02u][cach] stash failed, ret = %d\n", qry->request->uid,
+ qry->uid, ret);
+ return (int) written;
+ }
+
+ if (written > 0) {
+ /* Mark entry as cached for the rest of the query processing */
+ entry->cached = true;
+ if (entry_rrsigs) {
+ entry_rrsigs->cached = true;
+ }
+ if (!kr_rank_test(entry->rank, KR_RANK_AUTH) && rr->type != KNOT_RRTYPE_NS) {
+ *unauth_cnt += 1;
+ }
+ }
+
+ return kr_ok();
+}
+
+static int stash_nsec_p(const knot_dname_t *dname, const char *nsec_p_v,
+ struct kr_request *req)
+{
+ const struct kr_query *qry = req->current_query;
+ struct kr_cache *cache = &req->ctx->cache;
+ uint32_t valid_until = qry->timestamp.tv_sec + cache->ttl_max;
+ /* LATER(optim.): be more precise here ^^ and reduce calls. */
+ static const int32_t ttl_margin = 3600;
+ const uint8_t *nsec_p = (const uint8_t *)nsec_p_v;
+ int data_stride = sizeof(valid_until) + nsec_p_rdlen(nsec_p);
+
+ unsigned int log_hash = 0xFeeeFeee; /* this type is simpler for printf args */
+ auto_free char *log_dname = NULL;
+ WITH_VERBOSE(qry) {
+ log_hash = nsec_p_v ? nsec_p_mkHash((const uint8_t *)nsec_p_v) : 0;
+ log_dname = kr_dname_text(dname);
+ }
+ /* Find what's in the cache. */
+ struct key k_storage, *k = &k_storage;
+ int ret = kr_dname_lf(k->buf, dname, false);
+ if (ret) return kr_error(ret);
+ knot_db_val_t key = key_exact_type(k, KNOT_RRTYPE_NS);
+ knot_db_val_t val_orig = { NULL, 0 };
+ ret = cache_op(cache, read, &key, &val_orig, 1);
+ if (ret && ret != -ABS(ENOENT)) {
+ VERBOSE_MSG(qry, "=> EL read failed (ret: %d)\n", ret);
+ return kr_ok();
+ }
+ /* Prepare new entry_list_t so we can just write at el[0]. */
+ entry_list_t el;
+ int log_refresh_by = 0;
+ if (ret == -ABS(ENOENT)) {
+ memset(el, 0, sizeof(el));
+ } else {
+ ret = entry_list_parse(val_orig, el);
+ if (ret) {
+ VERBOSE_MSG(qry, "=> EL parse failed (ret: %d)\n", ret);
+ return kr_error(0);
+ }
+ /* Find the index to replace. */
+ int i_replace = ENTRY_APEX_NSECS_CNT - 1;
+ for (int i = 0; i < ENTRY_APEX_NSECS_CNT; ++i) {
+ if (el[i].len != data_stride) continue;
+ if (nsec_p && memcmp(nsec_p, (uint8_t *)el[i].data + sizeof(uint32_t),
+ data_stride - sizeof(uint32_t)) != 0) {
+ continue;
+ }
+ /* Save a cache operation if TTL extended only a little. */
+ uint32_t valid_orig;
+ memcpy(&valid_orig, el[i].data, sizeof(valid_orig));
+ const int32_t ttl_extended_by = valid_until - valid_orig;
+ if (ttl_extended_by < ttl_margin) {
+ VERBOSE_MSG(qry,
+ "=> nsec_p stash for %s skipped (extra TTL: %d, hash: %x)\n",
+ log_dname, ttl_extended_by, log_hash);
+ return kr_ok();
+ }
+ i_replace = i;
+ log_refresh_by = ttl_extended_by;
+ break;
+ }
+ /* Shift the other indices: move the first `i_replace` blocks
+ * by one position. */
+ if (i_replace) {
+ memmove(&el[1], &el[0], sizeof(el[0]) * i_replace);
+ }
+ }
+ /* Prepare old data into a buffer. See entry_h_splice() for why. LATER(optim.) */
+ el[0].len = data_stride;
+ el[0].data = NULL;
+	knot_db_val_t val;
+	val.len = entry_list_serial_size(el);
+	val.data = mm_alloc(&req->pool, val.len);
+	entry_list_memcpy(val.data, el);
+ /* Prepare the new data chunk */
+ memcpy(el[0].data, &valid_until, sizeof(valid_until));
+ if (nsec_p) {
+ memcpy((uint8_t *)el[0].data + sizeof(valid_until), nsec_p,
+ data_stride - sizeof(valid_until));
+ }
+ /* Write it all to the cache */
+ ret = cache_op(cache, write, &key, &val, 1);
+ if (ret || !val.data) {
+ VERBOSE_MSG(qry, "=> EL write failed (ret: %d)\n", ret);
+ return kr_ok();
+ }
+ if (log_refresh_by) {
+ VERBOSE_MSG(qry, "=> nsec_p stashed for %s (refresh by %d, hash: %x)\n",
+ log_dname, log_refresh_by, log_hash);
+ } else {
+ VERBOSE_MSG(qry, "=> nsec_p stashed for %s (new, hash: %x)\n",
+ log_dname, log_hash);
+ }
+ return kr_ok();
+}
+
+
+static int peek_exact_real(struct kr_cache *cache, const knot_dname_t *name, uint16_t type,
+ struct kr_cache_p *peek)
+{
+ if (!check_rrtype(type, NULL) || !check_dname_for_lf(name, NULL)) {
+ return kr_error(ENOTSUP);
+ }
+ struct key k_storage, *k = &k_storage;
+
+ int ret = kr_dname_lf(k->buf, name, false);
+ if (ret) return kr_error(ret);
+
+ knot_db_val_t key = key_exact_type(k, type);
+ knot_db_val_t val = { NULL, 0 };
+ ret = cache_op(cache, read, &key, &val, 1);
+ if (!ret) ret = entry_h_seek(&val, type);
+ if (ret) return kr_error(ret);
+
+ const struct entry_h *eh = entry_h_consistent(val, type);
+ if (!eh || eh->is_packet) {
+ // TODO: no packets, but better get rid of whole kr_cache_peek_exact().
+ return kr_error(ENOENT);
+ }
+ *peek = (struct kr_cache_p){
+ .time = eh->time,
+ .ttl = eh->ttl,
+ .rank = eh->rank,
+ .raw_data = val.data,
+ .raw_bound = knot_db_val_bound(val),
+ };
+ return kr_ok();
+}
+int kr_cache_peek_exact(struct kr_cache *cache, const knot_dname_t *name, uint16_t type,
+ struct kr_cache_p *peek)
+{ /* Just wrap with extra verbose logging. */
+ const int ret = peek_exact_real(cache, name, type, peek);
+ if (false && VERBOSE_STATUS) { /* too noisy for usual --verbose */
+ auto_free char *type_str = kr_rrtype_text(type),
+ *name_str = kr_dname_text(name);
+ const char *result_str = (ret == kr_ok() ? "hit" :
+ (ret == kr_error(ENOENT) ? "miss" : "error"));
+ VERBOSE_MSG(NULL, "_peek_exact: %s %s %s (ret: %d)",
+ type_str, name_str, result_str, ret);
+ }
+ return ret;
+}
+
+int kr_cache_remove(struct kr_cache *cache, const knot_dname_t *name, uint16_t type)
+{
+ if (!cache_isvalid(cache)) {
+ return kr_error(EINVAL);
+ }
+ if (!cache->api->remove) {
+ return kr_error(ENOSYS);
+ }
+ struct key k_storage, *k = &k_storage;
+ int ret = kr_dname_lf(k->buf, name, false);
+ if (ret) return kr_error(ret);
+
+ knot_db_val_t key = key_exact_type(k, type);
+ return cache_op(cache, remove, &key, 1);
+}
+
+int kr_cache_match(struct kr_cache *cache, const knot_dname_t *name,
+ bool exact_name, knot_db_val_t keyval[][2], int maxcount)
+{
+ if (!cache_isvalid(cache)) {
+ return kr_error(EINVAL);
+ }
+ if (!cache->api->match) {
+ return kr_error(ENOSYS);
+ }
+
+ struct key k_storage, *k = &k_storage;
+
+ int ret = kr_dname_lf(k->buf, name, false);
+ if (ret) return kr_error(ret);
+
+ // use a mock type
+ knot_db_val_t key = key_exact_type(k, KNOT_RRTYPE_A);
+ /* CACHE_KEY_DEF */
+ key.len -= sizeof(uint16_t); /* the type */
+ if (!exact_name) {
+ key.len -= 2; /* '\0' 'E' */
+ if (name[0] == '\0') ++key.len; /* the root name is special ATM */
+ }
+ return cache_op(cache, match, &key, keyval, maxcount);
+}
+
+int kr_unpack_cache_key(knot_db_val_t key, knot_dname_t *buf, uint16_t *type)
+{
+ if (key.data == NULL || buf == NULL || type == NULL) {
+ return kr_error(EINVAL);
+ }
+
+ int len = -1;
+ const char *tag, *key_data = key.data;
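+	/* Find where the dname-lf part of the key ends: it is terminated
+	 * by a zero byte followed by the 'E' tag (see CACHE_KEY_DEF). */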
+ for (tag = key_data + 1; tag < key_data + key.len; ++tag) {
+ /* CACHE_KEY_DEF */
+ if (tag[-1] == '\0' && (tag == key_data + 1 || tag[-2] == '\0')) {
+ if (tag[0] != 'E') return kr_error(EINVAL);
+ len = tag - 1 - key_data;
+ break;
+ }
+ }
+
+ if (len == -1 || len > KNOT_DNAME_MAXLEN) {
+ return kr_error(EINVAL);
+ }
+
+ int ret = knot_dname_lf2wire(buf, len, key.data);
+ if (ret < 0) {
+ return kr_error(ret);
+ }
+
+ /* CACHE_KEY_DEF: jump over "\0 E/1" */
+ memcpy(type, tag + 1, sizeof(uint16_t));
+
+ return kr_ok();
+}
+
+
+int kr_cache_remove_subtree(struct kr_cache *cache, const knot_dname_t *name,
+ bool exact_name, int maxcount)
+{
+ if (!cache_isvalid(cache)) {
+ return kr_error(EINVAL);
+ }
+
+ knot_db_val_t keyval[maxcount][2], keys[maxcount];
+ int ret = kr_cache_match(cache, name, exact_name, keyval, maxcount);
+ if (ret <= 0) { /* ENOENT -> nothing to remove */
+ return (ret == KNOT_ENOENT) ? 0 : ret;
+ }
+ const int count = ret;
+ /* Duplicate the key strings, as deletion may invalidate the pointers. */
+ int i;
+ for (i = 0; i < count; ++i) {
+ keys[i].len = keyval[i][0].len;
+ keys[i].data = malloc(keys[i].len);
+ if (!keys[i].data) {
+ ret = kr_error(ENOMEM);
+ goto cleanup;
+ }
+ memcpy(keys[i].data, keyval[i][0].data, keys[i].len);
+ }
+ ret = cache->api->remove(cache->db, keys, count);
+cleanup:
+ kr_cache_sync(cache); /* Sync even after just kr_cache_match(). */
+ /* Free keys */
+ while (--i >= 0) {
+ free(keys[i].data);
+ }
+ return ret;
+}
+
diff --git a/lib/cache/api.h b/lib/cache/api.h
new file mode 100644
index 0000000..61bf796
--- /dev/null
+++ b/lib/cache/api.h
@@ -0,0 +1,201 @@
+/* Copyright (C) 2014-2017 CZ.NIC, z.s.p.o. <knot-dns@labs.nic.cz>
+
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program. If not, see <https://www.gnu.org/licenses/>.
+ */
+
+#pragma once
+
+#include <libknot/consts.h>
+#include <libknot/rrset.h>
+#include <sys/time.h>
+#include "lib/cache/cdb_api.h"
+#include "lib/defines.h"
+#include "contrib/ucw/config.h" /*uint*/
+
+/** When knot_pkt is passed from cache without ->wire, this is the ->size. */
+static const size_t PKT_SIZE_NOWIRE = -1;
+
+
+#include "lib/module.h"
+/* Prototypes for the 'cache' module implementation. */
+int cache_peek(kr_layer_t *ctx, knot_pkt_t *pkt);
+int cache_stash(kr_layer_t *ctx, knot_pkt_t *pkt);
+
+
+/**
+ * Cache structure, keeps API, instance and metadata.
+ */
+struct kr_cache
+{
+ knot_db_t *db; /**< Storage instance */
+ const struct kr_cdb_api *api; /**< Storage engine */
+ struct {
+ uint32_t hit; /**< Number of cache hits */
+ uint32_t miss; /**< Number of cache misses */
+ uint32_t insert; /**< Number of insertions */
+ uint32_t delete; /**< Number of deletions */
+ } stats;
+
+ uint32_t ttl_min, ttl_max; /**< TTL limits */
+
+ /* A pair of stamps for detection of real-time shifts during runtime. */
+ struct timeval checkpoint_walltime; /**< Wall time on the last check-point. */
+ uint64_t checkpoint_monotime; /**< Monotonic milliseconds on the last check-point. */
+};
+
+/**
+ * Open/create cache with provided storage options.
+ * @param cache cache structure to be initialized
+ * @param api storage engine API
+ * @param opts storage-specific options (may be NULL for default)
+ * @param mm memory context.
+ * @return 0 or an error code
+ */
+KR_EXPORT
+int kr_cache_open(struct kr_cache *cache, const struct kr_cdb_api *api, struct kr_cdb_opts *opts, knot_mm_t *mm);
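+/* A usage sketch (the path and size below are illustrative only):
+ *
+ *	struct kr_cache cache;
+ *	struct kr_cdb_opts opts = { .path = "./cache", .maxsize = 100 * 1024 * 1024 };
+ *	int ret = kr_cache_open(&cache, NULL, &opts, NULL); // NULL api selects LMDB
+ */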
+
+/**
+ * Path to cache file to remove on critical out-of-space error. (do NOT modify it)
+ */
+KR_EXPORT extern
+const char *kr_cache_emergency_file_to_remove;
+
+/**
+ * Close persistent cache.
+ * @note This doesn't clear the data, just closes the connection to the database.
+ * @param cache structure
+ */
+KR_EXPORT
+void kr_cache_close(struct kr_cache *cache);
+
+/** Run after a batch of operations to release the transaction/lock if needed. */
+KR_EXPORT
+int kr_cache_sync(struct kr_cache *cache);
+
+/**
+ * Return true if cache is open and enabled.
+ */
+static inline bool kr_cache_is_open(struct kr_cache *cache)
+{
+ return cache->db != NULL;
+}
+
+/** (Re)set the time pair to the current values. */
+static inline void kr_cache_make_checkpoint(struct kr_cache *cache)
+{
+ cache->checkpoint_monotime = kr_now();
+ gettimeofday(&cache->checkpoint_walltime, NULL);
+}
+
+/**
+ * Insert RRSet into cache, replacing any existing data.
+ * @param cache cache structure
+ * @param rr inserted RRSet
+ * @param rrsig RRSIG for inserted RRSet (optional)
+ * @param rank rank of the data
+ * @param timestamp current time
+ * @return 0 or an errcode
+ */
+KR_EXPORT
+int kr_cache_insert_rr(struct kr_cache *cache, const knot_rrset_t *rr, const knot_rrset_t *rrsig, uint8_t rank, uint32_t timestamp);
+
+/**
+ * Clear all items from the cache.
+ * @param cache cache structure
+ * @return 0 or an errcode
+ */
+KR_EXPORT
+int kr_cache_clear(struct kr_cache *cache);
+
+
+/* ** This interface is temporary. ** */
+
+struct kr_cache_p {
+ uint32_t time; /**< The time of inception. */
+ uint32_t ttl; /**< TTL at inception moment. Assuming it fits into int32_t ATM. */
+ uint8_t rank; /**< See enum kr_rank */
+ struct {
+ /* internal: pointer to eh struct */
+ void *raw_data, *raw_bound;
+ };
+};
+KR_EXPORT
+int kr_cache_peek_exact(struct kr_cache *cache, const knot_dname_t *name, uint16_t type,
+ struct kr_cache_p *peek);
+/* Parameters (qry, name, type) are used for timestamp and stale-serving decisions. */
+KR_EXPORT
+int32_t kr_cache_ttl(const struct kr_cache_p *peek, const struct kr_query *qry,
+ const knot_dname_t *name, uint16_t type);
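+/* A usage sketch, assuming an open cache and the current query qry:
+ *
+ *	struct kr_cache_p peek;
+ *	if (kr_cache_peek_exact(cache, name, KNOT_RRTYPE_A, &peek) == 0) {
+ *		int32_t ttl = kr_cache_ttl(&peek, qry, name, KNOT_RRTYPE_A);
+ *		// a negative value means the entry has already expired
+ *	}
+ */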
+
+KR_EXPORT
+int kr_cache_materialize(knot_rdataset_t *dst, const struct kr_cache_p *ref,
+ knot_mm_t *pool);
+
+
+/**
+ * Remove an entry from cache.
+ * @param cache cache structure
+ * @param name dname
+ * @param type rr type
+ * @return number of deleted records, or negative error code
+ * @note only "exact hits" are considered ATM, and
+ * some other information may be removed alongside.
+ */
+KR_EXPORT
+int kr_cache_remove(struct kr_cache *cache, const knot_dname_t *name, uint16_t type);
+
+/**
+ * Get keys matching a dname lf prefix
+ * @param cache cache structure
+ * @param name dname
+ * @param exact_name whether to only consider exact name matches
+ * @param keyval matched key-value pairs
+ * @param maxcount limit on the number of returned key-value pairs
+ * @return result count or an errcode
+ * @note the cache keys are matched by prefix, i.e. it very much depends
+ * on their structure; CACHE_KEY_DEF.
+ */
+KR_EXPORT
+int kr_cache_match(struct kr_cache *cache, const knot_dname_t *name,
+ bool exact_name, knot_db_val_t keyval[][2], int maxcount);
+
+/**
+ * Remove a subtree from the cache. It's like kr_cache_match(), but the matched entries are removed instead of returned.
+ * @return number of deleted entries or an errcode
+ */
+KR_EXPORT
+int kr_cache_remove_subtree(struct kr_cache *cache, const knot_dname_t *name,
+ bool exact_name, int maxcount);
+
+/**
+ * Find the closest cached zone apex for a name (in cache).
+ * @param is_DS start searching one name higher
+ * @return the number of labels to remove from the name, or negative error code
+ * @note timestamp is found by a syscall, and stale-serving is not considered
+ */
+KR_EXPORT
+int kr_cache_closest_apex(struct kr_cache *cache, const knot_dname_t *name, bool is_DS,
+ knot_dname_t **apex);
+
+/**
+ * Unpack dname and type from db key
+ * @param key db key representation
+ * @param buf output buffer of domain name in dname format
+ * @param type output for type
+ * @return length of dname or an errcode
+ * @note only "exact hits" are considered ATM, moreover xNAME records
+ * are "hidden" as NS. (see comments in struct entry_h)
+ */
+KR_EXPORT
+int kr_unpack_cache_key(knot_db_val_t key, knot_dname_t *buf, uint16_t *type);
diff --git a/lib/cache/cdb_api.h b/lib/cache/cdb_api.h
new file mode 100644
index 0000000..814a5f5
--- /dev/null
+++ b/lib/cache/cdb_api.h
@@ -0,0 +1,68 @@
+/* Copyright (C) 2016-2017 CZ.NIC, z.s.p.o. <knot-dns@labs.nic.cz>
+
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program. If not, see <https://www.gnu.org/licenses/>.
+*/
+
+#pragma once
+
+#include <libknot/db/db.h>
+
+/* Cache options. */
+struct kr_cdb_opts {
+ const char *path; /*!< Cache URI path. */
+ size_t maxsize; /*!< Suggested cache size in bytes. */
+};
+
+/*! Cache database API.
+ * This is a simplified version of generic DB API from libknot,
+ * that is tailored to caching purposes.
+ */
+struct kr_cdb_api {
+ const char *name;
+
+ /* Context operations */
+
+ int (*open)(knot_db_t **db, struct kr_cdb_opts *opts, knot_mm_t *mm);
+ void (*close)(knot_db_t *db);
+ int (*count)(knot_db_t *db);
+ int (*clear)(knot_db_t *db);
+
+	/** Run after a batch of operations to release the transaction/lock if needed. */
+ int (*sync)(knot_db_t *db);
+
+ /* Data access */
+
+ int (*read)(knot_db_t *db, const knot_db_val_t *key, knot_db_val_t *val,
+ int maxcount);
+ int (*write)(knot_db_t *db, const knot_db_val_t *key, knot_db_val_t *val,
+ int maxcount);
+
+ /** Remove maxcount keys.
+	 * \returns the number of successfully removed keys, or the first error code.
+	 * It returns on the first error, but ENOENT is not considered an error. */
+ int (*remove)(knot_db_t *db, knot_db_val_t keys[], int maxcount);
+
+ /* Specialised operations */
+
+ /** Find key-value pairs that are prefixed by the given key, limited by maxcount.
+ * \return the number of pairs or negative error. */
+ int (*match)(knot_db_t *db, knot_db_val_t *key, knot_db_val_t keyval[][2], int maxcount);
+ /** Not implemented ATM. */
+ int (*prune)(knot_db_t *db, int maxcount);
+
+ /** Less-or-equal search (lexicographic ordering).
+ * On successful return, key->data and val->data point to DB-owned data.
+	 * \return 0 for an exact match, > 0 if a lexicographically smaller key was found, < 0 on error (kr_error). */
+ int (*read_leq)(knot_db_t *db, knot_db_val_t *key, knot_db_val_t *val);
+};
diff --git a/lib/cache/cdb_lmdb.c b/lib/cache/cdb_lmdb.c
new file mode 100644
index 0000000..621d2e8
--- /dev/null
+++ b/lib/cache/cdb_lmdb.c
@@ -0,0 +1,710 @@
+/* Copyright (C) 2016-2017 CZ.NIC, z.s.p.o. <knot-dns@labs.nic.cz>
+
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program. If not, see <https://www.gnu.org/licenses/>.
+*/
+
+#include <assert.h>
+#include <fcntl.h>
+#include <stdbool.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/stat.h>
+#include <sys/types.h>
+#include <unistd.h>
+#include <lmdb.h>
+
+#include "contrib/cleanup.h"
+#include "lib/cache/cdb_lmdb.h"
+#include "lib/cache/api.h"
+#include "lib/utils.h"
+
+
+/* Defines */
+#define LMDB_DIR_MODE 0770
+#define LMDB_FILE_MODE 0660
+
+struct lmdb_env
+{
+ size_t mapsize;
+ MDB_dbi dbi;
+ MDB_env *env;
+
+ /** Cached transactions
+ *
+ * - only one of (ro,rw) may be active at once
+ * - non-NULL .ro may be active or reset
+ * - non-NULL .rw is always active
+ */
+ struct {
+ bool ro_active, ro_curs_active;
+ MDB_txn *ro, *rw;
+ MDB_cursor *ro_curs;
+ } txn;
+};
+
+/** @brief Convert LMDB error code. */
+static int lmdb_error(int error)
+{
+ /* _BAD_TXN may happen with overfull DB,
+ * even during mdb_get with a single fork :-/ */
+ if (error == MDB_BAD_TXN) {
+ kr_log_info("[cache] MDB_BAD_TXN, probably overfull\n");
+ error = ENOSPC;
+ }
+ switch (error) {
+ case MDB_SUCCESS:
+ return kr_ok();
+ case MDB_NOTFOUND:
+ return kr_error(ENOENT);
+ case ENOSPC:
+ case MDB_MAP_FULL:
+ case MDB_TXN_FULL:
+ return kr_error(ENOSPC);
+ default:
+ kr_log_error("[cache] LMDB error: %s\n", mdb_strerror(error));
+ return kr_error(error);
+ }
+}
+
+/** Conversion between knot and lmdb structs for values. */
+static inline knot_db_val_t val_mdb2knot(MDB_val v)
+{
+ return (knot_db_val_t){ .len = v.mv_size, .data = v.mv_data };
+}
+static inline MDB_val val_knot2mdb(knot_db_val_t v)
+{
+ return (MDB_val){ .mv_size = v.len, .mv_data = v.data };
+}
+
+
+/*! \brief Set the environment map size.
+ * \note This also sets the maximum database size, see \fn mdb_env_set_mapsize
+ */
+static int set_mapsize(MDB_env *env, size_t map_size)
+{
+ long page_size = sysconf(_SC_PAGESIZE);
+ if (page_size <= 0) {
+ return KNOT_ERROR;
+ }
+
+ /* Round to page size. */
+ map_size = (map_size / page_size) * page_size;
+ int ret = mdb_env_set_mapsize(env, map_size);
+ if (ret != MDB_SUCCESS) {
+ return lmdb_error(ret);
+ }
+
+ return 0;
+}
+
+#define FLAG_RENEW (2*MDB_RDONLY)
+/** mdb_txn_begin or _renew + handle MDB_MAP_RESIZED.
+ *
+ * The retrying logic for MDB_MAP_RESIZED is so ugly that it has its own function.
+ * \note this assumes no transactions are active
+ * \return MDB_ errcode, not usual kr_error(...)
+ */
+static int txn_get_noresize(struct lmdb_env *env, unsigned int flag, MDB_txn **txn)
+{
+ assert(!env->txn.rw && (!env->txn.ro || !env->txn.ro_active));
+ int ret;
+ if (flag == FLAG_RENEW) {
+ ret = mdb_txn_renew(*txn);
+ } else {
+ ret = mdb_txn_begin(env->env, NULL, flag, txn);
+ }
+ if (ret != MDB_MAP_RESIZED) {
+ return ret;
+ }
+ //:unlikely
+ /* Another process increased the size; let's try to recover. */
+ kr_log_info("[cache] detected size increased by another process\n");
+ ret = mdb_env_set_mapsize(env->env, 0);
+ if (ret != MDB_SUCCESS) {
+ return ret;
+ }
+ if (flag == FLAG_RENEW) {
+ ret = mdb_txn_renew(*txn);
+ } else {
+ ret = mdb_txn_begin(env->env, NULL, flag, txn);
+ }
+ return ret;
+}
+
+/** Obtain a transaction. (they're cached in env->txn) */
+static int txn_get(struct lmdb_env *env, MDB_txn **txn, bool rdonly)
+{
+ assert(env && txn);
+ if (env->txn.rw) {
+ /* Reuse the *open* RW txn even if only reading is requested.
+ * We leave the management of this to the cdb_sync command.
+ * The user may e.g. want to do some reads between the writes. */
+ *txn = env->txn.rw;
+ return kr_ok();
+ }
+
+ if (!rdonly) {
+ /* avoid two active transactions */
+ if (env->txn.ro && env->txn.ro_active) {
+ mdb_txn_reset(env->txn.ro);
+ env->txn.ro_active = false;
+ env->txn.ro_curs_active = false;
+ }
+ int ret = txn_get_noresize(env, 0/*RW*/, &env->txn.rw);
+ if (ret == MDB_SUCCESS) {
+ *txn = env->txn.rw;
+ assert(*txn);
+ }
+ return lmdb_error(ret);
+ }
+
+ /* Get an active RO txn and return it. */
+ int ret = MDB_SUCCESS;
+ if (!env->txn.ro) { //:unlikely
+ ret = txn_get_noresize(env, MDB_RDONLY, &env->txn.ro);
+ } else if (!env->txn.ro_active) {
+ ret = txn_get_noresize(env, FLAG_RENEW, &env->txn.ro);
+ }
+ if (ret != MDB_SUCCESS) {
+ return lmdb_error(ret);
+ }
+ env->txn.ro_active = true;
+ *txn = env->txn.ro;
+ assert(*txn);
+ return kr_ok();
+}
+
+static int cdb_sync(knot_db_t *db)
+{
+ struct lmdb_env *env = db;
+ int ret = kr_ok();
+ if (env->txn.rw) {
+ ret = lmdb_error(mdb_txn_commit(env->txn.rw));
+ env->txn.rw = NULL; /* the transaction got freed even in case of errors */
+ } else if (env->txn.ro && env->txn.ro_active) {
+ mdb_txn_reset(env->txn.ro);
+ env->txn.ro_active = false;
+ env->txn.ro_curs_active = false;
+ }
+ return ret;
+}
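+
+/* A minimal usage sketch (illustrative only, not part of the API contract)
+ * of how the transactions cached in env->txn are typically exercised:
+ *
+ *	MDB_txn *txn = NULL;
+ *	txn_get(env, &txn, true);   // reuses or renews the cached RO txn
+ *	// ... read via mdb_get(txn, ...) ...
+ *	txn_get(env, &txn, false);  // resets the RO txn and opens the RW txn
+ *	// ... write via mdb_put(txn, ...) ...
+ *	cdb_sync(env);              // commits the RW txn
+ */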
+
+/** Obtain a read-only cursor (and a read-only transaction). */
+static int txn_curs_get(struct lmdb_env *env, MDB_cursor **curs)
+{
+ assert(env && curs);
+ if (env->txn.ro_curs_active) {
+ goto success;
+ }
+ /* The cursor is only used within a read-only txn, so commit any
+ * open RW txn first; TODO: it's a bit messy/coupled */
+ if (env->txn.rw) {
+ int ret = cdb_sync(env);
+ if (ret) return ret;
+ }
+ MDB_txn *txn = NULL;
+ int ret = txn_get(env, &txn, true);
+ if (ret) return ret;
+
+ if (env->txn.ro_curs) {
+ ret = mdb_cursor_renew(txn, env->txn.ro_curs);
+ } else {
+ ret = mdb_cursor_open(txn, env->dbi, &env->txn.ro_curs);
+ }
+ if (ret) return lmdb_error(ret);
+ env->txn.ro_curs_active = true;
+success:
+ assert(env->txn.ro_curs_active && env->txn.ro && env->txn.ro_active
+ && !env->txn.rw);
+ *curs = env->txn.ro_curs;
+ assert(*curs);
+ return kr_ok();
+}
+
+static void free_txn_ro(struct lmdb_env *env)
+{
+ if (env->txn.ro) {
+ mdb_txn_abort(env->txn.ro);
+ env->txn.ro = NULL;
+ }
+ if (env->txn.ro_curs) {
+ mdb_cursor_close(env->txn.ro_curs);
+ env->txn.ro_curs = NULL;
+ }
+}
+
+/*! \brief Close the database. */
+static void cdb_close_env(struct lmdb_env *env)
+{
+ assert(env && env->env);
+
+ /* Get rid of any transactions. */
+ cdb_sync(env);
+ free_txn_ro(env);
+
+ mdb_env_sync(env->env, 1);
+ mdb_dbi_close(env->env, env->dbi);
+ mdb_env_close(env->env);
+ memset(env, 0, sizeof(*env));
+}
+
+/*! \brief Open database environment. */
+static int cdb_open_env(struct lmdb_env *env, unsigned flags, const char *path, size_t mapsize)
+{
+ int ret = mkdir(path, LMDB_DIR_MODE);
+ if (ret == -1 && errno != EEXIST) {
+ return kr_error(errno);
+ }
+
+ MDB_env *mdb_env = NULL;
+ ret = mdb_env_create(&mdb_env);
+ if (ret != MDB_SUCCESS) {
+ return lmdb_error(ret);
+ }
+
+ ret = set_mapsize(mdb_env, mapsize);
+ if (ret != 0) {
+ mdb_env_close(mdb_env);
+ return ret;
+ }
+
+ ret = mdb_env_open(mdb_env, path, flags, LMDB_FILE_MODE);
+ if (ret != MDB_SUCCESS) {
+ mdb_env_close(mdb_env);
+ return lmdb_error(ret);
+ }
+
+ /* Keep the environment pointer. */
+ env->env = mdb_env;
+ env->mapsize = mapsize;
+ return 0;
+}
+
+static int cdb_open(struct lmdb_env *env, const char *path, size_t mapsize)
+{
+ /* Cache doesn't require durability, we can be
+ * loose with the requirements as a tradeoff for speed. */
+ const unsigned flags = MDB_WRITEMAP | MDB_MAPASYNC | MDB_NOTLS;
+ int ret = cdb_open_env(env, flags, path, mapsize);
+ if (ret != 0) {
+ return ret;
+ }
+
+ /* Open the database. */
+ MDB_txn *txn = NULL;
+ ret = mdb_txn_begin(env->env, NULL, 0, &txn);
+ if (ret != MDB_SUCCESS) {
+ mdb_env_close(env->env);
+ return lmdb_error(ret);
+ }
+
+ ret = mdb_dbi_open(txn, NULL, 0, &env->dbi);
+ if (ret != MDB_SUCCESS) {
+ mdb_txn_abort(txn);
+ mdb_env_close(env->env);
+ return lmdb_error(ret);
+ }
+
+ ret = mdb_txn_commit(txn);
+ if (ret != MDB_SUCCESS) {
+ mdb_env_close(env->env);
+ return lmdb_error(ret);
+ }
+
+ return 0;
+}
+
+static int cdb_init(knot_db_t **db, struct kr_cdb_opts *opts, knot_mm_t *pool)
+{
+ if (!db || !opts) {
+ return kr_error(EINVAL);
+ }
+
+ struct lmdb_env *env = malloc(sizeof(*env));
+ if (!env) {
+ return kr_error(ENOMEM);
+ }
+ memset(env, 0, sizeof(struct lmdb_env));
+
+ /* Clear stale lockfiles. */
+ auto_free char *lockfile = kr_strcatdup(2, opts->path, "/.cachelock");
+ if (lockfile) {
+ if (unlink(lockfile) == 0) {
+ kr_log_info("[cache] cleared stale lockfile '%s'\n", lockfile);
+ } else if (errno != ENOENT) {
+ kr_log_info("[cache] failed to clear stale lockfile '%s': %s\n", lockfile,
+ strerror(errno));
+ }
+ }
+
+ /* Open the database. */
+ int ret = cdb_open(env, opts->path, opts->maxsize);
+ if (ret != 0) {
+ free(env);
+ return ret;
+ }
+
+ *db = env;
+ return 0;
+}
+
+static void cdb_deinit(knot_db_t *db)
+{
+ struct lmdb_env *env = db;
+ cdb_close_env(env);
+ free(env);
+}
+
+static int cdb_count(knot_db_t *db)
+{
+ struct lmdb_env *env = db;
+ MDB_txn *txn = NULL;
+ int ret = txn_get(env, &txn, true);
+ if (ret != 0) {
+ return ret;
+ }
+
+ MDB_stat stat;
+ ret = mdb_stat(txn, env->dbi, &stat);
+
+ return (ret == MDB_SUCCESS) ? stat.ms_entries : lmdb_error(ret);
+}
+
+static int cdb_clear(knot_db_t *db)
+{
+ struct lmdb_env *env = db;
+ /* First try mdb_drop() to clear the DB; this may fail with ENOSPC. */
+ /* If we didn't do this, an explicit cache.clear() run on one instance
+ * would lead to that instance detaching from the cache shared with others,
+ * until they reopened the cache explicitly or cleared it for some reason.
+ */
+ {
+ MDB_txn *txn = NULL;
+ int ret = txn_get(env, &txn, false);
+ if (ret == kr_ok()) {
+ ret = lmdb_error(mdb_drop(txn, env->dbi, 0));
+ if (ret == kr_ok()) {
+ ret = cdb_sync(db);
+ }
+ if (ret == kr_ok()) {
+ return ret;
+ }
+ }
+ kr_log_info("[cache] clearing error, falling back\n");
+ }
+
+ /* We are about to switch to a different file, so end all txns, to be sure. */
+ (void) cdb_sync(db);
+ free_txn_ro(db);
+
+ /* Since there is no guarantee that there will be enough free
+ * pages to hold the whole dirtied DB for a transaction-safe clear,
+ * we simply remove the database files and reopen.
+ * We can afford this since other readers will continue to read
+ * from the removed file, but will reopen it upon encountering the
+ * next error. */
+ mdb_filehandle_t fd = -1;
+ int ret = mdb_env_get_fd(env->env, &fd);
+ if (ret != MDB_SUCCESS) {
+ return lmdb_error(ret);
+ }
+ const char *path = NULL;
+ ret = mdb_env_get_path(env->env, &path);
+ if (ret != MDB_SUCCESS) {
+ return lmdb_error(ret);
+ }
+
+ auto_free char *mdb_datafile = kr_strcatdup(2, path, "/data.mdb");
+ auto_free char *mdb_lockfile = kr_strcatdup(2, path, "/lock.mdb");
+ auto_free char *lockfile = kr_strcatdup(2, path, "/.cachelock");
+ if (!mdb_datafile || !mdb_lockfile || !lockfile) {
+ return kr_error(ENOMEM);
+ }
+ /* Check whether we can get a lock on the lockfile. */
+ ret = open(lockfile, O_CREAT|O_EXCL|O_RDONLY, S_IRUSR);
+ if (ret == -1) {
+ kr_log_error("[cache] clearing failed to get ./.cachelock; retry later\n");
+ /* As we're out of space (almost certainly - mdb_drop didn't work),
+ * we will retry on the next failing write operation. */
+ return kr_error(errno);
+ }
+ close(ret);
+ /* We acquired the lockfile. Now check whether the *.mdb files are the ones we currently have open. */
+ struct stat old_stat, new_stat;
+ if (fstat(fd, &new_stat) || stat(mdb_datafile, &old_stat)) {
+ ret = errno;
+ unlink(lockfile);
+ return kr_error(ret);
+ }
+ /* Remove underlying files only if current open environment
+ * points to file on the disk. Otherwise just reopen as someone
+ * else has already removed the files.
+ */
+ if (old_stat.st_dev == new_stat.st_dev && old_stat.st_ino == new_stat.st_ino) {
+ kr_log_verbose("[cache] clear: identical files, unlinking\n");
+ // coverity[toctou]
+ unlink(mdb_datafile);
+ unlink(mdb_lockfile);
+ } else
+ kr_log_verbose("[cache] clear: not identical files, reopening\n");
+ /* Keep copy as it points to current handle internals. */
+ auto_free char *path_copy = strdup(path);
+ size_t mapsize = env->mapsize;
+ cdb_close_env(env);
+ ret = cdb_open(env, path_copy, mapsize);
+ /* Environment updated, release lockfile. */
+ unlink(lockfile);
+ return ret;
+}
+
+static int cdb_readv(knot_db_t *db, const knot_db_val_t *key, knot_db_val_t *val,
+ int maxcount)
+{
+ struct lmdb_env *env = db;
+ MDB_txn *txn = NULL;
+ int ret = txn_get(env, &txn, true);
+ if (ret) {
+ return ret;
+ }
+
+ for (int i = 0; i < maxcount; ++i) {
+ /* Convert key structs */
+ MDB_val _key = val_knot2mdb(key[i]);
+ MDB_val _val = val_knot2mdb(val[i]);
+ ret = mdb_get(txn, env->dbi, &_key, &_val);
+ if (ret != MDB_SUCCESS) {
+ ret = lmdb_error(ret);
+ if (ret == kr_error(ENOSPC)) {
+ /* we're likely to be forced to cache clear anyway */
+ ret = kr_error(ENOENT);
+ }
+ return ret;
+ }
+ /* Update the result. */
+ val[i] = val_mdb2knot(_val);
+ }
+ return kr_ok();
+}
+
+static int cdb_write(struct lmdb_env *env, MDB_txn **txn, const knot_db_val_t *key,
+ knot_db_val_t *val, unsigned flags)
+{
+ /* Convert key structs and write */
+ MDB_val _key = val_knot2mdb(*key);
+ MDB_val _val = val_knot2mdb(*val);
+ int ret = mdb_put(*txn, env->dbi, &_key, &_val, flags);
+
+ /* Try to recover from doing too much writing in a single transaction. */
+ if (ret == MDB_TXN_FULL) {
+ ret = cdb_sync(env);
+ if (!ret) {
+ ret = txn_get(env, txn, false);
+ }
+ if (!ret) {
+ ret = mdb_put(*txn, env->dbi, &_key, &_val, flags);
+ }
+ }
+ if (ret != MDB_SUCCESS) {
+ return lmdb_error(ret);
+ }
+
+ /* Update the result. */
+ val->data = _val.mv_data;
+ val->len = _val.mv_size;
+ return kr_ok();
+}
+
+static int cdb_writev(knot_db_t *db, const knot_db_val_t *key, knot_db_val_t *val,
+ int maxcount)
+{
+ struct lmdb_env *env = db;
+ MDB_txn *txn = NULL;
+ int ret = txn_get(env, &txn, false);
+
+ for (int i = 0; ret == kr_ok() && i < maxcount; ++i) {
+ /* This is an LMDB-specific optimisation: if the caller specifies
+ * a value with NULL data and non-zero length, LMDB preallocates
+ * the entry and leaves the write transaction open; the caller is
+ * then responsible for syncing, and thus committing, the transaction.
+ */
+ unsigned mdb_flags = 0;
+ if (val[i].len > 0 && val[i].data == NULL) {
+ mdb_flags |= MDB_RESERVE;
+ }
+ ret = cdb_write(env, &txn, &key[i], &val[i], mdb_flags);
+ }
+
+ return ret;
+}
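+
+/* A sketch (illustrative only) of the MDB_RESERVE pattern described above:
+ * pass .data == NULL with the desired length, fill the address that the
+ * call returns in val, and commit by syncing.
+ *
+ *	knot_db_val_t key = { .data = "k", .len = 1 };
+ *	knot_db_val_t val = { .data = NULL, .len = 100 }; // reserve 100 B
+ *	if (cdb_writev(db, &key, &val, 1) == kr_ok()) {
+ *		memset(val.data, 0, val.len); // write into the reserved space
+ *		cdb_sync(db);                 // commit the still-open RW txn
+ *	}
+ */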
+
+static int cdb_remove(knot_db_t *db, knot_db_val_t keys[], int maxcount)
+{
+ struct lmdb_env *env = db;
+ MDB_txn *txn = NULL;
+ int ret = txn_get(env, &txn, false);
+ int deleted = 0;
+
+ for (int i = 0; ret == kr_ok() && i < maxcount; ++i) {
+ MDB_val _key = val_knot2mdb(keys[i]);
+ MDB_val val = { 0, NULL };
+ ret = lmdb_error(mdb_del(txn, env->dbi, &_key, &val));
+ if (ret == kr_ok())
+ deleted++;
+ else if (ret == KNOT_ENOENT)
+ ret = kr_ok(); /* skip over non-existing entries */
+ }
+
+ return ret < 0 ? ret : deleted;
+}
+
+static int cdb_match(knot_db_t *db, knot_db_val_t *key, knot_db_val_t keyval[][2], int maxcount)
+{
+ struct lmdb_env *env = db;
+ MDB_txn *txn = NULL;
+ int ret = txn_get(env, &txn, true);
+ if (ret != 0) {
+ return ret;
+ }
+
+ /* LATER(optim.): use txn_curs_get() instead, to save resources. */
+ MDB_cursor *cur = NULL;
+ ret = mdb_cursor_open(txn, env->dbi, &cur);
+ if (ret != 0) {
+ return lmdb_error(ret);
+ }
+
+ MDB_val cur_key = val_knot2mdb(*key);
+ MDB_val cur_val = { 0, NULL };
+ ret = mdb_cursor_get(cur, &cur_key, &cur_val, MDB_SET_RANGE);
+ if (ret != MDB_SUCCESS) {
+ mdb_cursor_close(cur);
+ return lmdb_error(ret);
+ }
+
+ int results = 0;
+ while (ret == MDB_SUCCESS) {
+ /* Retrieve current key and compare with prefix */
+ if (cur_key.mv_size < key->len || memcmp(cur_key.mv_data, key->data, key->len) != 0) {
+ break;
+ }
+ /* Add to result set */
+ if (results < maxcount) {
+ keyval[results][0] = val_mdb2knot(cur_key);
+ keyval[results][1] = val_mdb2knot(cur_val);
+ ++results;
+ } else {
+ break;
+ }
+ ret = mdb_cursor_get(cur, &cur_key, &cur_val, MDB_NEXT);
+ }
+
+ mdb_cursor_close(cur);
+ return results;
+}
+
+
+static int cdb_prune(knot_db_t *db, int limit)
+{
+ return -1;
+#if 0
+ /* Sync in-flight transactions */
+ cdb_sync(db);
+
+ /* Prune old records */
+ struct lmdb_env *env = db;
+ MDB_txn *txn = NULL;
+ int ret = txn_get(env, &txn, false);
+ if (ret != 0) {
+ return ret;
+ }
+
+ MDB_cursor *cur = NULL;
+ ret = mdb_cursor_open(txn, env->dbi, &cur);
+ if (ret != 0) {
+ return lmdb_error(ret);
+ }
+
+ MDB_val cur_key, cur_val;
+ ret = mdb_cursor_get(cur, &cur_key, &cur_val, MDB_FIRST);
+ if (ret != 0) {
+ mdb_cursor_close(cur);
+ return lmdb_error(ret);
+ }
+
+ int results = 0;
+ struct timeval now;
+ gettimeofday(&now, NULL);
+ while (ret == 0 && results < limit) {
+ /* Ignore special namespaces. */
+ if (cur_key.mv_size < 2 || ((const char *)cur_key.mv_data)[0] == 'V') {
+ ret = mdb_cursor_get(cur, &cur_key, &cur_val, MDB_NEXT);
+ continue;
+ }
+ /* Check entry age. */
+ struct kr_cache_entry *entry = cur_val.mv_data;
+ if (entry->timestamp > now.tv_sec ||
+ (now.tv_sec - entry->timestamp) < entry->ttl) {
+ ret = mdb_cursor_get(cur, &cur_key, &cur_val, MDB_NEXT);
+ continue;
+ }
+ /* Remove entry */
+ mdb_cursor_del(cur, 0);
+ ++results;
+ ret = mdb_cursor_get(cur, &cur_key, &cur_val, MDB_NEXT);
+ }
+ mdb_cursor_close(cur);
+ return ret < 0 ? ret : results;
+#endif
+}
+
+static int cdb_read_leq(knot_db_t *env, knot_db_val_t *key, knot_db_val_t *val)
+{
+ assert(env && key && key->data && val);
+ MDB_cursor *curs = NULL;
+ int ret = txn_curs_get(env, &curs);
+ if (ret) return ret;
+
+ MDB_val key2_m = val_knot2mdb(*key);
+ MDB_val val2_m = { 0, NULL };
+ ret = mdb_cursor_get(curs, &key2_m, &val2_m, MDB_SET_RANGE);
+ if (ret) return lmdb_error(ret);
+ /* test for equality //:unlikely */
+ if (key2_m.mv_size == key->len
+ && memcmp(key2_m.mv_data, key->data, key->len) == 0) {
+ ret = 0; /* equality */
+ goto success;
+ }
+ /* we must be greater than key; do one step to smaller */
+ ret = mdb_cursor_get(curs, &key2_m, &val2_m, MDB_PREV);
+ if (ret) return lmdb_error(ret);
+ ret = 1;
+success:
+ /* finalize the output */
+ *key = val_mdb2knot(key2_m);
+ *val = val_mdb2knot(val2_m);
+ return ret;
+}
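+
+/* Worked example (illustrative only), with the DB containing keys "a" and "c":
+ *   leq("c") -> MDB_SET_RANGE hits "c" exactly         => return 0, key = "c"
+ *   leq("b") -> MDB_SET_RANGE hits "c", MDB_PREV back  => return 1, key = "a"
+ *   leq("0") -> MDB_SET_RANGE hits "a", MDB_PREV fails => kr_error(ENOENT)
+ */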
+
+
+const struct kr_cdb_api *kr_cdb_lmdb(void)
+{
+ static const struct kr_cdb_api api = {
+ "lmdb",
+ cdb_init, cdb_deinit, cdb_count, cdb_clear, cdb_sync,
+ cdb_readv, cdb_writev, cdb_remove,
+ cdb_match, cdb_prune,
+ cdb_read_leq
+ };
+
+ return &api;
+}
diff --git a/lib/cache/cdb_lmdb.h b/lib/cache/cdb_lmdb.h
new file mode 100644
index 0000000..bc2c7d6
--- /dev/null
+++ b/lib/cache/cdb_lmdb.h
@@ -0,0 +1,23 @@
+/* Copyright (C) 2016-2017 CZ.NIC, z.s.p.o. <knot-dns@labs.nic.cz>
+
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program. If not, see <https://www.gnu.org/licenses/>.
+*/
+
+#pragma once
+
+#include "lib/cache/cdb_api.h"
+#include "lib/defines.h"
+
+KR_EXPORT KR_CONST
+const struct kr_cdb_api *kr_cdb_lmdb(void);
diff --git a/lib/cache/entry_list.c b/lib/cache/entry_list.c
new file mode 100644
index 0000000..6a5001c
--- /dev/null
+++ b/lib/cache/entry_list.c
@@ -0,0 +1,293 @@
+/* Copyright (C) 2017 CZ.NIC, z.s.p.o. <knot-dns@labs.nic.cz>
+
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program. If not, see <https://www.gnu.org/licenses/>.
+ */
+
+/** @file
+ * Implementation of chaining in struct entry_h. Prototypes in ./impl.h
+ */
+
+#include "lib/cache/impl.h"
+#include "lib/utils.h"
+
+
+static int entry_h_len(knot_db_val_t val);
+
+
+void entry_list_memcpy(struct entry_apex *ea, entry_list_t list)
+{
+ assert(ea);
+ memset(ea, 0, offsetof(struct entry_apex, data));
+ ea->has_ns = list[EL_NS ].len;
+ ea->has_cname = list[EL_CNAME ].len;
+ ea->has_dname = list[EL_DNAME ].len;
+ for (int i = 0; i < ENTRY_APEX_NSECS_CNT; ++i) {
+ ea->nsecs[i] = list[i].len == 0 ? 0 :
+ (list[i].len == 4 ? 1 : 3);
+ }
+ uint8_t *it = ea->data;
+ for (int i = 0; i < EL_LENGTH; ++i) {
+ if (list[i].data) {
+ memcpy(it, list[i].data, list[i].len);
+ /* LATER(optim.): coalesce consecutive writes? */
+ } else {
+ list[i].data = it;
+ }
+ it += to_even(list[i].len);
+ }
+}
+
+int entry_list_parse(const knot_db_val_t val, entry_list_t list)
+{
+ const bool ok = val.data && val.len && list;
+ if (!ok) {
+ assert(!EINVAL);
+ return kr_error(EINVAL);
+ }
+ /* Parse the apex itself (nsec parameters). */
+ const struct entry_apex *ea = entry_apex_consistent(val);
+ if (!ea) {
+ return kr_error(EILSEQ);
+ }
+ const uint8_t *it = ea->data,
+ *it_bound = knot_db_val_bound(val);
+ for (int i = 0; i < ENTRY_APEX_NSECS_CNT; ++i) {
+ if (it > it_bound) {
+ return kr_error(EILSEQ);
+ }
+ list[i].data = (void *)it;
+ switch (ea->nsecs[i]) {
+ case 0:
+ list[i].len = 0;
+ break;
+ case 1:
+ list[i].len = sizeof(uint32_t); /* just timestamp */
+ break;
+ case 3: { /* timestamp + NSEC3PARAM wire */
+ if (it + sizeof(uint32_t) + 4 > it_bound) {
+ return kr_error(EILSEQ);
+ }
+ list[i].len = sizeof(uint32_t)
+ + nsec_p_rdlen(it + sizeof(uint32_t));
+ break;
+ }
+ default:
+ return kr_error(EILSEQ);
+ };
+ it += to_even(list[i].len);
+ }
+ /* Parse every entry_h. */
+ for (int i = ENTRY_APEX_NSECS_CNT; i < EL_LENGTH; ++i) {
+ list[i].data = (void *)it;
+ bool has_type;
+ switch (i) {
+ case EL_NS: has_type = ea->has_ns; break;
+ case EL_CNAME: has_type = ea->has_cname; break;
+ case EL_DNAME: has_type = ea->has_dname; break;
+ default: assert(false); return kr_error(EINVAL); /* something very bad */
+ }
+ if (!has_type) {
+ list[i].len = 0;
+ continue;
+ }
+ if (it >= it_bound) {
+ assert(!EILSEQ);
+ return kr_error(EILSEQ);
+ }
+ const int len = entry_h_len(
+ (knot_db_val_t){ .data = (void *)it, .len = it_bound - it });
+ if (len < 0) {
+ assert(false);
+ return kr_error(len);
+ }
+ list[i].len = len;
+ it += to_even(len);
+ }
+ assert(it == it_bound);
+ return kr_ok();
+}
+
+/** Given a valid entry header, find its length (i.e. offset of the next entry).
+ * \param val The beginning of the data and the bound (read only).
+ */
+static int entry_h_len(const knot_db_val_t val)
+{
+ const bool ok = val.data && ((ssize_t)val.len) > 0;
+ if (!ok) return kr_error(EINVAL);
+ const struct entry_h *eh = val.data;
+ const uint8_t *d = eh->data; /* iterates over the data in entry */
+ const uint8_t *data_bound = knot_db_val_bound(val);
+ if (d >= data_bound) return kr_error(EILSEQ);
+ if (!eh->is_packet) { /* Positive RRset + its RRsig set (may be empty). */
+ int sets = 2;
+ while (sets-- > 0) {
+ d += rdataset_dematerialized_size(d);
+ if (d > data_bound) {
+ assert(!EILSEQ);
+ return kr_error(EILSEQ);
+ }
+ }
+ } else { /* A "packet" (opaque ATM). */
+ uint16_t len;
+ if (d + sizeof(len) > data_bound) return kr_error(EILSEQ);
+ memcpy(&len, d, sizeof(len));
+ d += 2 + to_even(len);
+ }
+ if (d > data_bound) {
+ assert(!EILSEQ);
+ return kr_error(EILSEQ);
+ }
+ return d - (uint8_t *)val.data;
+}
+
+struct entry_apex * entry_apex_consistent(knot_db_val_t val)
+{
+ //XXX: check lengths, etc.
+ return val.data;
+}
+
+/* See the header file. */
+int entry_h_seek(knot_db_val_t *val, uint16_t type)
+{
+ int i = -1;
+ switch (type) {
+ case KNOT_RRTYPE_NS: i = EL_NS; break;
+ case KNOT_RRTYPE_CNAME: i = EL_CNAME; break;
+ case KNOT_RRTYPE_DNAME: i = EL_DNAME; break;
+ default: return kr_ok();
+ }
+
+ entry_list_t el;
+ int ret = entry_list_parse(*val, el);
+ if (ret) return ret;
+ *val = el[i];
+ return val->len ? kr_ok() : kr_error(ENOENT);
+}
+
+static int cache_write_or_clear(struct kr_cache *cache, const knot_db_val_t *key,
+ knot_db_val_t *val, const struct kr_query *qry)
+{
+ int ret = cache_op(cache, write, key, val, 1);
+ if (!ret) return kr_ok();
+ /* Clear cache if overfull. It's nontrivial to do better with LMDB.
+ * LATER: some garbage-collection mechanism. */
+ if (ret == kr_error(ENOSPC)) {
+ ret = kr_cache_clear(cache);
+ const char *msg = "[cache] clearing because overfull, ret = %d\n";
+ if (ret) {
+ kr_log_error(msg, ret);
+ } else {
+ kr_log_info(msg, ret);
+ ret = kr_error(ENOSPC);
+ }
+ return ret;
+ }
+ VERBOSE_MSG(qry, "=> failed backend write, ret = %d\n", ret);
+ return kr_error(ret ? ret : ENOSPC);
+}
+
+
+/* See the header file. */
+int entry_h_splice(
+ knot_db_val_t *val_new_entry, uint8_t rank,
+ const knot_db_val_t key, const uint16_t ktype, const uint16_t type,
+ const knot_dname_t *owner/*log only*/,
+ const struct kr_query *qry, struct kr_cache *cache, uint32_t timestamp)
+{
+ //TODO: another review, perhaps including the API
+ const bool ok = val_new_entry && val_new_entry->len > 0;
+ if (!ok) {
+ assert(!EINVAL);
+ return kr_error(EINVAL);
+ }
+
+ int i_type;
+ switch (type) {
+ case KNOT_RRTYPE_NS: i_type = EL_NS; break;
+ case KNOT_RRTYPE_CNAME: i_type = EL_CNAME; break;
+ case KNOT_RRTYPE_DNAME: i_type = EL_DNAME; break;
+ default: i_type = 0;
+ }
+
+ /* Get eh_orig (original entry), and also el list if multi-entry case. */
+ const struct entry_h *eh_orig = NULL;
+ entry_list_t el;
+ int ret = -1;
+ if (!kr_rank_test(rank, KR_RANK_SECURE) || ktype == KNOT_RRTYPE_NS) {
+ knot_db_val_t val;
+ ret = cache_op(cache, read, &key, &val, 1);
+ if (i_type) {
+ if (!ret) ret = entry_list_parse(val, el);
+ if (ret) memset(el, 0, sizeof(el));
+ val = el[i_type];
+ }
+ /* val is on the entry, in either case (or error) */
+ if (!ret) {
+ eh_orig = entry_h_consistent(val, type);
+ }
+ } else {
+ /* We want to fully overwrite the entry, so don't even read it. */
+ memset(el, 0, sizeof(el));
+ }
+
+ if (!kr_rank_test(rank, KR_RANK_SECURE) && eh_orig) {
+ /* If equal rank was accepted, spoofing a *single* answer would be
+ * enough to e.g. override NS record in AUTHORITY section.
+ * This way they would have to hit the first answer
+ * (whenever TTL nears expiration).
+ * Stale-serving is NOT considered, but TTL 1 would be considered
+ * as expiring anyway, ... */
+ int32_t old_ttl = get_new_ttl(eh_orig, qry, NULL, 0, timestamp);
+ if (old_ttl > 0 && !is_expiring(eh_orig->ttl, old_ttl)
+ && rank <= eh_orig->rank) {
+ WITH_VERBOSE(qry) {
+ auto_free char *type_str = kr_rrtype_text(type),
+ *owner_str = kr_dname_text(owner);
+ VERBOSE_MSG(qry, "=> not overwriting %s %s\n",
+ type_str, owner_str);
+ }
+ return kr_error(EEXIST);
+ }
+ }
+
+ if (!i_type) {
+ /* The non-list types are trivial now. */
+ return cache_write_or_clear(cache, &key, val_new_entry, qry);
+ }
+ /* Now we're in trouble. In some cases, part of the data to be written
+ * is an LMDB entry that may be invalidated by our write request.
+ * (LMDB even does in-place updates!) Therefore we copy it all into a buffer.
+ * (We don't bother deallocating from the mempool.)
+ * LATER(optim.): do this only when necessary, or perhaps take another approach.
+ * This is also complicated by the fact that the val_new_entry part
+ * is to be written *afterwards* by the caller.
+ */
+ el[i_type] = (knot_db_val_t){
+ .len = val_new_entry->len,
+ .data = NULL, /* perhaps unclear in the entry_h_splice() API */
+ };
+ knot_db_val_t val = {
+ .len = entry_list_serial_size(el),
+ .data = NULL,
+ };
+ void *buf = mm_alloc(&qry->request->pool, val.len);
+ entry_list_memcpy(buf, el);
+ ret = cache_write_or_clear(cache, &key, &val, qry);
+ if (ret) return kr_error(ret);
+ memcpy(val.data, buf, val.len); /* we also copy the "empty" space, but well... */
+ val_new_entry->data = (uint8_t *)val.data
+ + ((uint8_t *)el[i_type].data - (uint8_t *)buf);
+ return kr_ok();
+}
+
diff --git a/lib/cache/entry_pkt.c b/lib/cache/entry_pkt.c
new file mode 100644
index 0000000..00e73d4
--- /dev/null
+++ b/lib/cache/entry_pkt.c
@@ -0,0 +1,224 @@
+/* Copyright (C) 2017 CZ.NIC, z.s.p.o. <knot-dns@labs.nic.cz>
+
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program. If not, see <https://www.gnu.org/licenses/>.
+ */
+
+/** @file
+ * Implementation of packet-caching. Prototypes in ./impl.h
+ *
+ * The packet is stashed in entry_h::data as uint16_t length + full packet wire format.
+ */
+
+#include "lib/utils.h"
+#include "lib/layer/iterate.h" /* kr_response_classify */
+#include "lib/cache/impl.h"
+
+
+/** Compute TTL for a packet. Generally it's the minimum TTL in the packet,
+ * with extra conditions (e.g. SOA minimum for negative answers). */
+static uint32_t packet_ttl(const knot_pkt_t *pkt, bool is_negative)
+{
+ bool has_ttl = false;
+ uint32_t ttl = UINT32_MAX;
+ /* Find minimum entry TTL in the packet or SOA minimum TTL. */
+ for (knot_section_t i = KNOT_ANSWER; i <= KNOT_ADDITIONAL; ++i) {
+ const knot_pktsection_t *sec = knot_pkt_section(pkt, i);
+ for (unsigned k = 0; k < sec->count; ++k) {
+ const knot_rrset_t *rr = knot_pkt_rr(sec, k);
+ if (is_negative) {
+ /* Use SOA minimum TTL for negative answers. */
+ if (rr->type == KNOT_RRTYPE_SOA) {
+ return MIN(rr->ttl, knot_soa_minimum(rr->rrs.rdata));
+ } else {
+ continue; /* Use SOA only for negative answers. */
+ }
+ }
+ if (knot_rrtype_is_metatype(rr->type)) {
+ continue; /* Skip metatypes. */
+ }
+ ttl = MIN(ttl, rr->ttl);
+ has_ttl = true;
+ }
+ }
+ /* If no valid TTL present, go with zero (will get clamped to minimum). */
+ return has_ttl ? ttl : 0;
+}
+
+
+void stash_pkt(const knot_pkt_t *pkt, const struct kr_query *qry,
+ const struct kr_request *req, const bool has_optout)
+{
+ /* In some cases, stash also the packet. */
+ const bool is_negative = kr_response_classify(pkt)
+ & (PKT_NODATA|PKT_NXDOMAIN);
+ const struct kr_qflags * const qf = &qry->flags;
+ const bool want_negative = qf->DNSSEC_INSECURE || !qf->DNSSEC_WANT || has_optout;
+ const bool want_pkt = qf->DNSSEC_BOGUS /*< useful for +cd answers */
+ || (is_negative && want_negative);
+
+ if (!want_pkt || !knot_wire_get_aa(pkt->wire)
+ || pkt->parsed != pkt->size /*< malformed packet; still can't detect KNOT_EFEWDATA */
+ ) {
+ return;
+ }
+
+ /* Compute rank. If cd bit is set or we got answer via non-validated
+ * forwarding, make the rank bad; otherwise it depends on flags.
+ * TODO: probably make validator attempt validation even with +cd. */
+ uint8_t rank = KR_RANK_AUTH;
+ const bool risky_vldr = is_negative && qf->FORWARD && qf->CNAME;
+ /* ^^ CNAME'ed NXDOMAIN answer in forwarding mode can contain
+ * unvalidated records; original commit: d6e22f476. */
+ if (knot_wire_get_cd(req->qsource.packet->wire) || qf->STUB || risky_vldr) {
+ kr_rank_set(&rank, KR_RANK_OMIT);
+ } else {
+ if (qf->DNSSEC_BOGUS) {
+ kr_rank_set(&rank, KR_RANK_BOGUS);
+ } else if (qf->DNSSEC_INSECURE) {
+ kr_rank_set(&rank, KR_RANK_INSECURE);
+ } else if (!qf->DNSSEC_WANT) {
+ /* no TAs at all, leave _RANK_AUTH */
+ } else if (has_optout) {
+ /* All bad cases should be filtered above,
+ * at least the same way as pktcache in kresd 1.5.x. */
+ kr_rank_set(&rank, KR_RANK_SECURE);
+ } else assert(false);
+ }
+
+ const uint16_t pkt_type = knot_pkt_qtype(pkt);
+ const knot_dname_t *owner = knot_pkt_qname(pkt); /* qname can't be compressed */
+
+ // LATER: nothing exists under NXDOMAIN. Implement that (optionally)?
+#if 0
+ if (knot_wire_get_rcode(pkt->wire) == KNOT_RCODE_NXDOMAIN
+ /* && !qf->DNSSEC_INSECURE */ ) {
+ pkt_type = KNOT_RRTYPE_NS;
+ }
+#endif
+
+ /* Construct the key under which the pkt will be stored. */
+ struct key k_storage, *k = &k_storage;
+ knot_db_val_t key;
+ int ret = kr_dname_lf(k->buf, owner, false);
+ if (ret) {
+ /* A server might (incorrectly) reply with QDCOUNT=0. */
+ assert(owner == NULL);
+ return;
+ }
+ key = key_exact_type_maypkt(k, pkt_type);
+
+ /* For now we stash the full packet byte-exactly as it came from upstream. */
+ const uint16_t pkt_size = pkt->size;
+ knot_db_val_t val_new_entry = {
+ .data = NULL,
+ .len = offsetof(struct entry_h, data) + sizeof(pkt_size) + pkt->size,
+ };
+ /* Prepare raw memory for the new entry and fill it. */
+ struct kr_cache *cache = &req->ctx->cache;
+ ret = entry_h_splice(&val_new_entry, rank, key, k->type, pkt_type,
+ owner, qry, cache, qry->timestamp.tv_sec);
+ if (ret) return; /* some aren't really errors */
+ assert(val_new_entry.data);
+ struct entry_h *eh = val_new_entry.data;
+ memset(eh, 0, offsetof(struct entry_h, data));
+ eh->time = qry->timestamp.tv_sec;
+ eh->ttl = MAX(MIN(packet_ttl(pkt, is_negative), cache->ttl_max), cache->ttl_min);
+ eh->rank = rank;
+ eh->is_packet = true;
+ eh->has_optout = qf->DNSSEC_OPTOUT;
+ memcpy(eh->data, &pkt_size, sizeof(pkt_size));
+ memcpy(eh->data + sizeof(pkt_size), pkt->wire, pkt_size);
+
+ WITH_VERBOSE(qry) {
+ auto_free char *type_str = kr_rrtype_text(pkt_type),
+ *owner_str = kr_dname_text(owner);
+ VERBOSE_MSG(qry, "=> stashed packet: rank 0%.2o, TTL %d, "
+ "%s %s (%d B)\n",
+ eh->rank, eh->ttl,
+ type_str, owner_str, (int)val_new_entry.len);
+ }
+}
+
+
+int answer_from_pkt(kr_layer_t *ctx, knot_pkt_t *pkt, uint16_t type,
+ const struct entry_h *eh, const void *eh_bound, uint32_t new_ttl)
+{
+ struct kr_request *req = ctx->req;
+ struct kr_query *qry = req->current_query;
+
+ uint16_t pkt_len;
+ memcpy(&pkt_len, eh->data, sizeof(pkt_len));
+ if (pkt_len > pkt->max_size) {
+ return kr_error(ENOENT);
+ }
+
+ /* Copy answer and reparse it, but keep the original message id. */
+ uint16_t msgid = knot_wire_get_id(pkt->wire);
+ knot_pkt_clear(pkt);
+ memcpy(pkt->wire, eh->data + 2, pkt_len);
+ pkt->size = pkt_len;
+ int ret = knot_pkt_parse(pkt, 0);
+ if (ret == KNOT_EFEWDATA || ret == KNOT_EMALF) {
+ return kr_error(ENOENT);
+ /* LATER(opt): try harder to avoid stashing such packets */
+ }
+ if (ret != KNOT_EOK) {
+ assert(!ret);
+ return kr_error(ret);
+ }
+ knot_wire_set_id(pkt->wire, msgid);
+
+ /* Add rank into the additional field. */
+ for (size_t i = 0; i < pkt->rrset_count; ++i) {
+ assert(!pkt->rr[i].additional);
+ uint8_t *rr_rank = mm_alloc(&pkt->mm, sizeof(*rr_rank));
+ if (!rr_rank) {
+ return kr_error(ENOMEM);
+ }
+ *rr_rank = eh->rank;
+ pkt->rr[i].additional = rr_rank;
+ }
+
+ /* Adjust TTL in each record. */
+ const uint32_t drift = eh->ttl - new_ttl;
+ for (knot_section_t i = KNOT_ANSWER; i <= KNOT_ADDITIONAL; ++i) {
+ const knot_pktsection_t *sec = knot_pkt_section(pkt, i);
+ for (unsigned k = 0; k < sec->count; ++k) {
+ knot_rrset_t *rrs = // vv FIXME??
+ /*const-cast*/(knot_rrset_t *)knot_pkt_rr(sec, k);
+ /* We need to be careful: due to enforcing minimum TTL
+ * on packet, some records may be below that value.
+ * We keep those records at TTL 0. */
+ if (rrs->ttl >= drift) {
+ rrs->ttl -= drift;
+ } else {
+ rrs->ttl = 0;
+ }
+ }
+ }
+
+ /* Finishing touches. TODO: perhaps factor out */
+ struct kr_qflags * const qf = &qry->flags;
+ qf->EXPIRING = is_expiring(eh->ttl, new_ttl);
+ qf->CACHED = true;
+ qf->NO_MINIMIZE = true;
+ qf->DNSSEC_INSECURE = kr_rank_test(eh->rank, KR_RANK_INSECURE);
+ qf->DNSSEC_BOGUS = kr_rank_test(eh->rank, KR_RANK_BOGUS);
+ if (qf->DNSSEC_INSECURE || qf->DNSSEC_BOGUS) {
+ qf->DNSSEC_WANT = false;
+ }
+ qf->DNSSEC_OPTOUT = eh->has_optout;
+ VERBOSE_MSG(qry, "=> satisfied by exact packet: rank 0%.2o, new TTL %d\n",
+ eh->rank, new_ttl);
+ return kr_ok();
+}
+
diff --git a/lib/cache/entry_rr.c b/lib/cache/entry_rr.c
new file mode 100644
index 0000000..94dd0e8
--- /dev/null
+++ b/lib/cache/entry_rr.c
@@ -0,0 +1,146 @@
+/* Copyright (C) 2017 CZ.NIC, z.s.p.o. <knot-dns@labs.nic.cz>
+
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program. If not, see <https://www.gnu.org/licenses/>.
+ */
+
+/** @file
+ * Implementation of RRset (de)materialization, i.e. (de)serialization to storage
+ * format used in cache (some repeated fields are omitted). Prototypes in ./impl.h
+ */
+
+#include "lib/cache/impl.h"
+
+int rdataset_dematerialize(const knot_rdataset_t *rds, uint8_t * restrict data)
+{
+ /* FIXME: either give up on even alignment and thus direct usability
+ * of rdatasets as they are in lmdb, or align inside cdb_* functions
+ * (request sizes one byte longer and shift iff on an odd address). */
+ //if ((size_t)data & 1) VERBOSE_MSG(NULL, "dematerialize: odd address\n");
+ //const uint8_t *data0 = data;
+ if (!data) {
+ assert(data);
+ return kr_error(EINVAL);
+ }
+ const uint16_t rr_count = rds ? rds->count : 0;
+ memcpy(data, &rr_count, sizeof(rr_count));
+ data += sizeof(rr_count);
+ if (rr_count) {
+ size_t size = knot_rdataset_size(rds);
+ memcpy(data, rds->rdata, size);
+ data += size;
+ }
+ //VERBOSE_MSG(NULL, "dematerialized to %d B\n", (int)(data - data0));
+ (void)data;
+ return kr_ok();
+}
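+
+/* The serialized layout produced above is simply:
+ *
+ *	uint16_t count;   // number of RRs; may be 0 (unaligned - hence the memcpy)
+ *	uint8_t rdata[];  // knot_rdataset_t::rdata copied verbatim,
+ *	                  // knot_rdataset_size(rds) bytes
+ *
+ * so an empty set costs just the 2-byte count. */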
+
+/** Materialize a knot_rdataset_t from its serialized form in cache.
+ * Return the number of bytes consumed or an error code.
+ */
+static int rdataset_materialize(knot_rdataset_t * restrict rds, const uint8_t * const data,
+ const uint8_t *data_bound, knot_mm_t *pool)
+{
+ assert(rds && data && data_bound && data_bound > data && !rds->rdata
+ /*&& !((size_t)data & 1)*/);
+ assert(pool); /* not required, but that's our current usage; guard leaks */
+ const uint8_t *d = data; /* iterates over the cache data */
+ {
+ uint16_t rr_count;
+ memcpy(&rr_count, d, sizeof(rr_count));
+ d += sizeof(rr_count);
+ rds->count = rr_count;
+ if (!rr_count) { /* avoid mm_alloc(pool, 0); etc. */
+ return d - data;
+ }
+ }
+ /* First sum up the sizes for wire format length. */
+ const knot_rdataset_t rds_tmp = {
+ .count = rds->count,
+ .rdata = (knot_rdata_t *)d,
+ };
+ size_t rds_size = knot_rdataset_size(&rds_tmp); /* TODO: we might overrun here already,
+ but we need to trust cache anyway...*/
+ if (d + rds_size > data_bound) {
+ VERBOSE_MSG(NULL, "materialize: EILSEQ!\n");
+ return kr_error(EILSEQ);
+ }
+ rds->rdata = mm_alloc(pool, rds_size);
+ if (!rds->rdata) {
+ return kr_error(ENOMEM);
+ }
+ memcpy(rds->rdata, d, rds_size);
+ d += rds_size;
+ //VERBOSE_MSG(NULL, "materialized from %d B\n", (int)(d - data));
+ return d - data;
+}
+
+int kr_cache_materialize(knot_rdataset_t *dst, const struct kr_cache_p *ref,
+ knot_mm_t *pool)
+{
+ struct entry_h *eh = ref->raw_data;
+ return rdataset_materialize(dst, eh->data, ref->raw_bound, pool);
+}
+
+
+int entry2answer(struct answer *ans, int id,
+ const struct entry_h *eh, const uint8_t *eh_bound,
+ const knot_dname_t *owner, uint16_t type, uint32_t new_ttl)
+{
+ /* We assume it's zeroed. Do basic sanity check. */
+ if (ans->rrsets[id].set.rr || ans->rrsets[id].sig_rds.rdata
+ || (type == KNOT_RRTYPE_NSEC && ans->nsec_p.raw)
+ || (type == KNOT_RRTYPE_NSEC3 && !ans->nsec_p.raw)
+ )
+ {
+ assert(false);
+ return kr_error(EINVAL);
+ }
+ /* Materialize the base RRset. */
+ knot_rrset_t *rr = ans->rrsets[id].set.rr
+ = knot_rrset_new(owner, type, KNOT_CLASS_IN, new_ttl, ans->mm);
+ if (!rr) {
+ assert(!ENOMEM);
+ return kr_error(ENOMEM);
+ }
+ int ret = rdataset_materialize(&rr->rrs, eh->data, eh_bound, ans->mm);
+ if (ret < 0) goto fail;
+ size_t data_off = ret;
+ ans->rrsets[id].set.rank = eh->rank;
+ ans->rrsets[id].set.expiring = is_expiring(eh->ttl, new_ttl);
+ /* Materialize the RRSIG RRset for the answer in (pseudo-)packet. */
+ bool want_rrsigs = true; /* LATER(optim.): might be omitted in some cases. */
+ if (want_rrsigs) {
+ ret = rdataset_materialize(&ans->rrsets[id].sig_rds, eh->data + data_off,
+ eh_bound, ans->mm);
+ if (ret < 0) goto fail;
+ /* Sanity check: we consumed exactly all data. */
+ int unused_bytes = eh_bound - (uint8_t *)eh->data - data_off - ret;
+ if (unused_bytes) {
+ kr_log_error("[cach] entry2answer ERROR: unused bytes: %d\n",
+ unused_bytes);
+ assert(!EILSEQ);
+ ret = kr_error(EILSEQ);
+ goto fail; /* to be on the safe side */
+ }
+ }
+ return kr_ok();
+fail:
+ assert(/*false*/!ret);
+ /* Cleanup the item that we might've (partially) written to. */
+ knot_rrset_free(ans->rrsets[id].set.rr, ans->mm);
+ knot_rdataset_clear(&ans->rrsets[id].sig_rds, ans->mm);
+ memset(&ans->rrsets[id], 0, sizeof(ans->rrsets[id]));
+ return kr_error(ret);
+}
+
diff --git a/lib/cache/impl.h b/lib/cache/impl.h
new file mode 100644
index 0000000..a08f355
--- /dev/null
+++ b/lib/cache/impl.h
@@ -0,0 +1,412 @@
+/* Copyright (C) 2017 CZ.NIC, z.s.p.o. <knot-dns@labs.nic.cz>
+
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program. If not, see <https://www.gnu.org/licenses/>.
+ */
+
+/** @file
+ * Header internal for cache implementation(s).
+ * Only LMDB works for now.
+ */
+#pragma once
+
+#include <stdbool.h>
+#include <stdint.h>
+
+#include <libdnssec/error.h>
+#include <libdnssec/nsec.h>
+#include <libknot/consts.h>
+#include <libknot/db/db.h>
+#include <libknot/dname.h>
+
+#include "contrib/cleanup.h"
+#include "contrib/murmurhash3/murmurhash3.h" /* hash() for nsec_p_hash() */
+#include "lib/cache/cdb_api.h"
+#include "lib/resolve.h"
+
+/* Cache entry values - binary layout.
+ *
+ * It depends on type which is recognizable by the key.
+ * Code depending on the contents of the key is marked by CACHE_KEY_DEF.
+ *
+ * 'E' entry (exact hit):
+ * - ktype == NS: struct entry_apex - multiple types inside (NS and xNAME);
+ * - ktype != NS: struct entry_h
+ * * is_packet: uint16_t length, the rest is opaque and handled by ./entry_pkt.c
+ * * otherwise RRset + its RRSIG set (possibly empty).
+ * '1' or '3' entry (NSEC or NSEC3):
+ * - struct entry_h, contents are the same as for an exact hit
+ * - flags don't make sense there
+ */
+
+struct entry_h {
+ uint32_t time; /**< The time of inception. */
+ uint32_t ttl; /**< TTL at inception moment. Assuming it fits into int32_t ATM. */
+ uint8_t rank : 6; /**< See enum kr_rank */
+ bool is_packet : 1; /**< Negative-answer packet for insecure/bogus name. */
+ bool has_optout : 1; /**< Only for packets; persisted DNSSEC_OPTOUT. */
+ uint8_t _pad; /**< We need even alignment for data now. */
+ uint8_t data[];
+};
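+
+/* An illustrative sketch (not normative) of one such entry for ktype != NS
+ * holding a positive RRset: the fixed header above, followed in ->data by
+ * the RRset and then its (possibly empty) RRSIG set, both in the
+ * rdataset_dematerialize() format:
+ *
+ *	[time 4B][ttl 4B][rank/flags 1B][_pad 1B] | [count+rdata][count+rdata]
+ */
+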
+struct entry_apex;
+
+/** Check basic consistency of entry_h for 'E' entries, not looking into ->data.
+ * (for is_packet the length of data is checked)
+ */
+struct entry_h * entry_h_consistent(knot_db_val_t data, uint16_t type);
+
+struct entry_apex * entry_apex_consistent(knot_db_val_t val);
+
+/** Consistency check, ATM common for NSEC and NSEC3. */
+static inline struct entry_h * entry_h_consistent_NSEC(knot_db_val_t data)
+{
+ /* ATM it's enough to just extend the checks for exact entries. */
+ const struct entry_h *eh = entry_h_consistent(data, KNOT_RRTYPE_NSEC);
+ bool ok = eh != NULL;
+ ok = ok && !eh->is_packet && !eh->has_optout;
+ return ok ? /*const-cast*/(struct entry_h *)eh : NULL;
+}
+
+
+/* nsec_p* - NSEC* chain parameters */
+
+static inline int nsec_p_rdlen(const uint8_t *rdata)
+{
+ //TODO: do we really need the zero case?
+ return rdata ? 5 + rdata[4] : 0; /* rfc5155 4.2 and 3.2. */
+}
+static const int NSEC_P_MAXLEN = sizeof(uint32_t) + 5 + 255; // TODO: remove??
+
+/** Hash of NSEC3 parameters, used as a tag to separate different chains for same zone. */
+typedef uint32_t nsec_p_hash_t;
+static inline nsec_p_hash_t nsec_p_mkHash(const uint8_t *nsec_p)
+{
+ assert(nsec_p && !(KNOT_NSEC3_FLAG_OPT_OUT & nsec_p[1]));
+ return hash((const char *)nsec_p, nsec_p_rdlen(nsec_p));
+}
+
+/** NSEC* parameters for the chain. */
+struct nsec_p {
+ const uint8_t *raw; /**< Pointer to raw NSEC3 parameters; NULL for NSEC. */
+ nsec_p_hash_t hash; /**< Hash of `raw`, used for cache keys. */
+ dnssec_nsec3_params_t libknot; /**< Format for libknot; owns malloced memory! */
+};
+
+
+
+/** LATER(optim.): this is overshot, but struct key usage should be cheap ATM. */
+#define KR_CACHE_KEY_MAXLEN (KNOT_DNAME_MAXLEN + 100) /* CACHE_KEY_DEF */
+
+struct key {
+ const knot_dname_t *zname; /**< current zone name (points within qry->sname) */
+ uint8_t zlf_len; /**< length of current zone's lookup format */
+
+ /** Corresponding key type; e.g. NS for CNAME.
+ * Note: NSEC type is ambiguous (exact and range key). */
+ uint16_t type;
+ /** The key data start at buf+1, and buf[0] contains some length.
+ * For details see key_exact* and key_NSEC* functions. */
+ uint8_t buf[KR_CACHE_KEY_MAXLEN];
+ /* LATER(opt.): ^^ probably change the anchoring, so that kr_dname_lf()
+ * doesn't need to move data after knot_dname_lf(). */
+};
+
+static inline size_t key_nwz_off(const struct key *k)
+{
+ /* CACHE_KEY_DEF: zone name lf + 0 ('1' or '3').
+ * NSEC '1' case continues just with the name within zone. */
+ return k->zlf_len + 2;
+}
+static inline size_t key_nsec3_hash_off(const struct key *k)
+{
+ /* CACHE_KEY_DEF NSEC3: tag (nsec_p_hash_t) + 20 bytes of NSEC3 name hash */
+ return key_nwz_off(k) + sizeof(nsec_p_hash_t);
+}
+/** Hash is always SHA1; I see no plans to standardize anything else.
+ * https://www.iana.org/assignments/dnssec-nsec3-parameters/dnssec-nsec3-parameters.xhtml#dnssec-nsec3-parameters-3
+ */
+static const int NSEC3_HASH_LEN = 20,
+ NSEC3_HASH_TXT_LEN = 32;
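+
+/* Putting the offsets above together, an NSEC3 range key is laid out as
+ * (a sketch following the CACHE_KEY_DEF comments; sizes in bytes):
+ *
+ *	[zone lf, zlf_len][0]['3'][nsec_p hash tag, 4][NSEC3 name hash, 20]
+ *	                          ^ key_nwz_off()     ^ key_nsec3_hash_off()
+ */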
+
+/** Finish constructing a string key for exact search.
+ * It's assumed that kr_dname_lf(k->buf, owner, *) has been run.
+ */
+knot_db_val_t key_exact_type_maypkt(struct key *k, uint16_t type);
+
+/** Like key_exact_type_maypkt but with extra checks if used for RRs only. */
+static inline knot_db_val_t key_exact_type(struct key *k, uint16_t type)
+{
+ switch (type) {
+ /* Sanity check: forbidden types represented in other way(s). */
+ case KNOT_RRTYPE_NSEC:
+ case KNOT_RRTYPE_NSEC3:
+ assert(false);
+ return (knot_db_val_t){ NULL, 0 };
+ }
+ return key_exact_type_maypkt(k, type);
+}
+
+
+/* entry_h chaining; implementation in ./entry_list.c */
+
+enum { ENTRY_APEX_NSECS_CNT = 2 };
+
+/** Header of 'E' entry with ktype == NS. Inside is private to ./entry_list.c
+ *
+ * We store xNAME at NS type to lower the number of searches in closest_NS().
+ * CNAME is only considered for equal name, of course.
+ * We also store NSEC* parameters at NS type.
+ */
+struct entry_apex {
+ /* ENTRY_H_FLAGS */
+ bool has_ns : 1;
+ bool has_cname : 1;
+ bool has_dname : 1;
+
+ uint8_t pad_; /**< Weird: 1 byte + 2 bytes + x bytes; let's do 2+2+x. */
+ int8_t nsecs[ENTRY_APEX_NSECS_CNT]; /**< values: 0: none, 1: NSEC, 3: NSEC3 */
+ uint8_t data[];
+ /* XXX: if not first, stamp of last being the first?
+ * Purpose: save cache operations if rolled the algo/params long ago. */
+};
+
+/** Indices for decompressed entry_list_t. */
+enum EL {
+ EL_NS = ENTRY_APEX_NSECS_CNT,
+ EL_CNAME,
+ EL_DNAME,
+ EL_LENGTH
+};
+/** Decompressed entry_apex. It's an array of unparsed entry_h references.
+ * Note: arrays are passed "by reference" to functions (in C99). */
+typedef knot_db_val_t entry_list_t[EL_LENGTH];
+
+static inline uint16_t EL2RRTYPE(enum EL i)
+{
+ switch (i) {
+ case EL_NS: return KNOT_RRTYPE_NS;
+ case EL_CNAME: return KNOT_RRTYPE_CNAME;
+ case EL_DNAME: return KNOT_RRTYPE_DNAME;
+ default: assert(false); return 0;
+ }
+}
+
+/** There may be multiple entries within, so rewind `val` to the one we want.
+ *
+ * ATM there are multiple types only for the NS ktype - it also accommodates xNAMEs.
+ * \note `val->len` represents the bound of the whole list, not of a single entry.
+ * \note in case of ENOENT, `val` is still rewound to the beginning of the next entry.
+ * \return error code
+ * TODO: maybe get rid of this API?
+ */
+int entry_h_seek(knot_db_val_t *val, uint16_t type);
+
+/** Prepare space to insert an entry.
+ *
+ * Some checks are performed (rank, TTL), the current entry in cache is copied
+ * with a hole ready for the new entry (old one of the same type is cut out).
+ *
+ * \param val_new_entry The only changing parameter; ->len is read, ->data written.
+ * \return error code
+ */
+int entry_h_splice(
+ knot_db_val_t *val_new_entry, uint8_t rank,
+ const knot_db_val_t key, const uint16_t ktype, const uint16_t type,
+ const knot_dname_t *owner/*log only*/,
+ const struct kr_query *qry, struct kr_cache *cache, uint32_t timestamp);
+
+/** Parse an entry_apex into individual items. @return error code. */
+int entry_list_parse(const knot_db_val_t val, entry_list_t list);
+
+static inline size_t to_even(size_t n)
+{
+ return n + (n & 1);
+}
+
+static inline int entry_list_serial_size(const entry_list_t list)
+{
+ int size = offsetof(struct entry_apex, data);
+ for (int i = 0; i < EL_LENGTH; ++i) {
+ size += to_even(list[i].len);
+ }
+ return size;
+}
+
+/** Fill contents of an entry_apex.
+ *
+ * @note NULL pointers are overwritten - caller may like to fill the space later.
+ */
+void entry_list_memcpy(struct entry_apex *ea, entry_list_t list);
+
+
+
+/* Packet caching; implementation in ./entry_pkt.c */
+
+/** Stash the packet into cache (if suitable, etc.)
+ * \param has_optout whether the packet contains an opt-out NSEC3 */
+void stash_pkt(const knot_pkt_t *pkt, const struct kr_query *qry,
+ const struct kr_request *req, bool has_optout);
+
+/** Try answering from packet cache, given an entry_h.
+ *
+ * This assumes the TTL is OK and entry_h_consistent, but it may still return error.
+ * On success it handles all the rest, incl. qry->flags.
+ */
+int answer_from_pkt(kr_layer_t *ctx, knot_pkt_t *pkt, uint16_t type,
+ const struct entry_h *eh, const void *eh_bound, uint32_t new_ttl);
+
+
+/** A record is expiring once its remaining TTL drops below 1% of the
+ * original TTL plus a 5-second margin. */
+static inline bool is_expiring(uint32_t orig_ttl, uint32_t new_ttl)
+{
+ int64_t nttl = new_ttl; /* avoid potential over/under-flow */
+ return 100 * (nttl - 5) < orig_ttl;
+}
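+
+/* Worked example: for orig_ttl = 3600 the condition reads
+ * 100 * (new_ttl - 5) < 3600, i.e. the record counts as expiring
+ * once new_ttl < 41 (1% of the original TTL plus the 5 s margin). */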
+
+/** Returns a signed result so you can inspect how stale the RR is.
+ *
+ * @param owner name for stale-serving decisions. You may pass NULL to disable stale.
+ * @note: NSEC* uses zone name ATM; for NSEC3 the owner may not even be knowable.
+ * @param type for stale-serving.
+ */
+int32_t get_new_ttl(const struct entry_h *entry, const struct kr_query *qry,
+ const knot_dname_t *owner, uint16_t type, uint32_t now);
+
+
+/* RRset (de)materialization; implementation in ./entry_rr.c */
+
+/** Size of the RR count field */
+#define KR_CACHE_RR_COUNT_SIZE sizeof(uint16_t)
+
+/** Compute size of serialized rdataset. NULL is accepted as empty set. */
+static inline int rdataset_dematerialize_size(const knot_rdataset_t *rds)
+{
+ return KR_CACHE_RR_COUNT_SIZE + (rds == NULL ? 0 : knot_rdataset_size(rds));
+}
+
+static inline int rdataset_dematerialized_size(const uint8_t *data)
+{
+ knot_rdataset_t rds;
+ memcpy(&rds.count, data, sizeof(rds.count));
+ rds.rdata = (knot_rdata_t *)(data + sizeof(rds.count));
+ return sizeof(rds.count) + knot_rdataset_size(&rds);
+}
+
+/** Serialize an rdataset. */
+int rdataset_dematerialize(const knot_rdataset_t *rds, uint8_t * restrict data);
+
+
+/** Partially constructed answer when gathering RRsets from cache. */
+struct answer {
+ int rcode; /**< PKT_NODATA, etc. */
+ struct nsec_p nsec_p; /**< Don't mix different NSEC* parameters in one answer. */
+ knot_mm_t *mm; /**< Allocator for rrsets */
+ struct answer_rrset {
+ ranked_rr_array_entry_t set; /**< set+rank for the main data */
+ knot_rdataset_t sig_rds; /**< RRSIG data, if any */
+ } rrsets[1+1+3]; /**< see AR_ANSWER and friends; only required records are filled */
+};
+enum {
+ AR_ANSWER = 0, /**< Positive answer record. It might be wildcard-expanded. */
+ AR_SOA, /**< SOA record. */
+ AR_NSEC, /**< NSEC* covering or matching the SNAME (next closer name in NSEC3 case). */
+ AR_WILD, /**< NSEC* covering or matching the source of synthesis. */
+ AR_CPE, /**< NSEC3 matching the closest provable encloser. */
+};
+
+/** Materialize RRset + RRSIGs into ans->rrsets[id].
+ * LATER(optim.): it's slightly wasteful that we allocate knot_rrset_t for the packet
+ *
+ * \return error code. They are all bad conditions and "guarded" by assert.
+ */
+int entry2answer(struct answer *ans, int id,
+ const struct entry_h *eh, const uint8_t *eh_bound,
+ const knot_dname_t *owner, uint16_t type, uint32_t new_ttl);
+
+
+/* Preparing knot_pkt_t for cache answer from RRs; implementation in ./knot_pkt.c */
+
+/** Prepare answer packet to be filled by RRs (without RR data in wire). */
+int pkt_renew(knot_pkt_t *pkt, const knot_dname_t *name, uint16_t type);
+
+/** Append RRset + its RRSIGs into the current section (*shallow* copy), with given rank.
+ * \note it works with empty set as well (skipped)
+ * \note pkt->wire is not updated in any way
+ * \note KNOT_CLASS_IN is assumed
+ */
+int pkt_append(knot_pkt_t *pkt, const struct answer_rrset *rrset, uint8_t rank);
+
+
+
+/* NSEC (1) stuff. Implementation in ./nsec1.c */
+
+/** Construct a string key for NSEC (1) predecessor-search.
+ * \param add_wildcard Act as if the name was extended by "*."
+ * \note k->zlf_len is assumed to have been correctly set */
+knot_db_val_t key_NSEC1(struct key *k, const knot_dname_t *name, bool add_wildcard);
+
+/** Closest encloser check for NSEC (1).
+ * To understand the interface, see the call point.
+ * \param k space to store key + input: zname and zlf_len
+ * \return 0: success; >0: try other (NSEC3); <0: exit cache immediately. */
+int nsec1_encloser(struct key *k, struct answer *ans,
+ const int sname_labels, int *clencl_labels,
+ knot_db_val_t *cover_low_kwz, knot_db_val_t *cover_hi_kwz,
+ const struct kr_query *qry, struct kr_cache *cache);
+
+/** Source of synthesis (SS) check for NSEC (1).
+ * To understand the interface, see the call point.
+ * \return 0: continue; <0: exit cache immediately;
+ * AR_SOA: skip to adding SOA (SS was covered or matched for NODATA). */
+int nsec1_src_synth(struct key *k, struct answer *ans, const knot_dname_t *clencl_name,
+ knot_db_val_t cover_low_kwz, knot_db_val_t cover_hi_kwz,
+ const struct kr_query *qry, struct kr_cache *cache);
+
+
+/* NSEC3 stuff. Implementation in ./nsec3.c */
+
+/** Construct a string key for NSEC3 predecessor-search, from an NSEC3 name.
+ * \note k->zlf_len is assumed to have been correctly set */
+knot_db_val_t key_NSEC3(struct key *k, const knot_dname_t *nsec3_name,
+ const nsec_p_hash_t nsec_p_hash);
+
+/** TODO. See nsec1_encloser(...) */
+int nsec3_encloser(struct key *k, struct answer *ans,
+ const int sname_labels, int *clencl_labels,
+ const struct kr_query *qry, struct kr_cache *cache);
+
+/** TODO. See nsec1_src_synth(...) */
+int nsec3_src_synth(struct key *k, struct answer *ans, const knot_dname_t *clencl_name,
+ const struct kr_query *qry, struct kr_cache *cache);
+
+
+
+#define VERBOSE_MSG(qry, ...) QRVERBOSE((qry), "cach", ## __VA_ARGS__)
+
+/** Shorthand for operations on cache backend */
+#define cache_op(cache, op, ...) (cache)->api->op((cache)->db, ## __VA_ARGS__)
+
+
+static inline uint16_t get_uint16(const void *address)
+{
+ uint16_t tmp;
+ memcpy(&tmp, address, sizeof(tmp));
+ return tmp;
+}
+
+/** Useful pattern, especially as void-pointer arithmetic isn't standard-compliant. */
+static inline uint8_t * knot_db_val_bound(knot_db_val_t val)
+{
+ return (uint8_t *)val.data + val.len;
+}
+
diff --git a/lib/cache/knot_pkt.c b/lib/cache/knot_pkt.c
new file mode 100644
index 0000000..56836a6
--- /dev/null
+++ b/lib/cache/knot_pkt.c
@@ -0,0 +1,106 @@
+/* Copyright (C) 2017 CZ.NIC, z.s.p.o. <knot-dns@labs.nic.cz>
+
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program. If not, see <https://www.gnu.org/licenses/>.
+ */
+
+/** @file
+ * Implementation of preparing knot_pkt_t for filling with RRs.
+ * Prototypes in ./impl.h
+ */
+
+#include "lib/cache/impl.h"
+
+int pkt_renew(knot_pkt_t *pkt, const knot_dname_t *name, uint16_t type)
+{
+ /* Update packet question if needed. */
+ if (!knot_dname_is_equal(knot_pkt_qname(pkt), name)
+ || knot_pkt_qtype(pkt) != type || knot_pkt_qclass(pkt) != KNOT_CLASS_IN) {
+ int ret = kr_pkt_recycle(pkt);
+ if (ret) return kr_error(ret);
+ ret = knot_pkt_put_question(pkt, name, KNOT_CLASS_IN, type);
+ if (ret) return kr_error(ret);
+ }
+
+ pkt->parsed = pkt->size = PKT_SIZE_NOWIRE;
+ knot_wire_set_qr(pkt->wire);
+ knot_wire_set_aa(pkt->wire);
+ return kr_ok();
+}
+
+/** Reserve space for additional `count` RRsets.
+ * \note pkt->rr_info gets correct length but is always zeroed
+ */
+static int pkt_alloc_space(knot_pkt_t *pkt, int count)
+{
+ size_t allocd_orig = pkt->rrset_allocd;
+ if (pkt->rrset_count + count <= allocd_orig) {
+ return kr_ok();
+ }
+ /* A simple growth strategy, amortized O(count). */
+ pkt->rrset_allocd = MAX(
+ pkt->rrset_count + count,
+ pkt->rrset_count + allocd_orig);
+
+ pkt->rr = mm_realloc(&pkt->mm, pkt->rr,
+ sizeof(pkt->rr[0]) * pkt->rrset_allocd,
+ sizeof(pkt->rr[0]) * allocd_orig);
+ if (!pkt->rr) {
+ return kr_error(ENOMEM);
+ }
+ /* Allocate pkt->rr_info to be certain, but just leave it zeroed. */
+ mm_free(&pkt->mm, pkt->rr_info);
+ pkt->rr_info = mm_alloc(&pkt->mm, sizeof(pkt->rr_info[0]) * pkt->rrset_allocd);
+ if (!pkt->rr_info) {
+ return kr_error(ENOMEM);
+ }
+ memset(pkt->rr_info, 0, sizeof(pkt->rr_info[0]) * pkt->rrset_allocd);
+ return kr_ok();
+}
+
+int pkt_append(knot_pkt_t *pkt, const struct answer_rrset *rrset, uint8_t rank)
+{
+ /* allocate space, to be sure */
+ int rrset_cnt = (rrset->set.rr->rrs.count > 0) + (rrset->sig_rds.count > 0);
+ int ret = pkt_alloc_space(pkt, rrset_cnt);
+ if (ret) return kr_error(ret);
+ /* write both sets */
+ const knot_rdataset_t *rdss[2] = { &rrset->set.rr->rrs, &rrset->sig_rds };
+ for (int i = 0; i < rrset_cnt; ++i) {
+ assert(rdss[i]->count);
+ /* allocate rank */
+ uint8_t *rr_rank = mm_alloc(&pkt->mm, sizeof(*rr_rank));
+ if (!rr_rank) return kr_error(ENOMEM);
+ *rr_rank = (i == 0) ? rank : (KR_RANK_OMIT | KR_RANK_AUTH);
+ /* rank for RRSIGs isn't really useful: ^^ */
+ if (i == 0) {
+ pkt->rr[pkt->rrset_count] = *rrset->set.rr;
+ pkt->rr[pkt->rrset_count].additional = rr_rank;
+ } else {
+ /* append the RR array */
+ pkt->rr[pkt->rrset_count] = (knot_rrset_t){
+ .owner = knot_dname_copy(rrset->set.rr->owner, &pkt->mm),
+ /* ^^ well, another copy isn't really needed */
+ .ttl = rrset->set.rr->ttl,
+ .type = KNOT_RRTYPE_RRSIG,
+ .rclass = KNOT_CLASS_IN,
+ .rrs = *rdss[i],
+ .additional = rr_rank,
+ };
+ }
+ ++pkt->rrset_count;
+ ++(pkt->sections[pkt->current].count);
+ }
+ return kr_ok();
+}
+
diff --git a/lib/cache/nsec1.c b/lib/cache/nsec1.c
new file mode 100644
index 0000000..74b3d34
--- /dev/null
+++ b/lib/cache/nsec1.c
@@ -0,0 +1,511 @@
+/* Copyright (C) 2017 CZ.NIC, z.s.p.o. <knot-dns@labs.nic.cz>
+
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program. If not, see <https://www.gnu.org/licenses/>.
+ */
+
+/** @file
+ * Implementation of NSEC (1) handling. Prototypes in ./impl.h
+ */
+
+#include "lib/cache/impl.h"
+#include "lib/dnssec/nsec.h"
+#include "lib/layer/iterate.h"
+
+
+/** Reconstruct a name into a buffer (assuming a buffer length of at least KNOT_DNAME_MAXLEN).
+ * \return kr_ok() or error code (<0). */
+static int dname_wire_reconstruct(knot_dname_t *buf, const struct key *k,
+ knot_db_val_t kwz)
+{
+ /* Reconstruct from key: first the ending, then zone name. */
+ int ret = knot_dname_lf2wire(buf, kwz.len, kwz.data);
+ if (ret < 0) {
+ VERBOSE_MSG(NULL, "=> NSEC: LF2wire ret = %d\n", ret);
+ assert(false);
+ return ret;
+ }
+ /* The last written byte is the zero label for root -> overwrite. */
+ knot_dname_t *zone_start = buf + ret - 1;
+ assert(*zone_start == '\0');
+ ret = knot_dname_to_wire(zone_start, k->zname, KNOT_DNAME_MAXLEN - kwz.len);
+ if (ret != k->zlf_len + 1) {
+ assert(false);
+ return ret < 0 ? ret : kr_error(EILSEQ);
+ }
+ return kr_ok();
+}
+
+
+knot_db_val_t key_NSEC1(struct key *k, const knot_dname_t *name, bool add_wildcard)
+{
+ /* we basically need dname_lf with two bytes added
+	 * at the right place within the name (the cut) */
+ int ret;
+ const bool ok = k && name
+ && !(ret = kr_dname_lf(k->buf, name, add_wildcard));
+ if (!ok) {
+ assert(false);
+ return (knot_db_val_t){ NULL, 0 };
+ }
+
+ uint8_t *begin = k->buf + 1 + k->zlf_len; /* one byte after zone's zero */
+ uint8_t *end = k->buf + 1 + k->buf[0]; /* we don't use the final zero in key,
+ * but move it anyway */
+ if (end < begin) {
+ assert(false);
+ return (knot_db_val_t){ NULL, 0 };
+ }
+ int key_len;
+ if (end > begin) {
+ memmove(begin + 2, begin, end - begin);
+ key_len = k->buf[0] + 1;
+ } else {
+ key_len = k->buf[0] + 2;
+ }
+ /* CACHE_KEY_DEF: key == zone's dname_lf + '\0' + '1' + dname_lf
+ * of the name within the zone without the final 0. Iff the latter is empty,
+ * there's no zero to cut and thus the key_len difference.
+ */
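+	/* Illustrative sketch of the layout above: e.g. for "www.example."
+	 * in zone "example.", the key is the zone's dname_lf, then '\0' '1',
+	 * then the dname_lf remainder for "www" with its final zero dropped;
+	 * for the zone apex itself the part after '1' is empty. */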
+ begin[0] = 0;
+ begin[1] = '1'; /* tag for NSEC1 */
+ k->type = KNOT_RRTYPE_NSEC;
+
+ /*
+ VERBOSE_MSG(NULL, "<> key_NSEC1; name: ");
+ kr_dname_print(name, add_wildcard ? "*." : "" , " ");
+ kr_log_verbose("(zone name LF length: %d; total key length: %d)\n",
+ k->zlf_len, key_len);
+ */
+
+ return (knot_db_val_t){ k->buf + 1, key_len };
+}
+
+
+/** Assuming that k1 < k4, find where k2 is. (Considers DNS wrap-around.)
+ *
+ * \return Intuition: position of k2 among kX.
+ * 0: k2 < k1; 1: k1 == k2; 2: k1 is a prefix of k2 < k4;
+ * 3: k1 < k2 < k4 (and not 2); 4: k2 == k4; 5: k2 > k4
+ * \note k1.data may be NULL, meaning we assume that k1 < k2 and that k1
+ *       is not a prefix of k2 (i.e. the return code will be > 2)
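+ * \note A sketch with ASCII keys, assuming plain byte comparison:
+ *       for k1="a", k4="d" we get k2="A" -> 0, "a" -> 1, "ab" -> 2,
+ *       "b" -> 3, "d" -> 4, "e" -> 5; with k4 empty (wrap-around),
+ *       k2="b" -> 3.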
+ */
+static int kwz_between(knot_db_val_t k1, knot_db_val_t k2, knot_db_val_t k4)
+{
+ assert(k2.data && k4.data);
+ /* CACHE_KEY_DEF; we need to beware of one key being a prefix of another */
+ int ret_maybe; /**< result, assuming we confirm k2 < k4 */
+ if (k1.data) {
+ const int cmp12 = memcmp(k1.data, k2.data, MIN(k1.len, k2.len));
+ if (cmp12 == 0 && k1.len == k2.len) /* iff k1 == k2 */
+ return 1;
+ if (cmp12 > 0 || (cmp12 == 0 && k1.len > k2.len)) /* iff k1 > k2 */
+ return 0;
+ ret_maybe = cmp12 == 0 ? 2 : 3;
+ } else {
+ ret_maybe = 3;
+ }
+ if (k4.len == 0) { /* wrap-around */
+ return k2.len > 0 ? ret_maybe : 4;
+ } else {
+ const int cmp24 = memcmp(k2.data, k4.data, MIN(k2.len, k4.len));
+ if (cmp24 == 0 && k2.len == k4.len) /* iff k2 == k4 */
+ return 4;
+ if (cmp24 > 0 || (cmp24 == 0 && k2.len > k4.len)) /* iff k2 > k4 */
+ return 5;
+ return ret_maybe;
+ }
+}
+
+
+/** NSEC1 range search.
+ *
+ * \param key Pass output of key_NSEC1(k, ...)
+ * \param value[out] The raw data of the NSEC cache record (optional; consistency checked).
+ * \param exact_match[out] Whether the key was matched exactly or just covered (optional).
+ * \param kwz_low[out] Output the low end of covering NSEC, pointing within DB (optional).
+ * \param kwz_high[in,out] Storage for the high end of covering NSEC (optional).
+ * It's only set if !exact_match.
+ * \param new_ttl[out] New TTL of the NSEC (optional).
+ * \return Error message or NULL.
+ * \note The function itself does *no* bitmap checks, e.g. RFC 6840 sec. 4.
+ */
+static const char * find_leq_NSEC1(struct kr_cache *cache, const struct kr_query *qry,
+ const knot_db_val_t key, const struct key *k, knot_db_val_t *value,
+ bool *exact_match, knot_db_val_t *kwz_low, knot_db_val_t *kwz_high,
+ uint32_t *new_ttl)
+{
+ /* Do the cache operation. */
+ const size_t nwz_off = key_nwz_off(k);
+ if (!key.data || key.len < nwz_off) {
+ assert(false);
+ return "range search ERROR";
+ }
+ knot_db_val_t key_nsec = key;
+ knot_db_val_t val = { NULL, 0 };
+ int ret = cache_op(cache, read_leq, &key_nsec, &val);
+ if (ret < 0) {
+ if (ret == kr_error(ENOENT)) {
+ return "range search miss";
+ } else {
+ assert(false);
+ return "range search ERROR";
+ }
+ }
+ if (value) {
+ *value = val;
+ }
+ /* Check consistency, TTL, rank. */
+ const bool is_exact = (ret == 0);
+ if (exact_match) {
+ *exact_match = is_exact;
+ }
+ const struct entry_h *eh = entry_h_consistent_NSEC(val);
+ if (!eh) {
+		/* This might just be finding something other than an NSEC1 entry,
+ * in case we searched before the very first one in the zone. */
+ return "range search found inconsistent entry";
+ }
+ /* Passing just zone name instead of owner, as we don't
+ * have it reconstructed at this point. */
+ int32_t new_ttl_ = get_new_ttl(eh, qry, k->zname, KNOT_RRTYPE_NSEC,
+ qry->timestamp.tv_sec);
+ if (new_ttl_ < 0 || !kr_rank_test(eh->rank, KR_RANK_SECURE)) {
+ return "range search found stale or insecure entry";
+ /* TODO: remove the stale record *and* retry,
+ * in case we haven't run off. Perhaps start by in_zone check. */
+ }
+ if (new_ttl) {
+ *new_ttl = new_ttl_;
+ }
+ if (kwz_low) {
+ *kwz_low = (knot_db_val_t){
+ .data = (uint8_t *)key_nsec.data + nwz_off,
+ .len = key_nsec.len - nwz_off,
+ }; /* CACHE_KEY_DEF */
+ }
+ if (is_exact) {
+ /* Nothing else to do. */
+ return NULL;
+ }
+ /* The NSEC starts strictly before our target name;
+	 * now check that it still belongs to that zone. */
+ const bool nsec_in_zone = key_nsec.len >= nwz_off
+ /* CACHE_KEY_DEF */
+ && memcmp(key.data, key_nsec.data, nwz_off) == 0;
+ if (!nsec_in_zone) {
+ return "range search miss (!nsec_in_zone)";
+ }
+ /* We know it starts before sname, so let's check the other end.
+ * 1. construct the key for the next name - kwz_hi. */
+	/* `next` is the *full* name at this point */
+ const knot_rdata_t *next = (const knot_rdata_t *)
+ (eh->data + KR_CACHE_RR_COUNT_SIZE);
+ if (KR_CACHE_RR_COUNT_SIZE != 2 || get_uint16(eh->data) == 0) {
+ assert(false);
+ return "ERROR";
+ /* TODO: more checks? */
+ }
+ /*
+ WITH_VERBOSE {
+ VERBOSE_MSG(qry, "=> NSEC: next name: ");
+ kr_dname_print(next, "", "\n");
+ }
+ */
+ knot_dname_t ch_buf[KNOT_DNAME_MAXLEN];
+ knot_dname_t *chs = kwz_high ? kwz_high->data : ch_buf;
+ if (!chs) {
+ assert(false);
+ return "EINVAL";
+ }
+ {
+ /* Lower-case chs; see also RFC 6840 5.1.
+ * LATER(optim.): we do lots of copying etc. */
+ knot_dname_t lower_buf[KNOT_DNAME_MAXLEN];
+ ret = knot_dname_to_wire(lower_buf, next->data,
+ MIN(next->len, KNOT_DNAME_MAXLEN));
+ if (ret < 0) { /* _ESPACE */
+ return "range search found record with incorrect contents";
+ }
+ knot_dname_to_lower(lower_buf);
+ ret = kr_dname_lf(chs, lower_buf, false);
+ }
+ if (ret) {
+ assert(false);
+ return "ERROR";
+ }
+ knot_db_val_t kwz_hi = { /* skip the zone name */
+ .data = chs + 1 + k->zlf_len,
+ .len = chs[0] - k->zlf_len,
+ };
+ assert((ssize_t)(kwz_hi.len) >= 0);
+ /* 2. do the actual range check. */
+ const knot_db_val_t kwz_sname = {
+ .data = (void *)/*const-cast*/(k->buf + 1 + nwz_off),
+ .len = k->buf[0] - k->zlf_len,
+ };
+ assert((ssize_t)(kwz_sname.len) >= 0);
+ bool covers = /* we know for sure that the low end is before kwz_sname */
+ 3 == kwz_between((knot_db_val_t){ NULL, 0 }, kwz_sname, kwz_hi);
+ if (!covers) {
+ return "range search miss (!covers)";
+ }
+ if (kwz_high) {
+ *kwz_high = kwz_hi;
+ }
+ return NULL;
+}
+
+
+int nsec1_encloser(struct key *k, struct answer *ans,
+ const int sname_labels, int *clencl_labels,
+ knot_db_val_t *cover_low_kwz, knot_db_val_t *cover_hi_kwz,
+ const struct kr_query *qry, struct kr_cache *cache)
+{
+ static const int ESKIP = ABS(ENOENT);
+ /* Basic sanity check. */
+ const bool ok = k && ans && clencl_labels && cover_low_kwz && cover_hi_kwz
+ && qry && cache;
+ if (!ok) {
+ assert(!EINVAL);
+ return kr_error(EINVAL);
+ }
+
+ /* Find a previous-or-equal name+NSEC in cache covering the QNAME,
+ * checking TTL etc. */
+ knot_db_val_t key = key_NSEC1(k, qry->sname, false);
+ knot_db_val_t val = { NULL, 0 };
+ bool exact_match;
+ uint32_t new_ttl;
+ const char *err = find_leq_NSEC1(cache, qry, key, k, &val,
+ &exact_match, cover_low_kwz, cover_hi_kwz, &new_ttl);
+ if (err) {
+ VERBOSE_MSG(qry, "=> NSEC sname: %s\n", err);
+ return ESKIP;
+ }
+
+ /* Get owner name of the record. */
+ const knot_dname_t *owner;
+ knot_dname_t owner_buf[KNOT_DNAME_MAXLEN];
+ if (exact_match) {
+ owner = qry->sname;
+ } else {
+ int ret = dname_wire_reconstruct(owner_buf, k, *cover_low_kwz);
+ if (unlikely(ret)) return ESKIP;
+ owner = owner_buf;
+ }
+ /* Basic checks OK -> materialize data. */
+ {
+ const struct entry_h *nsec_eh = val.data;
+ int ret = entry2answer(ans, AR_NSEC, nsec_eh, knot_db_val_bound(val),
+ owner, KNOT_RRTYPE_NSEC, new_ttl);
+ if (ret) return kr_error(ret);
+ }
+
+ /* Final checks, split for matching vs. covering our sname. */
+ const knot_rrset_t *nsec_rr = ans->rrsets[AR_NSEC].set.rr;
+ const uint8_t *bm = knot_nsec_bitmap(nsec_rr->rrs.rdata);
+ uint16_t bm_size = knot_nsec_bitmap_len(nsec_rr->rrs.rdata);
+ assert(bm);
+
+ if (exact_match) {
+ if (kr_nsec_bitmap_nodata_check(bm, bm_size, qry->stype, nsec_rr->owner) != 0) {
+ VERBOSE_MSG(qry,
+ "=> NSEC sname: match but failed type check\n");
+ return ESKIP;
+ }
+ /* NODATA proven; just need to add SOA+RRSIG later */
+ VERBOSE_MSG(qry, "=> NSEC sname: match proved NODATA, new TTL %d\n",
+ new_ttl);
+ ans->rcode = PKT_NODATA;
+ return kr_ok();
+ } /* else */
+
+ /* Inexact match. First check if sname is delegated by that NSEC. */
+ const int nsec_matched = knot_dname_matched_labels(nsec_rr->owner, qry->sname);
+ const bool is_sub = nsec_matched == knot_dname_labels(nsec_rr->owner, NULL);
+ if (is_sub && kr_nsec_children_in_zone_check(bm, bm_size) != 0) {
+ VERBOSE_MSG(qry, "=> NSEC sname: covered but delegated (or error)\n");
+ return ESKIP;
+ }
+ /* NXDOMAIN proven *except* for wildcards. */
+ WITH_VERBOSE(qry) {
+ auto_free char *owner_str = kr_dname_text(nsec_rr->owner),
+ *next_str = kr_dname_text(knot_nsec_next(nsec_rr->rrs.rdata));
+ VERBOSE_MSG(qry, "=> NSEC sname: covered by: %s -> %s, new TTL %d\n",
+ owner_str, next_str, new_ttl);
+ }
+
+ /* Find label count of the closest encloser.
+ * Both endpoints in an NSEC do exist (though possibly in a child zone)
+ * and any prefixes of those names as well (empty non-terminals),
+ * but nothing else exists inside this "triangle".
+ *
+ * Note that we have to lower-case the next name for comparison,
+ * even though we have canonicalized NSEC already; see RFC 6840 5.1.
+ * LATER(optim.): it might be faster to use the LFs we already have.
+ */
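+	/* Worked sketch: sname "b.c.example." covered by
+	 *     a.example. NSEC d.example.
+	 * shares only "example." with both endpoints, so the closest encloser
+	 * is "example." and the next closer name "c.example." doesn't exist. */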
+ knot_dname_t next[KNOT_DNAME_MAXLEN];
+ int ret = knot_dname_to_wire(next, knot_nsec_next(nsec_rr->rrs.rdata), sizeof(next));
+ if (ret < 0) {
+ assert(!ret);
+ return kr_error(ret);
+ }
+ knot_dname_to_lower(next);
+ *clencl_labels = MAX(
+ nsec_matched,
+ knot_dname_matched_labels(qry->sname, next)
+ );
+
+ /* Empty non-terminals don't need to have
+ * a matching NSEC record. */
+ if (sname_labels == *clencl_labels) {
+ ans->rcode = PKT_NODATA;
+ VERBOSE_MSG(qry,
+ "=> NSEC sname: empty non-terminal by the same RR\n");
+ } else {
+ ans->rcode = PKT_NXDOMAIN;
+ }
+ return kr_ok();
+}
+
+/** Verify non-existence after kwz_between() call. */
+static bool nonexistence_ok(int cmp, const knot_rrset_t *rrs)
+{
+ if (cmp == 3) {
+ return true;
+ }
+ if (cmp != 2) {
+ return false;
+ }
+ const uint8_t *bm = knot_nsec_bitmap(rrs->rrs.rdata);
+ uint16_t bm_size = knot_nsec_bitmap_len(rrs->rrs.rdata);
+ return kr_nsec_children_in_zone_check(bm, bm_size) != 0;
+}
+
+int nsec1_src_synth(struct key *k, struct answer *ans, const knot_dname_t *clencl_name,
+ knot_db_val_t cover_low_kwz, knot_db_val_t cover_hi_kwz,
+ const struct kr_query *qry, struct kr_cache *cache)
+{
+ /* Construct key for the source of synthesis. */
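+	/* E.g. with closest encloser "example.", the source of synthesis
+	 * is "*.example." (cf. RFC 4592); key_NSEC1(..., true) adds the
+	 * wildcard label for us. */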
+ knot_db_val_t key = key_NSEC1(k, clencl_name, true);
+ const size_t nwz_off = key_nwz_off(k);
+ if (!key.data || key.len < nwz_off) {
+ assert(false);
+ return kr_error(1);
+ }
+ /* Check if our sname-covering NSEC also covers/matches SS. */
+ knot_db_val_t kwz = {
+ .data = (uint8_t *)key.data + nwz_off,
+ .len = key.len - nwz_off,
+ };
+ assert((ssize_t)(kwz.len) >= 0);
+ const int cmp = kwz_between(cover_low_kwz, kwz, cover_hi_kwz);
+ if (nonexistence_ok(cmp, ans->rrsets[AR_NSEC].set.rr)) {
+ VERBOSE_MSG(qry, "=> NSEC wildcard: covered by the same RR\n");
+ return AR_SOA;
+ }
+ const knot_rrset_t *nsec_rr = NULL; /**< the wildcard proof NSEC */
+ bool exact_match; /**< whether it matches the source of synthesis */
+ if (cmp == 1) {
+ exact_match = true;
+ nsec_rr = ans->rrsets[AR_NSEC].set.rr;
+ } else {
+ /* Try to find the NSEC for SS. */
+ knot_db_val_t val = { NULL, 0 };
+ knot_db_val_t wild_low_kwz = { NULL, 0 };
+ uint32_t new_ttl;
+ const char *err = find_leq_NSEC1(cache, qry, key, k, &val,
+ &exact_match, &wild_low_kwz, NULL, &new_ttl);
+ if (err) {
+ VERBOSE_MSG(qry, "=> NSEC wildcard: %s\n", err);
+ return kr_ok();
+ }
+ /* Materialize the record into answer (speculatively). */
+ knot_dname_t owner[KNOT_DNAME_MAXLEN];
+ int ret = dname_wire_reconstruct(owner, k, wild_low_kwz);
+ if (ret) return kr_error(ret);
+ const struct entry_h *nsec_eh = val.data;
+ ret = entry2answer(ans, AR_WILD, nsec_eh, knot_db_val_bound(val),
+ owner, KNOT_RRTYPE_NSEC, new_ttl);
+ if (ret) return kr_error(ret);
+ nsec_rr = ans->rrsets[AR_WILD].set.rr;
+ }
+
+ assert(nsec_rr);
+ const uint32_t new_ttl_log =
+ kr_verbose_status ? nsec_rr->ttl : -1;
+ const uint8_t *bm = knot_nsec_bitmap(nsec_rr->rrs.rdata);
+ uint16_t bm_size = knot_nsec_bitmap_len(nsec_rr->rrs.rdata);
+ int ret;
+ struct answer_rrset * const arw = &ans->rrsets[AR_WILD];
+ if (!bm) {
+ assert(false);
+ ret = kr_error(1);
+ goto clean_wild;
+ }
+ if (!exact_match) {
+ /* Finish verification that the source of synthesis doesn't exist. */
+ const int nsec_matched =
+ knot_dname_matched_labels(nsec_rr->owner, clencl_name);
+ /* we don't need to use the full source of synthesis ^ */
+ const bool is_sub =
+ nsec_matched == knot_dname_labels(nsec_rr->owner, NULL);
+ if (is_sub && kr_nsec_children_in_zone_check(bm, bm_size) != 0) {
+ VERBOSE_MSG(qry,
+ "=> NSEC wildcard: covered but delegated (or error)\n");
+ ret = kr_ok();
+ goto clean_wild;
+ }
+ /* We have a record proving wildcard non-existence. */
+ WITH_VERBOSE(qry) {
+ auto_free char *owner_str = kr_dname_text(nsec_rr->owner),
+ *next_str = kr_dname_text(knot_nsec_next(nsec_rr->rrs.rdata));
+ VERBOSE_MSG(qry, "=> NSEC wildcard: covered by: %s -> %s, new TTL %d\n",
+ owner_str, next_str, new_ttl_log);
+ }
+ return AR_SOA;
+ }
+
+ /* The wildcard exists. Find if it's NODATA - check type bitmap. */
+ if (kr_nsec_bitmap_nodata_check(bm, bm_size, qry->stype, nsec_rr->owner) == 0) {
+ /* NODATA proven; just need to add SOA+RRSIG later */
+ WITH_VERBOSE(qry) {
+ const char *msg_start = "=> NSEC wildcard: match proved NODATA";
+ if (arw->set.rr) {
+ auto_free char *owner_str = kr_dname_text(nsec_rr->owner);
+ VERBOSE_MSG(qry, "%s: %s, new TTL %d\n",
+ msg_start, owner_str, new_ttl_log);
+ } else {
+ /* don't repeat the RR if it's the same */
+ VERBOSE_MSG(qry, "%s, by the same RR\n", msg_start);
+ }
+ }
+ ans->rcode = PKT_NODATA;
+ return AR_SOA;
+
+ } /* else */
+ /* The data probably exists -> don't add this NSEC
+ * and (later) try to find the real wildcard data */
+ VERBOSE_MSG(qry, "=> NSEC wildcard: should exist (or error)\n");
+ ans->rcode = PKT_NOERROR;
+ ret = kr_ok();
+clean_wild:
+ if (arw->set.rr) { /* we may have matched AR_NSEC */
+ knot_rrset_free(arw->set.rr, ans->mm);
+ arw->set.rr = NULL;
+ knot_rdataset_clear(&arw->sig_rds, ans->mm);
+ }
+ return ret;
+}
+
diff --git a/lib/cache/nsec3.c b/lib/cache/nsec3.c
new file mode 100644
index 0000000..d2ae72a
--- /dev/null
+++ b/lib/cache/nsec3.c
@@ -0,0 +1,501 @@
+/* Copyright (C) 2018 CZ.NIC, z.s.p.o. <knot-dns@labs.nic.cz>
+
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program. If not, see <https://www.gnu.org/licenses/>.
+ */
+
+/** @file
+ * Implementation of NSEC3 handling. Prototypes in ./impl.h
+ */
+
+#include "lib/cache/impl.h"
+
+#include "contrib/base32hex.h"
+#include "lib/dnssec/nsec.h"
+#include "lib/layer/iterate.h"
+
+#include <libknot/rrtype/nsec3.h>
+
+static const knot_db_val_t VAL_EMPTY = { NULL, 0 };
+
+/** Common part: write all but the NSEC3 hash. */
+static knot_db_val_t key_NSEC3_common(struct key *k, const knot_dname_t *zname,
+ const nsec_p_hash_t nsec_p_hash)
+{
+ int ret;
+ const bool ok = k && zname
+ && !(ret = kr_dname_lf(k->buf, zname, false));
+ if (!ok) {
+ assert(false);
+ return VAL_EMPTY;
+ }
+
+ /* CACHE_KEY_DEF: key == zone's dname_lf + '\0' + '3' + nsec_p hash (4B)
+ * + NSEC3 hash (20B == NSEC3_HASH_LEN binary!)
+ * LATER(optim.) nsec_p hash: perhaps 2B would give a sufficient probability
+ * of avoiding collisions.
+ */
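+	/* Illustrative sketch: for zone "example." the full key is
+	 *     dname_lf("example.") '\0' '3' <4B nsec_p hash> <20B NSEC3 hash>
+	 * where this function writes everything up to the nsec_p hash and
+	 * the callers below append the NSEC3 hash itself. */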
+ uint8_t *begin = k->buf + 1 + k->zlf_len; /* one byte after zone's zero */
+ begin[0] = 0;
+ begin[1] = '3'; /* tag for NSEC3 */
+ k->type = KNOT_RRTYPE_NSEC3;
+ memcpy(begin + 2, &nsec_p_hash, sizeof(nsec_p_hash));
+ return (knot_db_val_t){
+ .data = k->buf + 1,
+ .len = begin + 2 + sizeof(nsec_p_hash) - (k->buf + 1),
+ };
+}
+
+knot_db_val_t key_NSEC3(struct key *k, const knot_dname_t *nsec3_name,
+ const nsec_p_hash_t nsec_p_hash)
+{
+ knot_db_val_t val = key_NSEC3_common(k, nsec3_name /*only zname required*/,
+ nsec_p_hash);
+ if (!val.data) return val;
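+	/* The first label of an NSEC3 owner is its base32hex-encoded hash;
+	 * decoding it must yield exactly NSEC3_HASH_LEN (20) raw bytes. */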
+ int len = base32hex_decode(nsec3_name + 1, nsec3_name[0],
+ knot_db_val_bound(val), KR_CACHE_KEY_MAXLEN - val.len);
+ if (len != NSEC3_HASH_LEN) {
+ return VAL_EMPTY;
+ }
+ val.len += len;
+ return val;
+}
+
+/** Construct a string key for NSEC3 predecessor-search, from a non-NSEC3 name.
+ * \note k->zlf_len and k->zname are assumed to have been correctly set */
+static knot_db_val_t key_NSEC3_name(struct key *k, const knot_dname_t *name,
+ const bool add_wildcard, const struct nsec_p *nsec_p)
+{
+ bool ok = k && name && nsec_p && nsec_p->raw;
+ if (!ok) return VAL_EMPTY;
+ knot_db_val_t val = key_NSEC3_common(k, k->zname, nsec_p->hash);
+ if (!val.data) return val;
+
+	/* Make `name` point to the correctly wildcarded owner name. */
+ uint8_t buf[KNOT_DNAME_MAXLEN];
+ int name_len;
+ if (add_wildcard) {
+ buf[0] = '\1';
+ buf[1] = '*';
+ name_len = knot_dname_to_wire(buf + 2, name, sizeof(buf) - 2);
+ if (name_len < 0) return VAL_EMPTY; /* wants wildcard but doesn't fit */
+ name = buf;
+ name_len += 2;
+ } else {
+ name_len = knot_dname_size(name);
+ }
+ /* Append the NSEC3 hash. */
+ const dnssec_binary_t dname = {
+ .size = name_len,
+ .data = (uint8_t *)/*const-cast*/name,
+ };
+
+ #if 0 // LATER(optim.): this requires a patched libdnssec - tries to realloc()
+ dnssec_binary_t hash = {
+ .size = KR_CACHE_KEY_MAXLEN - val.len,
+ .data = val.data + val.len,
+ };
+ int ret = dnssec_nsec3_hash(&dname, &nsec_p->libknot, &hash);
+ if (ret != DNSSEC_EOK) return VAL_EMPTY;
+ assert(hash.size == NSEC3_HASH_LEN);
+
+ #else
+ dnssec_binary_t hash = { .size = 0, .data = NULL };
+ int ret = dnssec_nsec3_hash(&dname, &nsec_p->libknot, &hash);
+ if (ret != DNSSEC_EOK) return VAL_EMPTY;
+ if (hash.size != NSEC3_HASH_LEN || !hash.data) {
+ assert(false);
+ return VAL_EMPTY;
+ }
+ memcpy(knot_db_val_bound(val), hash.data, NSEC3_HASH_LEN);
+ free(hash.data);
+ #endif
+
+ val.len += hash.size;
+ return val;
+}
+
+/** Return h1 < h2, semantically on NSEC3 hashes. */
+static inline bool nsec3_hash_ordered(const uint8_t *h1, const uint8_t *h2)
+{
+ return memcmp(h1, h2, NSEC3_HASH_LEN) < 0;
+}
+
+/** NSEC3 range search.
+ *
+ * \param key Pass output of key_NSEC3(k, ...)
+ * \param nsec_p Restrict to this NSEC3 parameter-set.
+ * \param value[out] The raw data of the NSEC3 cache record (optional; consistency checked).
+ * \param exact_match[out] Whether the key was matched exactly or just covered (optional).
+ * \param hash_low[out] Output the low end hash of covering NSEC3, pointing within DB (optional).
+ * \param new_ttl[out] New TTL of the NSEC3 (optional).
+ * \return Error message or NULL.
+ * \note The function itself does *no* bitmap checks, e.g. RFC 6840 sec. 4.
+ */
+static const char * find_leq_NSEC3(struct kr_cache *cache, const struct kr_query *qry,
+ const knot_db_val_t key, const struct key *k, const struct nsec_p *nsec_p,
+ knot_db_val_t *value, bool *exact_match, const uint8_t **hash_low,
+ uint32_t *new_ttl)
+{
+ /* Do the cache operation. */
+ const size_t hash_off = key_nsec3_hash_off(k);
+ if (!key.data || key.len < hash_off) {
+ assert(false);
+ return "range search ERROR";
+ }
+ knot_db_val_t key_found = key;
+ knot_db_val_t val = { NULL, 0 };
+ int ret = cache_op(cache, read_leq, &key_found, &val);
+ /* ^^ LATER(optim.): incrementing key and doing less-than search
+ * would probably be slightly more efficient with LMDB,
+ * but the code complexity would grow considerably. */
+ if (ret < 0) {
+ if (ret == kr_error(ENOENT)) {
+ return "range search miss";
+ } else {
+ assert(false);
+ return "range search ERROR";
+ }
+ }
+ if (value) {
+ *value = val;
+ }
+ /* Check consistency, TTL, rank. */
+ const bool is_exact = (ret == 0);
+ if (exact_match) {
+ *exact_match = is_exact;
+ }
+ const struct entry_h *eh = entry_h_consistent_NSEC(val);
+ if (!eh) {
+		/* This might just be finding something other than an NSEC3 entry,
+ * in case we searched before the very first one in the zone. */
+ return "range search found inconsistent entry";
+ }
+ /* Passing just zone name instead of owner. */
+ int32_t new_ttl_ = get_new_ttl(eh, qry, k->zname, KNOT_RRTYPE_NSEC3,
+ qry->timestamp.tv_sec);
+ if (new_ttl_ < 0 || !kr_rank_test(eh->rank, KR_RANK_SECURE)) {
+ return "range search found stale or insecure entry";
+ /* TODO: remove the stale record *and* retry,
+ * in case we haven't run off. Perhaps start by in_zone check. */
+ }
+ if (new_ttl) {
+ *new_ttl = new_ttl_;
+ }
+ if (hash_low) {
+ *hash_low = (uint8_t *)key_found.data + hash_off;
+ }
+ if (is_exact) {
+ /* Nothing else to do. */
+ return NULL;
+ }
+ /* The NSEC3 starts strictly before our target name;
+	 * now check that it still belongs to that zone and chain. */
+ const uint8_t *nsec_p_raw = eh->data + KR_CACHE_RR_COUNT_SIZE
+ + 2 /* RDLENGTH from rfc1034 */;
+ const int nsec_p_len = nsec_p_rdlen(nsec_p_raw);
+ const bool same_chain = key_found.len == hash_off + NSEC3_HASH_LEN
+ /* CACHE_KEY_DEF */
+ && memcmp(key.data, key_found.data, hash_off) == 0
+ /* exact comparison of NSEC3 parameters */
+ && nsec_p_len == nsec_p_rdlen(nsec_p->raw)
+ && memcmp(nsec_p_raw, nsec_p->raw, nsec_p_len) == 0;
+ if (!same_chain) {
+ return "range search miss (!same_chain)";
+ }
+ /* We know it starts before sname, so let's check the other end.
+ * A. find the next hash and check its length. */
+ if (KR_CACHE_RR_COUNT_SIZE != 2 || get_uint16(eh->data) == 0) {
+ assert(false);
+ return "ERROR";
+ /* TODO: more checks? Also, `next` computation is kinda messy. */
+ }
+ const uint8_t *hash_next = nsec_p_raw + nsec_p_len
+ + sizeof(uint8_t) /* hash length from rfc5155 */;
+ if (hash_next[-1] != NSEC3_HASH_LEN) {
+ return "unexpected next hash length";
+ }
+ /* B. do the actual range check. */
+ const uint8_t * const hash_searched = (uint8_t *)key.data + hash_off;
+ bool covers = /* we know for sure that the low end is before the searched name */
+ nsec3_hash_ordered(hash_searched, hash_next)
+ /* and the wrap-around case */
+ || nsec3_hash_ordered(hash_next, (const uint8_t *)key_found.data + hash_off);
+ if (!covers) {
+ return "range search miss (!covers)";
+ }
+ return NULL;
+}
+
+/** Extract textual representation of NSEC3 hash from a cache key.
+ * \param text must have length at least NSEC3_HASH_TXT_LEN+1 (will get 0-terminated). */
+static void key_NSEC3_hash2text(const knot_db_val_t key, char *text)
+{
+ assert(key.data && key.len > NSEC3_HASH_LEN);
+ const uint8_t *hash_raw = knot_db_val_bound(key) - NSEC3_HASH_LEN;
+ /* CACHE_KEY_DEF ^^ */
+ int len = base32hex_encode(hash_raw, NSEC3_HASH_LEN, (uint8_t *)text,
+ NSEC3_HASH_TXT_LEN);
+ assert(len == NSEC3_HASH_TXT_LEN); (void)len;
+ text[NSEC3_HASH_TXT_LEN] = '\0';
+}
+
+/** Reconstruct a name into a buffer (assuming a buffer length of at least KNOT_DNAME_MAXLEN).
+ * \return kr_ok() or error code (<0). */
+static int dname_wire_reconstruct(knot_dname_t *buf, const knot_dname_t *zname,
+ const uint8_t *hash_raw)
+{
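+	/* Sketch of the result: "<base32hex(hash_raw)>.<zname>", i.e. one
+	 * hash label of NSEC3_HASH_TXT_LEN chars prepended to the zone name. */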
+ int len = base32hex_encode(hash_raw, NSEC3_HASH_LEN, buf + 1, NSEC3_HASH_TXT_LEN);
+ if (len != NSEC3_HASH_TXT_LEN) {
+ assert(false);
+ return kr_error(EINVAL);
+ }
+ buf[0] = len;
+ int ret = knot_dname_to_wire(buf + 1 + len, zname, KNOT_DNAME_MAXLEN - 1 - len);
+ return ret < 0 ? kr_error(ret) : kr_ok();
+}
+
+static void nsec3_hash2text(const knot_dname_t *owner, char *text)
+{
+ assert(owner[0] == NSEC3_HASH_TXT_LEN);
+ memcpy(text, owner + 1, MIN(owner[0], NSEC3_HASH_TXT_LEN));
+ text[NSEC3_HASH_TXT_LEN] = '\0';
+}
+
+int nsec3_encloser(struct key *k, struct answer *ans,
+ const int sname_labels, int *clencl_labels,
+ const struct kr_query *qry, struct kr_cache *cache)
+{
+ static const int ESKIP = ABS(ENOENT);
+ /* Basic sanity check. */
+ const bool ok = k && k->zname && ans && clencl_labels
+ && qry && cache;
+ if (!ok) {
+ assert(!EINVAL);
+ return kr_error(EINVAL);
+ }
+
+ /*** Find the closest encloser - cycle: name starting at sname,
+	 * proceeding while longer than zname, shortening by one label per step.
+ * We need a pair where a name doesn't exist *and* its parent does. */
+ /* LATER(optim.): perhaps iterate in the other order - that
+ * should help significantly against deep queries where we have
+ * a shallow proof in the cache. We can also optimize by using
+ * only exact search unless we had a match in the previous iteration. */
+ const int zname_labels = knot_dname_labels(k->zname, NULL);
+ int last_nxproven_labels = -1;
+ const knot_dname_t *name = qry->sname;
+ for (int name_labels = sname_labels; name_labels >= zname_labels;
+ --name_labels, name += 1 + name[0]) {
+ /* Find a previous-or-equal NSEC3 in cache covering the name,
+ * checking TTL etc. */
+ const knot_db_val_t key = key_NSEC3_name(k, name, false, &ans->nsec_p);
+ if (!key.data) continue;
+ WITH_VERBOSE(qry) {
+ char hash_txt[NSEC3_HASH_TXT_LEN + 1];
+ key_NSEC3_hash2text(key, hash_txt);
+ VERBOSE_MSG(qry, "=> NSEC3 depth %d: hash %s\n",
+ name_labels - zname_labels, hash_txt);
+ }
+ knot_db_val_t val = { NULL, 0 };
+ bool exact_match;
+ uint32_t new_ttl;
+ const uint8_t *hash_low;
+ const char *err = find_leq_NSEC3(cache, qry, key, k, &ans->nsec_p, &val,
+ &exact_match, &hash_low, &new_ttl);
+ if (err) {
+ WITH_VERBOSE(qry) {
+ auto_free char *name_str = kr_dname_text(name);
+ VERBOSE_MSG(qry, "=> NSEC3 encloser error for %s: %s\n",
+ name_str, err);
+ }
+ continue;
+ }
+ if (exact_match && name_labels != sname_labels
+ && name_labels + 1 != last_nxproven_labels) {
+ /* This name exists (checked rank and TTL), and it's
+ * neither of the two interesting cases, so we do not
+ * keep searching for non-existence above this name. */
+ VERBOSE_MSG(qry,
+ "=> NSEC3 encloser: only found existence of an ancestor\n");
+ return ESKIP;
+ }
+ /* Optimization: avoid the rest of the last iteration if pointless. */
+ if (!exact_match && name_labels == zname_labels
+ && last_nxproven_labels != name_labels + 1) {
+ break;
+ }
+
+ /* Basic checks OK -> materialize data, cleaning any previous
+ * records on that answer index (unsuccessful attempts). */
+ knot_dname_t owner[KNOT_DNAME_MAXLEN];
+ {
+ int ret = dname_wire_reconstruct(owner, k->zname, hash_low);
+ if (unlikely(ret)) continue;
+ }
+ const int ans_id = (exact_match && name_labels + 1 == last_nxproven_labels)
+ ? AR_CPE : AR_NSEC;
+ {
+ const struct entry_h *nsec_eh = val.data;
+ memset(&ans->rrsets[ans_id], 0, sizeof(ans->rrsets[ans_id]));
+ int ret = entry2answer(ans, ans_id, nsec_eh, knot_db_val_bound(val),
+ owner, KNOT_RRTYPE_NSEC3, new_ttl);
+ if (ret) return kr_error(ret);
+ }
+
+ if (!exact_match) {
+ /* Non-existence proven, but we don't know if `name`
+ * is the next closer name.
+ * Note: we don't need to check for the sname being
+ * delegated away by this record, as with NSEC3 only
+ * *exact* match on an ancestor could do that. */
+ last_nxproven_labels = name_labels;
+ WITH_VERBOSE(qry) {
+ char hash_low_txt[NSEC3_HASH_TXT_LEN + 1];
+ nsec3_hash2text(owner, hash_low_txt);
+ VERBOSE_MSG(qry,
+ "=> NSEC3 depth %d: covered by %s -> TODO, new TTL %d\n",
+ name_labels - zname_labels, hash_low_txt, new_ttl);
+ }
+ continue;
+ }
+
+ /* Exactly matched NSEC3: two cases, one after another. */
+ const knot_rrset_t *nsec_rr = ans->rrsets[ans_id].set.rr;
+ const uint8_t *bm = knot_nsec3_bitmap(nsec_rr->rrs.rdata);
+ uint16_t bm_size = knot_nsec3_bitmap_len(nsec_rr->rrs.rdata);
+ assert(bm);
+ if (name_labels == sname_labels) {
+ if (kr_nsec_bitmap_nodata_check(bm, bm_size, qry->stype,
+ nsec_rr->owner) != 0) {
+ VERBOSE_MSG(qry,
+ "=> NSEC3 sname: match but failed type check\n");
+ return ESKIP;
+ }
+ /* NODATA proven; just need to add SOA+RRSIG later */
+ VERBOSE_MSG(qry,
+ "=> NSEC3 sname: match proved NODATA, new TTL %d\n",
+ new_ttl);
+ ans->rcode = PKT_NODATA;
+ return kr_ok();
+
+ } /* else */
+
+ assert(name_labels + 1 == last_nxproven_labels);
+ if (kr_nsec_children_in_zone_check(bm, bm_size) != 0) {
+ VERBOSE_MSG(qry,
+ "=> NSEC3 encloser: found but delegated (or error)\n");
+ return ESKIP;
+ }
+ /* NXDOMAIN proven *except* for wildcards. */
+ WITH_VERBOSE(qry) {
+ auto_free char *name_str = kr_dname_text(name);
+ VERBOSE_MSG(qry,
+ "=> NSEC3 encloser: confirmed as %s, new TTL %d\n",
+ name_str, new_ttl);
+ }
+ *clencl_labels = name_labels;
+ ans->rcode = PKT_NXDOMAIN;
+ /* Avoid repeated NSEC3 - remove either if the hashes match.
+ * This is very unlikely in larger zones: 1/size (per attempt).
+ * Well, deduplication would happen anyway when the answer
+ * from cache is read by kresd (internally). */
+ if (unlikely(0 == memcmp(ans->rrsets[AR_NSEC].set.rr->owner + 1,
+ ans->rrsets[AR_CPE ].set.rr->owner + 1,
+ NSEC3_HASH_LEN))) {
+ memset(&ans->rrsets[AR_CPE], 0, sizeof(ans->rrsets[AR_CPE]));
+ /* LATER(optim.): perhaps check this earlier and avoid some work? */
+ }
+ return kr_ok();
+ }
+
+	/* We've run out of options. */
+ if (last_nxproven_labels > 0) {
+ /* We didn't manage to prove existence of the closest encloser,
+ * meaning the only chance left is a *positive* wildcard record. */
+ *clencl_labels = last_nxproven_labels - 1;
+ ans->rcode = PKT_NXDOMAIN;
+ /* FIXME: review */
+ }
+ return ESKIP;
+}
+
+int nsec3_src_synth(struct key *k, struct answer *ans, const knot_dname_t *clencl_name,
+ const struct kr_query *qry, struct kr_cache *cache)
+{
+ /* Find a previous-or-equal NSEC3 in cache covering or matching
+ * the source of synthesis, checking TTL etc. */
+ const knot_db_val_t key = key_NSEC3_name(k, clencl_name, true, &ans->nsec_p);
+ if (!key.data) return kr_error(1);
+ WITH_VERBOSE(qry) {
+ char hash_txt[NSEC3_HASH_TXT_LEN + 1];
+ key_NSEC3_hash2text(key, hash_txt);
+ VERBOSE_MSG(qry, "=> NSEC3 wildcard: hash %s\n", hash_txt);
+ }
+ knot_db_val_t val = { NULL, 0 };
+ bool exact_match;
+ uint32_t new_ttl;
+ const uint8_t *hash_low;
+ const char *err = find_leq_NSEC3(cache, qry, key, k, &ans->nsec_p, &val,
+ &exact_match, &hash_low, &new_ttl);
+ if (err) {
+ VERBOSE_MSG(qry, "=> NSEC3 wildcard: %s\n", err);
+ return kr_ok();
+ }
+
+ /* LATER(optim.): avoid duplicities in answer. */
+
+ /* Basic checks OK -> materialize the data (speculatively). */
+ knot_dname_t owner[KNOT_DNAME_MAXLEN];
+ {
+ int ret = dname_wire_reconstruct(owner, k->zname, hash_low);
+ if (unlikely(ret)) return kr_ok();
+ const struct entry_h *nsec_eh = val.data;
+ ret = entry2answer(ans, AR_WILD, nsec_eh, knot_db_val_bound(val),
+ owner, KNOT_RRTYPE_NSEC3, new_ttl);
+ if (ret) return kr_error(ret);
+ }
+ const knot_rrset_t *nsec_rr = ans->rrsets[AR_WILD].set.rr;
+
+ if (!exact_match) {
+ /* The record proves wildcard non-existence. */
+ WITH_VERBOSE(qry) {
+ char hash_low_txt[NSEC3_HASH_TXT_LEN + 1];
+ nsec3_hash2text(owner, hash_low_txt);
+ VERBOSE_MSG(qry,
+ "=> NSEC3 wildcard: covered by %s -> TODO, new TTL %d\n",
+ hash_low_txt, new_ttl);
+ }
+ return AR_SOA;
+ }
+
+ /* The wildcard exists. Find if it's NODATA - check type bitmap. */
+ const uint8_t *bm = knot_nsec3_bitmap(nsec_rr->rrs.rdata);
+ uint16_t bm_size = knot_nsec3_bitmap_len(nsec_rr->rrs.rdata);
+ assert(bm);
+ if (kr_nsec_bitmap_nodata_check(bm, bm_size, qry->stype, nsec_rr->owner) == 0) {
+ /* NODATA proven; just need to add SOA+RRSIG later */
+ VERBOSE_MSG(qry, "=> NSEC3 wildcard: match proved NODATA, new TTL %d\n",
+ new_ttl);
+ ans->rcode = PKT_NODATA;
+ return AR_SOA;
+
+ } /* else */
+ /* The data probably exists -> don't add this NSEC3
+ * and (later) try to find the real wildcard data */
+ VERBOSE_MSG(qry, "=> NSEC3 wildcard: should exist (or error)\n");
+ ans->rcode = PKT_NOERROR;
+ memset(&ans->rrsets[AR_WILD], 0, sizeof(ans->rrsets[AR_WILD]));
+ return kr_ok();
+}
+
diff --git a/lib/cache/peek.c b/lib/cache/peek.c
new file mode 100644
index 0000000..1af1627
--- /dev/null
+++ b/lib/cache/peek.c
@@ -0,0 +1,724 @@
+/* Copyright (C) 2018 CZ.NIC, z.s.p.o. <knot-dns@labs.nic.cz>
+
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program. If not, see <https://www.gnu.org/licenses/>.
+ */
+
+#include "lib/cache/impl.h"
+
+#include "lib/dnssec/ta.h"
+#include "lib/layer/iterate.h"
+
+/* The whole file only exports peek_nosync().
+ * Forwards for larger chunks of code: */
+
+static int found_exact_hit(kr_layer_t *ctx, knot_pkt_t *pkt, knot_db_val_t val,
+ uint8_t lowest_rank);
+static int closest_NS(struct kr_cache *cache, struct key *k, entry_list_t el,
+ struct kr_query *qry, bool only_NS, bool is_DS);
+static int answer_simple_hit(kr_layer_t *ctx, knot_pkt_t *pkt, uint16_t type,
+ const struct entry_h *eh, const void *eh_bound, uint32_t new_ttl);
+static int try_wild(struct key *k, struct answer *ans, const knot_dname_t *clencl_name,
+ uint16_t type, uint8_t lowest_rank,
+ const struct kr_query *qry, struct kr_cache *cache);
+
+static int peek_encloser(
+ struct key *k, struct answer *ans, int sname_labels,
+ uint8_t lowest_rank, const struct kr_query *qry, struct kr_cache *cache);
+
+
+static int nsec_p_init(struct nsec_p *nsec_p, knot_db_val_t nsec_p_entry, bool with_knot)
+{
+ const size_t stamp_len = sizeof(uint32_t);
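+	/* Entry layout sketch: a 4-byte timestamp, then (for NSEC3 only)
+	 * the raw parameters; a stamp-only entry thus means plain NSEC. */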
+ if (nsec_p_entry.len <= stamp_len) { /* plain NSEC if equal */
+ nsec_p->raw = NULL;
+ nsec_p->hash = 0;
+ return kr_ok();
+ }
+ nsec_p->raw = (uint8_t *)nsec_p_entry.data + stamp_len;
+ nsec_p->hash = nsec_p_mkHash(nsec_p->raw);
+ if (!with_knot) return kr_ok();
+ /* Convert NSEC3 params to another format. */
+ const dnssec_binary_t rdata = {
+ .size = nsec_p_rdlen(nsec_p->raw),
+ .data = (uint8_t *)/*const-cast*/nsec_p->raw,
+ };
+ int ret = dnssec_nsec3_params_from_rdata(&nsec_p->libknot, &rdata);
+ return ret == DNSSEC_EOK ? kr_ok() : kr_error(ret);
+}
+
+static void nsec_p_cleanup(struct nsec_p *nsec_p)
+{
+ dnssec_binary_free(&nsec_p->libknot.salt);
+ /* We don't really need to clear it, but it's not large. (`salt` zeroed above) */
+ memset(nsec_p, 0, sizeof(*nsec_p));
+}
+
+/** Compute new TTL for nsec_p entry, using SOA serial arith.
+ * \param new_ttl (optionally) write the new TTL (even if negative)
+ * \return error code, e.g. kr_error(ESTALE) */
+static int nsec_p_ttl(knot_db_val_t entry, const uint32_t timestamp, int32_t *new_ttl)
+{
+ if (!entry.data) {
+ assert(!EINVAL);
+ return kr_error(EINVAL);
+ }
+ uint32_t stamp;
+ if (!entry.len) {
+ return kr_error(ENOENT);
+ }
+ if (entry.len < sizeof(stamp)) {
+ assert(!EILSEQ);
+ return kr_error(EILSEQ);
+ }
+ memcpy(&stamp, entry.data, sizeof(stamp));
+ int32_t newttl = stamp - timestamp;
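+	/* Sketch of the serial arithmetic: the uint32 subtraction wraps
+	 * mod 2^32 and is then read as signed, so a stamp 300 s in the
+	 * future yields 300 while one 300 s in the past yields -300. */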
+ if (new_ttl) *new_ttl = newttl;
+ return newttl < 0 ? kr_error(ESTALE) : kr_ok();
+}
+
+static uint8_t get_lowest_rank(const struct kr_request *req, const struct kr_query *qry)
+{
+ /* TODO: move rank handling into the iterator (DNSSEC_* flags)? */
+ const bool allow_unverified =
+ knot_wire_get_cd(req->qsource.packet->wire) || qry->flags.STUB;
+ /* in stub mode we don't trust RRs anyway ^^ */
+ if (qry->flags.NONAUTH) {
+ return KR_RANK_INITIAL;
+ /* Note: there's little sense in validation status for non-auth records.
+ * In case of using NONAUTH to get NS IPs, knowing that you ask correct
+ * IP doesn't matter much for security; it matters whether you can
+ * validate the answers from the NS.
+ */
+ } else if (!allow_unverified) {
+ /* Records not present under any TA don't have their security
+ * verified at all, so we also accept low ranks in that case. */
+ const bool ta_covers = kr_ta_covers_qry(req->ctx, qry->sname, qry->stype);
+ /* ^ TODO: performance? TODO: stype - call sites */
+ if (ta_covers) {
+ return KR_RANK_INSECURE | KR_RANK_AUTH;
+		} /* else fallthrough */
+ }
+ return KR_RANK_INITIAL | KR_RANK_AUTH;
+}
+
+
+/** Almost whole .produce phase for the cache module.
+ * \note we don't transition to KR_STATE_FAIL even in case of "unexpected errors".
+ */
+int peek_nosync(kr_layer_t *ctx, knot_pkt_t *pkt)
+{
+ struct kr_request *req = ctx->req;
+ struct kr_query *qry = req->current_query;
+ struct kr_cache *cache = &req->ctx->cache;
+
+ struct key k_storage, *k = &k_storage;
+ int ret = kr_dname_lf(k->buf, qry->sname, false);
+ if (unlikely(ret)) {
+ assert(false);
+ return ctx->state;
+ }
+
+ const uint8_t lowest_rank = get_lowest_rank(req, qry);
+
+ /**** 1. find the name or the closest (available) zone, not considering wildcards
+ **** 1a. exact name+type match (can be negative answer in insecure zones) */
+ {
+ knot_db_val_t key = key_exact_type_maypkt(k, qry->stype);
+ knot_db_val_t val = { NULL, 0 };
+ ret = cache_op(cache, read, &key, &val, 1);
+ if (!ret) {
+ /* found an entry: test conditions, materialize into pkt, etc. */
+ ret = found_exact_hit(ctx, pkt, val, lowest_rank);
+ }
+ }
+ if (ret && ret != -abs(ENOENT)) {
+ VERBOSE_MSG(qry, "=> exact hit error: %d %s\n", ret, kr_strerror(ret));
+ assert(false);
+ return ctx->state;
+ } else if (!ret) {
+ return KR_STATE_DONE;
+ }
+
+ /**** 1b. otherwise, find the longest prefix zone/xNAME (with OK time+rank). [...] */
+ k->zname = qry->sname;
+ ret = kr_dname_lf(k->buf, k->zname, false); /* LATER(optim.): probably remove */
+ if (unlikely(ret)) {
+ assert(false);
+ return ctx->state;
+ }
+ entry_list_t el;
+ ret = closest_NS(cache, k, el, qry, false, qry->stype == KNOT_RRTYPE_DS);
+ if (ret) {
+ assert(ret == kr_error(ENOENT));
+ if (ret != kr_error(ENOENT) || !el[0].len) {
+ return ctx->state;
+ }
+ }
+ switch (k->type) {
+ case KNOT_RRTYPE_CNAME: {
+ const knot_db_val_t v = el[EL_CNAME];
+ assert(v.data && v.len);
+ const int32_t new_ttl = get_new_ttl(v.data, qry, qry->sname,
+ KNOT_RRTYPE_CNAME, qry->timestamp.tv_sec);
+ ret = answer_simple_hit(ctx, pkt, KNOT_RRTYPE_CNAME, v.data,
+ knot_db_val_bound(v), new_ttl);
+ /* TODO: ^^ cumbersome code; we also recompute the TTL */
+ return ret == kr_ok() ? KR_STATE_DONE : ctx->state;
+ }
+ case KNOT_RRTYPE_DNAME:
+ VERBOSE_MSG(qry, "=> DNAME not supported yet\n"); // LATER
+ return ctx->state;
+ }
+
+ /* We have to try proving from NSEC*. */
+ auto_free char *log_zname = NULL;
+ WITH_VERBOSE(qry) {
+ log_zname = kr_dname_text(k->zname);
+ if (!el[0].len) {
+ VERBOSE_MSG(qry, "=> no NSEC* cached for zone: %s\n", log_zname);
+ }
+ }
+
+#if 0
+ if (!eh) { /* fall back to root hints? */
+ ret = kr_zonecut_set_sbelt(req->ctx, &qry->zone_cut);
+ if (ret) return ctx->state;
+ assert(!qry->zone_cut.parent);
+
+ //VERBOSE_MSG(qry, "=> using root hints\n");
+ //qry->flags.AWAIT_CUT = false;
+ return ctx->state;
+ }
+
+ /* Now `eh` points to the closest NS record that we've found,
+ * and that's the only place to start - we may either find
+ * a negative proof or we may query upstream from that point. */
+ kr_zonecut_set(&qry->zone_cut, k->zname);
+ ret = kr_make_query(qry, pkt); // TODO: probably not yet - qname minimization
+ if (ret) return ctx->state;
+#endif
+
+ /** Structure for collecting multiple NSEC* + RRSIG records,
+ * in preparation for the answer, and for tracking the progress. */
+ struct answer ans;
+ memset(&ans, 0, sizeof(ans));
+ ans.mm = &pkt->mm;
+ const int sname_labels = knot_dname_labels(qry->sname, NULL);
+
+ /* Try the NSEC* parameters in order, until success.
+ * Let's not mix different parameters for NSEC* RRs in a single proof. */
+ for (int i = 0; ;) {
+ int32_t log_new_ttl = -123456789; /* visually recognizable value */
+ ret = nsec_p_ttl(el[i], qry->timestamp.tv_sec, &log_new_ttl);
+ if (!ret || VERBOSE_STATUS) {
+ nsec_p_init(&ans.nsec_p, el[i], !ret);
+ }
+ if (ret) {
+			VERBOSE_MSG(qry, "=> skipping zone: %s, %s, hash %x; "
+ "new TTL %d, ret %d\n",
+ log_zname, (ans.nsec_p.raw ? "NSEC3" : "NSEC"),
+ (unsigned)ans.nsec_p.hash, (int)log_new_ttl, ret);
+ /* no need for nsec_p_cleanup() in this case */
+ goto cont;
+ }
+ VERBOSE_MSG(qry, "=> trying zone: %s, %s, hash %x\n",
+ log_zname, (ans.nsec_p.raw ? "NSEC3" : "NSEC"),
+ (unsigned)ans.nsec_p.hash);
+ /**** 2. and 3. inside */
+ ret = peek_encloser(k, &ans, sname_labels,
+ lowest_rank, qry, cache);
+ nsec_p_cleanup(&ans.nsec_p);
+ if (!ret) break;
+ if (ret < 0) return ctx->state;
+ cont:
+ /* Otherwise we try another nsec_p, if available. */
+ if (++i == ENTRY_APEX_NSECS_CNT) return ctx->state;
+ /* clear possible partial answers in `ans` (no need to deallocate) */
+ ans.rcode = 0;
+ memset(&ans.rrsets, 0, sizeof(ans.rrsets));
+ }
+
+ /**** 4. add SOA iff needed */
+ if (ans.rcode != PKT_NOERROR) {
+ /* Assuming k->buf still starts with zone's prefix,
+ * look up the SOA in cache. */
+ k->buf[0] = k->zlf_len;
+ knot_db_val_t key = key_exact_type(k, KNOT_RRTYPE_SOA);
+ knot_db_val_t val = { NULL, 0 };
+ ret = cache_op(cache, read, &key, &val, 1);
+ const struct entry_h *eh;
+ if (ret || !(eh = entry_h_consistent(val, KNOT_RRTYPE_SOA))) {
+ assert(ret); /* only want to catch `eh` failures */
+ VERBOSE_MSG(qry, "=> SOA missed\n");
+ return ctx->state;
+ }
+ /* Check if the record is OK. */
+ int32_t new_ttl = get_new_ttl(eh, qry, k->zname, KNOT_RRTYPE_SOA,
+ qry->timestamp.tv_sec);
+ if (new_ttl < 0 || eh->rank < lowest_rank || eh->is_packet) {
+ VERBOSE_MSG(qry, "=> SOA unfit %s: rank 0%.2o, new TTL %d\n",
+ (eh->is_packet ? "packet" : "RR"),
+ eh->rank, new_ttl);
+ return ctx->state;
+ }
+ /* Add the SOA into the answer. */
+ ret = entry2answer(&ans, AR_SOA, eh, knot_db_val_bound(val),
+ k->zname, KNOT_RRTYPE_SOA, new_ttl);
+ if (ret) return ctx->state;
+ }
+
+ /* Find our target RCODE. */
+ int real_rcode;
+ switch (ans.rcode) {
+ case PKT_NODATA:
+ case PKT_NOERROR: /* positive wildcarded response */
+ real_rcode = KNOT_RCODE_NOERROR;
+ break;
+ case PKT_NXDOMAIN:
+ real_rcode = KNOT_RCODE_NXDOMAIN;
+ break;
+ default:
+ assert(false);
+ case 0: /* i.e. nothing was found */
+ /* LATER(optim.): zone cut? */
+ VERBOSE_MSG(qry, "=> cache miss\n");
+ return ctx->state;
+ }
+
+ if (pkt_renew(pkt, qry->sname, qry->stype)
+ || knot_pkt_begin(pkt, KNOT_ANSWER)
+ ) {
+ assert(false);
+ return ctx->state;
+ }
+ knot_wire_set_rcode(pkt->wire, real_rcode);
+
+ bool expiring = false; // TODO
+ VERBOSE_MSG(qry, "=> writing RRsets: ");
+ for (int i = 0; i < sizeof(ans.rrsets) / sizeof(ans.rrsets[0]); ++i) {
+ if (i == 1) knot_pkt_begin(pkt, KNOT_AUTHORITY);
+ if (!ans.rrsets[i].set.rr) continue;
+ expiring = expiring || ans.rrsets[i].set.expiring;
+ ret = pkt_append(pkt, &ans.rrsets[i], ans.rrsets[i].set.rank);
+ if (ret) {
+ assert(false);
+ return ctx->state;
+ }
+ kr_log_verbose(kr_rank_test(ans.rrsets[i].set.rank, KR_RANK_SECURE)
+ ? "+" : "-");
+ }
+ kr_log_verbose("\n");
+
+ /* Finishing touches. */
+ struct kr_qflags * const qf = &qry->flags;
+ qf->EXPIRING = expiring;
+ qf->CACHED = true;
+ qf->NO_MINIMIZE = true;
+
+ return KR_STATE_DONE;
+}
+
+/**
+ * This is where the high-level "business logic" of the aggressive cache lives.
+ * \return 0: success (may need SOA); >0: try other nsec_p; <0: exit cache immediately.
+ */
+static int peek_encloser(
+ struct key *k, struct answer *ans, const int sname_labels,
+ uint8_t lowest_rank, const struct kr_query *qry, struct kr_cache *cache)
+{
+ /** Start of NSEC* covering the sname;
+	 * it's a part of the key - the part within the zone (read-only) */
+ knot_db_val_t cover_low_kwz = { NULL, 0 };
+ knot_dname_t cover_hi_storage[KNOT_DNAME_MAXLEN];
+ /** End of NSEC* covering the sname. */
+ knot_db_val_t cover_hi_kwz = {
+ .data = cover_hi_storage,
+ .len = sizeof(cover_hi_storage),
+ };
+
+ /**** 2. Find a closest (provable) encloser (of sname). */
+ int clencl_labels = -1;
+ bool clencl_is_tentative = false;
+ if (!ans->nsec_p.raw) { /* NSEC */
+ int ret = nsec1_encloser(k, ans, sname_labels, &clencl_labels,
+ &cover_low_kwz, &cover_hi_kwz, qry, cache);
+ if (ret) return ret;
+ } else {
+ int ret = nsec3_encloser(k, ans, sname_labels, &clencl_labels,
+ qry, cache);
+ clencl_is_tentative = ret == ABS(ENOENT) && clencl_labels >= 0;
+ /* ^^ Last chance: *positive* wildcard record under this clencl. */
+ if (ret && !clencl_is_tentative) return ret;
+ }
+
+ /* We should have either a match or a cover at this point. */
+ if (ans->rcode != PKT_NODATA && ans->rcode != PKT_NXDOMAIN) {
+ assert(false);
+ return kr_error(EINVAL);
+ }
+ const bool ncloser_covered = ans->rcode == PKT_NXDOMAIN;
+
+ /** Name of the closest (provable) encloser. */
+ const knot_dname_t *clencl_name = qry->sname;
+ for (int l = sname_labels; l > clencl_labels; --l)
+ clencl_name = knot_wire_next_label(clencl_name, NULL);
+
+ /**** 3. source of synthesis checks, in case the next closer name was covered.
+ **** 3a. We want to query for NSEC* of source of synthesis (SS) or its
+ * predecessor, providing us with a proof of its existence or non-existence. */
+ if (ncloser_covered && !ans->nsec_p.raw) {
+ int ret = nsec1_src_synth(k, ans, clencl_name,
+ cover_low_kwz, cover_hi_kwz, qry, cache);
+ if (ret == AR_SOA) return 0;
+ assert(ret <= 0);
+ if (ret) return ret;
+
+ } else if (ncloser_covered && ans->nsec_p.raw && !clencl_is_tentative) {
+ int ret = nsec3_src_synth(k, ans, clencl_name, qry, cache);
+ if (ret == AR_SOA) return 0;
+ assert(ret <= 0);
+ if (ret) return ret;
+
+ } /* else (!ncloser_covered) so no wildcard checks needed,
+ * as we proved that sname exists. */
+
+ /**** 3b. find wildcarded answer, if next closer name was covered
+ * and we don't have a full proof yet. (common for NSEC*) */
+ if (!ncloser_covered)
+ return kr_ok(); /* decrease indentation */
+ /* Construct key for exact qry->stype + source of synthesis. */
+ int ret = kr_dname_lf(k->buf, clencl_name, true);
+ if (ret) {
+ assert(!ret);
+ return kr_error(ret);
+ }
+ const uint16_t types[] = { qry->stype, KNOT_RRTYPE_CNAME };
+ for (int i = 0; i < (2 - (qry->stype == KNOT_RRTYPE_CNAME)); ++i) {
+ ret = try_wild(k, ans, clencl_name, types[i],
+ lowest_rank, qry, cache);
+ if (ret == kr_ok()) {
+ return kr_ok();
+ } else if (ret != -ABS(ENOENT) && ret != -ABS(ESTALE)) {
+ assert(false);
+ return kr_error(ret);
+ }
+ /* else continue */
+ }
+ /* Neither attempt succeeded, but the NSEC* proofs were found,
+ * so skip trying other parameters, as it seems very unlikely
+ * to turn out differently than by the same wildcard search. */
+ return -ABS(ENOENT);
+}
+
+
+static int answer_simple_hit(kr_layer_t *ctx, knot_pkt_t *pkt, uint16_t type,
+ const struct entry_h *eh, const void *eh_bound, uint32_t new_ttl)
+#define CHECK_RET(ret) do { \
+ if ((ret) < 0) { assert(false); return kr_error((ret)); } \
+} while (false)
+{
+ struct kr_request *req = ctx->req;
+ struct kr_query *qry = req->current_query;
+
+ /* All OK, so start constructing the (pseudo-)packet. */
+ int ret = pkt_renew(pkt, qry->sname, qry->stype);
+ CHECK_RET(ret);
+
+ /* Materialize the sets for the answer in (pseudo-)packet. */
+ struct answer ans;
+ memset(&ans, 0, sizeof(ans));
+ ans.mm = &pkt->mm;
+ ret = entry2answer(&ans, AR_ANSWER, eh, eh_bound,
+ qry->sname, type, new_ttl);
+ CHECK_RET(ret);
+ /* Put links to the materialized data into the pkt. */
+ ret = pkt_append(pkt, &ans.rrsets[AR_ANSWER], eh->rank);
+ CHECK_RET(ret);
+
+ /* Finishing touches. */
+ struct kr_qflags * const qf = &qry->flags;
+ qf->EXPIRING = is_expiring(eh->ttl, new_ttl);
+ qf->CACHED = true;
+ qf->NO_MINIMIZE = true;
+ qf->DNSSEC_INSECURE = kr_rank_test(eh->rank, KR_RANK_INSECURE);
+ if (qf->DNSSEC_INSECURE) {
+ qf->DNSSEC_WANT = false;
+ }
+ VERBOSE_MSG(qry, "=> satisfied by exact %s: rank 0%.2o, new TTL %d\n",
+ (type == KNOT_RRTYPE_CNAME ? "CNAME" : "RRset"),
+ eh->rank, new_ttl);
+ return kr_ok();
+}
+#undef CHECK_RET
+
+
+/** TODO: description; see the single call site for now. */
+static int found_exact_hit(kr_layer_t *ctx, knot_pkt_t *pkt, knot_db_val_t val,
+ uint8_t lowest_rank)
+{
+ struct kr_request *req = ctx->req;
+ struct kr_query *qry = req->current_query;
+
+ int ret = entry_h_seek(&val, qry->stype);
+ if (ret) return ret;
+ const struct entry_h *eh = entry_h_consistent(val, qry->stype);
+ if (!eh) {
+ assert(false);
+ return kr_error(ENOENT);
+ // LATER: recovery in case of error, perhaps via removing the entry?
+		// LATER(optim): perhaps optimize the zone cut search
+ }
+
+ int32_t new_ttl = get_new_ttl(eh, qry, qry->sname, qry->stype,
+ qry->timestamp.tv_sec);
+ if (new_ttl < 0 || eh->rank < lowest_rank) {
+ /* Positive record with stale TTL or bad rank.
+ * LATER(optim.): It's unlikely that we find a negative one,
+ * so we might theoretically skip all the cache code. */
+
+ VERBOSE_MSG(qry, "=> skipping exact %s: rank 0%.2o (min. 0%.2o), new TTL %d\n",
+ eh->is_packet ? "packet" : "RR", eh->rank, lowest_rank, new_ttl);
+ return kr_error(ENOENT);
+ }
+
+ const uint8_t *eh_bound = knot_db_val_bound(val);
+ if (eh->is_packet) {
+ /* Note: we answer here immediately, even if it's (theoretically)
+ * possible that we could generate a higher-security negative proof.
+		 * The rank is high enough, so we take it and save the search time. */
+		return answer_from_pkt(ctx, pkt, qry->stype, eh, eh_bound, new_ttl);
+ } else {
+ return answer_simple_hit(ctx, pkt, qry->stype, eh, eh_bound, new_ttl);
+ }
+}
+
+
+/** Try to satisfy via wildcard (positively). See the single call site. */
+static int try_wild(struct key *k, struct answer *ans, const knot_dname_t *clencl_name,
+ const uint16_t type, const uint8_t lowest_rank,
+ const struct kr_query *qry, struct kr_cache *cache)
+{
+ knot_db_val_t key = key_exact_type(k, type);
+ /* Find the record. */
+ knot_db_val_t val = { NULL, 0 };
+ int ret = cache_op(cache, read, &key, &val, 1);
+ if (!ret) {
+ ret = entry_h_seek(&val, type);
+ }
+ if (ret) {
+ if (ret != -ABS(ENOENT)) {
+ VERBOSE_MSG(qry, "=> wildcard: hit error %d %s\n",
+ ret, strerror(abs(ret)));
+ assert(false);
+ }
+ WITH_VERBOSE(qry) {
+ auto_free char *clencl_str = kr_dname_text(clencl_name),
+ *type_str = kr_rrtype_text(type);
+ VERBOSE_MSG(qry, "=> wildcard: not found: *.%s %s\n",
+ clencl_str, type_str);
+ }
+ return ret;
+ }
+ /* Check if the record is OK. */
+ const struct entry_h *eh = entry_h_consistent(val, type);
+ if (!eh) {
+ assert(false);
+ return kr_error(ret);
+ // LATER: recovery in case of error, perhaps via removing the entry?
+ }
+ int32_t new_ttl = get_new_ttl(eh, qry, qry->sname, type, qry->timestamp.tv_sec);
+ /* ^^ here we use the *expanded* wildcard name */
+ if (new_ttl < 0 || eh->rank < lowest_rank || eh->is_packet) {
+ /* Wildcard record with stale TTL, bad rank or packet. */
+ VERBOSE_MSG(qry, "=> wildcard: skipping %s, rank 0%.2o, new TTL %d\n",
+ eh->is_packet ? "packet" : "RR", eh->rank, new_ttl);
+ return -ABS(ESTALE);
+ }
+ /* Add the RR into the answer. */
+ ret = entry2answer(ans, AR_ANSWER, eh, knot_db_val_bound(val),
+ qry->sname, type, new_ttl);
+ VERBOSE_MSG(qry, "=> wildcard: answer expanded, ret = %d, new TTL %d\n",
+ ret, (int)new_ttl);
+ if (ret) return kr_error(ret);
+ ans->rcode = PKT_NOERROR;
+ return kr_ok();
+}
+
+int kr_cache_closest_apex(struct kr_cache *cache, const knot_dname_t *name, bool is_DS,
+ knot_dname_t ** apex)
+{
+ if (!cache || !cache->db || !name || !apex || *apex) {
+ assert(!EINVAL);
+ return kr_error(EINVAL);
+ }
+ struct key k_storage, *k = &k_storage;
+ int ret = kr_dname_lf(k->buf, name, false);
+ if (ret)
+ return kr_error(ret);
+ entry_list_t el_;
+ k->zname = name;
+ ret = closest_NS(cache, k, el_, NULL, true, is_DS);
+ if (ret && ret != -abs(ENOENT))
+ return ret;
+ *apex = knot_dname_copy(k->zname, NULL);
+ if (!*apex)
+ return kr_error(ENOMEM);
+ return kr_ok();
+}
+
+/** \internal for closest_NS. Check suitability of a single entry, setting k->type if OK.
+ * \return error code, negative iff whole list should be skipped.
+ */
+static int check_NS_entry(struct key *k, knot_db_val_t entry, int i,
+ bool exact_match, bool is_DS,
+ const struct kr_query *qry, uint32_t timestamp);
+
+/**
+ * Find the longest prefix zone/xNAME (with OK time+rank), starting at k->*.
+ *
+ * The found type is returned via k->type; the values are returned in el.
+ * \note we use k->type = KNOT_RRTYPE_NS also for the nsec_p result.
+ * \param qry can be NULL (-> gettimeofday(), but you lose the stale-serve hook)
+ * \param only_NS don't consider xNAMEs
+ * \return error code
+ */
+static int closest_NS(struct kr_cache *cache, struct key *k, entry_list_t el,
+ struct kr_query *qry, const bool only_NS, const bool is_DS)
+{
+ /* get the current timestamp */
+ uint32_t timestamp;
+ if (qry) {
+ timestamp = qry->timestamp.tv_sec;
+ } else {
+ struct timeval tv;
+ if (gettimeofday(&tv, NULL)) return kr_error(errno);
+ timestamp = tv.tv_sec;
+ }
+
+ int zlf_len = k->buf[0];
+
+ // LATER(optim): if stype is NS, we check the same value again
+ bool exact_match = true;
+ bool need_zero = true;
+ /* Inspect the NS/xNAME entries, shortening by a label on each iteration. */
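+	/* E.g. for sname "www.example.com." we try "www.example.com.",
+	 * "example.com.", "com." and finally the root, stopping at the
+	 * first suitable entry found. */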
+ do {
+ k->buf[0] = zlf_len;
+ knot_db_val_t key = key_exact_type(k, KNOT_RRTYPE_NS);
+ knot_db_val_t val;
+ int ret = cache_op(cache, read, &key, &val, 1);
+ if (ret == -abs(ENOENT)) goto next_label;
+ if (ret) {
+ assert(!ret);
+ if (need_zero) memset(el, 0, sizeof(entry_list_t));
+ return kr_error(ret);
+ }
+
+ /* Check consistency, find any type;
+ * using `goto` for shortening by another label. */
+ ret = entry_list_parse(val, el);
+ if (ret) {
+ assert(!ret); // do something about it?
+ goto next_label;
+ }
+ need_zero = false;
+ /* More types are possible; try in order.
+ * For non-fatal failures just "continue;" to try the next type. */
+ const int el_count = only_NS ? EL_NS + 1 : EL_LENGTH;
+ for (int i = 0; i < el_count; ++i) {
+ ret = check_NS_entry(k, el[i], i, exact_match, is_DS,
+ qry, timestamp);
+			if (ret < 0) goto next_label;
+			if (!ret) {
+ /* We found our match. */
+ k->zlf_len = zlf_len;
+ return kr_ok();
+ }
+ }
+
+ next_label:
+ /* remove one more label */
+ exact_match = false;
+ if (k->zname[0] == 0) {
+			/* We don't have the root NS in cache, but let's at least assume it exists. */
+ k->type = KNOT_RRTYPE_NS;
+ k->zlf_len = zlf_len;
+ assert(zlf_len == 0);
+ if (need_zero) memset(el, 0, sizeof(entry_list_t));
+ return kr_error(ENOENT);
+ }
+ zlf_len -= (k->zname[0] + 1);
+ k->zname += (k->zname[0] + 1);
+ k->buf[zlf_len + 1] = 0;
+ } while (true);
+}
+
+static int check_NS_entry(struct key *k, const knot_db_val_t entry, const int i,
+ const bool exact_match, const bool is_DS,
+ const struct kr_query *qry, uint32_t timestamp)
+{
+ const int ESKIP = ABS(ENOENT);
+ if (!entry.len
+ /* On a zone cut we want DS from the parent zone. */
+ || (i <= EL_NS && exact_match && is_DS)
+ /* CNAME is interesting only if we
+ * directly hit the name that was asked.
+ * Note that we want it even in the DS case. */
+ || (i == EL_CNAME && !exact_match)
+ /* DNAME is interesting only if we did NOT
+ * directly hit the name that was asked. */
+ || (i == EL_DNAME && exact_match)
+ ) {
+ return ESKIP;
+ }
+
+ uint16_t type;
+ if (i < ENTRY_APEX_NSECS_CNT) {
+ type = KNOT_RRTYPE_NS;
+ int32_t log_new_ttl = -123456789; /* visually recognizable value */
+ const int err = nsec_p_ttl(entry, timestamp, &log_new_ttl);
+ if (err) {
+ VERBOSE_MSG(qry,
+ "=> skipping unfit nsec_p: new TTL %d, error %d\n",
+ (int)log_new_ttl, err);
+ return ESKIP;
+ }
+ } else {
+ type = EL2RRTYPE(i);
+ /* Find the entry for the type, check positivity, TTL */
+ const struct entry_h *eh = entry_h_consistent(entry, type);
+ if (!eh) {
+ VERBOSE_MSG(qry, "=> EH not consistent\n");
+ assert(false);
+ return kr_error(EILSEQ);
+ }
+ const int32_t log_new_ttl = get_new_ttl(eh, qry, k->zname, type, timestamp);
+ const uint8_t rank_min = KR_RANK_INSECURE | KR_RANK_AUTH;
+ const bool ok = /* For NS any kr_rank is accepted,
+ * as insecure or even nonauth is OK */
+ (type == KNOT_RRTYPE_NS || eh->rank >= rank_min)
+ /* Not interested in negative bogus or outdated RRs. */
+ && !eh->is_packet && log_new_ttl >= 0;
+ WITH_VERBOSE(qry) { if (!ok) {
+ auto_free char *type_str = kr_rrtype_text(type);
+ const char *packet_str = eh->is_packet ? "packet" : "RR";
+ VERBOSE_MSG(qry,
+ "=> skipping unfit %s %s: rank 0%.2o, new TTL %d\n",
+ type_str, packet_str, eh->rank, (int)log_new_ttl);
+ } }
+ if (!ok) return ESKIP;
+ }
+ k->type = type;
+ return kr_ok();
+}
+
diff --git a/lib/cookies/alg_containers.c b/lib/cookies/alg_containers.c
new file mode 100644
index 0000000..a80487d
--- /dev/null
+++ b/lib/cookies/alg_containers.c
@@ -0,0 +1,72 @@
+/* Copyright (C) 2016-2017 CZ.NIC, z.s.p.o. <knot-dns@labs.nic.cz>
+
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program. If not, see <https://www.gnu.org/licenses/>.
+ */
+
+#include <assert.h>
+#include <stdint.h>
+#include <stdlib.h>
+
+#include <libknot/cookies/alg-fnv64.h>
+
+#include "lib/cookies/alg_containers.h"
+#include "lib/cookies/alg_sha.h"
+
+const struct knot_cc_alg *kr_cc_alg_get(int id)
+{
+ /*
+ * Client algorithm identifiers are used to index this array of
+ * pointers.
+ */
+ static const struct knot_cc_alg *const cc_algs[] = {
+ /* 0 */ &knot_cc_alg_fnv64,
+ /* 1 */ &knot_cc_alg_hmac_sha256_64
+ };
+
+ if (id >= 0 && id < 2) {
+ return cc_algs[id];
+ }
+
+ return NULL;
+}
+
+const knot_lookup_t kr_cc_alg_names[] = {
+ { 0, "FNV-64" },
+ { 1, "HMAC-SHA256-64" },
+ { -1, NULL }
+};
+
+const struct knot_sc_alg *kr_sc_alg_get(int id)
+{
+ /*
+ * Server algorithm identifiers are used to index this array of
+ * pointers.
+ */
+ static const struct knot_sc_alg *const sc_algs[] = {
+ /* 0 */ &knot_sc_alg_fnv64,
+ /* 1 */ &knot_sc_alg_hmac_sha256_64
+ };
+
+ if (id >= 0 && id < 2) {
+ return sc_algs[id];
+ }
+
+ return NULL;
+}
+
+const knot_lookup_t kr_sc_alg_names[] = {
+ { 0, "FNV-64" },
+ { 1, "HMAC-SHA256-64" },
+ { -1, NULL }
+};
diff --git a/lib/cookies/alg_containers.h b/lib/cookies/alg_containers.h
new file mode 100644
index 0000000..1e2aef6
--- /dev/null
+++ b/lib/cookies/alg_containers.h
@@ -0,0 +1,49 @@
+/* Copyright (C) 2016-2017 CZ.NIC, z.s.p.o. <knot-dns@labs.nic.cz>
+
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program. If not, see <https://www.gnu.org/licenses/>.
+ */
+
+#pragma once
+
+#include <libknot/cookies/client.h>
+#include <libknot/cookies/server.h>
+#include <libknot/lookup.h>
+
+#include "lib/defines.h"
+
+/**
+ * @brief Returns pointer to client cookie algorithm.
+ *
+ * @param id algorithm identifier as defined by lookup table
+ * @return pointer to algorithm structure with given id or NULL on error
+ */
+KR_EXPORT
+const struct knot_cc_alg *kr_cc_alg_get(int id);
+
+/** Binds client algorithm identifiers onto names. */
+KR_EXPORT
+extern const knot_lookup_t kr_cc_alg_names[];
+
+/**
+ * @brief Returns pointer to server cookie algorithm.
+ *
+ * @param id algorithm identifier as defined by lookup table
+ * @return pointer to algorithm structure with given id or NULL on error
+ */
+KR_EXPORT
+const struct knot_sc_alg *kr_sc_alg_get(int id);
+
+/** Binds server algorithm identifiers onto names. */
+KR_EXPORT
+extern const knot_lookup_t kr_sc_alg_names[];
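+
+/* Usage sketch (illustration only, not part of this interface): mapping an
+ * algorithm name to its implementation via libknot's lookup helper,
+ * knot_lookup_by_name() from <libknot/lookup.h>:
+ *
+ *   const knot_lookup_t *lu = knot_lookup_by_name(kr_cc_alg_names, "FNV-64");
+ *   const struct knot_cc_alg *alg = lu ? kr_cc_alg_get(lu->id) : NULL;
+ */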
diff --git a/lib/cookies/alg_sha.c b/lib/cookies/alg_sha.c
new file mode 100644
index 0000000..e23c983
--- /dev/null
+++ b/lib/cookies/alg_sha.c
@@ -0,0 +1,122 @@
+/* Copyright (C) 2016-2017 CZ.NIC, z.s.p.o. <knot-dns@labs.nic.cz>
+
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program. If not, see <https://www.gnu.org/licenses/>.
+ */
+
+#include <assert.h>
+#include <nettle/hmac.h>
+#include <stdint.h>
+#include <stdlib.h>
+
+#include <libknot/errcode.h>
+#include <libknot/rrtype/opt-cookie.h>
+
+#include "lib/cookies/alg_sha.h"
+#include "lib/utils.h"
+
+/**
+ * @brief Update hash value.
+ *
+ * @param ctx HMAC-SHA256 context to be updated.
+ * @param sa Socket address.
+ */
+static inline void update_hash(struct hmac_sha256_ctx *ctx,
+ const struct sockaddr *sa)
+{
+ assert(ctx && sa);
+
+ int addr_len = kr_inaddr_len(sa);
+ const uint8_t *addr = (uint8_t *)kr_inaddr(sa);
+
+ if (addr && addr_len > 0) {
+ hmac_sha256_update(ctx, addr_len, addr);
+ }
+}
+
+/**
+ * @brief Compute client cookie using HMAC-SHA256-64.
+ * @note At least one of the socket addresses in @a input must be non-null.
+ * @param input input parameters
+ * @param cc_out buffer for computed client cookie
+ * @param cc_len buffer size
+ * @return Non-zero size of written data on success, 0 in case of a failure.
+ */
+static uint16_t cc_gen_hmac_sha256_64(const struct knot_cc_input *input,
+ uint8_t *cc_out, uint16_t cc_len)
+{
+ if (!knot_cc_input_is_valid(input) ||
+ !cc_out || cc_len < KNOT_OPT_COOKIE_CLNT) {
+ return 0;
+ }
+
+ struct hmac_sha256_ctx ctx;
+ hmac_sha256_set_key(&ctx, input->secret_len, input->secret_data);
+
+ if (input->clnt_sockaddr) {
+ update_hash(&ctx, input->clnt_sockaddr);
+ }
+
+ if (input->srvr_sockaddr) {
+ update_hash(&ctx, input->srvr_sockaddr);
+ }
+
+ /* KNOT_OPT_COOKIE_CLNT <= SHA256_DIGEST_SIZE */
+
+ hmac_sha256_digest(&ctx, KNOT_OPT_COOKIE_CLNT, cc_out);
+
+ return KNOT_OPT_COOKIE_CLNT;
+}
+
+#define SRVR_HMAC_SHA256_64_HASH_SIZE 8
+
+/**
+ * @brief Compute server cookie hash using HMAC-SHA256-64.
+ * @note Server cookie = nonce | time | HMAC-SHA256-64( server secret, client cookie | nonce | time | client IP )
+ * @param input data to compute cookie from
+ * @param hash_out hash output buffer
+ * @param hash_len buffer size
+ * @return Non-zero size of written data on success, 0 in case of a failure.
+ */
+static uint16_t sc_gen_hmac_sha256_64(const struct knot_sc_input *input,
+ uint8_t *hash_out, uint16_t hash_len)
+{
+ if (!knot_sc_input_is_valid(input) ||
+ !hash_out || hash_len < SRVR_HMAC_SHA256_64_HASH_SIZE) {
+ return 0;
+ }
+
+ struct hmac_sha256_ctx ctx;
+ hmac_sha256_set_key(&ctx, input->srvr_data->secret_len,
+ input->srvr_data->secret_data);
+
+ hmac_sha256_update(&ctx, input->cc_len, input->cc);
+
+ if (input->nonce && input->nonce_len) {
+ hmac_sha256_update(&ctx, input->nonce_len, input->nonce);
+ }
+
+ if (input->srvr_data->clnt_sockaddr) {
+ update_hash(&ctx, input->srvr_data->clnt_sockaddr);
+ }
+
+ /* SRVR_HMAC_SHA256_64_HASH_SIZE < SHA256_DIGEST_SIZE */
+
+ hmac_sha256_digest(&ctx, SRVR_HMAC_SHA256_64_HASH_SIZE, hash_out);
+
+ return SRVR_HMAC_SHA256_64_HASH_SIZE;
+}
+
+const struct knot_cc_alg knot_cc_alg_hmac_sha256_64 = { KNOT_OPT_COOKIE_CLNT, cc_gen_hmac_sha256_64 };
+
+const struct knot_sc_alg knot_sc_alg_hmac_sha256_64 = { SRVR_HMAC_SHA256_64_HASH_SIZE, sc_gen_hmac_sha256_64 };
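+
+/* Usage sketch (illustration only; clnt_sa, srvr_sa and secret are
+ * hypothetical placeholders): computing a client cookie through the
+ * exported algorithm structure.
+ *
+ *   struct knot_cc_input in = {
+ *           .clnt_sockaddr = clnt_sa, .srvr_sockaddr = srvr_sa,
+ *           .secret_data = secret, .secret_len = secret_len,
+ *   };
+ *   uint8_t cc[KNOT_OPT_COOKIE_CLNT];
+ *   uint16_t n = knot_cc_alg_hmac_sha256_64.gen_func(&in, cc, sizeof(cc));
+ *   // n == KNOT_OPT_COOKIE_CLNT on success, 0 on failure
+ */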
diff --git a/lib/cookies/alg_sha.h b/lib/cookies/alg_sha.h
new file mode 100644
index 0000000..5b0d0c9
--- /dev/null
+++ b/lib/cookies/alg_sha.h
@@ -0,0 +1,30 @@
+/* Copyright (C) 2016-2017 CZ.NIC, z.s.p.o. <knot-dns@labs.nic.cz>
+
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program. If not, see <https://www.gnu.org/licenses/>.
+ */
+
+#pragma once
+
+#include <libknot/cookies/client.h>
+#include <libknot/cookies/server.h>
+
+#include "lib/defines.h"
+
+/* These structures are not meant to be part of the public interface. */
+
+/** HMAC-SHA256-64 client cookie algorithm. */
+extern const struct knot_cc_alg knot_cc_alg_hmac_sha256_64;
+
+/** HMAC-SHA256-64 server cookie algorithm. */
+extern const struct knot_sc_alg knot_sc_alg_hmac_sha256_64;
diff --git a/lib/cookies/control.h b/lib/cookies/control.h
new file mode 100644
index 0000000..1bf503f
--- /dev/null
+++ b/lib/cookies/control.h
@@ -0,0 +1,49 @@
+/* Copyright (C) 2016-2017 CZ.NIC, z.s.p.o. <knot-dns@labs.nic.cz>
+
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program. If not, see <https://www.gnu.org/licenses/>.
+ */
+
+#pragma once
+
+#include <stdbool.h>
+#include <stdint.h>
+#include <stdlib.h>
+
+#include "lib/defines.h"
+
+/** Holds secret quantity. */
+struct kr_cookie_secret {
+ size_t size; /*!< Secret quantity size. */
+ uint8_t data[]; /*!< Secret quantity data. */
+};
+
+/** Holds settings that have direct influence on cookie values computation. */
+struct kr_cookie_comp {
+ struct kr_cookie_secret *secr; /*!< Secret data. */
+ int alg_id; /*!< Cookie algorithm identifier. */
+};
+
+/** Holds settings that control client/server cookie behaviour. */
+struct kr_cookie_settings {
+	bool enabled; /**< Enables/disables DNS cookies functionality. */
+
+ struct kr_cookie_comp current; /**< Current cookie settings. */
+ struct kr_cookie_comp recent; /**< Recent cookie settings. */
+};
+
+/** DNS cookies controlling structure. */
+struct kr_cookie_ctx {
+ struct kr_cookie_settings clnt; /**< Client settings. */
+ struct kr_cookie_settings srvr; /**< Server settings. */
+};
diff --git a/lib/cookies/helper.c b/lib/cookies/helper.c
new file mode 100644
index 0000000..e07b8cb
--- /dev/null
+++ b/lib/cookies/helper.c
@@ -0,0 +1,296 @@
+/* Copyright (C) 2016-2017 CZ.NIC, z.s.p.o. <knot-dns@labs.nic.cz>
+
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program. If not, see <https://www.gnu.org/licenses/>.
+ */
+
+#include <assert.h>
+#include <libknot/rrtype/opt.h>
+#include <libknot/rrtype/opt-cookie.h>
+
+#include "lib/cookies/helper.h"
+#include "lib/defines.h"
+
+/**
+ * @brief Check whether there is a cached cookie that matches the current
+ * client cookie.
+ */
+static const uint8_t *peek_and_check_cc(kr_cookie_lru_t *cache, const void *sa,
+ const uint8_t *cc, uint16_t cc_len)
+{
+ assert(cache && sa && cc && cc_len);
+
+ const uint8_t *cached_opt = kr_cookie_lru_get(cache, sa);
+ if (!cached_opt) {
+ return NULL;
+ }
+
+ const uint8_t *cached_cc = knot_edns_opt_get_data((uint8_t *) cached_opt);
+
+ if (cc_len == KNOT_OPT_COOKIE_CLNT &&
+ 0 == memcmp(cc, cached_cc, cc_len)) {
+ return cached_opt;
+ }
+
+ return NULL;
+}
+
+/**
+ * @brief Put a client cookie into the RR Set.
+ */
+static int opt_rr_put_cookie(knot_rrset_t *opt_rr, uint8_t *data,
+ uint16_t data_len, knot_mm_t *mm)
+{
+ assert(opt_rr && data && data_len > 0);
+
+ const uint8_t *cc = NULL, *sc = NULL;
+ uint16_t cc_len = 0, sc_len = 0;
+
+ int ret = knot_edns_opt_cookie_parse(data, data_len, &cc, &cc_len,
+ &sc, &sc_len);
+ if (ret != KNOT_EOK) {
+ return kr_error(EINVAL);
+ }
+ assert(data_len == cc_len + sc_len);
+
+ uint16_t cookies_size = data_len;
+ uint8_t *cookies_data = NULL;
+
+ ret = knot_edns_reserve_unique_option(opt_rr, KNOT_EDNS_OPTION_COOKIE,
+ cookies_size, &cookies_data, mm);
+ if (ret != KNOT_EOK) {
+ return kr_error(EINVAL);
+ }
+ assert(cookies_data != NULL);
+
+ cookies_size = knot_edns_opt_cookie_write(cc, cc_len, sc, sc_len,
+ cookies_data, cookies_size);
+ if (cookies_size == 0) {
+ return kr_error(EINVAL);
+ }
+ assert(cookies_size == data_len);
+
+ return kr_ok();
+}
+
+/**
+ * @brief Puts an entire EDNS option into the RR Set.
+ */
+static int opt_rr_put_cookie_opt(knot_rrset_t *opt_rr, uint8_t *option, knot_mm_t *mm)
+{
+ assert(opt_rr && option);
+
+ uint16_t opt_code = knot_edns_opt_get_code(option);
+ if (opt_code != KNOT_EDNS_OPTION_COOKIE) {
+ return kr_error(EINVAL);
+ }
+
+ uint16_t opt_len = knot_edns_opt_get_length(option);
+ uint8_t *opt_data = knot_edns_opt_get_data(option);
+ if (!opt_data || opt_len == 0) {
+ return kr_error(EINVAL);
+ }
+
+ return opt_rr_put_cookie(opt_rr, opt_data, opt_len, mm);
+}
+
+int kr_request_put_cookie(const struct kr_cookie_comp *clnt_comp,
+ kr_cookie_lru_t *cookie_cache,
+ const struct sockaddr *clnt_sa,
+ const struct sockaddr *srvr_sa,
+ struct kr_request *req)
+{
+ if (!clnt_comp || !req) {
+ return kr_error(EINVAL);
+ }
+
+ if (!req->ctx->opt_rr) {
+ return kr_ok();
+ }
+
+ if (!clnt_comp->secr || (clnt_comp->alg_id < 0) || !cookie_cache) {
+ return kr_error(EINVAL);
+ }
+
+ /*
+ * Generate client cookie from client address, server address and
+ * secret quantity.
+ */
+ struct knot_cc_input input = {
+ .clnt_sockaddr = clnt_sa,
+ .srvr_sockaddr = srvr_sa,
+ .secret_data = clnt_comp->secr->data,
+ .secret_len = clnt_comp->secr->size
+ };
+ uint8_t cc[KNOT_OPT_COOKIE_CLNT];
+ uint16_t cc_len = KNOT_OPT_COOKIE_CLNT;
+ const struct knot_cc_alg *cc_alg = kr_cc_alg_get(clnt_comp->alg_id);
+ if (!cc_alg) {
+ return kr_error(EINVAL);
+ }
+ assert(cc_alg->gen_func);
+ cc_len = cc_alg->gen_func(&input, cc, cc_len);
+ if (cc_len != KNOT_OPT_COOKIE_CLNT) {
+ return kr_error(EINVAL);
+ }
+
+ const uint8_t *cached_cookie = peek_and_check_cc(cookie_cache,
+ srvr_sa, cc, cc_len);
+
+ /* Add cookie option. */
+ int ret;
+ if (cached_cookie) {
+ ret = opt_rr_put_cookie_opt(req->ctx->opt_rr,
+ (uint8_t *)cached_cookie,
+ req->ctx->pool);
+ } else {
+ ret = opt_rr_put_cookie(req->ctx->opt_rr, cc, cc_len,
+ req->ctx->pool);
+ }
+
+ return ret;
+}
+
+int kr_answer_write_cookie(struct knot_sc_input *sc_input,
+ const struct kr_nonce_input *nonce,
+ const struct knot_sc_alg *alg, knot_pkt_t *pkt)
+{
+ if (!sc_input || !sc_input->cc || sc_input->cc_len == 0) {
+ return kr_error(EINVAL);
+ }
+
+ if (!sc_input->srvr_data || !sc_input->srvr_data->clnt_sockaddr ||
+ !sc_input->srvr_data->secret_data ||
+ !sc_input->srvr_data->secret_len) {
+ return kr_error(EINVAL);
+ }
+
+ if (!nonce) {
+ return kr_error(EINVAL);
+ }
+
+ if (!alg || !alg->hash_size || !alg->hash_func) {
+ return kr_error(EINVAL);
+ }
+
+ if (!pkt || !pkt->opt_rr) {
+ return kr_error(EINVAL);
+ }
+
+ uint16_t nonce_len = KR_NONCE_LEN;
+ uint16_t hash_len = alg->hash_size;
+
+ /*
+ * Space for cookie is reserved inside the EDNS OPT RR of
+ * the answer packet.
+ */
+ uint8_t *cookie = NULL;
+ uint16_t cookie_len = knot_edns_opt_cookie_data_len(sc_input->cc_len,
+ nonce_len + hash_len);
+ if (cookie_len == 0) {
+ return kr_error(EINVAL);
+ }
+
+ int ret = knot_edns_reserve_unique_option(pkt->opt_rr,
+ KNOT_EDNS_OPTION_COOKIE,
+ cookie_len, &cookie,
+ &pkt->mm);
+ if (ret != KNOT_EOK) {
+ return kr_error(ENOMEM);
+ }
+ assert(cookie != NULL);
+
+ /*
+ * Function knot_edns_opt_cookie_data_len() returns the sum of its
+ * parameters or zero. Anyway, let's check again.
+ */
+ if (cookie_len < (sc_input->cc_len + nonce_len + hash_len)) {
+ return kr_error(EINVAL);
+ }
+
+ /* Copy client cookie data portion. */
+ memcpy(cookie, sc_input->cc, sc_input->cc_len);
+
+ if (nonce_len) {
+ /* Write nonce data portion. */
+ kr_nonce_write_wire(cookie + sc_input->cc_len, nonce_len,
+ nonce);
+ /* Adjust input for written nonce value. */
+ sc_input->nonce = cookie + sc_input->cc_len;
+ sc_input->nonce_len = nonce_len;
+ }
+
+ hash_len = alg->hash_func(sc_input,
+ cookie + sc_input->cc_len + nonce_len,
+ hash_len);
+ /* Zero nonce values. */
+ sc_input->nonce = NULL;
+ sc_input->nonce_len = 0;
+
+ return (hash_len != 0) ? kr_ok() : kr_error(EINVAL);
+}
+
+int kr_pkt_set_ext_rcode(knot_pkt_t *pkt, uint16_t whole_rcode)
+{
+ /*
+ * RFC6891 6.1.3 -- extended RCODE forms the upper 8 bits of whole
+ * 12-bit RCODE (together with the 4 bits of 'normal' RCODE).
+ *
+ * | 11 10 09 08 07 06 05 04 | 03 02 01 00 |
+ * | 12-bit whole RCODE |
+ * | 8-bit extended RCODE | 4-bit RCODE |
+ */
+
+ if (!pkt || !knot_pkt_has_edns(pkt)) {
+ return kr_error(EINVAL);
+ }
+
+ uint8_t rcode = whole_rcode & 0x0f;
+ uint8_t ext_rcode = whole_rcode >> 4;
+ knot_wire_set_rcode(pkt->wire, rcode);
+ knot_edns_set_ext_rcode(pkt->opt_rr, ext_rcode);
+
+ return kr_ok();
+}
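+
+/* Worked example (informative): BADCOOKIE has the whole 12-bit RCODE 23,
+ * which splits as rcode = 23 & 0x0f = 7 (DNS header) and
+ * ext_rcode = 23 >> 4 = 1 (EDNS OPT RR); a reader reassembles
+ * (1 << 4) | 7 == 23. */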
+
+uint8_t *kr_no_question_cookie_query(const knot_pkt_t *pkt)
+{
+ if (!pkt || knot_wire_get_qdcount(pkt->wire) > 0) {
+		return NULL;
+ }
+
+ if (knot_wire_get_qr(pkt->wire) != 0 || !pkt->opt_rr) {
+		return NULL;
+ }
+
+ return knot_edns_get_option(pkt->opt_rr, KNOT_EDNS_OPTION_COOKIE);
+}
+
+int kr_parse_cookie_opt(uint8_t *cookie_opt, struct knot_dns_cookies *cookies)
+{
+ if (!cookie_opt || !cookies) {
+ return kr_error(EINVAL);
+ }
+
+ const uint8_t *cookie_data = knot_edns_opt_get_data(cookie_opt);
+ uint16_t cookie_len = knot_edns_opt_get_length(cookie_opt);
+ if (!cookie_data || cookie_len == 0) {
+ return kr_error(EINVAL);
+ }
+
+ int ret = knot_edns_opt_cookie_parse(cookie_data, cookie_len,
+ &cookies->cc, &cookies->cc_len,
+ &cookies->sc, &cookies->sc_len);
+
+ return (ret == KNOT_EOK) ? kr_ok() : kr_error(EINVAL);
+}
diff --git a/lib/cookies/helper.h b/lib/cookies/helper.h
new file mode 100644
index 0000000..e5659ce
--- /dev/null
+++ b/lib/cookies/helper.h
@@ -0,0 +1,86 @@
+/* Copyright (C) 2016-2017 CZ.NIC, z.s.p.o. <knot-dns@labs.nic.cz>
+
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program. If not, see <https://www.gnu.org/licenses/>.
+ */
+
+#pragma once
+
+#include <libknot/packet/pkt.h>
+
+#include "lib/cookies/alg_containers.h"
+#include "lib/cookies/control.h"
+#include "lib/cookies/lru_cache.h"
+#include "lib/cookies/nonce.h"
+#include "lib/defines.h"
+#include "lib/resolve.h"
+
+/**
+ * @brief Updates DNS cookie in the request EDNS options.
+ * @note This function must be called before the request packet is finalised.
+ * @param clnt_comp client cookie control structure
+ * @param cookie_cache cookie cache
+ * @param clnt_sa client socket address
+ * @param srvr_sa server socket address
+ * @param req name resolution request
+ * @return kr_ok() or error code
+ */
+KR_EXPORT
+int kr_request_put_cookie(const struct kr_cookie_comp *clnt_comp,
+ kr_cookie_lru_t *cookie_cache,
+ const struct sockaddr *clnt_sa,
+ const struct sockaddr *srvr_sa,
+ struct kr_request *req);
+
+/**
+ * @brief Inserts a cookie option into the OPT RR. It does not write any
+ * wire data.
+ * @note The content of @a sc_input is modified. Any pre-set nonce value is
+ * ignored. After returning, its nonce value will be null.
+ * @param sc_input data needed to compute server cookie, nonce is ignored
+ * @param nonce nonce value that is actually used
+ * @param alg hash algorithm
+ * @param pkt DNS response packet
+ */
+KR_EXPORT
+int kr_answer_write_cookie(struct knot_sc_input *sc_input,
+ const struct kr_nonce_input *nonce,
+ const struct knot_sc_alg *alg, knot_pkt_t *pkt);
+
+/**
+ * @brief Set RCODE and extended RCODE.
+ * @param pkt DNS packet
+ * @param whole_rcode RCODE value
+ * @return kr_ok() or error code
+ */
+KR_EXPORT
+int kr_pkt_set_ext_rcode(knot_pkt_t *pkt, uint16_t whole_rcode);
+
+/**
+ * @brief Check whether packet is a server cookie request according to
+ * RFC7873 5.4.
+ * @param pkt Packet to be examined.
+ * @return Pointer to the entire cookie option if the packet is a server cookie
+ * query; NULL on error, if the packet contains no cookie, or if QDCOUNT > 0.
+ */
+KR_EXPORT
+uint8_t *kr_no_question_cookie_query(const knot_pkt_t *pkt);
+
+/**
+ * @brief Parse cookies from cookie option.
+ * @param cookie_opt Cookie option.
+ * @param cookies Cookie structure to be set.
+ * @return kr_ok() on success, error if cookies are malformed.
+ */
+KR_EXPORT
+int kr_parse_cookie_opt(uint8_t *cookie_opt, struct knot_dns_cookies *cookies);
diff --git a/lib/cookies/lru_cache.c b/lib/cookies/lru_cache.c
new file mode 100644
index 0000000..8ea97ab
--- /dev/null
+++ b/lib/cookies/lru_cache.c
@@ -0,0 +1,70 @@
+/* Copyright (C) 2016-2017 CZ.NIC, z.s.p.o. <knot-dns@labs.nic.cz>
+
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program. If not, see <https://www.gnu.org/licenses/>.
+ */
+
+#include <libknot/rrtype/opt.h>
+#include <string.h>
+
+#include "lib/cookies/lru_cache.h"
+#include "lib/utils.h"
+
+const uint8_t *kr_cookie_lru_get(kr_cookie_lru_t *cache,
+ const struct sockaddr *sa)
+{
+ if (!cache || !sa) {
+ return NULL;
+ }
+
+ int addr_len = kr_inaddr_len(sa);
+ const char *addr = kr_inaddr(sa);
+ if (!addr || addr_len <= 0) {
+ return NULL;
+ }
+
+ struct cookie_opt_data *cached = lru_get_try(cache, addr, addr_len);
+ return cached ? cached->opt_data : NULL;
+}
+
+int kr_cookie_lru_set(kr_cookie_lru_t *cache, const struct sockaddr *sa,
+ uint8_t *opt)
+{
+ if (!cache || !sa) {
+ return kr_error(EINVAL);
+ }
+
+ if (!opt) {
+ return kr_ok();
+ }
+
+ int addr_len = kr_inaddr_len(sa);
+ const char *addr = kr_inaddr(sa);
+ if (!addr || addr_len <= 0) {
+ return kr_error(EINVAL);
+ }
+
+ uint16_t opt_size = KNOT_EDNS_OPTION_HDRLEN +
+ knot_edns_opt_get_length(opt);
+
+ if (opt_size > KR_COOKIE_OPT_MAX_LEN) {
+ return kr_error(EINVAL);
+ }
+
+ struct cookie_opt_data *cached = lru_get_new(cache, addr, addr_len, NULL);
+ if (cached) {
+ memcpy(cached->opt_data, opt, opt_size);
+ }
+
+ return kr_ok();
+}
diff --git a/lib/cookies/lru_cache.h b/lib/cookies/lru_cache.h
new file mode 100644
index 0000000..8ce0a01
--- /dev/null
+++ b/lib/cookies/lru_cache.h
@@ -0,0 +1,69 @@
+/* Copyright (C) 2016-2017 CZ.NIC, z.s.p.o. <knot-dns@labs.nic.cz>
+
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program. If not, see <https://www.gnu.org/licenses/>.
+ */
+
+#pragma once
+
+#include <netinet/in.h>
+#include <stdint.h>
+
+#if defined(ENABLE_COOKIES)
+#include <libknot/rrtype/opt.h>
+#include <libknot/rrtype/opt-cookie.h>
+#else
+#define KNOT_OPT_COOKIE_CLNT 8
+#define KNOT_OPT_COOKIE_SRVR_MAX 32
+#endif /* defined(ENABLE_COOKIES) */
+
+#include "lib/defines.h"
+#include "lib/generic/lru.h"
+
+/** Maximal size of a cookie option. */
+#define KR_COOKIE_OPT_MAX_LEN (KNOT_EDNS_OPTION_HDRLEN + KNOT_OPT_COOKIE_CLNT + KNOT_OPT_COOKIE_SRVR_MAX)
+
+/**
+ * Cookie option entry.
+ */
+struct cookie_opt_data {
+ uint8_t opt_data[KR_COOKIE_OPT_MAX_LEN];
+};
+
+/**
+ * DNS cookies tracking.
+ */
+typedef lru_t(struct cookie_opt_data) kr_cookie_lru_t;
+
+/**
+ * @brief Obtain LRU cache entry.
+ *
+ * @param cache cookie LRU cache
+ * @param sa socket address serving as key
+ * @return pointer to cached option or NULL if not found or error occurred
+ */
+KR_EXPORT
+const uint8_t *kr_cookie_lru_get(kr_cookie_lru_t *cache,
+ const struct sockaddr *sa);
+
+/**
+ * @brief Stores cookie option into LRU cache.
+ *
+ * @param cache cookie LRU cache
+ * @param sa socket address serving as key
+ * @param opt cookie option to be stored
+ * @return kr_ok() or error code
+ */
+KR_EXPORT
+int kr_cookie_lru_set(kr_cookie_lru_t *cache, const struct sockaddr *sa,
+ uint8_t *opt);
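+
+/* Usage sketch (illustration only; srvr_sa and opt stand for a server
+ * address and a raw EDNS cookie option taken from a response):
+ *
+ *   kr_cookie_lru_set(cache, srvr_sa, opt);
+ *   const uint8_t *cached = kr_cookie_lru_get(cache, srvr_sa);
+ *   if (cached) {
+ *           // reuse the previously stored cookie option
+ *   }
+ */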
diff --git a/lib/cookies/nonce.c b/lib/cookies/nonce.c
new file mode 100644
index 0000000..4ad72c8
--- /dev/null
+++ b/lib/cookies/nonce.c
@@ -0,0 +1,34 @@
+/* Copyright (C) 2016-2017 CZ.NIC, z.s.p.o. <knot-dns@labs.nic.cz>
+
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program. If not, see <https://www.gnu.org/licenses/>.
+ */
+
+#include <assert.h>
+
+#include "contrib/wire.h"
+#include "lib/cookies/nonce.h"
+
+uint16_t kr_nonce_write_wire(uint8_t *buf, uint16_t buf_len,
+ const struct kr_nonce_input *input)
+{
+ if (!buf || buf_len < KR_NONCE_LEN || !input) {
+ return 0;
+ }
+
+ wire_write_u32(buf, input->rand);
+ wire_write_u32(buf + sizeof(uint32_t), input->time);
+ buf_len = 2 * sizeof(uint32_t);
+
+ return buf_len;
+}
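+
+/* Usage sketch (illustration only; the constants are arbitrary stand-ins
+ * for a real random source and timestamp):
+ *
+ *   uint8_t wire[KR_NONCE_LEN];
+ *   struct kr_nonce_input ni = { .rand = 0x12345678, .time = 1500000000 };
+ *   uint16_t len = kr_nonce_write_wire(wire, sizeof(wire), &ni);
+ *   // len == KR_NONCE_LEN (8) on success, 0 on failure
+ */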
diff --git a/lib/cookies/nonce.h b/lib/cookies/nonce.h
new file mode 100644
index 0000000..99c54d3
--- /dev/null
+++ b/lib/cookies/nonce.h
@@ -0,0 +1,43 @@
+/* Copyright (C) 2016-2017 CZ.NIC, z.s.p.o. <knot-dns@labs.nic.cz>
+
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program. If not, see <https://www.gnu.org/licenses/>.
+ */
+
+#pragma once
+
+#include "lib/defines.h"
+
+/* RFC7873 Appendix B.2 mentions an algorithm using two values before the
+ * actual server cookie hash. */
+
+/** Nonce value length. */
+#define KR_NONCE_LEN 8
+
+/** Input data to generate nonce from. */
+struct kr_nonce_input {
+ uint32_t rand; /**< some random value */
+ uint32_t time; /**< time stamp */
+};
+
+/**
+ * @brief Writes server cookie nonce value into given buffer.
+ *
+ * @param buf buffer to write nonce data in wire format into
+ * @param buf_len buffer size
+ * @param input data to generate wire data from
+ * @return non-zero size of written data on success, 0 on failure
+ */
+KR_EXPORT
+uint16_t kr_nonce_write_wire(uint8_t *buf, uint16_t buf_len,
+ const struct kr_nonce_input *input);
diff --git a/lib/defines.h b/lib/defines.h
new file mode 100644
index 0000000..84da059
--- /dev/null
+++ b/lib/defines.h
@@ -0,0 +1,102 @@
+/* Copyright (C) 2014-2017 CZ.NIC, z.s.p.o. <knot-dns@labs.nic.cz>
+
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program. If not, see <https://www.gnu.org/licenses/>.
+ */
+
+#pragma once
+
+#include <errno.h>
+#include <libknot/errcode.h>
+#include <libknot/dname.h>
+#include <libknot/rrset.h>
+
+/* Function attributes */
+#if __GNUC__ >= 4
+#define KR_EXPORT __attribute__ ((visibility ("default")))
+#define KR_CONST __attribute__((__const__))
+#define KR_PURE __attribute__((__pure__))
+#define KR_NORETURN __attribute__((__noreturn__))
+#define KR_COLD __attribute__((__cold__))
+#define KR_PRINTF(n) __attribute__((format (printf, n, (n+1))))
+#else
+#define KR_EXPORT
+#define KR_CONST
+#define KR_PURE
+#define KR_NORETURN
+#define KR_COLD
+#define KR_PRINTF(n)
+#endif
+
+#ifndef uint /* Redefining typedef is a C11 feature. */
+typedef unsigned int uint;
+#define uint uint
+#endif
+
+/*
+ * Error codes.
+ */
+#define kr_ok() 0
+/* Mark as cold to mark all branches as unlikely. */
+static inline int KR_COLD kr_error(int x) {
+ return x <= 0 ? x : -x;
+}
+#define kr_strerror(x) strerror(abs(x))
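+/* e.g. kr_error(EINVAL) == -EINVAL and kr_error(-EINVAL) == -EINVAL, so
+ * callers may pass errno values of either sign and always get a
+ * non-positive code; kr_strerror() takes abs() so it accepts both, too. */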
+
+/*
+ * Connection limits.
+ * @cond internal
+ */
+#define KR_CONN_RTT_MAX 2000 /* Timeout for network activity */
+#define KR_CONN_RETRY 200 /* Retry interval for network activity */
+#define KR_ITER_LIMIT 100 /* Built-in iterator limit */
+#define KR_RESOLVE_TIME_LIMIT 10000 /* Upper limit for resolution time of single query, ms */
+#define KR_CNAME_CHAIN_LIMIT 40 /* Built-in maximum CNAME chain length */
+#define KR_TIMEOUT_LIMIT 4 /* Maximum number of retries after timeout. */
+#define KR_QUERY_NSRETRY_LIMIT 4 /* Maximum number of retries per query. */
+
+/*
+ * Defines.
+ */
+#define KR_DNS_PORT 53
+#define KR_DNS_TLS_PORT 853
+#define KR_EDNS_VERSION 0
+#define KR_EDNS_PAYLOAD 4096 /* Default UDP payload (max unfragmented UDP is 1452B) */
+#define KR_CACHE_DEFAULT_TTL_MIN (5) /* avoid bursts of queries */
+#define KR_CACHE_DEFAULT_TTL_MAX (6 * 24 * 3600) /* 6 days, like the root NS TTL */
+
+#define KR_DNAME_STR_MAXLEN (KNOT_DNAME_TXT_MAXLEN + 1)
+#define KR_RRTYPE_STR_MAXLEN (16 + 1)
+
+/*
+ * Address sanitizer hints.
+ */
+#if !defined(__SANITIZE_ADDRESS__) && defined(__has_feature)
+# if __has_feature(address_sanitizer)
+# define __SANITIZE_ADDRESS__ 1
+# endif
+#endif
+#if defined(__SANITIZE_ADDRESS__)
+void __asan_poison_memory_region(void const volatile *addr, size_t size);
+void __asan_unpoison_memory_region(void const volatile *addr, size_t size);
+#define kr_asan_poison(addr, size) __asan_poison_memory_region((addr), (size))
+#define kr_asan_unpoison(addr, size) __asan_unpoison_memory_region((addr), (size))
+#define kr_asan_custom_poison(fn, addr) fn ##_poison((addr))
+#define kr_asan_custom_unpoison(fn, addr) fn ##_unpoison((addr))
+#else
+#define kr_asan_poison(addr, size)
+#define kr_asan_unpoison(addr, size)
+#define kr_asan_custom_poison(fn, addr)
+#define kr_asan_custom_unpoison(fn, addr)
+#endif
+/* @endcond */
diff --git a/lib/dnssec.c b/lib/dnssec.c
new file mode 100644
index 0000000..4f8ad8a
--- /dev/null
+++ b/lib/dnssec.c
@@ -0,0 +1,473 @@
+/* Copyright (C) 2015-2017 CZ.NIC, z.s.p.o. <knot-dns@labs.nic.cz>
+
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program. If not, see <https://www.gnu.org/licenses/>.
+ */
+
+#include <assert.h>
+#include <libdnssec/binary.h>
+#include <libdnssec/crypto.h>
+#include <libdnssec/error.h>
+#include <libdnssec/key.h>
+#include <libdnssec/sign.h>
+#include <libknot/descriptor.h>
+#include <libknot/packet/wire.h>
+#include <libknot/rdataset.h>
+#include <libknot/rrset.h>
+#include <libknot/rrtype/dnskey.h>
+#include <libknot/rrtype/nsec.h>
+#include <libknot/rrtype/rrsig.h>
+#include <contrib/wire.h>
+
+#include "lib/defines.h"
+#include "lib/dnssec/nsec.h"
+#include "lib/dnssec/nsec3.h"
+#include "lib/dnssec/signature.h"
+#include "lib/dnssec.h"
+#include "lib/resolve.h"
+
+/* forward */
+static int kr_rrset_validate_with_key(kr_rrset_validation_ctx_t *vctx,
+ const knot_rrset_t *covered, size_t key_pos, const struct dseckey *key);
+
+void kr_crypto_init(void)
+{
+ dnssec_crypto_init();
+}
+
+void kr_crypto_cleanup(void)
+{
+ dnssec_crypto_cleanup();
+}
+
+void kr_crypto_reinit(void)
+{
+ dnssec_crypto_reinit();
+}
+
+#define FLG_WILDCARD_EXPANSION 0x01 /**< Possibly generated by using wildcard expansion. */
+
+/**
+ * Check the RRSIG RR validity according to RFC4035 5.3.1.
+ * @param flags The flags are going to be set according to validation result.
+ * @param cov_labels Covered RRSet owner label count.
+ * @param rrsigs rdata containing the signatures.
+ * @param key_owner Associated DNSKEY's owner.
+ * @param key_rdata Associated DNSKEY's rdata.
+ * @param keytag Used key tag.
+ * @param zone_name The name of the zone cut.
+ * @param timestamp Validation time.
+ */
+static int validate_rrsig_rr(int *flags, int cov_labels,
+ const knot_rdata_t *rrsigs,
+ const knot_dname_t *key_owner, const knot_rdata_t *key_rdata,
+ uint16_t keytag,
+ const knot_dname_t *zone_name, uint32_t timestamp,
+ kr_rrset_validation_ctx_t *vctx)
+{
+ if (!flags || !rrsigs || !key_owner || !key_rdata || !zone_name) {
+ return kr_error(EINVAL);
+ }
+ /* bullet 5 */
+ if (knot_rrsig_sig_expiration(rrsigs) < timestamp) {
+ vctx->rrs_counters.expired++;
+ return kr_error(EINVAL);
+ }
+ /* bullet 6 */
+ if (knot_rrsig_sig_inception(rrsigs) > timestamp) {
+ vctx->rrs_counters.notyet++;
+ return kr_error(EINVAL);
+ }
+ /* bullet 2 */
+ const knot_dname_t *signer_name = knot_rrsig_signer_name(rrsigs);
+ if (!signer_name || !knot_dname_is_equal(signer_name, zone_name)) {
+ vctx->rrs_counters.signer_invalid++;
+ return kr_error(EAGAIN);
+ }
+ /* bullet 4 */
+ {
+ int rrsig_labels = knot_rrsig_labels(rrsigs);
+ if (rrsig_labels > cov_labels) {
+ vctx->rrs_counters.labels_invalid++;
+ return kr_error(EINVAL);
+ }
+ if (rrsig_labels < cov_labels) {
+ *flags |= FLG_WILDCARD_EXPANSION;
+ }
+ }
+
+ /* bullet 7 */
+ if ((!knot_dname_is_equal(key_owner, signer_name)) ||
+ (knot_dnskey_alg(key_rdata) != knot_rrsig_alg(rrsigs)) ||
+ (keytag != knot_rrsig_key_tag(rrsigs))) {
+ vctx->rrs_counters.key_invalid++;
+ return kr_error(EINVAL);
+ }
+ /* bullet 8 */
+ /* Checked somewhere else. */
+ /* bullet 9 and 10 */
+ /* One of the requirements should be always fulfilled. */
+
+ return kr_ok();
+}
+
+/**
+ * Returns the number of labels that have been added by wildcard expansion.
+ * @param expanded Expanded wildcard.
+ * @param rrsig RRSIG rdata covering the expanded name.
+ * @return Number of added labels, -1 on error.
+ */
+static inline int wildcard_radix_len_diff(const knot_dname_t *expanded,
+ const knot_rdata_t *rrsig)
+{
+ if (!expanded || !rrsig) {
+ return -1;
+ }
+
+ return knot_dname_labels(expanded, NULL) - knot_rrsig_labels(rrsig);
+}
+
+int kr_rrset_validate(kr_rrset_validation_ctx_t *vctx, const knot_rrset_t *covered)
+{
+ if (!vctx) {
+ return kr_error(EINVAL);
+ }
+ if (!vctx->pkt || !covered || !vctx->keys || !vctx->zone_name) {
+ return kr_error(EINVAL);
+ }
+
+ memset(&vctx->rrs_counters, 0, sizeof(vctx->rrs_counters));
+ for (unsigned i = 0; i < vctx->keys->rrs.count; ++i) {
+ int ret = kr_rrset_validate_with_key(vctx, covered, i, NULL);
+ if (ret == 0) {
+ return ret;
+ }
+ }
+
+ return kr_error(ENOENT);
+}
+
+/**
+ * Validate RRSet using a specific key.
+ * @param vctx Pointer to validation context.
+ * @param covered RRSet covered by a signature. It must be in canonical format.
+ * @param key_pos Position of the key to be validated with.
+ * @param key Key to be used to validate.
+ * If NULL, then key from DNSKEY RRSet is used.
+ * @return 0 or error code, same as vctx->result.
+ */
+static int kr_rrset_validate_with_key(kr_rrset_validation_ctx_t *vctx,
+ const knot_rrset_t *covered,
+ size_t key_pos, const struct dseckey *key)
+{
+ const knot_pkt_t *pkt = vctx->pkt;
+ const knot_rrset_t *keys = vctx->keys;
+ const knot_dname_t *zone_name = vctx->zone_name;
+ uint32_t timestamp = vctx->timestamp;
+ bool has_nsec3 = vctx->has_nsec3;
+ struct dseckey *created_key = NULL;
+
+ /* It's just caller's approximation that the RR is in that particular zone.
+	 * We MUST guard against attempts by zones to sign out-of-bailiwick records. */
+ if (knot_dname_in_bailiwick(covered->owner, zone_name) < 0) {
+ vctx->result = kr_error(ENOENT);
+ return vctx->result;
+ }
+
+ const knot_rdata_t *key_rdata = knot_rdataset_at(&keys->rrs, key_pos);
+ if (key == NULL) {
+ int ret = kr_dnssec_key_from_rdata(&created_key, keys->owner,
+ key_rdata->data, key_rdata->len);
+ if (ret != 0) {
+ vctx->result = ret;
+ return vctx->result;
+ }
+ key = created_key;
+ }
+ uint16_t keytag = dnssec_key_get_keytag((dnssec_key_t *)key);
+ int covered_labels = knot_dname_labels(covered->owner, NULL);
+ if (knot_dname_is_wildcard(covered->owner)) {
+ /* The asterisk does not count, RFC4034 3.1.3, paragraph 3. */
+ --covered_labels;
+ }
+
+ for (uint16_t i = 0; i < vctx->rrs->len; ++i) {
+ /* Consider every RRSIG that matches owner and covers the class/type. */
+ const knot_rrset_t *rrsig = vctx->rrs->at[i]->rr;
+ if (rrsig->type != KNOT_RRTYPE_RRSIG) {
+ continue;
+ }
+ if ((covered->rclass != rrsig->rclass) || !knot_dname_is_equal(covered->owner, rrsig->owner)) {
+ continue;
+ }
+ knot_rdata_t *rdata_j = rrsig->rrs.rdata;
+ for (uint16_t j = 0; j < rrsig->rrs.count; ++j, rdata_j = knot_rdataset_next(rdata_j)) {
+ int val_flgs = 0;
+ int trim_labels = 0;
+ if (knot_rrsig_type_covered(rdata_j) != covered->type) {
+ continue;
+ }
+ vctx->rrs_counters.matching_name_type++;
+ int retv = validate_rrsig_rr(&val_flgs, covered_labels, rdata_j,
+ keys->owner, key_rdata, keytag,
+ zone_name, timestamp, vctx);
+ if (retv == kr_error(EAGAIN)) {
+ kr_dnssec_key_free(&created_key);
+ vctx->result = retv;
+ return retv;
+ } else if (retv != 0) {
+ continue;
+ }
+ if (val_flgs & FLG_WILDCARD_EXPANSION) {
+ trim_labels = wildcard_radix_len_diff(covered->owner, rdata_j);
+ if (trim_labels < 0) {
+ break;
+ }
+ }
+ if (kr_check_signature(rdata_j, (dnssec_key_t *) key, covered, trim_labels) != 0) {
+ vctx->rrs_counters.crypto_invalid++;
+ continue;
+ }
+ if (val_flgs & FLG_WILDCARD_EXPANSION) {
+ int ret = 0;
+ if (!has_nsec3) {
+ ret = kr_nsec_wildcard_answer_response_check(pkt, KNOT_AUTHORITY, covered->owner);
+ } else {
+ ret = kr_nsec3_wildcard_answer_response_check(pkt, KNOT_AUTHORITY, covered->owner, trim_labels - 1);
+ if (ret == kr_error(KNOT_ERANGE)) {
+ ret = 0;
+ vctx->flags |= KR_DNSSEC_VFLG_OPTOUT;
+ }
+ }
+ if (ret != 0) {
+ vctx->rrs_counters.nsec_invalid++;
+ continue;
+ }
+ vctx->flags |= KR_DNSSEC_VFLG_WEXPAND;
+ }
+ /* Validated with current key, OK */
+ kr_dnssec_key_free(&created_key);
+ vctx->result = kr_ok();
+ return vctx->result;
+ }
+ }
+ /* No applicable key found, cannot be validated. */
+ kr_dnssec_key_free(&created_key);
+ vctx->result = kr_error(ENOENT);
+ return vctx->result;
+}
+
+static bool kr_ds_algo_support(const knot_rrset_t *ta)
+{
+ knot_rdata_t *rdata_i = ta->rrs.rdata;
+ for (uint16_t i = 0; i < ta->rrs.count;
+ ++i, rdata_i = knot_rdataset_next(rdata_i)) {
+ if (dnssec_algorithm_digest_support(knot_ds_digest_type(rdata_i))
+ && dnssec_algorithm_key_support(knot_ds_alg(rdata_i))) {
+ return true;
+ }
+ }
+ return false;
+}
+
+int kr_dnskeys_trusted(kr_rrset_validation_ctx_t *vctx, const knot_rrset_t *ta)
+{
+ const knot_pkt_t *pkt = vctx->pkt;
+ const knot_rrset_t *keys = vctx->keys;
+
+ const bool ok = pkt && keys && ta && ta->rrs.count && ta->rrs.rdata
+ && ta->type == KNOT_RRTYPE_DS;
+ if (!ok) {
+ assert(false);
+ return kr_error(EINVAL);
+ }
+
+ /* Check if at least one DS has a usable algorithm pair. */
+ if (!kr_ds_algo_support(ta)) {
+ /* See RFC6840 5.2. */
+ return vctx->result = kr_error(DNSSEC_INVALID_DS_ALGORITHM);
+ }
+
+ /* RFC4035 5.2, bullet 1
+ * The supplied DS record has been authenticated.
+ * It has been validated or is part of a configured trust anchor.
+ */
+ memset(&vctx->rrs_counters, 0, sizeof(vctx->rrs_counters));
+ for (uint16_t i = 0; i < keys->rrs.count; ++i) {
+ /* RFC4035 5.3.1, bullet 8 */ /* ZSK */
+ /* LATER(optim.): more efficient way to iterate than _at() */
+ const knot_rdata_t *krr = knot_rdataset_at(&keys->rrs, i);
+ if (!kr_dnssec_key_zsk(krr->data) || kr_dnssec_key_revoked(krr->data)) {
+ continue;
+ }
+
+ struct dseckey *key = NULL;
+ if (kr_dnssec_key_from_rdata(&key, keys->owner, krr->data, krr->len) != 0) {
+ continue;
+ }
+ if (kr_authenticate_referral(ta, (dnssec_key_t *) key) != 0) {
+ kr_dnssec_key_free(&key);
+ continue;
+ }
+ if (kr_rrset_validate_with_key(vctx, keys, i, key) != 0) {
+ kr_dnssec_key_free(&key);
+ continue;
+ }
+ kr_dnssec_key_free(&key);
+ assert (vctx->result == 0);
+ return vctx->result;
+ }
+
+	/* No usable key found */
+ vctx->result = kr_error(ENOENT);
+ return vctx->result;
+}
+
+bool kr_dnssec_key_zsk(const uint8_t *dnskey_rdata)
+{
+ return wire_read_u16(dnskey_rdata) & 0x0100;
+}
+
+bool kr_dnssec_key_ksk(const uint8_t *dnskey_rdata)
+{
+ return wire_read_u16(dnskey_rdata) & 0x0001;
+}
+
+/** Return true if the DNSKEY is revoked. */
+bool kr_dnssec_key_revoked(const uint8_t *dnskey_rdata)
+{
+ return wire_read_u16(dnskey_rdata) & 0x0080;
+}
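+
+/* For orientation (informative): common DNSKEY flag values are 256 (0x0100,
+ * a zone key without SEP, typically a ZSK) and 257 (0x0101, a zone key with
+ * the SEP bit, typically a KSK); a revoked key has 0x0080 set (RFC5011). */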
+
+int kr_dnssec_key_tag(uint16_t rrtype, const uint8_t *rdata, size_t rdlen)
+{
+ if (!rdata || rdlen == 0 || (rrtype != KNOT_RRTYPE_DS && rrtype != KNOT_RRTYPE_DNSKEY)) {
+ return kr_error(EINVAL);
+ }
+ if (rrtype == KNOT_RRTYPE_DS) {
+ return wire_read_u16(rdata);
+ } else if (rrtype == KNOT_RRTYPE_DNSKEY) {
+ struct dseckey *key = NULL;
+ int ret = kr_dnssec_key_from_rdata(&key, NULL, rdata, rdlen);
+ if (ret != 0) {
+ return ret;
+ }
+ uint16_t keytag = dnssec_key_get_keytag((dnssec_key_t *)key);
+ kr_dnssec_key_free(&key);
+ return keytag;
+ } else {
+ return kr_error(EINVAL);
+ }
+}
+
+int kr_dnssec_key_match(const uint8_t *key_a_rdata, size_t key_a_rdlen,
+ const uint8_t *key_b_rdata, size_t key_b_rdlen)
+{
+ dnssec_key_t *key_a = NULL, *key_b = NULL;
+ int ret = kr_dnssec_key_from_rdata((struct dseckey **)&key_a, NULL, key_a_rdata, key_a_rdlen);
+ if (ret != 0) {
+ return ret;
+ }
+ ret = kr_dnssec_key_from_rdata((struct dseckey **)&key_b, NULL, key_b_rdata, key_b_rdlen);
+ if (ret != 0) {
+ dnssec_key_free(key_a);
+ return ret;
+ }
+ /* If the algorithm and the public key match, we can be sure
+ * that they are the same key. */
+ ret = kr_error(ENOENT);
+ dnssec_binary_t pk_a, pk_b;
+ if (dnssec_key_get_algorithm(key_a) == dnssec_key_get_algorithm(key_b) &&
+ dnssec_key_get_pubkey(key_a, &pk_a) == DNSSEC_EOK &&
+ dnssec_key_get_pubkey(key_b, &pk_b) == DNSSEC_EOK) {
+ if (pk_a.size == pk_b.size && memcmp(pk_a.data, pk_b.data, pk_a.size) == 0) {
+ ret = 0;
+ }
+ }
+ dnssec_key_free(key_a);
+ dnssec_key_free(key_b);
+ return ret;
+}
+
+int kr_dnssec_key_from_rdata(struct dseckey **key, const knot_dname_t *kown, const uint8_t *rdata, size_t rdlen)
+{
+ if (!key || !rdata || rdlen == 0) {
+ return kr_error(EINVAL);
+ }
+
+ dnssec_key_t *new_key = NULL;
+ const dnssec_binary_t binary_key = {
+ .size = rdlen,
+ .data = (uint8_t *)rdata
+ };
+
+ int ret = dnssec_key_new(&new_key);
+ if (ret != DNSSEC_EOK) {
+ return kr_error(ENOMEM);
+ }
+ ret = dnssec_key_set_rdata(new_key, &binary_key);
+ if (ret != DNSSEC_EOK) {
+ dnssec_key_free(new_key);
+ return kr_error(ret);
+ }
+ if (kown) {
+ ret = dnssec_key_set_dname(new_key, kown);
+ if (ret != DNSSEC_EOK) {
+ dnssec_key_free(new_key);
+ return kr_error(ENOMEM);
+ }
+ }
+
+ *key = (struct dseckey *) new_key;
+ return kr_ok();
+}
+
+void kr_dnssec_key_free(struct dseckey **key)
+{
+ assert(key);
+
+ dnssec_key_free((dnssec_key_t *) *key);
+ *key = NULL;
+}
+
+int kr_dnssec_matches_name_and_type(const ranked_rr_array_t *rrs, uint32_t qry_uid,
+ const knot_dname_t *name, uint16_t type)
+{
+ int ret = kr_error(ENOENT);
+ for (size_t i = 0; i < rrs->len; ++i) {
+ const ranked_rr_array_entry_t *entry = rrs->at[i];
+ const knot_rrset_t *nsec = entry->rr;
+ if (entry->qry_uid != qry_uid || entry->yielded) {
+ continue;
+ }
+ if (nsec->type != KNOT_RRTYPE_NSEC &&
+ nsec->type != KNOT_RRTYPE_NSEC3) {
+ continue;
+ }
+ if (!kr_rank_test(entry->rank, KR_RANK_SECURE)) {
+ continue;
+ }
+ if (nsec->type == KNOT_RRTYPE_NSEC) {
+ ret = kr_nsec_matches_name_and_type(nsec, name, type);
+ } else {
+ ret = kr_nsec3_matches_name_and_type(nsec, name, type);
+ }
+ if (ret == kr_ok()) {
+ return kr_ok();
+ } else if (ret != kr_error(ENOENT)) {
+ return ret;
+ }
+ }
+ return ret;
+}
diff --git a/lib/dnssec.h b/lib/dnssec.h
new file mode 100644
index 0000000..b54908c
--- /dev/null
+++ b/lib/dnssec.h
@@ -0,0 +1,152 @@
+/* Copyright (C) 2015-2017 CZ.NIC, z.s.p.o. <knot-dns@labs.nic.cz>
+
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program. If not, see <https://www.gnu.org/licenses/>.
+ */
+
+#pragma once
+
+#include "lib/defines.h"
+#include "lib/utils.h"
+#include <libknot/packet/pkt.h>
+
+/**
+ * Initialise cryptographic back-end.
+ */
+KR_EXPORT
+void kr_crypto_init(void);
+
+/**
+ * De-initialise cryptographic back-end.
+ */
+KR_EXPORT
+void kr_crypto_cleanup(void);
+
+/**
+ * Re-initialise cryptographic back-end.
+ * @note Must be called after fork() in the child.
+ */
+KR_EXPORT
+void kr_crypto_reinit(void);
+
+/** Opaque DNSSEC key pointer. */
+struct dseckey;
+
+#define KR_DNSSEC_VFLG_WEXPAND 0x01
+#define KR_DNSSEC_VFLG_OPTOUT 0x02
+
+/** DNSSEC validation context. */
+struct kr_rrset_validation_ctx {
+ const knot_pkt_t *pkt; /*!< Packet to be validated. */
+ ranked_rr_array_t *rrs; /*!< List of preselected RRs to be validated. */
+ knot_section_t section_id; /*!< Section to work with. */
+ const knot_rrset_t *keys; /*!< DNSKEY RRSet. */
+ const knot_dname_t *zone_name; /*!< Name of the zone containing the RRSIG RRSet. */
+ uint32_t timestamp; /*!< Validation time. */
+ bool has_nsec3; /*!< Whether to use NSEC3 validation. */
+ uint32_t qry_uid; /*!< Current query uid. */
+ uint32_t flags; /*!< Output - Flags. */
+ uint32_t err_cnt; /*!< Output - Number of validation failures. */
+ int result; /*!< Output - 0 or error code. */
+ struct {
+ unsigned int matching_name_type; /*!< Name + type matches */
+ unsigned int expired;
+ unsigned int notyet;
+ unsigned int signer_invalid; /*!< Signer is not zone apex */
+ unsigned int labels_invalid; /*!< Number of labels in RRSIG */
+ unsigned int key_invalid; /*!< Algorithm/keytag/key owner */
+ unsigned int crypto_invalid;
+ unsigned int nsec_invalid;
+ } rrs_counters; /*!< Error counters for single RRset validation. */
+};
+
+typedef struct kr_rrset_validation_ctx kr_rrset_validation_ctx_t;
+
+/**
+ * Validate RRSet.
+ * @param vctx Pointer to validation context.
+ * @param covered RRSet covered by a signature. It must be in canonical format.
+ * @return 0 or error code, same as vctx->result.
+ */
+int kr_rrset_validate(kr_rrset_validation_ctx_t *vctx,
+ const knot_rrset_t *covered);
+
+/**
+ * Check whether the DNSKEY rrset matches the supplied trust anchor RRSet.
+ * @param vctx Pointer to validation context.
+ * @param ta Trust anchor RRSet against which to validate the DNSKEY RRSet.
+ * @return 0 or error code, same as vctx->result. In particular,
+ * DNSSEC_INVALID_DS_ALGORITHM if *every* DS record is unusable
+ * due to an unimplemented DNSKEY or DS algorithm.
+ */
+int kr_dnskeys_trusted(kr_rrset_validation_ctx_t *vctx, const knot_rrset_t *ta);
+
+/** Return true if the DNSKEY can be used as a ZSK. */
+KR_EXPORT KR_PURE
+bool kr_dnssec_key_zsk(const uint8_t *dnskey_rdata);
+
+/** Return true if the DNSKEY indicates being KSK (=> has SEP). */
+KR_EXPORT KR_PURE
+bool kr_dnssec_key_ksk(const uint8_t *dnskey_rdata);
+
+/** Return true if the DNSKEY is revoked. */
+KR_EXPORT KR_PURE
+bool kr_dnssec_key_revoked(const uint8_t *dnskey_rdata);
+
+/** Return DNSKEY tag.
+ * @param rrtype RR type (either DS or DNSKEY are supported)
+ * @param rdata Key/digest RDATA.
+ * @param rdlen RDATA length.
+ * @return Key tag (positive number), or an error code
+ */
+KR_EXPORT KR_PURE
+int kr_dnssec_key_tag(uint16_t rrtype, const uint8_t *rdata, size_t rdlen);
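+/* Informative: for DS the tag is simply the first 16 bits of RDATA, while
+ * for DNSKEY it is computed from the whole key per RFC4034 Appendix B. */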
+
+/** Return 0 if the two keys are identical.
+ * @note Only the algorithm and public key are compared; other RDATA fields are ignored.
+ * @param key_a_rdata First key RDATA
+ * @param key_a_rdlen First key RDATA length
+ * @param key_b_rdata Second key RDATA
+ * @param key_b_rdlen Second key RDATA length
+ * @return 0 if they match or an error code
+ */
+KR_EXPORT KR_PURE
+int kr_dnssec_key_match(const uint8_t *key_a_rdata, size_t key_a_rdlen,
+ const uint8_t *key_b_rdata, size_t key_b_rdlen);
+
+/**
+ * Construct a DNSSEC key.
+ * @param key Pointer to be set to newly created DNSSEC key.
+ * @param kown DNSKEY owner name.
+ * @param rdata DNSKEY RDATA
+ * @param rdlen DNSKEY RDATA length
+ * @return 0 or error code; in particular: DNSSEC_INVALID_KEY_ALGORITHM
+ */
+int kr_dnssec_key_from_rdata(struct dseckey **key, const knot_dname_t *kown, const uint8_t *rdata, size_t rdlen);
+
+/**
+ * Frees the DNSSEC key.
+ * @param key Pointer to freed key.
+ */
+void kr_dnssec_key_free(struct dseckey **key);
+
+/**
+ * Checks whether NSEC/NSEC3 RR selected by iterator matches the supplied name and type.
+ * @param rrs Records selected by iterator.
+ * @param qry_uid Query unique identifier where NSEC/NSEC3 belongs to.
+ * @param name Name to be checked.
+ * @param type Type to be checked.
+ * @return 0 or error code.
+ */
+int kr_dnssec_matches_name_and_type(const ranked_rr_array_t *rrs, uint32_t qry_uid,
+ const knot_dname_t *name, uint16_t type);
diff --git a/lib/dnssec/nsec.c b/lib/dnssec/nsec.c
new file mode 100644
index 0000000..4ae13b2
--- /dev/null
+++ b/lib/dnssec/nsec.c
@@ -0,0 +1,540 @@
+/* Copyright (C) 2015-2017 CZ.NIC, z.s.p.o. <knot-dns@labs.nic.cz>
+
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program. If not, see <https://www.gnu.org/licenses/>.
+ */
+
+#include <assert.h>
+#include <stdlib.h>
+
+#include <libknot/descriptor.h>
+#include <libknot/dname.h>
+#include <libknot/packet/wire.h>
+#include <libknot/rrset.h>
+#include <libknot/rrtype/nsec.h>
+#include <libknot/rrtype/rrsig.h>
+#include <libdnssec/error.h>
+#include <libdnssec/nsec.h>
+
+#include "lib/defines.h"
+#include "lib/dnssec/nsec.h"
+
+
+int kr_nsec_children_in_zone_check(const uint8_t *bm, uint16_t bm_size)
+{
+ if (!bm) {
+ return kr_error(EINVAL);
+ }
+ const bool parent_side =
+ dnssec_nsec_bitmap_contains(bm, bm_size, KNOT_RRTYPE_DNAME)
+ || (dnssec_nsec_bitmap_contains(bm, bm_size, KNOT_RRTYPE_NS)
+ && !dnssec_nsec_bitmap_contains(bm, bm_size, KNOT_RRTYPE_SOA)
+ );
+ return parent_side ? abs(ENOENT) : kr_ok();
+ /* LATER: after refactoring, probably also check if signer name equals owner,
+ * but even without that it's not possible to attack *correctly* signed zones.
+ */
+}
+
+/**
+ * Check whether the NSEC RR proves that there is no closer match for <SNAME, SCLASS>.
+ * @param nsec NSEC RRSet.
+ * @param sname Searched name.
+ * @return 0 if the NSEC covers the name, abs(ENOENT) (>0) if not, or an error code (<0).
+ */
+static int nsec_covers(const knot_rrset_t *nsec, const knot_dname_t *sname)
+{
+ assert(nsec && sname);
+ if (knot_dname_cmp(sname, nsec->owner) <= 0) {
+ return abs(ENOENT); /* 'sname' before 'owner', so can't be covered */
+ }
+
+ /* If NSEC 'owner' >= 'next', it means that there is nothing after 'owner' */
+ /* We have to lower-case it with libknot >= 2.7; see also RFC 6840 5.1. */
+ knot_dname_t next[KNOT_DNAME_MAXLEN];
+ int ret = knot_dname_to_wire(next, knot_nsec_next(nsec->rrs.rdata), sizeof(next));
+ if (ret < 0) {
+ assert(!ret);
+ return kr_error(ret);
+ }
+ knot_dname_to_lower(next);
+
+ const bool is_last_nsec = knot_dname_cmp(nsec->owner, next) >= 0;
+ const bool in_range = is_last_nsec || knot_dname_cmp(sname, next) < 0;
+ if (!in_range) {
+ return abs(ENOENT);
+ }
+ /* Before returning kr_ok(), we have to check a special case:
+ * sname might be under delegation from owner and thus
+ * not in the zone of this NSEC at all.
+ */
+ if (knot_dname_in_bailiwick(sname, nsec->owner) <= 0) {
+ return kr_ok();
+ }
+ const uint8_t *bm = knot_nsec_bitmap(nsec->rrs.rdata);
+ uint16_t bm_size = knot_nsec_bitmap_len(nsec->rrs.rdata);
+
+ return kr_nsec_children_in_zone_check(bm, bm_size);
+}
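+
+/* Worked example for nsec_covers() (illustrative names): given an NSEC
+ * 'alfa.example. NSEC delta.example. ...', sname 'beta.example.' sorts
+ * canonically between owner and next and is therefore covered, while
+ * 'aaaa.example.' sorts before the owner and is not. The is_last_nsec
+ * branch handles the zone's final NSEC, whose next field wraps around
+ * to the zone apex.
+ */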
+
+#define FLG_NOEXIST_RRTYPE (1 << 0) /**< <SNAME, SCLASS> exists, <SNAME, SCLASS, STYPE> does not exist. */
+#define FLG_NOEXIST_RRSET (1 << 1) /**< <SNAME, SCLASS> does not exist. */
+#define FLG_NOEXIST_WILDCARD (1 << 2) /**< No wildcard covering <SNAME, SCLASS> exists. */
+#define FLG_NOEXIST_CLOSER (1 << 3) /**< Wildcard covering <SNAME, SCLASS> exists, but doesn't match STYPE. */
+
+
+/**
+ * Determine from the set flags whether an NSEC proving
+ * RRset or RRType non-existence has been found.
+ * @param f Flags to inspect.
+ * @return True if required NSEC exists.
+ */
+#define kr_nsec_rrset_noexist(f) \
+ ((f) & (FLG_NOEXIST_RRTYPE | FLG_NOEXIST_RRSET))
+/**
+ * Determine from the set flags whether wildcard non-existence
+ * has been proven.
+ * @param f Flags to inspect.
+ * @return True if the wildcard provably does not exist.
+ */
+#define kr_nsec_wcard_noexist(f) ((f) & FLG_NOEXIST_WILDCARD)
+
+/**
+ * Determine from the set flags whether authenticated denial of existence has been proven.
+ * @param f Flags to inspect.
+ * @return True if denial of existence proven.
+ */
+#define kr_nsec_existence_denied(f) \
+ ((kr_nsec_rrset_noexist(f)) && (kr_nsec_wcard_noexist(f)))
+
+/**
+ * Name error response check (RFC4035 3.1.3.2; RFC4035 5.4, bullet 2).
+ * @note Returned flags must be checked in order to prove denial.
+ * @param flags Flags to be set according to check outcome.
+ * @param nsec NSEC RR.
+ * @param name Name to be checked.
+ * @return 0 or error code.
+ */
+static int name_error_response_check_rr(int *flags, const knot_rrset_t *nsec,
+ const knot_dname_t *name)
+{
+ assert(flags && nsec && name);
+
+ if (nsec_covers(nsec, name) == 0) {
+ *flags |= FLG_NOEXIST_RRSET;
+ }
+
+ /* Try to find parent wildcard that is proved by this NSEC. */
+ uint8_t namebuf[KNOT_DNAME_MAXLEN];
+ int ret = knot_dname_to_wire(namebuf, name, sizeof(namebuf));
+ if (ret < 0)
+ return ret;
+ knot_dname_t *ptr = namebuf;
+ while (ptr[0]) {
+ /* Remove leftmost label and replace it with '\1*'. */
+ ptr = (uint8_t *) knot_wire_next_label(ptr, NULL);
+ if (!ptr) {
+ return kr_error(EINVAL);
+ }
+ *(--ptr) = '*';
+ *(--ptr) = 1;
+ /* True if this wildcard provably doesn't exist. */
+ if (nsec_covers(nsec, ptr) == 0) {
+ *flags |= FLG_NOEXIST_WILDCARD;
+ break;
+ }
+ /* Remove added leftmost asterisk. */
+ ptr += 2;
+ }
+
+ return kr_ok();
+}
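+
+/* Example of the wildcard walk above (illustrative): for the name
+ * 'a.b.example.' the loop successively tests '*.b.example.', '*.example.'
+ * and '*.', stopping at the first wildcard this NSEC provably covers.
+ */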
+
+int kr_nsec_name_error_response_check(const knot_pkt_t *pkt, knot_section_t section_id,
+ const knot_dname_t *sname)
+{
+ const knot_pktsection_t *sec = knot_pkt_section(pkt, section_id);
+ if (!sec || !sname) {
+ return kr_error(EINVAL);
+ }
+
+ int flags = 0;
+ for (unsigned i = 0; i < sec->count; ++i) {
+ const knot_rrset_t *rrset = knot_pkt_rr(sec, i);
+ if (rrset->type != KNOT_RRTYPE_NSEC) {
+ continue;
+ }
+ int ret = name_error_response_check_rr(&flags, rrset, sname);
+ if (ret != 0) {
+ return ret;
+ }
+ }
+
+ return kr_nsec_existence_denied(flags) ? kr_ok() : kr_error(ENOENT);
+}
+
+/**
+ * Returns the label count from the RRSIGs covering the NSEC RR.
+ * @note The count must be the same in all covering RRSIGs.
+ * @param nsec NSEC RR.
+ * @param sec Packet section.
+ * @return Number of labels or (negative) error code.
+ */
+static int covering_rrsig_labels(const knot_rrset_t *nsec, const knot_pktsection_t *sec)
+{
+ assert(nsec && sec);
+
+ int ret = kr_error(ENOENT);
+
+ for (unsigned i = 0; i < sec->count; ++i) {
+ const knot_rrset_t *rrset = knot_pkt_rr(sec, i);
+ if ((rrset->type != KNOT_RRTYPE_RRSIG) ||
+ (!knot_dname_is_equal(rrset->owner, nsec->owner))) {
+ continue;
+ }
+
+ knot_rdata_t *rdata_j = rrset->rrs.rdata;
+ for (uint16_t j = 0; j < rrset->rrs.count;
+ ++j, rdata_j = knot_rdataset_next(rdata_j)) {
+ if (knot_rrsig_type_covered(rdata_j) != KNOT_RRTYPE_NSEC) {
+ continue;
+ }
+
+ if (ret < 0) {
+ ret = knot_rrsig_labels(rdata_j);
+ } else {
+ if (ret != knot_rrsig_labels(rdata_j)) {
+ return kr_error(EINVAL);
+ }
+ }
+ }
+ }
+
+ return ret;
+}
+
+
+int kr_nsec_bitmap_nodata_check(const uint8_t *bm, uint16_t bm_size, uint16_t type, const knot_dname_t *owner)
+{
+ const int NO_PROOF = abs(ENOENT);
+ if (!bm || !owner) {
+ return kr_error(EINVAL);
+ }
+ if (dnssec_nsec_bitmap_contains(bm, bm_size, type)) {
+ return NO_PROOF;
+ }
+
+ if (type != KNOT_RRTYPE_CNAME
+ && dnssec_nsec_bitmap_contains(bm, bm_size, KNOT_RRTYPE_CNAME)) {
+ return NO_PROOF;
+ }
+ /* Special behavior around zone cuts. */
+ switch (type) {
+ case KNOT_RRTYPE_DS:
+ /* Security feature: in case of DS also check for SOA
+ * non-existence to be more certain that we don't hold
+ * a child-side NSEC by some mistake (e.g. when forwarding).
+ * See RFC4035 5.2, next-to-last paragraph.
+ * This doesn't apply for root DS as it doesn't exist in DNS hierarchy.
+ */
+ if (owner[0] != '\0' && dnssec_nsec_bitmap_contains(bm, bm_size, KNOT_RRTYPE_SOA)) {
+ return NO_PROOF;
+ }
+ break;
+ case KNOT_RRTYPE_CNAME:
+ /* Exception from the `default` rule. It's perhaps disputable,
+ * but existence of CNAME at zone apex is not allowed, so we
+ * consider a parent-side record to be enough to prove non-existence. */
+ break;
+ default:
+ /* Parent-side delegation record isn't authoritative for non-DS;
+ * see RFC6840 4.1. */
+ if (dnssec_nsec_bitmap_contains(bm, bm_size, KNOT_RRTYPE_NS)
+ && !dnssec_nsec_bitmap_contains(bm, bm_size, KNOT_RRTYPE_SOA)) {
+ return NO_PROOF;
+ }
+ /* LATER(opt): perhaps short-circuit test if we repeat it here. */
+ }
+
+ return kr_ok();
+}
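+
+/* Usage sketch (hypothetical caller): proving NODATA for <owner, AAAA>
+ * from an NSEC whose owner equals the queried name:
+ *
+ *	const uint8_t *bm = knot_nsec_bitmap(nsec->rrs.rdata);
+ *	uint16_t bm_size = knot_nsec_bitmap_len(nsec->rrs.rdata);
+ *	int ret = kr_nsec_bitmap_nodata_check(bm, bm_size,
+ *					      KNOT_RRTYPE_AAAA, nsec->owner);
+ *	// ret == 0 proves NODATA; abs(ENOENT) means no proof from this RR
+ */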
+
+/**
+ * Attempt to prove NODATA given a matching NSEC.
+ * @param flags Flags to be set according to check outcome.
+ * @param nsec NSEC RR.
+ * @param type Type to be checked.
+ * @return 0 on success, abs(ENOENT) for no proof, or error code (<0).
+ * @note It's not a *full* proof, of course (wildcards, etc.)
+ * @TODO returning result via `flags` is just ugly.
+ */
+static int no_data_response_check_rrtype(int *flags, const knot_rrset_t *nsec,
+ uint16_t type)
+{
+ assert(flags && nsec);
+
+ const uint8_t *bm = knot_nsec_bitmap(nsec->rrs.rdata);
+ uint16_t bm_size = knot_nsec_bitmap_len(nsec->rrs.rdata);
+ int ret = kr_nsec_bitmap_nodata_check(bm, bm_size, type, nsec->owner);
+ if (ret == kr_ok()) {
+ *flags |= FLG_NOEXIST_RRTYPE;
+ }
+ return ret <= 0 ? ret : kr_ok();
+}
+
+/**
+ * Perform check for RR type wildcard existence denial according to RFC4035 5.4, bullet 1.
+ * @param flags Flags to be set according to check outcome.
+ * @param nsec NSEC RR.
+ * @param sec Packet section to work with.
+ * @return 0 or error code.
+ */
+static int no_data_wildcard_existence_check(int *flags, const knot_rrset_t *nsec,
+ const knot_pktsection_t *sec)
+{
+ assert(flags && nsec && sec);
+
+	int rrsig_labels = covering_rrsig_labels(nsec, sec);
+ if (rrsig_labels < 0) {
+ return rrsig_labels;
+ }
+ int nsec_labels = knot_dname_labels(nsec->owner, NULL);
+ if (nsec_labels < 0) {
+ return nsec_labels;
+ }
+
+ if (rrsig_labels == nsec_labels) {
+ *flags |= FLG_NOEXIST_WILDCARD;
+ }
+
+ return kr_ok();
+}
+
+/**
+ * Check for a wildcard NSEC that covers sname and
+ * has no stype bit set.
+ * @param pkt Packet structure to be processed.
+ * @param sec Packet section to work with.
+ * @param sname Queried domain name.
+ * @param stype Queried type.
+ * @return 0 or error code.
+ */
+static int wildcard_match_check(const knot_pkt_t *pkt, const knot_pktsection_t *sec,
+ const knot_dname_t *sname, uint16_t stype)
+{
+ if (!sec || !sname) {
+ return kr_error(EINVAL);
+ }
+
+ int flags = 0;
+ for (unsigned i = 0; i < sec->count; ++i) {
+ const knot_rrset_t *rrset = knot_pkt_rr(sec, i);
+ if (rrset->type != KNOT_RRTYPE_NSEC) {
+ continue;
+ }
+ if (!knot_dname_is_wildcard(rrset->owner)) {
+ continue;
+ }
+ if (!knot_dname_is_equal(rrset->owner, sname)) {
+ int wcard_labels = knot_dname_labels(rrset->owner, NULL);
+ int common_labels = knot_dname_matched_labels(rrset->owner, sname);
+			int rrsig_labels = covering_rrsig_labels(rrset, sec);
+ if (wcard_labels < 1 ||
+ common_labels != wcard_labels - 1 ||
+ common_labels != rrsig_labels) {
+ continue;
+ }
+ }
+ int ret = no_data_response_check_rrtype(&flags, rrset, stype);
+ if (ret != 0) {
+ return ret;
+ }
+ }
+ return (flags & FLG_NOEXIST_RRTYPE) ? kr_ok() : kr_error(ENOENT);
+}
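+
+/* Note on the label arithmetic above (illustrative): a wildcard owner
+ * '*.example.' has 2 labels and may only synthesize names directly below
+ * 'example.', so a candidate must share exactly wcard_labels - 1 labels
+ * with the wildcard owner; requiring the covering RRSIG to carry the same
+ * label count guards against a deeper wildcard having been expanded.
+ */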
+
+int kr_nsec_no_data_response_check(const knot_pkt_t *pkt, knot_section_t section_id,
+ const knot_dname_t *sname, uint16_t stype)
+{
+ const knot_pktsection_t *sec = knot_pkt_section(pkt, section_id);
+ if (!sec || !sname) {
+ return kr_error(EINVAL);
+ }
+
+ int flags = 0;
+ for (unsigned i = 0; i < sec->count; ++i) {
+ const knot_rrset_t *rrset = knot_pkt_rr(sec, i);
+ if (rrset->type != KNOT_RRTYPE_NSEC) {
+ continue;
+ }
+ if (knot_dname_is_equal(rrset->owner, sname)) {
+ int ret = no_data_response_check_rrtype(&flags, rrset, stype);
+ if (ret != 0) {
+ return ret;
+ }
+ }
+ }
+
+ return (flags & FLG_NOEXIST_RRTYPE) ? kr_ok() : kr_error(ENOENT);
+}
+
+int kr_nsec_wildcard_answer_response_check(const knot_pkt_t *pkt, knot_section_t section_id,
+ const knot_dname_t *sname)
+{
+ const knot_pktsection_t *sec = knot_pkt_section(pkt, section_id);
+ if (!sec || !sname) {
+ return kr_error(EINVAL);
+ }
+
+ for (unsigned i = 0; i < sec->count; ++i) {
+ const knot_rrset_t *rrset = knot_pkt_rr(sec, i);
+ if (rrset->type != KNOT_RRTYPE_NSEC) {
+ continue;
+ }
+ if (nsec_covers(rrset, sname) == 0) {
+ return kr_ok();
+ }
+ }
+
+ return kr_error(ENOENT);
+}
+
+int kr_nsec_existence_denial(const knot_pkt_t *pkt, knot_section_t section_id,
+ const knot_dname_t *sname, uint16_t stype)
+{
+ const knot_pktsection_t *sec = knot_pkt_section(pkt, section_id);
+ if (!sec || !sname) {
+ return kr_error(EINVAL);
+ }
+
+ int flags = 0;
+ for (unsigned i = 0; i < sec->count; ++i) {
+ const knot_rrset_t *rrset = knot_pkt_rr(sec, i);
+ if (rrset->type != KNOT_RRTYPE_NSEC) {
+ continue;
+ }
+ /* NSEC proves that name exists, but has no data (RFC4035 4.9, 1) */
+ if (knot_dname_is_equal(rrset->owner, sname)) {
+ no_data_response_check_rrtype(&flags, rrset, stype);
+ } else {
+ /* NSEC proves that name doesn't exist (RFC4035, 4.9, 2) */
+ name_error_response_check_rr(&flags, rrset, sname);
+ }
+ no_data_wildcard_existence_check(&flags, rrset, sec);
+ }
+ if (kr_nsec_existence_denied(flags)) {
+		/* denial of existence proven according to RFC4035 5.4 -
+		 * an NSEC proving either RRset non-existence or
+		 * qtype non-existence has been found,
+ * and no wildcard expansion occurred.
+ */
+ return kr_ok();
+ } else if (kr_nsec_rrset_noexist(flags)) {
+		/* An NSEC proving either RRset non-existence or
+		 * qtype non-existence has been found,
+		 * but wildcard expansion may have occurred.
+ * Try to find matching wildcard and check
+ * corresponding types.
+ */
+ return wildcard_match_check(pkt, sec, sname, stype);
+ }
+ return kr_error(ENOENT);
+}
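+
+/* Usage sketch (hypothetical caller): a validator would typically run this
+ * check over the authority section of a negative answer:
+ *
+ *	int ret = kr_nsec_existence_denial(pkt, KNOT_AUTHORITY,
+ *					   sname, stype);
+ *	// 0: denial proven; kr_error(ENOENT): the NSEC chain is insufficient
+ */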
+
+int kr_nsec_ref_to_unsigned(const knot_pkt_t *pkt)
+{
+ int nsec_found = 0;
+ const knot_pktsection_t *sec = knot_pkt_section(pkt, KNOT_AUTHORITY);
+ if (!sec) {
+ return kr_error(EINVAL);
+ }
+ for (unsigned i = 0; i < sec->count; ++i) {
+ const knot_rrset_t *ns = knot_pkt_rr(sec, i);
+ if (ns->type == KNOT_RRTYPE_DS) {
+ return kr_error(EEXIST);
+ }
+ if (ns->type != KNOT_RRTYPE_NS) {
+ continue;
+ }
+ nsec_found = 0;
+ for (unsigned j = 0; j < sec->count; ++j) {
+ const knot_rrset_t *nsec = knot_pkt_rr(sec, j);
+ if (nsec->type == KNOT_RRTYPE_DS) {
+ return kr_error(EEXIST);
+ }
+ if (nsec->type != KNOT_RRTYPE_NSEC) {
+ continue;
+ }
+			/* NSEC found;
+			 * check if its owner name matches the delegation name.
+			 */
+ if (!knot_dname_is_equal(nsec->owner, ns->owner)) {
+ /* nsec does not match the delegation */
+ continue;
+ }
+ nsec_found = 1;
+ const uint8_t *bm = knot_nsec_bitmap(nsec->rrs.rdata);
+ uint16_t bm_size = knot_nsec_bitmap_len(nsec->rrs.rdata);
+ if (!bm) {
+ return kr_error(EINVAL);
+ }
+ if (dnssec_nsec_bitmap_contains(bm, bm_size,
+ KNOT_RRTYPE_NS) &&
+ !dnssec_nsec_bitmap_contains(bm, bm_size,
+ KNOT_RRTYPE_DS) &&
+ !dnssec_nsec_bitmap_contains(bm, bm_size,
+ KNOT_RRTYPE_SOA)) {
+ /* rfc4035, 5.2 */
+ return kr_ok();
+ }
+ }
+ if (nsec_found) {
+			/* An NSEC whose owner matches
+			 * the delegation name was found,
+			 * but its type bitmap contains the wrong types.
+			 */
+ return kr_error(EINVAL);
+ } else {
+			/* An NSEC matching the delegation was not found. */
+ return kr_error(DNSSEC_NOT_FOUND);
+ }
+ }
+
+ return kr_error(EINVAL);
+}
+
+int kr_nsec_matches_name_and_type(const knot_rrset_t *nsec,
+ const knot_dname_t *name, uint16_t type)
+{
+ /* It's not secure enough to just check a single bit for (some) other types,
+	 * but we (currently) use this API only for NS. See RFC 6840 sec. 4.
+ */
+ if (type != KNOT_RRTYPE_NS || !nsec || !name) {
+ assert(!EINVAL);
+ return kr_error(EINVAL);
+ }
+ if (!knot_dname_is_equal(nsec->owner, name)) {
+ return kr_error(ENOENT);
+ }
+ const uint8_t *bm = knot_nsec_bitmap(nsec->rrs.rdata);
+ uint16_t bm_size = knot_nsec_bitmap_len(nsec->rrs.rdata);
+ if (!bm) {
+ return kr_error(EINVAL);
+ }
+ if (dnssec_nsec_bitmap_contains(bm, bm_size, type)) {
+ return kr_ok();
+ } else {
+ return kr_error(ENOENT);
+ }
+}
diff --git a/lib/dnssec/nsec.h b/lib/dnssec/nsec.h
new file mode 100644
index 0000000..9439a88
--- /dev/null
+++ b/lib/dnssec/nsec.h
@@ -0,0 +1,105 @@
+/* Copyright (C) 2015-2017 CZ.NIC, z.s.p.o. <knot-dns@labs.nic.cz>
+
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program. If not, see <https://www.gnu.org/licenses/>.
+ */
+
+#pragma once
+
+#include <libknot/packet/pkt.h>
+
+/**
+ * Check the bitmap to see whether child names are contained in the same zone.
+ * @note see RFC6840 4.1.
+ * @param bm Bitmap from NSEC or NSEC3.
+ * @param bm_size Bitmap size.
+ * @return 0 if they are, >0 if not (abs(ENOENT)), <0 on error.
+ */
+int kr_nsec_children_in_zone_check(const uint8_t *bm, uint16_t bm_size);
+
+/**
+ * Check an NSEC or NSEC3 bitmap for NODATA for a type.
+ * @param bm Bitmap.
+ * @param bm_size Bitmap size.
+ * @param type RR type to check.
+ * @param owner NSEC record owner.
+ * @note This includes special checks for zone cuts, e.g. from RFC 6840 sec. 4.
+ * @return 0, abs(ENOENT) (no proof), kr_error(EINVAL)
+ */
+int kr_nsec_bitmap_nodata_check(const uint8_t *bm, uint16_t bm_size, uint16_t type, const knot_dname_t *owner);
+
+/**
+ * Name error response check (RFC4035 3.1.3.2; RFC4035 5.4, bullet 2).
+ * @note No RRSIGs are validated.
+ * @param pkt Packet structure to be processed.
+ * @param section_id Packet section to be processed.
+ * @param sname Name to be checked.
+ * @return 0 or error code.
+ */
+int kr_nsec_name_error_response_check(const knot_pkt_t *pkt, knot_section_t section_id,
+ const knot_dname_t *sname);
+
+/**
+ * No data response check (RFC4035 3.1.3.1; RFC4035 5.4, bullet 1).
+ * @param pkt Packet structure to be processed.
+ * @param section_id Packet section to be processed.
+ * @param sname Name to be checked.
+ * @param stype Type to be checked.
+ * @return 0 or error code.
+ */
+int kr_nsec_no_data_response_check(const knot_pkt_t *pkt, knot_section_t section_id,
+ const knot_dname_t *sname, uint16_t stype);
+
+/**
+ * Wildcard answer response check (RFC4035 3.1.3.3).
+ * @param pkt Packet structure to be processed.
+ * @param section_id Packet section to be processed.
+ * @param sname Name to be checked.
+ * @return 0 or error code.
+ */
+int kr_nsec_wildcard_answer_response_check(const knot_pkt_t *pkt, knot_section_t section_id,
+ const knot_dname_t *sname);
+
+/**
+ * Authenticated denial of existence according to RFC4035 5.4.
+ * @note No RRSIGs are validated.
+ * @param pkt Packet structure to be processed.
+ * @param section_id Packet section to be processed.
+ * @param sname Queried domain name.
+ * @param stype Queried type.
+ * @return 0 or error code.
+ */
+int kr_nsec_existence_denial(const knot_pkt_t *pkt, knot_section_t section_id,
+ const knot_dname_t *sname, uint16_t stype);
+
+/**
+ * Referral to unsigned subzone check (RFC4035 5.2).
+ * @note No RRSIGs are validated.
+ * @param pkt Packet structure to be processed.
+ * @return 0 or error code:
+ *         DNSSEC_NOT_FOUND - neither DS nor NSEC records
+ *         were found.
+ * EEXIST - ds record was found.
+ * EINVAL - bogus.
+ */
+int kr_nsec_ref_to_unsigned(const knot_pkt_t *pkt);
+
+/**
+ * Checks whether supplied NSEC RR matches the supplied name and type.
+ * @param nsec NSEC RR.
+ * @param name Name to be checked.
+ * @param type Type to be checked. Only use with NS! TODO (+copy&paste NSEC3)
+ * @return 0 or error code.
+ */
+int kr_nsec_matches_name_and_type(const knot_rrset_t *nsec,
+ const knot_dname_t *name, uint16_t type);
diff --git a/lib/dnssec/nsec3.c b/lib/dnssec/nsec3.c
new file mode 100644
index 0000000..4c2ef95
--- /dev/null
+++ b/lib/dnssec/nsec3.c
@@ -0,0 +1,776 @@
+/* Copyright (C) 2015-2017 CZ.NIC, z.s.p.o. <knot-dns@labs.nic.cz>
+
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program. If not, see <https://www.gnu.org/licenses/>.
+ */
+
+#include <assert.h>
+#include <string.h>
+
+#include <libdnssec/binary.h>
+#include <libdnssec/error.h>
+#include <libdnssec/nsec.h>
+#include <libknot/descriptor.h>
+#include <contrib/base32hex.h>
+#include <libknot/rrset.h>
+#include <libknot/rrtype/nsec3.h>
+
+#include "lib/defines.h"
+#include "lib/dnssec/nsec.h"
+#include "lib/dnssec/nsec3.h"
+
+#define OPT_OUT_BIT 0x01
+
+//#define FLG_CLOSEST_ENCLOSER (1 << 0)
+#define FLG_CLOSEST_PROVABLE_ENCLOSER (1 << 1)
+#define FLG_NAME_COVERED (1 << 2)
+#define FLG_NAME_MATCHED (1 << 3)
+#define FLG_TYPE_BIT_MISSING (1 << 4)
+#define FLG_CNAME_BIT_MISSING (1 << 5)
+
+/**
+ * Obtains NSEC3 parameters from RR.
+ * @param params NSEC3 parameters structure to be set.
+ * @param nsec3 NSEC3 RR containing the parameters.
+ * @return 0 or error code.
+ */
+static int nsec3_parameters(dnssec_nsec3_params_t *params, const knot_rrset_t *nsec3)
+{
+ assert(params && nsec3);
+
+ const knot_rdata_t *rr = knot_rdataset_at(&nsec3->rrs, 0);
+ assert(rr);
+
+ /* Every NSEC3 RR contains data from NSEC3PARAMS. */
+ const size_t SALT_OFFSET = 5; /* First 5 octets contain { Alg, Flags, Iterations, Salt length } */
+ dnssec_binary_t rdata = {
+ .size = SALT_OFFSET + (size_t)knot_nsec3_salt_len(nsec3->rrs.rdata),
+ .data = /*const-cast*/(uint8_t *)rr->data,
+ };
+ if (rdata.size > rr->len)
+ return kr_error(EMSGSIZE);
+
+ int ret = dnssec_nsec3_params_from_rdata(params, &rdata);
+ if (ret != DNSSEC_EOK) {
+ return kr_error(EINVAL);
+ }
+
+ return kr_ok();
+}
+
+/**
+ * Computes a hash of a given domain name.
+ * @param hash Resulting hash, must be freed.
+ * @param params NSEC3 parameters.
+ * @param name Domain name to be hashed.
+ * @return 0 or error code.
+ */
+static int hash_name(dnssec_binary_t *hash, const dnssec_nsec3_params_t *params,
+ const knot_dname_t *name)
+{
+ assert(hash && params);
+ if (!name)
+ return kr_error(EINVAL);
+
+ dnssec_binary_t dname = {
+ .size = knot_dname_size(name),
+ .data = (uint8_t *) name,
+ };
+
+ int ret = dnssec_nsec3_hash(&dname, params, hash);
+ if (ret != DNSSEC_EOK) {
+ return kr_error(EINVAL);
+ }
+
+ return kr_ok();
+}
+
+/**
+ * Read hash from NSEC3 owner name and store its binary form.
+ * @param hash Buffer to be written.
+ * @param max_hash_size Maximal hash size.
+ * @param nsec3 NSEC3 RR.
+ * @return 0 or error code.
+ */
+static int read_owner_hash(dnssec_binary_t *hash, size_t max_hash_size, const knot_rrset_t *nsec3)
+{
+ assert(hash && nsec3);
+ assert(hash->data);
+
+ int32_t ret = base32hex_decode(nsec3->owner + 1, nsec3->owner[0], hash->data, max_hash_size);
+ if (ret < 0) {
+ return kr_error(EILSEQ);
+ }
+ hash->size = ret;
+
+ return kr_ok();
+}
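+
+/* Illustration: an NSEC3 owner consists of the base32hex-encoded hash as
+ * the first label, followed by the zone name. read_owner_hash() decodes
+ * that first label (nsec3->owner + 1, of length nsec3->owner[0]) back into
+ * the raw hash, which matches_name() and friends then compare with the
+ * freshly computed hash_name() output.
+ */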
+
+#define MAX_HASH_BYTES 64
+/**
+ * Closest (provable) encloser match (RFC5155 7.2.1, bullet 1).
+ * @param flags Flags to be set according to check outcome.
+ * @param nsec3 NSEC3 RR.
+ * @param name Name to be checked.
+ * @param skipped Number of skipped labels to find closest (provable) match.
+ * @return 0 or error code.
+ */
+static int closest_encloser_match(int *flags, const knot_rrset_t *nsec3,
+ const knot_dname_t *name, unsigned *skipped)
+{
+ assert(flags && nsec3 && name && skipped);
+
+ uint8_t hash_data[MAX_HASH_BYTES] = {0, };
+ dnssec_binary_t owner_hash = { 0, hash_data };
+ dnssec_nsec3_params_t params = { 0, };
+ dnssec_binary_t name_hash = { 0, };
+
+ int ret = read_owner_hash(&owner_hash, MAX_HASH_BYTES, nsec3);
+ if (ret != 0) {
+ goto fail;
+ }
+
+ ret = nsec3_parameters(&params, nsec3);
+ if (ret != 0) {
+ goto fail;
+ }
+
+ /* Root label has no encloser */
+ if (!name[0]) {
+ ret = kr_error(ENOENT);
+ goto fail;
+ }
+
+ const knot_dname_t *encloser = knot_wire_next_label(name, NULL);
+ *skipped = 1;
+
+ while(encloser) {
+ ret = hash_name(&name_hash, &params, encloser);
+ if (ret != 0) {
+ goto fail;
+ }
+
+ if ((owner_hash.size == name_hash.size) &&
+ (memcmp(owner_hash.data, name_hash.data, owner_hash.size) == 0)) {
+ dnssec_binary_free(&name_hash);
+ *flags |= FLG_CLOSEST_PROVABLE_ENCLOSER;
+ break;
+ }
+
+ dnssec_binary_free(&name_hash);
+
+ if (!encloser[0])
+ break;
+ encloser = knot_wire_next_label(encloser, NULL);
+ ++(*skipped);
+ }
+
+ ret = kr_ok();
+
+fail:
+ if (params.salt.data) {
+ dnssec_nsec3_params_free(&params);
+ }
+ if (name_hash.data) {
+ dnssec_binary_free(&name_hash);
+ }
+ return ret;
+}
+
+/**
+ * Checks whether NSEC3 RR covers the supplied name (RFC5155 7.2.1, bullet 2).
+ * @param flags Flags to be set according to check outcome.
+ * @param nsec3 NSEC3 RR.
+ * @param name Name to be checked.
+ * @return 0 or error code.
+ */
+static int covers_name(int *flags, const knot_rrset_t *nsec3, const knot_dname_t *name)
+{
+ assert(flags && nsec3 && name);
+
+ uint8_t hash_data[MAX_HASH_BYTES] = { 0, };
+ dnssec_binary_t owner_hash = { 0, hash_data };
+ dnssec_nsec3_params_t params = { 0, };
+ dnssec_binary_t name_hash = { 0, };
+
+ int ret = read_owner_hash(&owner_hash, MAX_HASH_BYTES, nsec3);
+ if (ret != 0) {
+ goto fail;
+ }
+
+ ret = nsec3_parameters(&params, nsec3);
+ if (ret != 0) {
+ goto fail;
+ }
+
+ ret = hash_name(&name_hash, &params, name);
+ if (ret != 0) {
+ goto fail;
+ }
+
+ uint8_t next_size = knot_nsec3_next_len(nsec3->rrs.rdata);
+ const uint8_t *next_hash = knot_nsec3_next(nsec3->rrs.rdata);
+
+ if ((next_size > 0) && (owner_hash.size == next_size) && (name_hash.size == next_size)) {
+ /* All hash lengths must be same. */
+ const uint8_t *ownrd = owner_hash.data;
+ const uint8_t *nextd = next_hash;
+ int covered = 0;
+		int greater_than_owner = (memcmp(ownrd, name_hash.data, next_size) < 0);
+		int less_than_next = (memcmp(name_hash.data, nextd, next_size) < 0);
+ if (memcmp(ownrd, nextd, next_size) < 0) {
+ /*
+ * 0 (...) owner ... next (...) MAX
+ * ^
+ * name
+ * ==>
+ * (owner < name) && (name < next)
+ */
+			covered = ((greater_than_owner) && (less_than_next));
+ } else {
+ /*
+ * owner ... MAX, 0 ... next
+ * ^ ^ ^
+ * name name name
+ * =>
+ * (owner < name) || (name < next)
+ */
+			covered = ((greater_than_owner) || (less_than_next));
+ }
+
+ if (covered) {
+ *flags |= FLG_NAME_COVERED;
+
+ uint8_t nsec3_flags = knot_nsec3_flags(nsec3->rrs.rdata);
+ if (nsec3_flags & ~OPT_OUT_BIT) {
+ /* RFC5155 3.1.2 */
+ ret = kr_error(EINVAL);
+ } else {
+ ret = kr_ok();
+ }
+ }
+ }
+
+fail:
+ if (params.salt.data) {
+ dnssec_nsec3_params_free(&params);
+ }
+ if (name_hash.data) {
+ dnssec_binary_free(&name_hash);
+ }
+ return ret;
+}
+
+/**
+ * Checks whether NSEC3 RR has the opt-out bit set.
+ * @param nsec3 NSEC3 RR.
+ * @return True if the opt-out bit is set (and no other flags are), false otherwise.
+ */
+static bool has_optout(const knot_rrset_t *nsec3)
+{
+ if (!nsec3) {
+ return false;
+ }
+
+ uint8_t nsec3_flags = knot_nsec3_flags(nsec3->rrs.rdata);
+ if (nsec3_flags & ~OPT_OUT_BIT) {
+ /* RFC5155 3.1.2 */
+ return false;
+ }
+
+ return nsec3_flags & OPT_OUT_BIT;
+}
+
+/**
+ * Checks whether NSEC3 RR matches the supplied name.
+ * @param flags Flags to be set according to check outcome.
+ * @param nsec3 NSEC3 RR.
+ * @param name Name to be checked.
+ * @return 0 if matching, >0 if not (abs(ENOENT)), or error code (<0).
+ */
+static int matches_name(const knot_rrset_t *nsec3, const knot_dname_t *name)
+{
+ assert(nsec3 && name);
+
+ uint8_t hash_data[MAX_HASH_BYTES] = { 0, };
+ dnssec_binary_t owner_hash = { 0, hash_data };
+ dnssec_nsec3_params_t params = { 0, };
+ dnssec_binary_t name_hash = { 0, };
+
+ int ret = read_owner_hash(&owner_hash, MAX_HASH_BYTES, nsec3);
+ if (ret != 0) {
+ goto fail;
+ }
+
+ ret = nsec3_parameters(&params, nsec3);
+ if (ret != 0) {
+ goto fail;
+ }
+
+ ret = hash_name(&name_hash, &params, name);
+ if (ret != 0) {
+ goto fail;
+ }
+
+ if ((owner_hash.size == name_hash.size) &&
+ (memcmp(owner_hash.data, name_hash.data, owner_hash.size) == 0)) {
+ ret = kr_ok();
+ } else {
+ ret = abs(ENOENT);
+ }
+
+fail:
+ if (params.salt.data) {
+ dnssec_nsec3_params_free(&params);
+ }
+ if (name_hash.data) {
+ dnssec_binary_free(&name_hash);
+ }
+ return ret;
+}
+#undef MAX_HASH_BYTES
+
+/**
+ * Prepends an asterisk label to the given name.
+ *
+ * @param tgt Target buffer to write the domain name into.
+ * @param maxlen Size of the target buffer.
+ * @param name Name to be prefixed with the asterisk label.
+ * @return Size of the resulting name or error code.
+ */
+static int prepend_asterisk(uint8_t *tgt, size_t maxlen, const knot_dname_t *name)
+{
+ assert(maxlen >= 3);
+ memcpy(tgt, "\1*", 3);
+ return knot_dname_to_wire(tgt + 2, name, maxlen - 2);
+}
+
+/**
+ * Closest encloser proof (RFC5155 7.2.1).
+ * @note No RRSIGs are validated.
+ * @param pkt Packet structure to be processed.
+ * @param section_id Packet section to be processed.
+ * @param sname Name to be checked.
+ * @param encloser_name Returned matching encloser name, if found.
+ * @param matching_encloser_nsec3 Pointer to the matching encloser NSEC3 RRSet.
+ * @param covering_next_nsec3 Pointer to covering next closer NSEC3 RRSet.
+ * @return 0 or error code.
+ */
+static int closest_encloser_proof(const knot_pkt_t *pkt,
+ knot_section_t section_id,
+ const knot_dname_t *sname,
+ const knot_dname_t **encloser_name,
+ const knot_rrset_t **matching_encloser_nsec3,
+ const knot_rrset_t **covering_next_nsec3)
+{
+ const knot_pktsection_t *sec = knot_pkt_section(pkt, section_id);
+ if (!sec || !sname) {
+ return kr_error(EINVAL);
+ }
+
+ const knot_rrset_t *matching = NULL;
+ const knot_rrset_t *covering = NULL;
+
+ int flags = 0;
+ const knot_dname_t *next_closer = NULL;
+ for (unsigned i = 0; i < sec->count; ++i) {
+ const knot_rrset_t *rrset = knot_pkt_rr(sec, i);
+ if (rrset->type != KNOT_RRTYPE_NSEC3) {
+ continue;
+ }
+		/* Also skip an NSEC3 matching an ancestor of sname if it's
+ * a parent-side delegation, as that would mean the owner
+ * does not really exist (authoritatively in this zone,
+ * even in case of opt-out).
+ */
+ const uint8_t *bm = knot_nsec3_bitmap(rrset->rrs.rdata);
+ uint16_t bm_size = knot_nsec3_bitmap_len(rrset->rrs.rdata);
+ if (kr_nsec_children_in_zone_check(bm, bm_size) != 0) {
+ continue; /* no fatal errors from bad RRs */
+ }
+ /* Match the NSEC3 to sname or one of its ancestors. */
+ unsigned skipped = 0;
+ flags = 0;
+ int ret = closest_encloser_match(&flags, rrset, sname, &skipped);
+ if (ret != 0) {
+ return ret;
+ }
+ if (!(flags & FLG_CLOSEST_PROVABLE_ENCLOSER)) {
+ continue;
+ }
+ matching = rrset;
+ /* Construct the next closer name and try to cover it. */
+ --skipped;
+ next_closer = sname;
+ for (unsigned j = 0; j < skipped; ++j) {
+ assert(next_closer[0]);
+ next_closer = knot_wire_next_label(next_closer, NULL);
+ }
+ for (unsigned j = 0; j < sec->count; ++j) {
+ const knot_rrset_t *rrset_j = knot_pkt_rr(sec, j);
+ if (rrset_j->type != KNOT_RRTYPE_NSEC3) {
+ continue;
+ }
+ ret = covers_name(&flags, rrset_j, next_closer);
+ if (ret != 0) {
+ return ret;
+ }
+ if (flags & FLG_NAME_COVERED) {
+ covering = rrset_j;
+ break;
+ }
+ }
+ if (flags & FLG_NAME_COVERED) {
+ break;
+ }
+		flags = 0;
+ }
+
+ if ((flags & FLG_CLOSEST_PROVABLE_ENCLOSER) && (flags & FLG_NAME_COVERED) && next_closer) {
+ if (encloser_name && next_closer[0]) {
+ *encloser_name = knot_wire_next_label(next_closer, NULL);
+ }
+ if (matching_encloser_nsec3) {
+ *matching_encloser_nsec3 = matching;
+ }
+ if (covering_next_nsec3) {
+ *covering_next_nsec3 = covering;
+ }
+ return kr_ok();
+ }
+
+ return kr_error(ENOENT);
+}
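+
+/* Worked example (illustrative): for sname 'a.b.example.' where only
+ * 'example.' exists, the closest provable encloser is 'example.' (matched
+ * by one NSEC3), the next closer name is 'b.example.' (covered by another
+ * NSEC3), and *encloser_name is set to 'example.'.
+ */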
+
+/**
+ * Check whether any NSEC3 RR covers a wildcard RR at the closest encloser.
+ * @param pkt Packet structure to be processed.
+ * @param section_id Packet section to be processed.
+ * @param encloser Closest (provable) encloser domain name.
+ * @return 0 or error code:
+ * KNOT_ERANGE - NSEC3 RR (that covers a wildcard)
+ * has been found, but has opt-out flag set;
+ * otherwise - error.
+ */
+static int covers_closest_encloser_wildcard(const knot_pkt_t *pkt, knot_section_t section_id,
+ const knot_dname_t *encloser)
+{
+ const knot_pktsection_t *sec = knot_pkt_section(pkt, section_id);
+ if (!sec || !encloser) {
+ return kr_error(EINVAL);
+ }
+
+ uint8_t wildcard[KNOT_DNAME_MAXLEN];
+ wildcard[0] = 1;
+ wildcard[1] = '*';
+ int encloser_len = knot_dname_size(encloser);
+ if (encloser_len < 0) {
+ return encloser_len;
+ }
+ memcpy(wildcard + 2, encloser, encloser_len);
+
+ int flags = 0;
+ for (unsigned i = 0; i < sec->count; ++i) {
+ const knot_rrset_t *rrset = knot_pkt_rr(sec, i);
+ if (rrset->type != KNOT_RRTYPE_NSEC3) {
+ continue;
+ }
+ int ret = covers_name(&flags, rrset, wildcard);
+ if (ret != 0) {
+ return ret;
+ }
+ if (flags & FLG_NAME_COVERED) {
+ return has_optout(rrset) ?
+ kr_error(KNOT_ERANGE) : kr_ok();
+ }
+ }
+
+ return kr_error(ENOENT);
+}
+
+int kr_nsec3_name_error_response_check(const knot_pkt_t *pkt, knot_section_t section_id,
+ const knot_dname_t *sname)
+{
+ const knot_dname_t *encloser = NULL;
+ const knot_rrset_t *covering_next_nsec3 = NULL;
+ int ret = closest_encloser_proof(pkt, section_id, sname,
+ &encloser, NULL, &covering_next_nsec3);
+ if (ret != 0) {
+ return ret;
+ }
+ ret = covers_closest_encloser_wildcard(pkt, section_id, encloser);
+ if (ret != 0) {
+ /* OK, but NSEC3 for wildcard at encloser has opt-out;
+ * or error */
+ return ret;
+ }
+ /* Closest encloser proof is OK and
+ * NSEC3 for wildcard has been found and optout flag is not set.
+ * Now check if NSEC3 that covers next closer name has opt-out. */
+ return has_optout(covering_next_nsec3) ?
+ kr_error(KNOT_ERANGE) : kr_ok();
+}
+
+/**
+ * Search the packet section for a matching NSEC3 with nodata-proving bitmap.
+ * @param pkt Packet structure to be processed.
+ * @param section_id Packet section to be processed.
+ * @param sname Name to be checked.
+ * @param stype Type to be checked.
+ * @return 0 or error code.
+ * @note This does NOT check the opt-out case if type is DS;
+ * see RFC 5155 8.6.
+ */
+static int nodata_find(const knot_pkt_t *pkt, knot_section_t section_id,
+ const knot_dname_t *name, const uint16_t type)
+{
+ const knot_pktsection_t *sec = knot_pkt_section(pkt, section_id);
+ if (!sec || !name) {
+ return kr_error(EINVAL);
+ }
+
+ for (unsigned i = 0; i < sec->count; ++i) {
+ const knot_rrset_t *nsec3 = knot_pkt_rr(sec, i);
+ /* Records causing any errors are simply skipped. */
+ if (nsec3->type != KNOT_RRTYPE_NSEC3
+ || matches_name(nsec3, name) != kr_ok()) {
+ continue;
+ /* LATER(optim.): we repeatedly recompute the hash of `name` */
+ }
+
+ const uint8_t *bm = knot_nsec3_bitmap(nsec3->rrs.rdata);
+ uint16_t bm_size = knot_nsec3_bitmap_len(nsec3->rrs.rdata);
+ if (kr_nsec_bitmap_nodata_check(bm, bm_size, type, nsec3->owner) == kr_ok()) {
+ return kr_ok();
+ }
+ }
+
+ return kr_error(ENOENT);
+}
+
+/**
+ * Check whether NSEC3 RR matches a wildcard at the closest encloser and has given type bit missing.
+ * @param pkt Packet structure to be processed.
+ * @param section_id Packet section to be processed.
+ * @param encloser Closest (provable) encloser domain name.
+ * @param stype Type to be checked.
+ * @return 0 or error code.
+ */
+static int matches_closest_encloser_wildcard(const knot_pkt_t *pkt, knot_section_t section_id,
+ const knot_dname_t *encloser, uint16_t stype)
+{
+ const knot_pktsection_t *sec = knot_pkt_section(pkt, section_id);
+ if (!sec || !encloser) {
+ return kr_error(EINVAL);
+ }
+
+ uint8_t wildcard[KNOT_DNAME_MAXLEN]; /**< the source of synthesis */
+ int ret = prepend_asterisk(wildcard, sizeof(wildcard), encloser);
+ if (ret < 0) {
+ return ret;
+ }
+ assert(ret >= 3);
+ return nodata_find(pkt, section_id, wildcard, stype);
+}
+
+int kr_nsec3_wildcard_answer_response_check(const knot_pkt_t *pkt, knot_section_t section_id,
+ const knot_dname_t *sname, int trim_to_next)
+{
+ const knot_pktsection_t *sec = knot_pkt_section(pkt, section_id);
+ if (!sec || !sname) {
+ return kr_error(EINVAL);
+ }
+
+ /* Compute the next closer name. */
+ for (int i = 0; i < trim_to_next; ++i) {
+ assert(sname[0]);
+ sname = knot_wire_next_label(sname, NULL);
+ }
+
+ int flags = 0;
+ for (unsigned i = 0; i < sec->count; ++i) {
+ const knot_rrset_t *rrset = knot_pkt_rr(sec, i);
+ if (rrset->type != KNOT_RRTYPE_NSEC3) {
+ continue;
+ }
+ int ret = covers_name(&flags, rrset, sname);
+ if (ret != 0) {
+ return ret;
+ }
+ if (flags & FLG_NAME_COVERED) {
+ return has_optout(rrset) ?
+ kr_error(KNOT_ERANGE) : kr_ok();
+ }
+ }
+
+ return kr_error(ENOENT);
+}
+
+
+int kr_nsec3_no_data(const knot_pkt_t *pkt, knot_section_t section_id,
+ const knot_dname_t *sname, uint16_t stype)
+{
+ /* DS record may be also matched by an existing NSEC3 RR. */
+ int ret = nodata_find(pkt, section_id, sname, stype);
+ if (ret == 0) {
+ /* Satisfies RFC5155 8.5 and 8.6, both first paragraph. */
+ return ret;
+ }
+
+ /* Find closest provable encloser. */
+ const knot_dname_t *encloser_name = NULL;
+ const knot_rrset_t *covering_next_nsec3 = NULL;
+ ret = closest_encloser_proof(pkt, section_id, sname, &encloser_name,
+ NULL, &covering_next_nsec3);
+ if (ret != 0) {
+ return ret;
+ }
+
+ assert(encloser_name && covering_next_nsec3);
+ ret = matches_closest_encloser_wildcard(pkt, section_id,
+ encloser_name, stype);
+ if (ret == 0) {
+ /* Satisfies RFC5155 8.7 */
+ if (has_optout(covering_next_nsec3)) {
+ /* Opt-out is detected.
+ * Despite the fact that all records
+ * in the packet can be properly signed,
+ * AD bit must not be set due to rfc5155 9.2.
+ * Return appropriate code to the caller */
+ ret = kr_error(KNOT_ERANGE);
+ }
+ return ret;
+ }
+
+ if (!has_optout(covering_next_nsec3)) {
+ /* Bogus */
+ ret = kr_error(ENOENT);
+ } else {
+ /*
+ * Satisfies RFC5155 8.6 (QTYPE == DS), 2nd paragraph.
+ * Also satisfies ERRATA 3441 8.5 (QTYPE != DS), 3rd paragraph.
+		 * - a (wildcard) empty non-terminal
+		 * derived from an insecure delegation.
+		 * Denial of existence cannot be proven.
+		 * Set the error code to proceed insecurely.
+ */
+ ret = kr_error(KNOT_ERANGE);
+ }
+
+ return ret;
+}
+
+int kr_nsec3_ref_to_unsigned(const knot_pkt_t *pkt)
+{
+ const knot_pktsection_t *sec = knot_pkt_section(pkt, KNOT_AUTHORITY);
+ if (!sec) {
+ return kr_error(EINVAL);
+ }
+ for (unsigned i = 0; i < sec->count; ++i) {
+ const knot_rrset_t *ns = knot_pkt_rr(sec, i);
+ if (ns->type == KNOT_RRTYPE_DS) {
+ return kr_error(EEXIST);
+ }
+ if (ns->type != KNOT_RRTYPE_NS) {
+ continue;
+ }
+
+ int flags = 0;
+ bool nsec3_found = false;
+ for (unsigned j = 0; j < sec->count; ++j) {
+ const knot_rrset_t *nsec3 = knot_pkt_rr(sec, j);
+ if (nsec3->type == KNOT_RRTYPE_DS) {
+ return kr_error(EEXIST);
+ }
+ if (nsec3->type != KNOT_RRTYPE_NSEC3) {
+ continue;
+ }
+ nsec3_found = true;
+ /* nsec3 found, check if owner name matches the delegation name.
+ * Just skip in case of *any* errors. */
+ if (matches_name(nsec3, ns->owner) != kr_ok()) {
+ continue;
+ }
+
+ const uint8_t *bm = knot_nsec3_bitmap(nsec3->rrs.rdata);
+ uint16_t bm_size = knot_nsec3_bitmap_len(nsec3->rrs.rdata);
+ if (!bm) {
+ return kr_error(EINVAL);
+ }
+ if (dnssec_nsec_bitmap_contains(bm, bm_size,
+ KNOT_RRTYPE_NS) &&
+ !dnssec_nsec_bitmap_contains(bm, bm_size,
+ KNOT_RRTYPE_DS) &&
+ !dnssec_nsec_bitmap_contains(bm, bm_size,
+ KNOT_RRTYPE_SOA)) {
+ /* Satisfies rfc5155, 8.9. paragraph 2 */
+ return kr_ok();
+ }
+ }
+ if (!nsec3_found) {
+ return kr_error(DNSSEC_NOT_FOUND);
+ }
+ if (flags & FLG_NAME_MATCHED) {
+ /* nsec3 which owner matches
+ * the delegation name was found,
+ * but nsec3 type bitmap contains wrong types
+ */
+ return kr_error(EINVAL);
+ }
+ /* nsec3 that matches the delegation was not found.
+ * Check rfc5155, 8.9. paragraph 4.
+ * Find closest provable encloser.
+ */
+ const knot_dname_t *encloser_name = NULL;
+ const knot_rrset_t *covering_next_nsec3 = NULL;
+ int ret = closest_encloser_proof(pkt, KNOT_AUTHORITY, ns->owner,
+ &encloser_name, NULL, &covering_next_nsec3);
+ if (ret != 0) {
+ return kr_error(EINVAL);
+ }
+
+ if (has_optout(covering_next_nsec3)) {
+ return kr_error(KNOT_ERANGE);
+ } else {
+ return kr_error(EINVAL);
+ }
+ }
+ return kr_error(EINVAL);
+}
+
+int kr_nsec3_matches_name_and_type(const knot_rrset_t *nsec3,
+ const knot_dname_t *name, uint16_t type)
+{
+ /* It's not secure enough to just check a single bit for (some) other types,
+	 * but we (currently) use this API only for NS. See RFC 6840 sec. 4.
+ */
+ if (type != KNOT_RRTYPE_NS) {
+ assert(!EINVAL);
+ return kr_error(EINVAL);
+ }
+ int ret = matches_name(nsec3, name);
+ if (ret) {
+ return kr_error(ret);
+ }
+ const uint8_t *bm = knot_nsec3_bitmap(nsec3->rrs.rdata);
+ uint16_t bm_size = knot_nsec3_bitmap_len(nsec3->rrs.rdata);
+ if (!bm) {
+ return kr_error(EINVAL);
+ }
+ if (dnssec_nsec_bitmap_contains(bm, bm_size, type)) {
+ return kr_ok();
+ } else {
+ return kr_error(ENOENT);
+ }
+}
diff --git a/lib/dnssec/nsec3.h b/lib/dnssec/nsec3.h
new file mode 100644
index 0000000..527ccce
--- /dev/null
+++ b/lib/dnssec/nsec3.h
@@ -0,0 +1,82 @@
+/* Copyright (C) 2015-2017 CZ.NIC, z.s.p.o. <knot-dns@labs.nic.cz>
+
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program. If not, see <https://www.gnu.org/licenses/>.
+ */
+
+#pragma once
+
+#include <libknot/packet/pkt.h>
+
+/**
+ * Name error response check (RFC5155 7.2.2).
+ * @note No RRSIGs are validated.
+ * @param pkt Packet structure to be processed.
+ * @param section_id Packet section to be processed.
+ * @param sname Name to be checked.
+ * @return 0 or error code.
+ */
+int kr_nsec3_name_error_response_check(const knot_pkt_t *pkt, knot_section_t section_id,
+ const knot_dname_t *sname);
+
+/**
+ * Wildcard answer response check (RFC5155 7.2.6).
+ * @param pkt Packet structure to be processed.
+ * @param section_id Packet section to be processed.
+ * @param sname Name to be checked.
+ * @param trim_to_next Number of labels to remove to obtain next closer name.
+ * @return 0 or error code:
+ * KNOT_ERANGE - NSEC3 RR that covers a wildcard
+ * has been found, but has opt-out flag set;
+ * otherwise - error.
+ */
+int kr_nsec3_wildcard_answer_response_check(const knot_pkt_t *pkt, knot_section_t section_id,
+ const knot_dname_t *sname, int trim_to_next);
+
+/**
+ * Authenticated denial of existence according to RFC5155 8.5 and 8.7.
+ * @note No RRSIGs are validated.
+ * @param pkt Packet structure to be processed.
+ * @param section_id Packet section to be processed.
+ * @param sname Queried domain name.
+ * @param stype Queried type.
+ * @return 0 or error code:
+ *         DNSSEC_NOT_FOUND - neither DS nor NSEC3 records
+ *         were found.
+ * KNOT_ERANGE - denial of existence can't be proven
+ * due to opt-out, otherwise - bogus.
+ */
+int kr_nsec3_no_data(const knot_pkt_t *pkt, knot_section_t section_id,
+ const knot_dname_t *sname, uint16_t stype);
+
+/**
+ * Referral to unsigned subzone check (RFC5155 8.9).
+ * @note No RRSIGs are validated.
+ * @param pkt Packet structure to be processed.
+ * @return 0 or error code:
+ * KNOT_ERANGE - denial of existence can't be proven
+ * due to opt-out.
+ * EEXIST - ds record was found.
+ * EINVAL - bogus.
+ */
+int kr_nsec3_ref_to_unsigned(const knot_pkt_t *pkt);
+
+/**
+ * Checks whether supplied NSEC3 RR matches the supplied name and NS type.
+ * @param nsec3 NSEC3 RR.
+ * @param name Name to be checked.
+ * @param type Type to be checked. Only use with NS! TODO
+ * @return 0 or error code.
+ */
+int kr_nsec3_matches_name_and_type(const knot_rrset_t *nsec3,
+ const knot_dname_t *name, uint16_t type);
diff --git a/lib/dnssec/signature.c b/lib/dnssec/signature.c
new file mode 100644
index 0000000..356bf14
--- /dev/null
+++ b/lib/dnssec/signature.c
@@ -0,0 +1,307 @@
+/* Copyright (C) 2015-2017 CZ.NIC, z.s.p.o. <knot-dns@labs.nic.cz>
+
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program. If not, see <https://www.gnu.org/licenses/>.
+ */
+
+#include <arpa/inet.h>
+#include <assert.h>
+#include <string.h>
+
+#include <libdnssec/error.h>
+#include <libdnssec/key.h>
+#include <libdnssec/sign.h>
+#include <libknot/descriptor.h>
+#include <libknot/packet/rrset-wire.h>
+#include <libknot/packet/wire.h>
+#include <libknot/rrset.h>
+#include <libknot/rrtype/rrsig.h>
+#include <libknot/rrtype/ds.h>
+
+#include "lib/defines.h"
+#include "lib/utils.h"
+#include "lib/dnssec/signature.h"
+
+#include "contrib/wire.h"
+
+static int authenticate_ds(const dnssec_key_t *key, dnssec_binary_t *ds_rdata, uint8_t digest_type)
+{
+ /* Compute DS RDATA from the DNSKEY. */
+ dnssec_binary_t computed_ds = { 0, };
+ int ret = dnssec_key_create_ds(key, digest_type, &computed_ds);
+ if (ret != DNSSEC_EOK) {
+ goto fail;
+ }
+
+ /* DS records contain algorithm, key tag and the digest.
+ * Therefore the comparison of the two DS is sufficient.
+ */
+ ret = (ds_rdata->size == computed_ds.size) &&
+ (memcmp(ds_rdata->data, computed_ds.data, ds_rdata->size) == 0);
+ ret = ret ? kr_ok() : kr_error(ENOENT);
+
+fail:
+ dnssec_binary_free(&computed_ds);
+ return kr_error(ret);
+}
+
+int kr_authenticate_referral(const knot_rrset_t *ref, const dnssec_key_t *key)
+{
+ assert(ref && key);
+ if (ref->type != KNOT_RRTYPE_DS) {
+ return kr_error(EINVAL);
+ }
+
+ /* Try all possible DS records */
+ int ret = 0;
+ knot_rdata_t *rd = ref->rrs.rdata;
+ for (uint16_t i = 0; i < ref->rrs.count; ++i) {
+ dnssec_binary_t ds_rdata = {
+ .size = rd->len,
+ .data = rd->data
+ };
+ ret = authenticate_ds(key, &ds_rdata, knot_ds_digest_type(rd));
+ if (ret == 0) { /* Found a good DS */
+ return kr_ok();
+ }
+ rd = knot_rdataset_next(rd);
+ }
+
+ return kr_error(ret);
+}
+
+/**
+ * Adjust TTL in wire format.
+ * @param wire RR Set in wire format.
+ * @param wire_size Size of the wire data portion.
+ * @param new_ttl TTL value to be set for all RRs.
+ * @return 0 or error code.
+ */
+static int adjust_wire_ttl(uint8_t *wire, size_t wire_size, uint32_t new_ttl)
+{
+ assert(wire);
+ assert(sizeof(uint16_t) == 2);
+ assert(sizeof(uint32_t) == 4);
+ uint16_t rdlen;
+
+ int ret;
+
+ new_ttl = htonl(new_ttl);
+
+ size_t i = 0;
+ /* RR wire format in RFC1035 3.2.1 */
+ while(i < wire_size) {
+ ret = knot_dname_size(wire + i);
+ if (ret < 0) {
+ return ret;
+ }
+ i += ret + 4;
+ memcpy(wire + i, &new_ttl, sizeof(uint32_t));
+ i += sizeof(uint32_t);
+
+ memcpy(&rdlen, wire + i, sizeof(uint16_t));
+ rdlen = ntohs(rdlen);
+ i += sizeof(uint16_t) + rdlen;
+
+ assert(i <= wire_size);
+ }
+
+ return kr_ok();
+}
+
+/*!
+ * \brief Add RRSIG RDATA without signature to signing context.
+ *
+ * Requires signer name in RDATA in canonical form.
+ *
+ * \param ctx Signing context.
+ * \param rdata Pointer to RRSIG RDATA.
+ *
+ * \return Error code, KNOT_EOK if successful.
+ */
+#define RRSIG_RDATA_SIGNER_OFFSET 18
+static int sign_ctx_add_self(dnssec_sign_ctx_t *ctx, const uint8_t *rdata)
+{
+ assert(ctx);
+ assert(rdata);
+
+ int result;
+
+ // static header
+
+ dnssec_binary_t header = {
+ .data = (uint8_t *)rdata,
+ .size = RRSIG_RDATA_SIGNER_OFFSET,
+ };
+
+ result = dnssec_sign_add(ctx, &header);
+ if (result != DNSSEC_EOK) {
+ return result;
+ }
+
+ // signer name
+
+ const uint8_t *rdata_signer = rdata + RRSIG_RDATA_SIGNER_OFFSET;
+ dnssec_binary_t signer = { 0 };
+ signer.data = knot_dname_copy(rdata_signer, NULL);
+ signer.size = knot_dname_size(signer.data);
+
+ result = dnssec_sign_add(ctx, &signer);
+ free(signer.data);
+
+ return result;
+}
+#undef RRSIG_RDATA_SIGNER_OFFSET
+
+/*!
+ * \brief Add covered RRs to signing context.
+ *
+ * Requires all DNAMEs in canonical form and all RRs ordered canonically.
+ *
+ * \param ctx Signing context.
+ * \param covered Covered RRs.
+ *
+ * \return Error code, KNOT_EOK if successful.
+ */
+static int sign_ctx_add_records(dnssec_sign_ctx_t *ctx, const knot_rrset_t *covered,
+ uint32_t orig_ttl, int trim_labels)
+{
+ if (!ctx || !covered || trim_labels < 0) {
+ return kr_error(EINVAL);
+ }
+
+	// serialize the whole covered RRSet into one (large, static) wire buffer
+ static uint8_t wire_buffer[KNOT_WIRE_MAX_PKTSIZE];
+ int written = knot_rrset_to_wire(covered, wire_buffer, sizeof(wire_buffer), NULL);
+ if (written < 0) {
+ return written;
+ }
+
+ /* Set original ttl. */
+ int ret = adjust_wire_ttl(wire_buffer, written, orig_ttl);
+ if (ret != 0) {
+ return ret;
+ }
+
+ if (!trim_labels) {
+ const dnssec_binary_t wire_binary = {
+ .size = written,
+ .data = wire_buffer
+ };
+ return dnssec_sign_add(ctx, &wire_binary);
+ }
+
+ /* RFC4035 5.3.2
+ * Remove leftmost labels and replace them with '*.'
+ * for each RR in covered.
+ */
+ uint8_t *beginp = wire_buffer;
+ for (uint16_t i = 0; i < covered->rrs.count; ++i) {
+ /* RR(i) = name | type | class | OrigTTL | RDATA length | RDATA */
+ for (int j = 0; j < trim_labels; ++j) {
+ assert(beginp[0]);
+ beginp = (uint8_t *) knot_wire_next_label(beginp, NULL);
+ assert(beginp != NULL);
+ }
+ *(--beginp) = '*';
+ *(--beginp) = 1;
+ const size_t rdatalen_offset = knot_dname_size(beginp) + /* name */
+ sizeof(uint16_t) + /* type */
+ sizeof(uint16_t) + /* class */
+ sizeof(uint32_t); /* OrigTTL */
+ const uint8_t *rdatalen_ptr = beginp + rdatalen_offset;
+ const uint16_t rdata_size = wire_read_u16(rdatalen_ptr);
+ const size_t rr_size = rdatalen_offset +
+ sizeof(uint16_t) + /* RDATA length */
+ rdata_size; /* RDATA */
+ const dnssec_binary_t wire_binary = {
+ .size = rr_size,
+ .data = beginp
+ };
+ ret = dnssec_sign_add(ctx, &wire_binary);
+ if (ret != 0) {
+ break;
+ }
+ beginp += rr_size;
+ }
+ return ret;
+}
+
+/*!
+ * \brief Add all data covered by signature into signing context.
+ *
+ * RFC 4034: The signature covers RRSIG RDATA field (excluding the signature)
+ * and all matching RR records, which are ordered canonically.
+ *
+ * Requires all DNAMEs in canonical form and all RRs ordered canonically.
+ *
+ * \param ctx Signing context.
+ * \param rrsig_rdata RRSIG RDATA with populated fields except signature.
+ * \param covered Covered RRs.
+ *
+ * \return Error code, KNOT_EOK if successful.
+ */
+/* TODO -- Taken from knot/src/knot/dnssec/rrset-sign.c. Re-write for better fit needed. */
+static int sign_ctx_add_data(dnssec_sign_ctx_t *ctx, const uint8_t *rrsig_rdata,
+ const knot_rrset_t *covered, uint32_t orig_ttl, int trim_labels)
+{
+ int result = sign_ctx_add_self(ctx, rrsig_rdata);
+ if (result != KNOT_EOK) {
+ return result;
+ }
+
+ return sign_ctx_add_records(ctx, covered, orig_ttl, trim_labels);
+}
+
+int kr_check_signature(const knot_rdata_t *rrsig,
+ const dnssec_key_t *key, const knot_rrset_t *covered,
+ int trim_labels)
+{
+ if (!rrsig || !key || !dnssec_key_can_verify(key)) {
+ return kr_error(EINVAL);
+ }
+
+ int ret = 0;
+ dnssec_sign_ctx_t *sign_ctx = NULL;
+ dnssec_binary_t signature = {
+ .data = /*const-cast*/(uint8_t*)knot_rrsig_signature(rrsig),
+ .size = knot_rrsig_signature_len(rrsig),
+ };
+ if (!signature.data || !signature.size) {
+ ret = kr_error(EINVAL);
+ goto fail;
+ }
+
+ if (dnssec_sign_new(&sign_ctx, key) != 0) {
+ ret = kr_error(ENOMEM);
+ goto fail;
+ }
+
+ uint32_t orig_ttl = knot_rrsig_original_ttl(rrsig);
+
+ if (sign_ctx_add_data(sign_ctx, rrsig->data, covered, orig_ttl, trim_labels) != 0) {
+ ret = kr_error(ENOMEM);
+ goto fail;
+ }
+
+ if (dnssec_sign_verify(sign_ctx, &signature) != 0) {
+ ret = kr_error(EBADMSG);
+ goto fail;
+ }
+
+ ret = kr_ok();
+
+fail:
+ dnssec_sign_free(sign_ctx);
+ return ret;
+}
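+
+/* Usage sketch (hypothetical caller): verifying the first RRSIG of an
+ * RRSet against an already-parsed DNSKEY:
+ *
+ *	knot_rdata_t *rrsig = rrsigs->rrs.rdata;	// first signature
+ *	int ret = kr_check_signature(rrsig, key, covered, 0);
+ *	// 0: valid; kr_error(EBADMSG): cryptographic verification failed
+ */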
diff --git a/lib/dnssec/signature.h b/lib/dnssec/signature.h
new file mode 100644
index 0000000..b756483
--- /dev/null
+++ b/lib/dnssec/signature.h
@@ -0,0 +1,42 @@
+/* Copyright (C) 2015-2017 CZ.NIC, z.s.p.o. <knot-dns@labs.nic.cz>
+
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program. If not, see <https://www.gnu.org/licenses/>.
+ */
+
+#pragma once
+
+#include <libdnssec/key.h>
+#include <libknot/rrset.h>
+
+/**
+ * Performs referral authentication according to RFC4035 5.2, bullet 2
+ * @param ref Referral RRSet. Currently only DS can be used.
+ * @param key Already parsed key.
+ * @return 0 or error code. In particular: DNSSEC_INVALID_DS_ALGORITHM
+ * in case *all* DSs in ref use an unimplemented algorithm.
+ */
+int kr_authenticate_referral(const knot_rrset_t *ref, const dnssec_key_t *key);
+
+/**
+ * Check the signature of the supplied RRSet.
+ * @param rrsigs RRSet containing signatures.
+ * @param pos Index of the signature record in the signature RRSet.
+ * @param key Key to be used to validate the signature.
+ * @param covered The covered RRSet.
+ * @param trim_labels Number of the leftmost labels to be removed and replaced with '*.'.
+ * @return 0 if signature valid, error code else.
+ */
+int kr_check_signature(const knot_rdata_t *rrsig,
+ const dnssec_key_t *key, const knot_rrset_t *covered,
+ int trim_labels);
diff --git a/lib/dnssec/ta.c b/lib/dnssec/ta.c
new file mode 100644
index 0000000..12629c9
--- /dev/null
+++ b/lib/dnssec/ta.c
@@ -0,0 +1,185 @@
+/* Copyright (C) 2014-2017 CZ.NIC, z.s.p.o. <knot-dns@labs.nic.cz>
+
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program. If not, see <https://www.gnu.org/licenses/>.
+ */
+
+#include <contrib/cleanup.h>
+#include <libknot/descriptor.h>
+#include <libknot/rdataset.h>
+#include <libknot/rrset.h>
+#include <libknot/packet/wire.h>
+#include <libdnssec/key.h>
+#include <libdnssec/error.h>
+
+#include "lib/defines.h"
+#include "lib/dnssec.h"
+#include "lib/dnssec/ta.h"
+#include "lib/resolve.h"
+#include "lib/utils.h"
+
+knot_rrset_t *kr_ta_get(map_t *trust_anchors, const knot_dname_t *name)
+{
+ return map_get(trust_anchors, (const char *)name);
+}
+
+const knot_dname_t *kr_ta_get_longest_name(map_t *trust_anchors, const knot_dname_t *name)
+{
+ while(name) {
+ if (kr_ta_get(trust_anchors, name)) {
+ return name;
+ }
+ if (name[0] == '\0') {
+ break;
+ }
+ name = knot_wire_next_label(name, NULL);
+ }
+ return NULL;
+}
+
+/* @internal Create DS from DNSKEY, caller MUST free dst if successful. */
+static int dnskey2ds(dnssec_binary_t *dst, const knot_dname_t *owner, const uint8_t *rdata, uint16_t rdlen)
+{
+ dnssec_key_t *key = NULL;
+ int ret = dnssec_key_new(&key);
+ if (ret) goto cleanup;
+ /* Create DS from DNSKEY and reinsert */
+ const dnssec_binary_t key_data = { .size = rdlen, .data = (uint8_t *)rdata };
+ ret = dnssec_key_set_rdata(key, &key_data);
+ if (ret) goto cleanup;
+ /* Accept only keys with Zone and SEP flags that aren't revoked,
+ * as a precaution. RFC 5011 also utilizes these flags.
+ * TODO: kr_dnssec_key_* names are confusing. */
+ const bool flags_ok = kr_dnssec_key_zsk(rdata) && !kr_dnssec_key_revoked(rdata);
+ if (!flags_ok) {
+ auto_free char *owner_str = kr_dname_text(owner);
+ kr_log_error("[ ta ] refusing to trust %s DNSKEY because of flags %d\n",
+ owner_str, dnssec_key_get_flags(key));
+ ret = kr_error(EILSEQ);
+ goto cleanup;
+ } else if (!kr_dnssec_key_ksk(rdata)) {
+ auto_free char *owner_str = kr_dname_text(owner);
+ int flags = dnssec_key_get_flags(key);
+ kr_log_info("[ ta ] warning: %s DNSKEY is missing the SEP bit; "
+ "flags %d instead of %d\n",
+ owner_str, flags, flags + 1/*a little ugly*/);
+ }
+ ret = dnssec_key_set_dname(key, owner);
+ if (ret) goto cleanup;
+ ret = dnssec_key_create_ds(key, DNSSEC_KEY_DIGEST_SHA256, dst);
+cleanup:
+ dnssec_key_free(key);
+ return kr_error(ret);
+}
+
+/* @internal Insert new TA to trust anchor set, rdata MUST be of DS type. */
+static int insert_ta(map_t *trust_anchors, const knot_dname_t *name,
+ uint32_t ttl, const uint8_t *rdata, uint16_t rdlen)
+{
+ bool is_new_key = false;
+ knot_rrset_t *ta_rr = kr_ta_get(trust_anchors, name);
+ if (!ta_rr) {
+ ta_rr = knot_rrset_new(name, KNOT_RRTYPE_DS, KNOT_CLASS_IN, ttl, NULL);
+ is_new_key = true;
+ }
+ /* Merge-in new key data */
+ if (!ta_rr || (rdlen > 0 && knot_rrset_add_rdata(ta_rr, rdata, rdlen, NULL) != 0)) {
+ knot_rrset_free(ta_rr, NULL);
+ return kr_error(ENOMEM);
+ }
+ if (is_new_key) {
+ return map_set(trust_anchors, (const char *)name, ta_rr);
+ }
+ return kr_ok();
+}
+
+int kr_ta_add(map_t *trust_anchors, const knot_dname_t *name, uint16_t type,
+ uint32_t ttl, const uint8_t *rdata, uint16_t rdlen)
+{
+ if (!trust_anchors || !name) {
+ return kr_error(EINVAL);
+ }
+
+	/* DS/DNSKEY types are accepted; for DNSKEY we
+ * need to compute a DS digest. */
+ if (type == KNOT_RRTYPE_DS) {
+ return insert_ta(trust_anchors, name, ttl, rdata, rdlen);
+ } else if (type == KNOT_RRTYPE_DNSKEY) {
+ dnssec_binary_t ds_rdata = { 0, };
+ int ret = dnskey2ds(&ds_rdata, name, rdata, rdlen);
+ if (ret != 0) {
+ return ret;
+ }
+ ret = insert_ta(trust_anchors, name, ttl, ds_rdata.data, ds_rdata.size);
+ dnssec_binary_free(&ds_rdata);
+ return ret;
+ } else { /* Invalid type for TA */
+ return kr_error(EINVAL);
+ }
+}
+
+int kr_ta_covers(map_t *trust_anchors, const knot_dname_t *name)
+{
+	while (name) {
+ if (kr_ta_get(trust_anchors, name)) {
+ return true;
+ }
+ if (name[0] == '\0') {
+ return false;
+ }
+ name = knot_wire_next_label(name, NULL);
+ }
+ return false;
+}
+
+bool kr_ta_covers_qry(struct kr_context *ctx, const knot_dname_t *name,
+ const uint16_t type)
+{
+ assert(ctx && name);
+ if (type == KNOT_RRTYPE_DS && name[0] != '\0') {
+		/* DS is a parent-side record, so the parent name needs to be covered. */
+ name = knot_wire_next_label(name, NULL);
+ if (!name) {
+ assert(false);
+ return false;
+ }
+ }
+ return kr_ta_covers(&ctx->trust_anchors, name)
+ && !kr_ta_covers(&ctx->negative_anchors, name);
+}
+
+/* Delete record data */
+static int del_record(const char *k, void *v, void *ext)
+{
+ knot_rrset_t *ta_rr = v;
+ if (ta_rr) {
+ knot_rrset_free(ta_rr, NULL);
+ }
+ return 0;
+}
+
+int kr_ta_del(map_t *trust_anchors, const knot_dname_t *name)
+{
+ knot_rrset_t *ta_rr = kr_ta_get(trust_anchors, name);
+ if (ta_rr) {
+ del_record(NULL, ta_rr, NULL);
+ map_del(trust_anchors, (const char *)name);
+ }
+ return kr_ok();
+}
+
+void kr_ta_clear(map_t *trust_anchors)
+{
+ map_walk(trust_anchors, del_record, NULL);
+ map_clear(trust_anchors);
+}
diff --git a/lib/dnssec/ta.h b/lib/dnssec/ta.h
new file mode 100644
index 0000000..dcc55e7
--- /dev/null
+++ b/lib/dnssec/ta.h
@@ -0,0 +1,87 @@
+/* Copyright (C) 2015-2017 CZ.NIC, z.s.p.o. <knot-dns@labs.nic.cz>
+
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program. If not, see <https://www.gnu.org/licenses/>.
+ */
+
+#pragma once
+
+#include "lib/generic/map.h"
+#include <libknot/rrset.h>
+
+/**
+ * Find TA RRSet by name.
+ * @param trust_anchors trust store
+ * @param name name of the TA
+ * @return non-empty RRSet or NULL
+ */
+KR_EXPORT
+knot_rrset_t *kr_ta_get(map_t *trust_anchors, const knot_dname_t *name);
+
+/**
+ * Add TA to trust store. DS or DNSKEY types are supported.
+ * @param trust_anchors trust store
+ * @param name name of the TA
+ * @param type RR type of the TA (DS or DNSKEY)
+ * @param ttl TTL of the TA record
+ * @param rdata RDATA of the TA record
+ * @param rdlen length of rdata
+ * @return 0 or an error
+ */
+KR_EXPORT
+int kr_ta_add(map_t *trust_anchors, const knot_dname_t *name, uint16_t type,
+ uint32_t ttl, const uint8_t *rdata, uint16_t rdlen);
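+/*
+ * A minimal usage sketch (non-authoritative; the DS RDATA below is a made-up,
+ * truncated placeholder, as a real DS record carries the digest bytes too):
+ * @code{.c}
+ * map_t ta = map_make(NULL);
+ * const uint8_t ds[] = { 0x4f, 0x66, 0x08, 0x02 }; // key tag 20326, alg 8, digest type 2
+ * int ret = kr_ta_add(&ta, (const knot_dname_t *)"", KNOT_RRTYPE_DS,
+ *                     3600, ds, sizeof(ds));
+ * @endcode
+ */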
+
+/**
+ * Return true if the name is below/at any TA in the store.
+ * This can be useful to check beforehand whether a name can be validated at all.
+ * @param trust_anchors trust store
+ * @param name name to check
+ * @return boolean
+ */
+KR_EXPORT KR_PURE
+int kr_ta_covers(map_t *trust_anchors, const knot_dname_t *name);
+
+struct kr_context;
+/**
+ * A wrapper around kr_ta_covers that is aware of negative TA and types.
+ */
+KR_EXPORT KR_PURE
+bool kr_ta_covers_qry(struct kr_context *ctx, const knot_dname_t *name,
+ const uint16_t type);
+
+/**
+ * Remove TA from trust store.
+ * @param trust_anchors trust store
+ * @param name name of the TA
+ * @return 0 or an error
+ */
+KR_EXPORT
+int kr_ta_del(map_t *trust_anchors, const knot_dname_t *name);
+
+/**
+ * Clear trust store.
+ * @param trust_anchors trust store
+ */
+KR_EXPORT
+void kr_ta_clear(map_t *trust_anchors);
+
+/**
+ * Return TA with the longest name that covers the given name.
+ * @param trust_anchors trust store
+ * @param name name to be covered
+ * @return pointer to the name, or NULL;
+ *         if not NULL, it points inside the name parameter.
+ */
+KR_EXPORT
+const knot_dname_t *kr_ta_get_longest_name(map_t *trust_anchors, const knot_dname_t *name);
diff --git a/lib/generic/README.rst b/lib/generic/README.rst
new file mode 100644
index 0000000..7adff86
--- /dev/null
+++ b/lib/generic/README.rst
@@ -0,0 +1,60 @@
+Generics library
+----------------
+
+This small collection of "generics" was born out of frustration that I couldn't find
+such a thing for C. Existing implementations are either bloated, have poor interfaces,
+lack null-checking, or don't allow a custom allocation scheme. BSD-licensed (or compatible)
+code is allowed here, as long as it comes with a test case in `tests/test_generics.c`.
+
+* array_ - a set of simple macros to make working with dynamic arrays easier.
+* queue_ - a FIFO + LIFO queue.
+* map_ - a `Crit-bit tree`_ key-value map implementation (public domain) that comes with tests.
+* set_ - set abstraction implemented on top of ``map`` (unused now).
+* pack_ - length-prefixed list of objects (i.e. array-list).
+* lru_ - an LRU-like hash table.
+* trie_ - a trie-based key-value map, taken from Knot DNS.
+
+array
+~~~~~
+
+.. doxygenfile:: array.h
+ :project: libkres
+
+queue
+~~~~~
+
+.. doxygenfile:: queue.h
+ :project: libkres
+
+map
+~~~
+
+.. doxygenfile:: map.h
+ :project: libkres
+
+set
+~~~
+
+.. doxygenfile:: set.h
+ :project: libkres
+
+pack
+~~~~
+
+.. doxygenfile:: pack.h
+ :project: libkres
+
+lru
+~~~
+
+.. doxygenfile:: lru.h
+ :project: libkres
+
+trie
+~~~~
+
+.. doxygenfile:: trie.h
+ :project: libkres
+
+
+.. _`Crit-bit tree`: https://cr.yp.to/critbit.html
diff --git a/lib/generic/array.h b/lib/generic/array.h
new file mode 100644
index 0000000..ece4dd1
--- /dev/null
+++ b/lib/generic/array.h
@@ -0,0 +1,166 @@
+/* Copyright (C) 2015-2017 CZ.NIC, z.s.p.o. <knot-dns@labs.nic.cz>
+
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program. If not, see <https://www.gnu.org/licenses/>.
+ */
+
+/**
+ *
+ * @file array.h
+ * @brief A set of simple macros to make working with dynamic arrays easier.
+ *
+ * @note C has no generics, so this is implemented mostly using macros.
+ * Be aware of that when passing these macros to other macros that may
+ * evaluate their arguments more than once:
+ *
+ * @code{.c}
+ * MIN(array_push(arr, val), other)
+ * @endcode
+ *
+ * This may evaluate array_push() twice, leading to unexpected behaviour.
+ * This is a price to pay for the absence of proper generics.
+ *
+ * # Example usage:
+ *
+ * @code{.c}
+ * array_t(const char*) arr;
+ * array_init(arr);
+ *
+ * // Reserve memory in advance
+ * if (array_reserve(arr, 2) < 0) {
+ * return ENOMEM;
+ * }
+ *
+ * // Already reserved, cannot fail
+ * array_push(arr, "princess");
+ * array_push(arr, "leia");
+ *
+ * // Not reserved, may fail
+ * if (array_push(arr, "han") < 0) {
+ * return ENOMEM;
+ * }
+ *
+ * // It does not hide what it really is
+ * for (size_t i = 0; i < arr.len; ++i) {
+ * printf("%s\n", arr.at[i]);
+ * }
+ *
+ * // Random delete
+ * array_del(arr, 0);
+ * @endcode
+ * \addtogroup generics
+ * @{
+ */
+
+#pragma once
+#include <stdlib.h>
+
+/** Simplified Qt containers growth strategy. */
+static inline size_t array_next_count(size_t want)
+{
+ if (want < 2048) {
+ return (want < 20) ? want + 4 : want * 2;
+ } else {
+ return want + 2048;
+ }
+}
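+/* A worked sketch of the strategy above:
+ *   array_next_count(5)    == 9     (small arrays grow by 4),
+ *   array_next_count(30)   == 60    (mid-size arrays double),
+ *   array_next_count(3000) == 5048  (large arrays grow by 2048). */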
+
+/** @internal Incremental memory reservation */
+static inline int array_std_reserve(void *baton, char **mem, size_t elm_size, size_t want, size_t *have)
+{
+ if (*have >= want) {
+ return 0;
+ }
+ /* Simplified Qt containers growth strategy */
+ size_t next_size = array_next_count(want);
+ void *mem_new = realloc(*mem, next_size * elm_size);
+ if (mem_new != NULL) {
+ *mem = mem_new;
+ *have = next_size;
+ return 0;
+ }
+ return -1;
+}
+
+/** @internal Wrapper for stdlib free. */
+static inline void array_std_free(void *baton, void *p)
+{
+ free(p);
+}
+
+/** Declare an array structure. */
+#define array_t(type) struct {type * at; size_t len; size_t cap; }
+
+/** Zero-initialize the array. */
+#define array_init(array) ((array).at = NULL, (array).len = (array).cap = 0)
+
+/** Free and zero-initialize the array (plain malloc/free). */
+#define array_clear(array) \
+ array_clear_mm(array, array_std_free, NULL)
+
+/** Make the array empty and free pointed-to memory.
+ * Mempool usage: pass mm_free and a knot_mm_t* . */
+#define array_clear_mm(array, free, baton) \
+ (free)((baton), (array).at), array_init(array)
+
+/** Reserve capacity for at least n elements.
+ * @return 0 if success, <0 on failure */
+#define array_reserve(array, n) \
+ array_reserve_mm(array, n, array_std_reserve, NULL)
+
+/** Reserve capacity for at least n elements.
+ * Mempool usage: pass kr_memreserve and a knot_mm_t* .
+ * @return 0 if success, <0 on failure */
+#define array_reserve_mm(array, n, reserve, baton) \
+ (reserve)((baton), (char **) &(array).at, sizeof((array).at[0]), (n), &(array).cap)
+
+/**
+ * Push value at the end of the array, resize it if necessary.
+ * Mempool usage: pass kr_memreserve and a knot_mm_t* .
+ * @note May fail if the capacity is not reserved.
+ * @return element index on success, <0 on failure
+ */
+#define array_push_mm(array, val, reserve, baton) \
+ (int)((array).len < (array).cap ? ((array).at[(array).len] = val, (array).len++) \
+ : (array_reserve_mm(array, ((array).cap + 1), reserve, baton) < 0 ? -1 \
+ : ((array).at[(array).len] = val, (array).len++)))
+
+/**
+ * Push value at the end of the array, resize it if necessary (plain malloc/free).
+ * @note May fail if the capacity is not reserved.
+ * @return element index on success, <0 on failure
+ */
+#define array_push(array, val) \
+ array_push_mm(array, val, array_std_reserve, NULL)
+
+/**
+ * Pop value from the end of the array.
+ */
+#define array_pop(array) \
+ (array).len -= 1
+
+/**
+ * Remove value at given index.
+ * @note The last element is moved into the removed slot, so ordering is not preserved.
+ * @return 0 on success, <0 on failure
+ */
+#define array_del(array, i) \
+ (int)((i) < (array).len ? ((array).len -= 1,(array).at[i] = (array).at[(array).len], 0) : -1)
+
+/**
+ * Return last element of the array.
+ * @warning Undefined if the array is empty.
+ */
+#define array_tail(array) \
+ (array).at[(array).len - 1]
+
+/** @} */
diff --git a/lib/generic/lru.c b/lib/generic/lru.c
new file mode 100644
index 0000000..12bba4e
--- /dev/null
+++ b/lib/generic/lru.c
@@ -0,0 +1,236 @@
+/* Copyright (C) 2016-2018 CZ.NIC, z.s.p.o. <knot-dns@labs.nic.cz>
+
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program. If not, see <https://www.gnu.org/licenses/>.
+ */
+
+#include "lib/generic/lru.h"
+#include "contrib/murmurhash3/murmurhash3.h"
+
+typedef struct lru_group lru_group_t;
+
+struct lru_item {
+ uint16_t key_len, val_len; /**< Two bytes should be enough for our purposes. */
+ char data[];
+ /**< Place for both key and value.
+ *
+ * We use "char" to satisfy the C99+ aliasing rules.
+ * See C99 section 6.5 Expressions, paragraph 7.
+ * Any type can be accessed through char-pointer,
+ * so we can use a common struct definition
+ * for all types being held.
+ */
+};
+
+/** @internal Compute offset of value in struct lru_item. */
+static uint val_offset(uint key_len)
+{
+ uint key_end = offsetof(struct lru_item, data) + key_len;
+	// round it up to a multiple of four
+ return round_power(key_end, 2);
+}
+
+/** @internal Return pointer to value in an item. */
+static void * item_val(struct lru_item *it)
+{
+ return it->data + val_offset(it->key_len) - offsetof(struct lru_item, data);
+}
+
+/** @internal Compute the size of an item. ATM we don't align/pad the end of it. */
+static uint item_size(uint key_len, uint val_len)
+{
+ return val_offset(key_len) + val_len;
+}
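+/* Item memory layout, a sketch (the value starts at a 4-byte-aligned offset
+ * from the start of the item):
+ *   offset 0: key_len (2 B) | 2: val_len (2 B) | 4 = data[]: key bytes + padding | value
+ * The value begins at val_offset(key_len), i.e. 4 + key_len rounded up to
+ * a multiple of 4. */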
+
+/** @internal Free each item. */
+KR_EXPORT void lru_free_items_impl(struct lru *lru)
+{
+ assert(lru);
+ for (size_t i = 0; i < (1 << (size_t)lru->log_groups); ++i) {
+ lru_group_t *g = &lru->groups[i];
+ for (int j = 0; j < LRU_ASSOC; ++j)
+ mm_free(lru->mm, g->items[j]);
+ }
+}
+
+/** @internal See lru_apply. */
+KR_EXPORT void lru_apply_impl(struct lru *lru, lru_apply_fun f, void *baton)
+{
+ bool ok = lru && f;
+ if (!ok) {
+ assert(false);
+ return;
+ }
+ for (size_t i = 0; i < (1 << (size_t)lru->log_groups); ++i) {
+ lru_group_t *g = &lru->groups[i];
+ for (uint j = 0; j < LRU_ASSOC; ++j) {
+ struct lru_item *it = g->items[j];
+ if (!it)
+ continue;
+ enum lru_apply_do ret =
+ f(it->data, it->key_len, item_val(it), baton);
+ switch(ret) {
+ case LRU_APPLY_DO_EVICT: // evict
+ mm_free(lru->mm, it);
+ g->items[j] = NULL;
+ g->counts[j] = 0;
+ g->hashes[j] = 0;
+ break;
+ default:
+ assert(ret == LRU_APPLY_DO_NOTHING);
+ }
+ }
+ }
+}
+
+/** @internal See lru_create. */
+KR_EXPORT struct lru * lru_create_impl(uint max_slots, knot_mm_t *mm_array, knot_mm_t *mm)
+{
+ assert(max_slots);
+ if (!max_slots)
+ return NULL;
+ // let lru->log_groups = ceil(log2(max_slots / (float) assoc))
+ // without trying for efficiency
+ uint group_count = (max_slots - 1) / LRU_ASSOC + 1;
+ uint log_groups = 0;
+ for (uint s = group_count - 1; s; s /= 2)
+ ++log_groups;
+ group_count = 1 << log_groups;
+ assert(max_slots <= group_count * LRU_ASSOC && group_count * LRU_ASSOC < 2 * max_slots);
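+	/* E.g. max_slots = 100 with LRU_ASSOC = 3:
+	 * group_count = 34 -> log_groups = 6 -> 64 groups, capacity 192 slots. */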
+
+ size_t size = offsetof(struct lru, groups[group_count]);
+ struct lru *lru = mm_alloc(mm_array, size);
+ if (unlikely(lru == NULL))
+ return NULL;
+ *lru = (struct lru){
+ .mm = mm,
+ .mm_array = mm_array,
+ .log_groups = log_groups,
+ };
+ // zeros are a good init
+ memset(lru->groups, 0, size - offsetof(struct lru, groups));
+ return lru;
+}
+
+/** @internal Decrement all counters within a group. */
+static void group_dec_counts(lru_group_t *g) {
+ g->counts[LRU_TRACKED] = LRU_TRACKED;
+ for (uint i = 0; i < LRU_TRACKED + 1; ++i)
+ if (likely(g->counts[i]))
+ --g->counts[i];
+}
+
+/** @internal Increment a counter within a group. */
+static void group_inc_count(lru_group_t *g, int i) {
+ if (likely(++(g->counts[i])))
+ return;
+ g->counts[i] = -1;
+ // We could've decreased or halved all of them, but let's keep the max.
+}
+
+/** @internal Implementation of both getting and insertion.
+ * Note: val_len is only meaningful if do_insert.
+ * *is_new is only meaningful when the return value isn't NULL:
+ * it is set to true iff the returned entry has been allocated just now.
+ * If the return value is NULL, *is_new remains untouched.
+ */
+KR_EXPORT void * lru_get_impl(struct lru *lru, const char *key, uint key_len,
+ uint val_len, bool do_insert, bool *is_new)
+{
+ bool ok = lru && (key || !key_len) && key_len <= UINT16_MAX
+ && (!do_insert || val_len <= UINT16_MAX);
+ if (!ok) {
+ assert(false);
+ return NULL; // reasonable fallback when not debugging
+ }
+ bool is_new_entry = false;
+ // find the right group
+ uint32_t khash = hash(key, key_len);
+ uint16_t khash_top = khash >> 16;
+ lru_group_t *g = &lru->groups[khash & ((1 << lru->log_groups) - 1)];
+ struct lru_item *it = NULL;
+ uint i;
+ // scan the *stored* elements in the group
+ for (i = 0; i < LRU_ASSOC; ++i) {
+ if (g->hashes[i] == khash_top) {
+ it = g->items[i];
+ if (likely(it && it->key_len == key_len
+ && (key_len == 0 || memcmp(it->data, key, key_len) == 0))) {
+ /* Found a key, but trying to insert a value larger than available
+ * space in the allocated slot, so the entry must be resized to fit. */
+ if (unlikely(do_insert && val_len > it->val_len)) {
+ goto insert;
+ } else {
+ goto found; // to reduce huge nesting depth
+ }
+ }
+ }
+ }
+ // key not found; first try an empty/counted-out place to insert
+ if (do_insert)
+ for (i = 0; i < LRU_ASSOC; ++i)
+ if (g->items[i] == NULL || g->counts[i] == 0)
+ goto insert;
+ // check if we track key's count at least
+ for (i = LRU_ASSOC; i < LRU_TRACKED; ++i) {
+ if (g->hashes[i] == khash_top) {
+ group_inc_count(g, i);
+ if (!do_insert)
+ return NULL;
+ // check if we trumped some stored key
+ for (uint j = 0; j < LRU_ASSOC; ++j)
+ if (unlikely(g->counts[i] > g->counts[j])) {
+ // evict key j, i.e. swap with i
+ --g->counts[i]; // we increment it below
+ SWAP(g->counts[i], g->counts[j]);
+ SWAP(g->hashes[i], g->hashes[j]);
+ i = j;
+ goto insert;
+ }
+ return NULL;
+ }
+ }
+ // not found at all: decrement all counts but only on every LRU_TRACKED occasion
+ if (g->counts[LRU_TRACKED])
+ --g->counts[LRU_TRACKED];
+ else
+ group_dec_counts(g);
+ return NULL;
+insert: // insert into position i (incl. key)
+ assert(i < LRU_ASSOC);
+ g->hashes[i] = khash_top;
+ it = g->items[i];
+ uint new_size = item_size(key_len, val_len);
+ if (it == NULL || new_size != item_size(it->key_len, it->val_len)) {
+ // (re)allocate
+ mm_free(lru->mm, it);
+ it = g->items[i] = mm_alloc(lru->mm, new_size);
+ if (it == NULL)
+ return NULL;
+ }
+ it->key_len = key_len;
+ it->val_len = val_len;
+ if (key_len > 0) {
+ memcpy(it->data, key, key_len);
+ }
+ memset(item_val(it), 0, val_len); // clear the value
+ is_new_entry = true;
+found: // key and hash OK on g->items[i]; now update stamps
+ assert(i < LRU_ASSOC);
+ group_inc_count(g, i);
+ if (is_new) {
+ *is_new = is_new_entry;
+ }
+ return item_val(g->items[i]);
+}
+
diff --git a/lib/generic/lru.h b/lib/generic/lru.h
new file mode 100644
index 0000000..b5c9bcd
--- /dev/null
+++ b/lib/generic/lru.h
@@ -0,0 +1,249 @@
+/* Copyright (C) 2016-2018 CZ.NIC, z.s.p.o. <knot-dns@labs.nic.cz>
+
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program. If not, see <https://www.gnu.org/licenses/>.
+ */
+/**
+ * @file lru.h
+ * @brief A lossy cache.
+ *
+ * @note The implementation tries to keep frequent keys and avoid others,
+ * even if "used recently", so it may refuse to store a new key on lru_get_new().
+ * It uses hashing to split the problem pseudo-randomly into smaller groups,
+ * and within each it tries to approximate relative usage counts of several
+ * most frequent keys/hashes. This tracking is done for *more* keys than
+ * those that are actually stored.
+ *
+ * Example usage:
+ * @code{.c}
+ * // Define new LRU type
+ * typedef lru_t(int) lru_int_t;
+ *
+ * // Create LRU
+ * lru_int_t *lru;
+ * lru_create(&lru, 5, NULL, NULL);
+ *
+ * // Insert some values
+ * int *pi = lru_get_new(lru, "luke", strlen("luke"), NULL);
+ * if (pi)
+ * *pi = 42;
+ * pi = lru_get_new(lru, "leia", strlen("leia"), NULL);
+ * if (pi)
+ * *pi = 24;
+ *
+ * // Retrieve values
+ * int *ret = lru_get_try(lru, "luke", strlen("luke"), NULL);
+ * if (!ret) printf("luke dropped out!\n");
+ * else printf("luke's number is %d\n", *ret);
+ *
+ * char *enemies[] = {"goro", "raiden", "subzero", "scorpion"};
+ * for (int i = 0; i < 4; ++i) {
+ * int *val = lru_get_new(lru, enemies[i], strlen(enemies[i]), NULL);
+ * if (val)
+ * *val = i;
+ * }
+ *
+ * // We're done
+ * lru_free(lru);
+ * @endcode
+ *
+ * \addtogroup generics
+ * @{
+ */
+
+#pragma once
+
+#include <assert.h>
+#include <stdint.h>
+#include <stddef.h>
+
+#include "contrib/ucw/lib.h"
+#include "lib/utils.h"
+#include "libknot/mm_ctx.h"
+
+/* ================================ Interface ================================ */
+
+/** @brief The type for LRU, parametrized by value type. */
+#define lru_t(type) \
+ union { \
+ type *pdata_t; /* only the *type* information is used */ \
+ struct lru lru; \
+ }
+
+/**
+ * @brief Allocate and initialize an LRU with default associativity.
+ *
+ * The real limit on the number of slots can be a bit larger but less than double.
+ *
+ * @param ptable pointer to a pointer to the LRU
+ * @param max_slots number of slots
+ * @param mm_ctx_array memory context to use for the huge array, NULL for default
+ * @param mm_ctx memory context to use for individual key-value pairs, NULL for default
+ *
+ * @note The pointers to memory contexts need to remain valid
+ * during the whole life of the structure (or be NULL).
+ */
+#define lru_create(ptable, max_slots, mm_ctx_array, mm_ctx) do { \
+ (void)(((__typeof__((*(ptable))->pdata_t))0) == (void *)0); /* typecheck lru_t */ \
+ *(ptable) = (__typeof__(*(ptable))) \
+ lru_create_impl((max_slots), (mm_ctx_array), (mm_ctx)); \
+ } while (false)
+
+/** @brief Free an LRU created by lru_create (it can be NULL). */
+#define lru_free(table) \
+ lru_free_impl(&(table)->lru)
+
+/** @brief Reset an LRU to the empty state (but preserve any settings). */
+#define lru_reset(table) \
+ lru_reset_impl(&(table)->lru)
+
+/**
+ * @brief Find key in the LRU and return pointer to the corresponding value.
+ *
+ * @param table pointer to LRU
+ * @param key_ lookup key
+ * @param len_ key length
+ * @return pointer to data or NULL if not found
+ */
+#define lru_get_try(table, key_, len_) \
+ (__typeof__((table)->pdata_t)) \
+ lru_get_impl(&(table)->lru, (key_), (len_), -1, false, NULL)
+
+/**
+ * @brief Return pointer to value, inserting if needed (zeroed).
+ *
+ * @param table pointer to LRU
+ * @param key_ lookup key
+ * @param len_ key length
+ * @param is_new pointer to bool to store result of operation
+ * (true if entry is newly added, false otherwise; can be NULL).
+ * @return pointer to data or NULL (can be NULL even if memory could be allocated!)
+ */
+#define lru_get_new(table, key_, len_, is_new) \
+ (__typeof__((table)->pdata_t)) \
+ lru_get_impl(&(table)->lru, (key_), (len_), \
+ sizeof(*(table)->pdata_t), true, is_new)
+
+/**
+ * @brief Apply a function to every item in LRU.
+ *
+ * @param table pointer to LRU
+ * @param function enum lru_apply_do (*function)(const char *key, uint len, val_type *val, void *baton)
+ * See enum lru_apply_do for the return type meanings.
+ * @param baton extra pointer passed to each function invocation
+ */
+#define lru_apply(table, function, baton) do { \
+ lru_apply_fun_g(fun_dummy, __typeof__(*(table)->pdata_t)) = 0; \
+ (void)(fun_dummy == (function)); /* produce a warning with incompatible function type */ \
+ lru_apply_impl(&(table)->lru, (lru_apply_fun)(function), (baton)); \
+ } while (false)
+
+/** @brief Possible actions to do with an element. */
+enum lru_apply_do {
+ LRU_APPLY_DO_NOTHING,
+ LRU_APPLY_DO_EVICT,
+	/* maybe more in the future */
+};
+
+/**
+ * @brief Return the real capacity - maximum number of keys holdable within.
+ *
+ * @param table pointer to LRU
+ */
+#define lru_capacity(table) lru_capacity_impl(&(table)->lru)
+
+
+/** @brief Round the value up to a multiple of (1 << power). */
+static inline uint round_power(uint size, uint power)
+{
+ uint res = ((size - 1) & ~((1 << power) - 1)) + (1 << power);
+ assert(__builtin_ctz(res) >= power);
+ assert(size <= res && res < size + (1 << power));
+ return res;
+}
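+/* A couple of worked examples of round_power (power = 2, i.e. multiples of 4):
+ *   round_power(17, 2) == 20,  round_power(16, 2) == 16. */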
+
+
+/* ======================== Inlined part of implementation ======================== */
+/** @cond internal */
+
+#define lru_apply_fun_g(name, val_type) \
+ enum lru_apply_do (*(name))(const char *key, uint len, val_type *val, void *baton)
+typedef lru_apply_fun_g(lru_apply_fun, void);
+
+#if __GNUC__ >= 4
+ #define CACHE_ALIGNED __attribute__((aligned(64)))
+#else
+ #define CACHE_ALIGNED
+#endif
+
+struct lru;
+void lru_free_items_impl(struct lru *lru);
+struct lru * lru_create_impl(uint max_slots, knot_mm_t *mm_array, knot_mm_t *mm);
+void * lru_get_impl(struct lru *lru, const char *key, uint key_len,
+ uint val_len, bool do_insert, bool *is_new);
+void lru_apply_impl(struct lru *lru, lru_apply_fun f, void *baton);
+
+struct lru_item;
+
+#if SIZE_MAX > (1 << 32)
+ /** @internal The number of keys stored within each group. */
+ #define LRU_ASSOC 3
+#else
+ #define LRU_ASSOC 4
+#endif
+/** @internal The number of hashes tracked within each group:
+ * 10-1 on 64-bit platforms, 12-1 on 32-bit ones (i.e. 9 or 11). */
+
+struct lru_group {
+ uint16_t counts[LRU_TRACKED+1]; /*!< Occurrence counters; the last one is special. */
+ uint16_t hashes[LRU_TRACKED+1]; /*!< Top halves of hashes; the last one is unused. */
+ struct lru_item *items[LRU_ASSOC]; /*!< The full items. */
+} CACHE_ALIGNED;
+
+/* The sizes are chosen so lru_group just fits into a single x86 cache line. */
+static_assert(64 == sizeof(struct lru_group)
+ && 64 == LRU_ASSOC * sizeof(void*) + (LRU_TRACKED+1) * 4,
+ "bad sizing for your sizeof(void*)");
+
+struct lru {
+ struct knot_mm *mm, /**< Memory context to use for keys. */
+ *mm_array; /**< Memory context to use for this structure itself. */
+ uint log_groups; /**< Logarithm of the number of LRU groups. */
+ struct lru_group groups[] CACHE_ALIGNED; /**< The groups of items. */
+};
+
+/** @internal See lru_free. */
+static inline void lru_free_impl(struct lru *lru)
+{
+ if (!lru)
+ return;
+ lru_free_items_impl(lru);
+ mm_free(lru->mm_array, lru);
+}
+
+/** @internal See lru_reset. */
+static inline void lru_reset_impl(struct lru *lru)
+{
+ lru_free_items_impl(lru);
+ memset(lru->groups, 0, sizeof(lru->groups[0]) * (1 << lru->log_groups));
+}
+
+/** @internal See lru_capacity. */
+static inline uint lru_capacity_impl(struct lru *lru)
+{
+ assert(lru);
+ return (1 << lru->log_groups) * LRU_ASSOC;
+}
+
+/** @endcond */
+/** @} (addtogroup generics) */
diff --git a/lib/generic/map.c b/lib/generic/map.c
new file mode 100644
index 0000000..3e06520
--- /dev/null
+++ b/lib/generic/map.c
@@ -0,0 +1,354 @@
+/*
+ * critbit89 - A crit-bit tree implementation for strings in C89
+ * Written by Jonas Gehring <jonas@jgehring.net>
+ * Implemented key-value storing by Marek Vavrusa <marek.vavrusa@nic.cz>
+ *
+ * The code makes the assumption that malloc returns pointers aligned at at
+ * least a two-byte boundary. Since the C standard requires that malloc return
+ * pointers that can store any type, there are no commonly-used toolchains for
+ * which this assumption is false.
+ *
+ * See https://github.com/agl/critbit/blob/master/critbit.pdf for reference.
+ */
+
+#include <errno.h>
+#include <string.h>
+#include <stdlib.h>
+
+#include "map.h"
+#include "lib/utils.h"
+
+ /* Exports */
+#if defined _WIN32 || defined __CYGWIN__
+ #define EXPORT __attribute__ ((dllexport))
+#else
+ #define EXPORT __attribute__ ((visibility ("default")))
+#endif
+
+#ifdef _MSC_VER /* MSVC */
+ typedef unsigned __int8 uint8_t;
+ typedef unsigned __int32 uint32_t;
+ #ifdef _WIN64
+ typedef signed __int64 intptr_t;
+ #else
+ typedef _W64 signed int intptr_t;
+ #endif
+#else /* Not MSVC */
+ #include <stdint.h>
+#endif
+
+typedef struct {
+ void* value;
+ uint8_t key[];
+} cb_data_t;
+
+typedef struct {
+ void *child[2];
+ uint32_t byte;
+ uint8_t otherbits;
+} cb_node_t;
+
+/* Return true if ptr is internal node. */
+static inline int ref_is_internal(const uint8_t *p)
+{
+ return 1 & (intptr_t)p;
+}
+
+/* Get internal node. */
+static inline cb_node_t *ref_get_internal(uint8_t *p)
+{
+ return (cb_node_t *)(p - 1);
+}
+
+/* Static helper functions */
+static void cbt_traverse_delete(map_t *map, void *top)
+{
+ uint8_t *p = top;
+ if (ref_is_internal(p)) {
+ cb_node_t *q = ref_get_internal(p);
+ cbt_traverse_delete(map, q->child[0]);
+ cbt_traverse_delete(map, q->child[1]);
+ mm_free(map->pool, q);
+ } else {
+ mm_free(map->pool, p);
+ }
+}
+
+static int cbt_traverse_prefixed(void *top,
+ int (*callback)(const char *, void *, void *), void *baton)
+{
+ uint8_t *p = top;
+ cb_data_t *x = (cb_data_t *)top;
+
+ if (ref_is_internal(p)) {
+ cb_node_t *q = ref_get_internal(p);
+ int ret = 0;
+
+ ret = cbt_traverse_prefixed(q->child[0], callback, baton);
+ if (ret != 0) {
+ return ret;
+ }
+ ret = cbt_traverse_prefixed(q->child[1], callback, baton);
+ if (ret != 0) {
+ return ret;
+ }
+ return 0;
+ }
+
+ return (callback)((const char *)x->key, x->value, baton);
+}
+
+static cb_data_t *cbt_make_data(map_t *map, const uint8_t *str, size_t len, void *value)
+{
+ cb_data_t *x = mm_alloc(map->pool, sizeof(cb_data_t) + len);
+ if (x != NULL) {
+ x->value = value;
+ memcpy(x->key, str, len);
+ }
+ return x;
+}
+
+/*! Like map_contains, but also set the value, if passed and found. */
+static int cbt_get(map_t *map, const char *str, void **value)
+{
+ const uint8_t *ubytes = (void *)str;
+ const size_t ulen = strlen(str);
+ uint8_t *p = map->root;
+ cb_data_t *x = NULL;
+
+ if (p == NULL) {
+ return 0;
+ }
+
+ while (ref_is_internal(p)) {
+ cb_node_t *q = ref_get_internal(p);
+ uint8_t c = 0;
+ int direction;
+
+ if (q->byte < ulen) {
+ c = ubytes[q->byte];
+ }
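+		/* otherbits has every bit set except the critical one, so
+		 * (otherbits | c) == 0xFF iff the critical bit of c is set;
+		 * adding 1 then carries into bit 8, making direction 1, else 0. */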
+ direction = (1 + (q->otherbits | c)) >> 8;
+
+ p = q->child[direction];
+ }
+
+ x = (cb_data_t *)p;
+ if (strcmp(str, (const char *)x->key) == 0) {
+ if (value != NULL) {
+ *value = x->value;
+ }
+ return 1;
+ }
+
+ return 0;
+}
+
+/*! Returns non-zero if map contains str */
+EXPORT int map_contains(map_t *map, const char *str)
+{
+ return cbt_get(map, str, NULL);
+}
+
+EXPORT void *map_get(map_t *map, const char *str)
+{
+ void *v = NULL;
+ cbt_get(map, str, &v);
+ return v;
+}
+
+EXPORT int map_set(map_t *map, const char *str, void *val)
+{
+ const uint8_t *const ubytes = (void *)str;
+ const size_t ulen = strlen(str);
+ uint8_t *p = map->root;
+ uint8_t c = 0, *x = NULL;
+ uint32_t newbyte = 0;
+ uint32_t newotherbits = 0;
+ int direction = 0, newdirection = 0;
+ cb_node_t *newnode = NULL;
+ cb_data_t *data = NULL;
+ void **wherep = NULL;
+
+ if (p == NULL) {
+ map->root = cbt_make_data(map, (const uint8_t *)str, ulen + 1, val);
+ if (map->root == NULL) {
+ return ENOMEM;
+ }
+ return 0;
+ }
+
+ while (ref_is_internal(p)) {
+ cb_node_t *q = ref_get_internal(p);
+ c = 0;
+ if (q->byte < ulen) {
+ c = ubytes[q->byte];
+ }
+ direction = (1 + (q->otherbits | c)) >> 8;
+
+ p = q->child[direction];
+ }
+
+ data = (cb_data_t *)p;
+ for (newbyte = 0; newbyte < ulen; ++newbyte) {
+ if (data->key[newbyte] != ubytes[newbyte]) {
+ newotherbits = data->key[newbyte] ^ ubytes[newbyte];
+ goto different_byte_found;
+ }
+ }
+
+ if (data->key[newbyte] != 0) {
+ newotherbits = data->key[newbyte];
+ goto different_byte_found;
+ }
+ data->value = val;
+ return 1;
+
+different_byte_found:
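+	/* Smear all bits below the topmost set (differing) bit, isolate that
+	 * bit, then invert within the byte: newotherbits ends up with every
+	 * bit set except the critical one. */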
+ newotherbits |= newotherbits >> 1;
+ newotherbits |= newotherbits >> 2;
+ newotherbits |= newotherbits >> 4;
+ newotherbits = (newotherbits & ~(newotherbits >> 1)) ^ 255;
+ c = data->key[newbyte];
+ newdirection = (1 + (newotherbits | c)) >> 8;
+
+ newnode = mm_alloc(map->pool, sizeof(cb_node_t));
+ if (newnode == NULL) {
+ return ENOMEM;
+ }
+
+ x = (uint8_t *)cbt_make_data(map, ubytes, ulen + 1, val);
+ if (x == NULL) {
+ mm_free(map->pool, newnode);
+ return ENOMEM;
+ }
+
+ newnode->byte = newbyte;
+ newnode->otherbits = newotherbits;
+ newnode->child[1 - newdirection] = x;
+
+ /* Insert into map */
+ wherep = &map->root;
+ for (;;) {
+ cb_node_t *q;
+ p = *wherep;
+ if (!ref_is_internal(p)) {
+ break;
+ }
+
+ q = ref_get_internal(p);
+ if (q->byte > newbyte) {
+ break;
+ }
+ if (q->byte == newbyte && q->otherbits > newotherbits) {
+ break;
+ }
+
+ c = 0;
+ if (q->byte < ulen) {
+ c = ubytes[q->byte];
+ }
+ direction = (1 + (q->otherbits | c)) >> 8;
+ wherep = q->child + direction;
+ }
+
+ newnode->child[newdirection] = *wherep;
+ *wherep = (void *)(1 + (char *)newnode);
+ return 0;
+}
+
+/*! Deletes str from the map, returns 0 on success */
+EXPORT int map_del(map_t *map, const char *str)
+{
+ const uint8_t *ubytes = (void *)str;
+ const size_t ulen = strlen(str);
+ uint8_t *p = map->root;
+ void **wherep = NULL, **whereq = NULL;
+ cb_node_t *q = NULL;
+ cb_data_t *data = NULL;
+ int direction = 0;
+
+ if (map->root == NULL) {
+ return 1;
+ }
+ wherep = &map->root;
+
+ while (ref_is_internal(p)) {
+ uint8_t c = 0;
+ whereq = wherep;
+ q = ref_get_internal(p);
+
+ if (q->byte < ulen) {
+ c = ubytes[q->byte];
+ }
+ direction = (1 + (q->otherbits | c)) >> 8;
+ wherep = q->child + direction;
+ p = *wherep;
+ }
+
+ data = (cb_data_t *)p;
+ if (strcmp(str, (const char *)data->key) != 0) {
+ return 1;
+ }
+ mm_free(map->pool, p);
+
+ if (!whereq) {
+ map->root = NULL;
+ return 0;
+ }
+
+ *whereq = q->child[1 - direction];
+ mm_free(map->pool, q);
+ return 0;
+}
+
+/*! Clears the given map */
+EXPORT void map_clear(map_t *map)
+{
+ if (map->root) {
+ cbt_traverse_delete(map, map->root);
+ }
+ map->root = NULL;
+}
+
+/*! Calls callback for all strings in map with the given prefix */
+EXPORT int map_walk_prefixed(map_t *map, const char *prefix,
+ int (*callback)(const char *, void *, void *), void *baton)
+{
+ if (!map) {
+ return 0;
+ }
+
+ const uint8_t *ubytes = (void *)prefix;
+ const size_t ulen = strlen(prefix);
+ uint8_t *p = map->root;
+ uint8_t *top = p;
+ cb_data_t *data = NULL;
+
+ if (p == NULL) {
+ return 0;
+ }
+
+ while (ref_is_internal(p)) {
+ cb_node_t *q = ref_get_internal(p);
+ uint8_t c = 0;
+ int direction;
+
+ if (q->byte < ulen) {
+ c = ubytes[q->byte];
+ }
+ direction = (1 + (q->otherbits | c)) >> 8;
+
+ p = q->child[direction];
+ if (q->byte < ulen) {
+ top = p;
+ }
+ }
+
+ data = (cb_data_t *)p;
+ if (strlen((const char *)data->key) < ulen || memcmp(data->key, prefix, ulen) != 0) {
+ return 0; /* No strings match */
+ }
+
+ return cbt_traverse_prefixed(top, callback, baton);
+}
diff --git a/lib/generic/map.h b/lib/generic/map.h
new file mode 100644
index 0000000..73ce4c0
--- /dev/null
+++ b/lib/generic/map.h
@@ -0,0 +1,115 @@
+/*
+ * critbit89 - A crit-bit map implementation for strings in C89
+ * Written by Jonas Gehring <jonas@jgehring.net>
+ */
+
+/**
+ * @file map.h
+ * @brief A Crit-bit tree key-value map implementation.
+ *
+ * @warning If the user provides a custom allocator, it must return addresses aligned to 2B boundary.
+ *
+ * # Example usage:
+ *
+ * @code{.c}
+ * map_t map = map_make(NULL);
+ *
+ * // Custom allocator (optional)
+ * map.malloc = &mymalloc;
+ * map.baton = &mymalloc_context;
+ *
+ * // Insert k-v pairs
+ * int values[] = { 42, 53, 64 };
+ * if (map_set(&map, "princess", &values[0]) != 0 ||
+ * map_set(&map, "prince", &values[1]) != 0 ||
+ * map_set(&map, "leia", &values[2]) != 0) {
+ * fail();
+ * }
+ *
+ * // Test membership
+ * if (map_contains(&map, "leia")) {
+ * success();
+ * }
+ *
+ * // Prefix search
+ * int i = 0;
+ * int count(const char *k, void *v, void *ext) { (*(int *)ext)++; return 0; }
+ * if (map_walk_prefixed(&map, "princ", count, &i) == 0) {
+ * printf("%d matches\n", i);
+ * }
+ *
+ * // Delete
+ * if (map_del(&map, "badkey") != 0) {
+ * fail(); // No such key
+ * }
+ *
+ * // Clear the map
+ * map_clear(&map);
+ * @endcode
+ *
+ * \addtogroup generics
+ * @{
+ */
+
+#pragma once
+
+#include <stddef.h>
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+struct knot_mm; /* avoid the unnecessary include */
+
+/** Main data structure */
+typedef struct {
+ void *root;
+ struct knot_mm *pool;
+} map_t;
+
+/** Creates a new empty critbit map. Pass NULL for malloc+free. */
+static inline map_t map_make(struct knot_mm *pool)
+{
+ return (map_t){ .root = NULL, .pool = pool };
+}
+
+/** Returns non-zero if map contains str */
+int map_contains(map_t *map, const char *str);
+
+/** Returns value if map contains str. Note: a NULL result may mean either a missing key or a stored NULL value. */
+void *map_get(map_t *map, const char *str);
+
+/** Inserts str into map. Returns 0 if new, 1 if replaced, or ENOMEM. */
+int map_set(map_t *map, const char *str, void *val);
+
+/** Deletes str from the map, returns 0 on success */
+int map_del(map_t *map, const char *str);
+
+/** Clears the given map */
+void map_clear(map_t *map);
+
+/**
+ * Calls callback for all strings in map
+ * See @fn map_walk_prefixed() for documentation on parameters.
+ */
+#define map_walk(map, callback, baton) \
+ map_walk_prefixed((map), "", (callback), (baton))
+
+/**
+ * Calls callback for all strings in map with the given prefix.
+ * Returns value immediately if a callback returns nonzero.
+ *
+ * @param map the map to walk
+ * @param prefix required string prefix (empty => all strings)
+ * @param callback callback parameters are (key, value, baton)
+ * @param baton passed uservalue
+ */
+int map_walk_prefixed(map_t *map, const char *prefix,
+ int (*callback)(const char *, void *, void *), void *baton);
+
+
+#ifdef __cplusplus
+}
+#endif
+
+/** @} */
diff --git a/lib/generic/pack.h b/lib/generic/pack.h
new file mode 100644
index 0000000..dc7a975
--- /dev/null
+++ b/lib/generic/pack.h
@@ -0,0 +1,249 @@
+/* Copyright (C) 2015-2017 CZ.NIC, z.s.p.o. <knot-dns@labs.nic.cz>
+
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program. If not, see <https://www.gnu.org/licenses/>.
+ */
+
+/**
+ * @file pack.h
+ * @brief A length-prefixed list of objects, also an array list.
+ *
+ * Each object is prefixed by its length, so unlike an array this structure
+ * permits variable-length items. It is also equivalent to a forward-only
+ * list backed by an array.
+ *
+ * @note Maximum object size is 2^16 bytes, see ::pack_objlen_t
+ * @TODO If the pack gets corrupted somewhere, iteration may end up in an
+ * infinite loop, as the iterator is compared to the tail only for equality.
+ *
+ * # Example usage:
+ *
+ * @code{.c}
+ * pack_t pack;
+ * pack_init(pack);
+ *
+ * // Reserve 2 objects, 6 bytes total
+ * pack_reserve(pack, 2, 4 + 2);
+ *
+ * // Push 2 objects
+ * pack_obj_push(pack, U8("jedi"), 4);
+ * pack_obj_push(pack, U8("\xbe\xef"), 2);
+ *
+ * // Iterate length-value pairs
+ * uint8_t *it = pack_head(pack);
+ * while (it != pack_tail(pack)) {
+ * uint8_t *val = pack_obj_val(it);
+ * it = pack_obj_next(it);
+ * }
+ *
+ * // Remove object
+ * pack_obj_del(pack, U8("jedi"), 4);
+ *
+ * pack_clear(pack);
+ * @endcode
+ *
+ * \addtogroup generics
+ * @{
+ */
+
+#pragma once
+
+#include <stdint.h>
+#include <string.h>
+#include "array.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/** Packed object length type. */
+typedef uint16_t pack_objlen_t;
+
+/** Pack is defined as an array of bytes */
+typedef array_t(uint8_t) pack_t;
+
+/** Zero-initialize the pack. */
+#define pack_init(pack) \
+ array_init(pack)
+
+/** Make the pack empty and free pointed-to memory (plain malloc/free). */
+#define pack_clear(pack) \
+ array_clear(pack)
+
+/** Make the pack empty and free pointed-to memory.
+ * Mempool usage: pass mm_free and a knot_mm_t* . */
+#define pack_clear_mm(pack, free, baton) \
+ array_clear_mm((pack), (free), (baton))
+
+/** Reserve space for *additional* objects in the pack (plain malloc/free).
+ * @return 0 if success, <0 on failure */
+#define pack_reserve(pack, objs_count, objs_len) \
+ pack_reserve_mm((pack), (objs_count), (objs_len), array_std_reserve, NULL)
+
+/** Reserve space for *additional* objects in the pack.
+ * Mempool usage: pass kr_memreserve and a knot_mm_t* .
+ * @return 0 if success, <0 on failure */
+#define pack_reserve_mm(pack, objs_count, objs_len, reserve, baton) \
+ array_reserve_mm((pack), (pack).len + (sizeof(pack_objlen_t)*(objs_count) + (objs_len)), (reserve), (baton))
+
+/** Return pointer to first packed object.
+ *
+ * Recommended way to iterate:
+ * for (uint8_t *it = pack_head(pack); it != pack_tail(pack); it = pack_obj_next(it))
+ */
+#define pack_head(pack) \
+ ((pack).len > 0 ? &((pack).at[0]) : NULL)
+
+/** Return pack end pointer. */
+#define pack_tail(pack) \
+ ((pack).len > 0 ? &((pack).at[(pack).len]) : NULL)
+
+/** Return packed object length. */
+static inline pack_objlen_t pack_obj_len(uint8_t *it)
+{
+ pack_objlen_t len = 0;
+ if (it != NULL)
+ memcpy(&len, it, sizeof(len));
+ return len;
+}
+
+/** Return packed object value. */
+static inline uint8_t *pack_obj_val(uint8_t *it)
+{
+ if (it == NULL) {
+ assert(it);
+ return NULL;
+ }
+ return it + sizeof(pack_objlen_t);
+}
+
+/** Return pointer to next packed object. */
+static inline uint8_t *pack_obj_next(uint8_t *it)
+{
+ if (it == NULL) {
+ assert(it);
+ return NULL;
+ }
+ return pack_obj_val(it) + pack_obj_len(it);
+}
+
+/** Return pointer to the last packed object. */
+static inline uint8_t *pack_last(pack_t pack)
+{
+ if (pack.len == 0) {
+ return NULL;
+ }
+ uint8_t *it = pack_head(pack);
+ uint8_t *tail = pack_tail(pack);
+ while (true) {
+ uint8_t *next = pack_obj_next(it);
+ if (next == tail) {
+ return it;
+ }
+ it = next;
+ }
+}
+
+/** Push object to the end of the pack
+ * @return 0 on success, negative number on failure
+ */
+static inline int pack_obj_push(pack_t *pack, const uint8_t *obj, pack_objlen_t len)
+{
+ if (pack == NULL || obj == NULL) {
+ assert(false);
+ return kr_error(EINVAL);
+ }
+ size_t packed_len = len + sizeof(len);
+ if (pack->len + packed_len > pack->cap) {
+ return kr_error(ENOSPC);
+ }
+
+ uint8_t *endp = pack->at + pack->len;
+ memcpy(endp, (char *)&len, sizeof(len));
+ memcpy(endp + sizeof(len), obj, len);
+ pack->len += packed_len;
+ return 0;
+}
+
+/** Returns a pointer to packed object.
+ * @return pointer to packed object or NULL
+ */
+static inline uint8_t *pack_obj_find(pack_t *pack, const uint8_t *obj, pack_objlen_t len)
+{
+ if (pack == NULL || obj == NULL) {
+ assert(obj != NULL);
+ return NULL;
+ }
+ uint8_t *endp = pack_tail(*pack);
+ uint8_t *it = pack_head(*pack);
+ while (it != endp) {
+ uint8_t *val = pack_obj_val(it);
+ if (pack_obj_len(it) == len && memcmp(obj, val, len) == 0) {
+ return it;
+ }
+ it = pack_obj_next(it);
+ }
+ return NULL;
+}
+
+/** Delete object from the pack
+ * @return 0 on success, negative number on failure
+ */
+static inline int pack_obj_del(pack_t *pack, const uint8_t *obj, pack_objlen_t len)
+{
+ if (pack == NULL || obj == NULL) {
+ assert(obj != NULL);
+ return kr_error(EINVAL);
+ }
+ uint8_t *endp = pack_tail(*pack);
+ uint8_t *it = pack_obj_find(pack, obj, len);
+ if (it) {
+ size_t packed_len = len + sizeof(len);
+ memmove(it, it + packed_len, endp - it - packed_len);
+ pack->len -= packed_len;
+ return 0;
+ }
+ return -1;
+}
+
+/** Clone a pack, replacing destination pack; (*dst == NULL) is valid input.
+ * @return kr_error(ENOMEM) on allocation failure. */
+static inline int pack_clone(pack_t **dst, const pack_t *src, knot_mm_t *pool)
+{
+ if (!dst || !src) {
+ assert(false);
+ return kr_error(EINVAL);
+ }
+ /* Get a valid pack_t. */
+ if (!*dst) {
+ *dst = mm_alloc(pool, sizeof(pack_t));
+ if (!*dst) return kr_error(ENOMEM);
+ pack_init(**dst);
+ /* Clone data only if needed */
+ if (src->len == 0) return kr_ok();
+ }
+ /* Replace the contents of the pack_t. */
+ int ret = array_reserve_mm(**dst, src->len, kr_memreserve, pool);
+ if (ret < 0) {
+ return kr_error(ENOMEM);
+ }
+ memcpy((*dst)->at, src->at, src->len);
+ (*dst)->len = src->len;
+ return kr_ok();
+}
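+/* A usage sketch for pack_clone (assuming some existing pack_t pack;
+ * a NULL pool means plain malloc):
+ *   pack_t *copy = NULL;
+ *   if (pack_clone(&copy, &pack, NULL) != kr_ok()) {
+ *           // allocation failed
+ *   }
+ */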
+
+#ifdef __cplusplus
+}
+#endif
+
+/** @} */
diff --git a/lib/generic/queue.c b/lib/generic/queue.c
new file mode 100644
index 0000000..45657c7
--- /dev/null
+++ b/lib/generic/queue.c
@@ -0,0 +1,124 @@
+/* Copyright (C) 2018 CZ.NIC, z.s.p.o. <knot-dns@labs.nic.cz>
+
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include "lib/generic/queue.h"
+#include <string.h>
+
+KR_EXPORT void queue_init_impl(struct queue *q, size_t item_size)
+{
+ q->len = 0;
+ q->item_size = item_size;
+ q->head = q->tail = NULL;
+ /* Take 128 B (two x86 cache lines), except a small margin
+ * that the allocator can use for its overhead.
+ * Normally (64-bit pointers) this means 16 B header + 13*8 B data. */
+ q->chunk_cap = (128 - offsetof(struct queue_chunk, data)
+ - sizeof(size_t)
+ ) / item_size;
+ if (!q->chunk_cap) q->chunk_cap = 1; /* item_size big enough by itself */
+}
+
+KR_EXPORT void queue_deinit_impl(struct queue *q)
+{
+ assert(q);
+ struct queue_chunk *p = q->head;
+ while (p != NULL) {
+ struct queue_chunk *pf = p;
+ p = p->next;
+ free(pf);
+ }
+#ifndef NDEBUG
+ memset(q, 0, sizeof(*q));
+#endif
+}
+
+static struct queue_chunk * queue_chunk_new(const struct queue *q)
+{
+ struct queue_chunk *c = malloc(offsetof(struct queue_chunk, data)
+ + q->chunk_cap * q->item_size);
+	if (unlikely(!c)) abort(); // allocation failure is considered fatal here; simplifies the API
+ memset(c, 0, offsetof(struct queue_chunk, data));
+ c->cap = q->chunk_cap;
+ /* ->begin and ->end are zero, i.e. we optimize for _push
+ * and not _push_head, by default. */
+ return c;
+}
+
+/* Return pointer to the space for the new element. */
+KR_EXPORT void * queue_push_impl(struct queue *q)
+{
+ assert(q);
+ struct queue_chunk *t = q->tail; // shorthand
+ if (unlikely(!t)) {
+ assert(!q->head && !q->len);
+ q->head = q->tail = t = queue_chunk_new(q);
+ } else
+ if (t->end == t->cap) {
+ if (t->begin * 2 >= t->cap) {
+ /* Utilization is below 50%, so let's shift (no overlap). */
+ memcpy(t->data, t->data + t->begin * q->item_size,
+ (t->end - t->begin) * q->item_size);
+ t->end -= t->begin;
+ t->begin = 0;
+ } else {
+ /* Let's grow the tail by another chunk. */
+ assert(!t->next);
+ t->next = queue_chunk_new(q);
+ t = q->tail = t->next;
+ }
+ }
+ assert(t->end < t->cap);
+ ++(q->len);
+ ++(t->end);
+ return t->data + q->item_size * (t->end - 1);
+}
+
+/* Return pointer to the space for the new element. */
+KR_EXPORT void * queue_push_head_impl(struct queue *q)
+{
+ /* When we have choice, we optimize for further _push_head,
+ * i.e. when shifting or allocating a chunk,
+ * we store items on the tail-end of the chunk. */
+ assert(q);
+ struct queue_chunk *h = q->head; // shorthand
+ if (unlikely(!h)) {
+ assert(!q->tail && !q->len);
+ h = q->head = q->tail = queue_chunk_new(q);
+ h->begin = h->end = h->cap;
+ } else
+ if (h->begin == 0) {
+ if (h->end * 2 <= h->cap) {
+ /* Utilization is below 50%, so let's shift (no overlap).
+ * Computations here are simplified due to h->begin == 0. */
+ const int cnt = h->end;
+ memcpy(h->data + (h->cap - cnt) * q->item_size, h->data,
+ cnt * q->item_size);
+ h->begin = h->cap - cnt;
+ h->end = h->cap;
+ } else {
+ /* Let's grow the head by another chunk. */
+ h = queue_chunk_new(q);
+ h->next = q->head;
+ q->head = h;
+ h->begin = h->end = h->cap;
+ }
+ }
+ assert(h->begin > 0);
+ --(h->begin);
+ ++(q->len);
+ return h->data + q->item_size * h->begin;
+}
+
diff --git a/lib/generic/queue.h b/lib/generic/queue.h
new file mode 100644
index 0000000..755e759
--- /dev/null
+++ b/lib/generic/queue.h
@@ -0,0 +1,260 @@
+/* Copyright (C) 2018 CZ.NIC, z.s.p.o. <knot-dns@labs.nic.cz>
+
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+/**
+ * @file queue.h
+ * @brief A queue, usable for FIFO and LIFO simultaneously.
+ *
+ * Both the head and tail of the queue can be accessed and pushed to,
+ * but only the head can be popped from.
+ *
+ * @note The implementation uses a singly linked list of blocks
+ * where each block stores an array of values (for better efficiency).
+ *
+ * Example usage:
+ * @code{.c}
+ // define new queue type, and init a new queue instance
+ typedef queue_t(int) queue_int_t;
+ queue_int_t q;
+ queue_init(q);
+ // do some operations
+ queue_push(q, 1);
+ queue_push(q, 2);
+ queue_push(q, 3);
+ queue_push(q, 4);
+ queue_pop(q);
+ assert(queue_head(q) == 2);
+ assert(queue_tail(q) == 4);
+
+ // you may iterate
+ typedef queue_it_t(int) queue_it_int_t;
+ for (queue_it_int_t it = queue_it_begin(q); !queue_it_finished(it);
+ queue_it_next(it)) {
+ ++queue_it_val(it);
+ }
+ assert(queue_tail(q) == 5);
+
+ queue_push_head(q, 0);
+ ++queue_tail(q);
+ assert(queue_tail(q) == 6);
+ // free it up
+ queue_deinit(q);
+
+ // you may use dynamic allocation for the type itself
+ queue_int_t *qm = malloc(sizeof(queue_int_t));
+ queue_init(*qm);
+ queue_deinit(*qm);
+ free(qm);
+ * @endcode
+ *
+ * \addtogroup generics
+ * @{
+ */
+
+#pragma once
+
+#include "lib/defines.h"
+#include "contrib/ucw/lib.h"
+#include <assert.h>
+#include <stdbool.h>
+#include <stddef.h>
+#include <stdint.h>
+#include <stdlib.h>
+
+/** @brief The type for queue, parametrized by value type. */
+#define queue_t(type) \
+ union { \
+ type *pdata_t; /* only the *type* information is used */ \
+ struct queue queue; \
+ }
+
+/** @brief Initialize a queue. You can malloc() it the usual way. */
+#define queue_init(q) do { \
+ (void)(((__typeof__(((q).pdata_t)))0) == (void *)0); /* typecheck queue_t */ \
+ queue_init_impl(&(q).queue, sizeof(*(q).pdata_t)); \
+ } while (false)
+
+/** @brief De-initialize a queue: make it invalid and free any inner allocations. */
+#define queue_deinit(q) \
+ queue_deinit_impl(&(q).queue)
+
+/** @brief Push data to queue's tail. (Type-safe version; use _impl() otherwise.) */
+#define queue_push(q, data) \
+ *((__typeof__((q).pdata_t)) queue_push_impl(&(q).queue)) = data
+
+/** @brief Push data to queue's head. (Type-safe version; use _impl() otherwise.) */
+#define queue_push_head(q, data) \
+ *((__typeof__((q).pdata_t)) queue_push_head_impl(&(q).queue)) = data
+
+/** @brief Remove the element at the head.
+ * The queue must not be empty. */
+#define queue_pop(q) \
+ queue_pop_impl(&(q).queue)
+
+/** @brief Return a "reference" to the element at the head (it's an L-value).
+ * The queue must not be empty. */
+#define queue_head(q) \
+ ( *(__typeof__((q).pdata_t)) queue_head_impl(&(q).queue) )
+
+/** @brief Return a "reference" to the element at the tail (it's an L-value).
+ * The queue must not be empty. */
+#define queue_tail(q) \
+ ( *(__typeof__((q).pdata_t)) queue_tail_impl(&(q).queue) )
+
+/** @brief Return the number of elements in the queue (very efficient). */
+#define queue_len(q) \
+ ((const size_t)(q).queue.len)
+
+
+/** @brief Type for queue iterator, parametrized by value type.
+ * It's a simple structure that owns no other resources.
+ * You may NOT use it after doing any push or pop (without _begin again). */
+#define queue_it_t(type) \
+ union { \
+ type *pdata_t; /* only the *type* information is used */ \
+ struct queue_it iter; \
+ }
+
+/** @brief Initialize a queue iterator at the head of the queue.
+ * If you use this in assignment (instead of initialization),
+ * you will unfortunately need to add corresponding type-cast in front.
+ * Beware: there's no type-check between queue and iterator! */
+#define queue_it_begin(q) \
+ { .iter = queue_it_begin_impl(&(q).queue) }
+
+/** @brief Return a "reference" to the current element (it's an L-value) . */
+#define queue_it_val(it) \
+ ( *(__typeof__((it).pdata_t)) queue_it_val_impl(&(it).iter) )
+
+/** @brief Test if the iterator has gone past the last element.
+ * If it has, you may not use _val or _next. */
+#define queue_it_finished(it) \
+ queue_it_finished_impl(&(it).iter)
+
+/** @brief Advance the iterator to the next element. */
+#define queue_it_next(it) \
+ queue_it_next_impl(&(it).iter)
+
+
+
+/* ====================== Internal for the implementation ================== */
+/** @cond internal */
+
+struct queue;
+/* Non-inline functions are exported to be usable from daemon. */
+void queue_init_impl(struct queue *q, size_t item_size);
+void queue_deinit_impl(struct queue *q);
+void * queue_push_impl(struct queue *q);
+void * queue_push_head_impl(struct queue *q);
+
+struct queue_chunk;
+struct queue {
+ size_t len;
+ uint16_t chunk_cap, item_size;
+ struct queue_chunk *head, *tail;
+};
+
+struct queue_chunk {
+	struct queue_chunk *next; /**< head -> ... -> tail */
+	int16_t begin, end, cap, pad_; /**< indices: zero is closest to head */
+	/* We could fit these into uint8_t, for example, but the choice of (3+1)*2 bytes
+ * is a compromise between wasting space and getting a good alignment.
+ * In particular, queue_t(type*) will store the pointers on addresses
+ * aligned to the pointer size, in both 64-bit and 32-bit platforms.
+ */
+ char data[];
+ /**< The item data. We use "char" to satisfy the C99+ aliasing rules.
+ * See C99 section 6.5 Expressions, paragraph 7.
+ * Any type can be accessed through char-pointer,
+ * so we can use a common struct definition
+ * for all types being held.
+ */
+};
+
+static inline void * queue_head_impl(const struct queue *q)
+{
+ assert(q);
+ struct queue_chunk *h = q->head;
+ if (unlikely(!h))
+ return NULL;
+ assert(h->end > h->begin);
+ return h->data + h->begin * q->item_size;
+}
+
+static inline void * queue_tail_impl(const struct queue *q)
+{
+ assert(q);
+ struct queue_chunk *t = q->tail;
+ if (unlikely(!t))
+ return NULL;
+ assert(t->end > t->begin);
+ return t->data + (t->end - 1) * q->item_size;
+}
+
+static inline void queue_pop_impl(struct queue *q)
+{
+ assert(q);
+ struct queue_chunk *h = q->head;
+ assert(h && h->end > h->begin);
+ if (h->end - h->begin == 1) {
+ /* removing the last element in the chunk */
+ q->head = h->next;
+ free(h);
+ } else {
+ ++(h->begin);
+ }
+ --(q->len);
+}
+
+
+struct queue_it {
+ struct queue_chunk *chunk;
+ int16_t pos, item_size;
+};
+
+static inline struct queue_it queue_it_begin_impl(struct queue *q)
+{
+ assert(q);
+ return (struct queue_it){
+ .chunk = q->head,
+ .pos = q->head ? q->head->begin : -1,
+ .item_size = q->item_size,
+ };
+}
+
+static inline bool queue_it_finished_impl(struct queue_it *it)
+{
+ return it->chunk == NULL || it->pos >= it->chunk->end;
+}
+
+static inline void * queue_it_val_impl(struct queue_it *it)
+{
+ assert(!queue_it_finished_impl(it));
+ return it->chunk->data + it->pos * it->item_size;
+}
+
+static inline void queue_it_next_impl(struct queue_it *it)
+{
+ assert(!queue_it_finished_impl(it));
+ ++(it->pos);
+ if (it->pos < it->chunk->end)
+ return;
+ it->chunk = it->chunk->next;
+ it->pos = it->chunk ? it->chunk->begin : -1;
+}
+
+/** @endcond (internal) */
+/** @} (addtogroup generics) */
+
diff --git a/lib/generic/set.h b/lib/generic/set.h
new file mode 100644
index 0000000..332c1aa
--- /dev/null
+++ b/lib/generic/set.h
@@ -0,0 +1,105 @@
+/* Copyright (C) 2015-2017 CZ.NIC, z.s.p.o. <knot-dns@labs.nic.cz>
+
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program. If not, see <https://www.gnu.org/licenses/>.
+ */
+
+/**
+ * @file set.h
+ * @brief A set abstraction implemented on top of map.
+ *
+ * @note The API is based on map.h, see it for more examples.
+ *
+ * # Example usage:
+ *
+ * @code{.c}
+ * set_t set = set_make(NULL);
+ *
+ * // Insert keys
+ * if (set_add(&set, "princess") != 0 ||
+ * set_add(&set, "prince") != 0 ||
+ * set_add(&set, "leia") != 0) {
+ * fail();
+ * }
+ *
+ * // Test membership
+ * if (set_contains(&set, "leia")) {
+ * success();
+ * }
+ *
+ * // Prefix search
+ * int i = 0;
+ * int count(const char *s, void *n) { (*(int *)n)++; return 0; }
+ * if (set_walk_prefixed(set, "princ", count, &i) == 0) {
+ * printf("%d matches\n", i);
+ * }
+ *
+ * // Delete
+ * if (set_del(&set, "badkey") != 0) {
+ * fail(); // No such key
+ * }
+ *
+ * // Clear the set
+ * set_clear(&set);
+ * @endcode
+ *
+ * \addtogroup generics
+ * @{
+ */
+
+#pragma once
+
+#include <stddef.h>
+#include "map.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+typedef map_t set_t;
+typedef int (set_walk_cb)(const char *, void *);
+
+/*! Creates a new, empty critbit set */
+#define set_make \
+ map_make
+
+/*! Returns non-zero if set contains str */
+#define set_contains(set, str) \
+ map_contains((set), (str))
+
+/*! Inserts str into set. Returns 0 if new, 1 if already present, or ENOMEM. */
+#define set_add(set, str) \
+ map_set((set), (str), (void *)1)
+
+/*! Deletes str from the set, returns 0 on success */
+#define set_del(set, str) \
+ map_del((set), (str))
+
+/*! Clears the given set */
+#define set_clear(set) \
+ map_clear(set)
+
+/*! Calls callback for all strings in the set */
+#define set_walk(set, callback, baton) \
+ map_walk_prefixed((set), "", (callback), (baton))
+
+/*! Calls callback for all strings in set with the given prefix */
+#define set_walk_prefixed(set, prefix, callback, baton) \
+ map_walk_prefixed((set), (prefix), (callback), (baton))
+
+
+#ifdef __cplusplus
+}
+#endif
+
+/** @} */
diff --git a/lib/generic/trie.c b/lib/generic/trie.c
new file mode 100644
index 0000000..0009eef
--- /dev/null
+++ b/lib/generic/trie.c
@@ -0,0 +1,912 @@
+/* Copyright (C) 2016-2018 CZ.NIC, z.s.p.o. <knot-dns@labs.nic.cz>
+
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>.
+
+ The code originated from https://github.com/fanf2/qp/blob/master/qp.c
+ at revision 5f6d93753.
+ */
+
+#include <assert.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include "lib/generic/trie.h"
+#include "lib/utils.h"
+#include "contrib/ucw/lib.h"
+
+#if defined(__i386) || defined(__x86_64) || defined(_M_IX86) \
+	|| (defined(__BYTE_ORDER__) && defined(__ORDER_LITTLE_ENDIAN__) \
+ && __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__)
+
+ /*!
+ * \brief Use a pointer alignment hack to save memory.
+ *
+ * When on, isbranch() relies on the fact that in leaf_t the first pointer
+ * is aligned on multiple of 4 bytes and that the flags bitfield is
+ * overlaid over the lowest two bits of that pointer.
+ * Neither is really guaranteed by the C standards; the second part should
+ * be OK with x86_64 ABI and most likely any other little-endian platform.
+ * It would be possible to manipulate the right bits portably, but it would
+ * complicate the code nontrivially. C++ doesn't even guarantee type-punning.
+	 * In debug mode we check that this works when creating a new trie instance.
+ */
+ #define FLAGS_HACK 1
+#else
+ #define FLAGS_HACK 0
+#endif
+
+typedef unsigned char byte;
+#ifndef uint
+typedef unsigned int uint;
+#define uint uint
+#endif
+typedef uint bitmap_t; /*!< Bit-maps, using the range of 1<<0 to 1<<16 (inclusive). */
+
+typedef struct {
+ uint32_t len; // 32 bits are enough for key lengths; probably even 16 bits would be.
+ char chars[];
+} tkey_t;
+
+/*! \brief Leaf of trie. */
+typedef struct {
+ #if !FLAGS_HACK
+ byte flags;
+ #endif
+ tkey_t *key; /*!< The pointer must be aligned to 4-byte multiples! */
+ trie_val_t val;
+} leaf_t;
+
+/*! \brief A trie node is either leaf_t or branch_t. */
+typedef union node node_t;
+
+/*!
+ * \brief Branch node of trie.
+ *
+ * - The flags distinguish whether the node is a leaf_t (0), or a branch
+ * testing the more-important nibble (1) or the less-important one (2).
+ * - It stores the index of the byte that the node tests. The combined
+ * value (index*4 + flags) increases in branch nodes as you go deeper
+ * into the trie. All the keys below a branch are identical up to the
+ * nibble identified by the branch. Indices have to be stored because
+ * we skip any branch nodes that would have a single child.
+ * (Consequently, the skipped parts of key have to be validated in a leaf.)
+ * - The bitmap indicates which subtries are present. The present child nodes
+ * are stored in the twigs array (with no holes between them).
+ * - To simplify storing keys that are prefixes of each other, the end-of-string
+ * position is treated as another nibble value, ordered before all others.
+ * That affects the bitmap and twigs fields.
+ *
+ * \note The branch nodes are never allocated individually, but they are
+ * always part of either the root node or the twigs array of the parent.
+ */
+typedef struct {
+ #if FLAGS_HACK
+ uint32_t flags : 2,
+ bitmap : 17; /*!< The first bitmap bit is for end-of-string child. */
+ #else
+ byte flags;
+ uint32_t bitmap;
+ #endif
+ uint32_t index;
+ node_t *twigs;
+} branch_t;
+
+union node {
+ leaf_t leaf;
+ branch_t branch;
+};
+
+struct trie {
+ node_t root; // undefined when weight == 0, see empty_root()
+ size_t weight;
+ knot_mm_t mm;
+};
+
+/*! \brief Make the root node empty (debug-only). */
+static inline void empty_root(node_t *root) {
+#ifndef NDEBUG
+ *root = (node_t){ .branch = {
+ .flags = 3, // invalid value that fits
+ .bitmap = 0,
+ .index = -1,
+ .twigs = NULL
+ } };
+#endif
+}
+
+/*! \brief Check that unportable code works OK (debug-only). */
+static void assert_portability(void) {
+#if FLAGS_HACK
+ assert(((union node){ .leaf = {
+ .key = (tkey_t *)(((uint8_t *)NULL) + 1),
+ .val = NULL
+ } }).branch.flags == 1);
+#endif
+}
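+
+/* Illustration of the layout that assert_portability() checks above: leaf_t
+ * starts with a 4-byte-aligned tkey_t *key, so the pointer's two lowest bits
+ * are 00, and the overlaid branch_t::flags bitfield reads exactly those bits
+ * on little-endian targets (a sketch; aligned_key is a hypothetical pointer):
+ *
+ *   node_t n;
+ *   n.leaf.key = aligned_key;  // low two bits of the pointer are 00
+ *   // n.branch.flags == 0 now, i.e. the node reads as a leaf;
+ *   // branch nodes store 1 or 2 in those bits instead, see isbranch().
+ */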
+
+/*! \brief Propagate error codes. */
+#define ERR_RETURN(x) \
+ do { \
+ int err_code_ = x; \
+ if (unlikely(err_code_ != KNOT_EOK)) \
+ return err_code_; \
+ } while (false)
+
+/*!
+ * \brief Count the number of set bits.
+ *
+ * \todo This implementation may be relatively slow on some HW.
+ */
+static uint bitmap_weight(bitmap_t w)
+{
+ assert((w & ~((1 << 17) - 1)) == 0); // using the least-important 17 bits
+ return __builtin_popcount(w);
+}
+
+/*! \brief Only keep the lowest bit in the bitmap (least significant -> twigs[0]). */
+static bitmap_t bitmap_lowest_bit(bitmap_t w)
+{
+ assert((w & ~((1 << 17) - 1)) == 0); // using the least-important 17 bits
+ return 1 << __builtin_ctz(w);
+}
+
+/*! \brief Test flags to determine type of this node. */
+static bool isbranch(const node_t *t)
+{
+ uint f = t->branch.flags;
+ assert(f <= 2);
+ return f != 0;
+}
+
+/*! \brief Make a bitmask for testing a branch bitmap. */
+static bitmap_t nibbit(byte k, uint flags)
+{
+ uint shift = (2 - flags) << 2;
+ uint nibble = (k >> shift) & 0xf;
+ return 1 << (nibble + 1/*because of prefix keys*/);
+}
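+
+/* Worked example: with flags == 1 the more-important (high) nibble is tested;
+ * nibbit(0xAB, 1) computes shift = (2 - 1) << 2 == 4, nibble == 0xA, and
+ * returns 1 << 11. Bit 1 << 0 stays reserved for the end-of-string child,
+ * hence the "+ 1" above. */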
+
+/*! \brief Extract a nibble from a key and turn it into a bitmask. */
+static bitmap_t twigbit(const node_t *t, const char *key, uint32_t len)
+{
+ assert(isbranch(t));
+ uint i = t->branch.index;
+
+ if (i >= len)
+ return 1 << 0; // leaf position
+
+ return nibbit((byte)key[i], t->branch.flags);
+}
+
+/*! \brief Test if a branch node has a child indicated by a bitmask. */
+static bool hastwig(const node_t *t, bitmap_t bit)
+{
+ assert(isbranch(t));
+ return t->branch.bitmap & bit;
+}
+
+/*! \brief Compute offset of an existing child in a branch node. */
+static uint twigoff(const node_t *t, bitmap_t b)
+{
+ assert(isbranch(t));
+ return bitmap_weight(t->branch.bitmap & (b - 1));
+}
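+
+/* Worked example: a bitmap of 0b100110 marks children for bits 1, 2 and 5.
+ * For b == 1 << 5, the mask b - 1 == 0b011111 keeps the less significant
+ * bits, and bitmap_weight(0b000110) == 2, so that child is twigs[2]. */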
+
+/*! \brief Get pointer to a particular child of a branch node. */
+static node_t* twig(node_t *t, uint i)
+{
+ assert(isbranch(t));
+ return &t->branch.twigs[i];
+}
+
+/*!
+ * \brief For a branch node, compute the offset of a child and the child count.
+ *
+ * Having this separate might be meaningful for performance optimization.
+ */
+#define TWIGOFFMAX(off, max, t, b) do { \
+ off = twigoff(t, b); \
+ max = bitmap_weight(t->branch.bitmap); \
+ } while(0)
+
+/*! \brief Simple string comparator. */
+static int key_cmp(const char *k1, uint32_t k1_len, const char *k2, uint32_t k2_len)
+{
+ int ret = memcmp(k1, k2, MIN(k1_len, k2_len));
+ if (ret != 0) {
+ return ret;
+ }
+
+ /* Key string is equal, compare lengths. */
+ if (k1_len == k2_len) {
+ return 0;
+ } else if (k1_len < k2_len) {
+ return -1;
+ } else {
+ return 1;
+ }
+}
+
+trie_t* trie_create(knot_mm_t *mm)
+{
+ assert_portability();
+ trie_t *trie = mm_alloc(mm, sizeof(trie_t));
+ if (trie != NULL) {
+ empty_root(&trie->root);
+ trie->weight = 0;
+ if (mm != NULL)
+ trie->mm = *mm;
+ else
+ mm_ctx_init(&trie->mm);
+ }
+ return trie;
+}
+
+/*! \brief Free anything under the trie node, except for the passed pointer itself. */
+static void clear_trie(node_t *trie, knot_mm_t *mm)
+{
+ if (!isbranch(trie)) {
+ mm_free(mm, trie->leaf.key);
+ } else {
+ branch_t *b = &trie->branch;
+ int len = bitmap_weight(b->bitmap);
+ for (int i = 0; i < len; ++i)
+ clear_trie(b->twigs + i, mm);
+ mm_free(mm, b->twigs);
+ }
+}
+
+void trie_free(trie_t *tbl)
+{
+ if (tbl == NULL)
+ return;
+ if (tbl->weight)
+ clear_trie(&tbl->root, &tbl->mm);
+ mm_free(&tbl->mm, tbl);
+}
+
+void trie_clear(trie_t *tbl)
+{
+ assert(tbl);
+ if (!tbl->weight)
+ return;
+ clear_trie(&tbl->root, &tbl->mm);
+ empty_root(&tbl->root);
+ tbl->weight = 0;
+}
+
+size_t trie_weight(const trie_t *tbl)
+{
+ assert(tbl);
+ return tbl->weight;
+}
+
+struct found {
+ leaf_t *l; /**< the found leaf (NULL if not found) */
+ branch_t *p; /**< the leaf's parent (if exists) */
+ bitmap_t b; /**< bit-mask with a single bit marking l under p */
+};
+/** Search trie for an item with the given key (equality only). */
+static struct found find_equal(trie_t *tbl, const char *key, uint32_t len)
+{
+ assert(tbl);
+ struct found ret0;
+ memset(&ret0, 0, sizeof(ret0));
+ if (!tbl->weight)
+ return ret0;
+ /* Current node and parent while descending (returned values basically). */
+ node_t *t = &tbl->root;
+ branch_t *p = NULL;
+ bitmap_t b = 0;
+ while (isbranch(t)) {
+ __builtin_prefetch(t->branch.twigs);
+ b = twigbit(t, key, len);
+ if (!hastwig(t, b))
+ return ret0;
+ p = &t->branch;
+ t = twig(t, twigoff(t, b));
+ }
+ if (key_cmp(key, len, t->leaf.key->chars, t->leaf.key->len) != 0)
+ return ret0;
+ return (struct found) {
+ .l = &t->leaf,
+ .p = p,
+ .b = b,
+ };
+}
+/** Find item with the first key (lexicographical order). */
+static struct found find_first(trie_t *tbl)
+{
+ assert(tbl);
+ if (!tbl->weight) {
+ struct found ret0;
+ memset(&ret0, 0, sizeof(ret0));
+ return ret0;
+ }
+ /* Current node and parent while descending (returned values basically). */
+ node_t *t = &tbl->root;
+ branch_t *p = NULL;
+ while (isbranch(t)) {
+ p = &t->branch;
+ t = &p->twigs[0];
+ }
+ return (struct found) {
+ .l = &t->leaf,
+ .p = p,
+ .b = p ? bitmap_lowest_bit(p->bitmap) : 0,
+ };
+}
+
+trie_val_t* trie_get_try(trie_t *tbl, const char *key, uint32_t len)
+{
+ struct found found = find_equal(tbl, key, len);
+ return found.l ? &found.l->val : NULL;
+}
+
+trie_val_t* trie_get_first(trie_t *tbl, char **key, uint32_t *len)
+{
+ struct found found = find_first(tbl);
+ if (!found.l)
+ return NULL;
+ if (key)
+ *key = found.l->key->chars;
+ if (len)
+ *len = found.l->key->len;
+ return &found.l->val;
+}
+
+/** Delete the found element (if any) and return its value via val (unless NULL is passed). */
+static int del_found(trie_t *tbl, struct found found, trie_val_t *val)
+{
+ if (!found.l)
+ return KNOT_ENOENT;
+ mm_free(&tbl->mm, found.l->key);
+ if (val != NULL)
+ *val = found.l->val; // we return trie_val_t directly when deleting
+ --tbl->weight;
+ branch_t * const p = found.p; // short-hand
+ if (unlikely(!p)) { // whole trie was a single leaf
+ assert(tbl->weight == 0);
+ empty_root(&tbl->root);
+ return KNOT_EOK;
+ }
+ // remove leaf t as child of p; get child index via pointer arithmetic
+ int ci = ((union node *)found.l) - p->twigs,
+ cc = bitmap_weight(p->bitmap); // child count
+ assert(ci >= 0 && ci < cc);
+
+ if (cc == 2) { // collapse binary node p: move the other child to this node
+ node_t *twigs = p->twigs;
+ (*(union node *)p) = twigs[1 - ci]; // it might be a leaf or branch
+ mm_free(&tbl->mm, twigs);
+ return KNOT_EOK;
+ }
+ memmove(p->twigs + ci, p->twigs + ci + 1, sizeof(node_t) * (cc - ci - 1));
+ p->bitmap &= ~found.b;
+ node_t *twigs = mm_realloc(&tbl->mm, p->twigs, sizeof(node_t) * (cc - 1),
+ sizeof(node_t) * cc);
+ if (likely(twigs != NULL))
+ p->twigs = twigs;
+ /* We can ignore mm_realloc failure, only beware that next time
+ * the prev_size passed to it wouldn't be correct; TODO? */
+ return KNOT_EOK;
+}
+
+int trie_del(trie_t *tbl, const char *key, uint32_t len, trie_val_t *val)
+{
+ struct found found = find_equal(tbl, key, len);
+ return del_found(tbl, found, val);
+}
+
+int trie_del_first(trie_t *tbl, char *key, uint32_t *len, trie_val_t *val)
+{
+ struct found found = find_first(tbl);
+ if (!found.l)
+ return KNOT_ENOENT;
+ if (key) {
+ if (!len)
+ return KNOT_EINVAL;
+ if (*len < found.l->key->len)
+ return kr_error(ENOSPC);
+ memcpy(key, found.l->key->chars, found.l->key->len);
+ }
+ if (len) { // makes sense even with key == NULL
+ *len = found.l->key->len;
+ }
+ return del_found(tbl, found, val);
+}
+
+/*!
+ * \brief Stack of nodes, storing a path down a trie.
+ *
+ * The structure also serves directly as the public trie_it_t type,
+ * in which case it always points to the current leaf, unless we've finished
+ * (i.e. it->len == 0).
+ */
+typedef struct trie_it {
+ node_t* *stack; /*!< The stack; malloc is used directly instead of mm. */
+ uint32_t len; /*!< Current length of the stack. */
+ uint32_t alen; /*!< Allocated/available length of the stack. */
+ /*! \brief Initial storage for \a stack; it should fit in many use cases. */
+ node_t* stack_init[60];
+} nstack_t;
+
+/*! \brief Create a node stack containing just the root (or empty). */
+static void ns_init(nstack_t *ns, trie_t *tbl)
+{
+ assert(tbl);
+ ns->stack = ns->stack_init;
+ ns->alen = sizeof(ns->stack_init) / sizeof(ns->stack_init[0]);
+ if (tbl->weight) {
+ ns->len = 1;
+ ns->stack[0] = &tbl->root;
+ } else {
+ ns->len = 0;
+ }
+}
+
+/*! \brief Free inside of the stack, i.e. not the passed pointer itself. */
+static void ns_cleanup(nstack_t *ns)
+{
+ assert(ns && ns->stack);
+ if (likely(ns->stack == ns->stack_init))
+ return;
+ free(ns->stack);
+ #ifndef NDEBUG
+ ns->stack = NULL;
+ ns->alen = 0;
+ #endif
+}
+
+/*! \brief Allocate more space for the stack. */
+static int ns_longer_alloc(nstack_t *ns)
+{
+ ns->alen *= 2;
+ size_t new_size = sizeof(nstack_t) + ns->alen * sizeof(node_t *);
+ node_t **st;
+ if (ns->stack == ns->stack_init) {
+ st = malloc(new_size);
+ if (st != NULL)
+ memcpy(st, ns->stack, ns->len * sizeof(node_t *));
+ } else {
+ st = realloc(ns->stack, new_size);
+ }
+ if (st == NULL)
+ return KNOT_ENOMEM;
+ ns->stack = st;
+ return KNOT_EOK;
+}
+
+/*! \brief Ensure the node stack can be extended by one. */
+static inline int ns_longer(nstack_t *ns)
+{
+ // get a longer stack if needed
+ if (likely(ns->len < ns->alen))
+ return KNOT_EOK;
+ return ns_longer_alloc(ns); // hand-split the part suitable for inlining
+}
+
+/*!
+ * \brief Find the "branching point" as if searching for a key.
+ *
+ * The whole path to the point is kept on the passed stack;
+ * always at least the root will remain on the top of it.
+ * Beware: the precise semantics of this function is rather tricky.
+ * The top of the stack will contain: the corresponding leaf if exact match is found;
+ * or the immediate node below a branching-point-on-edge or the branching-point itself.
+ *
+ * \param info Set position of the point of first mismatch (in index and flags).
+ * \param first Set the value of the first non-matching character (from trie),
+ * optionally; end-of-string character has value -256 (that's why it's int).
+ * Note: the character is converted to *unsigned* char (i.e. 0..255),
+ * as that's the ordering used in the trie.
+ *
+ * \return KNOT_EOK or KNOT_ENOMEM.
+ */
+static int ns_find_branch(nstack_t *ns, const char *key, uint32_t len,
+ branch_t *info, int *first)
+{
+ assert(ns && ns->len && info);
+ // First find some leaf with longest matching prefix.
+ while (isbranch(ns->stack[ns->len - 1])) {
+ ERR_RETURN(ns_longer(ns));
+ node_t *t = ns->stack[ns->len - 1];
+ __builtin_prefetch(t->branch.twigs);
+ bitmap_t b = twigbit(t, key, len);
+ // Even if our key is missing from this branch we need to
+ // keep iterating down to a leaf. It doesn't matter which
+ // twig we choose since the keys are all the same up to this
+ // index. Note that blindly using twigoff(t, b) can cause
+ // an out-of-bounds index if it equals twigmax(t).
+ uint i = hastwig(t, b) ? twigoff(t, b) : 0;
+ ns->stack[ns->len++] = twig(t, i);
+ }
+ tkey_t *lkey = ns->stack[ns->len-1]->leaf.key;
+ // Find index of the first char that differs.
+ uint32_t index = 0;
+ while (index < MIN(len,lkey->len)) {
+ if (key[index] != lkey->chars[index])
+ break;
+ else
+ ++index;
+ }
+ info->index = index;
+ if (first)
+ *first = lkey->len > index ? (unsigned char)lkey->chars[index] : -256;
+ // Find flags: which half-byte has matched.
+ uint flags;
+ if (index == len && len == lkey->len) { // found equivalent key
+ info->flags = flags = 0;
+ goto success;
+ }
+ if (likely(index < MIN(len,lkey->len))) {
+ byte k2 = (byte)lkey->chars[index];
+ byte k1 = (byte)key[index];
+ flags = ((k1 ^ k2) & 0xf0) ? 1 : 2;
+ } else { // one is prefix of another
+ flags = 1;
+ }
+ info->flags = flags;
+ // now go up the trie from the current leaf
+ branch_t *t;
+ do {
+ if (unlikely(ns->len == 1))
+ goto success; // only the root stays on the stack
+ t = (branch_t*)ns->stack[ns->len - 2];
+ if (t->index < index || (t->index == index && t->flags < flags))
+ goto success;
+ --ns->len;
+ } while (true);
+success:
+ #ifndef NDEBUG // invariants on successful return
+ assert(ns->len);
+ if (isbranch(ns->stack[ns->len - 1])) {
+ t = &ns->stack[ns->len - 1]->branch;
+ assert(t->index > index || (t->index == index && t->flags >= flags));
+ }
+ if (ns->len > 1) {
+ t = &ns->stack[ns->len - 2]->branch;
+ assert(t->index < index || (t->index == index
+ && (t->flags < flags || (t->flags == 1 && flags == 0))));
+ }
+ #endif
+ return KNOT_EOK;
+}
+
+/*!
+ * \brief Advance the node stack to the last leaf in the subtree.
+ *
+ * \return KNOT_EOK or KNOT_ENOMEM.
+ */
+static int ns_last_leaf(nstack_t *ns)
+{
+ assert(ns);
+ do {
+ ERR_RETURN(ns_longer(ns));
+ node_t *t = ns->stack[ns->len - 1];
+ if (!isbranch(t))
+ return KNOT_EOK;
+ int lasti = bitmap_weight(t->branch.bitmap) - 1;
+ assert(lasti >= 0);
+ ns->stack[ns->len++] = twig(t, lasti);
+ } while (true);
+}
+
+/*!
+ * \brief Advance the node stack to the first leaf in the subtree.
+ *
+ * \return KNOT_EOK or KNOT_ENOMEM.
+ */
+static int ns_first_leaf(nstack_t *ns)
+{
+ assert(ns && ns->len);
+ do {
+ ERR_RETURN(ns_longer(ns));
+ node_t *t = ns->stack[ns->len - 1];
+ if (!isbranch(t))
+ return KNOT_EOK;
+ ns->stack[ns->len++] = twig(t, 0);
+ } while (true);
+}
+
+/*!
+ * \brief Advance the node stack to the leaf that is previous to the current node.
+ *
+ * \note Prefix leaf under the current node DOES count (if present; perhaps questionable).
+ * \return KNOT_EOK on success, KNOT_ENOENT on not-found, or possibly KNOT_ENOMEM.
+ */
+static int ns_prev_leaf(nstack_t *ns)
+{
+ assert(ns && ns->len > 0);
+
+ node_t *t = ns->stack[ns->len - 1];
+ if (hastwig(t, 1 << 0)) { // the prefix leaf
+ t = twig(t, 0);
+ ERR_RETURN(ns_longer(ns));
+ ns->stack[ns->len++] = t;
+ return KNOT_EOK;
+ }
+
+ do {
+ if (ns->len < 2)
+ return KNOT_ENOENT; // root without empty key has no previous leaf
+ t = ns->stack[ns->len - 1];
+ node_t *p = ns->stack[ns->len - 2];
+ int pindex = t - p->branch.twigs; // index in parent via pointer arithmetic
+ assert(pindex >= 0 && pindex <= 16);
+ if (pindex > 0) { // t isn't the first child -> go down the previous one
+ ns->stack[ns->len - 1] = twig(p, pindex - 1);
+ return ns_last_leaf(ns);
+ }
+ // we've got to go up again
+ --ns->len;
+ } while (true);
+}
+
+/*!
+ * \brief Advance the node stack to the leaf that is successor to the current node.
+ *
+ * \note Prefix leaf or anything else under the current node DOES count.
+ * \return KNOT_EOK on success, KNOT_ENOENT on not-found, or possibly KNOT_ENOMEM.
+ */
+static int ns_next_leaf(nstack_t *ns)
+{
+ assert(ns && ns->len > 0);
+
+ node_t *t = ns->stack[ns->len - 1];
+ if (isbranch(t))
+ return ns_first_leaf(ns);
+ do {
+ if (ns->len < 2)
+ return KNOT_ENOENT; // not found, as no more parent is available
+ t = ns->stack[ns->len - 1];
+ node_t *p = ns->stack[ns->len - 2];
+ int pindex = t - p->branch.twigs; // index in parent via pointer arithmetic
+ assert(pindex >= 0 && pindex <= 16);
+ int pcount = bitmap_weight(p->branch.bitmap);
+ if (pindex + 1 < pcount) { // t isn't the last child -> go down the next one
+ ns->stack[ns->len - 1] = twig(p, pindex + 1);
+ return ns_first_leaf(ns);
+ }
+ // we've got to go up again
+ --ns->len;
+ } while (true);
+}
+
+int trie_get_leq(trie_t *tbl, const char *key, uint32_t len, trie_val_t **val)
+{
+ assert(tbl && val);
+ *val = NULL; // so on failure we can just return;
+ if (tbl->weight == 0)
+ return KNOT_ENOENT;
+ { // Intentionally un-indented; until end of function, to bound cleanup attr.
+ // First find a key with longest-matching prefix
+ __attribute__((cleanup(ns_cleanup)))
+ nstack_t ns_local;
+ ns_init(&ns_local, tbl);
+ nstack_t *ns = &ns_local;
+ branch_t bp;
+ int un_leaf; // first unmatched character in the leaf
+ ERR_RETURN(ns_find_branch(ns, key, len, &bp, &un_leaf));
+ int un_key = bp.index < len ? (unsigned char)key[bp.index] : -256;
+ node_t *t = ns->stack[ns->len - 1];
+ if (bp.flags == 0) { // found exact match
+ *val = &t->leaf.val;
+ return KNOT_EOK;
+ }
+ // Get t: the last node on matching path
+ if (isbranch(t) && t->branch.index == bp.index && t->branch.flags == bp.flags) {
+ // t is OK
+ } else {
+ // the top of the stack was the first unmatched node -> step up
+ if (ns->len == 1) {
+ // root was unmatched already
+ if (un_key < un_leaf)
+ return KNOT_ENOENT;
+ ERR_RETURN(ns_last_leaf(ns));
+ goto success;
+ }
+ --ns->len;
+ t = ns->stack[ns->len - 1];
+ }
+ // Now we re-do the first "non-matching" step in the trie
+ // but try the previous child if key was less (it may not exist)
+ bitmap_t b = twigbit(t, key, len);
+ int i = hastwig(t, b)
+ ? twigoff(t, b) - (un_key < un_leaf)
+ : twigoff(t, b) - 1 /*twigoff returns successor when !hastwig*/;
+ if (i >= 0) {
+ ERR_RETURN(ns_longer(ns));
+ ns->stack[ns->len++] = twig(t, i);
+ ERR_RETURN(ns_last_leaf(ns));
+ } else {
+ ERR_RETURN(ns_prev_leaf(ns));
+ }
+success:
+ assert(!isbranch(ns->stack[ns->len - 1]));
+ *val = &ns->stack[ns->len - 1]->leaf.val;
+ return 1;
+ }
+}
+
+/*! \brief Initialize a new leaf, copying the key, and returning failure code. */
+static int mk_leaf(node_t *leaf, const char *key, uint32_t len, knot_mm_t *mm)
+{
+ tkey_t *k = mm_alloc(mm, sizeof(tkey_t) + len);
+ #if FLAGS_HACK
+ assert(((uintptr_t)k) % 4 == 0); // we need an aligned pointer
+ #endif
+ if (unlikely(!k))
+ return KNOT_ENOMEM;
+ k->len = len;
+ memcpy(k->chars, key, len);
+ leaf->leaf = (leaf_t){
+ #if !FLAGS_HACK
+ .flags = 0,
+ #endif
+ .val = NULL,
+ .key = k
+ };
+ return KNOT_EOK;
+}
+
+trie_val_t* trie_get_ins(trie_t *tbl, const char *key, uint32_t len)
+{
+ assert(tbl);
+ // First leaf in an empty tbl?
+ if (unlikely(!tbl->weight)) {
+ if (unlikely(mk_leaf(&tbl->root, key, len, &tbl->mm)))
+ return NULL;
+ ++tbl->weight;
+ return &tbl->root.leaf.val;
+ }
+ { // Intentionally un-indented; until end of function, to bound cleanup attr.
+ // Find the branching-point
+ __attribute__((cleanup(ns_cleanup)))
+ nstack_t ns_local;
+ ns_init(&ns_local, tbl);
+ nstack_t *ns = &ns_local;
+ branch_t bp; // branch-point: index and flags signifying the longest common prefix
+ int k2; // the first unmatched character in the leaf
+ if (unlikely(ns_find_branch(ns, key, len, &bp, &k2)))
+ return NULL;
+ node_t *t = ns->stack[ns->len - 1];
+ if (bp.flags == 0) // the same key was already present
+ return &t->leaf.val;
+ node_t leaf;
+ if (unlikely(mk_leaf(&leaf, key, len, &tbl->mm)))
+ return NULL;
+
+ if (isbranch(t) && bp.index == t->branch.index && bp.flags == t->branch.flags) {
+ // The node t needs a new leaf child.
+ bitmap_t b1 = twigbit(t, key, len);
+ assert(!hastwig(t, b1));
+ uint s, m; TWIGOFFMAX(s, m, t, b1); // new child position and original child count
+ node_t *twigs = mm_realloc(&tbl->mm, t->branch.twigs,
+ sizeof(node_t) * (m + 1), sizeof(node_t) * m);
+ if (unlikely(!twigs))
+ goto err_leaf;
+ memmove(twigs + s + 1, twigs + s, sizeof(node_t) * (m - s));
+ twigs[s] = leaf;
+ t->branch.twigs = twigs;
+ t->branch.bitmap |= b1;
+ ++tbl->weight;
+ return &twigs[s].leaf.val;
+ } else {
+ // We need to insert a new binary branch with leaf at *t.
+ // Note: it works the same for the case where we insert above root t.
+ #ifndef NDEBUG
+ if (ns->len > 1) {
+ node_t *pt = ns->stack[ns->len - 2];
+ assert(hastwig(pt, twigbit(pt, key, len)));
+ }
+ #endif
+ node_t *twigs = mm_alloc(&tbl->mm, sizeof(node_t) * 2);
+ if (unlikely(!twigs))
+ goto err_leaf;
+ node_t t2 = *t; // Save before overwriting t.
+ t->branch.flags = bp.flags;
+ t->branch.index = bp.index;
+ t->branch.twigs = twigs;
+ bitmap_t b1 = twigbit(t, key, len);
+ bitmap_t b2 = unlikely(k2 == -256) ? (1 << 0) : nibbit(k2, bp.flags);
+ t->branch.bitmap = b1 | b2;
+ *twig(t, twigoff(t, b1)) = leaf;
+ *twig(t, twigoff(t, b2)) = t2;
+ ++tbl->weight;
+ return &twig(t, twigoff(t, b1))->leaf.val;
+ };
+err_leaf:
+ mm_free(&tbl->mm, leaf.leaf.key);
+ return NULL;
+ }
+}
+
+/*! \brief Apply a function to every trie_val_t*, in order; a recursive solution. */
+static int apply_trie(node_t *t, int (*f)(trie_val_t *, void *), void *d)
+{
+ assert(t);
+ if (!isbranch(t))
+ return f(&t->leaf.val, d);
+ int child_count = bitmap_weight(t->branch.bitmap);
+ for (int i = 0; i < child_count; ++i)
+ ERR_RETURN(apply_trie(twig(t, i), f, d));
+ return KNOT_EOK;
+}
+
+int trie_apply(trie_t *tbl, int (*f)(trie_val_t *, void *), void *d)
+{
+ assert(tbl && f);
+ if (!tbl->weight)
+ return KNOT_EOK;
+ return apply_trie(&tbl->root, f, d);
+}
+
+/* These are all thin wrappers around the static ns_* functions above. */
+trie_it_t* trie_it_begin(trie_t *tbl)
+{
+ assert(tbl);
+ trie_it_t *it = malloc(sizeof(nstack_t));
+ if (!it)
+ return NULL;
+ ns_init(it, tbl);
+ if (it->len == 0) // empty tbl
+ return it;
+ if (ns_first_leaf(it)) {
+ ns_cleanup(it);
+ free(it);
+ return NULL;
+ }
+ return it;
+}
+
+void trie_it_next(trie_it_t *it)
+{
+ assert(it && it->len);
+ if (ns_next_leaf(it) != KNOT_EOK)
+ it->len = 0;
+}
+
+bool trie_it_finished(trie_it_t *it)
+{
+ assert(it);
+ return it->len == 0;
+}
+
+void trie_it_free(trie_it_t *it)
+{
+ if (!it)
+ return;
+ ns_cleanup(it);
+ free(it);
+}
+
+const char* trie_it_key(trie_it_t *it, size_t *len)
+{
+ assert(it && it->len);
+ node_t *t = it->stack[it->len - 1];
+ assert(!isbranch(t));
+ tkey_t *key = t->leaf.key;
+ if (len)
+ *len = key->len;
+ return key->chars;
+}
+
+trie_val_t* trie_it_val(trie_it_t *it)
+{
+ assert(it && it->len);
+ node_t *t = it->stack[it->len - 1];
+ assert(!isbranch(t));
+ return &t->leaf.val;
+}
diff --git a/lib/generic/trie.h b/lib/generic/trie.h
new file mode 100644
index 0000000..0550e95
--- /dev/null
+++ b/lib/generic/trie.h
@@ -0,0 +1,150 @@
+/* Copyright (C) 2017-2018 CZ.NIC, z.s.p.o. <knot-dns@labs.nic.cz>
+
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#pragma once
+
+#include <stdbool.h>
+#include <stdint.h>
+
+#include <libknot/mm_ctx.h>
+#include "lib/defines.h"
+
+/*!
+ * \brief Native API of QP-tries:
+ *
+ * - keys are char strings, not necessarily zero-terminated,
+ * the structure copies the contents of the passed keys
+ * - values are void* pointers, typically you get an ephemeral pointer to it
+ * - key lengths are limited to 2^32-1 at the moment
+ *
+ * XXX EDITORS: trie.{h,c} are synced from
+ * https://gitlab.labs.nic.cz/knot/knot-dns/tree/68352fc969/src/contrib/qp-trie
+ * only with tiny adjustments, mostly #includes and KR_EXPORT.
+ */
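+
+/*
+ * A minimal usage sketch (my_data is a hypothetical pointer; error handling
+ * shortened to the bare checks):
+ *
+ *   trie_t *t = trie_create(NULL);            // NULL mm -> plain malloc/free
+ *   trie_val_t *v = trie_get_ins(t, "ab", 2); // insert (or find) key "ab"
+ *   if (v) *v = my_data;                      // the value slot is writable
+ *   v = trie_get_try(t, "ab", 2);             // lookup only, no insertion
+ *   trie_del(t, "ab", 2, NULL);               // remove it again
+ *   trie_free(t);
+ */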
+
+/*! \brief Element value. */
+typedef void* trie_val_t;
+
+/*! \brief Opaque structure holding a QP-trie. */
+typedef struct trie trie_t;
+
+/*! \brief Opaque type for holding a QP-trie iterator. */
+typedef struct trie_it trie_it_t;
+
+/*! \brief Create a trie instance. Pass NULL to use malloc+free. */
+KR_EXPORT
+trie_t* trie_create(knot_mm_t *mm);
+
+/*! \brief Free a trie instance. */
+KR_EXPORT
+void trie_free(trie_t *tbl);
+
+/*! \brief Clear a trie instance (make it empty). */
+KR_EXPORT
+void trie_clear(trie_t *tbl);
+
+/*! \brief Return the number of keys in the trie. */
+KR_EXPORT
+size_t trie_weight(const trie_t *tbl);
+
+/*! \brief Search the trie, returning NULL on failure. */
+KR_EXPORT
+trie_val_t* trie_get_try(trie_t *tbl, const char *key, uint32_t len);
+
+/*!
+ * \brief Return a pointer to the minimum, optionally with the key and its length. */
+KR_EXPORT
+trie_val_t* trie_get_first(trie_t *tbl, char **key, uint32_t *len);
+
+/*! \brief Search the trie, inserting NULL trie_val_t on failure. */
+KR_EXPORT
+trie_val_t* trie_get_ins(trie_t *tbl, const char *key, uint32_t len);
+
+/*!
+ * \brief Search for less-or-equal element.
+ *
+ * \param tbl Trie.
+ * \param key Searched key.
+ * \param len Key length.
+ * \param val Must be valid; it will be set to NULL if not found or errored.
+ * \return KNOT_EOK for exact match, 1 for previous, KNOT_ENOENT for not-found,
+ * or KNOT_E*.
+ */
+KR_EXPORT
+int trie_get_leq(trie_t *tbl, const char *key, uint32_t len, trie_val_t **val);
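+
+/*
+ * Sketch of the return convention, assuming only keys "a" and "c" are present
+ * (ordering is lexicographical):
+ *
+ *   trie_val_t *v;
+ *   int ret = trie_get_leq(tbl, "b", 1, &v);
+ *   // no exact match: ret == 1, v points at the value stored under "a"
+ *   ret = trie_get_leq(tbl, "c", 1, &v);
+ *   // exact match: ret == KNOT_EOK
+ */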
+
+/*!
+ * \brief Apply a function to every trie_val_t, in order.
+ *
+ * \param d Parameter passed as the second argument to f().
+ * \return First nonzero from f() or zero (i.e. KNOT_EOK).
+ */
+int trie_apply(trie_t *tbl, int (*f)(trie_val_t *, void *), void *d);
+
+/*!
+ * \brief Remove an item, returning KNOT_EOK if succeeded or KNOT_ENOENT if not found.
+ *
+ * If val!=NULL and deletion succeeded, the deleted value is set.
+ */
+KR_EXPORT
+int trie_del(trie_t *tbl, const char *key, uint32_t len, trie_val_t *val);
+
+/*!
+ * \brief Remove the first item, returning KNOT_EOK on success.
+ *
+ * You may optionally get the key and/or value.
+ * The key is copied, so you need to pass sufficient len,
+ * otherwise kr_error(ENOSPC) is returned.
+ */
+KR_EXPORT
+int trie_del_first(trie_t *tbl, char *key, uint32_t *len, trie_val_t *val);
+
+/*! \brief Create a new iterator pointing to the first element (if any). */
+KR_EXPORT
+trie_it_t* trie_it_begin(trie_t *tbl);
+
+/*!
+ * \brief Advance the iterator to the next element.
+ *
+ * Iteration is in ascending lexicographical order.
+ * In particular, the empty string would be considered as the very first.
+ *
+ * \note You may not use this function if the trie's key-set has been modified
+ * during the lifetime of the iterator (modifying values only is OK).
+ */
+KR_EXPORT
+void trie_it_next(trie_it_t *it);
+
+/*! \brief Test if the iterator has gone past the last element. */
+KR_EXPORT
+bool trie_it_finished(trie_it_t *it);
+
+/*! \brief Free any resources of the iterator. It's OK to call it on NULL. */
+KR_EXPORT
+void trie_it_free(trie_it_t *it);
+
+/*!
+ * \brief Return pointer to the key of the current element.
+ *
+ * \note The optional len is uint32_t internally but size_t is better for our usage,
+ * as it avoids an additional type conversion.
+ */
+KR_EXPORT
+const char* trie_it_key(trie_it_t *it, size_t *len);
+
+/*! \brief Return pointer to the value of the current element (writable). */
+KR_EXPORT
+trie_val_t* trie_it_val(trie_it_t *it);
diff --git a/lib/layer.h b/lib/layer.h
new file mode 100644
index 0000000..0909cb7
--- /dev/null
+++ b/lib/layer.h
@@ -0,0 +1,107 @@
+/* Copyright (C) 2014-2017 CZ.NIC, z.s.p.o. <knot-dns@labs.nic.cz>
+
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program. If not, see <https://www.gnu.org/licenses/>.
+ */
+
+#pragma once
+
+#include "lib/defines.h"
+#include "lib/utils.h"
+
+#ifdef NOVERBOSELOG
+ #define QRVERBOSE(query, cls, ...)
+#else
+ /** Print a debug message related to resolution.
+ * \param _query associated kr_query, may be NULL
+ * \param _cls identifying string, typically of length exactly four (padded)
+ * \param ... printf-compatible list of parameters
+ */
+ #define QRVERBOSE(_query, _cls, ...) do { \
+ const struct kr_query *_qry = (_query); \
+ if (kr_log_trace_enabled(_qry)) { \
+ kr_log_trace(_qry, (_cls), __VA_ARGS__); \
+ } else if (VERBOSE_STATUS) { \
+ kr_log_qverbose_impl(_qry, (_cls), __VA_ARGS__); \
+ } \
+ } while (false)
+#endif
+
+/** Layer processing states. Only one value at a time (but see TODO).
+ *
+ * Each state represents the state machine transition,
+ * and determines readiness for the next action.
+ * See struct kr_layer_api for the actions.
+ *
+ * TODO: the cookie module sometimes sets (_FAIL | _DONE) on purpose (!)
+ */
+enum kr_layer_state {
+ KR_STATE_CONSUME = 1 << 0, /*!< Consume data. */
+ KR_STATE_PRODUCE = 1 << 1, /*!< Produce data. */
+ KR_STATE_DONE = 1 << 2, /*!< Finished successfully. */
+ KR_STATE_FAIL = 1 << 3, /*!< Error. */
+ KR_STATE_YIELD = 1 << 4, /*!< Paused, waiting for a sub-query. */
+};
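+
+/* A sketch of a consume callback in a hypothetical module, illustrating how
+ * these states are returned (broken() and answered() are placeholders;
+ * the callback types live in struct kr_layer_api below):
+ *
+ *   static int my_consume(kr_layer_t *ctx, knot_pkt_t *pkt)
+ *   {
+ *           if (broken(pkt))
+ *                   return KR_STATE_FAIL;  // abort processing
+ *           if (answered(pkt))
+ *                   return KR_STATE_DONE;  // finished successfully
+ *           return ctx->state;             // otherwise pass the state through
+ *   }
+ */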
+
+/* Forward declarations. */
+struct kr_layer_api;
+
+/** Packet processing context. */
+typedef struct kr_layer {
+ int state; /*!< The current state; bitmap of enum kr_layer_state. */
+ struct kr_request *req; /*!< The corresponding request. */
+ const struct kr_layer_api *api;
+} kr_layer_t;
+
+/** Packet processing module API. All functions return the new kr_layer_state. */
+struct kr_layer_api {
+ /** Start of processing the DNS request. */
+ int (*begin)(kr_layer_t *ctx);
+
+ int (*reset)(kr_layer_t *ctx);
+
+ /** Paired to begin, called both on successes and failures. */
+ int (*finish)(kr_layer_t *ctx);
+
+ /** Processing an answer from upstream or the answer to the request. */
+ int (*consume)(kr_layer_t *ctx, knot_pkt_t *pkt);
+
+ /** Produce either an answer to the request or a query for upstream (or fail). */
+ int (*produce)(kr_layer_t *ctx, knot_pkt_t *pkt);
+
+ /** Finalises the outbound query packet with the knowledge of the IP addresses.
+ * The checkout layer doesn't persist the state, so canceled subrequests
+ * don't affect the resolution or rest of the processing. */
+ int (*checkout)(kr_layer_t *ctx, knot_pkt_t *packet, struct sockaddr *dst, int type);
+
+ /** Finalises the answer.
+	 * Last chance to affect what will get into the answer, including EDNS. */
+ int (*answer_finalize)(kr_layer_t *ctx);
+
+ /** The module can store anything in here. */
+ void *data;
+
+ /** Internal to ./daemon/ffimodule.c. */
+ int cb_slots[];
+};
+
+typedef struct kr_layer_api kr_layer_api_t;
+
+/** Pickled layer state (api, input, state). */
+struct kr_layer_pickle {
+ struct kr_layer_pickle *next;
+ const struct kr_layer_api *api;
+ knot_pkt_t *pkt;
+ unsigned state;
+};
+
diff --git a/lib/layer/cache.c b/lib/layer/cache.c
new file mode 100644
index 0000000..c7bbc1a
--- /dev/null
+++ b/lib/layer/cache.c
@@ -0,0 +1,31 @@
+/* Copyright (C) 2017 CZ.NIC, z.s.p.o. <knot-dns@labs.nic.cz>
+
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program. If not, see <https://www.gnu.org/licenses/>.
+ */
+
+#include "lib/module.h"
+#include "lib/cache/api.h"
+
+/** Module implementation. */
+const kr_layer_api_t *cache_layer(struct kr_module *module)
+{
+ static const kr_layer_api_t _layer = {
+ .produce = &cache_peek,
+ .consume = &cache_stash,
+ };
+
+ return &_layer;
+}
+
+KR_MODULE_EXPORT(cache)
diff --git a/lib/layer/iterate.c b/lib/layer/iterate.c
new file mode 100644
index 0000000..cf57cc5
--- /dev/null
+++ b/lib/layer/iterate.c
@@ -0,0 +1,1135 @@
+/* Copyright (C) 2014-2017 CZ.NIC, z.s.p.o. <knot-dns@labs.nic.cz>
+
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program. If not, see <https://www.gnu.org/licenses/>.
+ */
+
+/** @file iterate.c
+ *
+ * This builtin module is mainly active in the consume phase.
+ * Primary responsibilities:
+ * - Classify the packet as auth/nonauth and change its AA flag accordingly.
+ * - Pick interesting RRs to kr_request::answ_selected and ::auth_selected,
+ * NEW: and classify their rank, except for validation status.
+ * - Update kr_query::zone_cut (in case of referral).
+ * - Interpret CNAMEs.
+ * - Prepare the followup query - either inline or as another kr_query
+ * (CNAME jumps create a new "sibling" query).
+ */
+
+#include <sys/time.h>
+#include <assert.h>
+#include <arpa/inet.h>
+
+#include <contrib/cleanup.h>
+#include <libknot/descriptor.h>
+#include <libknot/rrtype/rdname.h>
+#include <libknot/rrtype/rrsig.h>
+
+#include "lib/layer/iterate.h"
+#include "lib/resolve.h"
+#include "lib/rplan.h"
+#include "lib/defines.h"
+#include "lib/nsrep.h"
+#include "lib/module.h"
+#include "lib/dnssec/ta.h"
+
+#define VERBOSE_MSG(...) QRVERBOSE(req->current_query, "iter", __VA_ARGS__)
+#define QVERBOSE_MSG(qry, ...) QRVERBOSE(qry, "iter", __VA_ARGS__)
+
+/* The iterator often walks through packet sections; this is an abstraction for that. */
+typedef int (*rr_callback_t)(const knot_rrset_t *, unsigned, struct kr_request *);
+
+/** Return minimized QNAME/QTYPE for current zone cut. */
+static const knot_dname_t *minimized_qname(struct kr_query *query, uint16_t *qtype)
+{
+ /* Minimization disabled. */
+ const knot_dname_t *qname = query->sname;
+ if (qname[0] == '\0' || query->flags.NO_MINIMIZE || query->flags.STUB) {
+ return qname;
+ }
+
+ /* Minimize name to contain current zone cut + 1 label. */
+ int cut_labels = knot_dname_labels(query->zone_cut.name, NULL);
+ int qname_labels = knot_dname_labels(qname, NULL);
+	while (qname[0] && qname_labels > cut_labels + 1) {
+ qname = knot_wire_next_label(qname, NULL);
+ qname_labels -= 1;
+ }
+
+ /* Hide QTYPE if minimized. */
+ if (qname != query->sname) {
+ *qtype = KNOT_RRTYPE_NS;
+ }
+
+ return qname;
+}
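+
+/* Worked example: with zone cut "com." and sname "www.example.com.", the loop
+ * above strips labels until one label below the cut remains, so the outgoing
+ * question becomes "example.com." and QTYPE is rewritten to NS. */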
+
+/** Answer is paired to query. */
+static bool is_paired_to_query(const knot_pkt_t *answer, struct kr_query *query)
+{
+ uint16_t qtype = query->stype;
+ const knot_dname_t *qname = minimized_qname(query, &qtype);
+
+ return query->id == knot_wire_get_id(answer->wire) &&
+ knot_wire_get_qdcount(answer->wire) > 0 &&
+ query->sclass == knot_pkt_qclass(answer) &&
+ qtype == knot_pkt_qtype(answer) &&
+ knot_dname_is_equal(qname, knot_pkt_qname(answer));
+}
+
+/** Relaxed rule for AA, either AA=1 or SOA matching zone cut is required. */
+static bool is_authoritative(const knot_pkt_t *answer, struct kr_query *query)
+{
+ if (knot_wire_get_aa(answer->wire)) {
+ return true;
+ }
+
+ const knot_pktsection_t *ns = knot_pkt_section(answer, KNOT_AUTHORITY);
+ for (unsigned i = 0; i < ns->count; ++i) {
+ const knot_rrset_t *rr = knot_pkt_rr(ns, i);
+ if (rr->type == KNOT_RRTYPE_SOA
+ && knot_dname_in_bailiwick(rr->owner, query->zone_cut.name) >= 0) {
+ return true;
+ }
+ }
+
+#ifndef STRICT_MODE
+ /* Last resort to work around broken auths, if the zone cut is at/parent of the QNAME. */
+ if (knot_dname_is_equal(query->zone_cut.name, knot_pkt_qname(answer))) {
+ return true;
+ }
+#endif
+
+	/* Some authoritative servers are hopelessly broken; allow lame answers in permissive mode. */
+ if (query->flags.PERMISSIVE) {
+ return true;
+ }
+
+ return false;
+}
+
+int kr_response_classify(const knot_pkt_t *pkt)
+{
+ const knot_pktsection_t *an = knot_pkt_section(pkt, KNOT_ANSWER);
+ switch (knot_wire_get_rcode(pkt->wire)) {
+ case KNOT_RCODE_NOERROR:
+ return (an->count == 0) ? PKT_NODATA : PKT_NOERROR;
+ case KNOT_RCODE_NXDOMAIN:
+ return PKT_NXDOMAIN;
+ case KNOT_RCODE_REFUSED:
+ return PKT_REFUSED;
+ default:
+ return PKT_ERROR;
+ }
+}
+
+/** @internal Filter ANY or loopback addresses. */
+static bool is_valid_addr(const uint8_t *addr, size_t len)
+{
+ if (len == sizeof(struct in_addr)) {
+ /* Filter ANY and 127.0.0.0/8 */
+ uint32_t ip_host = ntohl(*(const uint32_t *)(addr));
+ if (ip_host == 0 || (ip_host & 0xff000000) == 0x7f000000) {
+ return false;
+ }
+ } else if (len == sizeof(struct in6_addr)) {
+ struct in6_addr ip6_mask;
+ memset(&ip6_mask, 0, sizeof(ip6_mask));
+ /* All except last byte are zeroed, last byte defines ANY/::1 */
+ if (memcmp(addr, ip6_mask.s6_addr, sizeof(ip6_mask.s6_addr) - 1) == 0) {
+ return (addr[len - 1] > 1);
+ }
+ }
+ return true;
+}
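+
+/* Examples: 0.0.0.0 and 127.0.0.5 are rejected (ANY and loopback), as are
+ * :: and ::1; addresses like 192.0.2.1 or 2001:db8::1 pass the filter. */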
+
+/** @internal Update NS address from record \a rr. Return _FAIL on error. */
+static int update_nsaddr(const knot_rrset_t *rr, struct kr_query *query, int *glue_cnt)
+{
+ if (rr->type == KNOT_RRTYPE_A || rr->type == KNOT_RRTYPE_AAAA) {
+ const knot_rdata_t *rdata = rr->rrs.rdata;
+ const int a_len = rr->type == KNOT_RRTYPE_A
+ ? sizeof(struct in_addr) : sizeof(struct in6_addr);
+ if (a_len != rdata->len) {
+ QVERBOSE_MSG(query, "<= ignoring invalid glue, length %d != %d\n",
+ (int)rdata->len, a_len);
+ return KR_STATE_FAIL;
+ }
+ char name_str[KR_DNAME_STR_MAXLEN];
+ char addr_str[INET6_ADDRSTRLEN];
+ WITH_VERBOSE(query) {
+ const int af = (rr->type == KNOT_RRTYPE_A) ? AF_INET : AF_INET6;
+ knot_dname_to_str(name_str, rr->owner, sizeof(name_str));
+ name_str[sizeof(name_str) - 1] = 0;
+ inet_ntop(af, rdata->data, addr_str, sizeof(addr_str));
+ }
+ if (!(query->flags.ALLOW_LOCAL) &&
+ !is_valid_addr(rdata->data, rdata->len)) {
+ QVERBOSE_MSG(query, "<= ignoring invalid glue for "
+ "'%s': '%s'\n", name_str, addr_str);
+ return KR_STATE_CONSUME; /* Ignore invalid addresses */
+ }
+ int ret = kr_zonecut_add(&query->zone_cut, rr->owner, rdata->data, rdata->len);
+ if (ret != 0) {
+ return KR_STATE_FAIL;
+ }
+
+ ++*glue_cnt; /* reduced verbosity */
+ /* QVERBOSE_MSG(query, "<= using glue for "
+ "'%s': '%s'\n", name_str, addr_str);
+ */
+ }
+ return KR_STATE_CONSUME;
+}
+
+/** @internal From \a pkt, fetch glue records for name \a ns, and update the cut etc.
+ *
+ * \param glue_cnt the number of accepted addresses (to be incremented)
+ */
+static void fetch_glue(knot_pkt_t *pkt, const knot_dname_t *ns, bool in_bailiwick,
+ struct kr_request *req, const struct kr_query *qry, int *glue_cnt)
+{
+ ranked_rr_array_t *selected[] = kr_request_selected(req);
+ for (knot_section_t i = KNOT_ANSWER; i <= KNOT_ADDITIONAL; ++i) {
+ const knot_pktsection_t *sec = knot_pkt_section(pkt, i);
+ for (unsigned k = 0; k < sec->count; ++k) {
+ const knot_rrset_t *rr = knot_pkt_rr(sec, k);
+ if (!knot_dname_is_equal(ns, rr->owner)) {
+ continue;
+ }
+ if ((rr->type != KNOT_RRTYPE_A) &&
+ (rr->type != KNOT_RRTYPE_AAAA)) {
+ continue;
+ }
+
+ uint8_t rank = (in_bailiwick && i == KNOT_ANSWER)
+ ? (KR_RANK_INITIAL | KR_RANK_AUTH) : KR_RANK_OMIT;
+ (void) kr_ranked_rrarray_add(selected[i], rr, rank,
+ false, qry->uid, &req->pool);
+
+ if ((rr->type == KNOT_RRTYPE_A) &&
+ (req->ctx->options.NO_IPV4)) {
+ continue;
+ }
+ if ((rr->type == KNOT_RRTYPE_AAAA) &&
+ (req->ctx->options.NO_IPV6)) {
+ continue;
+ }
+ (void) update_nsaddr(rr, req->current_query, glue_cnt);
+ }
+ }
+}
+
+/** Attempt to find glue for given nameserver name (best effort). */
+static bool has_glue(knot_pkt_t *pkt, const knot_dname_t *ns)
+{
+ for (knot_section_t i = KNOT_ANSWER; i <= KNOT_ADDITIONAL; ++i) {
+ const knot_pktsection_t *sec = knot_pkt_section(pkt, i);
+ for (unsigned k = 0; k < sec->count; ++k) {
+ const knot_rrset_t *rr = knot_pkt_rr(sec, k);
+ if (knot_dname_is_equal(ns, rr->owner) &&
+ (rr->type == KNOT_RRTYPE_A || rr->type == KNOT_RRTYPE_AAAA)) {
+ return true;
+ }
+ }
+ }
+ return false;
+}
+
+/** @internal Update the cut with another NS(+glue) record.
+ * @param current_cut is cut name before this packet.
+ * @return _DONE if cut->name changes, _FAIL on error, and _CONSUME otherwise. */
+static int update_cut(knot_pkt_t *pkt, const knot_rrset_t *rr,
+ struct kr_request *req, const knot_dname_t *current_cut,
+ int *glue_cnt)
+{
+ struct kr_query *qry = req->current_query;
+ struct kr_zonecut *cut = &qry->zone_cut;
+ int state = KR_STATE_CONSUME;
+
+ /* New authority MUST be at/below the authority of the current cut;
+ * also qname must be below new authority;
+ * otherwise it's a possible cache injection attempt. */
+ const bool ok = knot_dname_in_bailiwick(rr->owner, current_cut) >= 0
+ && knot_dname_in_bailiwick(qry->sname, rr->owner) >= 0;
+ if (!ok) {
+ VERBOSE_MSG("<= authority: ns outside bailiwick\n");
+#ifdef STRICT_MODE
+ return KR_STATE_FAIL;
+#else
+ /* Workaround: ignore out-of-bailiwick NSs for authoritative answers,
+ * but fail for referrals. This is important to detect lame answers. */
+ if (knot_pkt_section(pkt, KNOT_ANSWER)->count == 0) {
+ state = KR_STATE_FAIL;
+ }
+ return state;
+#endif
+ }
+
+ /* Update zone cut name */
+ if (!knot_dname_is_equal(rr->owner, cut->name)) {
+ /* Remember parent cut and descend to new (keep keys and TA). */
+ struct kr_zonecut *parent = mm_alloc(&req->pool, sizeof(*parent));
+ if (parent) {
+ memcpy(parent, cut, sizeof(*parent));
+ kr_zonecut_init(cut, rr->owner, &req->pool);
+ cut->key = parent->key;
+ cut->trust_anchor = parent->trust_anchor;
+ cut->parent = parent;
+ } else {
+ kr_zonecut_set(cut, rr->owner);
+ }
+ state = KR_STATE_DONE;
+ }
+
+ /* Fetch glue for each NS */
+ knot_rdata_t *rdata_i = rr->rrs.rdata;
+ for (unsigned i = 0; i < rr->rrs.count;
+ ++i, rdata_i = knot_rdataset_next(rdata_i)) {
+ const knot_dname_t *ns_name = knot_ns_name(rdata_i);
+ /* Glue is mandatory for NS below zone */
+ if (knot_dname_in_bailiwick(ns_name, rr->owner) >= 0
+ && !has_glue(pkt, ns_name)) {
+ const char *msg =
+ "<= authority: missing mandatory glue, skipping NS";
+ WITH_VERBOSE(qry) {
+ auto_free char *ns_str = kr_dname_text(ns_name);
+ VERBOSE_MSG("%s %s\n", msg, ns_str);
+ }
+ continue;
+ }
+ int ret = kr_zonecut_add(cut, ns_name, NULL, 0);
+ assert(!ret); (void)ret;
+
+ /* Choose when to use glue records. */
+ const bool in_bailiwick =
+ knot_dname_in_bailiwick(ns_name, current_cut) >= 0;
+ bool do_fetch;
+ if (qry->flags.PERMISSIVE) {
+ do_fetch = true;
+ } else if (qry->flags.STRICT) {
+ /* Strict mode uses only mandatory glue. */
+ do_fetch = knot_dname_in_bailiwick(ns_name, cut->name) >= 0;
+ } else {
+ /* Normal mode uses in-bailiwick glue. */
+ do_fetch = in_bailiwick;
+ }
+ if (do_fetch) {
+ fetch_glue(pkt, ns_name, in_bailiwick, req, qry, glue_cnt);
+ }
+ }
+
+ return state;
+}
+
+/** Compute rank appropriate for RRs present in the packet.
+ * @param answer whether the RR is from answer or authority section
+ * @param is_nonauth whether it came from a referral or forwarding (etc.) */
+static uint8_t get_initial_rank(const knot_rrset_t *rr, const struct kr_query *qry,
+ const bool answer, const bool is_nonauth)
+{
+ /* For RRSIGs, ensure the KR_RANK_AUTH flag corresponds to the signed RR. */
+ uint16_t type = kr_rrset_type_maysig(rr);
+
+ if (qry->flags.CACHED) {
+ return rr->additional ? *(uint8_t *)rr->additional : KR_RANK_OMIT;
+ /* ^^ Current use case for "cached" RRs without rank: hints module. */
+ }
+ if (answer || type == KNOT_RRTYPE_DS
+ || type == KNOT_RRTYPE_SOA /* needed for aggressive negative caching */
+ || type == KNOT_RRTYPE_NSEC || type == KNOT_RRTYPE_NSEC3) {
+ /* We almost always want these validated, and it should be possible. */
+ return KR_RANK_INITIAL | KR_RANK_AUTH;
+ }
+ /* Be aggressive: try to validate anything else (almost never extra latency). */
+ return KR_RANK_TRY;
+ /* TODO: this classifier of authoritativity may not be perfect yet. */
+}
+
+static int pick_authority(knot_pkt_t *pkt, struct kr_request *req, bool to_wire)
+{
+ struct kr_query *qry = req->current_query;
+ const knot_pktsection_t *ns = knot_pkt_section(pkt, KNOT_AUTHORITY);
+
+ const knot_dname_t *zonecut_name = qry->zone_cut.name;
+ bool referral = !knot_wire_get_aa(pkt->wire);
+ if (referral) {
+		/* zone cut already updated by process_authority();
+		 * use the parent zonecut name */
+ zonecut_name = qry->zone_cut.parent ? qry->zone_cut.parent->name : qry->zone_cut.name;
+ to_wire = false;
+ }
+
+ for (unsigned i = 0; i < ns->count; ++i) {
+ const knot_rrset_t *rr = knot_pkt_rr(ns, i);
+ if (rr->rclass != KNOT_CLASS_IN
+ || knot_dname_in_bailiwick(rr->owner, zonecut_name) < 0) {
+ continue;
+ }
+ uint8_t rank = get_initial_rank(rr, qry, false,
+ qry->flags.FORWARD || referral);
+ int ret = kr_ranked_rrarray_add(&req->auth_selected, rr,
+ rank, to_wire, qry->uid, &req->pool);
+ if (ret != kr_ok()) {
+ return ret;
+ }
+ }
+
+ return kr_ok();
+}
+
+static int process_authority(knot_pkt_t *pkt, struct kr_request *req)
+{
+ struct kr_query *qry = req->current_query;
+ assert(!(qry->flags.STUB));
+
+ int result = KR_STATE_CONSUME;
+ if (qry->flags.FORWARD) {
+ return result;
+ }
+
+ const knot_pktsection_t *ns = knot_pkt_section(pkt, KNOT_AUTHORITY);
+ const knot_pktsection_t *an = knot_pkt_section(pkt, KNOT_ANSWER);
+
+#ifdef STRICT_MODE
+ /* AA, terminate resolution chain. */
+ if (knot_wire_get_aa(pkt->wire)) {
+ return KR_STATE_CONSUME;
+ }
+#else
+ /* Work around servers sending back CNAME with different delegation and no AA. */
+ if (an->count > 0 && ns->count > 0) {
+ const knot_rrset_t *rr = knot_pkt_rr(an, 0);
+ if (rr->type == KNOT_RRTYPE_CNAME) {
+ return KR_STATE_CONSUME;
+ }
+		/* Workaround for NSs that are authoritative for both the
+		 * parent and the child zone and mix data from both in a single answer */
+ if (knot_wire_get_aa(pkt->wire) &&
+ (rr->type == qry->stype) &&
+ (knot_dname_is_equal(rr->owner, qry->sname))) {
+ return KR_STATE_CONSUME;
+ }
+ }
+#endif
+ /* Remember current bailiwick for NS processing. */
+ const knot_dname_t *current_zone_cut = qry->zone_cut.name;
+ bool ns_record_exists = false;
+ int glue_cnt = 0;
+ /* Update zone cut information. */
+ for (unsigned i = 0; i < ns->count; ++i) {
+ const knot_rrset_t *rr = knot_pkt_rr(ns, i);
+ if (rr->type == KNOT_RRTYPE_NS) {
+ ns_record_exists = true;
+ int state = update_cut(pkt, rr, req, current_zone_cut, &glue_cnt);
+ switch(state) {
+ case KR_STATE_DONE: result = state; break;
+ case KR_STATE_FAIL: return state; break;
+ default: /* continue */ break;
+ }
+ } else if (rr->type == KNOT_RRTYPE_SOA
+ && knot_dname_in_bailiwick(rr->owner, qry->zone_cut.name) > 0) {
+ /* SOA below cut in authority indicates different authority,
+ * but same NS set. */
+ qry->zone_cut.name = knot_dname_copy(rr->owner, &req->pool);
+ }
+ }
+
+	/* A nameserver that is authoritative for both the parent side and the child
+	 * side of the delegation may respond with an NS record in the answer section,
+	 * and still update the zone cut
+	 * (e.g. what a.gtld-servers.net would respond for `com NS`). */
+ if (!ns_record_exists && knot_wire_get_aa(pkt->wire)) {
+ for (unsigned i = 0; i < an->count; ++i) {
+ const knot_rrset_t *rr = knot_pkt_rr(an, i);
+ if (rr->type == KNOT_RRTYPE_NS
+ && knot_dname_in_bailiwick(rr->owner, qry->zone_cut.name) > 0) {
+ /* NS below cut in authority indicates different authority,
+ * but same NS set. */
+ qry->zone_cut.name = knot_dname_copy(rr->owner, &req->pool);
+ }
+ }
+ }
+
+ if (glue_cnt) {
+ VERBOSE_MSG("<= loaded %d glue addresses\n", glue_cnt);
+ }
+
+
+ if ((qry->flags.DNSSEC_WANT) && (result == KR_STATE_CONSUME)) {
+ if (knot_wire_get_aa(pkt->wire) == 0 &&
+ knot_wire_get_ancount(pkt->wire) == 0 &&
+ ns_record_exists) {
+			/* Unhelpful referral;
+			 * prevent it from being validated as an authoritative answer */
+ result = KR_STATE_DONE;
+ }
+ }
+
+ /* CONSUME => Unhelpful referral.
+ * DONE => Zone cut updated. */
+ return result;
+}
+
+static void finalize_answer(knot_pkt_t *pkt, struct kr_query *qry, struct kr_request *req)
+{
+ /* Finalize header */
+ knot_pkt_t *answer = req->answer;
+ knot_wire_set_rcode(answer->wire, knot_wire_get_rcode(pkt->wire));
+}
+
+static int unroll_cname(knot_pkt_t *pkt, struct kr_request *req, bool referral, const knot_dname_t **cname_ret)
+{
+ struct kr_query *query = req->current_query;
+ assert(!(query->flags.STUB));
+ /* Process answer type */
+ const knot_pktsection_t *an = knot_pkt_section(pkt, KNOT_ANSWER);
+ const knot_dname_t *cname = NULL;
+ const knot_dname_t *pending_cname = query->sname;
+ unsigned cname_chain_len = 0;
+ bool is_final = (query->parent == NULL);
+ uint32_t iter_count = 0;
+ bool strict_mode = (query->flags.STRICT);
+ do {
+		/* A CNAME was found in the previous iteration, but records may not follow the correct order.
+		 * Try to find records for the pending_cname owner, starting from the section start. */
+ cname = pending_cname;
+ pending_cname = NULL;
+ const int cname_labels = knot_dname_labels(cname, NULL);
+ for (unsigned i = 0; i < an->count; ++i) {
+ const knot_rrset_t *rr = knot_pkt_rr(an, i);
+
+ /* Skip the RR if its owner+type doesn't interest us. */
+ const uint16_t type = kr_rrset_type_maysig(rr);
+ const bool type_OK = rr->type == query->stype || type == query->stype
+ || type == KNOT_RRTYPE_CNAME || type == KNOT_RRTYPE_DNAME;
+ /* TODO: actually handle DNAMEs */
+ if (rr->rclass != KNOT_CLASS_IN || !type_OK
+ || !knot_dname_is_equal(rr->owner, cname)
+ || knot_dname_in_bailiwick(rr->owner, query->zone_cut.name) < 0) {
+ continue;
+ }
+
+ if (rr->type == KNOT_RRTYPE_RRSIG) {
+ int rrsig_labels = knot_rrsig_labels(rr->rrs.rdata);
+ if (rrsig_labels > cname_labels) {
+					/* Clearly a wrong RRSIG; don't pick it,
+					 * but don't fail immediately,
+					 * let the validator work. */
+ continue;
+ }
+ if (rrsig_labels < cname_labels) {
+ query->flags.DNSSEC_WEXPAND = true;
+ }
+ }
+
+ /* Process records matching current SNAME */
+ int state = KR_STATE_FAIL;
+ bool to_wire = false;
+ if (is_final) {
+				/* If not a referral, mark the record to be written to the final answer */
+ to_wire = !referral;
+ } else {
+ int cnt_ = 0;
+ state = update_nsaddr(rr, query->parent, &cnt_);
+ if (state == KR_STATE_FAIL) {
+ return state;
+ }
+ }
+ uint8_t rank = get_initial_rank(rr, query, true,
+ query->flags.FORWARD || referral);
+ state = kr_ranked_rrarray_add(&req->answ_selected, rr,
+ rank, to_wire, query->uid, &req->pool);
+ if (state != kr_ok()) {
+ return KR_STATE_FAIL;
+ }
+ /* Jump to next CNAME target */
+ if ((query->stype == KNOT_RRTYPE_CNAME) || (rr->type != KNOT_RRTYPE_CNAME)) {
+ continue;
+ }
+ cname_chain_len += 1;
+ pending_cname = knot_cname_name(rr->rrs.rdata);
+ if (!pending_cname) {
+ break;
+ }
+ if (cname_chain_len > an->count || cname_chain_len > KR_CNAME_CHAIN_LIMIT) {
+ VERBOSE_MSG("<= too long cname chain\n");
+ return KR_STATE_FAIL;
+ }
+			/* Don't use pending_cname immediately.
+			 * There can still be records for the "old" cname. */
+ }
+ if (!pending_cname) {
+ break;
+ }
+ if (knot_dname_is_equal(cname, pending_cname)) {
+ VERBOSE_MSG("<= cname chain loop\n");
+ return KR_STATE_FAIL;
+ }
+ /* In strict mode, explicitly fetch each CNAME target. */
+ if (strict_mode) {
+ cname = pending_cname;
+ break;
+ }
+ /* Information outside bailiwick is not trusted. */
+ if (knot_dname_in_bailiwick(pending_cname, query->zone_cut.name) < 0) {
+ cname = pending_cname;
+ break;
+ }
+ /* The validator still can't handle multiple zones in one answer,
+ * so we only follow if a single label is replaced.
+ * TODO: this still isn't 100%, as the target might have a NS+DS,
+ * possibly leading to a SERVFAIL for the in-bailiwick name. */
+ const int pending_labels = knot_dname_labels(pending_cname, NULL);
+ if (pending_labels != cname_labels) {
+ cname = pending_cname;
+ break;
+ }
+ if (knot_dname_matched_labels(pending_cname, cname) !=
+ (cname_labels - 1)) {
+ cname = pending_cname;
+ break;
+ }
+ } while (++iter_count < KR_CNAME_CHAIN_LIMIT);
+ if (iter_count >= KR_CNAME_CHAIN_LIMIT) {
+ VERBOSE_MSG("<= too long cname chain\n");
+ return KR_STATE_FAIL;
+ }
+ *cname_ret = cname;
+ return kr_ok();
+}
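+
+/* Illustrative sketch (an assumed example, not taken from real data):
+ * given sname = www.example.com. and an answer section
+ *     www.example.com.  CNAME  web.example.com.
+ *     web.example.com.  CNAME  host.cdn.example.
+ * unroll_cname() records the RRs matching each pending owner into
+ * req->answ_selected and follows the chain target by target; the
+ * out-of-bailiwick hop to host.cdn.example. stops the loop, and *cname_ret
+ * tells the caller which name to chase with a follow-up query. */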
+
+static int process_referral_answer(knot_pkt_t *pkt, struct kr_request *req)
+{
+ const knot_dname_t *cname = NULL;
+ int state = unroll_cname(pkt, req, true, &cname);
+ if (state != kr_ok()) {
+ return KR_STATE_FAIL;
+ }
+ struct kr_query *query = req->current_query;
+ if (!(query->flags.CACHED)) {
+		/* If not cached (i.e. obtained from upstream),
+		 * make sure that other layers do not treat this
+		 * as an authoritative answer (even with AA=1).
+		 * There can be answers with AA=1, an empty
+		 * answer section and NS in authority.
+		 * Clearing AA prevents them from being stored
+		 * in the packet cache.
+		 * If the packet is already cached, don't touch it. */
+ knot_wire_clear_aa(pkt->wire);
+ }
+ state = pick_authority(pkt, req, false);
+ return state == kr_ok() ? KR_STATE_DONE : KR_STATE_FAIL;
+}
+
+static int process_final(knot_pkt_t *pkt, struct kr_request *req,
+ const knot_dname_t *cname)
+{
+ const int pkt_class = kr_response_classify(pkt);
+ struct kr_query *query = req->current_query;
+ ranked_rr_array_t *array = &req->answ_selected;
+ for (size_t i = 0; i < array->len; ++i) {
+ const knot_rrset_t *rr = array->at[i]->rr;
+ if (!knot_dname_is_equal(rr->owner, cname)) {
+ continue;
+ }
+ if ((rr->rclass != query->sclass) ||
+ (rr->type != query->stype)) {
+ continue;
+ }
+ const bool to_wire = ((pkt_class & (PKT_NXDOMAIN|PKT_NODATA)) != 0);
+ const int state = pick_authority(pkt, req, to_wire);
+ if (state != kr_ok()) {
+ return KR_STATE_FAIL;
+ }
+ if (!array->at[i]->to_wire) {
+ const size_t last_idx = array->len - 1;
+ size_t j = i;
+ ranked_rr_array_entry_t *entry = array->at[i];
+ /* Relocate record to the end, after current cname */
+ while (j < last_idx) {
+ array->at[j] = array->at[j + 1];
+ ++j;
+ }
+ array->at[last_idx] = entry;
+ entry->to_wire = true;
+ }
+ finalize_answer(pkt, query, req);
+ return KR_STATE_DONE;
+ }
+ return kr_ok();
+}
+
+static int process_answer(knot_pkt_t *pkt, struct kr_request *req)
+{
+ struct kr_query *query = req->current_query;
+
+	/* Response for a minimized QNAME. Note that the current iterator's
+	 * minimization is only able to ask one label below a zone cut.
+	 * NODATA => may be an empty non-terminal, retry (found zone cut)
+	 * NOERROR => found zone cut, retry, except for the case described below
+	 * NXDOMAIN => parent is the zone cut, retry as a workaround for bad authoritatives
+	 */
+ const bool is_final = (query->parent == NULL);
+ const int pkt_class = kr_response_classify(pkt);
+ const knot_dname_t * pkt_qname = knot_pkt_qname(pkt);
+ if (!knot_dname_is_equal(pkt_qname, query->sname) &&
+ (pkt_class & (PKT_NOERROR|PKT_NXDOMAIN|PKT_REFUSED|PKT_NODATA))) {
+		/* Check for a parent server that is authoritative for the child
+		 * zone, e.g. several ccTLDs where the SLD and TLD share name servers. */
+ const knot_pktsection_t *ans = knot_pkt_section(pkt, KNOT_ANSWER);
+ if ((pkt_class & (PKT_NOERROR)) && ans->count > 0 &&
+ knot_dname_is_equal(pkt_qname, query->zone_cut.name)) {
+ VERBOSE_MSG("<= continuing with qname minimization\n");
+ } else {
+ /* fall back to disabling minimization */
+ VERBOSE_MSG("<= retrying with non-minimized name\n");
+ query->flags.NO_MINIMIZE = true;
+ }
+ return KR_STATE_CONSUME;
+ }
+
+	/* This answer didn't improve the resolution chain, therefore it must be authoritative (relaxed to negative answers). */
+ if (!is_authoritative(pkt, query)) {
+ if (!(query->flags.FORWARD) &&
+ pkt_class & (PKT_NXDOMAIN|PKT_NODATA)) {
+ VERBOSE_MSG("<= lame response: non-auth sent negative response\n");
+ return KR_STATE_FAIL;
+ }
+ }
+
+ const knot_dname_t *cname = NULL;
+ /* Process answer type */
+ int state = unroll_cname(pkt, req, false, &cname);
+ if (state != kr_ok()) {
+ return state;
+ }
+ /* Make sure that this is an authoritative answer (even with AA=0) for other layers */
+ knot_wire_set_aa(pkt->wire);
+ /* Either way it resolves current query. */
+ query->flags.RESOLVED = true;
+ /* Follow canonical name as next SNAME. */
+ if (!knot_dname_is_equal(cname, query->sname)) {
+		/* Check if the target record has already been copied. */
+ query->flags.CNAME = true;
+ if (is_final) {
+ state = process_final(pkt, req, cname);
+ if (state != kr_ok()) {
+ return state;
+ }
+ } else if ((query->flags.FORWARD) &&
+ ((query->stype == KNOT_RRTYPE_DS) ||
+ (query->stype == KNOT_RRTYPE_NS))) {
+			/* CNAME'ed answer for a DS or NS subquery.
+			 * Treat it as proof of zone cut nonexistence. */
+ return KR_STATE_DONE;
+ }
+ VERBOSE_MSG("<= cname chain, following\n");
+ /* Check if the same query was followed in the same CNAME chain. */
+ for (const struct kr_query *q = query->cname_parent; q != NULL;
+ q = q->cname_parent) {
+ if (q->sclass == query->sclass &&
+ q->stype == query->stype &&
+ knot_dname_is_equal(q->sname, cname)) {
+ VERBOSE_MSG("<= cname chain loop\n");
+ return KR_STATE_FAIL;
+ }
+ }
+ struct kr_query *next = kr_rplan_push(&req->rplan, query->parent, cname, query->sclass, query->stype);
+ if (!next) {
+ return KR_STATE_FAIL;
+ }
+ next->flags.AWAIT_CUT = true;
+
+ /* Copy transitive flags from original query to CNAME followup. */
+ next->flags.TRACE = query->flags.TRACE;
+ next->flags.ALWAYS_CUT = query->flags.ALWAYS_CUT;
+ next->flags.NO_MINIMIZE = query->flags.NO_MINIMIZE;
+ next->flags.NO_THROTTLE = query->flags.NO_THROTTLE;
+
+ if (query->flags.FORWARD) {
+ next->forward_flags.CNAME = true;
+ if (query->parent == NULL) {
+ state = kr_nsrep_copy_set(&next->ns, &query->ns);
+ if (state != kr_ok()) {
+ return KR_STATE_FAIL;
+ }
+ }
+ }
+ next->cname_parent = query;
+		/* Want DNSSEC if and only if it's possible to secure
+		 * this name (i.e. iff it is covered by a TA). */
+ if (kr_ta_covers_qry(req->ctx, cname, query->stype)) {
+ next->flags.DNSSEC_WANT = true;
+ } else {
+ next->flags.DNSSEC_WANT = false;
+ }
+ if (!(query->flags.FORWARD) ||
+ (query->flags.DNSSEC_WEXPAND)) {
+ state = pick_authority(pkt, req, false);
+ if (state != kr_ok()) {
+ return KR_STATE_FAIL;
+ }
+ }
+ } else if (!query->parent) {
+ /* Answer for initial query */
+ const bool to_wire = ((pkt_class & (PKT_NXDOMAIN|PKT_NODATA)) != 0);
+ state = pick_authority(pkt, req, to_wire);
+ if (state != kr_ok()) {
+ return KR_STATE_FAIL;
+ }
+ finalize_answer(pkt, query, req);
+ } else {
+		/* Answer for a sub-query; DS, IP for NS etc.
+		 * It may contain NSEC/NSEC3 records proving data
+		 * non-existence or wildcard expansion.
+		 * If so, they must be checked by the validator;
+		 * if not, the authority section is useless.
+		 * dnssec/nsec.c & dnssec/nsec3.c use
+		 * rrsets from the incoming packet, while the
+		 * validator uses answ_selected & auth_selected.
+		 * So, if NSEC/NSEC3 records are present in authority,
+		 * pick_authority() must be called.
+		 * TODO: refactor the nsec/nsec3 modules to work with
+		 * answ_selected & auth_selected instead of the incoming pkt. */
+ bool auth_is_unuseful = true;
+ const knot_pktsection_t *ns = knot_pkt_section(pkt, KNOT_AUTHORITY);
+ for (unsigned i = 0; i < ns->count; ++i) {
+ const knot_rrset_t *rr = knot_pkt_rr(ns, i);
+ if (rr->type == KNOT_RRTYPE_NSEC ||
+ rr->type == KNOT_RRTYPE_NSEC3) {
+ auth_is_unuseful = false;
+ break;
+ }
+ }
+ if (!auth_is_unuseful) {
+ state = pick_authority(pkt, req, false);
+ if (state != kr_ok()) {
+ return KR_STATE_FAIL;
+ }
+ }
+ }
+ return KR_STATE_DONE;
+}
+
+/** @internal like process_answer() but for the STUB mode. */
+static int process_stub(knot_pkt_t *pkt, struct kr_request *req)
+{
+ struct kr_query *query = req->current_query;
+ assert(query->flags.STUB);
+ /* Pick all answer RRs. */
+ const knot_pktsection_t *an = knot_pkt_section(pkt, KNOT_ANSWER);
+ for (unsigned i = 0; i < an->count; ++i) {
+ const knot_rrset_t *rr = knot_pkt_rr(an, i);
+ int err = kr_ranked_rrarray_add(&req->answ_selected, rr,
+ KR_RANK_OMIT | KR_RANK_AUTH, true, query->uid, &req->pool);
+ /* KR_RANK_AUTH: we don't have the records directly from
+ * an authoritative source, but we do trust the server and it's
+ * supposed to only send us authoritative records. */
+ if (err != kr_ok()) {
+ return KR_STATE_FAIL;
+ }
+ }
+
+ knot_wire_set_aa(pkt->wire);
+ query->flags.RESOLVED = true;
+ /* Pick authority RRs. */
+ int pkt_class = kr_response_classify(pkt);
+ const bool to_wire = ((pkt_class & (PKT_NXDOMAIN|PKT_NODATA)) != 0);
+ int err = pick_authority(pkt, req, to_wire);
+ if (err != kr_ok()) {
+ return KR_STATE_FAIL;
+ }
+
+ finalize_answer(pkt, query, req);
+ return KR_STATE_DONE;
+}
+
+/** Error handling, RFC1034 5.3.3, 4d. */
+static int resolve_error(knot_pkt_t *pkt, struct kr_request *req)
+{
+ return KR_STATE_FAIL;
+}
+
+/* Stateless single resolution iteration step; a reset is not needed. */
+static int reset(kr_layer_t *ctx) { return KR_STATE_PRODUCE; }
+
+/* Set resolution context and parameters. */
+static int begin(kr_layer_t *ctx)
+{
+ if (ctx->state & (KR_STATE_DONE|KR_STATE_FAIL)) {
+ return ctx->state;
+ }
+ /*
+ * RFC7873 5.4 extends the QUERY operation code behaviour in order to
+ * be able to generate requests for server cookies. Such requests have
+ * QDCOUNT equal to zero and must contain a cookie option.
+ * Server cookie queries must be handled by the cookie module/layer
+ * before this layer.
+ */
+ const knot_pkt_t *pkt = ctx->req->qsource.packet;
+ if (!pkt || knot_wire_get_qdcount(pkt->wire) == 0) {
+ return KR_STATE_FAIL;
+ }
+
+ struct kr_query *qry = ctx->req->current_query;
+	/* Avoid any other classes, and avoid any meta-types;
+	 * ANY would be the exception, but it seems broken ATM. */
+	if (qry->sclass != KNOT_CLASS_IN
+	    || (knot_rrtype_is_metatype(qry->stype)
+		/* && qry->stype != KNOT_RRTYPE_ANY */)) {
+ knot_wire_set_rcode(ctx->req->answer->wire, KNOT_RCODE_NOTIMPL);
+ return KR_STATE_FAIL;
+ }
+
+ return reset(ctx);
+}
+
+int kr_make_query(struct kr_query *query, knot_pkt_t *pkt)
+{
+ /* Minimize QNAME (if possible). */
+ uint16_t qtype = query->stype;
+ const knot_dname_t *qname = minimized_qname(query, &qtype);
+
+ /* Form a query for the authoritative. */
+ knot_pkt_clear(pkt);
+ int ret = knot_pkt_put_question(pkt, qname, query->sclass, qtype);
+ if (ret != KNOT_EOK) {
+ return ret;
+ }
+
+ /* Query built, expect answer. */
+ query->id = kr_rand_bytes(2);
+	/* We must respect https://tools.ietf.org/html/rfc7766#section-6.2.1
+	 * - When sending multiple queries over a TCP connection, clients MUST NOT
+	 *   reuse the DNS Message ID of an in-flight query on that connection.
+	 *
+	 * So, if the query is going to be sent over a TCP connection,
+	 * this ID can be changed later to avoid clashing with a query that was
+	 * already sent but hasn't received an answer yet.
+	 */
+ knot_wire_set_id(pkt->wire, query->id);
+ pkt->parsed = pkt->size;
+
+ return kr_ok();
+}
+
+static int prepare_query(kr_layer_t *ctx, knot_pkt_t *pkt)
+{
+ assert(pkt && ctx);
+ struct kr_request *req = ctx->req;
+ struct kr_query *query = req->current_query;
+ if (!query || ctx->state & (KR_STATE_DONE|KR_STATE_FAIL)) {
+ return ctx->state;
+ }
+
+ /* Make query */
+ int ret = kr_make_query(query, pkt);
+ if (ret != 0) {
+ return KR_STATE_FAIL;
+ }
+
+ WITH_VERBOSE(query) {
+ KR_DNAME_GET_STR(name_str, query->sname);
+ KR_RRTYPE_GET_STR(type_str, query->stype);
+ QVERBOSE_MSG(query, "'%s' type '%s' new uid was assigned .%02u, parent uid .%02u\n",
+ name_str, type_str, req->rplan.next_uid,
+ query->parent ? query->parent->uid : 0);
+ }
+
+ query->uid = req->rplan.next_uid;
+ req->rplan.next_uid += 1;
+
+ return KR_STATE_CONSUME;
+}
+
+static int resolve_badmsg(knot_pkt_t *pkt, struct kr_request *req, struct kr_query *query)
+{
+#ifndef STRICT_MODE
+ /* Work around broken auths/load balancers */
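+	/* The escalation below works as follows:
+	 *   1st bad message => retry with QNAME minimization disabled,
+	 *   2nd bad message => retry in SAFEMODE,
+	 *   3rd bad message => give up via resolve_error(). */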
+ if (query->flags.SAFEMODE) {
+ return resolve_error(pkt, req);
+ } else if (query->flags.NO_MINIMIZE) {
+ query->flags.SAFEMODE = true;
+ return KR_STATE_DONE;
+ } else {
+ query->flags.NO_MINIMIZE = true;
+ return KR_STATE_DONE;
+ }
+#else
+ return resolve_error(pkt, req);
+#endif
+}
+
+static int resolve_notimpl(knot_pkt_t *pkt, struct kr_request *req, struct kr_query *qry)
+{
+ if (qry->stype == KNOT_RRTYPE_RRSIG && qry->parent != NULL) {
+		/* The RRSIG subquery got NOTIMPL.
+		 * Possible scenario: the same NS is authoritative for both child
+		 * and parent, but the child isn't signed.
+		 * We got a delegation to the parent,
+		 * then the NS responded as the NS for the child zone.
+		 * The answer contained the requested record but no RRSIGs,
+		 * so the validator issued an RRSIG query. If the qname is the
+		 * zone name, we can get NOTIMPL. Ask for the DS to find out the
+		 * security status.
+		 * TODO: maybe it would be better to do this in the validator, when
+		 * RRSIG revalidation occurs.
+		 */
+ struct kr_rplan *rplan = &req->rplan;
+ struct kr_query *next = kr_rplan_push(rplan, qry->parent, qry->sname,
+ qry->sclass, KNOT_RRTYPE_DS);
+ if (!next) {
+ return KR_STATE_FAIL;
+ }
+ kr_zonecut_set(&next->zone_cut, qry->parent->zone_cut.name);
+ kr_zonecut_copy(&next->zone_cut, &qry->parent->zone_cut);
+ kr_zonecut_copy_trust(&next->zone_cut, &qry->parent->zone_cut);
+ next->flags.DNSSEC_WANT = true;
+ qry->flags.RESOLVED = true;
+ return KR_STATE_DONE;
+ }
+ return resolve_badmsg(pkt, req, qry);
+}
+
+/** Resolve input query or continue resolution with followups.
+ *
+ * This roughly corresponds to RFC1034, 5.3.3 4a-d.
+ */
+static int resolve(kr_layer_t *ctx, knot_pkt_t *pkt)
+{
+ assert(pkt && ctx);
+ struct kr_request *req = ctx->req;
+ struct kr_query *query = req->current_query;
+ if (!query) {
+ return ctx->state;
+ }
+
+ WITH_VERBOSE(query) {
+ if (query->flags.TRACE) {
+ auto_free char *pkt_text = kr_pkt_text(pkt);
+ VERBOSE_MSG("<= answer received: \n%s\n", pkt_text);
+ }
+ }
+
+ if (query->flags.RESOLVED || query->flags.BADCOOKIE_AGAIN) {
+ return ctx->state;
+ }
+
+ /* Check for packet processing errors first.
+ * Note - we *MUST* check if it has at least a QUESTION,
+ * otherwise it would crash on accessing QNAME. */
+#ifdef STRICT_MODE
+ if (pkt->parsed < pkt->size) {
+ VERBOSE_MSG("<= pkt contains excessive data\n");
+ return resolve_badmsg(pkt, req, query);
+ } else
+#endif
+ if (pkt->parsed <= KNOT_WIRE_HEADER_SIZE) {
+ VERBOSE_MSG("<= malformed response\n");
+ return resolve_badmsg(pkt, req, query);
+ } else if (!is_paired_to_query(pkt, query)) {
+ VERBOSE_MSG("<= ignoring mismatching response\n");
+		/* Force TCP to work around authoritatives messing up the question,
+		 * without yielding to spoofed responses. */
+ query->flags.TCP = true;
+ return resolve_badmsg(pkt, req, query);
+ } else if (knot_wire_get_tc(pkt->wire)) {
+ VERBOSE_MSG("<= truncated response, failover to TCP\n");
+ if (query) {
+ /* Fail if already on TCP. */
+ if (query->flags.TCP) {
+ VERBOSE_MSG("<= TC=1 with TCP, bailing out\n");
+ return resolve_error(pkt, req);
+ }
+ query->flags.TCP = true;
+ }
+ return KR_STATE_CONSUME;
+ }
+
+#ifndef NOVERBOSELOG
+ const knot_lookup_t *rcode = knot_lookup_by_id(knot_rcode_names, knot_wire_get_rcode(pkt->wire));
+#endif
+
+ /* Check response code. */
+ switch(knot_wire_get_rcode(pkt->wire)) {
+ case KNOT_RCODE_NOERROR:
+ case KNOT_RCODE_NXDOMAIN:
+ break; /* OK */
+ case KNOT_RCODE_REFUSED:
+ case KNOT_RCODE_SERVFAIL: {
+ if (query->flags.STUB) {
+ /* Pass through in stub mode */
+ break;
+ }
+ VERBOSE_MSG("<= rcode: %s\n", rcode ? rcode->name : "??");
+ query->fails += 1;
+ if (query->fails >= KR_QUERY_NSRETRY_LIMIT) {
+ query->fails = 0; /* Reset per-query counter. */
+ return resolve_error(pkt, req);
+ } else {
+ if (!query->flags.FORWARD) {
+				query->flags.NO_MINIMIZE = true; /* Drop minimization as a safeguard. */
+ }
+ return KR_STATE_CONSUME;
+ }
+ }
+ case KNOT_RCODE_FORMERR:
+ VERBOSE_MSG("<= rcode: %s\n", rcode ? rcode->name : "??");
+ return resolve_badmsg(pkt, req, query);
+ case KNOT_RCODE_NOTIMPL:
+ VERBOSE_MSG("<= rcode: %s\n", rcode ? rcode->name : "??");
+ return resolve_notimpl(pkt, req, query);
+ default:
+ VERBOSE_MSG("<= rcode: %s\n", rcode ? rcode->name : "??");
+ return resolve_error(pkt, req);
+ }
+
+ /* Forwarding/stub mode is special. */
+ if (query->flags.STUB) {
+ return process_stub(pkt, req);
+ }
+
+ /* Resolve authority to see if it's referral or authoritative. */
+ int state = process_authority(pkt, req);
+ switch(state) {
+ case KR_STATE_CONSUME: /* Not referral, process answer. */
+ VERBOSE_MSG("<= rcode: %s\n", rcode ? rcode->name : "??");
+ state = process_answer(pkt, req);
+ break;
+ case KR_STATE_DONE: /* Referral */
+		state = process_referral_answer(pkt, req);
+ VERBOSE_MSG("<= referral response, follow\n");
+ break;
+ default:
+ break;
+ }
+
+ return state;
+}
+
+/** Module implementation. */
+const kr_layer_api_t *iterate_layer(struct kr_module *module)
+{
+ static const kr_layer_api_t _layer = {
+ .begin = &begin,
+ .reset = &reset,
+ .consume = &resolve,
+ .produce = &prepare_query
+ };
+ return &_layer;
+}
+
+KR_MODULE_EXPORT(iterate)
+
+#undef VERBOSE_MSG
diff --git a/lib/layer/iterate.h b/lib/layer/iterate.h
new file mode 100644
index 0000000..a9395bf
--- /dev/null
+++ b/lib/layer/iterate.h
@@ -0,0 +1,36 @@
+/* Copyright (C) 2014-2017 CZ.NIC, z.s.p.o. <knot-dns@labs.nic.cz>
+
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program. If not, see <https://www.gnu.org/licenses/>.
+ */
+
+#pragma once
+
+#include "lib/layer.h"
+#include "lib/rplan.h"
+
+/* Packet classification. */
+enum {
+ PKT_NOERROR = 1 << 0, /* Positive response */
+ PKT_NODATA = 1 << 1, /* No data response */
+ PKT_NXDOMAIN = 1 << 2, /* Negative response */
+ PKT_REFUSED = 1 << 3, /* Refused response */
+ PKT_ERROR = 1 << 4 /* Bad message */
+};
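+
+/* For example, an NXDOMAIN response classifies as PKT_NXDOMAIN, while a
+ * NOERROR response with an empty answer section classifies as PKT_NODATA;
+ * callers typically test masks such as (pkt_class & (PKT_NXDOMAIN|PKT_NODATA))
+ * to detect negative answers. */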
+
+/** Classify response by type. */
+int kr_response_classify(const knot_pkt_t *pkt);
+
+/** Make next iterative query. */
+KR_EXPORT
+int kr_make_query(struct kr_query *query, knot_pkt_t *pkt);
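+
+/* Usage sketch (illustrative; assumes the caller prepared a struct kr_query
+ * and allocates the packet itself):
+ *
+ *	knot_pkt_t *pkt = knot_pkt_new(NULL, KNOT_WIRE_MAX_PKTSIZE, NULL);
+ *	if (pkt && kr_make_query(qry, pkt) == kr_ok()) {
+ *		// pkt now holds the (possibly QNAME-minimized) question
+ *		// with a fresh random message ID, ready to be sent
+ *	}
+ */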
diff --git a/lib/layer/validate.c b/lib/layer/validate.c
new file mode 100644
index 0000000..55c3ad7
--- /dev/null
+++ b/lib/layer/validate.c
@@ -0,0 +1,1133 @@
+/* Copyright (C) 2014-2017 CZ.NIC, z.s.p.o. <knot-dns@labs.nic.cz>
+
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program. If not, see <https://www.gnu.org/licenses/>.
+ */
+
+#include <assert.h>
+#include <errno.h>
+#include <sys/time.h>
+#include <stdio.h>
+#include <string.h>
+
+#include <contrib/cleanup.h>
+#include <libknot/packet/wire.h>
+#include <libknot/rrtype/rdname.h>
+#include <libknot/rrtype/rrsig.h>
+#include <libdnssec/error.h>
+
+#include "lib/dnssec/nsec.h"
+#include "lib/dnssec/nsec3.h"
+#include "lib/dnssec.h"
+#include "lib/layer.h"
+#include "lib/resolve.h"
+#include "lib/rplan.h"
+#include "lib/utils.h"
+#include "lib/defines.h"
+#include "lib/module.h"
+
+#define VERBOSE_MSG(qry, ...) QRVERBOSE(qry, "vldr", __VA_ARGS__)
+
+#define MAX_REVALIDATION_CNT 2
+
+/**
+ * Search in section for given type.
+ * @param sec Packet section.
+ * @param type Type to search for.
+ * @return True if found.
+ */
+static bool section_has_type(const knot_pktsection_t *sec, uint16_t type)
+{
+ if (!sec) {
+ return false;
+ }
+
+ for (unsigned i = 0; i < sec->count; ++i) {
+ const knot_rrset_t *rr = knot_pkt_rr(sec, i);
+ if (rr->type == type) {
+ return true;
+ }
+ }
+
+ return false;
+}
+
+static bool pkt_has_type(const knot_pkt_t *pkt, uint16_t type)
+{
+ if (!pkt) {
+ return false;
+ }
+
+ if (section_has_type(knot_pkt_section(pkt, KNOT_ANSWER), type)) {
+ return true;
+ }
+ if (section_has_type(knot_pkt_section(pkt, KNOT_AUTHORITY), type)) {
+ return true;
+ }
+ return section_has_type(knot_pkt_section(pkt, KNOT_ADDITIONAL), type);
+}
+
+static void log_bogus_rrsig(kr_rrset_validation_ctx_t *vctx, const struct kr_query *qry,
+			    const knot_rrset_t *rr, const char *msg)
+{
+ WITH_VERBOSE(qry) {
+ auto_free char *name_text = kr_dname_text(rr->owner);
+ auto_free char *type_text = kr_rrtype_text(rr->type);
+ VERBOSE_MSG(qry, ">< %s: %s %s "
+ "(%u matching RRSIGs, %u expired, %u not yet valid, "
+ "%u invalid signer, %u invalid label count, %u invalid key, "
+ "%u invalid crypto, %u invalid NSEC)\n",
+ msg, name_text, type_text, vctx->rrs_counters.matching_name_type,
+ vctx->rrs_counters.expired, vctx->rrs_counters.notyet,
+ vctx->rrs_counters.signer_invalid, vctx->rrs_counters.labels_invalid,
+ vctx->rrs_counters.key_invalid, vctx->rrs_counters.crypto_invalid,
+ vctx->rrs_counters.nsec_invalid);
+ }
+}
+
+static int validate_section(kr_rrset_validation_ctx_t *vctx, const struct kr_query *qry,
+ knot_mm_t *pool)
+{
+ if (!vctx) {
+ return kr_error(EINVAL);
+ }
+
+ /* Can't use qry->zone_cut.name directly, as this name can
+ * change when updating cut information before validation.
+ */
+ vctx->zone_name = vctx->keys ? vctx->keys->owner : NULL;
+
+ int validation_result = 0;
+ for (ssize_t i = 0; i < vctx->rrs->len; ++i) {
+ ranked_rr_array_entry_t *entry = vctx->rrs->at[i];
+ const knot_rrset_t *rr = entry->rr;
+
+ if (entry->yielded || vctx->qry_uid != entry->qry_uid) {
+ continue;
+ }
+
+ if (kr_rank_test(entry->rank, KR_RANK_OMIT)
+ || kr_rank_test(entry->rank, KR_RANK_SECURE)) {
+ continue; /* these are already OK */
+ }
+
+ if (rr->type == KNOT_RRTYPE_RRSIG) {
+ const knot_dname_t *signer_name = knot_rrsig_signer_name(rr->rrs.rdata);
+ if (!knot_dname_is_equal(vctx->zone_name, signer_name)) {
+ kr_rank_set(&entry->rank, KR_RANK_MISMATCH);
+ vctx->err_cnt += 1;
+ break;
+ }
+ kr_rank_set(&entry->rank, KR_RANK_OMIT);
+ continue;
+ }
+
+ uint8_t rank_orig = entry->rank;
+ validation_result = kr_rrset_validate(vctx, rr);
+ if (validation_result == kr_ok()) {
+ kr_rank_set(&entry->rank, KR_RANK_SECURE);
+
+ } else if (kr_rank_test(rank_orig, KR_RANK_TRY)) {
+ log_bogus_rrsig(vctx, qry, rr,
+ "failed to validate non-authoritative data but continuing");
+ vctx->result = kr_ok();
+ kr_rank_set(&entry->rank, KR_RANK_TRY);
+ /* ^^ BOGUS would be more accurate, but it might change
+ * to MISMATCH on revalidation, e.g. in test val_referral_nods :-/
+ */
+
+ } else if (validation_result == kr_error(ENOENT)) {
+ /* no RRSIGs found */
+ kr_rank_set(&entry->rank, KR_RANK_MISSING);
+ vctx->err_cnt += 1;
+ log_bogus_rrsig(vctx, qry, rr, "no valid RRSIGs found");
+ } else {
+ kr_rank_set(&entry->rank, KR_RANK_BOGUS);
+ vctx->err_cnt += 1;
+ log_bogus_rrsig(vctx, qry, rr, "bogus signatures");
+ }
+ }
+ return kr_ok();
+}
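+
+/* Summary of the rank transitions performed above:
+ *   validated successfully        -> KR_RANK_SECURE
+ *   failed but was KR_RANK_TRY    -> stays KR_RANK_TRY (tolerated)
+ *   no RRSIGs found               -> KR_RANK_MISSING
+ *   any other validation failure  -> KR_RANK_BOGUS
+ * RRSIGs themselves get KR_RANK_OMIT, or KR_RANK_MISMATCH when the signer
+ * differs from the current zone name. */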
+
+static int validate_records(struct kr_request *req, knot_pkt_t *answer, knot_mm_t *pool, bool has_nsec3)
+{
+ struct kr_query *qry = req->current_query;
+ if (!qry->zone_cut.key) {
+ VERBOSE_MSG(qry, "<= no DNSKEY, can't validate\n");
+ return kr_error(EBADMSG);
+ }
+
+ kr_rrset_validation_ctx_t vctx = {
+ .pkt = answer,
+ .rrs = &req->answ_selected,
+ .section_id = KNOT_ANSWER,
+ .keys = qry->zone_cut.key,
+ .zone_name = qry->zone_cut.name,
+ .timestamp = qry->timestamp.tv_sec,
+ .qry_uid = qry->uid,
+ .has_nsec3 = has_nsec3,
+ .flags = 0,
+ .err_cnt = 0,
+ .result = 0
+ };
+
+ int ret = validate_section(&vctx, qry, pool);
+ req->answ_validated = (vctx.err_cnt == 0);
+ if (ret != kr_ok()) {
+ return ret;
+ }
+
+ uint32_t an_flags = vctx.flags;
+ vctx.rrs = &req->auth_selected;
+ vctx.section_id = KNOT_AUTHORITY;
+ vctx.flags = 0;
+ vctx.err_cnt = 0;
+ vctx.result = 0;
+
+ ret = validate_section(&vctx, qry, pool);
+ req->auth_validated = (vctx.err_cnt == 0);
+ if (ret != kr_ok()) {
+ return ret;
+ }
+
+	/* Records were validated.
+	 * If there is a wildcard expansion or opt-out
+	 * in the answer, flag the query accordingly.
+	 */
+ if (an_flags & KR_DNSSEC_VFLG_WEXPAND) {
+ qry->flags.DNSSEC_WEXPAND = true;
+ }
+ if (an_flags & KR_DNSSEC_VFLG_OPTOUT) {
+ qry->flags.DNSSEC_OPTOUT = true;
+ }
+
+ return ret;
+}
+
+static int validate_keyset(struct kr_request *req, knot_pkt_t *answer, bool has_nsec3)
+{
+ /* Merge DNSKEY records from answer that are below/at current cut. */
+ struct kr_query *qry = req->current_query;
+ bool updated_key = false;
+ const knot_pktsection_t *an = knot_pkt_section(answer, KNOT_ANSWER);
+ for (unsigned i = 0; i < an->count; ++i) {
+ const knot_rrset_t *rr = knot_pkt_rr(an, i);
+ if (rr->type != KNOT_RRTYPE_DNSKEY
+ || knot_dname_in_bailiwick(rr->owner, qry->zone_cut.name) < 0) {
+ continue;
+ }
+ /* Merge with zone cut (or replace ancestor key). */
+ if (!qry->zone_cut.key || !knot_dname_is_equal(qry->zone_cut.key->owner, rr->owner)) {
+ qry->zone_cut.key = knot_rrset_copy(rr, qry->zone_cut.pool);
+ if (!qry->zone_cut.key) {
+ return kr_error(ENOMEM);
+ }
+ updated_key = true;
+ } else {
+ int ret = knot_rdataset_merge(&qry->zone_cut.key->rrs,
+ &rr->rrs, qry->zone_cut.pool);
+ if (ret != 0) {
+ knot_rrset_free(qry->zone_cut.key, qry->zone_cut.pool);
+ qry->zone_cut.key = NULL;
+ return ret;
+ }
+ updated_key = true;
+ }
+ }
+
+ /* Check if there's a key for current TA. */
+ if (updated_key && !(qry->flags.CACHED)) {
+
+ kr_rrset_validation_ctx_t vctx = {
+ .pkt = answer,
+ .rrs = &req->answ_selected,
+ .section_id = KNOT_ANSWER,
+ .keys = qry->zone_cut.key,
+ .zone_name = qry->zone_cut.name,
+ .timestamp = qry->timestamp.tv_sec,
+ .qry_uid = qry->uid,
+ .has_nsec3 = has_nsec3,
+ .flags = 0,
+ .result = 0
+ };
+ int ret = kr_dnskeys_trusted(&vctx, qry->zone_cut.trust_anchor);
+ if (ret != 0) {
+ if (ret != kr_error(DNSSEC_INVALID_DS_ALGORITHM) &&
+ ret != kr_error(EAGAIN)) {
+ log_bogus_rrsig(&vctx, qry, qry->zone_cut.key, "bogus key");
+ }
+ knot_rrset_free(qry->zone_cut.key, qry->zone_cut.pool);
+ qry->zone_cut.key = NULL;
+ return ret;
+ }
+
+ if (vctx.flags & KR_DNSSEC_VFLG_WEXPAND) {
+ qry->flags.DNSSEC_WEXPAND = true;
+ }
+ if (vctx.flags & KR_DNSSEC_VFLG_OPTOUT) {
+ qry->flags.DNSSEC_OPTOUT = true;
+ }
+
+ }
+ return kr_ok();
+}
+
+static knot_rrset_t *update_ds(struct kr_zonecut *cut, const knot_pktsection_t *sec)
+{
+ /* Aggregate DS records (if using multiple keys) */
+ knot_rrset_t *new_ds = NULL;
+ for (unsigned i = 0; i < sec->count; ++i) {
+ const knot_rrset_t *rr = knot_pkt_rr(sec, i);
+ if (rr->type != KNOT_RRTYPE_DS) {
+ continue;
+ }
+ int ret = 0;
+ if (new_ds) {
+ ret = knot_rdataset_merge(&new_ds->rrs, &rr->rrs, cut->pool);
+ } else {
+ new_ds = knot_rrset_copy(rr, cut->pool);
+ if (!new_ds) {
+ return NULL;
+ }
+ }
+ if (ret != 0) {
+ knot_rrset_free(new_ds, cut->pool);
+ return NULL;
+ }
+ }
+ return new_ds;
+}
+
+static void mark_insecure_parents(const struct kr_query *qry)
+{
+	/* If there is a chain of DS queries, mark all of them,
+	 * then mark the first non-DS parent.
+	 * Stop if a parent is still waiting for an NS address.
+	 * An NS can be located in an unsigned zone, yet still return
+	 * valid DNSSEC records for the initial query. */
+ struct kr_query *parent = qry->parent;
+ while (parent && !parent->flags.AWAIT_IPV4 && !parent->flags.AWAIT_IPV6) {
+ parent->flags.DNSSEC_WANT = false;
+ parent->flags.DNSSEC_INSECURE = true;
+ if (parent->stype != KNOT_RRTYPE_DS &&
+ parent->stype != KNOT_RRTYPE_RRSIG) {
+ break;
+ }
+ parent = parent->parent;
+ }
+}
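+
+/* Illustrative example (assumed query names): if the current query is
+ * "DS sub.example." with parent "DS example." and grandparent
+ * "A www.sub.example.", the walk marks both parents DNSSEC_INSECURE
+ * and stops after the first non-DS query (the A query). */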
+
+static int update_parent_keys(struct kr_request *req, uint16_t answer_type)
+{
+ struct kr_query *qry = req->current_query;
+ struct kr_query *parent = qry->parent;
+ assert(parent);
+ switch(answer_type) {
+ case KNOT_RRTYPE_DNSKEY:
+ VERBOSE_MSG(qry, "<= parent: updating DNSKEY\n");
+ parent->zone_cut.key = knot_rrset_copy(qry->zone_cut.key, parent->zone_cut.pool);
+ if (!parent->zone_cut.key) {
+ return KR_STATE_FAIL;
+ }
+ break;
+ case KNOT_RRTYPE_DS:
+ VERBOSE_MSG(qry, "<= parent: updating DS\n");
+ if (qry->flags.DNSSEC_INSECURE) { /* DS non-existence proven. */
+ mark_insecure_parents(qry);
+ } else if (qry->flags.DNSSEC_NODS && !qry->flags.FORWARD) {
+ if (qry->flags.DNSSEC_OPTOUT) {
+ mark_insecure_parents(qry);
+ } else {
+ int ret = kr_dnssec_matches_name_and_type(&req->auth_selected, qry->uid,
+ qry->sname, KNOT_RRTYPE_NS);
+ if (ret == kr_ok()) {
+ mark_insecure_parents(qry);
+ }
+ }
+ } else if (qry->flags.DNSSEC_NODS && qry->flags.FORWARD) {
+ int ret = kr_dnssec_matches_name_and_type(&req->auth_selected, qry->uid,
+ qry->sname, KNOT_RRTYPE_NS);
+ if (ret == kr_ok()) {
+ mark_insecure_parents(qry);
+ }
+ } else { /* DS existence proven. */
+ parent->zone_cut.trust_anchor = knot_rrset_copy(qry->zone_cut.trust_anchor, parent->zone_cut.pool);
+ if (!parent->zone_cut.trust_anchor) {
+ return KR_STATE_FAIL;
+ }
+ }
+ break;
+ default: break;
+ }
+ return kr_ok();
+}
+
+static int update_delegation(struct kr_request *req, struct kr_query *qry, knot_pkt_t *answer, bool has_nsec3)
+{
+ struct kr_zonecut *cut = &qry->zone_cut;
+
+	/* RFC4035 3.1.4: an authoritative server must send either the DS or
+	 * proof of its non-existence. If the response contains neither, the
+	 * resolver must query the parent for the DS (RFC4035 5.2.).
+	 * If the DS exists, the referral is OK,
+	 * otherwise the referral is bogus (or an attempted downgrade attack).
+	 */
+
+ unsigned section = KNOT_ANSWER;
+ const bool referral = !knot_wire_get_aa(answer->wire);
+ if (referral) {
+ section = KNOT_AUTHORITY;
+ } else if (knot_pkt_qtype(answer) == KNOT_RRTYPE_DS &&
+ !(qry->flags.CNAME) &&
+ (knot_wire_get_rcode(answer->wire) != KNOT_RCODE_NXDOMAIN)) {
+ section = KNOT_ANSWER;
+ } else { /* N/A */
+ return kr_ok();
+ }
+
+ int ret = 0;
+ const knot_dname_t *proved_name = knot_pkt_qname(answer);
+ /* Aggregate DS records (if using multiple keys) */
+ knot_rrset_t *new_ds = update_ds(cut, knot_pkt_section(answer, section));
+ if (!new_ds) {
+ /* No DS provided, check for proof of non-existence. */
+ if (!has_nsec3) {
+ if (referral) {
+ /* Check if it is referral to unsigned, rfc4035 5.2 */
+ ret = kr_nsec_ref_to_unsigned(answer);
+ } else {
+ /* No-data answer */
+ ret = kr_nsec_existence_denial(answer, KNOT_AUTHORITY, proved_name, KNOT_RRTYPE_DS);
+ }
+ } else {
+ if (referral) {
+ /* Check if it is referral to unsigned, rfc5155 8.9 */
+ ret = kr_nsec3_ref_to_unsigned(answer);
+ } else {
+ /* No-data answer, QTYPE is DS, rfc5155 8.6 */
+ ret = kr_nsec3_no_data(answer, KNOT_AUTHORITY, proved_name, KNOT_RRTYPE_DS);
+ }
+ if (ret == kr_error(KNOT_ERANGE)) {
+ /* Not bogus, going insecure due to optout */
+ ret = 0;
+ }
+ }
+
+ if (referral && qry->stype != KNOT_RRTYPE_DS &&
+ ret == DNSSEC_NOT_FOUND) {
+			/* Referral, qtype is not KNOT_RRTYPE_DS,
+			 * and no NSEC/NSEC3 was found.
+			 * Check whether the DS was already fetched. */
+ knot_rrset_t *ta = cut->trust_anchor;
+ if (knot_dname_is_equal(cut->name, ta->owner)) {
+ /* DS is OK */
+ ret = 0;
+ }
+ } else if (ret != 0) {
+ VERBOSE_MSG(qry, "<= bogus proof of DS non-existence\n");
+ qry->flags.DNSSEC_BOGUS = true;
+ } else if (proved_name[0] != '\0') { /* don't go to insecure for . DS */
+ VERBOSE_MSG(qry, "<= DS doesn't exist, going insecure\n");
+ qry->flags.DNSSEC_NODS = true;
+ /* Rank the corresponding nonauth NS as insecure. */
+ for (int i = 0; i < req->auth_selected.len; ++i) {
+ ranked_rr_array_entry_t *ns = req->auth_selected.at[i];
+ if (ns->qry_uid != qry->uid
+ || !ns->rr
+ || ns->rr->type != KNOT_RRTYPE_NS) {
+ continue;
+ }
+ if (!referral && !knot_dname_is_equal(qry->sname, ns->rr->owner)) {
+ continue;
+ }
+				/* Found the record. Note: this is slightly fragile
+				 * in case there were more NS records in the packet.
+				 * As it stands, for referrals the kr_nsec*_ref_to_unsigned
+				 * functions consider (only) the first NS record in the packet. */
+ if (!kr_rank_test(ns->rank, KR_RANK_AUTH)) { /* sanity */
+ ns->rank = KR_RANK_INSECURE;
+ }
+ break;
+ }
+ }
+ return ret;
+ } else if (qry->flags.FORWARD && qry->parent) {
+ struct kr_query *parent = qry->parent;
+ parent->zone_cut.name = knot_dname_copy(qry->sname, parent->zone_cut.pool);
+ }
+
+ /* Extend trust anchor */
+ VERBOSE_MSG(qry, "<= DS: OK\n");
+ cut->trust_anchor = new_ds;
+ return ret;
+}
+
+static const knot_dname_t *find_first_signer(ranked_rr_array_t *arr)
+{
+ for (size_t i = 0; i < arr->len; ++i) {
+ ranked_rr_array_entry_t *entry = arr->at[i];
+ const knot_rrset_t *rr = entry->rr;
+ if (entry->yielded ||
+ (!kr_rank_test(entry->rank, KR_RANK_INITIAL) &&
+ !kr_rank_test(entry->rank, KR_RANK_TRY) &&
+ !kr_rank_test(entry->rank, KR_RANK_MISMATCH))) {
+ continue;
+ }
+ if (rr->type == KNOT_RRTYPE_RRSIG) {
+ return knot_rrsig_signer_name(rr->rrs.rdata);
+ }
+ }
+ return NULL;
+}
+
+static const knot_dname_t *signature_authority(struct kr_request *req)
+{
+ const knot_dname_t *signer_name = find_first_signer(&req->answ_selected);
+ if (!signer_name) {
+ signer_name = find_first_signer(&req->auth_selected);
+ }
+ return signer_name;
+}
+
+static int rrsig_not_found(kr_layer_t *ctx, const knot_rrset_t *rr)
+{
+ struct kr_request *req = ctx->req;
+ struct kr_query *qry = req->current_query;
+
+ /* Parent-side record, so don't ask for RRSIG.
+ * We won't receive it anyway. */
+ if (qry->stype == KNOT_RRTYPE_DS) {
+ return KR_STATE_FAIL;
+ }
+
+ struct kr_zonecut *cut = &qry->zone_cut;
+ const knot_dname_t *cut_name_start = qry->zone_cut.name;
+ bool use_cut = true;
+ if (knot_dname_in_bailiwick(rr->owner, cut_name_start) < 0) {
+ int zone_labels = knot_dname_labels(qry->zone_cut.name, NULL);
+ int matched_labels = knot_dname_matched_labels(qry->zone_cut.name, rr->owner);
+ int skip_labels = zone_labels - matched_labels;
+ while (skip_labels--) {
+ cut_name_start = knot_wire_next_label(cut_name_start, NULL);
+ }
+		/* Try to find the wanted name among the ancestor cuts. */
+ use_cut = false;
+ while (cut->parent) {
+ cut = cut->parent;
+ if (knot_dname_is_equal(cut_name_start, cut->name)) {
+ use_cut = true;
+ break;
+ }
+		}
+ }
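+	/* Illustrative example (assumed names): with zone cut
+	 * "c.b.example.com." and an RR owned by "a.example.com." (not below
+	 * the cut), 4 - 2 = 2 labels are skipped, yielding "example.com.",
+	 * the closest common ancestor, which is then looked up among the
+	 * parent cuts above. */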
+ struct kr_rplan *rplan = &req->rplan;
+ struct kr_query *next = kr_rplan_push(rplan, qry, rr->owner, rr->rclass, KNOT_RRTYPE_RRSIG);
+ if (!next) {
+ return KR_STATE_FAIL;
+ }
+ kr_zonecut_init(&next->zone_cut, cut_name_start, &req->pool);
+ if (use_cut) {
+ kr_zonecut_copy(&next->zone_cut, cut);
+ kr_zonecut_copy_trust(&next->zone_cut, cut);
+ } else {
+ next->flags.AWAIT_CUT = true;
+ }
+ if (qry->flags.FORWARD) {
+ next->flags.AWAIT_CUT = false;
+ }
+ next->flags.DNSSEC_WANT = true;
+ return KR_STATE_YIELD;
+}
+
+static int check_validation_result(kr_layer_t *ctx, ranked_rr_array_t *arr)
+{
+ int ret = KR_STATE_DONE;
+ struct kr_request *req = ctx->req;
+ struct kr_query *qry = req->current_query;
+ ranked_rr_array_entry_t *invalid_entry = NULL;
+ for (size_t i = 0; i < arr->len; ++i) {
+ ranked_rr_array_entry_t *entry = arr->at[i];
+ if (entry->yielded || entry->qry_uid != qry->uid) {
+ continue;
+ }
+ if (kr_rank_test(entry->rank, KR_RANK_MISMATCH)) {
+ invalid_entry = entry;
+ break;
+ } else if (kr_rank_test(entry->rank, KR_RANK_MISSING) &&
+ !invalid_entry) {
+ invalid_entry = entry;
+ } else if (kr_rank_test(entry->rank, KR_RANK_OMIT)) {
+ continue;
+ } else if (!kr_rank_test(entry->rank, KR_RANK_SECURE) &&
+ !invalid_entry) {
+ invalid_entry = entry;
+ }
+ }
+
+ if (!invalid_entry) {
+ return ret;
+ }
+
+ if (!kr_rank_test(invalid_entry->rank, KR_RANK_SECURE) &&
+ (++(invalid_entry->revalidation_cnt) > MAX_REVALIDATION_CNT)) {
+ VERBOSE_MSG(qry, "<= continuous revalidation, fails\n");
+ qry->flags.DNSSEC_BOGUS = true;
+ return KR_STATE_FAIL;
+ }
+
+ const knot_rrset_t *rr = invalid_entry->rr;
+ if (kr_rank_test(invalid_entry->rank, KR_RANK_MISMATCH)) {
+ const knot_dname_t *signer_name = knot_rrsig_signer_name(rr->rrs.rdata);
+ if (knot_dname_in_bailiwick(signer_name, qry->zone_cut.name) > 0) {
+ qry->zone_cut.name = knot_dname_copy(signer_name, &req->pool);
+ qry->flags.AWAIT_CUT = true;
+ } else if (!knot_dname_is_equal(signer_name, qry->zone_cut.name)) {
+ if (qry->zone_cut.parent) {
+ memcpy(&qry->zone_cut, qry->zone_cut.parent, sizeof(qry->zone_cut));
+ } else {
+ qry->flags.AWAIT_CUT = true;
+ }
+ qry->zone_cut.name = knot_dname_copy(signer_name, &req->pool);
+ }
+ VERBOSE_MSG(qry, ">< cut changed (new signer), needs revalidation\n");
+ ret = KR_STATE_YIELD;
+ } else if (kr_rank_test(invalid_entry->rank, KR_RANK_MISSING)) {
+ ret = rrsig_not_found(ctx, rr);
+ } else if (!kr_rank_test(invalid_entry->rank, KR_RANK_SECURE)) {
+ qry->flags.DNSSEC_BOGUS = true;
+ ret = KR_STATE_FAIL;
+ }
+
+ return ret;
+}
+
+static bool check_empty_answer(kr_layer_t *ctx, knot_pkt_t *pkt)
+{
+ struct kr_request *req = ctx->req;
+ struct kr_query *qry = req->current_query;
+ ranked_rr_array_t *arr = &req->answ_selected;
+ size_t num_entries = 0;
+ for (size_t i = 0; i < arr->len; ++i) {
+ ranked_rr_array_entry_t *entry = arr->at[i];
+ const knot_rrset_t *rr = entry->rr;
+ if (rr->type == KNOT_RRTYPE_RRSIG && qry->stype != KNOT_RRTYPE_RRSIG) {
+ continue;
+ }
+ if (entry->qry_uid == qry->uid) {
+ ++num_entries;
+ }
+ }
+ const knot_pktsection_t *an = knot_pkt_section(pkt, KNOT_ANSWER);
+	return !(an->count != 0 && num_entries == 0);
+}
+
+static int unsigned_forward(kr_layer_t *ctx, knot_pkt_t *pkt)
+{
+ struct kr_request *req = ctx->req;
+ struct kr_query *qry = req->current_query;
+ const uint16_t qtype = knot_pkt_qtype(pkt);
+ const uint8_t pkt_rcode = knot_wire_get_rcode(pkt->wire);
+ bool nods = false;
+ bool ns_exist = true;
+ for (int i = 0; i < req->rplan.resolved.len; ++i) {
+ struct kr_query *q = req->rplan.resolved.at[i];
+ if (q->sclass == qry->sclass &&
+ q->stype == KNOT_RRTYPE_DS &&
+ knot_dname_is_equal(q->sname, qry->sname)) {
+ nods = true;
+ if (!(q->flags.DNSSEC_OPTOUT)) {
+ int ret = kr_dnssec_matches_name_and_type(&req->auth_selected, q->uid,
+ qry->sname, KNOT_RRTYPE_NS);
+ ns_exist = (ret == kr_ok());
+ }
+ }
+ }
+
+ if (nods && ns_exist && qtype == KNOT_RRTYPE_NS) {
+ qry->flags.DNSSEC_WANT = false;
+ qry->flags.DNSSEC_INSECURE = true;
+ if (qry->forward_flags.CNAME) {
+ assert(qry->cname_parent);
+ qry->cname_parent->flags.DNSSEC_WANT = false;
+ qry->cname_parent->flags.DNSSEC_INSECURE = true;
+ } else if (pkt_rcode == KNOT_RCODE_NOERROR && qry->parent != NULL) {
+ const knot_pktsection_t *sec = knot_pkt_section(pkt, KNOT_ANSWER);
+ const knot_rrset_t *rr = knot_pkt_rr(sec, 0);
+ if (rr->type == KNOT_RRTYPE_NS) {
+ qry->parent->zone_cut.name = knot_dname_copy(rr->owner, &req->pool);
+ qry->parent->flags.DNSSEC_WANT = false;
+ qry->parent->flags.DNSSEC_INSECURE = true;
+ }
+ }
+ while (qry->parent) {
+ qry = qry->parent;
+ qry->flags.DNSSEC_WANT = false;
+ qry->flags.DNSSEC_INSECURE = true;
+ if (qry->forward_flags.CNAME) {
+ assert(qry->cname_parent);
+ qry->cname_parent->flags.DNSSEC_WANT = false;
+ qry->cname_parent->flags.DNSSEC_INSECURE = true;
+ }
+ }
+ return KR_STATE_DONE;
+ }
+
+ if (ctx->state == KR_STATE_YIELD) {
+ return KR_STATE_DONE;
+ }
+
+ if (!nods && qtype != KNOT_RRTYPE_DS) {
+ struct kr_rplan *rplan = &req->rplan;
+ struct kr_query *next = kr_rplan_push(rplan, qry, qry->sname, qry->sclass, KNOT_RRTYPE_DS);
+ if (!next) {
+ return KR_STATE_FAIL;
+ }
+ kr_zonecut_set(&next->zone_cut, qry->zone_cut.name);
+ kr_zonecut_copy_trust(&next->zone_cut, &qry->zone_cut);
+ next->flags.DNSSEC_WANT = true;
+ }
+
+ return KR_STATE_YIELD;
+}
+
+static int check_signer(kr_layer_t *ctx, knot_pkt_t *pkt)
+{
+ struct kr_request *req = ctx->req;
+ struct kr_query *qry = req->current_query;
+ const knot_dname_t *ta_name = qry->zone_cut.trust_anchor ? qry->zone_cut.trust_anchor->owner : NULL;
+ const knot_dname_t *signer = signature_authority(req);
+ if (ta_name && (!signer || !knot_dname_is_equal(ta_name, signer))) {
+ /* check all newly added RRSIGs */
+ if (!signer) {
+ if (qry->flags.FORWARD) {
+ return unsigned_forward(ctx, pkt);
+ }
+ /* Not a DNSSEC-signed response. */
+ if (ctx->state == KR_STATE_YIELD) {
+ /* Already yielded for revalidation.
+ * It means that trust chain is OK and
+ * transition to INSECURE hasn't occurred.
+ * Let the validation logic ask about RRSIG. */
+ return KR_STATE_DONE;
+ }
+ /* Ask parent for DS
+ * to prove transition to INSECURE. */
+ const uint16_t qtype = knot_pkt_qtype(pkt);
+ const knot_dname_t *qname = knot_pkt_qname(pkt);
+ if (qtype == KNOT_RRTYPE_NS &&
+ knot_dname_in_bailiwick(qname, qry->zone_cut.name) > 0) {
+ /* Server is authoritative
+ * for both parent and child,
+ * and child zone is not signed. */
+ qry->zone_cut.name = knot_dname_copy(qname, &req->pool);
+ }
+ } else if (knot_dname_in_bailiwick(signer, qry->zone_cut.name) > 0) {
+ if (!(qry->flags.FORWARD)) {
+ /* Key signer is below current cut, advance and refetch keys. */
+ qry->zone_cut.name = knot_dname_copy(signer, &req->pool);
+ } else {
+ /* Check if DS does not exist. */
+ struct kr_query *q = kr_rplan_find_resolved(&req->rplan, NULL,
+ signer, qry->sclass, KNOT_RRTYPE_DS);
+ if (q && q->flags.DNSSEC_NODS) {
+ qry->flags.DNSSEC_WANT = false;
+ qry->flags.DNSSEC_INSECURE = true;
+ if (qry->parent) {
+ qry->parent->flags.DNSSEC_WANT = false;
+ qry->parent->flags.DNSSEC_INSECURE = true;
+ }
+ } else if (qry->stype != KNOT_RRTYPE_DS) {
+ struct kr_rplan *rplan = &req->rplan;
+ struct kr_query *next = kr_rplan_push(rplan, qry, qry->sname,
+ qry->sclass, KNOT_RRTYPE_DS);
+ if (!next) {
+ return KR_STATE_FAIL;
+ }
+ kr_zonecut_set(&next->zone_cut, qry->zone_cut.name);
+ kr_zonecut_copy_trust(&next->zone_cut, &qry->zone_cut);
+ next->flags.DNSSEC_WANT = true;
+ }
+ }
+ } else if (!knot_dname_is_equal(signer, qry->zone_cut.name)) {
+ /* Key signer is above the current cut, so we can't validate it. This happens when
+ a server is authoritative for both grandparent, parent and child zone.
+ Ascend to parent cut, and refetch authority for signer. */
+ if (qry->zone_cut.parent) {
+ memcpy(&qry->zone_cut, qry->zone_cut.parent, sizeof(qry->zone_cut));
+ } else {
+ qry->flags.AWAIT_CUT = true;
+ }
+ qry->zone_cut.name = knot_dname_copy(signer, &req->pool);
+ }
+
+ /* zone cut matches, but DS/DNSKEY doesn't => refetch. */
+ VERBOSE_MSG(qry, ">< cut changed, needs revalidation\n");
+ if ((qry->flags.FORWARD) && qry->stype != KNOT_RRTYPE_DS) {
+ struct kr_rplan *rplan = &req->rplan;
+ struct kr_query *next = kr_rplan_push(rplan, qry, signer,
+ qry->sclass, KNOT_RRTYPE_DS);
+ if (!next) {
+ return KR_STATE_FAIL;
+ }
+ kr_zonecut_set(&next->zone_cut, qry->zone_cut.name);
+ kr_zonecut_copy_trust(&next->zone_cut, &qry->zone_cut);
+ next->flags.DNSSEC_WANT = true;
+ return KR_STATE_YIELD;
+ }
+ if (!(qry->flags.FORWARD)) {
+ return KR_STATE_YIELD;
+ }
+ }
+ return KR_STATE_DONE;
+}
+
+/** Change ranks of RRs from this single iteration:
+ * _INITIAL or _TRY or _MISSING -> rank_to_set.
+ *
+ * Optionally do this only in a `bailiwick` (if not NULL).
+ * Iterator shouldn't have selected such records, but we check to be sure. */
+static void rank_records(kr_layer_t *ctx, enum kr_rank rank_to_set,
+ const knot_dname_t *bailiwick)
+{
+ struct kr_request *req = ctx->req;
+ struct kr_query *qry = req->current_query;
+ ranked_rr_array_t *ptrs[2] = { &req->answ_selected, &req->auth_selected };
+ for (size_t i = 0; i < 2; ++i) {
+ ranked_rr_array_t *arr = ptrs[i];
+ for (size_t j = 0; j < arr->len; ++j) {
+ ranked_rr_array_entry_t *entry = arr->at[j];
+ if (entry->qry_uid != qry->uid) {
+ continue;
+ }
+ if (bailiwick && knot_dname_in_bailiwick(entry->rr->owner,
+ bailiwick) < 0) {
+ continue;
+ }
+ if (kr_rank_test(entry->rank, KR_RANK_INITIAL)
+ || kr_rank_test(entry->rank, KR_RANK_TRY)
+ || kr_rank_test(entry->rank, KR_RANK_MISSING)) {
+ kr_rank_set(&entry->rank, rank_to_set);
+ }
+ }
+ }
+}
+
+static void check_wildcard(kr_layer_t *ctx)
+{
+ struct kr_request *req = ctx->req;
+ struct kr_query *qry = req->current_query;
+ ranked_rr_array_t *ptrs[2] = { &req->answ_selected, &req->auth_selected };
+
+ for (int i = 0; i < 2; ++i) {
+ ranked_rr_array_t *arr = ptrs[i];
+ for (ssize_t j = 0; j < arr->len; ++j) {
+ ranked_rr_array_entry_t *entry = arr->at[j];
+ const knot_rrset_t *rrsigs = entry->rr;
+
+ if (qry->uid != entry->qry_uid) {
+ continue;
+ }
+
+ if (rrsigs->type != KNOT_RRTYPE_RRSIG) {
+ continue;
+ }
+
+ int owner_labels = knot_dname_labels(rrsigs->owner, NULL);
+
+ knot_rdata_t *rdata_k = rrsigs->rrs.rdata;
+ for (int k = 0; k < rrsigs->rrs.count;
+ ++k, rdata_k = knot_rdataset_next(rdata_k)) {
+ if (knot_rrsig_labels(rdata_k) != owner_labels) {
+ qry->flags.DNSSEC_WEXPAND = true;
+ }
+ }
+ }
+ }
+}
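+
+/* Illustrative example: an answer for "www.example.com." synthesized from
+ * the wildcard "*.example.com." carries RRSIGs whose labels field is 2,
+ * while the owner has 3 labels; the mismatch above sets DNSSEC_WEXPAND. */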
+
+/** Just for wildcard_adjust_to_wire() */
+static bool rr_is_for_wildcard(const ranked_rr_array_entry_t *entry)
+{
+ switch (kr_rrset_type_maysig(entry->rr)) {
+ case KNOT_RRTYPE_NSEC:
+ case KNOT_RRTYPE_NSEC3:
+ return true;
+ default:
+ return false;
+ }
+}
+
+/** In case of wildcard expansion, mark the required authority RRs as to_wire. */
+static int wildcard_adjust_to_wire(struct kr_request *req, const struct kr_query *qry)
+{
+ if (!qry->parent && qry->flags.DNSSEC_WEXPAND) {
+ return kr_ranked_rrarray_set_wire(&req->auth_selected, true,
+ qry->uid, true, &rr_is_for_wildcard);
+ }
+ return kr_ok();
+}
+
+static int validate(kr_layer_t *ctx, knot_pkt_t *pkt)
+{
+ int ret = 0;
+ struct kr_request *req = ctx->req;
+ struct kr_query *qry = req->current_query;
+
+ /* Ignore faulty or unprocessed responses. */
+ if (ctx->state & (KR_STATE_FAIL|KR_STATE_CONSUME)) {
+ return ctx->state;
+ }
+
+ /* Pass-through if user doesn't want secure answer or stub. */
+ /* @todo: Validating stub resolver mode. */
+ if (qry->flags.STUB) {
+ rank_records(ctx, KR_RANK_OMIT, NULL);
+ return ctx->state;
+ }
+ uint8_t pkt_rcode = knot_wire_get_rcode(pkt->wire);
+ if ((qry->flags.FORWARD) &&
+ pkt_rcode != KNOT_RCODE_NOERROR &&
+ pkt_rcode != KNOT_RCODE_NXDOMAIN) {
+ do {
+ qry->flags.DNSSEC_BOGUS = true;
+ if (qry->cname_parent) {
+ qry->cname_parent->flags.DNSSEC_BOGUS = true;
+ }
+ qry = qry->parent;
+ } while (qry);
+ ctx->state = KR_STATE_DONE;
+ return ctx->state;
+ }
+
+ if (!(qry->flags.DNSSEC_WANT)) {
+ const bool is_insec = qry->flags.CACHED && qry->flags.DNSSEC_INSECURE;
+ if ((qry->flags.DNSSEC_INSECURE)) {
+ rank_records(ctx, KR_RANK_INSECURE, qry->zone_cut.name);
+ }
+ if (is_insec && qry->parent != NULL) {
+			/* We got an insecure answer from the cache.
+			 * Mark the parent(s) as insecure. */
+ mark_insecure_parents(qry);
+ VERBOSE_MSG(qry, "<= cached insecure response, going insecure\n");
+ ctx->state = KR_STATE_DONE;
+ } else if (ctx->state == KR_STATE_YIELD) {
+			/* A transition to the insecure state
+			   occurred during revalidation.
+			   If the state remained YIELD, the answer would not be cached.
+			   Let the cache layers do their work. */
+ ctx->state = KR_STATE_DONE;
+ }
+ return ctx->state;
+ }
+
+ /* Pass-through if CD bit is set. */
+ if (knot_wire_get_cd(req->qsource.packet->wire)) {
+ check_wildcard(ctx);
+ wildcard_adjust_to_wire(req, qry);
+ rank_records(ctx, KR_RANK_OMIT, NULL);
+ return ctx->state;
+ }
+ /* Answer for RRSIG may not set DO=1, but all records MUST still validate. */
+ bool use_signatures = (knot_pkt_qtype(pkt) != KNOT_RRTYPE_RRSIG);
+ if (!(qry->flags.CACHED) && !knot_pkt_has_dnssec(pkt) && !use_signatures) {
+ VERBOSE_MSG(qry, "<= got insecure response\n");
+ qry->flags.DNSSEC_BOGUS = true;
+ return KR_STATE_FAIL;
+ }
+
+ /* Check if this is a DNSKEY answer, check trust chain and store. */
+ uint16_t qtype = knot_pkt_qtype(pkt);
+ bool has_nsec3 = pkt_has_type(pkt, KNOT_RRTYPE_NSEC3);
+ const knot_pktsection_t *an = knot_pkt_section(pkt, KNOT_ANSWER);
+ const bool referral = (an->count == 0 && !knot_wire_get_aa(pkt->wire));
+
+ if (!(qry->flags.CACHED) && knot_wire_get_aa(pkt->wire)) {
+		/* Check whether the answer is non-empty
+		 * but the iterator has not selected any records. */
+ if (!check_empty_answer(ctx, pkt)) {
+ VERBOSE_MSG(qry, "<= no useful RR in authoritative answer\n");
+ qry->flags.DNSSEC_BOGUS = true;
+ return KR_STATE_FAIL;
+ }
+		/* Track the difference between the current TA and the signer name.
+		 * A mismatch indicates that the NS is authoritative for both parent
+		 * and child, and we must update the DS/DNSKEY to validate it.
+		 */
+ ret = check_signer(ctx, pkt);
+ if (ret != KR_STATE_DONE) {
+ return ret;
+ }
+ if (qry->flags.FORWARD && qry->flags.DNSSEC_INSECURE) {
+ return KR_STATE_DONE;
+ }
+ }
+
+ if (knot_wire_get_aa(pkt->wire) && qtype == KNOT_RRTYPE_DNSKEY) {
+ ret = validate_keyset(req, pkt, has_nsec3);
+ if (ret == kr_error(EAGAIN)) {
+ VERBOSE_MSG(qry, ">< cut changed, needs revalidation\n");
+ return KR_STATE_YIELD;
+ } else if (ret == kr_error(DNSSEC_INVALID_DS_ALGORITHM)) {
+ VERBOSE_MSG(qry, ">< all DS entries use unsupported algorithm pairs, going insecure\n");
+ /* ^ the message is a bit imprecise to avoid being too verbose */
+ qry->flags.DNSSEC_WANT = false;
+ qry->flags.DNSSEC_INSECURE = true;
+ rank_records(ctx, KR_RANK_INSECURE, qry->zone_cut.name);
+ mark_insecure_parents(qry);
+ return KR_STATE_DONE;
+ } else if (ret != 0) {
+ VERBOSE_MSG(qry, "<= bad keys, broken trust chain\n");
+ qry->flags.DNSSEC_BOGUS = true;
+ return KR_STATE_FAIL;
+ }
+ }
+
+ /* Validate non-existence proof if not positive answer.
+ * In case of CNAME, iterator scheduled a sibling query for the target,
+ * so we just drop the negative piece of information and don't try to prove it.
+ * TODO: not ideal; with aggressive cache we'll at least avoid the extra packet. */
+ if (!qry->flags.CACHED && pkt_rcode == KNOT_RCODE_NXDOMAIN && !qry->flags.CNAME) {
+ /* @todo If knot_pkt_qname(pkt) is used instead of qry->sname then the tests crash. */
+ if (!has_nsec3) {
+ ret = kr_nsec_name_error_response_check(pkt, KNOT_AUTHORITY, qry->sname);
+ } else {
+ ret = kr_nsec3_name_error_response_check(pkt, KNOT_AUTHORITY, qry->sname);
+ }
+ if (has_nsec3 && (ret == kr_error(KNOT_ERANGE))) {
+			/* The NXDOMAIN proof is OK,
+			 * but the NSEC3 that covers the next closer name
+			 * (or the wildcard at the next closer name) has the opt-out flag.
+			 * RFC5155 9.2; the AD flag cannot be set. */
+ qry->flags.DNSSEC_OPTOUT = true;
+ VERBOSE_MSG(qry, "<= can't prove NXDOMAIN due to optout, going insecure\n");
+ } else if (ret != 0) {
+ VERBOSE_MSG(qry, "<= bad NXDOMAIN proof\n");
+ qry->flags.DNSSEC_BOGUS = true;
+ return KR_STATE_FAIL;
+ }
+ }
+
+ /* @todo WTH, this needs API that just tries to find a proof and the caller
+ * doesn't have to worry about NSEC/NSEC3
+ * @todo rework this
+ * CNAME: same as the NXDOMAIN case above */
+ if (!qry->flags.CACHED && pkt_rcode == KNOT_RCODE_NOERROR && !qry->flags.CNAME) {
+ bool no_data = (an->count == 0 && knot_wire_get_aa(pkt->wire));
+ if (no_data) {
+			/* @todo
+			 * ? quick mechanism to determine which check to perform first
+			 * ? merge the functionality together to share code/resources
+			 */
+ if (!has_nsec3) {
+ ret = kr_nsec_existence_denial(pkt, KNOT_AUTHORITY, knot_pkt_qname(pkt), knot_pkt_qtype(pkt));
+ } else {
+ ret = kr_nsec3_no_data(pkt, KNOT_AUTHORITY, knot_pkt_qname(pkt), knot_pkt_qtype(pkt));
+ }
+ if (ret != 0) {
+ if (has_nsec3 && (ret == kr_error(KNOT_ERANGE))) {
+ VERBOSE_MSG(qry, "<= can't prove NODATA due to optout, going insecure\n");
+ qry->flags.DNSSEC_OPTOUT = true;
+				/* We cannot return from here;
+				 * we must continue, validate NSEC/NSEC3 and
+				 * call update_parent_keys() to mark
+				 * the parent queries as insecure. */
+ } else {
+ VERBOSE_MSG(qry, "<= bad NODATA proof\n");
+ qry->flags.DNSSEC_BOGUS = true;
+ return KR_STATE_FAIL;
+ }
+ }
+ }
+ }
+
+ /* Validate all records, fail as bogus if it doesn't match.
+ * Do not revalidate data from cache, as it's already trusted. */
+ if (!(qry->flags.CACHED)) {
+ ret = validate_records(req, pkt, req->rplan.pool, has_nsec3);
+ if (ret != 0) {
+			/* Something exceptional: no DNS key, empty pointers etc.
+			 * Normally it shouldn't happen. */
+ VERBOSE_MSG(qry, "<= couldn't validate RRSIGs\n");
+ qry->flags.DNSSEC_BOGUS = true;
+ return KR_STATE_FAIL;
+ }
+ /* check validation state and spawn subrequests */
+ if (!req->answ_validated) {
+ ret = check_validation_result(ctx, &req->answ_selected);
+ if (ret != KR_STATE_DONE) {
+ return ret;
+ }
+ }
+ if (!req->auth_validated) {
+ ret = check_validation_result(ctx, &req->auth_selected);
+ if (ret != KR_STATE_DONE) {
+ return ret;
+ }
+ }
+ }
+
+ wildcard_adjust_to_wire(req, qry);
+
+ /* Check and update current delegation point security status. */
+ ret = update_delegation(req, qry, pkt, has_nsec3);
+ if (ret == DNSSEC_NOT_FOUND && qry->stype != KNOT_RRTYPE_DS) {
+ if (ctx->state == KR_STATE_YIELD) {
+ VERBOSE_MSG(qry, "<= can't validate referral\n");
+ qry->flags.DNSSEC_BOGUS = true;
+ return KR_STATE_FAIL;
+ } else {
+			/* Check the trust chain and query the DS/DNSKEY if needed. */
+			VERBOSE_MSG(qry, "<= DS/NSEC was not found, querying for DS\n");
+ return KR_STATE_YIELD;
+ }
+ } else if (ret != 0) {
+ return KR_STATE_FAIL;
+ } else if (pkt_rcode == KNOT_RCODE_NOERROR &&
+ referral &&
+ ((!qry->flags.DNSSEC_WANT && qry->flags.DNSSEC_INSECURE) ||
+ (qry->flags.DNSSEC_NODS))) {
+		/* Referral with proven DS non-existence. */
+ qtype = KNOT_RRTYPE_DS;
+ }
+ /* Update parent query zone cut */
+ if (qry->parent) {
+ if (update_parent_keys(req, qtype) != 0) {
+ return KR_STATE_FAIL;
+ }
+ }
+
+ if (qry->flags.FORWARD && qry->parent) {
+ if (pkt_rcode == KNOT_RCODE_NXDOMAIN) {
+ qry->parent->forward_flags.NO_MINIMIZE = true;
+ }
+ }
+ VERBOSE_MSG(qry, "<= answer valid, OK\n");
+ return KR_STATE_DONE;
+}
+
+/** Module implementation. */
+const kr_layer_api_t *validate_layer(struct kr_module *module)
+{
+ static const kr_layer_api_t _layer = {
+ .consume = &validate,
+ };
+ /* Store module reference */
+ return &_layer;
+}
+
+int validate_init(struct kr_module *module)
+{
+ return kr_ok();
+}
+
+KR_MODULE_EXPORT(validate)
+
+#undef VERBOSE_MSG
diff --git a/lib/lib.mk b/lib/lib.mk
new file mode 100644
index 0000000..8ac4e91
--- /dev/null
+++ b/lib/lib.mk
@@ -0,0 +1,110 @@
+libkres_SOURCES := \
+ lib/cache/api.c \
+ lib/cache/cdb_lmdb.c \
+ lib/cache/entry_list.c \
+ lib/cache/entry_pkt.c \
+ lib/cache/entry_rr.c \
+ lib/cache/knot_pkt.c \
+ lib/cache/nsec1.c \
+ lib/cache/nsec3.c \
+ lib/cache/peek.c \
+ lib/dnssec.c \
+ lib/dnssec/nsec.c \
+ lib/dnssec/nsec3.c \
+ lib/dnssec/signature.c \
+ lib/dnssec/ta.c \
+ lib/generic/lru.c \
+ lib/generic/map.c \
+ lib/generic/queue.c \
+ lib/generic/trie.c \
+ lib/layer/cache.c \
+ lib/layer/iterate.c \
+ lib/layer/validate.c \
+ lib/module.c \
+ lib/nsrep.c \
+ lib/resolve.c \
+ lib/rplan.c \
+ lib/utils.c \
+ lib/zonecut.c
+
+libkres_HEADERS := \
+ lib/cache/api.h \
+ lib/cache/cdb_api.h \
+ lib/cache/cdb_lmdb.h \
+ lib/cache/impl.h \
+ lib/defines.h \
+ lib/dnssec.h \
+ lib/dnssec/nsec.h \
+ lib/dnssec/nsec3.h \
+ lib/dnssec/signature.h \
+ lib/dnssec/ta.h \
+ lib/generic/array.h \
+ lib/generic/lru.h \
+ lib/generic/map.h \
+ lib/generic/pack.h \
+ lib/generic/queue.h \
+ lib/generic/trie.h \
+ lib/layer.h \
+ lib/layer/iterate.h \
+ lib/module.h \
+ lib/nsrep.h \
+ lib/resolve.h \
+ lib/rplan.h \
+ lib/utils.h \
+ lib/zonecut.h
+
+# Dependencies
+libkres_DEPEND := $(contrib)
+libkres_CFLAGS := -fPIC $(lmdb_CFLAGS)
+libkres_LIBS := $(contrib_TARGET) $(libknot_LIBS) $(libdnssec_LIBS) $(lmdb_LIBS) \
+ $(libuv_LIBS) $(gnutls_LIBS)
+libkres_TARGET := -L$(abspath lib) -lkres
+
+ifeq ($(ENABLE_COOKIES),yes)
+libkres_SOURCES += \
+ lib/cookies/alg_containers.c \
+ lib/cookies/alg_sha.c \
+ lib/cookies/helper.c \
+ lib/cookies/lru_cache.c \
+ lib/cookies/nonce.c
+
+libkres_HEADERS += \
+ lib/cookies/alg_containers.h \
+ lib/cookies/alg_sha.h \
+ lib/cookies/control.h \
+ lib/cookies/helper.h \
+ lib/cookies/lru_cache.h \
+ lib/cookies/nonce.h
+
+libkres_LIBS += $(nettle_LIBS)
+endif
+
+# Make library
+ifeq ($(BUILDMODE), static)
+$(eval $(call make_static,libkres,lib,yes))
+else
+$(eval $(call make_lib,libkres,lib,yes,$(ABIVER)))
+endif
+
+# Generate pkg-config file
+libkres.pc:
+ @echo 'prefix='$(PREFIX) > $@
+ @echo 'exec_prefix=$${prefix}' >> $@
+ @echo 'libdir='$(LIBDIR) >> $@
+ @echo 'includedir='$(INCLUDEDIR) >> $@
+ @echo 'Name: libkres' >> $@
+ @echo 'Description: Knot Resolver library' >> $@
+ @echo 'URL: https://www.knot-resolver.cz' >> $@
+ @echo 'Version: $(VERSION)' >> $@
+ @echo 'Libs: -L$${libdir} -lkres' >> $@
+ @echo 'Cflags: -I$${includedir}' >> $@
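+# For illustration, with PREFIX=/usr and VERSION=3.0.0 (example values) the
+# generated libkres.pc starts with "prefix=/usr", carries "Version: 3.0.0",
+# and ends with the Libs:/Cflags: lines above for `pkg-config --libs libkres`.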
+libkres-pcinstall: libkres.pc libkres-install
+ $(INSTALL) -d -m 755 $(DESTDIR)$(PKGCONFIGDIR)
+ $(INSTALL) -m 644 $< $(DESTDIR)$(PKGCONFIGDIR)
+
+# Targets
+lib: $(libkres)
+lib-install: libkres-install libkres-pcinstall
+lib-clean: libkres-clean
+
+.PHONY: lib lib-install lib-clean libkres.pc
diff --git a/lib/module.c b/lib/module.c
new file mode 100644
index 0000000..68abc84
--- /dev/null
+++ b/lib/module.c
@@ -0,0 +1,170 @@
+/* Copyright (C) 2015-2017 CZ.NIC, z.s.p.o. <knot-dns@labs.nic.cz>
+
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program. If not, see <https://www.gnu.org/licenses/>.
+ */
+
+#include <stdlib.h>
+#include <dlfcn.h>
+#include <pthread.h>
+#include <contrib/cleanup.h>
+
+#include "lib/defines.h"
+#include "lib/utils.h"
+#include "lib/module.h"
+
+/* List of embedded modules */
+const kr_layer_api_t *iterate_layer(struct kr_module *module);
+const kr_layer_api_t *validate_layer(struct kr_module *module);
+const kr_layer_api_t *cache_layer(struct kr_module *module);
+static const struct kr_module embedded_modules[] = {
+ { "iterate", NULL, NULL, NULL, iterate_layer, NULL, NULL, NULL },
+ { "validate", NULL, NULL, NULL, validate_layer, NULL, NULL, NULL },
+ { "cache", NULL, NULL, NULL, cache_layer, NULL, NULL, NULL },
+};
+
+/** Library extension. */
+#if defined(__APPLE__)
+ #define LIBEXT ".dylib"
+#elif _WIN32
+ #define LIBEXT ".dll"
+#else
+ #define LIBEXT ".so"
+#endif
+
+
+/** Load prefixed symbol. */
+static void *load_symbol(void *lib, const char *prefix, const char *name)
+{
+ auto_free char *symbol = kr_strcatdup(2, prefix, name);
+ return dlsym(lib, symbol);
+}
+
+static int load_library(struct kr_module *module, const char *name, const char *path)
+{
+ assert(module && name && path);
+ /* Absolute or relative path (then only library search path is used). */
+ auto_free char *lib_path = kr_strcatdup(4, path, "/", name, LIBEXT);
+ if (lib_path == NULL) {
+ return kr_error(ENOMEM);
+ }
+
+ /* Workaround for buggy _fini/__attribute__((destructor)) and dlclose(),
+ * this keeps the library mapped until the program finishes though. */
+ module->lib = dlopen(lib_path, RTLD_NOW | RTLD_NODELETE);
+ if (module->lib) {
+ return kr_ok();
+ }
+
+ return kr_error(ENOENT);
+}
+
+const struct kr_module * kr_module_embedded(const char *name)
+{
+ for (unsigned i = 0; i < sizeof(embedded_modules)/sizeof(embedded_modules[0]); ++i) {
+ if (strcmp(name, embedded_modules[i].name) == 0)
+ return embedded_modules + i;
+ }
+ return NULL;
+}
+
+/** Load C module symbols. */
+static int load_sym_c(struct kr_module *module, uint32_t api_required)
+{
+ #pragma GCC diagnostic push
+ #pragma GCC diagnostic ignored "-Wpedantic" /* casts after load_symbol() */
+ /* Check if it's embedded first */
+ const struct kr_module *embedded = kr_module_embedded(module->name);
+ if (embedded) {
+ module->init = embedded->init;
+ module->deinit = embedded->deinit;
+ module->config = embedded->config;
+ module->layer = embedded->layer;
+ module->props = embedded->props;
+ return kr_ok();
+ }
+ /* Load dynamic library module */
+ auto_free char *m_prefix = kr_strcatdup(2, module->name, "_");
+
+ /* Check ABI version, return error on mismatch. */
+ module_api_cb *api = load_symbol(module->lib, m_prefix, "api");
+ if (api == NULL) {
+ return kr_error(ENOENT);
+ }
+ if (api() != api_required) {
+ return kr_error(ENOTSUP);
+ }
+
+ /* Load ABI by symbol names. */
+ #define ML(symname) module->symname = \
+ load_symbol(module->lib, m_prefix, #symname)
+ ML(init);
+ ML(deinit);
+ ML(config);
+ ML(layer);
+ ML(props);
+ #undef ML
+
+ return kr_ok();
+ #pragma GCC diagnostic pop
+}
+
+int kr_module_load(struct kr_module *module, const char *name, const char *path)
+{
+ if (module == NULL || name == NULL) {
+ return kr_error(EINVAL);
+ }
+
+ /* Initialize, keep userdata */
+ void *data = module->data;
+ memset(module, 0, sizeof(struct kr_module));
+ module->data = data;
+ module->name = strdup(name);
+ if (module->name == NULL) {
+ return kr_error(ENOMEM);
+ }
+
+ /* Search for module library. */
+ if (!path || load_library(module, name, path) != 0) {
+ module->lib = RTLD_DEFAULT;
+ }
+
+ /* Try to load module ABI. */
+ int ret = load_sym_c(module, KR_MODULE_API);
+ if (ret == 0 && module->init) {
+ ret = module->init(module);
+ }
+ if (ret != 0) {
+ kr_module_unload(module);
+ }
+
+ return ret;
+}
+
+void kr_module_unload(struct kr_module *module)
+{
+ if (module == NULL) {
+ return;
+ }
+
+ if (module->deinit) {
+ module->deinit(module);
+ }
+
+ if (module->lib && module->lib != RTLD_DEFAULT) {
+ dlclose(module->lib);
+ }
+
+ free(module->name);
+ memset(module, 0, sizeof(struct kr_module));
+}
diff --git a/lib/module.h b/lib/module.h
new file mode 100644
index 0000000..295c1f9
--- /dev/null
+++ b/lib/module.h
@@ -0,0 +1,111 @@
+/* Copyright (C) 2015-2017 CZ.NIC, z.s.p.o. <knot-dns@labs.nic.cz>
+
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program. If not, see <https://www.gnu.org/licenses/>.
+ */
+
+/** @file
+ * Module API definition and functions for (un)loading modules.
+ */
+
+#pragma once
+
+#include "lib/defines.h"
+#include "lib/utils.h"
+#include "lib/layer.h"
+
+struct kr_module;
+struct kr_prop;
+
+
+/**
+ * Export module API version (place this at the end of your module).
+ *
+ * @param module module name (e.g. hints)
+ */
+#define KR_MODULE_EXPORT(module) \
+ KR_EXPORT uint32_t module ## _api() { return KR_MODULE_API; }
+#define KR_MODULE_API ((uint32_t) 0x20180401)
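+
+/* A minimal sketch (the module name "hints" is illustrative): a module defines
+ *
+ *     int hints_init(struct kr_module *module) { return kr_ok(); }
+ *     KR_MODULE_EXPORT(hints)
+ *
+ * The macro exports hints_api(), whose return value is checked against
+ * KR_MODULE_API when the module is loaded. */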
+
+typedef uint32_t (module_api_cb)(void);
+
+
+/**
+ * Module representation.
+ *
+ * The five symbols (init, ...) may be defined by the module as name_init(), etc.;
+ * all are optional and missing symbols are represented as NULLs.
+ */
+struct kr_module {
+ char *name;
+
+ /** Constructor. Called after loading the module. @return error code. */
+ int (*init)(struct kr_module *self);
+ /** Destructor. Called before unloading the module. @return error code. */
+ int (*deinit)(struct kr_module *self);
+ /** Configure with encoded JSON (NULL if missing). @return error code. */
+ int (*config)(struct kr_module *self, const char *input);
+ /** Get a pointer to packet processing API specs. See docs on that type. */
+ const kr_layer_api_t * (*layer)(struct kr_module *self);
+ /** Get a pointer to list of properties, terminated by { NULL, NULL, NULL }. */
+ const struct kr_prop * (*props)(void);
+
+ void *lib; /**< Shared library handle or RTLD_DEFAULT */
+ void *data; /**< Custom data context. */
+};
+
+/**
+ * Module property callback. Input and output is passed via a JSON encoded in a string.
+ *
+ * @param env pointer to the lua engine, i.e. struct engine *env (TODO: explicit type)
+ * @param input parameter (NULL if missing/nil on lua level)
+ * @return a free-form JSON output (malloc-ated)
+ * @note see l_trampoline() implementation for details about the input/output conversion.
+ */
+typedef char *(kr_prop_cb)(void *env, struct kr_module *self, const char *input);
+
+/**
+ * Module property (named callable).
+ */
+struct kr_prop {
+ kr_prop_cb *cb;
+ const char *name;
+ const char *info;
+};
+
+
+/**
+ * Load a C module instance into memory.
+ *
+ * @param module module structure
+ * @param name module name
+ * @param path module search path
+ * @return 0 or an error
+ */
+KR_EXPORT
+int kr_module_load(struct kr_module *module, const char *name, const char *path);
+
+/**
+ * Unload module instance.
+ *
+ * @param module module structure
+ */
+KR_EXPORT
+void kr_module_unload(struct kr_module *module);
+
+/**
+ * Get embedded module prototype by name (or NULL).
+ */
+KR_EXPORT
+const struct kr_module * kr_module_embedded(const char *name);
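+
+/* Usage sketch (the module name and search path are illustrative; error
+ * handling elided):
+ *
+ *     struct kr_module module = { 0 };
+ *     if (kr_module_load(&module, "hints", "/usr/lib/kdns_modules") == 0) {
+ *             ... use module.layer and module.props ...
+ *             kr_module_unload(&module);
+ *     }
+ */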
+
diff --git a/lib/nsrep.c b/lib/nsrep.c
new file mode 100644
index 0000000..2af12f2
--- /dev/null
+++ b/lib/nsrep.c
@@ -0,0 +1,563 @@
+/* Copyright (C) 2014-2017 CZ.NIC, z.s.p.o. <knot-dns@labs.nic.cz>
+
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program. If not, see <https://www.gnu.org/licenses/>.
+ */
+
+#include <assert.h>
+#include <sys/socket.h>
+#include <netinet/in.h>
+#include <netdb.h>
+
+#include <arpa/inet.h>
+
+#include "lib/nsrep.h"
+#include "lib/rplan.h"
+#include "lib/resolve.h"
+#include "lib/defines.h"
+#include "lib/generic/pack.h"
+#include "contrib/ucw/lib.h"
+
+/** Some built-in unfairness ... */
+#ifndef FAVOUR_IPV6
+#define FAVOUR_IPV6 20 /* 20ms bonus for v6 */
+#endif
+
+/** @internal Macro to set address structure. */
+#define ADDR_SET(sa, family, addr, len, port) do {\
+ memcpy(&sa ## _addr, (addr), (len)); \
+ sa ## _family = (family); \
+ sa ## _port = htons(port); \
+} while (0)
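+/* For illustration, ADDR_SET(ns->addr[pos].ip4.sin, AF_INET, addr, len, port)
+ * expands to writes into ns->addr[pos].ip4.sin_addr, .sin_family and
+ * .sin_port (the port converted to network byte order by htons()). */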
+
+/** Update nameserver representation with current name/address pair. */
+static void update_nsrep(struct kr_nsrep *ns, size_t pos, uint8_t *addr, size_t addr_len, int port)
+{
+ if (addr == NULL) {
+ ns->addr[pos].ip.sa_family = AF_UNSPEC;
+ return;
+ }
+
+ /* Rotate previous addresses to the right. */
+ memmove(ns->addr + pos + 1, ns->addr + pos, (KR_NSREP_MAXADDR - pos - 1) * sizeof(ns->addr[0]));
+
+ switch(addr_len) {
+ case sizeof(struct in_addr):
+ ADDR_SET(ns->addr[pos].ip4.sin, AF_INET, addr, addr_len, port); break;
+ case sizeof(struct in6_addr):
+ ADDR_SET(ns->addr[pos].ip6.sin6, AF_INET6, addr, addr_len, port); break;
+ default: assert(0); break;
+ }
+}
+
+static void update_nsrep_set(struct kr_nsrep *ns, const knot_dname_t *name, uint8_t *addr[], unsigned score)
+{
+	/* The NS list is not empty; an NS without an address cannot become the leader. */
+ if (!addr[0] && ns->addr[0].ip.sa_family != AF_UNSPEC) {
+ return;
+ }
+ /* Set new NS leader */
+ ns->name = name;
+ ns->score = score;
+ for (size_t i = 0; i < KR_NSREP_MAXADDR; ++i) {
+ if (addr[i]) {
+ void *addr_val = pack_obj_val(addr[i]);
+ size_t len = pack_obj_len(addr[i]);
+ update_nsrep(ns, i, addr_val, len, KR_DNS_PORT);
+ } else {
+ break;
+ }
+ }
+}
+
+#undef ADDR_SET
+
+/**
+ * \param addr_set pack with one IP address per element */
+static unsigned eval_addr_set(const pack_t *addr_set, struct kr_context *ctx,
+ struct kr_qflags opts, unsigned score, uint8_t *addr[])
+{
+ kr_nsrep_rtt_lru_t *rtt_cache = ctx->cache_rtt;
+ kr_nsrep_rtt_lru_entry_t *rtt_cache_entry_ptr[KR_NSREP_MAXADDR] = { NULL, };
+ assert (KR_NSREP_MAXADDR >= 2);
+ unsigned rtt_cache_entry_score[KR_NSREP_MAXADDR] = { score, KR_NS_MAX_SCORE + 1, };
+ uint64_t now = kr_now();
+
+	/* A name server is a better candidate if it has an address record. */
+ for (uint8_t *it = pack_head(*addr_set); it != pack_tail(*addr_set);
+ it = pack_obj_next(it)) {
+ void *val = pack_obj_val(it);
+ size_t len = pack_obj_len(it);
+ unsigned favour = 0;
+ bool is_valid = false;
+ /* Check if the address isn't disabled. */
+ if (len == sizeof(struct in6_addr)) {
+ is_valid = !(opts.NO_IPV6);
+ favour = FAVOUR_IPV6;
+ } else if (len == sizeof(struct in_addr)) {
+ is_valid = !(opts.NO_IPV4);
+ } else {
+ assert(!EINVAL);
+ is_valid = false;
+ }
+
+ if (!is_valid) {
+ continue;
+ }
+
+ /* Get score for the current address. */
+ kr_nsrep_rtt_lru_entry_t *cached = rtt_cache ?
+ lru_get_try(rtt_cache, val, len) :
+ NULL;
+ unsigned cur_addr_score = KR_NS_GLUED;
+ if (cached) {
+ cur_addr_score = cached->score;
+ if (cached->score >= KR_NS_TIMEOUT) {
+				/* Once an NS has been marked as timed out,
+				 * it won't participate in NS elections for at least
+				 * ctx->cache_rtt_tout_retry_interval milliseconds. */
+ uint64_t elapsed = now - cached->tout_timestamp;
+ elapsed = elapsed > UINT_MAX ? UINT_MAX : elapsed;
+ if (elapsed > ctx->cache_rtt_tout_retry_interval) {
+ /* Select this NS for probing in this particular query,
+ * but don't change the cached score.
+ * For other queries this NS will remain "timeouted". */
+ cur_addr_score = KR_NS_LONG - 1;
+ }
+ }
+ }
+
+ for (size_t i = 0; i < KR_NSREP_MAXADDR; ++i) {
+ if (cur_addr_score >= KR_NS_TIMEOUT) {
+ /* We can't use favour here.
+ * If all of the conditions below are true
+ *
+ * rtt_cache_entry_score[i] < KR_NS_TIMEOUT
+ * rtt_cache_entry_score[i] + favour > KR_NS_TIMEOUT
+ * cur_addr_score < rtt_cache_entry_score[i] + favour
+ *
+ * we will prefer "certainly dead" cur_addr_score
+ * instead of "almost dead, but alive" rtt_cache_entry_score[i]
+ */
+ if (cur_addr_score >= rtt_cache_entry_score[i]) {
+ continue;
+ }
+ }
+ if (cur_addr_score >= KR_NS_TIMEOUT
+ || cur_addr_score < rtt_cache_entry_score[i] + favour) {
+ /* Shake down previous contenders */
+ for (size_t j = KR_NSREP_MAXADDR - 1; j > i; --j) {
+ addr[j] = addr[j - 1];
+ rtt_cache_entry_ptr[j] = rtt_cache_entry_ptr[j - 1];
+ rtt_cache_entry_score[j] = rtt_cache_entry_score[j - 1];
+ }
+ addr[i] = it;
+ rtt_cache_entry_score[i] = cur_addr_score;
+ rtt_cache_entry_ptr[i] = cached;
+ break;
+ }
+ }
+ }
+
+ /* At this point, rtt_cache_entry_ptr contains up to KR_NSREP_MAXADDR
+ * pointers to the rtt cache entries with the best scores for the given addr_set.
+	 * Check whether any of them have timed out. */
+
+ for (size_t i = 0; i < KR_NSREP_MAXADDR; ++i) {
+ if (rtt_cache_entry_ptr[i] == NULL)
+ continue;
+ if (rtt_cache_entry_ptr[i]->score < KR_NS_TIMEOUT)
+ continue;
+
+ uint64_t elapsed = now - rtt_cache_entry_ptr[i]->tout_timestamp;
+ elapsed = elapsed > UINT_MAX ? UINT_MAX : elapsed;
+ if (elapsed <= ctx->cache_rtt_tout_retry_interval)
+ continue;
+
+		/* rtt_cache_entry_ptr[i] points to a timed-out RTT cache entry
+		 * whose ban on participating in elections has expired. */
+
+ if (VERBOSE_STATUS) {
+ void *val = pack_obj_val(addr[i]);
+ size_t len = pack_obj_len(addr[i]);
+ char sa_str[INET6_ADDRSTRLEN];
+ int af = (len == sizeof(struct in6_addr)) ? AF_INET6 : AF_INET;
+ inet_ntop(af, val, sa_str, sizeof(sa_str));
+ kr_log_verbose("[ ][nsre] probing timeouted NS: %s, score %i\n",
+ sa_str, rtt_cache_entry_ptr[i]->score);
+ }
+
+ rtt_cache_entry_ptr[i]->tout_timestamp = now;
+ }
+
+ return rtt_cache_entry_score[0];
+}
+
+static int eval_nsrep(const knot_dname_t *owner, const pack_t *addr_set, struct kr_query *qry)
+{
+ struct kr_nsrep *ns = &qry->ns;
+ struct kr_context *ctx = ns->ctx;
+ unsigned score = KR_NS_MAX_SCORE;
+ unsigned reputation = 0;
+ uint8_t *addr_choice[KR_NSREP_MAXADDR] = { NULL, };
+
+ /* Fetch NS reputation */
+ if (ctx->cache_rep) {
+ unsigned *cached = lru_get_try(ctx->cache_rep, (const char *)owner,
+ knot_dname_size(owner));
+ if (cached) {
+ reputation = *cached;
+ }
+ }
+
+ /* Favour nameservers with unknown addresses to probe them,
+ * otherwise discover the current best address for the NS. */
+ if (addr_set->len == 0) {
+ score = KR_NS_UNKNOWN;
+		/* If the server doesn't have IPv6, give it a disadvantage. */
+ if (reputation & KR_NS_NOIP6) {
+ score += FAVOUR_IPV6;
+			/* If the server is unknown but has a reputation record, treat it as timed out. */
+ if (reputation & KR_NS_NOIP4) {
+ score = KR_NS_UNKNOWN;
+				/* Try to start with a clean slate. */
+ if (!(qry->flags.NO_IPV6)) {
+ reputation &= ~KR_NS_NOIP6;
+ }
+ if (!(qry->flags.NO_IPV4)) {
+ reputation &= ~KR_NS_NOIP4;
+ }
+ }
+ }
+ } else {
+ score = eval_addr_set(addr_set, ctx, qry->flags, score, addr_choice);
+ }
+
+	/* Probabilistic bee-foraging strategy (naive).
+	 * The fastest NS is preferred by workers until it is depleted (times out or degrades),
+	 * while long-distance scouts probe other sources with low probability.
+	 * Servers on TIMEOUT will not be probed at all.
+	 * Servers with a score above KR_NS_LONG are periodically removed from the
+	 * reputation cache, so that kresd can re-probe them. */
+ if (score >= KR_NS_TIMEOUT) {
+ return kr_ok();
+ } else if (score <= ns->score &&
+ (score < KR_NS_LONG || qry->flags.NO_THROTTLE)) {
+ update_nsrep_set(ns, owner, addr_choice, score);
+ ns->reputation = reputation;
+ } else if (kr_rand_coin(1, 10) &&
+ !kr_rand_coin(score, KR_NS_MAX_SCORE)) {
+		/* With a 10% chance, probe a server with a probability that
+		 * decreases with its RTT (i.e. 1 - RTT/MAX_RTT). */
+ update_nsrep_set(ns, owner, addr_choice, score);
+ ns->reputation = reputation;
+ return 1; /* Stop evaluation */
+ } else if (ns->score > KR_NS_MAX_SCORE) {
+		/* Check if any server was already selected.
+		 * If not, pick the current server and continue evaluation. */
+ update_nsrep_set(ns, owner, addr_choice, score);
+ ns->reputation = reputation;
+ }
+
+ return kr_ok();
+}
+
+int kr_nsrep_set(struct kr_query *qry, size_t index, const struct sockaddr *sock)
+{
+ if (!qry) {
+ return kr_error(EINVAL);
+ }
+ if (index >= KR_NSREP_MAXADDR) {
+ return kr_error(ENOSPC);
+ }
+
+ if (!sock) {
+ qry->ns.name = (const uint8_t *)"";
+ qry->ns.addr[index].ip.sa_family = AF_UNSPEC;
+ return kr_ok();
+ }
+
+ switch (sock->sa_family) {
+ case AF_INET:
+ if (qry->flags.NO_IPV4) {
+ return kr_error(ENOENT);
+ }
+ qry->ns.addr[index].ip4 = *(const struct sockaddr_in *)sock;
+ break;
+ case AF_INET6:
+ if (qry->flags.NO_IPV6) {
+ return kr_error(ENOENT);
+ }
+ qry->ns.addr[index].ip6 = *(const struct sockaddr_in6 *)sock;
+ break;
+ default:
+ qry->ns.addr[index].ip.sa_family = AF_UNSPEC;
+ return kr_error(EINVAL);
+ }
+
+ qry->ns.name = (const uint8_t *)"";
+ /* Reset score on first entry */
+ if (index == 0) {
+ qry->ns.score = KR_NS_UNKNOWN;
+ qry->ns.reputation = 0;
+ }
+
+ /* Retrieve RTT from cache */
+ struct kr_context *ctx = qry->ns.ctx;
+ kr_nsrep_rtt_lru_entry_t *rtt_cache_entry = ctx
+ ? lru_get_try(ctx->cache_rtt, kr_inaddr(sock), kr_family_len(sock->sa_family))
+ : NULL;
+ if (rtt_cache_entry) {
+ qry->ns.score = MIN(qry->ns.score, rtt_cache_entry->score);
+ }
+
+ return kr_ok();
+}
+
+#define ELECT_INIT(ns, ctx_) do { \
+ (ns)->ctx = (ctx_); \
+ (ns)->addr[0].ip.sa_family = AF_UNSPEC; \
+ (ns)->reputation = 0; \
+ (ns)->score = KR_NS_MAX_SCORE + 1; \
+} while (0)
+
+int kr_nsrep_elect(struct kr_query *qry, struct kr_context *ctx)
+{
+ if (!qry || !ctx) {
+ //assert(!EINVAL);
+ return kr_error(EINVAL);
+ }
+
+ struct kr_nsrep *ns = &qry->ns;
+ ELECT_INIT(ns, ctx);
+
+ int ret = kr_ok();
+ trie_it_t *it;
+ for (it = trie_it_begin(qry->zone_cut.nsset); !trie_it_finished(it);
+ trie_it_next(it)) {
+ ret = eval_nsrep(/* we trust it's a correct dname */
+ (const knot_dname_t *)trie_it_key(it, NULL),
+ (const pack_t *)*trie_it_val(it), qry);
+ if (ret) break;
+ }
+ trie_it_free(it);
+
+ if (qry->ns.score <= KR_NS_MAX_SCORE && qry->ns.score >= KR_NS_LONG) {
+ /* This is a low-reliability probe,
+		 * go with TCP to get an ICMP reachability check. */
+ qry->flags.TCP = true;
+ }
+ return ret;
+}
+
+int kr_nsrep_elect_addr(struct kr_query *qry, struct kr_context *ctx)
+{
+ if (!qry || !ctx) {
+ //assert(!EINVAL);
+ return kr_error(EINVAL);
+ }
+
+ /* Get address list for this NS */
+ struct kr_nsrep *ns = &qry->ns;
+ ELECT_INIT(ns, ctx);
+ pack_t *addr_set = kr_zonecut_find(&qry->zone_cut, ns->name);
+ if (!addr_set) {
+ return kr_error(ENOENT);
+ }
+ /* Evaluate addr list */
+ uint8_t *addr_choice[KR_NSREP_MAXADDR] = { NULL, };
+ unsigned score = eval_addr_set(addr_set, ctx, qry->flags, ns->score, addr_choice);
+ update_nsrep_set(ns, ns->name, addr_choice, score);
+ return kr_ok();
+}
+
+#undef ELECT_INIT
+
+int kr_nsrep_update_rtt(struct kr_nsrep *ns, const struct sockaddr *addr,
+ unsigned score, kr_nsrep_rtt_lru_t *cache, int umode)
+{
+ if (!cache || umode > KR_NS_MAX || umode < 0) {
+ return kr_error(EINVAL);
+ }
+
+ /* Get `addr`, and later its raw string. */
+ if (addr) {
+ /* Caller provided specific address, OK. */
+ } else if (ns != NULL) {
+ addr = &ns->addr[0].ip;
+ } else {
+ assert(false && "kr_nsrep_update_rtt: don't know what address to update");
+ return kr_error(EINVAL);
+ }
+ const char *addr_in = kr_inaddr(addr);
+ size_t addr_len = kr_inaddr_len(addr);
+ if (!addr_in || addr_len <= 0) {
+ assert(false && "kr_nsrep_update_rtt: incorrect address");
+ return kr_error(EINVAL);
+ }
+
+ bool is_new_entry = false;
+ kr_nsrep_rtt_lru_entry_t *cur = lru_get_new(cache, addr_in, addr_len,
+ (&is_new_entry));
+ if (!cur) {
+ return kr_ok();
+ }
+ if (score <= KR_NS_GLUED) {
+ score = KR_NS_GLUED + 1;
+ }
+	/* For a new entry, reset the score unless KR_NS_UPDATE_NORESET
+	 * mode was requested. New items are zeroed by the LRU automatically. */
+ if (is_new_entry && umode != KR_NS_UPDATE_NORESET) {
+ umode = KR_NS_RESET;
+ }
+ unsigned new_score = 0;
+	/* Update the score, by default smoothing over the last two measurements. */
+ switch (umode) {
+ case KR_NS_UPDATE:
+ case KR_NS_UPDATE_NORESET:
+ new_score = (cur->score + score) / 2; break;
+ case KR_NS_RESET: new_score = score; break;
+ case KR_NS_ADD: new_score = MIN(KR_NS_MAX_SCORE - 1, cur->score + score); break;
+ case KR_NS_MAX: new_score = MAX(cur->score, score); break;
+ default: return kr_error(EINVAL);
+ }
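+	/* E.g. in KR_NS_UPDATE mode a stored score of 300 ms and a new
+	 * measurement of 100 ms smooth to (300 + 100) / 2 = 200 ms. */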
+ /* Score limits */
+ if (new_score > KR_NS_MAX_SCORE) {
+ new_score = KR_NS_MAX_SCORE;
+ }
+ if (new_score >= KR_NS_TIMEOUT && cur->score < KR_NS_TIMEOUT) {
+		/* Set the timestamp only when the NS has just become timed out. */
+ cur->tout_timestamp = kr_now();
+ }
+ cur->score = new_score;
+ return kr_ok();
+}
+
+int kr_nsrep_update_rep(struct kr_nsrep *ns, unsigned reputation, kr_nsrep_lru_t *cache)
+{
+ if (!ns || !cache ) {
+ return kr_error(EINVAL);
+ }
+
+ /* Store in the struct */
+ ns->reputation = reputation;
+ /* Store reputation in the LRU cache */
+ unsigned *cur = lru_get_new(cache, (const char *)ns->name,
+ knot_dname_size(ns->name), NULL);
+ if (cur) {
+ *cur = reputation;
+ }
+ return kr_ok();
+}
+
+int kr_nsrep_copy_set(struct kr_nsrep *dst, const struct kr_nsrep *src)
+{
+ if (!dst || !src ) {
+ return kr_error(EINVAL);
+ }
+
+ memcpy(dst, src, sizeof(struct kr_nsrep));
+ dst->name = (const uint8_t *)"";
+ dst->score = KR_NS_UNKNOWN;
+ dst->reputation = 0;
+
+ return kr_ok();
+}
+
+int kr_nsrep_sort(struct kr_nsrep *ns, struct kr_context *ctx)
+{
+ if (!ns || !ctx) {
+ assert(false);
+ return kr_error(EINVAL);
+ }
+
+ kr_nsrep_rtt_lru_t *rtt_cache = ctx->cache_rtt;
+
+ ns->reputation = 0;
+ ns->score = KR_NS_MAX_SCORE + 1;
+
+ if (ns->addr[0].ip.sa_family == AF_UNSPEC) {
+ return kr_error(EINVAL);
+ }
+
+ /* Compute the scores. Unfortunately there's no space for scores
+ * along the addresses. */
+ unsigned scores[KR_NSREP_MAXADDR];
+ int i;
+ bool timeouted_address_is_already_selected = false;
+ for (i = 0; i < KR_NSREP_MAXADDR; ++i) {
+ const struct sockaddr *sa = &ns->addr[i].ip;
+ if (sa->sa_family == AF_UNSPEC) {
+ break;
+ }
+ kr_nsrep_rtt_lru_entry_t *rtt_cache_entry = lru_get_try(rtt_cache,
+ kr_inaddr(sa),
+ kr_family_len(sa->sa_family));
+ if (!rtt_cache_entry) {
+ scores[i] = 1; /* prefer unknown to probe RTT */
+ } else if (rtt_cache_entry->score < KR_NS_FWD_TIMEOUT) {
+ /* some probability to bump bad ones up for re-probe */
+ scores[i] = rtt_cache_entry->score;
+ /* The lower the rtt, the more likely it will be selected. */
+ if (!kr_rand_coin(rtt_cache_entry->score, KR_NS_FWD_TIMEOUT)) {
+ scores[i] = 1;
+ }
+ } else {
+ uint64_t now = kr_now();
+ uint64_t elapsed = now - rtt_cache_entry->tout_timestamp;
+ scores[i] = KR_NS_MAX_SCORE + 1;
+ elapsed = elapsed > UINT_MAX ? UINT_MAX : elapsed;
+ if (elapsed > ctx->cache_rtt_tout_retry_interval &&
+ !timeouted_address_is_already_selected) {
+ scores[i] = 1;
+ rtt_cache_entry->tout_timestamp = now;
+ timeouted_address_is_already_selected = true;
+ }
+ }
+
+ /* Give advantage to IPv6. */
+ if (scores[i] <= KR_NS_MAX_SCORE && sa->sa_family == AF_INET) {
+ scores[i] += FAVOUR_IPV6;
+ }
+
+ if (VERBOSE_STATUS) {
+ kr_log_verbose("[ ][nsre] score %d for %s;\t cached RTT: %d\n",
+ scores[i], kr_straddr(sa),
+ rtt_cache_entry ? rtt_cache_entry->score : -1);
+ }
+ }
+
+ /* Select-sort the addresses. */
+ const int count = i;
+ for (i = 0; i < count - 1; ++i) {
+ /* find min from i onwards */
+ int min_i = i;
+ for (int j = i + 1; j < count; ++j) {
+ if (scores[j] < scores[min_i]) {
+ min_i = j;
+ }
+ }
+ /* swap the indices */
+ if (min_i != i) {
+ SWAP(scores[min_i], scores[i]);
+ SWAP(ns->addr[min_i], ns->addr[i]);
+ }
+ }
+
+ if (count > 0) {
+ ns->score = scores[0];
+ ns->reputation = 0;
+ }
+
+ return kr_ok();
+}
diff --git a/lib/nsrep.h b/lib/nsrep.h
new file mode 100644
index 0000000..cc35ca4
--- /dev/null
+++ b/lib/nsrep.h
@@ -0,0 +1,188 @@
+/* Copyright (C) 2014-2017 CZ.NIC, z.s.p.o. <knot-dns@labs.nic.cz>
+
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program. If not, see <https://www.gnu.org/licenses/>.
+ */
+
+#pragma once
+
+#include <netinet/in.h>
+#include <sys/socket.h>
+#include <libknot/dname.h>
+#include <limits.h>
+
+#include "lib/defines.h"
+#include "lib/generic/lru.h"
+
+struct kr_query;
+
+/**
+ * NS RTT score (special values).
+ * @note RTT is measured in milliseconds.
+ */
+enum kr_ns_score {
+ KR_NS_MAX_SCORE = 20 * KR_CONN_RTT_MAX, /* max possible value */
+	KR_NS_FWD_TIMEOUT = (95 * 10000) / 100, /* timeout for an upstream recursor:
+	                                          * 95 % of the maximum resolution time (10 s) */
+	KR_NS_TIMEOUT = (95 * KR_CONN_RTT_MAX) / 100, /* timeout for an upstream auth */
+ KR_NS_LONG = (3 * KR_NS_TIMEOUT) / 4,
+ KR_NS_UNKNOWN = KR_NS_TIMEOUT / 2,
+ KR_NS_PENALTY = 100,
+ KR_NS_GLUED = 10
+};
+
+/**
+ * See kr_nsrep_update_rtt()
+ */
+#define KR_NS_DEAD (((KR_NS_TIMEOUT * 4) + 3) / 3)
+#define KR_NS_FWD_DEAD (((KR_NS_FWD_TIMEOUT * 4) + 3) / 3)
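+/* Both "dead" limits work out to slightly more than 4/3 of the respective timeout. */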
+
+/** Once an NS has been marked as timed out, it won't participate in NS elections
+ * for at least KR_NS_TIMEOUT_RETRY_INTERVAL milliseconds (currently one second). */
+#define KR_NS_TIMEOUT_RETRY_INTERVAL 1000
+
+/**
+ * NS QoS flags.
+ */
+enum kr_ns_rep {
+ KR_NS_NOIP4 = 1 << 0, /**< NS has no IPv4 */
+ KR_NS_NOIP6 = 1 << 1, /**< NS has no IPv6 */
+ KR_NS_NOEDNS = 1 << 2 /**< NS has no EDNS support */
+};
+
+/**
+ * NS RTT update modes.
+ * The first update is always KR_NS_RESET unless
+ * KR_NS_UPDATE_NORESET mode has been chosen.
+ */
+enum kr_ns_update_mode {
+	KR_NS_UPDATE = 0, /**< Smooth over the last two measurements */
+ KR_NS_UPDATE_NORESET, /**< Same as KR_NS_UPDATE, but disable fallback to
+ * KR_NS_RESET on newly added entries.
+ * Zero is used as initial value. */
+ KR_NS_RESET, /**< Set to given value */
+ KR_NS_ADD, /**< Increment current value */
+ KR_NS_MAX /**< Set to maximum of current/proposed value. */
+};
+
+struct kr_nsrep_rtt_lru_entry {
+	unsigned score;          /* Combined RTT. */
+	uint64_t tout_timestamp; /* The time when the score became greater
+				  * than or equal to KR_NS_TIMEOUT.
+				  * Meaningful only when score >= KR_NS_TIMEOUT. */
+};
+
+typedef struct kr_nsrep_rtt_lru_entry kr_nsrep_rtt_lru_entry_t;
+
+/**
+ * NS QoS tracking.
+ */
+typedef lru_t(kr_nsrep_rtt_lru_entry_t) kr_nsrep_rtt_lru_t;
+
+/**
+ * NS reputation tracking.
+ */
+typedef lru_t(unsigned) kr_nsrep_lru_t;
+
+/* Maximum count of addresses probed in one go (last is left empty) */
+#define KR_NSREP_MAXADDR 4
+
+/**
+ * Name server representation.
+ * Contains extra information about the name server, e.g. score
+ * or other metadata.
+ */
+struct kr_nsrep
+{
+ unsigned score; /**< NS score */
+ unsigned reputation; /**< NS reputation */
+ const knot_dname_t *name; /**< NS name */
+ struct kr_context *ctx; /**< Resolution context */
+ union inaddr addr[KR_NSREP_MAXADDR]; /**< NS address(es) */
+};
+
+/**
+ * Set given NS address. (Very low-level access to the list.)
+ * @param qry updated query
+ * @param index index of the updated target
+ * @param sock socket address to use (sockaddr_in or sockaddr_in6 or NULL)
+ * @return 0 or an error code, in particular kr_error(ENOENT) for net.ipvX
+ */
+KR_EXPORT
+int kr_nsrep_set(struct kr_query *qry, size_t index, const struct sockaddr *sock);
+
+/**
+ * Elect best nameserver/address pair from the nsset.
+ * @param qry updated query
+ * @param ctx resolution context
+ * @return 0 or an error code
+ */
+KR_EXPORT
+int kr_nsrep_elect(struct kr_query *qry, struct kr_context *ctx);
+
+/**
+ * Elect best nameserver/address pair from the nsset.
+ * @param qry updated query
+ * @param ctx resolution context
+ * @return 0 or an error code
+ */
+KR_EXPORT
+int kr_nsrep_elect_addr(struct kr_query *qry, struct kr_context *ctx);
+
+/**
+ * Update NS address RTT information.
+ *
+ * @brief In KR_NS_UPDATE mode the score is smoothed over the last two measurements.
+ *
+ * @param ns updated NS representation
+ * @param addr chosen address (NULL for first)
+ * @param score new score (i.e. RTT), see enum kr_ns_score
+ * @param cache RTT LRU cache
+ * @param umode update mode (KR_NS_UPDATE or KR_NS_RESET or KR_NS_ADD)
+ * @return 0 on success, error code on failure
+ */
+KR_EXPORT
+int kr_nsrep_update_rtt(struct kr_nsrep *ns, const struct sockaddr *addr,
+ unsigned score, kr_nsrep_rtt_lru_t *cache, int umode);
+
+/**
+ * Update NSSET reputation information.
+ *
+ * @param ns updated NS representation
+ * @param reputation combined reputation flags, see enum kr_ns_rep
+ * @param cache LRU cache
+ * @return 0 on success, error code on failure
+ */
+KR_EXPORT
+int kr_nsrep_update_rep(struct kr_nsrep *ns, unsigned reputation, kr_nsrep_lru_t *cache);
+
+/**
+ * Copy NSSET reputation information and reset the score.
+ *
+ * @param dst updated NS representation
+ * @param src source NS representation
+ * @return 0 on success, error code on failure
+ */
+int kr_nsrep_copy_set(struct kr_nsrep *dst, const struct kr_nsrep *src);
+
+/**
+ * Sort addresses in the query nsrep list by cached RTT.
+ * If the RTT is greater than KR_NS_TIMEOUT, the address is placed at the beginning
+ * of the nsrep list once per cache.ns_tout() milliseconds; otherwise it is sorted
+ * as if its cached RTT were KR_NS_MAX_SCORE + 1.
+ * @param ns updated kr_nsrep
+ * @param ctx name resolution context.
+ * @return 0 or an error code
+ * @note ns reputation is zeroed and score is set to KR_NS_MAX_SCORE + 1.
+ */
+KR_EXPORT
+int kr_nsrep_sort(struct kr_nsrep *ns, struct kr_context *ctx);
diff --git a/lib/resolve.c b/lib/resolve.c
new file mode 100644
index 0000000..8718f25
--- /dev/null
+++ b/lib/resolve.c
@@ -0,0 +1,1648 @@
+/* Copyright (C) 2014-2017 CZ.NIC, z.s.p.o. <knot-dns@labs.nic.cz>
+
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program. If not, see <https://www.gnu.org/licenses/>.
+ */
+
+#include <ctype.h>
+#include <inttypes.h>
+#include <stdio.h>
+#include <fcntl.h>
+#include <assert.h>
+#include <arpa/inet.h>
+#include <libknot/rrtype/rdname.h>
+#include <libknot/descriptor.h>
+#include <ucw/mempool.h>
+#include "lib/resolve.h"
+#include "lib/layer.h"
+#include "lib/rplan.h"
+#include "lib/layer/iterate.h"
+#include "lib/dnssec/ta.h"
+#include "lib/dnssec.h"
+#if defined(ENABLE_COOKIES)
+#include "lib/cookies/control.h"
+#include "lib/cookies/helper.h"
+#include "lib/cookies/nonce.h"
+#else /* Define compatibility macros */
+#define KNOT_EDNS_OPTION_COOKIE 10
+#endif /* defined(ENABLE_COOKIES) */
+
+#define VERBOSE_MSG(qry, ...) QRVERBOSE((qry), "resl", __VA_ARGS__)
+
+bool kr_rank_check(uint8_t rank)
+{
+ switch (rank & ~KR_RANK_AUTH) {
+ case KR_RANK_INITIAL:
+ case KR_RANK_OMIT:
+ case KR_RANK_TRY:
+ case KR_RANK_INDET:
+ case KR_RANK_BOGUS:
+ case KR_RANK_MISMATCH:
+ case KR_RANK_MISSING:
+ case KR_RANK_INSECURE:
+ case KR_RANK_SECURE:
+ return true;
+ default:
+ return false;
+ }
+}
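+/* Note that KR_RANK_AUTH is masked out above, so e.g. both KR_RANK_SECURE and
+ * (KR_RANK_SECURE | KR_RANK_AUTH) pass the check. */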
+
+/** @internal Set @a yielded to all RRs with matching @a qry_uid. */
+static void set_yield(ranked_rr_array_t *array, const uint32_t qry_uid, const bool yielded)
+{
+ for (unsigned i = 0; i < array->len; ++i) {
+ ranked_rr_array_entry_t *entry = array->at[i];
+ if (entry->qry_uid == qry_uid) {
+ entry->yielded = yielded;
+ }
+ }
+}
+
+/**
+ * @internal Defer execution of current query.
+ * The current layer state and input will be pushed to a stack and resumed on next iteration.
+ */
+static int consume_yield(kr_layer_t *ctx, knot_pkt_t *pkt)
+{
+ struct kr_request *req = ctx->req;
+ size_t pkt_size = pkt->size;
+ if (knot_pkt_has_tsig(pkt)) {
+ pkt_size += pkt->tsig_wire.len;
+ }
+ knot_pkt_t *pkt_copy = knot_pkt_new(NULL, pkt_size, &req->pool);
+ struct kr_layer_pickle *pickle = mm_alloc(&req->pool, sizeof(*pickle));
+ if (pickle && pkt_copy && knot_pkt_copy(pkt_copy, pkt) == 0) {
+ struct kr_query *qry = req->current_query;
+ pickle->api = ctx->api;
+ pickle->state = ctx->state;
+ pickle->pkt = pkt_copy;
+ pickle->next = qry->deferred;
+ qry->deferred = pickle;
+ set_yield(&req->answ_selected, qry->uid, true);
+ set_yield(&req->auth_selected, qry->uid, true);
+ return kr_ok();
+ }
+ return kr_error(ENOMEM);
+}
+static int begin_yield(kr_layer_t *ctx) { return kr_ok(); }
+static int reset_yield(kr_layer_t *ctx) { return kr_ok(); }
+static int finish_yield(kr_layer_t *ctx) { return kr_ok(); }
+static int produce_yield(kr_layer_t *ctx, knot_pkt_t *pkt) { return kr_ok(); }
+static int checkout_yield(kr_layer_t *ctx, knot_pkt_t *packet, struct sockaddr *dst, int type) { return kr_ok(); }
+static int answer_finalize_yield(kr_layer_t *ctx) { return kr_ok(); }
+
+/** @internal Macro for iterating module layers. */
+#define RESUME_LAYERS(from, r, qry, func, ...) \
+ (r)->current_query = (qry); \
+ for (size_t i = (from); i < (r)->ctx->modules->len; ++i) { \
+ struct kr_module *mod = (r)->ctx->modules->at[i]; \
+ if (mod->layer) { \
+ struct kr_layer layer = {.state = (r)->state, .api = mod->layer(mod), .req = (r)}; \
+ if (layer.api && layer.api->func) { \
+ (r)->state = layer.api->func(&layer, ##__VA_ARGS__); \
+ if ((r)->state == KR_STATE_YIELD) { \
+ func ## _yield(&layer, ##__VA_ARGS__); \
+ break; \
+ } \
+ } \
+ } \
+ } /* Invalidate current query. */ \
+ (r)->current_query = NULL
+
+/** @internal Macro for starting module iteration. */
+#define ITERATE_LAYERS(req, qry, func, ...) RESUME_LAYERS(0, req, qry, func, ##__VA_ARGS__)
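+/* For example, ITERATE_LAYERS(request, qry, consume, pkt) runs each module's
+ * consume() callback on pkt in order, stopping early if one of them yields. */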
+
+/** @internal Find layer id matching API. */
+static inline size_t layer_id(struct kr_request *req, const struct kr_layer_api *api) {
+ module_array_t *modules = req->ctx->modules;
+ for (size_t i = 0; i < modules->len; ++i) {
+ struct kr_module *mod = modules->at[i];
+ if (mod->layer && mod->layer(mod) == api) {
+ return i;
+ }
+ }
+ return 0; /* Not found, try all. */
+}
+
+/* @internal We don't need to deal with locale here */
+KR_CONST static inline bool isletter(unsigned chr)
+{ return (chr | 0x20 /* tolower */) - 'a' <= 'z' - 'a'; }
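+/* ORing with 0x20 lowercases an ASCII letter; thanks to the unsigned
+ * comparison, non-letters wrap around and fail the <= test. */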
+
+/* Randomize QNAME letter case.
+ * This adds at most 32 bits of randomness, which is more than the average number of letters in a domain name.
+ * https://tools.ietf.org/html/draft-vixie-dnsext-dns0x20-00
+ */
+static void randomized_qname_case(knot_dname_t * restrict qname, uint32_t secret)
+{
+ if (secret == 0) {
+ return;
+ }
+ assert(qname);
+	const int len = knot_dname_size(qname) - 2; /* Skip the first length byte and the root label. */
+ for (int i = 0; i < len; ++i) {
+ if (isletter(*++qname)) {
+ *qname ^= ((secret >> (i & 31)) & 1) * 0x20;
+ }
+ }
+}
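+/* For instance, with a non-zero secret a query for "www.example.com" may go
+ * out as e.g. "wWw.exAmple.cOm"; an upstream answer echoing different case is
+ * a hint of spoofing. */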
+
+/** Invalidate current NS/addr pair. */
+static int invalidate_ns(struct kr_rplan *rplan, struct kr_query *qry)
+{
+ if (qry->ns.addr[0].ip.sa_family != AF_UNSPEC) {
+ const char *addr = kr_inaddr(&qry->ns.addr[0].ip);
+ int addr_len = kr_inaddr_len(&qry->ns.addr[0].ip);
+ return kr_zonecut_del(&qry->zone_cut, qry->ns.name, addr, addr_len);
+ } else {
+ return kr_zonecut_del_all(&qry->zone_cut, qry->ns.name);
+ }
+}
+
+/** This turns off QNAME minimisation if there is a non-terminal between the current zone cut and the target name.
+ * It saves several minimisation steps, as the zone cut is likely the final one.
+ */
+static void check_empty_nonterms(struct kr_query *qry, knot_pkt_t *pkt, struct kr_cache *cache, uint32_t timestamp)
+{
+ // FIXME cleanup, etc.
+#if 0
+ if (qry->flags.NO_MINIMIZE) {
+ return;
+ }
+
+ const knot_dname_t *target = qry->sname;
+ const knot_dname_t *cut_name = qry->zone_cut.name;
+ if (!target || !cut_name)
+ return;
+
+ struct kr_cache_entry *entry = NULL;
+ /* @note: The non-terminal must be direct child of zone cut (e.g. label distance <= 2),
+ * otherwise this would risk leaking information to parent if the NODATA TTD > zone cut TTD. */
+ int labels = knot_dname_labels(target, NULL) - knot_dname_labels(cut_name, NULL);
+ while (target[0] && labels > 2) {
+ target = knot_wire_next_label(target, NULL);
+ --labels;
+ }
+ for (int i = 0; i < labels; ++i) {
+ int ret = kr_cache_peek(cache, KR_CACHE_PKT, target, KNOT_RRTYPE_NS, &entry, &timestamp);
+ if (ret == 0) { /* Either NXDOMAIN or NODATA, start here. */
+ /* @todo We could stop resolution here for NXDOMAIN, but we can't because of broken CDNs */
+ qry->flags.NO_MINIMIZE = true;
+ kr_make_query(qry, pkt);
+ break;
+ }
+ assert(target[0]);
+ target = knot_wire_next_label(target, NULL);
+ }
+ kr_cache_sync(cache);
+#endif
+}
+
+static int ns_fetch_cut(struct kr_query *qry, const knot_dname_t *requested_name,
+ struct kr_request *req, knot_pkt_t *pkt)
+{
+	/* It can happen that the parent query already has a
+	 * provably insecure zone cut which is not in the cache yet. */
+ struct kr_qflags pflags;
+ if (qry->parent) {
+ pflags = qry->parent->flags;
+ }
+ const bool is_insecured = qry->parent != NULL
+ && !(pflags.AWAIT_IPV4 || pflags.AWAIT_IPV6)
+ && (pflags.DNSSEC_INSECURE || pflags.DNSSEC_NODS);
+
+ /* Want DNSSEC if it's possible to secure this name
+ * (e.g. is covered by any TA) */
+ if (is_insecured) {
+		/* If the parent is insecure we don't want DNSSEC
+		 * even if the cut name is covered by a TA. */
+ qry->flags.DNSSEC_WANT = false;
+ qry->flags.DNSSEC_INSECURE = true;
+ VERBOSE_MSG(qry, "=> going insecure because parent query is insecure\n");
+ } else if (kr_ta_covers_qry(req->ctx, qry->zone_cut.name, KNOT_RRTYPE_NS)) {
+ qry->flags.DNSSEC_WANT = true;
+ } else {
+ qry->flags.DNSSEC_WANT = false;
+ VERBOSE_MSG(qry, "=> going insecure because there's no covering TA\n");
+ }
+
+ struct kr_zonecut cut_found;
+ kr_zonecut_init(&cut_found, requested_name, req->rplan.pool);
+	/* The cut that is found can differ from the cut that was requested,
+	 * so unless we are already insecure,
+	 * try to fetch the TA & keys even if the initial cut name is not covered by a TA. */
+ bool secured = !is_insecured;
+ int ret = kr_zonecut_find_cached(req->ctx, &cut_found, requested_name,
+ qry, &secured);
+ if (ret == kr_error(ENOENT)) {
+ /* No cached cut found, start from SBELT
+ * and issue priming query. */
+ kr_zonecut_deinit(&cut_found);
+ ret = kr_zonecut_set_sbelt(req->ctx, &qry->zone_cut);
+ if (ret != 0) {
+ return KR_STATE_FAIL;
+ }
+ VERBOSE_MSG(qry, "=> using root hints\n");
+ qry->flags.AWAIT_CUT = false;
+ return KR_STATE_DONE;
+ } else if (ret != kr_ok()) {
+ kr_zonecut_deinit(&cut_found);
+ return KR_STATE_FAIL;
+ }
+
+ /* Find out security status.
+ * Go insecure if the zone cut is provably insecure */
+ if ((qry->flags.DNSSEC_WANT) && !secured) {
+ VERBOSE_MSG(qry, "=> NS is provably without DS, going insecure\n");
+ qry->flags.DNSSEC_WANT = false;
+ qry->flags.DNSSEC_INSECURE = true;
+ }
+ /* Zonecut name can change, check it again
+ * to prevent unnecessary DS & DNSKEY queries */
+ if (!(qry->flags.DNSSEC_INSECURE) &&
+ kr_ta_covers_qry(req->ctx, cut_found.name, KNOT_RRTYPE_NS)) {
+ qry->flags.DNSSEC_WANT = true;
+ } else {
+ qry->flags.DNSSEC_WANT = false;
+ }
+ /* Check if any DNSKEY found for cached cut */
+ if (qry->flags.DNSSEC_WANT && cut_found.key == NULL &&
+ kr_zonecut_is_empty(&cut_found)) {
+ /* Cut found and there are no proofs of zone insecurity.
+ * But no DNSKEY found and no glue fetched.
+		 * We have a circular dependency: we must fetch A/AAAA
+		 * from an authoritative, but we have no key to verify it. */
+ kr_zonecut_deinit(&cut_found);
+ if (requested_name[0] != '\0' ) {
+ /* If not root - try next label */
+ return KR_STATE_CONSUME;
+ }
+ /* No cached cut & keys found, start from SBELT */
+ ret = kr_zonecut_set_sbelt(req->ctx, &qry->zone_cut);
+ if (ret != 0) {
+ return KR_STATE_FAIL;
+ }
+ VERBOSE_MSG(qry, "=> using root hints\n");
+ qry->flags.AWAIT_CUT = false;
+ return KR_STATE_DONE;
+ }
+ /* Use the found zone cut. */
+ kr_zonecut_move(&qry->zone_cut, &cut_found);
+ /* Check if there's a non-terminal between target and current cut. */
+ struct kr_cache *cache = &req->ctx->cache;
+ check_empty_nonterms(qry, pkt, cache, qry->timestamp.tv_sec);
+ /* Cut found */
+ return KR_STATE_PRODUCE;
+}
+
+static int ns_resolve_addr(struct kr_query *qry, struct kr_request *param)
+{
+ struct kr_rplan *rplan = &param->rplan;
+ struct kr_context *ctx = param->ctx;
+
+
+	/* Start NS queries from the root to avoid certain cases
+	 * where an NS drops out of the cache and the rest are unavailable;
+	 * this would lead to a dependency loop in the current zone cut.
+	 * Prefer IPv6 and continue with IPv4 if it is not available.
+ */
+ uint16_t next_type = 0;
+ if (!(qry->flags.AWAIT_IPV6) &&
+ !(ctx->options.NO_IPV6)) {
+ next_type = KNOT_RRTYPE_AAAA;
+ qry->flags.AWAIT_IPV6 = true;
+ } else if (!(qry->flags.AWAIT_IPV4) &&
+ !(ctx->options.NO_IPV4)) {
+ next_type = KNOT_RRTYPE_A;
+ qry->flags.AWAIT_IPV4 = true;
+		/* Hmm, no usable IPv6 then. */
+ qry->ns.reputation |= KR_NS_NOIP6;
+ kr_nsrep_update_rep(&qry->ns, qry->ns.reputation, ctx->cache_rep);
+ }
+	/* Bail out if the query is already pending or would create a dependency loop. */
+ if (!next_type || kr_rplan_satisfies(qry->parent, qry->ns.name, KNOT_CLASS_IN, next_type)) {
+ /* Fall back to SBELT if root server query fails. */
+ if (!next_type && qry->zone_cut.name[0] == '\0') {
+ VERBOSE_MSG(qry, "=> fallback to root hints\n");
+ kr_zonecut_set_sbelt(ctx, &qry->zone_cut);
+ qry->flags.NO_THROTTLE = true; /* Pick even bad SBELT servers */
+ return kr_error(EAGAIN);
+ }
+ /* No IPv4 nor IPv6, flag server as unusable. */
+ VERBOSE_MSG(qry, "=> unresolvable NS address, bailing out\n");
+ qry->ns.reputation |= KR_NS_NOIP4 | KR_NS_NOIP6;
+ kr_nsrep_update_rep(&qry->ns, qry->ns.reputation, ctx->cache_rep);
+ invalidate_ns(rplan, qry);
+ return kr_error(EHOSTUNREACH);
+ }
+ /* Push new query to the resolution plan */
+ struct kr_query *next =
+ kr_rplan_push(rplan, qry, qry->ns.name, KNOT_CLASS_IN, next_type);
+ if (!next) {
+ return kr_error(ENOMEM);
+ }
+ next->flags.NONAUTH = true;
+
+ /* At the root level with no NS addresses, add SBELT subrequest. */
+ int ret = 0;
+ if (qry->zone_cut.name[0] == '\0') {
+ ret = kr_zonecut_set_sbelt(ctx, &next->zone_cut);
+ if (ret == 0) { /* Copy TA and key since it's the same cut to avoid lookup. */
+ kr_zonecut_copy_trust(&next->zone_cut, &qry->zone_cut);
+ kr_zonecut_set_sbelt(ctx, &qry->zone_cut); /* Add SBELT to parent in case query fails. */
+ qry->flags.NO_THROTTLE = true; /* Pick even bad SBELT servers */
+ }
+ } else {
+ next->flags.AWAIT_CUT = true;
+ }
+ return ret;
+}
+
+static int edns_put(knot_pkt_t *pkt, bool reclaim)
+{
+ if (!pkt->opt_rr) {
+ return kr_ok();
+ }
+ if (reclaim) {
+ /* Reclaim reserved size. */
+ int ret = knot_pkt_reclaim(pkt, knot_edns_wire_size(pkt->opt_rr));
+ if (ret != 0) {
+ return ret;
+ }
+ }
+ /* Write to packet. */
+ assert(pkt->current == KNOT_ADDITIONAL);
+ return knot_pkt_put(pkt, KNOT_COMPR_HINT_NONE, pkt->opt_rr, KNOT_PF_FREE);
+}
+
+/** Remove the last EDNS OPT RR written to the packet and reserve the freed space. */
+static int edns_erase_and_reserve(knot_pkt_t *pkt)
+{
+ /* Nothing to be done. */
+ if (!pkt || !pkt->opt_rr) {
+ return 0;
+ }
+
+ /* Fail if the data are located elsewhere than at the end of packet. */
+ if (pkt->current != KNOT_ADDITIONAL ||
+ pkt->opt_rr != &pkt->rr[pkt->rrset_count - 1]) {
+ return -1;
+ }
+
+ size_t len = knot_rrset_size(pkt->opt_rr);
+ int16_t rr_removed = pkt->opt_rr->rrs.count;
+ /* Decrease rrset counters. */
+ pkt->rrset_count -= 1;
+ pkt->sections[pkt->current].count -= 1;
+ pkt->size -= len;
+ knot_wire_add_arcount(pkt->wire, -rr_removed); /* ADDITIONAL */
+
+ pkt->opt_rr = NULL;
+
+ /* Reserve the freed space. */
+ return knot_pkt_reserve(pkt, len);
+}
+
+static int edns_create(knot_pkt_t *pkt, knot_pkt_t *template, struct kr_request *req)
+{
+ pkt->opt_rr = knot_rrset_copy(req->ctx->opt_rr, &pkt->mm);
+ size_t wire_size = knot_edns_wire_size(pkt->opt_rr);
+#if defined(ENABLE_COOKIES)
+ if (req->ctx->cookie_ctx.clnt.enabled ||
+ req->ctx->cookie_ctx.srvr.enabled) {
+ wire_size += KR_COOKIE_OPT_MAX_LEN;
+ }
+#endif /* defined(ENABLE_COOKIES) */
+ if (req->qsource.flags.tls) {
+ if (req->ctx->tls_padding == -1)
+ /* FIXME: we do not know how to reserve space for the
+ * default padding policy, since we can't predict what
+			 * it will select. So I'm just guessing :/ */
+ wire_size += KNOT_EDNS_OPTION_HDRLEN + 512;
+ if (req->ctx->tls_padding >= 2)
+ wire_size += KNOT_EDNS_OPTION_HDRLEN + req->ctx->tls_padding;
+ }
+ return knot_pkt_reserve(pkt, wire_size);
+}
+
+static int answer_prepare(struct kr_request *req, knot_pkt_t *query)
+{
+ knot_pkt_t *answer = req->answer;
+ if (knot_pkt_init_response(answer, query) != 0) {
+ return kr_error(ENOMEM); /* Failed to initialize answer */
+ }
+ /* Handle EDNS in the query */
+ if (knot_pkt_has_edns(query)) {
+ answer->opt_rr = knot_rrset_copy(req->ctx->opt_rr, &answer->mm);
+ if (answer->opt_rr == NULL){
+ return kr_error(ENOMEM);
+ }
+ /* Set DO bit if set (DNSSEC requested). */
+ if (knot_pkt_has_dnssec(query)) {
+ knot_edns_set_do(answer->opt_rr);
+ }
+ }
+ return kr_ok();
+}
+
+/** @return error code; being forced to truncate the packet is not treated as an error. */
+static int write_extra_records(const rr_array_t *arr, uint16_t reorder, knot_pkt_t *answer)
+{
+ for (size_t i = 0; i < arr->len; ++i) {
+ int err = knot_pkt_put_rotate(answer, 0, arr->at[i], reorder, 0);
+ if (err != KNOT_EOK) {
+ return err == KNOT_ESPACE ? kr_ok() : kr_error(err);
+ }
+ }
+ return kr_ok();
+}
+
+/**
+ * @param all_secure optionally &&-combine the security of written RRs into its value
+ *	(i.e. if you pass a pointer to false, it will always remain false).
+ * @param all_cname optionally output whether all written RRs are CNAMEs and RRSIGs of CNAMEs.
+ * @return error code; being forced to truncate the packet is not treated as an error.
+ */
+static int write_extra_ranked_records(const ranked_rr_array_t *arr, uint16_t reorder,
+ knot_pkt_t *answer, bool *all_secure, bool *all_cname)
+{
+ const bool has_dnssec = knot_pkt_has_dnssec(answer);
+ bool all_sec = true;
+ bool all_cn = (all_cname != NULL); /* optim.: init as false if not needed */
+ int err = kr_ok();
+
+ for (size_t i = 0; i < arr->len; ++i) {
+ ranked_rr_array_entry_t * entry = arr->at[i];
+ if (!entry->to_wire) {
+ continue;
+ }
+ knot_rrset_t *rr = entry->rr;
+ if (!has_dnssec) {
+ if (rr->type != knot_pkt_qtype(answer) && knot_rrtype_is_dnssec(rr->type)) {
+ continue;
+ }
+ }
+ err = knot_pkt_put_rotate(answer, 0, rr, reorder, 0);
+ if (err != KNOT_EOK) {
+ if (err == KNOT_ESPACE) {
+ err = kr_ok();
+ }
+ break;
+ }
+
+ if (rr->type != KNOT_RRTYPE_RRSIG) {
+ all_sec = all_sec && kr_rank_test(entry->rank, KR_RANK_SECURE);
+ }
+ all_cn = all_cn && kr_rrset_type_maysig(entry->rr) == KNOT_RRTYPE_CNAME;
+ }
+
+ if (all_secure) {
+ *all_secure = *all_secure && all_sec;
+ }
+ if (all_cname) {
+ *all_cname = all_cn;
+ }
+ return err;
+}
+
+/** @internal Add an EDNS padding RR into the answer if requested and required. */
+static int answer_padding(struct kr_request *request)
+{
+ if (!request || !request->answer || !request->ctx) {
+ assert(false);
+ return kr_error(EINVAL);
+ }
+ int32_t padding = request->ctx->tls_padding;
+ knot_pkt_t *answer = request->answer;
+ knot_rrset_t *opt_rr = answer->opt_rr;
+ int32_t pad_bytes = -1;
+
+ if (padding == -1) { /* use the default padding policy from libknot */
+ pad_bytes = knot_pkt_default_padding_size(answer, opt_rr);
+ }
+ if (padding >= 2) {
+ int32_t max_pad_bytes = knot_edns_get_payload(opt_rr) - (answer->size + knot_rrset_size(opt_rr));
+ pad_bytes = MIN(knot_edns_alignment_size(answer->size, knot_rrset_size(opt_rr), padding),
+ max_pad_bytes);
+ }
+
+ if (pad_bytes >= 0) {
+ uint8_t zeros[MAX(1, pad_bytes)];
+ memset(zeros, 0, sizeof(zeros));
+ int r = knot_edns_add_option(opt_rr, KNOT_EDNS_OPTION_PADDING,
+ pad_bytes, zeros, &answer->mm);
+ if (r != KNOT_EOK) {
+ knot_rrset_clear(opt_rr, &answer->mm);
+ return kr_error(r);
+ }
+ }
+ return kr_ok();
+}
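+/* For illustration: with request->ctx->tls_padding = 468 (an example block
+ * size), a short answer gets a zero-filled EDNS Padding option rounding it up
+ * to the next multiple of 468 bytes, capped by the advertised EDNS payload. */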
+
+static int answer_fail(struct kr_request *request)
+{
+ knot_pkt_t *answer = request->answer;
+ int ret = kr_pkt_clear_payload(answer);
+ knot_wire_clear_ad(answer->wire);
+ knot_wire_clear_aa(answer->wire);
+ knot_wire_set_rcode(answer->wire, KNOT_RCODE_SERVFAIL);
+ if (ret == 0 && answer->opt_rr) {
+ /* OPT in SERVFAIL response is still useful for cookies/additional info. */
+ knot_pkt_begin(answer, KNOT_ADDITIONAL);
+ answer_padding(request); /* Ignore failed padding in SERVFAIL answer. */
+ ret = edns_put(answer, false);
+ }
+ return ret;
+}
+
+static int answer_finalize(struct kr_request *request, int state)
+{
+ struct kr_rplan *rplan = &request->rplan;
+ knot_pkt_t *answer = request->answer;
+
+ /* Always set SERVFAIL for bogus answers. */
+ if (state == KR_STATE_FAIL && rplan->pending.len > 0) {
+ struct kr_query *last = array_tail(rplan->pending);
+ if ((last->flags.DNSSEC_WANT) && (last->flags.DNSSEC_BOGUS)) {
+ return answer_fail(request);
+ }
+ }
+
+ struct kr_query *last = rplan->resolved.len > 0 ? array_tail(rplan->resolved) : NULL;
+ /* TODO ^^^^ this is slightly fragile */
+
+ /* AD flag. We can only change `secure` from true to false.
+ * Be conservative. Primary approach: check ranks of all RRs in wire.
+ * Only "negative answers" need special handling. */
+ bool secure = last != NULL && state == KR_STATE_DONE /*< suspicious otherwise */
+ && knot_pkt_qtype(answer) != KNOT_RRTYPE_RRSIG;
+ if (last && (last->flags.STUB)) {
+ secure = false; /* don't trust forwarding for now */
+ }
+ if (last && (last->flags.DNSSEC_OPTOUT)) {
+ VERBOSE_MSG(NULL, "AD: opt-out\n");
+ secure = false; /* the last answer is insecure due to opt-out */
+ }
+
+ const uint16_t reorder = last ? last->reorder : 0;
+ bool answ_all_cnames = false/*arbitrary*/;
+ if (request->answ_selected.len > 0) {
+ assert(answer->current <= KNOT_ANSWER);
+ /* Write answer records. */
+ if (answer->current < KNOT_ANSWER) {
+ knot_pkt_begin(answer, KNOT_ANSWER);
+ }
+ if (write_extra_ranked_records(&request->answ_selected, reorder,
+ answer, &secure, &answ_all_cnames))
+ {
+ return answer_fail(request);
+ }
+ }
+
+ /* Write authority records. */
+ if (answer->current < KNOT_AUTHORITY) {
+ knot_pkt_begin(answer, KNOT_AUTHORITY);
+ }
+ if (write_extra_ranked_records(&request->auth_selected, reorder,
+ answer, &secure, NULL)) {
+ return answer_fail(request);
+ }
+ /* Write additional records. */
+ knot_pkt_begin(answer, KNOT_ADDITIONAL);
+ if (write_extra_records(&request->additional, reorder, answer)) {
+ return answer_fail(request);
+ }
+ /* Write EDNS information */
+ if (answer->opt_rr) {
+ if (request->qsource.flags.tls) {
+ if (answer_padding(request) != kr_ok()) {
+ return answer_fail(request);
+ }
+ }
+ knot_pkt_begin(answer, KNOT_ADDITIONAL);
+ int ret = knot_pkt_put(answer, KNOT_COMPR_HINT_NONE,
+ answer->opt_rr, KNOT_PF_FREE);
+ if (ret != KNOT_EOK) {
+ return answer_fail(request);
+ }
+ }
+
+ if (!last) secure = false; /*< should be no-op, mostly documentation */
+ /* AD: "negative answers" need more handling. */
+ if (kr_response_classify(answer) != PKT_NOERROR
+ /* Additionally check for CNAME chains that "end in NODATA",
+ * as those would also be PKT_NOERROR. */
+ || (answ_all_cnames && knot_pkt_qtype(answer) != KNOT_RRTYPE_CNAME)) {
+
+ secure = secure && last->flags.DNSSEC_WANT
+ && !last->flags.DNSSEC_BOGUS && !last->flags.DNSSEC_INSECURE;
+ }
+
+ if (secure) {
+ struct kr_query *cname_parent = last->cname_parent;
+ while (cname_parent != NULL) {
+ if (cname_parent->flags.DNSSEC_OPTOUT) {
+ secure = false;
+ break;
+ }
+ cname_parent = cname_parent->cname_parent;
+ }
+ }
+
+ /* No detailed analysis ATM, just _SECURE or not.
+ * LATER: request->rank might better be computed in validator's finish phase. */
+ VERBOSE_MSG(last, "AD: request%s classified as SECURE\n", secure ? "" : " NOT");
+ request->rank = secure ? KR_RANK_SECURE : KR_RANK_INITIAL;
+
+ /* Clear AD if not secure. ATM answer has AD=1 if requested secured answer. */
+ if (!secure) {
+ knot_wire_clear_ad(answer->wire);
+ }
+
+ return kr_ok();
+}
+
+static int query_finalize(struct kr_request *request, struct kr_query *qry, knot_pkt_t *pkt)
+{
+ int ret = 0;
+ knot_pkt_begin(pkt, KNOT_ADDITIONAL);
+ if (!(qry->flags.SAFEMODE)) {
+ /* Remove any EDNS records from any previous iteration. */
+ ret = edns_erase_and_reserve(pkt);
+ if (ret == 0) {
+ ret = edns_create(pkt, request->answer, request);
+ }
+ if (ret == 0) {
+ /* Stub resolution (ask for +rd and +do) */
+ if (qry->flags.STUB) {
+ knot_wire_set_rd(pkt->wire);
+ if (knot_pkt_has_dnssec(request->qsource.packet)) {
+ knot_edns_set_do(pkt->opt_rr);
+ }
+ if (knot_wire_get_cd(request->qsource.packet->wire)) {
+ knot_wire_set_cd(pkt->wire);
+ }
+ /* Full resolution (ask for +cd and +do) */
+ } else if (qry->flags.FORWARD) {
+ knot_wire_set_rd(pkt->wire);
+ knot_edns_set_do(pkt->opt_rr);
+ knot_wire_set_cd(pkt->wire);
+ } else if (qry->flags.DNSSEC_WANT) {
+ knot_edns_set_do(pkt->opt_rr);
+ knot_wire_set_cd(pkt->wire);
+ }
+ }
+ }
+ return ret;
+}
+
+int kr_resolve_begin(struct kr_request *request, struct kr_context *ctx, knot_pkt_t *answer)
+{
+ /* Initialize request */
+ request->ctx = ctx;
+ request->answer = answer;
+ request->options = ctx->options;
+ request->state = KR_STATE_CONSUME;
+ request->current_query = NULL;
+ array_init(request->additional);
+ array_init(request->answ_selected);
+ array_init(request->auth_selected);
+ array_init(request->add_selected);
+ request->answ_validated = false;
+ request->auth_validated = false;
+ request->rank = KR_RANK_INITIAL;
+ request->trace_log = NULL;
+ request->trace_finish = NULL;
+
+ /* Expect first query */
+ kr_rplan_init(&request->rplan, request, &request->pool);
+ return KR_STATE_CONSUME;
+}
+
+static int resolve_query(struct kr_request *request, const knot_pkt_t *packet)
+{
+ struct kr_rplan *rplan = &request->rplan;
+ const knot_dname_t *qname = knot_pkt_qname(packet);
+ uint16_t qclass = knot_pkt_qclass(packet);
+ uint16_t qtype = knot_pkt_qtype(packet);
+ struct kr_query *qry = NULL;
+ struct kr_context *ctx = request->ctx;
+ struct kr_cookie_ctx *cookie_ctx = ctx ? &ctx->cookie_ctx : NULL;
+
+ if (qname != NULL) {
+ qry = kr_rplan_push(rplan, NULL, qname, qclass, qtype);
+ } else if (cookie_ctx && cookie_ctx->srvr.enabled &&
+ knot_wire_get_qdcount(packet->wire) == 0 &&
+ knot_pkt_has_edns(packet) &&
+ knot_pkt_edns_option(packet, KNOT_EDNS_OPTION_COOKIE)) {
+ /* Plan empty query only for cookies. */
+ qry = kr_rplan_push_empty(rplan, NULL);
+ }
+ if (!qry) {
+ return KR_STATE_FAIL;
+ }
+
+ if (qname != NULL) {
+ /* Deferred zone cut lookup for this query. */
+ qry->flags.AWAIT_CUT = true;
+		/* Want DNSSEC if it's possible to secure this name (e.g. it is covered by a TA). */
+ if ((knot_wire_get_ad(packet->wire) || knot_pkt_has_dnssec(packet)) &&
+ kr_ta_covers_qry(request->ctx, qname, qtype)) {
+ qry->flags.DNSSEC_WANT = true;
+ }
+ }
+
+ /* Initialize answer packet */
+ knot_pkt_t *answer = request->answer;
+ knot_wire_set_qr(answer->wire);
+ knot_wire_clear_aa(answer->wire);
+ knot_wire_set_ra(answer->wire);
+ knot_wire_set_rcode(answer->wire, KNOT_RCODE_NOERROR);
+
+ assert(request->qsource.packet);
+ if (knot_wire_get_cd(request->qsource.packet->wire)) {
+ knot_wire_set_cd(answer->wire);
+ } else if (qry->flags.DNSSEC_WANT) {
+ knot_wire_set_ad(answer->wire);
+ }
+
+ /* Expect answer, pop if satisfied immediately */
+ ITERATE_LAYERS(request, qry, begin);
+ if ((request->state & KR_STATE_DONE) != 0) {
+ kr_rplan_pop(rplan, qry);
+ } else if (qname == NULL) {
+		/* It is an empty query, which must be resolved by
+		   the `begin` layer of the cookie module.
+		   If the query hasn't been resolved, fail. */
+ request->state = KR_STATE_FAIL;
+ }
+ return request->state;
+}
+
+KR_PURE static bool kr_inaddr_equal(const struct sockaddr *a, const struct sockaddr *b)
+{
+ const int a_len = kr_inaddr_len(a);
+ const int b_len = kr_inaddr_len(b);
+ return a_len == b_len && memcmp(kr_inaddr(a), kr_inaddr(b), a_len) == 0;
+}
+
+static void update_nslist_rtt(struct kr_context *ctx, struct kr_query *qry, const struct sockaddr *src)
+{
+ /* Do not track in safe mode. */
+ if (qry->flags.SAFEMODE) {
+ return;
+ }
+
+ /* Calculate total resolution time from the time the query was generated. */
+ uint64_t elapsed = kr_now() - qry->timestamp_mono;
+ elapsed = elapsed > UINT_MAX ? UINT_MAX : elapsed;
+
+ /* NSs in the preference list prior to the one who responded will be penalised
+ * with the RETRY timer interval. This is because we know they didn't respond
+ * for N retries, so their RTT must be at least N * RETRY.
+ * The NS in the preference list that responded will have RTT relative to the
+ * time when the query was sent out, not when it was originated.
+ */
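+	/* Worked example (illustrative; it takes KR_CONN_RETRY == 300 ms as in
+	 * the comment below): with the list [a, b] and the answer arriving from
+	 * 'b' 350 ms after the query originated, 'a' is updated with
+	 * "RTT >= 350 ms" and 'b' with a measured RTT of 350 - 300 = 50 ms. */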
+ for (size_t i = 0; i < KR_NSREP_MAXADDR; ++i) {
+ const struct sockaddr *addr = &qry->ns.addr[i].ip;
+ if (addr->sa_family == AF_UNSPEC) {
+ break;
+ }
+ /* If this address is the source of the answer, update its RTT */
+ if (kr_inaddr_equal(src, addr)) {
+ kr_nsrep_update_rtt(&qry->ns, addr, elapsed, ctx->cache_rtt, KR_NS_UPDATE);
+ WITH_VERBOSE(qry) {
+ char addr_str[INET6_ADDRSTRLEN];
+ inet_ntop(addr->sa_family, kr_inaddr(addr), addr_str, sizeof(addr_str));
+ VERBOSE_MSG(qry, "<= server: '%s' rtt: %"PRIu64" ms\n",
+ addr_str, elapsed);
+ }
+ } else {
+ /* Response didn't come from this IP, but we know the RTT must be at least
+ * several RETRY timer tries, e.g. if we have addresses [a, b, c] and we have
+ * tried [a, b] when the answer from 'a' came after 350ms, then we know
+ * that 'b' didn't respond for at least 350 - (1 * 300) ms. We can't say that
+ * its RTT is 50ms, but we can say that its score shouldn't be less than 50. */
+ kr_nsrep_update_rtt(&qry->ns, addr, elapsed, ctx->cache_rtt, KR_NS_MAX);
+ WITH_VERBOSE(qry) {
+ char addr_str[INET6_ADDRSTRLEN];
+ inet_ntop(addr->sa_family, kr_inaddr(addr), addr_str, sizeof(addr_str));
+ VERBOSE_MSG(qry, "<= server: '%s' rtt: >= %"PRIu64" ms\n",
+ addr_str, elapsed);
+ }
+ }
+		/* Each subsequent address was tried one retry interval later,
+		 * so lower the elapsed bound accordingly. */
+ if (elapsed < KR_CONN_RETRY) {
+ break;
+ }
+ elapsed = elapsed - KR_CONN_RETRY;
+ }
+}
+
+static void update_nslist_score(struct kr_request *request, struct kr_query *qry, const struct sockaddr *src, knot_pkt_t *packet)
+{
+ struct kr_context *ctx = request->ctx;
+ /* On successful answer, update preference list RTT and penalise timer */
+ if (request->state != KR_STATE_FAIL) {
+ /* Update RTT information for preference list */
+ update_nslist_rtt(ctx, qry, src);
+ /* Do not complete NS address resolution on soft-fail. */
+ const int rcode = packet ? knot_wire_get_rcode(packet->wire) : 0;
+ if (rcode != KNOT_RCODE_SERVFAIL && rcode != KNOT_RCODE_REFUSED) {
+ qry->flags.AWAIT_IPV6 = false;
+ qry->flags.AWAIT_IPV4 = false;
+ } else { /* Penalize SERVFAILs. */
+ kr_nsrep_update_rtt(&qry->ns, src, KR_NS_PENALTY, ctx->cache_rtt, KR_NS_ADD);
+ }
+ }
+}
+
+static bool resolution_time_exceeded(struct kr_query *qry, uint64_t now)
+{
+ uint64_t resolving_time = now - qry->creation_time_mono;
+ if (resolving_time > KR_RESOLVE_TIME_LIMIT) {
+ WITH_VERBOSE(qry) {
+ VERBOSE_MSG(qry, "query resolution time limit exceeded\n");
+ }
+ return true;
+ }
+ return false;
+}
+
+int kr_resolve_consume(struct kr_request *request, const struct sockaddr *src, knot_pkt_t *packet)
+{
+ struct kr_rplan *rplan = &request->rplan;
+
+ /* Empty resolution plan, push packet as the new query */
+ if (packet && kr_rplan_empty(rplan)) {
+ if (answer_prepare(request, packet) != 0) {
+ return KR_STATE_FAIL;
+ }
+ return resolve_query(request, packet);
+ }
+
+ /* Different processing for network error */
+ struct kr_query *qry = array_tail(rplan->pending);
+ /* Check overall resolution time */
+ if (resolution_time_exceeded(qry, kr_now())) {
+ return KR_STATE_FAIL;
+ }
+ bool tried_tcp = (qry->flags.TCP);
+ if (!packet || packet->size == 0) {
+ if (tried_tcp) {
+ request->state = KR_STATE_FAIL;
+ } else {
+ qry->flags.TCP = true;
+ }
+ } else {
+		/* A response arrived; revert the 0x20 QNAME case randomization. */
+ knot_dname_t *qname_raw = knot_pkt_qname(packet);
+ if (qname_raw && qry->secret != 0) {
+ randomized_qname_case(qname_raw, qry->secret);
+ }
+ request->state = KR_STATE_CONSUME;
+ if (qry->flags.CACHED) {
+ ITERATE_LAYERS(request, qry, consume, packet);
+ } else {
+ /* Fill in source and latency information. */
+ request->upstream.rtt = kr_now() - qry->timestamp_mono;
+ request->upstream.addr = src;
+ ITERATE_LAYERS(request, qry, consume, packet);
+ /* Clear temporary information */
+ request->upstream.addr = NULL;
+ request->upstream.rtt = 0;
+ }
+ }
+
+ /* Track RTT for iterative answers */
+ if (src && !(qry->flags.CACHED)) {
+ update_nslist_score(request, qry, src, packet);
+ }
+ /* Resolution failed, invalidate current NS. */
+ if (request->state == KR_STATE_FAIL) {
+ invalidate_ns(rplan, qry);
+ qry->flags.RESOLVED = false;
+ }
+
+ /* Pop query if resolved. */
+ if (request->state == KR_STATE_YIELD) {
+ return KR_STATE_PRODUCE; /* Requery */
+ } else if (qry->flags.RESOLVED) {
+ kr_rplan_pop(rplan, qry);
+ } else if (!tried_tcp && (qry->flags.TCP)) {
+ return KR_STATE_PRODUCE; /* Requery over TCP */
+ } else { /* Clear query flags for next attempt */
+ qry->flags.CACHED = false;
+ if (!request->options.TCP) {
+ qry->flags.TCP = false;
+ }
+ }
+
+ ITERATE_LAYERS(request, qry, reset);
+
+ /* Do not finish with bogus answer. */
+ if (qry->flags.DNSSEC_BOGUS) {
+ return KR_STATE_FAIL;
+ }
+
+ return kr_rplan_empty(&request->rplan) ? KR_STATE_DONE : KR_STATE_PRODUCE;
+}
+
+/** @internal Spawn subrequest in current zone cut (no minimization or lookup). */
+static struct kr_query *zone_cut_subreq(struct kr_rplan *rplan, struct kr_query *parent,
+ const knot_dname_t *qname, uint16_t qtype)
+{
+ struct kr_query *next = kr_rplan_push(rplan, parent, qname, parent->sclass, qtype);
+ if (!next) {
+ return NULL;
+ }
+ kr_zonecut_set(&next->zone_cut, parent->zone_cut.name);
+ if (kr_zonecut_copy(&next->zone_cut, &parent->zone_cut) != 0 ||
+ kr_zonecut_copy_trust(&next->zone_cut, &parent->zone_cut) != 0) {
+ return NULL;
+ }
+ next->flags.NO_MINIMIZE = true;
+ if (parent->flags.DNSSEC_WANT) {
+ next->flags.DNSSEC_WANT = true;
+ }
+ return next;
+}
+
+static int forward_trust_chain_check(struct kr_request *request, struct kr_query *qry, bool resume)
+{
+ struct kr_rplan *rplan = &request->rplan;
+ map_t *trust_anchors = &request->ctx->trust_anchors;
+ map_t *negative_anchors = &request->ctx->negative_anchors;
+
+ if (qry->parent != NULL &&
+ !(qry->forward_flags.CNAME) &&
+ !(qry->flags.DNS64_MARK) &&
+ knot_dname_in_bailiwick(qry->zone_cut.name, qry->parent->zone_cut.name) >= 0) {
+ return KR_STATE_PRODUCE;
+ }
+
+ assert(qry->flags.FORWARD);
+
+ if (!trust_anchors) {
+ qry->flags.AWAIT_CUT = false;
+ return KR_STATE_PRODUCE;
+ }
+
+ if (qry->flags.DNSSEC_INSECURE) {
+ qry->flags.AWAIT_CUT = false;
+ return KR_STATE_PRODUCE;
+ }
+
+ if (qry->forward_flags.NO_MINIMIZE) {
+ qry->flags.AWAIT_CUT = false;
+ return KR_STATE_PRODUCE;
+ }
+
+ const knot_dname_t *start_name = qry->sname;
+ if ((qry->flags.AWAIT_CUT) && !resume) {
+ qry->flags.AWAIT_CUT = false;
+ const knot_dname_t *longest_ta = kr_ta_get_longest_name(trust_anchors, qry->sname);
+ if (longest_ta) {
+ start_name = longest_ta;
+ qry->zone_cut.name = knot_dname_copy(start_name, qry->zone_cut.pool);
+ qry->flags.DNSSEC_WANT = true;
+ } else {
+ qry->flags.DNSSEC_WANT = false;
+ return KR_STATE_PRODUCE;
+ }
+ }
+
+ bool has_ta = (qry->zone_cut.trust_anchor != NULL);
+ knot_dname_t *ta_name = (has_ta ? qry->zone_cut.trust_anchor->owner : NULL);
+ bool refetch_ta = (!has_ta || !knot_dname_is_equal(qry->zone_cut.name, ta_name));
+ bool is_dnskey_subreq = kr_rplan_satisfies(qry, ta_name, KNOT_CLASS_IN, KNOT_RRTYPE_DNSKEY);
+ bool refetch_key = has_ta && (!qry->zone_cut.key || !knot_dname_is_equal(ta_name, qry->zone_cut.key->owner));
+ if (refetch_key && !is_dnskey_subreq) {
+ struct kr_query *next = zone_cut_subreq(rplan, qry, ta_name, KNOT_RRTYPE_DNSKEY);
+ if (!next) {
+ return KR_STATE_FAIL;
+ }
+ return KR_STATE_DONE;
+ }
+
+ int name_offset = 1;
+ const knot_dname_t *wanted_name;
+ bool nods, ds_req, ns_req, minimized, ns_exist;
+ do {
+ wanted_name = start_name;
+ ds_req = false;
+ ns_req = false;
+ ns_exist = true;
+
+ int cut_labels = knot_dname_labels(qry->zone_cut.name, NULL);
+ int wanted_name_labels = knot_dname_labels(wanted_name, NULL);
+ while (wanted_name[0] && wanted_name_labels > cut_labels + name_offset) {
+ wanted_name = knot_wire_next_label(wanted_name, NULL);
+ wanted_name_labels -= 1;
+ }
+ minimized = (wanted_name != qry->sname);
+
+ for (int i = 0; i < request->rplan.resolved.len; ++i) {
+ struct kr_query *q = request->rplan.resolved.at[i];
+ if (q->parent == qry &&
+ q->sclass == qry->sclass &&
+ (q->stype == KNOT_RRTYPE_DS || q->stype == KNOT_RRTYPE_NS) &&
+ knot_dname_is_equal(q->sname, wanted_name)) {
+ if (q->stype == KNOT_RRTYPE_DS) {
+ ds_req = true;
+ if (q->flags.CNAME) {
+ ns_exist = false;
+ } else if (!(q->flags.DNSSEC_OPTOUT)) {
+ int ret = kr_dnssec_matches_name_and_type(&request->auth_selected, q->uid,
+ wanted_name, KNOT_RRTYPE_NS);
+ ns_exist = (ret == kr_ok());
+ }
+ } else {
+ if (q->flags.CNAME) {
+ ns_exist = false;
+ }
+ ns_req = true;
+ }
+ }
+ }
+
+ if (ds_req && ns_exist && !ns_req && (minimized || resume)) {
+ struct kr_query *next = zone_cut_subreq(rplan, qry, wanted_name,
+ KNOT_RRTYPE_NS);
+ if (!next) {
+ return KR_STATE_FAIL;
+ }
+ return KR_STATE_DONE;
+ }
+
+ if (qry->parent == NULL && (qry->flags.CNAME) &&
+ ds_req && ns_req) {
+ return KR_STATE_PRODUCE;
+ }
+
+ /* set `nods` */
+ if ((qry->stype == KNOT_RRTYPE_DS) &&
+ knot_dname_is_equal(wanted_name, qry->sname)) {
+ nods = true;
+ } else if (resume && !ds_req) {
+ nods = false;
+ } else if (!minimized && qry->stype != KNOT_RRTYPE_DNSKEY) {
+ nods = true;
+ } else {
+ nods = ds_req;
+ }
+ name_offset += 1;
+ } while (ds_req && (ns_req || !ns_exist) && minimized);
+
+ /* Disable DNSSEC if it enters NTA. */
+ if (kr_ta_get(negative_anchors, wanted_name)){
+ VERBOSE_MSG(qry, ">< negative TA, going insecure\n");
+ qry->flags.DNSSEC_WANT = false;
+ }
+
+ /* Enable DNSSEC if enters a new island of trust. */
+ bool want_secured = (qry->flags.DNSSEC_WANT) &&
+ !knot_wire_get_cd(request->qsource.packet->wire);
+ if (!(qry->flags.DNSSEC_WANT) &&
+ !knot_wire_get_cd(request->qsource.packet->wire) &&
+ kr_ta_get(trust_anchors, wanted_name)) {
+ qry->flags.DNSSEC_WANT = true;
+ want_secured = true;
+ WITH_VERBOSE(qry) {
+ KR_DNAME_GET_STR(qname_str, wanted_name);
+ VERBOSE_MSG(qry, ">< TA: '%s'\n", qname_str);
+ }
+ }
+
+ if (want_secured && !qry->zone_cut.trust_anchor) {
+ knot_rrset_t *ta_rr = kr_ta_get(trust_anchors, wanted_name);
+ if (!ta_rr) {
+ char name[] = "\0";
+ ta_rr = kr_ta_get(trust_anchors, (knot_dname_t*)name);
+ }
+ if (ta_rr) {
+ qry->zone_cut.trust_anchor = knot_rrset_copy(ta_rr, qry->zone_cut.pool);
+ }
+ }
+
+ has_ta = (qry->zone_cut.trust_anchor != NULL);
+ ta_name = (has_ta ? qry->zone_cut.trust_anchor->owner : NULL);
+ refetch_ta = (!has_ta || !knot_dname_is_equal(wanted_name, ta_name));
+ if (!nods && want_secured && refetch_ta) {
+ struct kr_query *next = zone_cut_subreq(rplan, qry, wanted_name,
+ KNOT_RRTYPE_DS);
+ if (!next) {
+ return KR_STATE_FAIL;
+ }
+ return KR_STATE_DONE;
+ }
+
+ /* Try to fetch missing DNSKEY.
+ * Do not fetch if this is a DNSKEY subrequest to avoid circular dependency. */
+ is_dnskey_subreq = kr_rplan_satisfies(qry, ta_name, KNOT_CLASS_IN, KNOT_RRTYPE_DNSKEY);
+ refetch_key = has_ta && (!qry->zone_cut.key || !knot_dname_is_equal(ta_name, qry->zone_cut.key->owner));
+ if (want_secured && refetch_key && !is_dnskey_subreq) {
+ struct kr_query *next = zone_cut_subreq(rplan, qry, ta_name, KNOT_RRTYPE_DNSKEY);
+ if (!next) {
+ return KR_STATE_FAIL;
+ }
+ return KR_STATE_DONE;
+ }
+
+ return KR_STATE_PRODUCE;
+}
+
+/* @todo: Validator refactoring, keep this in driver for now. */
+static int trust_chain_check(struct kr_request *request, struct kr_query *qry)
+{
+ struct kr_rplan *rplan = &request->rplan;
+ map_t *trust_anchors = &request->ctx->trust_anchors;
+ map_t *negative_anchors = &request->ctx->negative_anchors;
+
+ /* Disable DNSSEC if it enters NTA. */
+ if (kr_ta_get(negative_anchors, qry->zone_cut.name)){
+ VERBOSE_MSG(qry, ">< negative TA, going insecure\n");
+ qry->flags.DNSSEC_WANT = false;
+ qry->flags.DNSSEC_INSECURE = true;
+ }
+ if (qry->flags.DNSSEC_NODS) {
+ /* This is the next query iteration with minimized qname.
+		 * At the previous iteration DS non-existence was proven. */
+ qry->flags.DNSSEC_NODS = false;
+ qry->flags.DNSSEC_WANT = false;
+ qry->flags.DNSSEC_INSECURE = true;
+ }
+ /* Enable DNSSEC if entering a new (or different) island of trust,
+ * and update the TA RRset if required. */
+ bool want_secured = (qry->flags.DNSSEC_WANT) &&
+ !knot_wire_get_cd(request->qsource.packet->wire);
+ knot_rrset_t *ta_rr = kr_ta_get(trust_anchors, qry->zone_cut.name);
+ if (!knot_wire_get_cd(request->qsource.packet->wire) && ta_rr) {
+ qry->flags.DNSSEC_WANT = true;
+ want_secured = true;
+
+ if (qry->zone_cut.trust_anchor == NULL
+ || !knot_dname_is_equal(qry->zone_cut.trust_anchor->owner, qry->zone_cut.name)) {
+ mm_free(qry->zone_cut.pool, qry->zone_cut.trust_anchor);
+ qry->zone_cut.trust_anchor = knot_rrset_copy(ta_rr, qry->zone_cut.pool);
+
+ WITH_VERBOSE(qry) {
+ KR_DNAME_GET_STR(qname_str, ta_rr->owner);
+ VERBOSE_MSG(qry, ">< TA: '%s'\n", qname_str);
+ }
+ }
+ }
+
+ /* Try to fetch missing DS (from above the cut). */
+ const bool has_ta = (qry->zone_cut.trust_anchor != NULL);
+ const knot_dname_t *ta_name = (has_ta ? qry->zone_cut.trust_anchor->owner : NULL);
+ const bool refetch_ta = !has_ta || !knot_dname_is_equal(qry->zone_cut.name, ta_name);
+ if (want_secured && refetch_ta) {
+ /* @todo we could fetch the information from the parent cut, but we don't remember that now */
+ struct kr_query *next = kr_rplan_push(rplan, qry, qry->zone_cut.name, qry->sclass, KNOT_RRTYPE_DS);
+ if (!next) {
+ return KR_STATE_FAIL;
+ }
+ next->flags.AWAIT_CUT = true;
+ next->flags.DNSSEC_WANT = true;
+ return KR_STATE_DONE;
+ }
+ /* Try to fetch missing DNSKEY (either missing or above current cut).
+ * Do not fetch if this is a DNSKEY subrequest to avoid circular dependency. */
+ const bool is_dnskey_subreq = kr_rplan_satisfies(qry, ta_name, KNOT_CLASS_IN, KNOT_RRTYPE_DNSKEY);
+ const bool refetch_key = has_ta && (!qry->zone_cut.key || !knot_dname_is_equal(ta_name, qry->zone_cut.key->owner));
+ if (want_secured && refetch_key && !is_dnskey_subreq) {
+ struct kr_query *next = zone_cut_subreq(rplan, qry, ta_name, KNOT_RRTYPE_DNSKEY);
+ if (!next) {
+ return KR_STATE_FAIL;
+ }
+ return KR_STATE_DONE;
+ }
+
+ return KR_STATE_PRODUCE;
+}
+
+/** @internal Check current zone cut status and credibility, spawn subrequests if needed. */
+static int zone_cut_check(struct kr_request *request, struct kr_query *qry, knot_pkt_t *packet)
+/* TODO: using the cache at this point in this way just isn't nice; remove in time */
+{
+ /* Stub mode, just forward and do not solve cut. */
+ if (qry->flags.STUB) {
+ return KR_STATE_PRODUCE;
+ }
+
+ /* Forwarding to upstream resolver mode.
+	 * Since the forwarding targets are already in qry->ns,
+	 * cut fetching is not needed. */
+ if (qry->flags.FORWARD) {
+ return forward_trust_chain_check(request, qry, false);
+ }
+ if (!(qry->flags.AWAIT_CUT)) {
+ /* The query was resolved from cache.
+	 * Spawn DS / DNSKEY requests if needed and exit. */
+ return trust_chain_check(request, qry);
+ }
+
+ /* The query wasn't resolved from cache,
+	 * now it's time to look up the closest zone cut in the cache. */
+ struct kr_cache *cache = &request->ctx->cache;
+ if (!kr_cache_is_open(cache)) {
+ int ret = kr_zonecut_set_sbelt(request->ctx, &qry->zone_cut);
+ if (ret != 0) {
+ return KR_STATE_FAIL;
+ }
+ VERBOSE_MSG(qry, "=> no cache open, using root hints\n");
+ qry->flags.AWAIT_CUT = false;
+ return KR_STATE_DONE;
+ }
+
+ const knot_dname_t *requested_name = qry->sname;
+ /* If at/subdomain of parent zone cut, start from its encloser.
+	 * This covers the case when we hit a dead end
+	 * (and need glue from the parent), or a DS refetch. */
+ if (qry->parent) {
+ const knot_dname_t *parent = qry->parent->zone_cut.name;
+ if (parent[0] != '\0'
+ && knot_dname_in_bailiwick(qry->sname, parent) >= 0) {
+ requested_name = knot_wire_next_label(parent, NULL);
+ }
+ } else if ((qry->stype == KNOT_RRTYPE_DS) && (qry->sname[0] != '\0')) {
+		/* If this is an explicit DS query, start from the encloser too. */
+ requested_name = knot_wire_next_label(requested_name, NULL);
+ }
+
+ int state = KR_STATE_FAIL;
+ do {
+ state = ns_fetch_cut(qry, requested_name, request, packet);
+ if (state == KR_STATE_DONE || state == KR_STATE_FAIL) {
+ return state;
+ } else if (state == KR_STATE_CONSUME) {
+ requested_name = knot_wire_next_label(requested_name, NULL);
+ }
+ } while (state == KR_STATE_CONSUME);
+
+ /* Update minimized QNAME if zone cut changed */
+ if (qry->zone_cut.name && qry->zone_cut.name[0] != '\0' && !(qry->flags.NO_MINIMIZE)) {
+ if (kr_make_query(qry, packet) != 0) {
+ return KR_STATE_FAIL;
+ }
+ }
+ qry->flags.AWAIT_CUT = false;
+
+ /* Check trust chain */
+ return trust_chain_check(request, qry);
+}
+
+int kr_resolve_produce(struct kr_request *request, struct sockaddr **dst, int *type, knot_pkt_t *packet)
+{
+ struct kr_rplan *rplan = &request->rplan;
+ unsigned ns_election_iter = 0;
+
+ /* No query left for resolution */
+ if (kr_rplan_empty(rplan)) {
+ return KR_STATE_FAIL;
+ }
+ /* If we have deferred answers, resume them. */
+ struct kr_query *qry = array_tail(rplan->pending);
+ if (qry->deferred != NULL) {
+ /* @todo: Refactoring validator, check trust chain before resuming. */
+ int state = 0;
+ if (((qry->flags.FORWARD) == 0) ||
+ ((qry->stype == KNOT_RRTYPE_DS) && (qry->flags.CNAME))) {
+ state = trust_chain_check(request, qry);
+ } else {
+ state = forward_trust_chain_check(request, qry, true);
+ }
+
+ switch(state) {
+ case KR_STATE_FAIL: return KR_STATE_FAIL;
+ case KR_STATE_DONE: return KR_STATE_PRODUCE;
+ default: break;
+ }
+ VERBOSE_MSG(qry, "=> resuming yielded answer\n");
+ struct kr_layer_pickle *pickle = qry->deferred;
+ request->state = KR_STATE_YIELD;
+ set_yield(&request->answ_selected, qry->uid, false);
+ set_yield(&request->auth_selected, qry->uid, false);
+ RESUME_LAYERS(layer_id(request, pickle->api), request, qry, consume, pickle->pkt);
+ if (request->state != KR_STATE_YIELD) {
+ /* No new deferred answers, take the next */
+ qry->deferred = pickle->next;
+ }
+ } else {
+		/* The caller is interested in always tracking the zone cut, even if the answer is cached.
+		 * This is normally not required and incurs another cache lookup for a cached answer. */
+ if (qry->flags.ALWAYS_CUT) {
+ if (!(qry->flags.STUB)) {
+ switch(zone_cut_check(request, qry, packet)) {
+ case KR_STATE_FAIL: return KR_STATE_FAIL;
+ case KR_STATE_DONE: return KR_STATE_PRODUCE;
+ default: break;
+ }
+ }
+ }
+ /* Resolve current query and produce dependent or finish */
+ request->state = KR_STATE_PRODUCE;
+ ITERATE_LAYERS(request, qry, produce, packet);
+ if (request->state != KR_STATE_FAIL && knot_wire_get_qr(packet->wire)) {
+ /* Produced an answer from cache, consume it. */
+ qry->secret = 0;
+ request->state = KR_STATE_CONSUME;
+ ITERATE_LAYERS(request, qry, consume, packet);
+ }
+ }
+ switch(request->state) {
+ case KR_STATE_FAIL: return request->state;
+ case KR_STATE_CONSUME: break;
+ case KR_STATE_DONE:
+ default: /* Current query is done */
+ if (qry->flags.RESOLVED && request->state != KR_STATE_YIELD) {
+ kr_rplan_pop(rplan, qry);
+ }
+ ITERATE_LAYERS(request, qry, reset);
+ return kr_rplan_empty(rplan) ? KR_STATE_DONE : KR_STATE_PRODUCE;
+ }
+
+
+ /* This query has RD=0 or is ANY, stop here. */
+ if (qry->stype == KNOT_RRTYPE_ANY ||
+ !knot_wire_get_rd(request->qsource.packet->wire)) {
+ VERBOSE_MSG(qry, "=> qtype is ANY or RD=0, bail out\n");
+ return KR_STATE_FAIL;
+ }
+
+ /* Update zone cut, spawn new subrequests. */
+ if (!(qry->flags.STUB)) {
+ int state = zone_cut_check(request, qry, packet);
+ switch(state) {
+ case KR_STATE_FAIL: return KR_STATE_FAIL;
+ case KR_STATE_DONE: return KR_STATE_PRODUCE;
+ default: break;
+ }
+ }
+
+ns_election:
+
+	/* If the query has already selected an NS and is waiting for an IPv4/IPv6 record,
+	 * elect the best address only; otherwise elect a completely new NS.
+ */
+ if(++ns_election_iter >= KR_ITER_LIMIT) {
+ VERBOSE_MSG(qry, "=> couldn't converge NS selection, bail out\n");
+ return KR_STATE_FAIL;
+ }
+
+ const struct kr_qflags qflg = qry->flags;
+ const bool retry = qflg.TCP || qflg.BADCOOKIE_AGAIN;
+ if (qflg.AWAIT_IPV4 || qflg.AWAIT_IPV6) {
+ kr_nsrep_elect_addr(qry, request->ctx);
+ } else if (qflg.FORWARD || qflg.STUB) {
+ kr_nsrep_sort(&qry->ns, request->ctx);
+ if (qry->ns.score > KR_NS_MAX_SCORE) {
+			/* At the moment all NSs have a bad reputation,
+			 * but there may be existing connections. */
+ VERBOSE_MSG(qry, "=> no valid NS left\n");
+ return KR_STATE_FAIL;
+ }
+ } else if (!qry->ns.name || !retry) { /* Keep NS when requerying/stub/badcookie. */
+ /* Root DNSKEY must be fetched from the hints to avoid chicken and egg problem. */
+ if (qry->sname[0] == '\0' && qry->stype == KNOT_RRTYPE_DNSKEY) {
+ kr_zonecut_set_sbelt(request->ctx, &qry->zone_cut);
+ qry->flags.NO_THROTTLE = true; /* Pick even bad SBELT servers */
+ }
+ kr_nsrep_elect(qry, request->ctx);
+ if (qry->ns.score > KR_NS_MAX_SCORE) {
+ if (kr_zonecut_is_empty(&qry->zone_cut)) {
+ VERBOSE_MSG(qry, "=> no NS with an address\n");
+ } else {
+ VERBOSE_MSG(qry, "=> no valid NS left\n");
+ }
+ if (!qry->flags.NO_NS_FOUND) {
+ qry->flags.NO_NS_FOUND = true;
+ } else {
+ ITERATE_LAYERS(request, qry, reset);
+ kr_rplan_pop(rplan, qry);
+ }
+ return KR_STATE_PRODUCE;
+ }
+ }
+
+ /* Resolve address records */
+ if (qry->ns.addr[0].ip.sa_family == AF_UNSPEC) {
+ int ret = ns_resolve_addr(qry, request);
+ if (ret != 0) {
+ qry->flags.AWAIT_IPV6 = false;
+ qry->flags.AWAIT_IPV4 = false;
+ qry->flags.TCP = false;
+ qry->ns.name = NULL;
+ goto ns_election; /* Must try different NS */
+ }
+ ITERATE_LAYERS(request, qry, reset);
+ return KR_STATE_PRODUCE;
+ }
+
+ /* Randomize query case (if not in safe mode or turned off) */
+ qry->secret = (qry->flags.SAFEMODE || qry->flags.NO_0X20)
+ ? 0 : kr_rand_bytes(sizeof(qry->secret));
+ knot_dname_t *qname_raw = knot_pkt_qname(packet);
+ randomized_qname_case(qname_raw, qry->secret);
+
+ /*
+	 * The additional query is going to be finalized when calling
+ * kr_resolve_checkout().
+ */
+ qry->timestamp_mono = kr_now();
+ *dst = &qry->ns.addr[0].ip;
+ *type = (qry->flags.TCP) ? SOCK_STREAM : SOCK_DGRAM;
+ return request->state;
+}
+
+#if defined(ENABLE_COOKIES)
+/** Update DNS cookie data in packet. */
+static bool outbound_request_update_cookies(struct kr_request *req,
+ const struct sockaddr *src,
+ const struct sockaddr *dst)
+{
+ assert(req);
+
+	/* RFC 7873 section 4.1 strongly requires the server address. */
+ if (!dst) {
+ return false;
+ }
+
+ struct kr_cookie_settings *clnt_sett = &req->ctx->cookie_ctx.clnt;
+
+	/* Client cookies are disabled, nothing to do. */
+ if (!clnt_sett->enabled) {
+ return true;
+ }
+
+ /*
+	 * RFC 7873 section 4.1 recommends also using the client address. The
+	 * matter is also discussed in section 6.
+ */
+
+ kr_request_put_cookie(&clnt_sett->current, req->ctx->cache_cookie,
+ src, dst, req);
+
+ return true;
+}
+#endif /* defined(ENABLE_COOKIES) */
+
+int kr_resolve_checkout(struct kr_request *request, const struct sockaddr *src,
+ struct sockaddr *dst, int type, knot_pkt_t *packet)
+{
+ /* @todo: Update documentation if this function becomes approved. */
+
+ struct kr_rplan *rplan = &request->rplan;
+
+ if (knot_wire_get_qr(packet->wire) != 0) {
+ return kr_ok();
+ }
+
+ /* No query left for resolution */
+ if (kr_rplan_empty(rplan)) {
+ return kr_error(EINVAL);
+ }
+ struct kr_query *qry = array_tail(rplan->pending);
+
+#if defined(ENABLE_COOKIES)
+ /* Update DNS cookies in request. */
+ if (type == SOCK_DGRAM) { /* @todo: Add cookies also over TCP? */
+ /*
+ * The actual server IP address is needed before generating the
+ * actual cookie. If we don't know the server address then we
+ * also don't know the actual cookie size.
+ */
+ if (!outbound_request_update_cookies(request, src, dst)) {
+ return kr_error(EINVAL);
+ }
+ }
+#endif /* defined(ENABLE_COOKIES) */
+
+ int ret = query_finalize(request, qry, packet);
+ if (ret != 0) {
+ return kr_error(EINVAL);
+ }
+
+ /* Track changes in minimization secret to enable/disable minimization */
+ uint32_t old_minimization_secret = qry->secret;
+
+ /* Run the checkout layers and cancel on failure.
+ * The checkout layer doesn't persist the state, so canceled subrequests
+	 * don't affect the resolution or the rest of the processing. */
+ int state = request->state;
+ ITERATE_LAYERS(request, qry, checkout, packet, dst, type);
+ if (request->state == KR_STATE_FAIL) {
+ request->state = state; /* Restore */
+ return kr_error(ECANCELED);
+ }
+
+ /* Randomize query case (if secret changed) */
+ knot_dname_t *qname = (knot_dname_t *)knot_pkt_qname(packet);
+ if (qry->secret != old_minimization_secret) {
+ randomized_qname_case(qname, qry->secret);
+ }
+
+ /* Write down OPT unless in safemode */
+ if (!(qry->flags.SAFEMODE)) {
+ ret = edns_put(packet, true);
+ if (ret != 0) {
+ return kr_error(EINVAL);
+ }
+ }
+
+ WITH_VERBOSE(qry) {
+
+ KR_DNAME_GET_STR(qname_str, knot_pkt_qname(packet));
+ KR_DNAME_GET_STR(zonecut_str, qry->zone_cut.name);
+ KR_RRTYPE_GET_STR(type_str, knot_pkt_qtype(packet));
+
+ for (size_t i = 0; i < KR_NSREP_MAXADDR; ++i) {
+ struct sockaddr *addr = &qry->ns.addr[i].ip;
+ if (addr->sa_family == AF_UNSPEC) {
+ break;
+ }
+ if (!kr_inaddr_equal(dst, addr)) {
+ continue;
+ }
+ const char *ns_str = kr_straddr(addr);
+ VERBOSE_MSG(qry,
+ "=> id: '%05u' querying: '%s' score: %u zone cut: '%s' "
+ "qname: '%s' qtype: '%s' proto: '%s'\n",
+ qry->id, ns_str ? ns_str : "", qry->ns.score, zonecut_str,
+ qname_str, type_str, (qry->flags.TCP) ? "tcp" : "udp");
+
+ break;
+ }}
+
+ return kr_ok();
+}
+
+int kr_resolve_finish(struct kr_request *request, int state)
+{
+ /* Finalize answer and construct wire-buffer. */
+ ITERATE_LAYERS(request, NULL, answer_finalize);
+ if (request->state == KR_STATE_FAIL) {
+ state = KR_STATE_FAIL;
+ } else if (answer_finalize(request, state) != 0) {
+ state = KR_STATE_FAIL;
+ }
+
+ /* Error during processing, internal failure */
+ if (state != KR_STATE_DONE) {
+ knot_pkt_t *answer = request->answer;
+ if (knot_wire_get_rcode(answer->wire) == KNOT_RCODE_NOERROR) {
+ knot_wire_clear_ad(answer->wire);
+ knot_wire_clear_aa(answer->wire);
+ knot_wire_set_rcode(answer->wire, KNOT_RCODE_SERVFAIL);
+ }
+ }
+
+ request->state = state;
+ ITERATE_LAYERS(request, NULL, finish);
+
+#ifndef NOVERBOSELOG
+ struct kr_rplan *rplan = &request->rplan;
+ struct kr_query *last = kr_rplan_last(rplan);
+ VERBOSE_MSG(last, "finished: %d, queries: %zu, mempool: %zu B\n",
+ request->state, rplan->resolved.len, (size_t) mp_total_size(request->pool.ctx));
+#endif
+
+ /* Trace request finish */
+ if (request->trace_finish) {
+ request->trace_finish(request);
+ }
+
+ /* Uninstall all tracepoints */
+ request->trace_finish = NULL;
+ request->trace_log = NULL;
+
+ return KR_STATE_DONE;
+}
+
+struct kr_rplan *kr_resolve_plan(struct kr_request *request)
+{
+ if (request) {
+ return &request->rplan;
+ }
+ return NULL;
+}
+
+knot_mm_t *kr_resolve_pool(struct kr_request *request)
+{
+ if (request) {
+ return &request->pool;
+ }
+ return NULL;
+}
+
+#undef VERBOSE_MSG
diff --git a/lib/resolve.h b/lib/resolve.h
new file mode 100644
index 0000000..60d80bb
--- /dev/null
+++ b/lib/resolve.h
@@ -0,0 +1,332 @@
+/* Copyright (C) 2014-2017 CZ.NIC, z.s.p.o. <knot-dns@labs.nic.cz>
+
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program. If not, see <https://www.gnu.org/licenses/>.
+ */
+
+#pragma once
+
+#include <netinet/in.h>
+#include <sys/socket.h>
+#include <libknot/packet/pkt.h>
+
+#include "lib/cookies/control.h"
+#include "lib/cookies/lru_cache.h"
+#include "lib/layer.h"
+#include "lib/generic/map.h"
+#include "lib/generic/array.h"
+#include "lib/nsrep.h"
+#include "lib/rplan.h"
+#include "lib/module.h"
+#include "lib/cache/api.h"
+
+/**
+ * @file resolve.h
+ * @brief The API provides a "consumer-producer"-like interface that enables
+ * the user to plug it into an existing event loop or I/O code.
+ *
+ * # Example usage of the iterative API:
+ *
+ * @code{.c}
+ *
+ * // Create request and its memory pool
+ * struct kr_request req = {
+ * .pool = {
+ * .ctx = mp_new (4096),
+ * .alloc = (mm_alloc_t) mp_alloc
+ * }
+ * };
+ *
+ * // Setup and provide input query
+ * int state = kr_resolve_begin(&req, ctx, final_answer);
+ * state = kr_resolve_consume(&req, NULL, query);
+ *
+ * // Generate answer
+ * while (state == KR_STATE_PRODUCE) {
+ *
+ *    // An additional query was generated; do the I/O and pass back the answer
+ * state = kr_resolve_produce(&req, &addr, &type, query);
+ * while (state == KR_STATE_CONSUME) {
+ *        int ret = sendrecv(addr, type, query, resp);
+ *
+ * // If I/O fails, make "resp" empty
+ *        state = kr_resolve_consume(&req, addr, resp);
+ * knot_pkt_clear(resp);
+ * }
+ * knot_pkt_clear(query);
+ * }
+ *
+ * // "state" is either DONE or FAIL
+ * kr_resolve_finish(&req, state);
+ *
+ * @endcode
+ */
+
+
+/**
+ * RRset rank - for cache and ranked_rr_*.
+ *
+ * The rank consists of one independent flag - KR_RANK_AUTH -
+ * and the rest are mutually exclusive values, of which exactly one holds at any time.
+ * You can use one of the enums as a safe initial value, optionally | KR_RANK_AUTH;
+ * otherwise it's best to manipulate ranks via the kr_rank_* functions.
+ *
+ * @note The representation is complicated by restrictions on integer comparison:
+ * - AUTH must be > than !AUTH
+ * - AUTH INSECURE must be > than AUTH (because it attempted validation)
+ * - !AUTH SECURE must be > than AUTH (because it's valid)
+ *
+ * See also:
+ * https://tools.ietf.org/html/rfc2181#section-5.4.1
+ * https://tools.ietf.org/html/rfc4035#section-4.3
+ */
+enum kr_rank {
+ /* Initial-like states. No validation has been attempted (yet). */
+ KR_RANK_INITIAL = 0, /**< Did not attempt to validate. It's assumed
+ compulsory to validate (or prove insecure). */
+ KR_RANK_OMIT, /**< Do not attempt to validate.
+ (And don't consider it a validation failure.) */
+ KR_RANK_TRY, /**< Attempt to validate, but failures are non-fatal. */
+
+ /* Failure states. These have higher value because they have more information. */
+ KR_RANK_INDET = 4, /**< Unable to determine whether it should be secure. */
+ KR_RANK_BOGUS, /**< Ought to be secure but isn't. */
+ KR_RANK_MISMATCH,
+ KR_RANK_MISSING, /**< Unable to obtain a good signature. */
+
+ /** Proven to be insecure, i.e. we have a chain of trust from TAs
+ * that cryptographically denies the possibility of existence
+ * of a positive chain of trust from the TAs to the record. */
+ KR_RANK_INSECURE = 8,
+
+ /** Authoritative data flag; the chain of authority was "verified".
+ * Even if not set, only in-bailiwick stuff is acceptable,
+ * i.e. almost authoritative (example: mandatory glue and its NS RR). */
+ KR_RANK_AUTH = 16,
+
+ KR_RANK_SECURE = 32, /**< Verified whole chain of trust from the closest TA. */
+ /* @note Rank must not exceed 6 bits */
+};
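+
+/* Illustrative consequences of the layout above (numeric values in parentheses):
+ * KR_RANK_SECURE (32) > KR_RANK_INSECURE|KR_RANK_AUTH (24)
+ * > KR_RANK_BOGUS|KR_RANK_AUTH (21) > KR_RANK_BOGUS (5),
+ * which satisfies all three ordering restrictions from the note. */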
+
+/** Check that a rank value is valid. Meant for assertions. */
+bool kr_rank_check(uint8_t rank) KR_PURE;
+
+/** Test the presence of any flag/state in a rank, i.e. including KR_RANK_AUTH. */
+static inline bool kr_rank_test(uint8_t rank, uint8_t kr_flag)
+{
+ assert(kr_rank_check(rank) && kr_rank_check(kr_flag));
+ if (kr_flag == KR_RANK_AUTH) {
+ return rank & KR_RANK_AUTH;
+ }
+ assert(!(kr_flag & KR_RANK_AUTH));
+ /* The rest are exclusive values - exactly one has to be set. */
+ return (rank & ~KR_RANK_AUTH) == kr_flag;
+}
+
+/** Set the rank state. The _AUTH flag is kept as it was. */
+static inline void kr_rank_set(uint8_t *rank, uint8_t kr_flag)
+{
+ assert(rank && kr_rank_check(*rank));
+ assert(kr_rank_check(kr_flag) && !(kr_flag & KR_RANK_AUTH));
+ *rank = kr_flag | (*rank & KR_RANK_AUTH);
+}
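+
+/* A minimal usage sketch (illustrative only, not part of the API):
+ * @code{.c}
+ * uint8_t rank = KR_RANK_INITIAL | KR_RANK_AUTH;
+ * kr_rank_set(&rank, KR_RANK_SECURE); // rank == KR_RANK_SECURE | KR_RANK_AUTH
+ * assert(kr_rank_test(rank, KR_RANK_SECURE) && kr_rank_test(rank, KR_RANK_AUTH));
+ * @endcode
+ */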
+
+
+/** @cond internal Array of modules. */
+typedef array_t(struct kr_module *) module_array_t;
+/* @endcond */
+
+/**
+ * Name resolution context.
+ *
+ * Resolution context provides basic services like cache, configuration and options.
+ *
+ * @note This structure is persistent between name resolutions and may
+ * be shared between threads.
+ */
+struct kr_context
+{
+ struct kr_qflags options;
+ knot_rrset_t *opt_rr;
+ map_t trust_anchors;
+ map_t negative_anchors;
+ struct kr_zonecut root_hints;
+ struct kr_cache cache;
+ kr_nsrep_rtt_lru_t *cache_rtt;
+ unsigned cache_rtt_tout_retry_interval;
+ kr_nsrep_lru_t *cache_rep;
+ module_array_t *modules;
+	/* The cookie context structure is kept here rather than inside the
+	 * cookies module for easier access. */
+ struct kr_cookie_ctx cookie_ctx;
+ kr_cookie_lru_t *cache_cookie;
+ int32_t tls_padding; /**< See net.tls_padding in ../daemon/README.rst -- -1 is "true" (default policy), 0 is "false" (no padding) */
+ knot_mm_t *pool;
+};
+
+/* Kept outside, because kres-gen.lua can't handle this depth
+ * (and lines here were too long anyway). */
+struct kr_request_qsource_flags {
+ bool tcp:1; /**< true if the request is on TCP (or TLS); only meaningful if (dst_addr). */
+ bool tls:1; /**< true if the request is on TLS; only meaningful if (dst_addr). */
+};
+
+/**
+ * Name resolution request.
+ *
+ * Keeps information about current query processing between calls to
+ * processing APIs, i.e. current resolved query, resolution plan, ...
+ * Use this instead of the simple interface if you want to implement
+ * multiplexing or custom I/O.
+ *
+ * @note All data for this request must be allocated from the given pool.
+ */
+struct kr_request {
+ struct kr_context *ctx;
+ knot_pkt_t *answer;
+ struct kr_query *current_query; /**< Current evaluated query. */
+ struct {
+ /** Address that originated the request. NULL for internal origin. */
+ const struct sockaddr *addr;
+ /** Address that accepted the request. NULL for internal origin. */
+ const struct sockaddr *dst_addr;
+ const knot_pkt_t *packet;
+ struct kr_request_qsource_flags flags; /**< See definition above. */
+ size_t size; /**< query packet size */
+ } qsource;
+ struct {
+ unsigned rtt; /**< Current upstream RTT */
+ const struct sockaddr *addr; /**< Current upstream address */
+ } upstream; /**< Upstream information, valid only in consume() phase */
+ struct kr_qflags options;
+ int state;
+ ranked_rr_array_t answ_selected;
+ ranked_rr_array_t auth_selected;
+ ranked_rr_array_t add_selected;
+ rr_array_t additional;
+ bool answ_validated; /**< internal to validator; beware of caching, etc. */
+ bool auth_validated; /**< see answ_validated ^^ ; TODO */
+
+ /** Overall rank for the request.
+ *
+ * Values from kr_rank, currently just KR_RANK_SECURE and _INITIAL.
+ * Only read this in finish phase and after validator, please.
+ * Meaning of _SECURE: all RRs in answer+authority are _SECURE,
+ * including any negative results implied (NXDOMAIN, NODATA).
+ */
+ uint8_t rank;
+
+ struct kr_rplan rplan;
+ trace_log_f trace_log; /**< Logging tracepoint */
+ trace_callback_f trace_finish; /**< Request finish tracepoint */
+ int vars_ref; /**< Reference to per-request variable table. LUA_NOREF if not set. */
+ knot_mm_t pool;
+	unsigned int uid; /**< for logging purposes only */
+	void *daemon_context; /**< pointer to worker from daemon. Can be used in modules. */
+};
+
+/** Initializer for an array of *_selected. */
+#define kr_request_selected(req) { \
+ [KNOT_ANSWER] = &(req)->answ_selected, \
+ [KNOT_AUTHORITY] = &(req)->auth_selected, \
+ [KNOT_ADDITIONAL] = &(req)->add_selected, \
+ }
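+
+/* Usage sketch for the initializer above (illustrative only; `req` is
+ * an existing struct kr_request *):
+ * @code{.c}
+ * ranked_rr_array_t *selected[] = kr_request_selected(req);
+ * for (size_t i = 0; i < selected[KNOT_ANSWER]->len; ++i) {
+ *    ranked_rr_array_entry_t *entry = selected[KNOT_ANSWER]->at[i];
+ *    // inspect entry->rr, entry->rank, ...
+ * }
+ * @endcode
+ */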
+
+/**
+ * Begin name resolution.
+ *
+ * @note Expects the request to have an initialized mempool; the "answer" packet will
+ * be kept during the resolution and will contain the final answer at the end.
+ *
+ * @param request request state with initialized mempool
+ * @param ctx resolution context
+ * @param answer allocated packet for final answer
+ * @return CONSUME (expecting query)
+ */
+KR_EXPORT
+int kr_resolve_begin(struct kr_request *request, struct kr_context *ctx, knot_pkt_t *answer);
+
+/**
+ * Consume an input packet (either the first query or an answer to a query originated from kr_resolve_produce()).
+ *
+ * @note If the I/O fails, provide an empty or NULL packet; this will make the iterator recognize a nameserver failure.
+ *
+ * @param request request state (awaiting input)
+ * @param src [in] packet source address
+ * @param packet [in] input packet
+ * @return any state
+ */
+KR_EXPORT
+int kr_resolve_consume(struct kr_request *request, const struct sockaddr *src, knot_pkt_t *packet);
+
+/**
+ * Produce either next additional query or finish.
+ *
+ * If CONSUME is returned, then dst, type and packet will be filled with
+ * appropriate values and the caller is responsible for sending them and receiving the answer.
+ * If it returns any other state, the content of the variables is undefined.
+ *
+ * @param request request state (in PRODUCE state)
+ * @param dst [out] possible address of the next nameserver
+ * @param type [out] possible used socket type (SOCK_STREAM, SOCK_DGRAM)
+ * @param packet [out] packet to be filled with additional query
+ * @return any state
+ */
+KR_EXPORT
+int kr_resolve_produce(struct kr_request *request, struct sockaddr **dst, int *type, knot_pkt_t *packet);
+
+/**
+ * Finalises the outbound query packet with the knowledge of the IP addresses.
+ *
+ * @note The function must be called before actual sending of the request packet.
+ *
+ * @param request request state (in PRODUCE state)
+ * @param src address from which the query is going to be sent
+ * @param dst address of the name server
+ * @param type used socket type (SOCK_STREAM, SOCK_DGRAM)
+ * @param packet [in,out] query packet to be finalised
+ * @return kr_ok() or error code
+ */
+KR_EXPORT
+int kr_resolve_checkout(struct kr_request *request, const struct sockaddr *src,
+ struct sockaddr *dst, int type, knot_pkt_t *packet);
+
+/**
+ * Finish resolution and commit results if the state is DONE.
+ *
+ * @note The structures will be deinitialized, but the assigned memory pool is not going to
+ * be destroyed, as it's owned by the caller.
+ *
+ * @param request request state
+ * @param state either DONE or FAIL state
+ * @return DONE
+ */
+KR_EXPORT
+int kr_resolve_finish(struct kr_request *request, int state);
+
+/**
+ * Return resolution plan.
+ * @param request request state
+ * @return pointer to rplan
+ */
+KR_EXPORT KR_PURE
+struct kr_rplan *kr_resolve_plan(struct kr_request *request);
+
+/**
+ * Return memory pool associated with request.
+ * @param request request state
+ * @return mempool
+ */
+KR_EXPORT KR_PURE
+knot_mm_t *kr_resolve_pool(struct kr_request *request);
+
diff --git a/lib/rplan.c b/lib/rplan.c
new file mode 100644
index 0000000..6ed8e4e
--- /dev/null
+++ b/lib/rplan.c
@@ -0,0 +1,308 @@
+/* Copyright (C) 2014-2017 CZ.NIC, z.s.p.o. <knot-dns@labs.nic.cz>
+
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program. If not, see <https://www.gnu.org/licenses/>.
+ */
+
+#include <libknot/descriptor.h>
+#include <libknot/errcode.h>
+
+#include "lib/rplan.h"
+#include "lib/resolve.h"
+#include "lib/cache/api.h"
+#include "lib/defines.h"
+#include "lib/layer.h"
+
+#define VERBOSE_MSG(qry, ...) QRVERBOSE(qry, "plan", __VA_ARGS__)
+#define QUERY_PROVIDES(q, name, cls, type) \
+	((q)->sclass == (cls) && (q)->stype == (type) && knot_dname_is_equal((q)->sname, (name)))
+
+inline static unsigned char chars_or(const unsigned char a, const unsigned char b)
+{
+ return a | b;
+}
+
+/** Bits set to 1 in variable b will be set to zero in variable a. */
+inline static unsigned char chars_mask(const unsigned char a, const unsigned char b)
+{
+ return a & ~b;
+}
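+
+/* Byte-level example (illustrative): for a = 0x05 (0101b) and b = 0x03 (0011b),
+ * chars_or(a, b) == 0x07 (0111b) and chars_mask(a, b) == 0x04 (0100b). */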
+
+/** Apply mod(a, b) to every pair of bytes a, b from fl1, fl2 and store the result in fl1. */
+inline static void kr_qflags_mod(struct kr_qflags *fl1, struct kr_qflags fl2,
+ unsigned char mod(const unsigned char a, const unsigned char b))
+{
+ if (!fl1) abort();
+ union {
+ struct kr_qflags flags;
+ /* C99 section 6.5.3.4: sizeof(char) == 1 */
+ unsigned char chars[sizeof(struct kr_qflags)];
+ } tmp1, tmp2;
+ /* The compiler should be able to optimize all this into simple ORs. */
+ tmp1.flags = *fl1;
+ tmp2.flags = fl2;
+ for (size_t i = 0; i < sizeof(struct kr_qflags); ++i) {
+ tmp1.chars[i] = mod(tmp1.chars[i], tmp2.chars[i]);
+ }
+ *fl1 = tmp1.flags;
+}
+
+/**
+ * Set bits from variable fl2 in variable fl1.
+ * Bits which are not set in fl2 are not modified in fl1.
+ *
+ * @param[in,out] fl1
+ * @param[in] fl2
+ */
+void kr_qflags_set(struct kr_qflags *fl1, struct kr_qflags fl2)
+{
+ kr_qflags_mod(fl1, fl2, chars_or);
+}
+
+/**
+ * Clear bits from variable fl2 in variable fl1.
+ * Bits which are not set in fl2 are not modified in fl1.
+ *
+ * @param[in,out] fl1
+ * @param[in] fl2
+ */
+void kr_qflags_clear(struct kr_qflags *fl1, struct kr_qflags fl2)
+{
+ kr_qflags_mod(fl1, fl2, chars_mask);
+}
+
+static struct kr_query *query_create(knot_mm_t *pool, const knot_dname_t *name, uint32_t uid)
+{
+ struct kr_query *qry = mm_alloc(pool, sizeof(struct kr_query));
+ if (qry == NULL) {
+ return NULL;
+ }
+
+ memset(qry, 0, sizeof(struct kr_query));
+ if (name != NULL) {
+ qry->sname = knot_dname_copy(name, pool);
+ if (qry->sname == NULL) {
+ mm_free(pool, qry);
+ return NULL;
+ }
+ }
+
+ knot_dname_to_lower(qry->sname);
+ qry->uid = uid;
+ return qry;
+}
+
+static void query_free(knot_mm_t *pool, struct kr_query *qry)
+{
+ kr_zonecut_deinit(&qry->zone_cut);
+ mm_free(pool, qry->sname);
+ mm_free(pool, qry);
+}
+
+int kr_rplan_init(struct kr_rplan *rplan, struct kr_request *request, knot_mm_t *pool)
+{
+ if (rplan == NULL) {
+ return KNOT_EINVAL;
+ }
+
+ memset(rplan, 0, sizeof(struct kr_rplan));
+
+ rplan->pool = pool;
+ rplan->request = request;
+ array_init(rplan->pending);
+ array_init(rplan->resolved);
+ rplan->next_uid = 0;
+ return KNOT_EOK;
+}
+
+void kr_rplan_deinit(struct kr_rplan *rplan)
+{
+ if (rplan == NULL) {
+ return;
+ }
+
+ for (size_t i = 0; i < rplan->pending.len; ++i) {
+ query_free(rplan->pool, rplan->pending.at[i]);
+ }
+ for (size_t i = 0; i < rplan->resolved.len; ++i) {
+ query_free(rplan->pool, rplan->resolved.at[i]);
+ }
+ array_clear_mm(rplan->pending, mm_free, rplan->pool);
+ array_clear_mm(rplan->resolved, mm_free, rplan->pool);
+}
+
+bool kr_rplan_empty(struct kr_rplan *rplan)
+{
+ if (rplan == NULL) {
+ return true;
+ }
+
+ return rplan->pending.len == 0;
+}
+
+static struct kr_query *kr_rplan_push_query(struct kr_rplan *rplan,
+ struct kr_query *parent,
+ const knot_dname_t *name)
+{
+ if (rplan == NULL) {
+ return NULL;
+ }
+
+ /* Make sure there's enough space */
+ int ret = array_reserve_mm(rplan->pending, rplan->pending.len + 1, kr_memreserve, rplan->pool);
+ if (ret != 0) {
+ return NULL;
+ }
+
+ struct kr_query *qry = query_create(rplan->pool, name, rplan->next_uid);
+ if (qry == NULL) {
+ return NULL;
+ }
+ rplan->next_uid += 1;
+ /* Class and type must be set outside this function. */
+ qry->flags = rplan->request->options;
+ qry->parent = parent;
+ qry->request = rplan->request;
+ qry->ns.ctx = rplan->request->ctx;
+ qry->ns.addr[0].ip.sa_family = AF_UNSPEC;
+ gettimeofday(&qry->timestamp, NULL);
+ qry->timestamp_mono = kr_now();
+ qry->creation_time_mono = parent ? parent->creation_time_mono : qry->timestamp_mono;
+ kr_zonecut_init(&qry->zone_cut, (const uint8_t *)"", rplan->pool);
+ qry->reorder = qry->flags.REORDER_RR ? kr_rand_bytes(sizeof(qry->reorder)) : 0;
+
+ /* When forwarding, keep the nameserver addresses. */
+ if (parent && parent->flags.FORWARD && qry->flags.FORWARD) {
+ ret = kr_nsrep_copy_set(&qry->ns, &parent->ns);
+ if (ret) {
+ query_free(rplan->pool, qry);
+ return NULL;
+ }
+ }
+
+ array_push(rplan->pending, qry);
+
+ return qry;
+}
+
+struct kr_query *kr_rplan_push_empty(struct kr_rplan *rplan, struct kr_query *parent)
+{
+ if (rplan == NULL) {
+ return NULL;
+ }
+
+ struct kr_query *qry = kr_rplan_push_query(rplan, parent, NULL);
+ if (qry == NULL) {
+ return NULL;
+ }
+
+ WITH_VERBOSE(qry) {
+ VERBOSE_MSG(qry, "plan '%s' type '%s' uid [%05u.%02u]\n", "", "",
+ qry->request ? qry->request->uid : 0, qry->uid);
+ }
+ return qry;
+}
+
+struct kr_query *kr_rplan_push(struct kr_rplan *rplan, struct kr_query *parent,
+ const knot_dname_t *name, uint16_t cls, uint16_t type)
+{
+ if (rplan == NULL || name == NULL) {
+ return NULL;
+ }
+
+ struct kr_query *qry = kr_rplan_push_query(rplan, parent, name);
+ if (qry == NULL) {
+ return NULL;
+ }
+
+ qry->sclass = cls;
+ qry->stype = type;
+
+ WITH_VERBOSE(qry) {
+ KR_DNAME_GET_STR(name_str, name);
+ KR_RRTYPE_GET_STR(type_str, type);
+ VERBOSE_MSG(parent, "plan '%s' type '%s' uid [%05u.%02u]\n",
+ name_str, type_str,
+ qry->request ? qry->request->uid : 0, qry->uid);
+ }
+ return qry;
+}
+
+int kr_rplan_pop(struct kr_rplan *rplan, struct kr_query *qry)
+{
+ if (rplan == NULL || qry == NULL) {
+ return KNOT_EINVAL;
+ }
+
+ /* Make sure there's enough space */
+ int ret = array_reserve_mm(rplan->resolved, rplan->resolved.len + 1, kr_memreserve, rplan->pool);
+ if (ret != 0) {
+ return ret;
+ }
+
+ /* Find the query, it will likely be on top */
+ for (size_t i = rplan->pending.len; i > 0; i--) {
+ if (rplan->pending.at[i - 1] == qry) {
+ array_del(rplan->pending, i - 1);
+ array_push(rplan->resolved, qry);
+ break;
+ }
+ }
+ return KNOT_EOK;
+}
+
+bool kr_rplan_satisfies(struct kr_query *closure, const knot_dname_t *name, uint16_t cls, uint16_t type)
+{
+ while (name && closure) {
+ if (QUERY_PROVIDES(closure, name, cls, type)) {
+ return true;
+ }
+ closure = closure->parent;
+ }
+ return false;
+}
+
+struct kr_query *kr_rplan_resolved(struct kr_rplan *rplan)
+{
+ if (rplan->resolved.len == 0) {
+ return NULL;
+ }
+ return array_tail(rplan->resolved);
+}
+
+struct kr_query *kr_rplan_last(struct kr_rplan *rplan)
+{
+ if (!kr_rplan_empty(rplan)) {
+ return array_tail(rplan->pending);
+ }
+
+ return kr_rplan_resolved(rplan);
+}
+
+struct kr_query *kr_rplan_find_resolved(struct kr_rplan *rplan, struct kr_query *parent,
+ const knot_dname_t *name, uint16_t cls, uint16_t type)
+{
+ struct kr_query *ret = NULL;
+ for (int i = 0; i < rplan->resolved.len; ++i) {
+ struct kr_query *q = rplan->resolved.at[i];
+ if (q->stype == type && q->sclass == cls &&
+ (parent == NULL || q->parent == parent) &&
+ knot_dname_is_equal(q->sname, name)) {
+ ret = q;
+ break;
+ }
+ }
+ return ret;
+}
+
+#undef VERBOSE_MSG
diff --git a/lib/rplan.h b/lib/rplan.h
new file mode 100644
index 0000000..21b0b0b
--- /dev/null
+++ b/lib/rplan.h
@@ -0,0 +1,221 @@
+/* Copyright (C) 2014-2017 CZ.NIC, z.s.p.o. <knot-dns@labs.nic.cz>
+
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program. If not, see <https://www.gnu.org/licenses/>.
+ */
+
+#pragma once
+
+#include <sys/time.h>
+#include <libknot/dname.h>
+#include <libknot/codes.h>
+
+#include "lib/cache/api.h"
+#include "lib/zonecut.h"
+#include "lib/nsrep.h"
+
+/** Query flags */
+struct kr_qflags {
+ bool NO_MINIMIZE : 1; /**< Don't minimize QNAME. */
+ bool NO_THROTTLE : 1; /**< No query/slow NS throttling. */
+ bool NO_IPV6 : 1; /**< Disable IPv6 */
+ bool NO_IPV4 : 1; /**< Disable IPv4 */
+ bool TCP : 1; /**< Use TCP for this query. */
+ bool RESOLVED : 1; /**< Query is resolved. Note that kr_query gets
+ * RESOLVED before following a CNAME chain; see .CNAME. */
+ bool AWAIT_IPV4 : 1; /**< Query is waiting for A address. */
+ bool AWAIT_IPV6 : 1; /**< Query is waiting for AAAA address. */
+ bool AWAIT_CUT : 1; /**< Query is waiting for zone cut lookup */
+ bool SAFEMODE : 1; /**< Don't use fancy stuff (EDNS, 0x20, ...) */
+ bool CACHED : 1; /**< Query response is cached. */
+ bool NO_CACHE : 1; /**< No cache for lookup; exception: finding NSs and subqueries. */
+ bool EXPIRING : 1; /**< Query response is cached, but expiring. */
+ bool ALLOW_LOCAL : 1; /**< Allow queries to local or private address ranges. */
+ bool DNSSEC_WANT : 1; /**< Want DNSSEC secured answer; exception: +cd,
+ * i.e. knot_wire_set_cd(request->answer->wire). */
+ bool DNSSEC_BOGUS : 1; /**< Query response is DNSSEC bogus. */
+ bool DNSSEC_INSECURE : 1;/**< Query response is DNSSEC insecure. */
+ bool DNSSEC_CD : 1; /**< Instruction to set CD bit in request. */
+ bool STUB : 1; /**< Stub resolution, accept received answer as solved. */
+ bool ALWAYS_CUT : 1; /**< Always recover zone cut (even if cached). */
+ bool DNSSEC_WEXPAND : 1; /**< Query response has wildcard expansion. */
+ bool PERMISSIVE : 1; /**< Permissive resolver mode. */
+ bool STRICT : 1; /**< Strict resolver mode. */
+ bool BADCOOKIE_AGAIN : 1;/**< Query again because bad cookie returned. */
+ bool CNAME : 1; /**< Query response contains CNAME in answer section. */
+ bool REORDER_RR : 1; /**< Reorder cached RRs. */
+ bool TRACE : 1; /**< Also log answers if --verbose. */
+	bool NO_0X20 : 1; /**< Disable query case randomization. */
+	bool DNSSEC_NODS : 1; /**< DS non-existence is proven. */
+	bool DNSSEC_OPTOUT : 1; /**< Closest encloser proof has opt-out. */
+ bool NONAUTH : 1; /**< Non-authoritative in-bailiwick records are enough.
+ * TODO: utilize this also outside cache. */
+ bool FORWARD : 1; /**< Forward all queries to upstream; validate answers. */
+ bool DNS64_MARK : 1; /**< Internal mark for dns64 module. */
+ bool CACHE_TRIED : 1; /**< Internal to cache module. */
+ bool NO_NS_FOUND : 1; /**< No valid NS found during last PRODUCE stage. */
+};
+
+/** Combine flags together. This means set union for simple flags. */
+KR_EXPORT
+void kr_qflags_set(struct kr_qflags *fl1, struct kr_qflags fl2);
+
+/** Remove flags. This means set-theoretic difference. */
+KR_EXPORT
+void kr_qflags_clear(struct kr_qflags *fl1, struct kr_qflags fl2);
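+
+/* A minimal sketch of combining the two (illustrative, using C99 compound literals):
+ * @code{.c}
+ * struct kr_qflags flags = { 0 };
+ * kr_qflags_set(&flags, (struct kr_qflags){ .TCP = true, .NO_MINIMIZE = true });
+ * kr_qflags_clear(&flags, (struct kr_qflags){ .NO_MINIMIZE = true });
+ * // now flags.TCP == true and flags.NO_MINIMIZE == false
+ * @endcode
+ */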
+
+/** Callback for serve-stale decisions.
+ * @param ttl the expired TTL (i.e. it's < 0)
+ * @return the adjusted TTL (typically 1) or < 0.
+ */
+typedef int32_t (*kr_stale_cb)(int32_t ttl, const knot_dname_t *owner, uint16_t type,
+ const struct kr_query *qry);
+
+/**
+ * Single query representation.
+ */
+struct kr_query {
+ struct kr_query *parent;
+ knot_dname_t *sname; /**< The name to resolve - lower-cased, uncompressed. */
+ uint16_t stype;
+ uint16_t sclass;
+ uint16_t id;
+ struct kr_qflags flags, forward_flags;
+ uint32_t secret;
+ uint16_t fails;
+ uint16_t reorder; /**< Seed to reorder (cached) RRs in answer or zero. */
+	uint64_t creation_time_mono; /**< The time of the query's creation (milliseconds),
+					 * or the creation time of the oldest
+					 * ancestor if this is a subquery. */
+ uint64_t timestamp_mono; /**< Time of query created or time of
+ * query to upstream resolver (milliseconds). */
+ struct timeval timestamp; /**< Real time for TTL+DNSSEC checks (.tv_sec only). */
+ struct kr_zonecut zone_cut;
+ struct kr_layer_pickle *deferred;
+ uint32_t uid; /**< Query iteration number, unique within the kr_rplan. */
+ /** Pointer to the query that originated this one because of following a CNAME (or NULL). */
+ struct kr_query *cname_parent;
+ struct kr_request *request; /**< Parent resolution request. */
+ kr_stale_cb stale_cb; /**< See the type */
+ /* Beware: this must remain the last, because of lua bindings. */
+ struct kr_nsrep ns;
+};
+
+/** @cond internal Array of queries. */
+typedef array_t(struct kr_query *) kr_qarray_t;
+/* @endcond */
+
+/**
+ * Query resolution plan structure.
+ *
+ * The structure most importantly holds the original query, answer and the
+ * list of pending queries required to resolve the original query.
+ * It also keeps a notion of current zone cut.
+ */
+struct kr_rplan {
+ kr_qarray_t pending; /**< List of pending queries. */
+ kr_qarray_t resolved; /**< List of resolved queries. */
+ struct kr_request *request; /**< Parent resolution request. */
+ knot_mm_t *pool; /**< Temporary memory pool. */
+ uint32_t next_uid; /**< Next value for kr_query::uid (incremental). */
+};
+
+/**
+ * Initialize resolution plan (empty).
+ * @param rplan plan instance
+ * @param request resolution request
+ * @param pool ephemeral memory pool for whole resolution
+ */
+KR_EXPORT
+int kr_rplan_init(struct kr_rplan *rplan, struct kr_request *request, knot_mm_t *pool);
+
+/**
+ * Deinitialize resolution plan, aborting any uncommitted transactions.
+ * @param rplan plan instance
+ */
+KR_EXPORT
+void kr_rplan_deinit(struct kr_rplan *rplan);
+
+/**
+ * Return true if the resolution plan is empty (i.e. finished or initialized)
+ * @param rplan plan instance
+ * @return true or false
+ */
+KR_EXPORT KR_PURE
+bool kr_rplan_empty(struct kr_rplan *rplan);
+
+/**
+ * Push empty query to the top of the resolution plan.
+ * @note This query serves as a cookie query only.
+ * @param rplan plan instance
+ * @param parent query parent (or NULL)
+ * @return query instance or NULL
+ */
+KR_EXPORT
+struct kr_query *kr_rplan_push_empty(struct kr_rplan *rplan,
+ struct kr_query *parent);
+
+/**
+ * Push a query to the top of the resolution plan.
+ * @note This means that this query takes precedence over all pending queries.
+ * @param rplan plan instance
+ * @param parent query parent (or NULL)
+ * @param name resolved name
+ * @param cls resolved class
+ * @param type resolved type
+ * @return query instance or NULL
+ */
+KR_EXPORT
+struct kr_query *kr_rplan_push(struct kr_rplan *rplan, struct kr_query *parent,
+ const knot_dname_t *name, uint16_t cls, uint16_t type);
+
+/**
+ * Pop existing query from the resolution plan.
+ * @note Popped queries are not discarded, but moved to the resolved list.
+ * @param rplan plan instance
+ * @param qry resolved query
+ * @return 0 or an error
+ */
+KR_EXPORT
+int kr_rplan_pop(struct kr_rplan *rplan, struct kr_query *qry);
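+
+/* Illustrative sketch (editor's addition, not from the original sources):
+ * a driver typically plans and retires queries roughly like this,
+ * assuming an initialized rplan and a hypothetical dname variable:
+ *
+ *	struct kr_query *qry = kr_rplan_push(rplan, NULL, dname,
+ *					     KNOT_CLASS_IN, KNOT_RRTYPE_A);
+ *	if (!qry)
+ *		return kr_error(ENOMEM);
+ *	// ... resolve qry ...
+ *	kr_rplan_pop(rplan, qry);	// moves it to the resolved list
+ */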
+
+/**
+ * Return true if resolution chain satisfies given query.
+ */
+KR_EXPORT KR_PURE
+bool kr_rplan_satisfies(struct kr_query *closure, const knot_dname_t *name, uint16_t cls, uint16_t type);
+
+/** Return last resolved query. */
+KR_EXPORT KR_PURE
+struct kr_query *kr_rplan_resolved(struct kr_rplan *rplan);
+
+/**
+ * Return last query (either currently being solved or last resolved).
+ * This is necessary to retrieve the last query in case of resolution failures (e.g. time limit reached).
+ */
+KR_EXPORT KR_PURE
+struct kr_query *kr_rplan_last(struct kr_rplan *rplan);
+
+
+/**
+ * Check if a given query has already been resolved.
+ * @param rplan plan instance
+ * @param parent query parent (or NULL)
+ * @param name resolved name
+ * @param cls resolved class
+ * @param type resolved type
+ * @return query instance or NULL
+ */
+KR_EXPORT KR_PURE
+struct kr_query *kr_rplan_find_resolved(struct kr_rplan *rplan, struct kr_query *parent,
+ const knot_dname_t *name, uint16_t cls, uint16_t type);
diff --git a/lib/utils.c b/lib/utils.c
new file mode 100644
index 0000000..fe9ab03
--- /dev/null
+++ b/lib/utils.c
@@ -0,0 +1,1084 @@
+/* Copyright (C) 2014-2017 CZ.NIC, z.s.p.o. <knot-dns@labs.nic.cz>
+
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program. If not, see <https://www.gnu.org/licenses/>.
+ */
+
+#include <stdarg.h>
+#include <string.h>
+#include <stdlib.h>
+#include <stdio.h>
+#include <arpa/inet.h>
+#include <sys/time.h>
+#include <contrib/cleanup.h>
+#include <contrib/ccan/asprintf/asprintf.h>
+#include <ucw/mempool.h>
+#include <gnutls/gnutls.h>
+#include <libknot/descriptor.h>
+#include <libknot/dname.h>
+#include <libknot/rrtype/rrsig.h>
+#include <libknot/rrset-dump.h>
+#include <libknot/version.h>
+#include <uv.h>
+
+#include "lib/defines.h"
+#include "lib/utils.h"
+#include "lib/generic/array.h"
+#include "lib/nsrep.h"
+#include "lib/module.h"
+#include "lib/resolve.h"
+
+
+/* Always compile-in log symbols, even if disabled. */
+#undef kr_verbose_status
+#undef kr_verbose_set
+#undef kr_log_verbose
+
+/* Logging & debugging */
+bool kr_verbose_status = false;
+
+void *mm_realloc(knot_mm_t *mm, void *what, size_t size, size_t prev_size)
+{
+ if (mm) {
+ void *p = mm->alloc(mm->ctx, size);
+ if (p == NULL) {
+ return NULL;
+ } else {
+ if (what) {
+ memcpy(p, what,
+ prev_size < size ? prev_size : size);
+ }
+ mm_free(mm, what);
+ return p;
+ }
+ } else {
+ return realloc(what, size);
+ }
+}
+
+void *mm_malloc(void *ctx, size_t n)
+{
+ (void)ctx;
+ return malloc(n);
+}
+
+/*
+ * Macros.
+ */
+#define strlen_safe(x) ((x) ? strlen(x) : 0)
+
+/**
+ * @internal Convert a 16-bit unsigned to a decimal string, with leading zeros.
+ * @note Always writes exactly 5 characters into dst.
+ * Credit: http://computer-programming-forum.com/46-asm/7aa4b50bce8dd985.htm
+ */
+static inline int u16tostr(uint8_t *dst, uint16_t num)
+{
+ uint32_t tmp = num * (((1 << 28) / 10000) + 1) - (num / 4);
+ for(size_t i = 0; i < 5; i++) {
+ dst[i] = '0' + (char) (tmp >> 28);
+ tmp = (tmp & 0x0fffffff) * 10;
+ }
+ return 5;
+}
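+
+/* Worked example of the fixed-point trick above (editor's note):
+ * (1 << 28) / 10000 + 1 == 26844, so for num == 12345 we start with
+ * tmp = 12345 * 26844 - 12345 / 4 == 331386094, roughly 12345 * 2^28 / 10000.
+ * Each round takes the top 4 bits (tmp >> 28) as the next decimal digit
+ * and multiplies the 28-bit remainder by 10, yielding '1','2','3','4','5'.
+ * The -num/4 term appears to keep the rounding error small enough
+ * for all 16-bit inputs. */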
+
+/*
+ * Cleanup callbacks.
+ */
+
+static void kres_gnutls_log(int level, const char *message)
+{
+ kr_log_verbose("[gnutls] (%d) %s", level, message);
+}
+
+bool kr_verbose_set(bool status)
+{
+#ifndef NOVERBOSELOG
+ kr_verbose_status = status;
+
+	/* GnuTLS logs messages related to our TLS and also to libdnssec,
+	 * and its logging can only be set up globally. */
+ if (status) {
+ gnutls_global_set_log_function(kres_gnutls_log);
+ }
+ gnutls_global_set_log_level(status ? 5 : 0);
+#endif
+ return kr_verbose_status;
+}
+
+void kr_log_verbose(const char *fmt, ...)
+{
+ if (kr_verbose_status) {
+ va_list args;
+ va_start(args, fmt);
+ vprintf(fmt, args);
+ va_end(args);
+ fflush(stdout);
+ }
+}
+
+void kr_log_qverbose_impl(const struct kr_query *qry, const char *cls, const char *fmt, ...)
+{
+ unsigned ind = 0;
+ for (const struct kr_query *q = qry; q; q = q->parent)
+ ind += 2;
+ uint32_t qry_uid = qry ? qry->uid : 0;
+ uint32_t req_uid = qry && qry->request ? qry->request->uid : 0;
+ /* Simplified kr_log_verbose() calls, first prefix then passed fmt...
+ * Calling it would take about the same amount of code. */
+ printf("[%05u.%02u][%s] %*s", req_uid, qry_uid, cls, ind, "");
+ va_list args;
+ va_start(args, fmt);
+ vprintf(fmt, args);
+ va_end(args);
+ fflush(stdout);
+}
+
+bool kr_log_trace(const struct kr_query *query, const char *source, const char *fmt, ...)
+{
+ if (!kr_log_trace_enabled(query)) {
+ return false;
+ }
+
+ auto_free char *msg = NULL;
+
+ va_list args;
+ va_start(args, fmt);
+ int len = vasprintf(&msg, fmt, args);
+ va_end(args);
+
+ /* Check formatting result before logging */
+ if (len < 0) {
+ return false;
+ }
+
+ query->request->trace_log(query, source, msg);
+ return true;
+}
+
+char* kr_strcatdup(unsigned n, ...)
+{
+ if (n < 1) {
+ return NULL;
+ }
+
+ /* Calculate total length */
+ size_t total_len = 0;
+ va_list vl;
+ va_start(vl, n);
+ for (unsigned i = 0; i < n; ++i) {
+ char *item = va_arg(vl, char *);
+ const size_t new_len = total_len + strlen_safe(item);
+ if (unlikely(new_len < total_len)) {
+ va_end(vl);
+ return NULL;
+ }
+ total_len = new_len;
+ }
+ va_end(vl);
+
+ /* Allocate result and fill */
+ char *result = NULL;
+ if (total_len > 0) {
+ if (unlikely(total_len + 1 == 0)) return NULL;
+ result = malloc(total_len + 1);
+ }
+ if (result) {
+ char *stream = result;
+ va_start(vl, n);
+ for (unsigned i = 0; i < n; ++i) {
+ char *item = va_arg(vl, char *);
+ if (item) {
+ size_t len = strlen(item);
+ memcpy(stream, item, len + 1);
+ stream += len;
+ }
+ }
+ va_end(vl);
+ }
+
+ return result;
+}
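+
+/* Usage sketch (editor's illustration, not from the original sources;
+ * dir and fname are hypothetical C strings):
+ *
+ *	char *path = kr_strcatdup(3, dir, "/", fname);	// NULL items count as ""
+ *	if (path) {
+ *		// ... use path ...
+ *		free(path);	// plain malloc'd result
+ *	}
+ */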
+
+int kr_memreserve(void *baton, char **mem, size_t elm_size, size_t want, size_t *have)
+{
+ if (*have >= want) {
+ return 0;
+ } else {
+ knot_mm_t *pool = baton;
+ size_t next_size = array_next_count(want);
+ void *mem_new = mm_alloc(pool, next_size * elm_size);
+ if (mem_new != NULL) {
+ memcpy(mem_new, *mem, (*have)*(elm_size));
+ mm_free(pool, *mem);
+ *mem = mem_new;
+ *have = next_size;
+ return 0;
+ }
+ }
+ return -1;
+}
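+
+/* Intended use (hedged sketch): kr_memreserve matches the reserve-callback
+ * signature expected by array_reserve_mm() from lib/generic/array.h,
+ * with the memory pool passed as the baton, e.g.:
+ *
+ *	int ret = array_reserve_mm(arr, arr.len + 1, kr_memreserve, pool);
+ */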
+
+static int pkt_recycle(knot_pkt_t *pkt, bool keep_question)
+{
+ /* The maximum size of a header + query name + (class, type) */
+ uint8_t buf[KNOT_WIRE_HEADER_SIZE + KNOT_DNAME_MAXLEN + 2 * sizeof(uint16_t)];
+
+ /* Save header and the question section */
+ size_t base_size = KNOT_WIRE_HEADER_SIZE;
+ if (keep_question) {
+ base_size += knot_pkt_question_size(pkt);
+ }
+ assert(base_size <= sizeof(buf));
+ memcpy(buf, pkt->wire, base_size);
+
+ /* Clear the packet and its auxiliary structures */
+ knot_pkt_clear(pkt);
+
+ /* Restore header and question section and clear counters */
+ pkt->size = base_size;
+ memcpy(pkt->wire, buf, base_size);
+ knot_wire_set_qdcount(pkt->wire, keep_question);
+ knot_wire_set_ancount(pkt->wire, 0);
+ knot_wire_set_nscount(pkt->wire, 0);
+ knot_wire_set_arcount(pkt->wire, 0);
+
+ /* Reparse question */
+ knot_pkt_begin(pkt, KNOT_ANSWER);
+ return knot_pkt_parse_question(pkt);
+}
+
+int kr_pkt_recycle(knot_pkt_t *pkt)
+{
+ return pkt_recycle(pkt, false);
+}
+
+int kr_pkt_clear_payload(knot_pkt_t *pkt)
+{
+ return pkt_recycle(pkt, knot_wire_get_qdcount(pkt->wire));
+}
+
+int kr_pkt_put(knot_pkt_t *pkt, const knot_dname_t *name, uint32_t ttl,
+ uint16_t rclass, uint16_t rtype, const uint8_t *rdata, uint16_t rdlen)
+{
+ /* LATER(opt.): there's relatively lots of copying, but ATM kr_pkt_put()
+ * isn't considered to be used in any performance-critical parts (just lua). */
+ if (!pkt || !name) {
+ return kr_error(EINVAL);
+ }
+ /* Create empty RR */
+ knot_rrset_t rr;
+ knot_rrset_init(&rr, knot_dname_copy(name, &pkt->mm), rtype, rclass, ttl);
+ /* Create RDATA */
+ knot_rdata_t *rdata_tmp = mm_alloc(&pkt->mm, offsetof(knot_rdata_t, data) + rdlen);
+ knot_rdata_init(rdata_tmp, rdlen, rdata);
+ knot_rdataset_add(&rr.rrs, rdata_tmp, &pkt->mm);
+ mm_free(&pkt->mm, rdata_tmp); /* we're always on mempool for now, but whatever */
+ /* Append RR */
+ return knot_pkt_put(pkt, 0, &rr, KNOT_PF_FREE);
+}
+
+void kr_pkt_make_auth_header(knot_pkt_t *pkt)
+{
+ assert(pkt && pkt->wire);
+ knot_wire_clear_ad(pkt->wire);
+ knot_wire_set_aa(pkt->wire);
+}
+
+const char *kr_inaddr(const struct sockaddr *addr)
+{
+ if (!addr) {
+ return NULL;
+ }
+ switch (addr->sa_family) {
+ case AF_INET: return (const char *)&(((const struct sockaddr_in *)addr)->sin_addr);
+ case AF_INET6: return (const char *)&(((const struct sockaddr_in6 *)addr)->sin6_addr);
+ default: return NULL;
+ }
+}
+
+int kr_inaddr_family(const struct sockaddr *addr)
+{
+ if (!addr)
+ return AF_UNSPEC;
+ return addr->sa_family;
+}
+
+int kr_inaddr_len(const struct sockaddr *addr)
+{
+ if (!addr) {
+ return kr_error(EINVAL);
+ }
+ return kr_family_len(addr->sa_family);
+}
+
+int kr_sockaddr_len(const struct sockaddr *addr)
+{
+ if (!addr) {
+ return kr_error(EINVAL);
+ }
+ switch (addr->sa_family) {
+ case AF_INET: return sizeof(struct sockaddr_in);
+ case AF_INET6: return sizeof(struct sockaddr_in6);
+ default: return kr_error(EINVAL);
+ }
+}
+
+int kr_sockaddr_cmp(const struct sockaddr *left, const struct sockaddr *right)
+{
+ if (!left || !right) {
+ return kr_error(EINVAL);
+ }
+ if (left->sa_family != right->sa_family) {
+ return kr_error(EFAULT);
+ }
+ if (left->sa_family == AF_INET) {
+ struct sockaddr_in *left_in = (struct sockaddr_in *)left;
+ struct sockaddr_in *right_in = (struct sockaddr_in *)right;
+ if (left_in->sin_addr.s_addr != right_in->sin_addr.s_addr) {
+ return kr_error(EFAULT);
+ }
+ if (left_in->sin_port != right_in->sin_port) {
+ return kr_error(EFAULT);
+ }
+ } else if (left->sa_family == AF_INET6) {
+ struct sockaddr_in6 *left_in6 = (struct sockaddr_in6 *)left;
+ struct sockaddr_in6 *right_in6 = (struct sockaddr_in6 *)right;
+ if (memcmp(&left_in6->sin6_addr, &right_in6->sin6_addr,
+ sizeof(struct in6_addr)) != 0) {
+ return kr_error(EFAULT);
+ }
+ if (left_in6->sin6_port != right_in6->sin6_port) {
+ return kr_error(EFAULT);
+ }
+ } else {
+ return kr_error(ENOENT);
+ }
+ return kr_ok();
+}
+
+uint16_t kr_inaddr_port(const struct sockaddr *addr)
+{
+ if (!addr) {
+ return 0;
+ }
+ switch (addr->sa_family) {
+ case AF_INET: return ntohs(((const struct sockaddr_in *)addr)->sin_port);
+ case AF_INET6: return ntohs(((const struct sockaddr_in6 *)addr)->sin6_port);
+ default: return 0;
+ }
+}
+
+void kr_inaddr_set_port(struct sockaddr *addr, uint16_t port)
+{
+ if (!addr) {
+ return;
+ }
+ switch (addr->sa_family) {
+	case AF_INET:   ((struct sockaddr_in *)addr)->sin_port = htons(port);
+			break;
+	case AF_INET6:  ((struct sockaddr_in6 *)addr)->sin6_port = htons(port);
+			break;
+ default: break;
+ }
+}
+
+int kr_inaddr_str(const struct sockaddr *addr, char *buf, size_t *buflen)
+{
+ if (!addr || !buf || !buflen) {
+ return kr_error(EINVAL);
+ }
+
+ if (!inet_ntop(addr->sa_family, kr_inaddr(addr), buf, *buflen)) {
+ return kr_error(errno);
+ }
+ const int len = strlen(buf);
+ const int len_need = len + 1 + 5 + 1;
+ if (len_need > *buflen) {
+ *buflen = len_need;
+ return kr_error(ENOSPC);
+ }
+ *buflen = len_need;
+ buf[len] = '#';
+ u16tostr((uint8_t *)&buf[len + 1], kr_inaddr_port(addr));
+ buf[len_need - 1] = 0;
+ return kr_ok();
+}
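+
+/* Usage sketch (editor's illustration): the in/out buflen includes the
+ * terminating zero, and the port is always printed as five digits:
+ *
+ *	char buf[INET6_ADDRSTRLEN + 1 + 5 + 1];
+ *	size_t len = sizeof(buf);
+ *	if (kr_inaddr_str(addr, buf, &len) == kr_ok()) {
+ *		// e.g. buf == "192.0.2.1#00053" for port 53
+ *	}
+ */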
+
+int kr_straddr_family(const char *addr)
+{
+ if (!addr) {
+ return kr_error(EINVAL);
+ }
+ if (strchr(addr, ':')) {
+ return AF_INET6;
+ }
+ return AF_INET;
+}
+
+int kr_family_len(int family)
+{
+ switch (family) {
+ case AF_INET: return sizeof(struct in_addr);
+ case AF_INET6: return sizeof(struct in6_addr);
+ default: return kr_error(EINVAL);
+ }
+}
+
+struct sockaddr * kr_straddr_socket(const char *addr, int port)
+{
+ switch (kr_straddr_family(addr)) {
+ case AF_INET: {
+ struct sockaddr_in *res = malloc(sizeof(*res));
+		if (res && uv_ip4_addr(addr, port, res) >= 0) {
+ return (struct sockaddr *)res;
+ } else {
+ free(res);
+ return NULL;
+ }
+ }
+ case AF_INET6: {
+ struct sockaddr_in6 *res = malloc(sizeof(*res));
+		if (res && uv_ip6_addr(addr, port, res) >= 0) {
+ return (struct sockaddr *)res;
+ } else {
+ free(res);
+ return NULL;
+ }
+ }
+ default:
+ return NULL;
+ }
+}
+
+int kr_straddr_subnet(void *dst, const char *addr)
+{
+ if (!dst || !addr) {
+ return kr_error(EINVAL);
+ }
+ /* Parse subnet */
+ int bit_len = 0;
+ int family = kr_straddr_family(addr);
+ auto_free char *addr_str = strdup(addr);
+ char *subnet = strchr(addr_str, '/');
+ if (subnet) {
+ *subnet = '\0';
+ subnet += 1;
+ bit_len = strtol(subnet, NULL, 10);
+ /* Check client subnet length */
+ const int max_len = (family == AF_INET6) ? 128 : 32;
+ if (bit_len < 0 || bit_len > max_len) {
+ return kr_error(ERANGE);
+ }
+ } else {
+ /* No subnet, use maximal subnet length. */
+ bit_len = (family == AF_INET6) ? 128 : 32;
+ }
+ /* Parse address */
+ int ret = inet_pton(family, addr_str, dst);
+ if (ret < 0) {
+ return kr_error(EILSEQ);
+ }
+
+ return bit_len;
+}
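+
+/* Example (editor's note): parsing "192.0.2.0/24" writes the four address
+ * bytes into dst and returns 24; a bare "192.0.2.1" returns the maximal
+ * length 32 (or 128 for IPv6).  dst must be able to hold a struct in6_addr. */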
+
+int kr_straddr_split(const char *addr, char *buf, size_t buflen, uint16_t *port)
+{
+ const int base = 10;
+ long p = 0;
+ size_t addrlen = strlen(addr);
+ char *p_start = strchr(addr, '@');
+ char *p_end;
+
+ if (!p_start) {
+ p_start = strchr(addr, '#');
+ }
+
+ if (p_start) {
+		if (p_start[1] != '\0') {
+ p = strtol(p_start + 1, &p_end, base);
+ if (*p_end != '\0' || p <= 0 || p > UINT16_MAX) {
+ return kr_error(EINVAL);
+ }
+ }
+ addrlen = p_start - addr;
+ }
+
+ /* Check if address is valid. */
+ if (addrlen >= INET6_ADDRSTRLEN) {
+ return kr_error(EINVAL);
+ }
+
+ char str[INET6_ADDRSTRLEN];
+ struct sockaddr_storage ss;
+
+ memcpy(str, addr, addrlen); str[addrlen] = '\0';
+
+ int family = kr_straddr_family(str);
+ if (family == kr_error(EINVAL) || !inet_pton(family, str, &ss)) {
+ return kr_error(EINVAL);
+ }
+
+	/* Address and port contain valid values, return them to the caller */
+ if (buf) {
+ if (addrlen >= buflen) {
+ return kr_error(ENOSPC);
+ }
+ memcpy(buf, addr, addrlen); buf[addrlen] = '\0';
+ }
+ if (port) {
+ *port = (uint16_t)p;
+ }
+
+ return kr_ok();
+}
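+
+/* Example (editor's illustration): splitting "::1#5353":
+ *
+ *	char host[INET6_ADDRSTRLEN];
+ *	uint16_t port = 0;
+ *	if (kr_straddr_split("::1#5353", host, sizeof(host), &port) == kr_ok()) {
+ *		// host == "::1", port == 5353
+ *	}
+ */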
+
+int kr_straddr_join(const char *addr, uint16_t port, char *buf, size_t *buflen)
+{
+ if (!addr || !buf || !buflen) {
+ return kr_error(EINVAL);
+ }
+
+ struct sockaddr_storage ss;
+ int family = kr_straddr_family(addr);
+ if (family == kr_error(EINVAL) || !inet_pton(family, addr, &ss)) {
+ return kr_error(EINVAL);
+ }
+
+ int len = strlen(addr);
+ if (len + 6 >= *buflen) {
+ return kr_error(ENOSPC);
+ }
+
+ memcpy(buf, addr, len + 1);
+ buf[len] = '#';
+ u16tostr((uint8_t *)&buf[len + 1], port);
+ len += 6;
+ buf[len] = 0;
+ *buflen = len;
+
+ return kr_ok();
+}
+
+int kr_bitcmp(const char *a, const char *b, int bits)
+{
+ /* We're using the function from lua directly, so at least for now
+ * we avoid crashing on bogus inputs. Meaning: NULL is ordered before
+ * anything else, and negative length is the same as zero.
+ * TODO: review the call sites and probably remove the checks. */
+ if (bits <= 0 || (!a && !b)) {
+ return 0;
+ } else if (!a) {
+ return -1;
+ } else if (!b) {
+ return 1;
+ }
+
+ assert((a && b && bits >= 0) || bits == 0);
+	/* Compare the byte-divisible part. */
+ const size_t chunk = bits / 8;
+ int ret = memcmp(a, b, chunk);
+ if (ret != 0) {
+ return ret;
+ }
+ a += chunk;
+ b += chunk;
+ bits -= chunk * 8;
+	/* Compare the trailing partial byte, if any. */
+ if (bits > 0) {
+ const size_t shift = (8 - bits);
+ ret = ((uint8_t)(*a >> shift) - (uint8_t)(*b >> shift));
+ }
+ return ret;
+}
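+
+/* Worked example (editor's note): comparing the first 20 bits of
+ * 192.0.2.1 (bytes c0 00 02 01) and 192.0.18.1 (bytes c0 00 12 01):
+ * the first two bytes compare equal via memcmp, and of the third byte
+ * only the top 4 bits are kept (0x0 vs 0x1), so a negative value
+ * is returned, just like memcmp would for the truncated prefixes. */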
+
+int kr_rrkey(char *key, uint16_t class, const knot_dname_t *owner,
+ uint16_t type, uint16_t additional)
+{
+ if (!key || !owner) {
+ return kr_error(EINVAL);
+ }
+ uint8_t *key_buf = (uint8_t *)key;
+ int ret = u16tostr(key_buf, class);
+ if (ret <= 0) {
+ return ret;
+ }
+ key_buf += ret;
+ ret = knot_dname_to_wire(key_buf, owner, KNOT_DNAME_MAXLEN);
+ if (ret <= 0) {
+ return ret;
+ }
+ knot_dname_to_lower(key_buf);
+ key_buf += ret - 1;
+ ret = u16tostr(key_buf, type);
+ if (ret <= 0) {
+ return ret;
+ }
+ key_buf += ret;
+ ret = u16tostr(key_buf, additional);
+ if (ret <= 0) {
+ return ret;
+ }
+ key_buf[ret] = '\0';
+ return (char *)&key_buf[ret] - key;
+}
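+
+/* Key layout example (derived from the code above): for class IN (1),
+ * owner "example.com." and type A (1) with additional == 0, the key is
+ * "00001" + lower-cased wire name + "00001" + "00000" + '\0',
+ * so equal RRsets map to equal, memcmp-friendly strings. */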
+
+/** Return whether two RRsets match, i.e. would form the same set; see ranked_rr_array_t */
+static inline bool rrsets_match(const knot_rrset_t *rr1, const knot_rrset_t *rr2)
+{
+ bool match = rr1->type == rr2->type && rr1->rclass == rr2->rclass;
+ if (match && rr2->type == KNOT_RRTYPE_RRSIG) {
+ match = match && knot_rrsig_type_covered(rr1->rrs.rdata)
+ == knot_rrsig_type_covered(rr2->rrs.rdata);
+ }
+ match = match && knot_dname_is_equal(rr1->owner, rr2->owner);
+ return match;
+}
+
+/** Ensure that an index in a ranked array won't cause "duplicate" RRsets on wire.
+ *
+ * Other entries that would form the same RRset get to_wire = false.
+ * See also rrsets_match.
+ */
+static int to_wire_ensure_unique(ranked_rr_array_t *array, size_t index)
+{
+ bool ok = array && index < array->len;
+ if (!ok) {
+ assert(false);
+ return kr_error(EINVAL);
+ }
+
+ const struct ranked_rr_array_entry *e0 = array->at[index];
+ if (!e0->to_wire) {
+ return kr_ok();
+ }
+
+ for (ssize_t i = array->len - 1; i >= 0; --i) {
+ /* ^ iterate backwards, as the end is more likely in CPU caches */
+ struct ranked_rr_array_entry *ei = array->at[i];
+ if (ei->qry_uid == e0->qry_uid /* assumption: no duplicates within qry */
+		    || !ei->to_wire /* no use for the complex comparison if it's already not to_wire */
+ ) {
+ continue;
+ }
+ if (rrsets_match(ei->rr, e0->rr)) {
+ ei->to_wire = false;
+ }
+ }
+ return kr_ok();
+}
+
+int kr_ranked_rrarray_add(ranked_rr_array_t *array, const knot_rrset_t *rr,
+ uint8_t rank, bool to_wire, uint32_t qry_uid, knot_mm_t *pool)
+{
+	/* rr always has one record per rrset here;
+	 * check whether another rrset with the same
+	 * rclass/type/owner combination exists within the current query,
+	 * and merge if needed. */
+ for (ssize_t i = array->len - 1; i >= 0; --i) {
+ ranked_rr_array_entry_t *stashed = array->at[i];
+ if (stashed->yielded) {
+ break;
+ }
+ if (stashed->qry_uid != qry_uid) {
+ break;
+ }
+ if (!rrsets_match(stashed->rr, rr)) {
+ continue;
+ }
+ /* Found the entry to merge with. Check consistency and merge. */
+ bool ok = stashed->rank == rank && !stashed->cached;
+ if (!ok) {
+ assert(false);
+ return kr_error(EEXIST);
+ }
+ /* It may happen that an RRset is first considered useful
+ * (to_wire = false, e.g. due to being part of glue),
+ * and later we may find we also want it in the answer. */
+ stashed->to_wire = stashed->to_wire || to_wire;
+
+ return knot_rdataset_merge(&stashed->rr->rrs, &rr->rrs, pool);
+ }
+
+ /* No stashed rrset found, add */
+ int ret = array_reserve_mm(*array, array->len + 1, kr_memreserve, pool);
+ if (ret != 0) {
+ return kr_error(ENOMEM);
+ }
+
+ ranked_rr_array_entry_t *entry = mm_alloc(pool, sizeof(ranked_rr_array_entry_t));
+ if (!entry) {
+ return kr_error(ENOMEM);
+ }
+ knot_rrset_t *copy = knot_rrset_copy(rr, pool);
+ if (!copy) {
+ mm_free(pool, entry);
+ return kr_error(ENOMEM);
+ }
+
+ entry->qry_uid = qry_uid;
+ entry->rr = copy;
+ entry->rank = rank;
+ entry->revalidation_cnt = 0;
+ entry->cached = false;
+ entry->yielded = false;
+ entry->to_wire = to_wire;
+ if (array_push(*array, entry) < 0) {
+		/* Silence coverity.  This shouldn't be able to happen,
+		 * due to the array_reserve_mm call above. */
+ mm_free(pool, entry);
+ return kr_error(ENOMEM);
+ }
+
+ return to_wire_ensure_unique(array, array->len - 1);
+}
+
+int kr_ranked_rrarray_set_wire(ranked_rr_array_t *array, bool to_wire,
+ uint32_t qry_uid, bool check_dups,
+ bool (*extraCheck)(const ranked_rr_array_entry_t *))
+{
+ for (size_t i = 0; i < array->len; ++i) {
+ ranked_rr_array_entry_t *entry = array->at[i];
+ if (entry->qry_uid != qry_uid) {
+ continue;
+ }
+ if (extraCheck != NULL && !extraCheck(entry)) {
+ continue;
+ }
+ entry->to_wire = to_wire;
+ if (check_dups) {
+ int ret = to_wire_ensure_unique(array, i);
+ if (ret) return ret;
+ }
+ }
+ return kr_ok();
+}
+
+
+static char *callprop(struct kr_module *module, const char *prop, const char *input, void *env)
+{
+ if (!module || !module->props || !prop) {
+ return NULL;
+ }
+ for (const struct kr_prop *p = module->props(); p && p->name; ++p) {
+ if (p->cb != NULL && strcmp(p->name, prop) == 0) {
+ return p->cb(env, module, input);
+ }
+ }
+ return NULL;
+}
+
+char *kr_module_call(struct kr_context *ctx, const char *module, const char *prop, const char *input)
+{
+ if (!ctx || !ctx->modules || !module || !prop) {
+ return NULL;
+ }
+ module_array_t *mod_list = ctx->modules;
+ for (size_t i = 0; i < mod_list->len; ++i) {
+ struct kr_module *mod = mod_list->at[i];
+ if (strcmp(mod->name, module) == 0) {
+ return callprop(mod, prop, input, ctx);
+ }
+ }
+ return NULL;
+}
+
+static void flags_to_str(char *dst, const knot_pkt_t *pkt, size_t maxlen)
+{
+ int offset = 0;
+ int ret = 0;
+ struct {
+ uint8_t (*get) (const uint8_t *packet);
+ char name[3];
+ } flag[7] = {
+ {knot_wire_get_qr, "qr"},
+ {knot_wire_get_aa, "aa"},
+ {knot_wire_get_rd, "rd"},
+ {knot_wire_get_ra, "ra"},
+ {knot_wire_get_tc, "tc"},
+ {knot_wire_get_ad, "ad"},
+ {knot_wire_get_cd, "cd"}
+ };
+ for (int i = 0; i < 7; ++i) {
+ if (!flag[i].get(pkt->wire)) {
+ continue;
+ }
+ ret = snprintf(dst + offset, maxlen, "%s ", flag[i].name);
+ if (ret <= 0 || ret >= maxlen) {
+ dst[0] = 0;
+ return;
+ }
+ offset += ret;
+ maxlen -= ret;
+ }
+ dst[offset] = 0;
+}
+
+static char *print_section_opt(struct mempool *mp, char *endp, const knot_rrset_t *rr, const uint8_t rcode)
+{
+ uint8_t ercode = knot_edns_get_ext_rcode(rr);
+ uint16_t ext_rcode_id = knot_edns_whole_rcode(ercode, rcode);
+ const char *ext_rcode_str = "Unused";
+ const knot_lookup_t *ext_rcode;
+
+ if (ercode > 0) {
+ ext_rcode = knot_lookup_by_id(knot_rcode_names, ext_rcode_id);
+ if (ext_rcode != NULL) {
+ ext_rcode_str = ext_rcode->name;
+ } else {
+ ext_rcode_str = "Unknown";
+ }
+ }
+
+ return mp_printf_append(mp, endp,
+ ";; EDNS PSEUDOSECTION:\n;; "
+ "Version: %u; flags: %s; UDP size: %u B; ext-rcode: %s\n\n",
+ knot_edns_get_version(rr),
+ (knot_edns_do(rr) != 0) ? "do" : "",
+ knot_edns_get_payload(rr),
+ ext_rcode_str);
+}
+
+char *kr_pkt_text(const knot_pkt_t *pkt)
+{
+ if (!pkt) {
+ return NULL;
+ }
+
+ struct mempool *mp = mp_new(512);
+
+ static const char * snames[] = {
+ ";; ANSWER SECTION", ";; AUTHORITY SECTION", ";; ADDITIONAL SECTION"
+ };
+ char flags[32];
+ uint8_t pkt_rcode = knot_wire_get_rcode(pkt->wire);
+ uint8_t pkt_opcode = knot_wire_get_opcode(pkt->wire);
+ const char *rcode_str = "Unknown";
+ const char *opcode_str = "Unknown";
+ const knot_lookup_t *rcode = knot_lookup_by_id(knot_rcode_names, pkt_rcode);
+ const knot_lookup_t *opcode = knot_lookup_by_id(knot_opcode_names, pkt_opcode);
+ uint16_t qry_id = knot_wire_get_id(pkt->wire);
+ uint16_t qdcount = knot_wire_get_qdcount(pkt->wire);
+
+ if (rcode != NULL) {
+ rcode_str = rcode->name;
+ }
+ if (opcode != NULL) {
+ opcode_str = opcode->name;
+ }
+ flags_to_str(flags, pkt, sizeof(flags));
+
+ char *ptr = mp_printf(mp,
+ ";; ->>HEADER<<- opcode: %s; status: %s; id: %hu\n"
+ ";; Flags: %s QUERY: %hu; ANSWER: %hu; "
+ "AUTHORITY: %hu; ADDITIONAL: %hu\n\n",
+ opcode_str, rcode_str, qry_id,
+ flags,
+ qdcount,
+ knot_wire_get_ancount(pkt->wire),
+ knot_wire_get_nscount(pkt->wire),
+ knot_wire_get_arcount(pkt->wire));
+
+ if (knot_pkt_has_edns(pkt)) {
+ ptr = print_section_opt(mp, ptr, pkt->opt_rr, knot_wire_get_rcode(pkt->wire));
+ }
+
+ if (qdcount == 1) {
+ KR_DNAME_GET_STR(qname, knot_pkt_qname(pkt));
+ KR_RRTYPE_GET_STR(rrtype, knot_pkt_qtype(pkt));
+ ptr = mp_printf_append(mp, ptr, ";; QUESTION SECTION\n%s\t\t%s\n", qname, rrtype);
+ } else if (qdcount > 1) {
+ ptr = mp_printf_append(mp, ptr, ";; Warning: unsupported QDCOUNT %hu\n", qdcount);
+ }
+
+ for (knot_section_t i = KNOT_ANSWER; i <= KNOT_ADDITIONAL; ++i) {
+ const knot_pktsection_t *sec = knot_pkt_section(pkt, i);
+ if (sec->count == 0 || knot_pkt_rr(sec, 0)->type == KNOT_RRTYPE_OPT) {
+ /* OPT RRs are _supposed_ to be the last ^^, if they appear */
+ continue;
+ }
+
+ ptr = mp_printf_append(mp, ptr, "\n%s\n", snames[i - KNOT_ANSWER]);
+ for (unsigned k = 0; k < sec->count; ++k) {
+ const knot_rrset_t *rr = knot_pkt_rr(sec, k);
+ if (rr->type == KNOT_RRTYPE_OPT) {
+ continue;
+ }
+ auto_free char *rr_text = kr_rrset_text(rr);
+ ptr = mp_printf_append(mp, ptr, "%s", rr_text);
+ }
+ }
+
+ /* Close growing buffer and duplicate result before deleting */
+ char *result = strdup(ptr);
+ mp_delete(mp);
+ return result;
+}
+
+char *kr_rrset_text(const knot_rrset_t *rr)
+{
+ if (!rr) {
+ return NULL;
+ }
+
+ /* Note: knot_rrset_txt_dump will double the size until the rrset fits */
+ size_t bufsize = 128;
+ char *buf = malloc(bufsize);
+ int ret = knot_rrset_txt_dump(rr, &buf, &bufsize, &KNOT_DUMP_STYLE_DEFAULT);
+ if (ret < 0) {
+ free(buf);
+ return NULL;
+ }
+
+ return buf;
+}
+
+uint64_t kr_now()
+{
+ return uv_now(uv_default_loop());
+}
+
+const char *kr_strptime_diff(const char *format, const char *time1_str,
+ const char *time0_str, double *diff) {
+ assert(format != NULL);
+ assert(time1_str != NULL);
+ assert(time0_str != NULL);
+ assert(diff != NULL);
+
+ struct tm time1_tm;
+ time_t time1_u;
+ struct tm time0_tm;
+ time_t time0_u;
+
+ char *err = strptime(time1_str, format, &time1_tm);
+ if (err == NULL || err != time1_str + strlen(time1_str))
+ return "strptime failed for time1";
+ time1_tm.tm_isdst = -1; /* determine if DST is active or not */
+ time1_u = mktime(&time1_tm);
+ if (time1_u == (time_t)-1)
+ return "mktime failed for time1";
+
+ err = strptime(time0_str, format, &time0_tm);
+ if (err == NULL || err != time0_str + strlen(time0_str))
+ return "strptime failed for time0";
+ time0_tm.tm_isdst = -1; /* determine if DST is active or not */
+ time0_u = mktime(&time0_tm);
+ if (time0_u == (time_t)-1)
+ return "mktime failed for time0";
+ *diff = difftime(time1_u, time0_u);
+
+ return NULL;
+}
+
+int knot_dname_lf2wire(knot_dname_t * const dst, uint8_t len, const uint8_t *lf)
+{
+ knot_dname_t *d = dst; /* moving "cursor" as we write it out */
+ bool ok = d && (len == 0 || lf);
+ if (!ok) {
+ assert(false);
+ return kr_error(EINVAL);
+ }
+ /* we allow the final zero byte to be omitted */
+ if (!len) {
+ goto finish;
+ }
+ if (lf[len - 1]) {
+ ++len;
+ }
+ /* convert the name, one label at a time */
+ int label_end = len - 1; /* index of the zero byte after the current label */
+ while (label_end >= 0) {
+ /* find label_start */
+ int i = label_end - 1;
+ while (i >= 0 && lf[i])
+ --i;
+ const int label_start = i + 1; /* index of the first byte of the current label */
+ const int label_len = label_end - label_start;
+ assert(label_len >= 0);
+ if (label_len > 63 || label_len <= 0)
+ return kr_error(EILSEQ);
+ /* write the label */
+ *d = label_len;
+ ++d;
+ memcpy(d, lf + label_start, label_len);
+ d += label_len;
+ /* next label */
+ label_end = label_start - 1;
+ }
+finish:
+ *d = 0; /* the final zero */
+ ++d;
+ return d - dst;
+}
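+
+/* Example (editor's illustration): the lookup format stores labels in
+ * reversed order, separated by zero bytes.  For "www.nic.cz":
+ *
+ *	const uint8_t lf[] = { 'c','z',0, 'n','i','c',0, 'w','w','w' };
+ *	uint8_t wire[KNOT_DNAME_MAXLEN];
+ *	int len = knot_dname_lf2wire(wire, sizeof(lf), lf);
+ *	// len == 12, wire == "\x03www\x03nic\x02cz\x00"
+ */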
+
+static void rnd_noerror(void *data, uint size)
+{
+ int ret = gnutls_rnd(GNUTLS_RND_NONCE, data, size);
+ if (ret) {
+ kr_log_error("gnutls_rnd(): %s\n", gnutls_strerror(ret));
+ abort();
+ }
+}
+void kr_rnd_buffered(void *data, uint size)
+{
+	/* static buffer of random bytes; unconsumed data runs from index
+	 * buf_begin (inclusive) to the end of buf */
+ static uint8_t buf[512/8]; /* gnutls_rnd() works on blocks of 512 bits (chacha) */
+ static uint buf_begin = sizeof(buf);
+
+ if (unlikely(size > sizeof(buf))) {
+ rnd_noerror(data, size);
+ return;
+ }
+ /* Start with contiguous chunk, possibly until the end of buffer. */
+ const uint size1 = MIN(size, sizeof(buf) - buf_begin);
+ uint8_t *d = data;
+ memcpy(d, buf + buf_begin, size1);
+ if (size1 == size) {
+ buf_begin += size1;
+ return;
+ }
+ d += size1;
+ size -= size1;
+ /* Refill the whole buffer, and finish by another contiguous chunk. */
+ rnd_noerror(buf, sizeof(buf));
+ memcpy(d, buf, size);
+ buf_begin = size;
+}
+
+void kr_rrset_init(knot_rrset_t *rrset, knot_dname_t *owner,
+ uint16_t type, uint16_t rclass, uint32_t ttl)
+{
+ assert(rrset);
+ knot_rrset_init(rrset, owner, type, rclass, ttl);
+}
+uint16_t kr_pkt_qclass(const knot_pkt_t *pkt)
+{
+ return knot_pkt_qclass(pkt);
+}
+uint16_t kr_pkt_qtype(const knot_pkt_t *pkt)
+{
+ return knot_pkt_qtype(pkt);
+}
+uint32_t kr_rrsig_sig_inception(const knot_rdata_t *rdata)
+{
+ return knot_rrsig_sig_inception(rdata);
+}
+uint32_t kr_rrsig_sig_expiration(const knot_rdata_t *rdata)
+{
+ return knot_rrsig_sig_expiration(rdata);
+}
+uint16_t kr_rrsig_type_covered(const knot_rdata_t *rdata)
+{
+ return knot_rrsig_type_covered(rdata);
+}
+
diff --git a/lib/utils.h b/lib/utils.h
new file mode 100644
index 0000000..21eabac
--- /dev/null
+++ b/lib/utils.h
@@ -0,0 +1,495 @@
+/* Copyright (C) 2014-2017 CZ.NIC, z.s.p.o. <knot-dns@labs.nic.cz>
+
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program. If not, see <https://www.gnu.org/licenses/>.
+ */
+
+#pragma once
+
+#include <assert.h>
+#include <stdio.h>
+#include <stdbool.h>
+#include <sys/socket.h>
+#include <sys/time.h>
+#include <netinet/in.h>
+
+#include <gnutls/gnutls.h>
+#include <gnutls/crypto.h>
+#include <lua.h>
+
+#include <libknot/libknot.h>
+#include <libknot/packet/pkt.h>
+#include <libknot/rrset.h>
+#include <libknot/rrtype/rrsig.h>
+
+#include "lib/generic/array.h"
+#include "lib/defines.h"
+
+struct kr_query;
+struct kr_request;
+
+/*
+ * Logging and debugging.
+ */
+
+/** @brief Callback for request events. */
+typedef void (*trace_callback_f)(struct kr_request *request);
+/** @brief Callback for request logging handler. */
+typedef void (*trace_log_f)(const struct kr_query *query, const char *source, const char *msg);
+
+#define kr_log_info(...) do { printf(__VA_ARGS__); fflush(stdout); } while(0)
+#define kr_log_error(...) fprintf(stderr, ## __VA_ARGS__)
+
+/* Always export these, but override direct calls by macros conditionally. */
+/** Whether in --verbose mode. Only use this for reading. */
+KR_EXPORT extern bool kr_verbose_status;
+
+/** Set --verbose mode. Not available if compiled with -DNOVERBOSELOG. */
+KR_EXPORT bool kr_verbose_set(bool status);
+
+/** Log a message if in --verbose mode. */
+KR_EXPORT KR_PRINTF(1)
+void kr_log_verbose(const char *fmt, ...);
+
+/** Utility for QRVERBOSE - use that instead. */
+KR_EXPORT KR_PRINTF(3)
+void kr_log_qverbose_impl(const struct kr_query *qry, const char *cls, const char *fmt, ...);
+
+/**
+ * @brief Return true if the query has request log handler installed.
+ */
+#define kr_log_trace_enabled(query) (__builtin_expect( \
+ (query) && (query)->request && (query)->request->trace_log, \
+ false))
+
+/**
+ * Log a message through the request log handler.
+ * @param query current query
+ * @param source message source
+ * @param fmt message format
+ * @return true if the message was logged
+ */
+KR_EXPORT KR_PRINTF(3)
+bool kr_log_trace(const struct kr_query *query, const char *source, const char *fmt, ...);
+
+#ifdef NOVERBOSELOG
+/* Efficient compile-time disabling of verbose messages. */
+#define kr_verbose_status false
+#define kr_verbose_set(x)
+#endif
+
+/** Block run in --verbose mode; optimized when not run. */
+#define VERBOSE_STATUS __builtin_expect(kr_verbose_status, false)
+#define WITH_VERBOSE(query) if(__builtin_expect(kr_verbose_status || kr_log_trace_enabled(query), false))
+#define kr_log_verbose if(VERBOSE_STATUS) kr_log_verbose
+
+#define KR_DNAME_GET_STR(dname_str, dname) \
+ char dname_str[KR_DNAME_STR_MAXLEN]; \
+ knot_dname_to_str(dname_str, (dname), sizeof(dname_str)); \
+ dname_str[sizeof(dname_str) - 1] = 0;
+
+#define KR_RRTYPE_GET_STR(rrtype_str, rrtype) \
+ char rrtype_str[KR_RRTYPE_STR_MAXLEN]; \
+ knot_rrtype_to_string((rrtype), rrtype_str, sizeof(rrtype_str)); \
+ rrtype_str[sizeof(rrtype_str) - 1] = 0;
+
+/* C11 compatibility, but without any implementation so far. */
+#ifndef static_assert
+#define static_assert(cond, msg)
+#endif
+
+/** @cond Memory alloc routines */
+static inline void *mm_alloc(knot_mm_t *mm, size_t size)
+{
+ if (mm) return mm->alloc(mm->ctx, size);
+ else return malloc(size);
+}
+static inline void mm_free(knot_mm_t *mm, const void *what)
+{
+ if (mm) {
+ if (mm->free)
+ mm->free((void *)what);
+ }
+ else free((void *)what);
+}
+
+/** Realloc implementation using memory context. */
+KR_EXPORT
+void *mm_realloc(knot_mm_t *mm, void *what, size_t size, size_t prev_size);
+
+/** Trivial malloc() wrapper. */
+void *mm_malloc(void *ctx, size_t n);
+
+/** Initialize mm with standard malloc+free. */
+static inline void mm_ctx_init(knot_mm_t *mm)
+{
+ mm->ctx = NULL;
+ mm->alloc = mm_malloc;
+ mm->free = free;
+}
+/* @endcond */
+
+/** Return time difference in milliseconds.
+ * @note based on the _BSD_SOURCE timersub() macro */
+static inline long time_diff(struct timeval *begin, struct timeval *end) {
+ struct timeval res = {
+ .tv_sec = end->tv_sec - begin->tv_sec,
+ .tv_usec = end->tv_usec - begin->tv_usec
+ };
+ if (res.tv_usec < 0) {
+ --res.tv_sec;
+ res.tv_usec += 1000000;
+ }
+ return res.tv_sec * 1000 + res.tv_usec / 1000;
+}
+
+/** @cond internal Array types */
+struct kr_context;
+
+typedef array_t(knot_rrset_t *) rr_array_t;
+struct ranked_rr_array_entry {
+ uint32_t qry_uid;
+ uint8_t rank; /**< enum kr_rank */
+ uint8_t revalidation_cnt;
+ bool cached : 1; /**< Set to true if the entry was written into cache */
+ bool yielded : 1;
+ bool to_wire : 1; /**< whether to be put into the answer */
+ bool expiring : 1; /**< low remaining TTL; see is_expiring; only used in cache ATM */
+ knot_rrset_t *rr;
+};
+typedef struct ranked_rr_array_entry ranked_rr_array_entry_t;
+
+/** Array of RRsets coming from multiple queries; for struct kr_request.
+ *
+ * Notes:
+ * - RRSIGs are only considered to form an RRset when the types covered match;
+ * cache-related code relies on that!
+ * - RRsets from the same packet (qry_uid) get merged.
+ */
+typedef array_t(ranked_rr_array_entry_t *) ranked_rr_array_t;
+/* @endcond */
+
+/** Concatenate N strings. */
+KR_EXPORT
+char* kr_strcatdup(unsigned n, ...);
+
+/** You probably want kr_rand_* convenience functions instead.
+ * This is a buffered version of gnutls_rnd(GNUTLS_RND_NONCE, ..) */
+KR_EXPORT
+void kr_rnd_buffered(void *data, unsigned int size);
+
+/** Return a few random bytes. */
+static inline uint64_t kr_rand_bytes(unsigned int size)
+{
+ uint64_t result;
+ if (size <= 0 || size > sizeof(result)) {
+ kr_log_error("kr_rand_bytes(): EINVAL\n");
+ abort();
+ }
+ uint8_t data[sizeof(result)];
+ kr_rnd_buffered(data, size);
+	/* I would have liked to dump the random data into the result directly,
+	 * but that would work well only on little-endian machines,
+	 * so instead I hope that the compiler will optimize this out.
+	 * (Tested via reading assembly from usual gcc -O2 setup.)
+	 * Alternatively we could waste more rnd bytes, but that seemed worse. */
+	result = 0;
+	for (unsigned int i = 0; i < size; ++i) {
+		result |= ((uint64_t)data[i]) << (i * 8);
+ }
+ return result;
+}
+
+/** Throw a pseudo-random coin, succeeding approximately with probability nomin/denomin.
+ * - low precision, only one byte of randomness (or none with extreme parameters)
+ * - tip: use !kr_rand_coin() to get the complementary probability
+ */
+static inline bool kr_rand_coin(unsigned int nomin, unsigned int denomin)
+{
+ /* This function might be called with non-constant values
+ * so we try to handle odd corner cases instead of crash. */
+ if (nomin >= denomin)
+ return true;
+ else if (nomin <= 0)
+ return false;
+
+ /* threshold = how many parts from 256 are a success */
+ unsigned int threshold = (nomin * 256 + /*rounding*/ denomin / 2) / denomin;
+ if (threshold == 0) threshold = 1;
+ if (threshold == 256) threshold = 255;
+ return (kr_rand_bytes(1) < threshold);
+}
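+
+/* Worked example (editor's note): kr_rand_coin(1, 10) computes
+ * threshold = (1 * 256 + 5) / 10 == 26, so it succeeds iff one random
+ * byte is < 26, i.e. with probability 26/256 ~ 10.2% -- close to 1/10,
+ * within the advertised one-byte precision. */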
+
+/** Memory reservation routine for knot_mm_t */
+KR_EXPORT
+int kr_memreserve(void *baton, char **mem, size_t elm_size, size_t want, size_t *have);
+
+/** @internal Fast packet reset. */
+KR_EXPORT
+int kr_pkt_recycle(knot_pkt_t *pkt);
+
+/** @internal Clear packet payload. */
+KR_EXPORT
+int kr_pkt_clear_payload(knot_pkt_t *pkt);
+
+/** Construct and put record to packet. */
+KR_EXPORT
+int kr_pkt_put(knot_pkt_t *pkt, const knot_dname_t *name, uint32_t ttl,
+ uint16_t rclass, uint16_t rtype, const uint8_t *rdata, uint16_t rdlen);
+
+/** Set packet header suitable for authoritative answer. (for policy module) */
+KR_EXPORT
+void kr_pkt_make_auth_header(knot_pkt_t *pkt);
+
+/** Simple storage for IPx address or AF_UNSPEC. */
+union inaddr {
+ struct sockaddr ip;
+ struct sockaddr_in ip4;
+ struct sockaddr_in6 ip6;
+};
+
+/** Address bytes for given family. */
+KR_EXPORT KR_PURE
+const char *kr_inaddr(const struct sockaddr *addr);
+/** Address family. */
+KR_EXPORT KR_PURE
+int kr_inaddr_family(const struct sockaddr *addr);
+/** Address length for given family, i.e. sizeof(struct in*_addr). */
+KR_EXPORT KR_PURE
+int kr_inaddr_len(const struct sockaddr *addr);
+/** Sockaddr length for given family, i.e. sizeof(struct sockaddr_in*). */
+KR_EXPORT KR_PURE
+int kr_sockaddr_len(const struct sockaddr *addr);
+/** Compare two given sockaddr.
+ * return 0 - addresses are equal, error code otherwise.
+ */
+KR_EXPORT KR_PURE
+int kr_sockaddr_cmp(const struct sockaddr *left, const struct sockaddr *right);
+/** Port. */
+KR_EXPORT KR_PURE
+uint16_t kr_inaddr_port(const struct sockaddr *addr);
+/** Set port. */
+KR_EXPORT
+void kr_inaddr_set_port(struct sockaddr *addr, uint16_t port);
+
+/** Write string representation for given address as "<addr>#<port>".
+ * \param[in] addr the raw address
+ * \param[out] buf the buffer for output string
+ * \param[in,out] buflen the available (in) and utilized (out) length, including \0 */
+KR_EXPORT
+int kr_inaddr_str(const struct sockaddr *addr, char *buf, size_t *buflen);
+
+/** Return address type for string. */
+KR_EXPORT KR_PURE
+int kr_straddr_family(const char *addr);
+/** Return address length in given family (struct in*_addr). */
+KR_EXPORT KR_CONST
+int kr_family_len(int family);
+/** Create a sockaddr* from string+port representation (also accepts IPv6 link-local). */
+KR_EXPORT
+struct sockaddr * kr_straddr_socket(const char *addr, int port);
+/** Parse address and return subnet length (bits).
+ * @warning 'dst' must be at least `sizeof(struct in6_addr)` long. */
+KR_EXPORT
+int kr_straddr_subnet(void *dst, const char *addr);
+
+/** Split an IP address given as "addr@port" or "addr#port" into addr and port,
+ * and perform validation.
+ * @note If the #port part isn't present, port will be set to 0.
+ *       buf and/or port can be set to NULL.
+ * @return kr_error(EINVAL) - the addr part doesn't contain a valid IP address, or
+ *                            the #port part is out of range (not within 1..UINT16_MAX)
+ *         kr_error(ENOSPC) - buflen is too small
+ */
+KR_EXPORT
+int kr_straddr_split(const char *addr, char *buf, size_t buflen, uint16_t *port);
+/** Format an IP address and port as "addr#port", with validation.
+ * @note The port is always formatted as a five-character string with leading zeros.
+ * @return kr_error(EINVAL) - addr or buf is NULL, or buflen is 0, or
+ *                            addr doesn't contain a valid IP address
+ *         kr_error(ENOSPC) - buflen is too small
+ */
+KR_EXPORT
+int kr_straddr_join(const char *addr, uint16_t port, char *buf, size_t *buflen);
+
+/** Compare memory bitwise. The semantics is "the same" as for memcmp().
+ * The partial byte is considered with more-significant bits first,
+ * so this is e.g. suitable for comparing IP prefixes. */
+KR_EXPORT KR_PURE
+int kr_bitcmp(const char *a, const char *b, int bits);
+
+/** @internal RR map flags. */
+static const uint8_t KEY_FLAG_RRSIG = 0x02;
+static inline uint8_t KEY_FLAG_RANK(const char *key)
+ { return ((uint8_t)(key[0])) >> 2; }
+static inline bool KEY_COVERING_RRSIG(const char *key)
+ { return ((uint8_t)(key[0])) & KEY_FLAG_RRSIG; }
+
+/* Stash key = {[5] class, [1-255] owner, [5] type, [5] additional, [1] \x00 } */
+#define KR_RRKEY_LEN (16 + KNOT_DNAME_MAXLEN)
+/** Create unique null-terminated string key for RR.
+ * @param key Destination buffer for the key; MUST be KR_RRKEY_LEN or larger.
+ * @param class RR class.
+ * @param owner RR owner name.
+ * @param type RR type.
+ * @param additional flags (for instance can be used for storing covered type
+ * when RR type is RRSIG).
+ * @return key length if successful or an error
+ * */
+KR_EXPORT
+int kr_rrkey(char *key, uint16_t class, const knot_dname_t *owner,
+ uint16_t type, uint16_t additional);
+
+/** @internal Add RRSet copy to ranked RR array. */
+KR_EXPORT
+int kr_ranked_rrarray_add(ranked_rr_array_t *array, const knot_rrset_t *rr,
+ uint8_t rank, bool to_wire, uint32_t qry_uid, knot_mm_t *pool);
+
+/** @internal Mark the RRSets from particular query as
+ * "have (not) to be recorded in the final answer".
+ * @param array RRSet array.
+ * @param to_wire Whether the records must / must not be put into the final answer.
+ * @param qry_uid Query uid.
+ * @param check_dups When to_wire is true, try to avoid duplicate RRSets.
+ * @param extraCheck optional function checking whether to consider the record
+ * @return 0 or an error
+ */
+int kr_ranked_rrarray_set_wire(ranked_rr_array_t *array, bool to_wire,
+ uint32_t qry_uid, bool check_dups,
+ bool (*extraCheck)(const ranked_rr_array_entry_t *));
+
+KR_PURE
+char *kr_pkt_text(const knot_pkt_t *pkt);
+
+KR_PURE
+char *kr_rrset_text(const knot_rrset_t *rr);
+
+KR_PURE
+static inline char *kr_dname_text(const knot_dname_t *name) {
+ return knot_dname_to_str_alloc(name);
+}
+
+KR_CONST
+static inline char *kr_rrtype_text(const uint16_t rrtype) {
+ char type_str[32] = {0};
+ knot_rrtype_to_string(rrtype, type_str, sizeof(type_str));
+ return strdup(type_str);
+}
+
+/**
+ * Call module property.
+ */
+KR_EXPORT
+char *kr_module_call(struct kr_context *ctx, const char *module, const char *prop, const char *input);
+
+/** Swap two places. Note: the parameters need to be without side effects. */
+#define SWAP(x, y) do { /* http://stackoverflow.com/a/3982430/587396 */ \
+ unsigned char swap_temp[sizeof(x) == sizeof(y) ? (ssize_t)sizeof(x) : -1]; \
+ memcpy(swap_temp, &y, sizeof(x)); \
+ memcpy(&y, &x, sizeof(x)); \
+ memcpy(&x, swap_temp, sizeof(x)); \
+ } while(0)
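+
+/* Usage sketch (editor's illustration): the sizeof check rejects
+ * mismatched types at compile time by declaring a negative-size array:
+ *
+ *	int a = 1, b = 2;
+ *	SWAP(a, b);		// OK: now a == 2, b == 1
+ *	// SWAP(a, some_double);  // would fail to compile
+ */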
+
+/** Return the (covered) type of a nonempty RRset. */
+static inline uint16_t kr_rrset_type_maysig(const knot_rrset_t *rr)
+{
+ assert(rr && rr->rrs.count && rr->rrs.rdata);
+ uint16_t type = rr->type;
+ if (type == KNOT_RRTYPE_RRSIG)
+ type = knot_rrsig_type_covered(rr->rrs.rdata);
+ return type;
+}
+
+/** Printf onto the lua stack, avoiding additional copy (thin wrapper). */
+KR_PRINTF(2)
+static inline const char *lua_push_printf(lua_State *L, const char *fmt, ...)
+{
+ va_list args;
+ va_start(args, fmt);
+ const char *ret = lua_pushvfstring(L, fmt, args);
+ va_end(args);
+ return ret;
+}
+
+/** @internal Return string representation of addr.
+ * @note return pointer to static string
+ */
+static inline char *kr_straddr(const struct sockaddr *addr)
+{
+ assert(addr != NULL);
+	/* We are a single-threaded application, so a static buffer is safe. */
+ static char str[INET6_ADDRSTRLEN + 1 + 5 + 1];
+ size_t len = sizeof(str);
+ int ret = kr_inaddr_str(addr, str, &len);
+ return ret != kr_ok() || len == 0 ? NULL : str;
+}
+
+/** The current time in monotonic milliseconds.
+ *
+ * \note it may be outdated in case of long callbacks; see uv_now().
+ */
+KR_EXPORT
+uint64_t kr_now();
+
+/** Convert name from lookup format to wire. See knot_dname_lf
+ *
+ * \note len bytes are read and len+1 are written with *normal* LF,
+ * but it's also allowed that the final zero byte is omitted in LF.
+ * \return the number of bytes written (>0) or error code (<0)
+ */
+int knot_dname_lf2wire(knot_dname_t *dst, uint8_t len, const uint8_t *lf);
+
+/** Patched knot_dname_lf. LF for "." has length zero instead of one, for consistency.
+ * (TODO: consistency?)
+ * \param add_wildcard append the wildcard label
+ * \note packet is always NULL
+ */
+static inline int kr_dname_lf(uint8_t *dst, const knot_dname_t *src, bool add_wildcard)
+{
+ knot_dname_storage_t right_aligned_dst;
+ uint8_t *right_aligned_dname_start = knot_dname_lf(src, right_aligned_dst);
+ if (!right_aligned_dname_start) {
+ return kr_error(EINVAL);
+ }
+ int len = right_aligned_dname_start[0];
+ assert(right_aligned_dname_start + 1 + len - KNOT_DNAME_MAXLEN == right_aligned_dst);
+ memcpy(dst + 1, right_aligned_dname_start + 1, len);
+ if (add_wildcard) {
+ if (len + 2 > KNOT_DNAME_MAXLEN)
+ return kr_error(ENOSPC);
+ dst[len + 1] = '*';
+ dst[len + 2] = '\0';
+ len += 2;
+ }
+ dst[0] = len;
+ return KNOT_EOK;
+}
+
+/**
+ * Difference between two calendar times specified as strings.
+ * \param format[in] format for strptime
+ * \param diff[out] result from C difftime(time1, time0)
+ */
+KR_EXPORT
+const char *kr_strptime_diff(const char *format, const char *time1_str,
+ const char *time0_str, double *diff);
+
+/* Trivial non-inline wrappers, to be used in lua. */
+KR_EXPORT void kr_rrset_init(knot_rrset_t *rrset, knot_dname_t *owner,
+ uint16_t type, uint16_t rclass, uint32_t ttl);
+KR_EXPORT uint16_t kr_pkt_qclass(const knot_pkt_t *pkt);
+KR_EXPORT uint16_t kr_pkt_qtype(const knot_pkt_t *pkt);
+KR_EXPORT uint32_t kr_rrsig_sig_inception(const knot_rdata_t *rdata);
+KR_EXPORT uint32_t kr_rrsig_sig_expiration(const knot_rdata_t *rdata);
+KR_EXPORT uint16_t kr_rrsig_type_covered(const knot_rdata_t *rdata);
diff --git a/lib/zonecut.c b/lib/zonecut.c
new file mode 100644
index 0000000..5e54d97
--- /dev/null
+++ b/lib/zonecut.c
@@ -0,0 +1,597 @@
+/* Copyright (C) 2014-2017 CZ.NIC, z.s.p.o. <knot-dns@labs.nic.cz>
+
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program. If not, see <https://www.gnu.org/licenses/>.
+ */
+
+#include "lib/zonecut.h"
+
+#include "contrib/cleanup.h"
+#include "lib/defines.h"
+#include "lib/generic/pack.h"
+#include "lib/layer.h"
+#include "lib/resolve.h"
+#include "lib/rplan.h"
+
+#include <libknot/descriptor.h>
+#include <libknot/packet/wire.h>
+#include <libknot/rrtype/rdname.h>
+
+#define VERBOSE_MSG(qry, ...) QRVERBOSE(qry, "zcut", __VA_ARGS__)
+
+/** Information for one NS name + address type. */
+typedef enum {
+ AI_UNINITED = 0,
+ AI_REPUT, /**< Don't use this addrset, due to: cache_rep, NO_IPV6, ...
+ * cache_rep approximates various problems when fetching the RRset. */
+ AI_CYCLED, /**< Skipped due to cycle detection; see implementation for details. */
+	AI_LAST_BAD = AI_CYCLED, /**< bad states: <= AI_LAST_BAD */
+ AI_UNKNOWN, /**< Don't know status of this RRset; various reasons. */
+ AI_EMPTY, /**< No usable address (may mean e.g. just NODATA). */
+ AI_OK, /**< At least one usable address.
+ * LATER: we might be interested whether it's only glue. */
+} addrset_info_t;
+
+
+static void update_cut_name(struct kr_zonecut *cut, const knot_dname_t *name)
+{
+ if (knot_dname_is_equal(name, cut->name)) {
+ return;
+ }
+ knot_dname_t *next_name = knot_dname_copy(name, cut->pool);
+ mm_free(cut->pool, cut->name);
+ cut->name = next_name;
+}
+
+int kr_zonecut_init(struct kr_zonecut *cut, const knot_dname_t *name, knot_mm_t *pool)
+{
+ if (!cut || !name) {
+ return kr_error(EINVAL);
+ }
+
+ memset(cut, 0, sizeof(*cut));
+ cut->name = knot_dname_copy(name, pool);
+ cut->pool = pool;
+ cut->nsset = trie_create(pool);
+ return cut->name && cut->nsset ? kr_ok() : kr_error(ENOMEM);
+}
+
+/** Completely free a pack_t. */
+static inline void free_addr_set(pack_t *pack, knot_mm_t *pool)
+{
+ if (unlikely(!pack)) {
+ /* promised we don't store NULL packs */
+ assert(false);
+ return;
+ }
+ pack_clear_mm(*pack, mm_free, pool);
+ mm_free(pool, pack);
+}
+/** Trivial wrapper for use in trie_apply, due to ugly casting. */
+static int free_addr_set_cb(trie_val_t *v, void *pool)
+{
+ free_addr_set(*v, pool);
+ return kr_ok();
+}
+
+void kr_zonecut_deinit(struct kr_zonecut *cut)
+{
+ if (!cut) {
+ return;
+ }
+ mm_free(cut->pool, cut->name);
+ if (cut->nsset) {
+ trie_apply(cut->nsset, free_addr_set_cb, cut->pool);
+ trie_free(cut->nsset);
+ }
+ knot_rrset_free(cut->key, cut->pool);
+ knot_rrset_free(cut->trust_anchor, cut->pool);
+}
+
+void kr_zonecut_move(struct kr_zonecut *to, const struct kr_zonecut *from)
+{
+ if (!to || !from) abort();
+ kr_zonecut_deinit(to);
+ memcpy(to, from, sizeof(*to));
+}
+
+void kr_zonecut_set(struct kr_zonecut *cut, const knot_dname_t *name)
+{
+ if (!cut || !name) {
+ return;
+ }
+ knot_rrset_t *key, *ta;
+ key = cut->key; cut->key = NULL;
+ ta = cut->trust_anchor; cut->trust_anchor = NULL;
+ kr_zonecut_deinit(cut);
+ kr_zonecut_init(cut, name, cut->pool);
+ cut->key = key;
+ cut->trust_anchor = ta;
+}
+
+int kr_zonecut_copy(struct kr_zonecut *dst, const struct kr_zonecut *src)
+{
+ if (!dst || !src) {
+ return kr_error(EINVAL);
+ }
+ if (!dst->nsset) {
+ dst->nsset = trie_create(dst->pool);
+ }
+ /* Copy the contents, one by one. */
+ int ret = kr_ok();
+ trie_it_t *it;
+ for (it = trie_it_begin(src->nsset); !trie_it_finished(it); trie_it_next(it)) {
+ size_t klen;
+ const char * const k = trie_it_key(it, &klen);
+ pack_t **new_pack = (pack_t **)trie_get_ins(dst->nsset, k, klen);
+ if (!new_pack) {
+ ret = kr_error(ENOMEM);
+ break;
+ }
+ const pack_t *old_pack = *trie_it_val(it);
+ ret = pack_clone(new_pack, old_pack, dst->pool);
+ if (ret) break;
+ }
+ trie_it_free(it);
+ return ret;
+}
+
+int kr_zonecut_copy_trust(struct kr_zonecut *dst, const struct kr_zonecut *src)
+{
+ knot_rrset_t *key_copy = NULL;
+ knot_rrset_t *ta_copy = NULL;
+
+ if (src->key) {
+ key_copy = knot_rrset_copy(src->key, dst->pool);
+ if (!key_copy) {
+ return kr_error(ENOMEM);
+ }
+ }
+
+ if (src->trust_anchor) {
+ ta_copy = knot_rrset_copy(src->trust_anchor, dst->pool);
+ if (!ta_copy) {
+ knot_rrset_free(key_copy, dst->pool);
+ return kr_error(ENOMEM);
+ }
+ }
+
+ knot_rrset_free(dst->key, dst->pool);
+ dst->key = key_copy;
+ knot_rrset_free(dst->trust_anchor, dst->pool);
+ dst->trust_anchor = ta_copy;
+
+ return kr_ok();
+}
+
+int kr_zonecut_add(struct kr_zonecut *cut, const knot_dname_t *ns, const void *data, int len)
+{
+ if (!cut || !ns || !cut->nsset || (data && len <= 0)) {
+ assert(!EINVAL);
+ return kr_error(EINVAL);
+ }
+ /* Disabled; add_reverse_pair() misuses this for domain name in rdata. */
+ if (false && data && len != sizeof(struct in_addr)
+ && len != sizeof(struct in6_addr)) {
+ assert(!EINVAL);
+ return kr_error(EINVAL);
+ }
+
+ /* Get a pack_t for the ns. */
+ pack_t **pack = (pack_t **)trie_get_ins(cut->nsset, (const char *)ns, knot_dname_size(ns));
+ if (!pack) return kr_error(ENOMEM);
+ if (*pack == NULL) {
+ *pack = mm_alloc(cut->pool, sizeof(pack_t));
+ if (*pack == NULL) return kr_error(ENOMEM);
+ pack_init(**pack);
+ }
+ /* Insert data (if has any) */
+ if (data == NULL) {
+ return kr_ok();
+ }
+ /* Check for duplicates */
+ if (pack_obj_find(*pack, data, len)) {
+ return kr_ok();
+ }
+ /* Push new address */
+ int ret = pack_reserve_mm(**pack, 1, len, kr_memreserve, cut->pool);
+ if (ret != 0) {
+ return kr_error(ENOMEM);
+ }
+ return pack_obj_push(*pack, data, len);
+}
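+
+/* Usage sketch (editor's illustration, assuming a hypothetical ns_dname):
+ * adding an NS name together with one IPv4 address; passing data == NULL
+ * registers the name alone:
+ *
+ *	struct in_addr a4;
+ *	inet_pton(AF_INET, "192.0.2.53", &a4);
+ *	kr_zonecut_add(cut, ns_dname, &a4, sizeof(a4));
+ */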
+
+int kr_zonecut_del(struct kr_zonecut *cut, const knot_dname_t *ns, const void *data, int len)
+{
+ if (!cut || !ns || (data && len <= 0)) {
+ return kr_error(EINVAL);
+ }
+
+ /* Find the address list. */
+ int ret = kr_ok();
+ pack_t *pack = kr_zonecut_find(cut, ns);
+ if (pack == NULL) {
+ return kr_error(ENOENT);
+ }
+ /* Remove address from the pack. */
+ if (data) {
+ ret = pack_obj_del(pack, data, len);
+ }
+ /* No servers left, remove NS from the set. */
+ if (pack->len == 0) {
+ free_addr_set(pack, cut->pool);
+ ret = trie_del(cut->nsset, (const char *)ns, knot_dname_size(ns), NULL);
+ assert(ret == 0); /* only KNOT_ENOENT and that *can't* happen */
+ return (ret == 0) ? kr_ok() : kr_error(ret);
+ }
+
+ return ret;
+}
+
+int kr_zonecut_del_all(struct kr_zonecut *cut, const knot_dname_t *ns)
+{
+ if (!cut || !ns) {
+ return kr_error(EINVAL);
+ }
+
+ /* Find the address list; then free and remove it. */
+ pack_t *pack;
+ int ret = trie_del(cut->nsset, (const char *)ns, knot_dname_size(ns),
+ (trie_val_t *)&pack);
+ if (ret) { /* deletion failed */
+ assert(ret == KNOT_ENOENT);
+ return kr_error(ENOENT);
+ }
+ free_addr_set(pack, cut->pool);
+ return kr_ok();
+}
+
+pack_t *kr_zonecut_find(struct kr_zonecut *cut, const knot_dname_t *ns)
+{
+ if (!cut || !ns) {
+ return NULL;
+ }
+ trie_val_t *val = trie_get_try(cut->nsset, (const char *)ns, knot_dname_size(ns));
+ /* we get pointer to the pack_t pointer */
+ return val ? (pack_t *)*val : NULL;
+}
+
+static int has_address(trie_val_t *v, void *baton_)
+{
+ const pack_t *pack = *v;
+ const bool found = pack != NULL && pack->len != 0;
+ return found;
+}
+
+bool kr_zonecut_is_empty(struct kr_zonecut *cut)
+{
+ if (!cut || !cut->nsset) {
+ assert(false);
+ return true;
+ }
+ return !trie_apply(cut->nsset, has_address, NULL);
+}
+
+int kr_zonecut_set_sbelt(struct kr_context *ctx, struct kr_zonecut *cut)
+{
+ if (!ctx || !cut || !ctx->root_hints.nsset) {
+ return kr_error(EINVAL);
+ }
+
+ trie_apply(cut->nsset, free_addr_set_cb, cut->pool);
+ trie_clear(cut->nsset);
+
+ const uint8_t *const dname_root = (const uint8_t *)/*sign-cast*/("");
+ update_cut_name(cut, dname_root);
+ /* Copy root hints from resolution context. */
+ return kr_zonecut_copy(cut, &ctx->root_hints);
+}
+
+/** Fetch address for zone cut. Any rank is accepted (i.e. glue as well). */
+static addrset_info_t fetch_addr(pack_t *addrs, const knot_dname_t *ns, uint16_t rrtype,
+ knot_mm_t *mm_pool, const struct kr_query *qry)
+// LATER(optim.): excessive data copying
+{
+ int rdlen;
+ switch (rrtype) {
+ case KNOT_RRTYPE_A:
+ rdlen = 4;
+ break;
+ case KNOT_RRTYPE_AAAA:
+ rdlen = 16;
+ break;
+ default:
+ assert(!EINVAL);
+ return AI_UNKNOWN;
+ }
+
+ struct kr_context *ctx = qry->request->ctx;
+ struct kr_cache_p peek;
+ if (kr_cache_peek_exact(&ctx->cache, ns, rrtype, &peek) != 0) {
+ return AI_UNKNOWN;
+ }
+ int32_t new_ttl = kr_cache_ttl(&peek, qry, ns, rrtype);
+ if (new_ttl < 0) {
+ return AI_UNKNOWN;
+ }
+
+ knot_rrset_t cached_rr;
+ knot_rrset_init(&cached_rr, /*const-cast*/(knot_dname_t *)ns, rrtype,
+ KNOT_CLASS_IN, new_ttl);
+ if (kr_cache_materialize(&cached_rr.rrs, &peek, mm_pool) < 0) {
+ return AI_UNKNOWN;
+ }
+
+ /* Reserve memory in *addrs. Implementation detail:
+ * pack_t cares for lengths, so we don't store those in the data. */
+ const size_t pack_extra_size = knot_rdataset_size(&cached_rr.rrs)
+ - cached_rr.rrs.count * offsetof(knot_rdata_t, len);
+ int ret = pack_reserve_mm(*addrs, cached_rr.rrs.count, pack_extra_size,
+ kr_memreserve, mm_pool);
+ if (ret) abort(); /* ENOMEM "probably" */
+
+ int usable_cnt = 0;
+ addrset_info_t result = AI_EMPTY;
+ knot_rdata_t *rd = cached_rr.rrs.rdata;
+ for (uint16_t i = 0; i < cached_rr.rrs.count; ++i, rd = knot_rdataset_next(rd)) {
+ if (unlikely(rd->len != rdlen)) {
+ VERBOSE_MSG(qry, "bad NS address length %d for rrtype %d, skipping\n",
+ (int)rd->len, (int)rrtype);
+ continue;
+ }
+ /* Check RTT cache - whether the IP is usable or not. */
+ kr_nsrep_rtt_lru_entry_t *rtt_e = ctx->cache_rtt
+ ? lru_get_try(ctx->cache_rtt, (const char *)rd->data, rd->len)
+ : NULL;
+ const bool unusable = rtt_e && rtt_e->score >= KR_NS_TIMEOUT
+ && qry->creation_time_mono
+ < rtt_e->tout_timestamp + ctx->cache_rtt_tout_retry_interval;
+ if (!unusable) {
+ result = AI_OK;
+ ++usable_cnt;
+ }
+
+ ret = pack_obj_push(addrs, rd->data, rd->len);
+ assert(!ret); /* didn't fit because of incorrectly reserved memory */
+ /* LATER: for now we lose quite some information here,
+		 * as keeping it would need substantial changes in other places,
+ * and it turned out to be premature optimization (most likely).
+ * We might e.g. skip adding unusable addresses,
+ * and either keep some rtt information associated
+ * or even finish up choosing the set to send packets to.
+ * Overall there's some overlap with nsrep.c functionality.
+ */
+ }
+ if (usable_cnt != cached_rr.rrs.count) {
+ VERBOSE_MSG(qry, "usable NS addresses: %d/%d\n",
+ usable_cnt, cached_rr.rrs.count);
+ }
+ return result;
+}
+
+/** Fetch the NS set for a name and populate the zone cut with it. */
+static int fetch_ns(struct kr_context *ctx, struct kr_zonecut *cut,
+ const knot_dname_t *name, const struct kr_query *qry,
+ uint8_t * restrict rank)
+{
+ struct kr_cache_p peek;
+ int ret = kr_cache_peek_exact(&ctx->cache, name, KNOT_RRTYPE_NS, &peek);
+ if (ret != 0) {
+ return ret;
+ }
+ /* Note: we accept *any* rank from the cache. We assume that nothing
+	 * completely untrustworthy could get into the cache, e.g. out-of-bailiwick
+ * records that weren't validated.
+ */
+ *rank = peek.rank;
+
+ int32_t new_ttl = kr_cache_ttl(&peek, qry, name, KNOT_RRTYPE_NS);
+ if (new_ttl < 0) {
+ return kr_error(ESTALE);
+ }
+ /* Materialize the rdataset temporarily, for simplicity. */
+ knot_rdataset_t ns_rds = { 0, NULL };
+ ret = kr_cache_materialize(&ns_rds, &peek, cut->pool);
+ if (ret < 0) {
+ return ret;
+ }
+
+ /* Insert name servers for this zone cut, addresses will be looked up
+ * on-demand (either from cache or iteratively) */
+ bool all_bad = true; /**< All NSs (seen so far) are in a bad state. */
+ knot_rdata_t *rdata_i = ns_rds.rdata;
+ for (unsigned i = 0; i < ns_rds.count;
+ ++i, rdata_i = knot_rdataset_next(rdata_i)) {
+ const knot_dname_t *ns_name = knot_ns_name(rdata_i);
+ const size_t ns_size = knot_dname_size(ns_name);
+
+ /* Get a new pack within the nsset. */
+ pack_t **pack = (pack_t **)trie_get_ins(cut->nsset,
+ (const char *)ns_name, ns_size);
+ if (!pack) return kr_error(ENOMEM);
+ assert(!*pack); /* not critical, really */
+ *pack = mm_alloc(cut->pool, sizeof(pack_t));
+ if (!*pack) return kr_error(ENOMEM);
+ pack_init(**pack);
+
+ addrset_info_t infos[2];
+ /* Fetch NS reputation and decide whether to prefetch A/AAAA records. */
+ unsigned *cached = lru_get_try(ctx->cache_rep,
+ (const char *)ns_name, ns_size);
+ unsigned reputation = (cached) ? *cached : 0;
+ infos[0] = (reputation & KR_NS_NOIP4) || qry->flags.NO_IPV4
+ ? AI_REPUT
+ : fetch_addr(*pack, ns_name, KNOT_RRTYPE_A, cut->pool, qry);
+ infos[1] = (reputation & KR_NS_NOIP6) || qry->flags.NO_IPV6
+ ? AI_REPUT
+ : fetch_addr(*pack, ns_name, KNOT_RRTYPE_AAAA, cut->pool, qry);
+
+ #if 0 /* rather unlikely to be useful unless changing some zcut code */
+ WITH_VERBOSE(qry) {
+ auto_free char *ns_name_txt = kr_dname_text(ns_name);
+ VERBOSE_MSG(qry, "NS %s infos: %d, %d\n",
+ ns_name_txt, (int)infos[0], (int)infos[1]);
+ }
+ #endif
+
+ /* AI_CYCLED checks.
+ * If an ancestor query has its zone cut in the state that
+ * it's looking for name or address(es) of some NS(s),
+ * we want to avoid doing so with a NS that lies under its cut.
+ * Instead we need to consider such names unusable in the cut (for now). */
+ if (infos[0] != AI_UNKNOWN && infos[1] != AI_UNKNOWN) {
+ /* Optimization: the following loop would be pointless. */
+ all_bad = false;
+ continue;
+ }
+ for (const struct kr_query *aq = qry; aq->parent; aq = aq->parent) {
+ const struct kr_qflags *aqpf = &aq->parent->flags;
+ if ( (aqpf->AWAIT_CUT && aq->stype == KNOT_RRTYPE_NS)
+ || (aqpf->AWAIT_IPV4 && aq->stype == KNOT_RRTYPE_A)
+ || (aqpf->AWAIT_IPV6 && aq->stype == KNOT_RRTYPE_AAAA)) {
+ if (knot_dname_in_bailiwick(ns_name,
+ aq->parent->zone_cut.name)) {
+ for (int j = 0; j < 2; ++j)
+ if (infos[j] == AI_UNKNOWN)
+ infos[j] = AI_CYCLED;
+ break;
+ }
+ } else {
+			/* This ancestor is waiting for a reason other than
+			 * an NS name or address, so we're out of a direct cycle. */
+ break;
+ }
+ }
+ all_bad = all_bad && infos[0] <= AI_LAST_BAD && infos[1] <= AI_LAST_BAD;
+ }
+
+ if (all_bad) { WITH_VERBOSE(qry) {
+ auto_free char *name_txt = kr_dname_text(name);
+ VERBOSE_MSG(qry, "cut %s: all NSs bad, count = %d\n",
+ name_txt, (int)ns_rds.count);
+ } }
+ knot_rdataset_clear(&ns_rds, cut->pool);
+	return all_bad ? kr_error(ELOOP) : kr_ok();
+}
+
+/**
+ * Fetch a secure RRset of the given type.
+ */
+static int fetch_secure_rrset(knot_rrset_t **rr, struct kr_cache *cache,
+ const knot_dname_t *owner, uint16_t type, knot_mm_t *pool,
+ const struct kr_query *qry)
+{
+ if (!rr) {
+ assert(!EINVAL);
+ return kr_error(EINVAL);
+ }
+ /* peek, check rank and TTL */
+ struct kr_cache_p peek;
+ int ret = kr_cache_peek_exact(cache, owner, type, &peek);
+ if (ret != 0) {
+ return ret;
+ }
+ if (!kr_rank_test(peek.rank, KR_RANK_SECURE)) {
+ return kr_error(ENOENT);
+ }
+ int32_t new_ttl = kr_cache_ttl(&peek, qry, owner, type);
+ if (new_ttl < 0) {
+ return kr_error(ESTALE);
+ }
+ /* materialize a new RRset */
+ knot_rrset_free(*rr, pool);
+ *rr = mm_alloc(pool, sizeof(knot_rrset_t));
+ if (*rr == NULL) {
+ return kr_error(ENOMEM);
+ }
+ owner = knot_dname_copy(/*const-cast*/(knot_dname_t *)owner, pool);
+ if (!owner) {
+ mm_free(pool, *rr);
+ *rr = NULL;
+ return kr_error(ENOMEM);
+ }
+ knot_rrset_init(*rr, /*const-cast*/(knot_dname_t *)owner, type,
+ KNOT_CLASS_IN, new_ttl);
+ ret = kr_cache_materialize(&(*rr)->rrs, &peek, pool);
+ if (ret < 0) {
+ knot_rrset_free(*rr, pool);
+ *rr = NULL;
+ return ret;
+ }
+
+ return kr_ok();
+}
+
+int kr_zonecut_find_cached(struct kr_context *ctx, struct kr_zonecut *cut,
+ const knot_dname_t *name, const struct kr_query *qry,
+ bool * restrict secured)
+{
+ if (!ctx || !cut || !name) {
+ //assert(false);
+ return kr_error(EINVAL);
+ }
+ /* I'm not sure whether the caller always passes a clean state;
+ * mixing doesn't seem to make sense in any case, so let's clear it.
+ * We don't bother freeing the packs, as they're on mempool. */
+ trie_clear(cut->nsset);
+ /* Copy name as it may overlap with cut name that is to be replaced. */
+ knot_dname_t *qname = knot_dname_copy(name, cut->pool);
+ if (!qname) {
+ return kr_error(ENOMEM);
+ }
+ /* Start at QNAME. */
+ int ret;
+ const knot_dname_t *label = qname;
+ while (true) {
+ /* Fetch NS first and see if it's insecure. */
+ uint8_t rank = 0;
+ const bool is_root = (label[0] == '\0');
+ ret = fetch_ns(ctx, cut, label, qry, &rank);
+ if (ret == 0) {
+			/* Flag as insecure if it is cached as such. */
+ if (kr_rank_test(rank, KR_RANK_INSECURE)) {
+ *secured = false;
+ }
+ /* Fetch DS and DNSKEY if caller wants secure zone cut */
+ int ret_ds = 1, ret_dnskey = 1;
+ if (*secured || is_root) {
+ ret_ds = fetch_secure_rrset(&cut->trust_anchor, &ctx->cache,
+ label, KNOT_RRTYPE_DS, cut->pool, qry);
+ ret_dnskey = fetch_secure_rrset(&cut->key, &ctx->cache,
+ label, KNOT_RRTYPE_DNSKEY, cut->pool, qry);
+ }
+ update_cut_name(cut, label);
+ WITH_VERBOSE(qry) {
+ auto_free char *label_str = kr_dname_text(label);
+ VERBOSE_MSG(qry,
+				"found cut: %s (rank 0%.2o; return codes: DS %d, DNSKEY %d)\n",
+ label_str, rank, ret_ds, ret_dnskey);
+ }
+ ret = kr_ok();
+ break;
+ } /* else */
+
+ trie_clear(cut->nsset);
+ /* Subtract label from QNAME. */
+ if (!is_root) {
+ label = knot_wire_next_label(label, NULL);
+ } else {
+ ret = kr_error(ENOENT);
+ break;
+ }
+ }
+
+ kr_cache_sync(&ctx->cache);
+ mm_free(cut->pool, qname);
+ return ret;
+}
diff --git a/lib/zonecut.h b/lib/zonecut.h
new file mode 100644
index 0000000..2808c66
--- /dev/null
+++ b/lib/zonecut.h
@@ -0,0 +1,173 @@
+/* Copyright (C) 2014-2017 CZ.NIC, z.s.p.o. <knot-dns@labs.nic.cz>
+
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program. If not, see <https://www.gnu.org/licenses/>.
+ */
+
+#pragma once
+
+#include "lib/cache/api.h"
+#include "lib/defines.h"
+#include "lib/generic/pack.h"
+#include "lib/generic/trie.h"
+
+struct kr_rplan;
+struct kr_context;
+
+/**
+ * Current zone cut representation.
+ */
+struct kr_zonecut {
+ knot_dname_t *name; /**< Zone cut name. */
+	knot_rrset_t *key; /**< Zone cut DNSKEY. */
+	knot_rrset_t *trust_anchor; /**< Current trust anchor. */
+ struct kr_zonecut *parent; /**< Parent zone cut. */
+ trie_t *nsset; /**< Map of nameserver => address_set (pack_t). */
+ knot_mm_t *pool; /**< Memory pool. */
+};
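+
+/* A minimal lifecycle sketch (illustrative only; assumes an initialized
+ * struct kr_context *ctx and lets the pool default to the system allocator):
+ *
+ *	struct kr_zonecut cut;
+ *	kr_zonecut_init(&cut, (const uint8_t *)"", NULL); // start at the root
+ *	kr_zonecut_set_sbelt(ctx, &cut);                  // fill in root hints
+ *	// ... use the cut for resolution ...
+ *	kr_zonecut_deinit(&cut);
+ */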
+
+/**
+ * Initialize a zone cut with the given name.
+ * @param cut  zone cut to be initialized
+ * @param name zone cut name
+ * @param pool memory pool used for allocations
+ * @return 0 or error code
+ */
+KR_EXPORT
+int kr_zonecut_init(struct kr_zonecut *cut, const knot_dname_t *name, knot_mm_t *pool);
+
+/**
+ * Clear the structure and free the address set.
+ * @param cut zone cut
+ */
+KR_EXPORT
+void kr_zonecut_deinit(struct kr_zonecut *cut);
+
+/**
+ * Move a zonecut, transferring ownership of any pointed-to memory.
+ * @param to the target - it gets deinit-ed
+ * @param from the source - not modified, but shouldn't be used afterward
+ */
+KR_EXPORT
+void kr_zonecut_move(struct kr_zonecut *to, const struct kr_zonecut *from);
+
+/**
+ * Reset zone cut to given name and clear address list.
+ * @note This clears the address list even if the name doesn't change. TA and DNSKEY don't change.
+ * @param cut zone cut to be set
+ * @param name new zone cut name
+ */
+KR_EXPORT
+void kr_zonecut_set(struct kr_zonecut *cut, const knot_dname_t *name);
+
+/**
+ * Copy a zone cut, including the whole nameserver/address set,
+ * but not the keys and trust anchor.
+ * @param dst destination zone cut
+ * @param src source zone cut
+ * @return 0 or an error code; if it fails with kr_error(ENOMEM),
+ * dst may be left in a half-filled state, but it's safe to deinit...
+ * @note addresses for names in `src` get replaced and others are left as they were.
+ */
+KR_EXPORT
+int kr_zonecut_copy(struct kr_zonecut *dst, const struct kr_zonecut *src);
+
+/**
+ * Copy zone trust anchor and keys.
+ * @param dst destination zone cut
+ * @param src source zone cut
+ * @return 0 or an error code
+ */
+KR_EXPORT
+int kr_zonecut_copy_trust(struct kr_zonecut *dst, const struct kr_zonecut *src);
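+
+/* The two copies are typically combined when forking a cut off an existing
+ * one (a sketch; `src` and `pool` stand for the obvious inputs):
+ *
+ *	struct kr_zonecut fork;
+ *	kr_zonecut_init(&fork, src->name, pool);
+ *	if (kr_zonecut_copy(&fork, src) || kr_zonecut_copy_trust(&fork, src)) {
+ *		kr_zonecut_deinit(&fork); // safe even when half-filled
+ *		return kr_error(ENOMEM);
+ *	}
+ */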
+
+/**
+ * Add address record to the zone cut.
+ *
+ * The address is merged with any existing data for the nameserver;
+ * the record may be of either A or AAAA type.
+ *
+ * @param cut zone cut to be populated
+ * @param ns nameserver name
+ * @param data typically knot_rdata_t::data
+ * @param len typically knot_rdata_t::len
+ * @return 0 or error code
+ */
+KR_EXPORT
+int kr_zonecut_add(struct kr_zonecut *cut, const knot_dname_t *ns, const void *data, int len);
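+
+/* Addresses typically come straight from a knot_rdataset_t; a sketch,
+ * assuming `rrs` holds A or AAAA records belonging to nameserver `ns`:
+ *
+ *	knot_rdata_t *rd = rrs->rdata;
+ *	for (uint16_t i = 0; i < rrs->count; ++i, rd = knot_rdataset_next(rd))
+ *		kr_zonecut_add(cut, ns, rd->data, rd->len);
+ */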
+
+/**
+ * Delete nameserver/address pair from the zone cut.
+ * @param cut
+ * @param ns name server name
+ * @param data typically knot_rdata_t::data
+ * @param len typically knot_rdata_t::len
+ * @return 0 or error code
+ */
+KR_EXPORT
+int kr_zonecut_del(struct kr_zonecut *cut, const knot_dname_t *ns, const void *data, int len);
+
+/**
+ * Delete all addresses associated with the given name.
+ * @param cut
+ * @param ns name server name
+ * @return 0 or error code
+ */
+KR_EXPORT
+int kr_zonecut_del_all(struct kr_zonecut *cut, const knot_dname_t *ns);
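+
+/* Contrast with kr_zonecut_del(): deleting the last address of a name also
+ * removes the name itself, while this drops the whole entry at once
+ * (a sketch, with `rd` a knot_rdata_t as above):
+ *
+ *	kr_zonecut_del(cut, ns, rd->data, rd->len); // one address
+ *	kr_zonecut_del_all(cut, ns);                // the whole entry
+ */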
+
+/**
+ * Find nameserver address list in the zone cut.
+ *
+ * @note This can be used as a membership test; a non-NULL pack is returned
+ * iff the nameserver name exists in the cut.
+ *
+ * @param cut
+ * @param ns name server name
+ * @return pack of addresses or NULL
+ */
+KR_EXPORT KR_PURE
+pack_t *kr_zonecut_find(struct kr_zonecut *cut, const knot_dname_t *ns);
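+
+/* A sketch of walking the found addresses; each object in the pack is a raw
+ * 4-byte or 16-byte address (the pack keeps the lengths itself):
+ *
+ *	pack_t *pack = kr_zonecut_find(cut, ns);
+ *	if (pack) {
+ *		for (uint8_t *obj = pack_head(*pack); obj != pack_tail(*pack);
+ *				obj = pack_obj_next(obj)) {
+ *			const void *addr = pack_obj_val(obj);
+ *			size_t len = pack_obj_len(obj);
+ *			// ... use addr/len ...
+ *		}
+ *	}
+ */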
+
+/**
+ * Populate the zone cut with the root servers (SBELT, :rfc:`1034`).
+ *
+ * @param ctx resolution context (to fetch root hints)
+ * @param cut zone cut to be populated
+ * @return 0 or error code
+ */
+KR_EXPORT
+int kr_zonecut_set_sbelt(struct kr_context *ctx, struct kr_zonecut *cut);
+
+/**
+ * Populate the zone cut (name, nameservers, addresses, TA/keys) from the cache.
+ *
+ * @param ctx resolution context (to fetch data from LRU caches)
+ * @param cut zone cut to be populated
+ * @param name QNAME to start finding zone cut for
+ * @param qry query for timestamp and stale-serving decisions
+ * @param secured set to true if a secured zone cut is wanted; it is reset to false if the cut is provably insecure
+ * @return 0 or error code (ENOENT if it doesn't find anything)
+ */
+KR_EXPORT
+int kr_zonecut_find_cached(struct kr_context *ctx, struct kr_zonecut *cut,
+ const knot_dname_t *name, const struct kr_query *qry,
+ bool * restrict secured);
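+
+/* A typical call pattern (a sketch close to what the driver does when
+ * establishing the cut for a query `qry`):
+ *
+ *	bool secured = true; // also fetch DS/DNSKEY when available
+ *	int ret = kr_zonecut_find_cached(ctx, &qry->zone_cut, qry->sname,
+ *					 qry, &secured);
+ *	if (ret != 0) // nothing cached, fall back to root hints
+ *		ret = kr_zonecut_set_sbelt(ctx, &qry->zone_cut);
+ */
+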
+/**
+ * Check if any address is present in the zone cut.
+ *
+ * @param cut zone cut to check
+ * @return true/false
+ */
+KR_EXPORT
+bool kr_zonecut_is_empty(struct kr_zonecut *cut);
+