Diffstat
-rw-r--r--  lib/zonecut.c | 593
1 file changed, 593 insertions, 0 deletions
diff --git a/lib/zonecut.c b/lib/zonecut.c
new file mode 100644
index 0000000..2bbd26f
--- /dev/null
+++ b/lib/zonecut.c
@@ -0,0 +1,593 @@
+/* Copyright (C) CZ.NIC, z.s.p.o. <knot-resolver@labs.nic.cz>
+ * SPDX-License-Identifier: GPL-3.0-or-later
+ */
+
+#include "lib/zonecut.h"
+
+#include "contrib/cleanup.h"
+#include "lib/defines.h"
+#include "lib/generic/pack.h"
+#include "lib/resolve.h"
+#include "lib/rplan.h"
+
+#include <libknot/descriptor.h>
+#include <libknot/packet/wire.h>
+#include <libknot/rrtype/rdname.h>
+
+#define VERBOSE_MSG(qry, ...) kr_log_q(qry, ZCUT, __VA_ARGS__)
+
+/** Information for one NS name + address type. */
+typedef enum {
+	AI_UNINITED = 0,
+	AI_DISABLED,	/**< Can't use this addrset. */
+	AI_CYCLED,	/**< Skipped due to cycle detection; see implementation for details. */
+	AI_LAST_BAD = AI_CYCLED, /** bad states: <= AI_LAST_BAD */
+	AI_UNKNOWN,	/**< Don't know status of this RRset; various reasons. */
+	AI_EMPTY,	/**< No usable address (may mean e.g. just NODATA). */
+	AI_OK,		/**< At least one usable address.
+			 * LATER: we might be interested whether it's only glue. */
+} addrset_info_t;
+
+
+static void update_cut_name(struct kr_zonecut *cut, const knot_dname_t *name)
+{
+	if (knot_dname_is_equal(name, cut->name)) {
+		return;
+	}
+	knot_dname_t *next_name = knot_dname_copy(name, cut->pool);
+	mm_free(cut->pool, cut->name);
+	cut->name = next_name;
+}
+
+int kr_zonecut_init(struct kr_zonecut *cut, const knot_dname_t *name, knot_mm_t *pool)
+{
+	if (!cut || !name) {
+		return kr_error(EINVAL);
+	}
+
+	memset(cut, 0, sizeof(*cut));
+	cut->name = knot_dname_copy(name, pool);
+	cut->pool = pool;
+	cut->nsset = trie_create(pool);
+	return cut->name && cut->nsset ? kr_ok() : kr_error(ENOMEM);
+}
+
+/** Completely free a pack_t. */
+static inline void free_addr_set(pack_t *pack, knot_mm_t *pool)
+{
+	if (kr_fails_assert(pack)) {
+		/* promised we don't store NULL packs */
+		return;
+	}
+	pack_clear_mm(*pack, mm_free, pool);
+	mm_free(pool, pack);
+}
+/** Trivial wrapper for use in trie_apply, due to ugly casting. */
+static int free_addr_set_cb(trie_val_t *v, void *pool)
+{
+	free_addr_set(*v, pool);
+	return kr_ok();
+}
+
+void kr_zonecut_deinit(struct kr_zonecut *cut)
+{
+	if (!cut) {
+		return;
+	}
+	mm_free(cut->pool, cut->name);
+	if (cut->nsset) {
+		trie_apply(cut->nsset, free_addr_set_cb, cut->pool);
+		trie_free(cut->nsset);
+	}
+	knot_rrset_free(cut->key, cut->pool);
+	knot_rrset_free(cut->trust_anchor, cut->pool);
+}
+
+void kr_zonecut_move(struct kr_zonecut *to, const struct kr_zonecut *from)
+{
+	kr_require(to && from);
+	kr_zonecut_deinit(to);
+	memcpy(to, from, sizeof(*to));
+}
+
+void kr_zonecut_set(struct kr_zonecut *cut, const knot_dname_t *name)
+{
+	if (!cut || !name) {
+		return;
+	}
+	knot_rrset_t *key, *ta;
+	key = cut->key; cut->key = NULL;
+	ta = cut->trust_anchor; cut->trust_anchor = NULL;
+	kr_zonecut_deinit(cut);
+	kr_zonecut_init(cut, name, cut->pool);
+	cut->key = key;
+	cut->trust_anchor = ta;
+}
+
+int kr_zonecut_copy(struct kr_zonecut *dst, const struct kr_zonecut *src)
+{
+	if (!dst || !src) {
+		return kr_error(EINVAL);
+	}
+	if (!dst->nsset) {
+		dst->nsset = trie_create(dst->pool);
+	}
+	/* Copy the contents, one by one.
+	 */
+	int ret = kr_ok();
+	trie_it_t *it;
+	for (it = trie_it_begin(src->nsset); !trie_it_finished(it); trie_it_next(it)) {
+		size_t klen;
+		const char * const k = trie_it_key(it, &klen);
+		pack_t **new_pack = (pack_t **)trie_get_ins(dst->nsset, k, klen);
+		if (!new_pack) {
+			ret = kr_error(ENOMEM);
+			break;
+		}
+		const pack_t *old_pack = *trie_it_val(it);
+		ret = pack_clone(new_pack, old_pack, dst->pool);
+		if (ret) break;
+	}
+	trie_it_free(it);
+	return ret;
+}
+
+int kr_zonecut_copy_trust(struct kr_zonecut *dst, const struct kr_zonecut *src)
+{
+	knot_rrset_t *key_copy = NULL;
+	knot_rrset_t *ta_copy = NULL;
+
+	if (src->key) {
+		key_copy = knot_rrset_copy(src->key, dst->pool);
+		if (!key_copy) {
+			return kr_error(ENOMEM);
+		}
+	}
+
+	if (src->trust_anchor) {
+		ta_copy = knot_rrset_copy(src->trust_anchor, dst->pool);
+		if (!ta_copy) {
+			knot_rrset_free(key_copy, dst->pool);
+			return kr_error(ENOMEM);
+		}
+	}
+
+	knot_rrset_free(dst->key, dst->pool);
+	dst->key = key_copy;
+	knot_rrset_free(dst->trust_anchor, dst->pool);
+	dst->trust_anchor = ta_copy;
+
+	return kr_ok();
+}
+
+int kr_zonecut_add(struct kr_zonecut *cut, const knot_dname_t *ns, const void *data, int len)
+{
+	if (kr_fails_assert(cut && ns && cut->nsset && (!data || len > 0)))
+		return kr_error(EINVAL);
+	/* Disabled; add_reverse_pair() misuses this for domain name in rdata. */
+	if (false && data && len != sizeof(struct in_addr)
+			&& len != sizeof(struct in6_addr)) {
+		kr_assert(!EINVAL);
+		return kr_error(EINVAL);
+	}
+
+	/* Get a pack_t for the ns. */
+	pack_t **pack = (pack_t **)trie_get_ins(cut->nsset, (const char *)ns, knot_dname_size(ns));
+	if (!pack) return kr_error(ENOMEM);
+	if (*pack == NULL) {
+		*pack = mm_alloc(cut->pool, sizeof(pack_t));
+		if (*pack == NULL) return kr_error(ENOMEM);
+		pack_init(**pack);
+	}
+	/* Insert data (if any). */
+	if (data == NULL) {
+		return kr_ok();
+	}
+	/* Check for duplicates. */
+	if (pack_obj_find(*pack, data, len)) {
+		return kr_ok();
+	}
+	/* Push new address. */
+	int ret = pack_reserve_mm(**pack, 1, len, kr_memreserve, cut->pool);
+	if (ret != 0) {
+		return kr_error(ENOMEM);
+	}
+	return pack_obj_push(*pack, data, len);
+}
+
+int kr_zonecut_del(struct kr_zonecut *cut, const knot_dname_t *ns, const void *data, int len)
+{
+	if (!cut || !ns || (data && len <= 0)) {
+		return kr_error(EINVAL);
+	}
+
+	/* Find the address list. */
+	int ret = kr_ok();
+	pack_t *pack = kr_zonecut_find(cut, ns);
+	if (pack == NULL) {
+		return kr_error(ENOENT);
+	}
+	/* Remove address from the pack. */
+	if (data) {
+		ret = pack_obj_del(pack, data, len);
+	}
+	/* No servers left, remove NS from the set. */
+	if (pack->len == 0) {
+		free_addr_set(pack, cut->pool);
+		ret = trie_del(cut->nsset, (const char *)ns, knot_dname_size(ns), NULL);
+		if (kr_fails_assert(ret == 0)) /* only KNOT_ENOENT and that *can't* happen */
+			return kr_error(ret);
+		return kr_ok();
+	}
+
+	return ret;
+}
+
+int kr_zonecut_del_all(struct kr_zonecut *cut, const knot_dname_t *ns)
+{
+	if (!cut || !ns) {
+		return kr_error(EINVAL);
+	}
+
+	/* Find the address list; then free and remove it.
+	 */
+	pack_t *pack;
+	int ret = trie_del(cut->nsset, (const char *)ns, knot_dname_size(ns),
+			   (trie_val_t *)&pack);
+	if (ret) { /* deletion failed */
+		kr_assert(ret == KNOT_ENOENT);
+		return kr_error(ENOENT);
+	}
+	free_addr_set(pack, cut->pool);
+	return kr_ok();
+}
+
+pack_t *kr_zonecut_find(struct kr_zonecut *cut, const knot_dname_t *ns)
+{
+	if (!cut || !ns) {
+		return NULL;
+	}
+	trie_val_t *val = trie_get_try(cut->nsset, (const char *)ns, knot_dname_size(ns));
+	/* we get pointer to the pack_t pointer */
+	return val ? (pack_t *)*val : NULL;
+}
+
+static int has_address(trie_val_t *v, void *baton_)
+{
+	const pack_t *pack = *v;
+	const bool found = pack != NULL && pack->len != 0;
+	return found;
+}
+
+bool kr_zonecut_is_empty(struct kr_zonecut *cut)
+{
+	if (kr_fails_assert(cut && cut->nsset))
+		return true;
+	return !trie_apply(cut->nsset, has_address, NULL);
+}
+
+int kr_zonecut_set_sbelt(struct kr_context *ctx, struct kr_zonecut *cut)
+{
+	if (!ctx || !cut || !ctx->root_hints.nsset) {
+		return kr_error(EINVAL);
+	}
+
+	trie_apply(cut->nsset, free_addr_set_cb, cut->pool);
+	trie_clear(cut->nsset);
+
+	const uint8_t *const dname_root = (const uint8_t *)/*sign-cast*/("");
+	update_cut_name(cut, dname_root);
+	/* Copy root hints from resolution context. */
+	return kr_zonecut_copy(cut, &ctx->root_hints);
+}
+
+/** Fetch address for zone cut. Any rank is accepted (i.e. glue as well). */
+static addrset_info_t fetch_addr(pack_t *addrs, const knot_dname_t *ns, uint16_t rrtype,
+				 int *addr_budget,
+				 knot_mm_t *mm_pool, const struct kr_query *qry)
+// LATER(optim.): excessive data copying
+{
+	int rdlen;
+	switch (rrtype) {
+	case KNOT_RRTYPE_A:
+		if (qry->flags.NO_IPV4)
+			return AI_DISABLED;
+		rdlen = 4;
+		break;
+	case KNOT_RRTYPE_AAAA:
+		if (qry->flags.NO_IPV6 || no6_is_bad())
+			return AI_DISABLED;
+		rdlen = 16;
+		break;
+	default:
+		kr_assert(!EINVAL);
+		return AI_UNKNOWN;
+	}
+
+	struct kr_context *ctx = qry->request->ctx;
+	struct kr_cache_p peek;
+	if (kr_cache_peek_exact(&ctx->cache, ns, rrtype, &peek) != 0) {
+		return AI_UNKNOWN;
+	}
+	int32_t new_ttl = kr_cache_ttl(&peek, qry, ns, rrtype);
+	if (new_ttl < 0) {
+		return AI_UNKNOWN;
+	}
+
+	knot_rrset_t cached_rr;
+	knot_rrset_init(&cached_rr, /*const-cast*/(knot_dname_t *)ns, rrtype,
+			KNOT_CLASS_IN, new_ttl);
+	if (kr_cache_materialize(&cached_rr.rrs, &peek, mm_pool) < 0) {
+		return AI_UNKNOWN;
+	}
+
+	*addr_budget -= cached_rr.rrs.count - 1;
+	if (*addr_budget < 0) {
+		cached_rr.rrs.count += *addr_budget;
+		*addr_budget = 0;
+	}
+
+	/* Reserve memory in *addrs. Implementation detail:
+	 * pack_t cares for lengths, so we don't store those in the data.
+	 */
+	const size_t pack_extra_size = cached_rr.rrs.size
+		- cached_rr.rrs.count * offsetof(knot_rdata_t, len);
+	int ret = pack_reserve_mm(*addrs, cached_rr.rrs.count, pack_extra_size,
+				  kr_memreserve, mm_pool);
+	kr_require(ret == 0); /* ENOMEM "probably" */
+
+	int usable_cnt = 0;
+	addrset_info_t result = AI_EMPTY;
+	knot_rdata_t *rd = cached_rr.rrs.rdata;
+	for (uint16_t i = 0; i < cached_rr.rrs.count; ++i, rd = knot_rdataset_next(rd)) {
+		if (unlikely(rd->len != rdlen)) {
+			VERBOSE_MSG(qry, "bad NS address length %d for rrtype %d, skipping\n",
+					(int)rd->len, (int)rrtype);
+			continue;
+		}
+		result = AI_OK;
+		++usable_cnt;
+
+		ret = pack_obj_push(addrs, rd->data, rd->len);
+		kr_assert(!ret); /* didn't fit because of incorrectly reserved memory */
+		/* LATER: for now we lose quite some information here,
+		 * as keeping it would need substantial changes on other places,
+		 * and it turned out to be premature optimization (most likely).
+		 * We might e.g. skip adding unusable addresses,
+		 * and either keep some rtt information associated
+		 * or even finish up choosing the set to send packets to.
+		 * Overall there's some overlap with nsrep.c functionality.
+		 */
+	}
+	if (usable_cnt != cached_rr.rrs.count) {
+		VERBOSE_MSG(qry, "usable NS addresses: %d/%d\n",
+				usable_cnt, cached_rr.rrs.count);
+	}
+	return result;
+}
+
+/** Fetch best NS for zone cut. */
+static int fetch_ns(struct kr_context *ctx, struct kr_zonecut *cut,
+		    const knot_dname_t *name, const struct kr_query *qry,
+		    uint8_t * restrict rank)
+{
+	struct kr_cache_p peek;
+	int ret = kr_cache_peek_exact(&ctx->cache, name, KNOT_RRTYPE_NS, &peek);
+	if (ret != 0) {
+		return ret;
+	}
+	/* Note: we accept *any* rank from the cache. We assume that nothing
+	 * completely untrustworthy could get into the cache, e.g. out-of-bailiwick
+	 * records that weren't validated.
+	 */
+	*rank = peek.rank;
+
+	int32_t new_ttl = kr_cache_ttl(&peek, qry, name, KNOT_RRTYPE_NS);
+	if (new_ttl < 0) {
+		return kr_error(ESTALE);
+	}
+	/* Materialize the rdataset temporarily, for simplicity. */
+	knot_rdataset_t ns_rds = { 0 };
+	ret = kr_cache_materialize(&ns_rds, &peek, cut->pool);
+	if (ret < 0) {
+		return ret;
+	}
+
+	/* Consider at most the first 13 NSs (like the root). It's a trivial approach
+	 * to limit our resources when choosing NSs. Otherwise DoS might be viable.
+	 * We're not aware of any reasonable use case for having many NSs. */
+	if (ns_rds.count > 13) {
+		if (kr_log_is_debug_qry(ZCUT, qry)) {
+			auto_free char *name_txt = kr_dname_text(name);
+			VERBOSE_MSG(qry, "NS %s too large, reducing from %d names\n",
+					name_txt, (int)ns_rds.count);
+		}
+		ns_rds.count = 13;
+	}
+	/* Also trivially limit the total address count:
+	 * the first A and first AAAA are for free per NS,
+	 * but the rest get a shared small limit and get skipped if it's exhausted. */
+	int addr_budget = 8;
+
+	/* Insert name servers for this zone cut; addresses will be looked up
+	 * on demand (either from cache or iteratively). */
+	bool all_bad = true; /**< All NSs (seen so far) are in a bad state. */
+	knot_rdata_t *rdata_i = ns_rds.rdata;
+	for (unsigned i = 0; i < ns_rds.count;
+			++i, rdata_i = knot_rdataset_next(rdata_i)) {
+		const knot_dname_t *ns_name = knot_ns_name(rdata_i);
+		const size_t ns_size = knot_dname_size(ns_name);
+
+		/* Get a new pack within the nsset.
+		 */
+		pack_t **pack = (pack_t **)trie_get_ins(cut->nsset,
					(const char *)ns_name, ns_size);
		if (!pack) return kr_error(ENOMEM);
+		kr_assert(!*pack); /* not critical, really */
+		*pack = mm_alloc(cut->pool, sizeof(pack_t));
+		if (!*pack) return kr_error(ENOMEM);
+		pack_init(**pack);
+
+		addrset_info_t infos[2];
+
+		/* Fetch NS reputation and decide whether to prefetch A/AAAA records. */
+		infos[0] = fetch_addr(*pack, ns_name, KNOT_RRTYPE_A, &addr_budget,
					cut->pool, qry);
+		infos[1] = fetch_addr(*pack, ns_name, KNOT_RRTYPE_AAAA, &addr_budget,
					cut->pool, qry);
+
+		#if 0 /* rather unlikely to be useful unless changing some zcut code */
+		if (kr_log_is_debug_qry(ZCUT, qry)) {
+			auto_free char *ns_name_txt = kr_dname_text(ns_name);
+			VERBOSE_MSG(qry, "NS %s infos: %d, %d\n",
+					ns_name_txt, (int)infos[0], (int)infos[1]);
+		}
+		#endif
+
+		/* AI_CYCLED checks.
+		 * If an ancestor query has its zone cut in the state that
+		 * it's looking for name or address(es) of some NS(s),
+		 * we want to avoid doing so with a NS that lies under its cut.
+		 * Instead we need to consider such names unusable in the cut (for now). */
+		if (infos[0] != AI_UNKNOWN && infos[1] != AI_UNKNOWN) {
+			/* Optimization: the following loop would be pointless. */
+			all_bad = false;
+			continue;
+		}
+		for (const struct kr_query *aq = qry; aq->parent; aq = aq->parent) {
+			const struct kr_qflags *aqpf = &aq->parent->flags;
+			if (   (aqpf->AWAIT_CUT  && aq->stype == KNOT_RRTYPE_NS)
+			    || (aqpf->AWAIT_IPV4 && aq->stype == KNOT_RRTYPE_A)
+			    || (aqpf->AWAIT_IPV6 && aq->stype == KNOT_RRTYPE_AAAA)) {
+				if (knot_dname_in_bailiwick(ns_name,
							aq->parent->zone_cut.name) >= 0) {
+					for (int j = 0; j < 2; ++j)
+						if (infos[j] == AI_UNKNOWN)
+							infos[j] = AI_CYCLED;
+					break;
+				}
+			} else {
+				/* This ancestor waits for some other reason than
+				 * NS name or address, so we're out of a direct cycle. */
+				break;
+			}
+		}
+		all_bad = all_bad && infos[0] <= AI_LAST_BAD && infos[1] <= AI_LAST_BAD;
+	}
+
+	if (all_bad && kr_log_is_debug_qry(ZCUT, qry)) {
+		auto_free char *name_txt = kr_dname_text(name);
+		VERBOSE_MSG(qry, "cut %s: all NSs bad, count = %d\n",
+				name_txt, (int)ns_rds.count);
+	}
+
+	kr_assert(addr_budget >= 0);
+	if (addr_budget <= 0 && kr_log_is_debug_qry(ZCUT, qry)) {
+		auto_free char *name_txt = kr_dname_text(name);
+		VERBOSE_MSG(qry, "NS %s have too many addresses together, reduced\n",
+				name_txt);
+	}
+
+	knot_rdataset_clear(&ns_rds, cut->pool);
+	return all_bad ? ELOOP : kr_ok();
+}
+
+/**
+ * Fetch secure RRSet of given type.
+ */
+static int fetch_secure_rrset(knot_rrset_t **rr, struct kr_cache *cache,
+	const knot_dname_t *owner, uint16_t type, knot_mm_t *pool,
+	const struct kr_query *qry)
+{
+	if (kr_fails_assert(rr))
+		return kr_error(EINVAL);
+	/* peek, check rank and TTL */
+	struct kr_cache_p peek;
+	int ret = kr_cache_peek_exact(cache, owner, type, &peek);
+	if (ret != 0)
+		return ret;
+	if (!kr_rank_test(peek.rank, KR_RANK_SECURE))
+		return kr_error(ENOENT);
+	int32_t new_ttl = kr_cache_ttl(&peek, qry, owner, type);
+	if (new_ttl < 0)
+		return kr_error(ESTALE);
+	/* materialize a new RRset */
+	knot_rrset_free(*rr, pool);
+	*rr = mm_alloc(pool, sizeof(knot_rrset_t));
+	if (*rr == NULL)
+		return kr_error(ENOMEM);
+	owner = knot_dname_copy(/*const-cast*/(knot_dname_t *)owner, pool);
+	if (!owner) {
+		mm_free(pool, *rr);
+		*rr = NULL;
+		return kr_error(ENOMEM);
+	}
+	knot_rrset_init(*rr, /*const-cast*/(knot_dname_t *)owner, type,
+			KNOT_CLASS_IN, new_ttl);
+	ret = kr_cache_materialize(&(*rr)->rrs, &peek, pool);
+	if (ret < 0) {
+		knot_rrset_free(*rr, pool);
+		*rr = NULL;
+		return ret;
+	}
+
+	return kr_ok();
+}
+
+int kr_zonecut_find_cached(struct kr_context *ctx, struct kr_zonecut *cut,
+			   const knot_dname_t *name, const struct kr_query *qry,
+			   bool * restrict secured)
+{
+	if (!ctx || !cut || !name)
+		return kr_error(EINVAL);
+	/* I'm not sure whether the caller always passes a clean state;
+	 * mixing doesn't seem to make sense in any case, so let's clear it.
+	 * We don't bother freeing the packs, as they're on mempool. */
+	trie_clear(cut->nsset);
+	/* Copy name as it may overlap with cut name that is to be replaced. */
+	knot_dname_t *qname = knot_dname_copy(name, cut->pool);
+	if (!qname) {
+		return kr_error(ENOMEM);
+	}
+	/* Start at QNAME. */
+	int ret;
+	const knot_dname_t *label = qname;
+	while (true) {
+		/* Fetch NS first and see if it's insecure. */
+		uint8_t rank = 0;
+		const bool is_root = (label[0] == '\0');
+		ret = fetch_ns(ctx, cut, label, qry, &rank);
+		if (ret == 0) {
+			/* Flag as insecure if cached as this */
+			if (kr_rank_test(rank, KR_RANK_INSECURE)) {
+				*secured = false;
+			}
+			/* Fetch DS and DNSKEY if caller wants secure zone cut */
+			int ret_ds = 1, ret_dnskey = 1;
+			if (*secured || is_root) {
+				ret_ds = fetch_secure_rrset(&cut->trust_anchor, &ctx->cache,
					label, KNOT_RRTYPE_DS, cut->pool, qry);
+				ret_dnskey = fetch_secure_rrset(&cut->key, &ctx->cache,
					label, KNOT_RRTYPE_DNSKEY, cut->pool, qry);
+			}
+			update_cut_name(cut, label);
+			if (kr_log_is_debug_qry(ZCUT, qry)) {
+				auto_free char *label_str = kr_dname_text(label);
+				VERBOSE_MSG(qry,
+					"found cut: %s (rank 0%.2o return codes: DS %d, DNSKEY %d)\n",
+					label_str, rank, ret_ds, ret_dnskey);
+			}
+			ret = kr_ok();
+			break;
+		} /* else */
+
+		trie_clear(cut->nsset);
+		/* Subtract label from QNAME. */
+		if (!is_root) {
+			label = knot_wire_next_label(label, NULL);
+		} else {
+			ret = kr_error(ENOENT);
+			break;
+		}
+	}
+
+	kr_cache_commit(&ctx->cache);
+	mm_free(cut->pool, qname);
+	return ret;
+}
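
For orientation, a minimal caller-side sketch of the nsset API added above (kr_zonecut_init / kr_zonecut_add / kr_zonecut_find / kr_zonecut_deinit). It is illustrative only and not part of the commit; it assumes the declarations from lib/zonecut.h and the usual libknot convention that a NULL knot_mm_t* means the system allocator, and the function name zonecut_usage_sketch is hypothetical.

#include <arpa/inet.h>
#include "lib/zonecut.h"

static int zonecut_usage_sketch(void)
{
	struct kr_zonecut cut;
	/* "\x02cz" is the wire-format owner name for "cz." */
	int ret = kr_zonecut_init(&cut, (const uint8_t *)"\x02cz", NULL);
	if (ret) return ret;

	/* One NS name ("a.ns.nic.cz.") with a single IPv4 glue address;
	 * kr_zonecut_add() stores the raw 4- or 16-byte address in that NS's pack. */
	const knot_dname_t *ns = (const uint8_t *)"\x01" "a\x02ns\x03nic\x02cz";
	struct in_addr a4;
	inet_pton(AF_INET, "194.0.12.1", &a4);
	ret = kr_zonecut_add(&cut, ns, &a4, sizeof(a4));

	/* The address pack for that NS is now reachable by name. */
	pack_t *pack = kr_zonecut_find(&cut, ns);
	if (!pack || pack->len == 0)
		ret = ret ? ret : kr_error(ENOENT);

	kr_zonecut_deinit(&cut);
	return ret;
}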