1
0
Fork 0
knot-resolver/lib/zonecut.c
Daniel Baumann fbc604e215
Adding upstream version 5.7.5.
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
2025-06-21 13:56:17 +02:00

593 lines
17 KiB
C

/* Copyright (C) CZ.NIC, z.s.p.o. <knot-resolver@labs.nic.cz>
* SPDX-License-Identifier: GPL-3.0-or-later
*/
#include "lib/zonecut.h"
#include "contrib/cleanup.h"
#include "lib/defines.h"
#include "lib/generic/pack.h"
#include "lib/resolve.h"
#include "lib/rplan.h"
#include <libknot/descriptor.h>
#include <libknot/packet/wire.h>
#include <libknot/rrtype/rdname.h>
#define VERBOSE_MSG(qry, ...) kr_log_q(qry, ZCUT, __VA_ARGS__)
/** Information for one NS name + address type. */
typedef enum {
AI_UNINITED = 0,
AI_DISABLED, /**< Can't use this addrset. */
AI_CYCLED, /**< Skipped due to cycle detection; see implementation for details. */
AI_LAST_BAD = AI_CYCLED, /** bad states: <= AI_LAST_BAD */
AI_UNKNOWN, /**< Don't know status of this RRset; various reasons. */
AI_EMPTY, /**< No usable address (may mean e.g. just NODATA). */
AI_OK, /**< At least one usable address.
* LATER: we might be interested whether it's only glue. */
} addrset_info_t;
static void update_cut_name(struct kr_zonecut *cut, const knot_dname_t *name)
{
if (knot_dname_is_equal(name, cut->name)) {
return;
}
knot_dname_t *next_name = knot_dname_copy(name, cut->pool);
mm_free(cut->pool, cut->name);
cut->name = next_name;
}
int kr_zonecut_init(struct kr_zonecut *cut, const knot_dname_t *name, knot_mm_t *pool)
{
if (!cut || !name) {
return kr_error(EINVAL);
}
memset(cut, 0, sizeof(*cut));
cut->name = knot_dname_copy(name, pool);
cut->pool = pool;
cut->nsset = trie_create(pool);
return cut->name && cut->nsset ? kr_ok() : kr_error(ENOMEM);
}
/** Completely free a pack_t. */
static inline void free_addr_set(pack_t *pack, knot_mm_t *pool)
{
if (kr_fails_assert(pack)) {
/* promised we don't store NULL packs */
return;
}
pack_clear_mm(*pack, mm_free, pool);
mm_free(pool, pack);
}
/** Trivial wrapper for use in trie_apply, due to ugly casting. */
static int free_addr_set_cb(trie_val_t *v, void *pool)
{
free_addr_set(*v, pool);
return kr_ok();
}
void kr_zonecut_deinit(struct kr_zonecut *cut)
{
if (!cut) {
return;
}
mm_free(cut->pool, cut->name);
if (cut->nsset) {
trie_apply(cut->nsset, free_addr_set_cb, cut->pool);
trie_free(cut->nsset);
}
knot_rrset_free(cut->key, cut->pool);
knot_rrset_free(cut->trust_anchor, cut->pool);
}
void kr_zonecut_move(struct kr_zonecut *to, const struct kr_zonecut *from)
{
kr_require(to && from);
kr_zonecut_deinit(to);
memcpy(to, from, sizeof(*to));
}
void kr_zonecut_set(struct kr_zonecut *cut, const knot_dname_t *name)
{
if (!cut || !name) {
return;
}
knot_rrset_t *key, *ta;
key = cut->key; cut->key = NULL;
ta = cut->trust_anchor; cut->trust_anchor = NULL;
kr_zonecut_deinit(cut);
kr_zonecut_init(cut, name, cut->pool);
cut->key = key;
cut->trust_anchor = ta;
}
int kr_zonecut_copy(struct kr_zonecut *dst, const struct kr_zonecut *src)
{
if (!dst || !src) {
return kr_error(EINVAL);
}
if (!dst->nsset) {
dst->nsset = trie_create(dst->pool);
}
/* Copy the contents, one by one. */
int ret = kr_ok();
trie_it_t *it;
for (it = trie_it_begin(src->nsset); !trie_it_finished(it); trie_it_next(it)) {
size_t klen;
const char * const k = trie_it_key(it, &klen);
pack_t **new_pack = (pack_t **)trie_get_ins(dst->nsset, k, klen);
if (!new_pack) {
ret = kr_error(ENOMEM);
break;
}
const pack_t *old_pack = *trie_it_val(it);
ret = pack_clone(new_pack, old_pack, dst->pool);
if (ret) break;
}
trie_it_free(it);
return ret;
}
int kr_zonecut_copy_trust(struct kr_zonecut *dst, const struct kr_zonecut *src)
{
knot_rrset_t *key_copy = NULL;
knot_rrset_t *ta_copy = NULL;
if (src->key) {
key_copy = knot_rrset_copy(src->key, dst->pool);
if (!key_copy) {
return kr_error(ENOMEM);
}
}
if (src->trust_anchor) {
ta_copy = knot_rrset_copy(src->trust_anchor, dst->pool);
if (!ta_copy) {
knot_rrset_free(key_copy, dst->pool);
return kr_error(ENOMEM);
}
}
knot_rrset_free(dst->key, dst->pool);
dst->key = key_copy;
knot_rrset_free(dst->trust_anchor, dst->pool);
dst->trust_anchor = ta_copy;
return kr_ok();
}
int kr_zonecut_add(struct kr_zonecut *cut, const knot_dname_t *ns, const void *data, int len)
{
if (kr_fails_assert(cut && ns && cut->nsset && (!data || len > 0)))
return kr_error(EINVAL);
/* Disabled; add_reverse_pair() misuses this for domain name in rdata. */
if (false && data && len != sizeof(struct in_addr)
&& len != sizeof(struct in6_addr)) {
kr_assert(!EINVAL);
return kr_error(EINVAL);
}
/* Get a pack_t for the ns. */
pack_t **pack = (pack_t **)trie_get_ins(cut->nsset, (const char *)ns, knot_dname_size(ns));
if (!pack) return kr_error(ENOMEM);
if (*pack == NULL) {
*pack = mm_alloc(cut->pool, sizeof(pack_t));
if (*pack == NULL) return kr_error(ENOMEM);
pack_init(**pack);
}
/* Insert data (if has any) */
if (data == NULL) {
return kr_ok();
}
/* Check for duplicates */
if (pack_obj_find(*pack, data, len)) {
return kr_ok();
}
/* Push new address */
int ret = pack_reserve_mm(**pack, 1, len, kr_memreserve, cut->pool);
if (ret != 0) {
return kr_error(ENOMEM);
}
return pack_obj_push(*pack, data, len);
}
int kr_zonecut_del(struct kr_zonecut *cut, const knot_dname_t *ns, const void *data, int len)
{
if (!cut || !ns || (data && len <= 0)) {
return kr_error(EINVAL);
}
/* Find the address list. */
int ret = kr_ok();
pack_t *pack = kr_zonecut_find(cut, ns);
if (pack == NULL) {
return kr_error(ENOENT);
}
/* Remove address from the pack. */
if (data) {
ret = pack_obj_del(pack, data, len);
}
/* No servers left, remove NS from the set. */
if (pack->len == 0) {
free_addr_set(pack, cut->pool);
ret = trie_del(cut->nsset, (const char *)ns, knot_dname_size(ns), NULL);
if (kr_fails_assert(ret == 0)) /* only KNOT_ENOENT and that *can't* happen */
return kr_error(ret);
return kr_ok();
}
return ret;
}
int kr_zonecut_del_all(struct kr_zonecut *cut, const knot_dname_t *ns)
{
if (!cut || !ns) {
return kr_error(EINVAL);
}
/* Find the address list; then free and remove it. */
pack_t *pack;
int ret = trie_del(cut->nsset, (const char *)ns, knot_dname_size(ns),
(trie_val_t *)&pack);
if (ret) { /* deletion failed */
kr_assert(ret == KNOT_ENOENT);
return kr_error(ENOENT);
}
free_addr_set(pack, cut->pool);
return kr_ok();
}
pack_t *kr_zonecut_find(struct kr_zonecut *cut, const knot_dname_t *ns)
{
if (!cut || !ns) {
return NULL;
}
trie_val_t *val = trie_get_try(cut->nsset, (const char *)ns, knot_dname_size(ns));
/* we get pointer to the pack_t pointer */
return val ? (pack_t *)*val : NULL;
}
static int has_address(trie_val_t *v, void *baton_)
{
const pack_t *pack = *v;
const bool found = pack != NULL && pack->len != 0;
return found;
}
bool kr_zonecut_is_empty(struct kr_zonecut *cut)
{
if (kr_fails_assert(cut && cut->nsset))
return true;
return !trie_apply(cut->nsset, has_address, NULL);
}
int kr_zonecut_set_sbelt(struct kr_context *ctx, struct kr_zonecut *cut)
{
if (!ctx || !cut || !ctx->root_hints.nsset) {
return kr_error(EINVAL);
}
trie_apply(cut->nsset, free_addr_set_cb, cut->pool);
trie_clear(cut->nsset);
const uint8_t *const dname_root = (const uint8_t *)/*sign-cast*/("");
update_cut_name(cut, dname_root);
/* Copy root hints from resolution context. */
return kr_zonecut_copy(cut, &ctx->root_hints);
}
/** Fetch address for zone cut. Any rank is accepted (i.e. glue as well). */
static addrset_info_t fetch_addr(pack_t *addrs, const knot_dname_t *ns, uint16_t rrtype,
int *addr_budget,
knot_mm_t *mm_pool, const struct kr_query *qry)
// LATER(optim.): excessive data copying
{
int rdlen;
switch (rrtype) {
case KNOT_RRTYPE_A:
if (qry->flags.NO_IPV4)
return AI_DISABLED;
rdlen = 4;
break;
case KNOT_RRTYPE_AAAA:
if (qry->flags.NO_IPV6 || no6_is_bad())
return AI_DISABLED;
rdlen = 16;
break;
default:
kr_assert(!EINVAL);
return AI_UNKNOWN;
}
struct kr_context *ctx = qry->request->ctx;
struct kr_cache_p peek;
if (kr_cache_peek_exact(&ctx->cache, ns, rrtype, &peek) != 0) {
return AI_UNKNOWN;
}
int32_t new_ttl = kr_cache_ttl(&peek, qry, ns, rrtype);
if (new_ttl < 0) {
return AI_UNKNOWN;
}
knot_rrset_t cached_rr;
knot_rrset_init(&cached_rr, /*const-cast*/(knot_dname_t *)ns, rrtype,
KNOT_CLASS_IN, new_ttl);
if (kr_cache_materialize(&cached_rr.rrs, &peek, mm_pool) < 0) {
return AI_UNKNOWN;
}
*addr_budget -= cached_rr.rrs.count - 1;
if (*addr_budget < 0) {
cached_rr.rrs.count += *addr_budget;
*addr_budget = 0;
}
/* Reserve memory in *addrs. Implementation detail:
* pack_t cares for lengths, so we don't store those in the data. */
const size_t pack_extra_size = cached_rr.rrs.size
- cached_rr.rrs.count * offsetof(knot_rdata_t, len);
int ret = pack_reserve_mm(*addrs, cached_rr.rrs.count, pack_extra_size,
kr_memreserve, mm_pool);
kr_require(ret == 0); /* ENOMEM "probably" */
int usable_cnt = 0;
addrset_info_t result = AI_EMPTY;
knot_rdata_t *rd = cached_rr.rrs.rdata;
for (uint16_t i = 0; i < cached_rr.rrs.count; ++i, rd = knot_rdataset_next(rd)) {
if (unlikely(rd->len != rdlen)) {
VERBOSE_MSG(qry, "bad NS address length %d for rrtype %d, skipping\n",
(int)rd->len, (int)rrtype);
continue;
}
result = AI_OK;
++usable_cnt;
ret = pack_obj_push(addrs, rd->data, rd->len);
kr_assert(!ret); /* didn't fit because of incorrectly reserved memory */
/* LATER: for now we lose quite some information here,
* as keeping it would need substantial changes on other places,
* and it turned out to be premature optimization (most likely).
* We might e.g. skip adding unusable addresses,
* and either keep some rtt information associated
* or even finish up choosing the set to send packets to.
* Overall there's some overlap with nsrep.c functionality.
*/
}
if (usable_cnt != cached_rr.rrs.count) {
VERBOSE_MSG(qry, "usable NS addresses: %d/%d\n",
usable_cnt, cached_rr.rrs.count);
}
return result;
}
/** Fetch best NS for zone cut. */
static int fetch_ns(struct kr_context *ctx, struct kr_zonecut *cut,
const knot_dname_t *name, const struct kr_query *qry,
uint8_t * restrict rank)
{
struct kr_cache_p peek;
int ret = kr_cache_peek_exact(&ctx->cache, name, KNOT_RRTYPE_NS, &peek);
if (ret != 0) {
return ret;
}
/* Note: we accept *any* rank from the cache. We assume that nothing
* completely untrustworthy could get into the cache, e.g out-of-bailiwick
* records that weren't validated.
*/
*rank = peek.rank;
int32_t new_ttl = kr_cache_ttl(&peek, qry, name, KNOT_RRTYPE_NS);
if (new_ttl < 0) {
return kr_error(ESTALE);
}
/* Materialize the rdataset temporarily, for simplicity. */
knot_rdataset_t ns_rds = { 0 };
ret = kr_cache_materialize(&ns_rds, &peek, cut->pool);
if (ret < 0) {
return ret;
}
/* Consider at most 13 first NSs (like root). It's a trivial approach
* to limit our resources when choosing NSs. Otherwise DoS might be viable.
* We're not aware of any reasonable use case for having many NSs. */
if (ns_rds.count > 13) {
if (kr_log_is_debug_qry(ZCUT, qry)) {
auto_free char *name_txt = kr_dname_text(name);
VERBOSE_MSG(qry, "NS %s too large, reducing from %d names\n",
name_txt, (int)ns_rds.count);
}
ns_rds.count = 13;
}
/* Also trivially limit the total address count:
* first A and first AAAA are for free per NS,
* but the rest get a shared small limit and get skipped if exhausted. */
int addr_budget = 8;
/* Insert name servers for this zone cut, addresses will be looked up
* on-demand (either from cache or iteratively) */
bool all_bad = true; /**< All NSs (seen so far) are in a bad state. */
knot_rdata_t *rdata_i = ns_rds.rdata;
for (unsigned i = 0; i < ns_rds.count;
++i, rdata_i = knot_rdataset_next(rdata_i)) {
const knot_dname_t *ns_name = knot_ns_name(rdata_i);
const size_t ns_size = knot_dname_size(ns_name);
/* Get a new pack within the nsset. */
pack_t **pack = (pack_t **)trie_get_ins(cut->nsset,
(const char *)ns_name, ns_size);
if (!pack) return kr_error(ENOMEM);
kr_assert(!*pack); /* not critical, really */
*pack = mm_alloc(cut->pool, sizeof(pack_t));
if (!*pack) return kr_error(ENOMEM);
pack_init(**pack);
addrset_info_t infos[2];
/* Fetch NS reputation and decide whether to prefetch A/AAAA records. */
infos[0] = fetch_addr(*pack, ns_name, KNOT_RRTYPE_A, &addr_budget,
cut->pool, qry);
infos[1] = fetch_addr(*pack, ns_name, KNOT_RRTYPE_AAAA, &addr_budget,
cut->pool, qry);
#if 0 /* rather unlikely to be useful unless changing some zcut code */
if (kr_log_is_debug_qry(ZCUT, qry)) {
auto_free char *ns_name_txt = kr_dname_text(ns_name);
VERBOSE_MSG(qry, "NS %s infos: %d, %d\n",
ns_name_txt, (int)infos[0], (int)infos[1]);
}
#endif
/* AI_CYCLED checks.
* If an ancestor query has its zone cut in the state that
* it's looking for name or address(es) of some NS(s),
* we want to avoid doing so with a NS that lies under its cut.
* Instead we need to consider such names unusable in the cut (for now). */
if (infos[0] != AI_UNKNOWN && infos[1] != AI_UNKNOWN) {
/* Optimization: the following loop would be pointless. */
all_bad = false;
continue;
}
for (const struct kr_query *aq = qry; aq->parent; aq = aq->parent) {
const struct kr_qflags *aqpf = &aq->parent->flags;
if ( (aqpf->AWAIT_CUT && aq->stype == KNOT_RRTYPE_NS)
|| (aqpf->AWAIT_IPV4 && aq->stype == KNOT_RRTYPE_A)
|| (aqpf->AWAIT_IPV6 && aq->stype == KNOT_RRTYPE_AAAA)) {
if (knot_dname_in_bailiwick(ns_name,
aq->parent->zone_cut.name)) {
for (int j = 0; j < 2; ++j)
if (infos[j] == AI_UNKNOWN)
infos[j] = AI_CYCLED;
break;
}
} else {
/* This ancestor waits for other reason that
* NS name or address, so we're out of a direct cycle. */
break;
}
}
all_bad = all_bad && infos[0] <= AI_LAST_BAD && infos[1] <= AI_LAST_BAD;
}
if (all_bad && kr_log_is_debug_qry(ZCUT, qry)) {
auto_free char *name_txt = kr_dname_text(name);
VERBOSE_MSG(qry, "cut %s: all NSs bad, count = %d\n",
name_txt, (int)ns_rds.count);
}
kr_assert(addr_budget >= 0);
if (addr_budget <= 0 && kr_log_is_debug_qry(ZCUT, qry)) {
auto_free char *name_txt = kr_dname_text(name);
VERBOSE_MSG(qry, "NS %s have too many addresses together, reduced\n",
name_txt);
}
knot_rdataset_clear(&ns_rds, cut->pool);
return all_bad ? ELOOP : kr_ok();
}
/**
* Fetch secure RRSet of given type.
*/
static int fetch_secure_rrset(knot_rrset_t **rr, struct kr_cache *cache,
const knot_dname_t *owner, uint16_t type, knot_mm_t *pool,
const struct kr_query *qry)
{
if (kr_fails_assert(rr))
return kr_error(EINVAL);
/* peek, check rank and TTL */
struct kr_cache_p peek;
int ret = kr_cache_peek_exact(cache, owner, type, &peek);
if (ret != 0)
return ret;
if (!kr_rank_test(peek.rank, KR_RANK_SECURE))
return kr_error(ENOENT);
int32_t new_ttl = kr_cache_ttl(&peek, qry, owner, type);
if (new_ttl < 0)
return kr_error(ESTALE);
/* materialize a new RRset */
knot_rrset_free(*rr, pool);
*rr = mm_alloc(pool, sizeof(knot_rrset_t));
if (*rr == NULL)
return kr_error(ENOMEM);
owner = knot_dname_copy(/*const-cast*/(knot_dname_t *)owner, pool);
if (!owner) {
mm_free(pool, *rr);
*rr = NULL;
return kr_error(ENOMEM);
}
knot_rrset_init(*rr, /*const-cast*/(knot_dname_t *)owner, type,
KNOT_CLASS_IN, new_ttl);
ret = kr_cache_materialize(&(*rr)->rrs, &peek, pool);
if (ret < 0) {
knot_rrset_free(*rr, pool);
*rr = NULL;
return ret;
}
return kr_ok();
}
int kr_zonecut_find_cached(struct kr_context *ctx, struct kr_zonecut *cut,
const knot_dname_t *name, const struct kr_query *qry,
bool * restrict secured)
{
if (!ctx || !cut || !name)
return kr_error(EINVAL);
/* I'm not sure whether the caller always passes a clean state;
* mixing doesn't seem to make sense in any case, so let's clear it.
* We don't bother freeing the packs, as they're on mempool. */
trie_clear(cut->nsset);
/* Copy name as it may overlap with cut name that is to be replaced. */
knot_dname_t *qname = knot_dname_copy(name, cut->pool);
if (!qname) {
return kr_error(ENOMEM);
}
/* Start at QNAME. */
int ret;
const knot_dname_t *label = qname;
while (true) {
/* Fetch NS first and see if it's insecure. */
uint8_t rank = 0;
const bool is_root = (label[0] == '\0');
ret = fetch_ns(ctx, cut, label, qry, &rank);
if (ret == 0) {
/* Flag as insecure if cached as this */
if (kr_rank_test(rank, KR_RANK_INSECURE)) {
*secured = false;
}
/* Fetch DS and DNSKEY if caller wants secure zone cut */
int ret_ds = 1, ret_dnskey = 1;
if (*secured || is_root) {
ret_ds = fetch_secure_rrset(&cut->trust_anchor, &ctx->cache,
label, KNOT_RRTYPE_DS, cut->pool, qry);
ret_dnskey = fetch_secure_rrset(&cut->key, &ctx->cache,
label, KNOT_RRTYPE_DNSKEY, cut->pool, qry);
}
update_cut_name(cut, label);
if (kr_log_is_debug_qry(ZCUT, qry)) {
auto_free char *label_str = kr_dname_text(label);
VERBOSE_MSG(qry,
"found cut: %s (rank 0%.2o return codes: DS %d, DNSKEY %d)\n",
label_str, rank, ret_ds, ret_dnskey);
}
ret = kr_ok();
break;
} /* else */
trie_clear(cut->nsset);
/* Subtract label from QNAME. */
if (!is_root) {
label = knot_dname_next_label(label);
} else {
ret = kr_error(ENOENT);
break;
}
}
kr_cache_commit(&ctx->cache);
mm_free(cut->pool, qname);
return ret;
}