diff options
Diffstat (limited to '')
-rw-r--r-- | lib/cache/nsec3.c | 495 |
1 files changed, 495 insertions, 0 deletions
diff --git a/lib/cache/nsec3.c b/lib/cache/nsec3.c new file mode 100644 index 0000000..2716456 --- /dev/null +++ b/lib/cache/nsec3.c @@ -0,0 +1,495 @@ +/* Copyright (C) CZ.NIC, z.s.p.o. <knot-resolver@labs.nic.cz> + * SPDX-License-Identifier: GPL-3.0-or-later + */ + +/** @file + * Implementation of NSEC3 handling. Prototypes in ./impl.h + */ + +#include "lib/cache/impl.h" + +#include "contrib/base32hex.h" +#include "lib/dnssec/nsec.h" +#include "lib/dnssec/nsec3.h" +#include "lib/layer/iterate.h" + +#include <libknot/rrtype/nsec3.h> + +static const knot_db_val_t VAL_EMPTY = { NULL, 0 }; + +/** Common part: write all but the NSEC3 hash. */ +static knot_db_val_t key_NSEC3_common(struct key *k, const knot_dname_t *zname, + const nsec_p_hash_t nsec_p_hash) +{ + if (kr_fails_assert(k && zname && !kr_dname_lf(k->buf, zname, false))) + return VAL_EMPTY; + + /* CACHE_KEY_DEF: key == zone's dname_lf + '\0' + '3' + nsec_p hash (4B) + * + NSEC3 hash (20B == NSEC3_HASH_LEN binary!) + * LATER(optim.) nsec_p hash: perhaps 2B would give a sufficient probability + * of avoiding collisions. + */ + uint8_t *begin = k->buf + 1 + k->zlf_len; /* one byte after zone's zero */ + begin[0] = 0; + begin[1] = '3'; /* tag for NSEC3 */ + k->type = KNOT_RRTYPE_NSEC3; + memcpy(begin + 2, &nsec_p_hash, sizeof(nsec_p_hash)); + return (knot_db_val_t){ + .data = k->buf + 1, + .len = begin + 2 + sizeof(nsec_p_hash) - (k->buf + 1), + }; +} + +knot_db_val_t key_NSEC3(struct key *k, const knot_dname_t *nsec3_name, + const nsec_p_hash_t nsec_p_hash) +{ + knot_db_val_t val = key_NSEC3_common(k, nsec3_name /*only zname required*/, + nsec_p_hash); + if (!val.data) return val; + int len = base32hex_decode(nsec3_name + 1, nsec3_name[0], + knot_db_val_bound(val), KR_CACHE_KEY_MAXLEN - val.len); + if (len != NSEC3_HASH_LEN) { + return VAL_EMPTY; + } + val.len += len; + return val; +} + +/** Construct a string key for for NSEC3 predecessor-search, from an non-NSEC3 name. + * \note k->zlf_len and k->zname are assumed to have been correctly set */ +static knot_db_val_t key_NSEC3_name(struct key *k, const knot_dname_t *name, + const bool add_wildcard, const struct nsec_p *nsec_p) +{ + bool ok = k && name && nsec_p && nsec_p->raw; + if (!ok) return VAL_EMPTY; + knot_db_val_t val = key_NSEC3_common(k, k->zname, nsec_p->hash); + if (!val.data) return val; + + /* Make `name` point to correctly wildcarded owner name. */ + uint8_t buf[KNOT_DNAME_MAXLEN]; + int name_len; + if (add_wildcard) { + buf[0] = '\1'; + buf[1] = '*'; + name_len = knot_dname_to_wire(buf + 2, name, sizeof(buf) - 2); + if (name_len < 0) return VAL_EMPTY; /* wants wildcard but doesn't fit */ + name = buf; + name_len += 2; + } else { + name_len = knot_dname_size(name); + } + /* Append the NSEC3 hash. */ + const dnssec_binary_t dname = { + .size = name_len, + .data = (uint8_t *)/*const-cast*/name, + }; + + if (kr_fails_assert(!kr_nsec3_limited_params(&nsec_p->libknot))) { + /* This is mainly defensive; it shouldn't happen thanks to downgrades. */ + return VAL_EMPTY; + } + #if 0 // LATER(optim.): this requires a patched libdnssec - tries to realloc() + dnssec_binary_t hash = { + .size = KR_CACHE_KEY_MAXLEN - val.len, + .data = val.data + val.len, + }; + int ret = dnssec_nsec3_hash(&dname, &nsec_p->libknot, &hash); + if (ret != DNSSEC_EOK) return VAL_EMPTY; + if (kr_fails_assert(hash.size == NSEC3_HASH_LEN)) + return VAL_EMPTY; + + #else + dnssec_binary_t hash = { .size = 0, .data = NULL }; + int ret = dnssec_nsec3_hash(&dname, &nsec_p->libknot, &hash); + if (ret != DNSSEC_EOK) return VAL_EMPTY; + if (kr_fails_assert(hash.size == NSEC3_HASH_LEN && hash.data)) + return VAL_EMPTY; + memcpy(knot_db_val_bound(val), hash.data, NSEC3_HASH_LEN); + free(hash.data); + #endif + + val.len += hash.size; + return val; +} + +/** Return h1 < h2, semantically on NSEC3 hashes. */ +static inline bool nsec3_hash_ordered(const uint8_t *h1, const uint8_t *h2) +{ + return memcmp(h1, h2, NSEC3_HASH_LEN) < 0; +} + +/** NSEC3 range search. + * + * \param key Pass output of key_NSEC3(k, ...) + * \param nsec_p Restrict to this NSEC3 parameter-set. + * \param value[out] The raw data of the NSEC3 cache record (optional; consistency checked). + * \param exact_match[out] Whether the key was matched exactly or just covered (optional). + * \param hash_low[out] Output the low end hash of covering NSEC3, pointing within DB (optional). + * \param new_ttl[out] New TTL of the NSEC3 (optional). + * \return Error message or NULL. + * \note The function itself does *no* bitmap checks, e.g. RFC 6840 sec. 4. + */ +static const char * find_leq_NSEC3(struct kr_cache *cache, const struct kr_query *qry, + const knot_db_val_t key, const struct key *k, const struct nsec_p *nsec_p, + knot_db_val_t *value, bool *exact_match, const uint8_t **hash_low, + uint32_t *new_ttl) +{ + /* Do the cache operation. */ + const size_t hash_off = key_nsec3_hash_off(k); + if (kr_fails_assert(key.data && key.len >= hash_off)) + return "range search ERROR"; + knot_db_val_t key_found = key; + knot_db_val_t val = { NULL, 0 }; + int ret = cache_op(cache, read_leq, &key_found, &val); + /* ^^ LATER(optim.): incrementing key and doing less-than search + * would probably be slightly more efficient with LMDB, + * but the code complexity would grow considerably. */ + if (ret < 0) { + if (kr_fails_assert(ret == kr_error(ENOENT))) { + return "range search ERROR"; + } else { + return "range search miss"; + } + } + if (value) { + *value = val; + } + /* Check consistency, TTL, rank. */ + const bool is_exact = (ret == 0); + if (exact_match) { + *exact_match = is_exact; + } + const struct entry_h *eh = entry_h_consistent_NSEC(val); + if (!eh) { + /* This might be just finding something else than NSEC3 entry, + * in case we searched before the very first one in the zone. */ + return "range search found inconsistent entry"; + } + /* Passing just zone name instead of owner. */ + int32_t new_ttl_ = get_new_ttl(eh, qry, k->zname, KNOT_RRTYPE_NSEC3, + qry->timestamp.tv_sec); + if (new_ttl_ < 0 || !kr_rank_test(eh->rank, KR_RANK_SECURE)) { + return "range search found stale or insecure entry"; + /* TODO: remove the stale record *and* retry, + * in case we haven't run off. Perhaps start by in_zone check. */ + } + if (new_ttl) { + *new_ttl = new_ttl_; + } + if (hash_low) { + *hash_low = (uint8_t *)key_found.data + hash_off; + } + if (is_exact) { + /* Nothing else to do. */ + return NULL; + } + /* The NSEC3 starts strictly before our target name; + * now check that it still belongs into that zone and chain. */ + const uint8_t *nsec_p_raw = eh->data + KR_CACHE_RR_COUNT_SIZE + + 2 /* RDLENGTH from rfc1034 */; + const int nsec_p_len = nsec_p_rdlen(nsec_p_raw); + const bool same_chain = key_found.len == hash_off + NSEC3_HASH_LEN + /* CACHE_KEY_DEF */ + && memcmp(key.data, key_found.data, hash_off) == 0 + /* exact comparison of NSEC3 parameters */ + && nsec_p_len == nsec_p_rdlen(nsec_p->raw) + && memcmp(nsec_p_raw, nsec_p->raw, nsec_p_len) == 0; + if (!same_chain) { + return "range search miss (!same_chain)"; + } + /* We know it starts before sname, so let's check the other end. + * A. find the next hash and check its length. */ + if (kr_fails_assert(KR_CACHE_RR_COUNT_SIZE == 2 && get_uint16(eh->data) != 0)) + return "ERROR"; /* TODO: more checks? Also, `next` computation is kinda messy. */ + const uint8_t *hash_next = nsec_p_raw + nsec_p_len + + sizeof(uint8_t) /* hash length from rfc5155 */; + if (hash_next[-1] != NSEC3_HASH_LEN) { + return "unexpected next hash length"; + } + /* B. do the actual range check. */ + const uint8_t * const hash_searched = (uint8_t *)key.data + hash_off; + bool covers = /* we know for sure that the low end is before the searched name */ + nsec3_hash_ordered(hash_searched, hash_next) + /* and the wrap-around case */ + || nsec3_hash_ordered(hash_next, (const uint8_t *)key_found.data + hash_off); + if (!covers) { + return "range search miss (!covers)"; + } + return NULL; +} + +/** Extract textual representation of NSEC3 hash from a cache key. + * \param text must have length at least NSEC3_HASH_TXT_LEN+1 (will get 0-terminated). */ +static void key_NSEC3_hash2text(const knot_db_val_t key, char *text) +{ + kr_require(key.data && key.len > NSEC3_HASH_LEN); + const uint8_t *hash_raw = knot_db_val_bound(key) - NSEC3_HASH_LEN; + /* CACHE_KEY_DEF ^^ */ + int len = base32hex_encode(hash_raw, NSEC3_HASH_LEN, (uint8_t *)text, + NSEC3_HASH_TXT_LEN); + kr_assert(len == NSEC3_HASH_TXT_LEN); + text[NSEC3_HASH_TXT_LEN] = '\0'; +} + +/** Reconstruct a name into a buffer (assuming length at least KNOT_DNAME_MAXLEN). + * \return kr_ok() or error code (<0). */ +static int dname_wire_reconstruct(knot_dname_t *buf, const knot_dname_t *zname, + const uint8_t *hash_raw) +{ + int len = base32hex_encode(hash_raw, NSEC3_HASH_LEN, buf + 1, NSEC3_HASH_TXT_LEN); + if (kr_fails_assert(len == NSEC3_HASH_TXT_LEN)) + return kr_error(EINVAL); + buf[0] = len; + int ret = knot_dname_to_wire(buf + 1 + len, zname, KNOT_DNAME_MAXLEN - 1 - len); + return ret < 0 ? kr_error(ret) : kr_ok(); +} + +static void nsec3_hash2text(const knot_dname_t *owner, char *text) +{ + kr_require(owner[0] == NSEC3_HASH_TXT_LEN); + memcpy(text, owner + 1, MIN(owner[0], NSEC3_HASH_TXT_LEN)); + text[NSEC3_HASH_TXT_LEN] = '\0'; +} + +int nsec3_encloser(struct key *k, struct answer *ans, + const int sname_labels, int *clencl_labels, + const struct kr_query *qry, struct kr_cache *cache) +{ + static const int ESKIP = ABS(ENOENT); + /* Basic sanity check. */ + const bool ok = k && k->zname && ans && clencl_labels + && qry && cache; + if (kr_fails_assert(ok)) + return kr_error(EINVAL); + + /*** Find the closest encloser - cycle: name starting at sname, + * proceeding while longer than zname, shortening by one label on step. + * We need a pair where a name doesn't exist *and* its parent does. */ + /* LATER(optim.): perhaps iterate in the other order - that + * should help significantly against deep queries where we have + * a shallow proof in the cache. We can also optimize by using + * only exact search unless we had a match in the previous iteration. */ + const int zname_labels = knot_dname_labels(k->zname, NULL); + int last_nxproven_labels = -1; + const knot_dname_t *name = qry->sname; + + /* Avoid doing too much work on SHA1; we might consider that a part of mitigating + * CVE-2023-50868: NSEC3 closest encloser proof can exhaust CPU + * As currently the code iterates from the longest name, we limit that. + * Note that we don't want to limit too much, as the alternative usually includes + * sending more queries upstream, which would come with nontrivial work, too. + */ + const int max_labels = zname_labels + kr_nsec3_max_depth(&ans->nsec_p.libknot); + if (sname_labels > max_labels) + VERBOSE_MSG(qry, "=> NSEC3 hashing partly skipped due to too long SNAME (CVE-2023-50868)\n"); + + for (int name_labels = sname_labels; name_labels >= zname_labels; + --name_labels, name += 1 + name[0]) { + if (name_labels > max_labels) + continue; // avoid the hashing + + /* Find a previous-or-equal NSEC3 in cache covering the name, + * checking TTL etc. */ + const knot_db_val_t key = key_NSEC3_name(k, name, false, &ans->nsec_p); + if (!key.data) continue; + WITH_VERBOSE(qry) { + char hash_txt[NSEC3_HASH_TXT_LEN + 1]; + key_NSEC3_hash2text(key, hash_txt); + VERBOSE_MSG(qry, "=> NSEC3 depth %d: hash %s\n", + name_labels - zname_labels, hash_txt); + } + knot_db_val_t val = { NULL, 0 }; + bool exact_match; + uint32_t new_ttl; + const uint8_t *hash_low; + const char *err = find_leq_NSEC3(cache, qry, key, k, &ans->nsec_p, &val, + &exact_match, &hash_low, &new_ttl); + if (err) { + WITH_VERBOSE(qry) { + auto_free char *name_str = kr_dname_text(name); + VERBOSE_MSG(qry, "=> NSEC3 encloser error for %s: %s\n", + name_str, err); + } + continue; + } + if (exact_match && name_labels != sname_labels + && name_labels + 1 != last_nxproven_labels) { + /* This name exists (checked rank and TTL), and it's + * neither of the two interesting cases, so we do not + * keep searching for non-existence above this name. */ + VERBOSE_MSG(qry, + "=> NSEC3 encloser: only found existence of an ancestor\n"); + return ESKIP; + } + /* Optimization: avoid the rest of the last iteration if pointless. */ + if (!exact_match && name_labels == zname_labels + && last_nxproven_labels != name_labels + 1) { + break; + } + + /* Basic checks OK -> materialize data, cleaning any previous + * records on that answer index (unsuccessful attempts). */ + knot_dname_t owner[KNOT_DNAME_MAXLEN]; + { + int ret = dname_wire_reconstruct(owner, k->zname, hash_low); + if (unlikely(ret)) continue; + } + const int ans_id = (exact_match && name_labels + 1 == last_nxproven_labels) + ? AR_CPE : AR_NSEC; + { + const struct entry_h *nsec_eh = val.data; + memset(&ans->rrsets[ans_id], 0, sizeof(ans->rrsets[ans_id])); + int ret = entry2answer(ans, ans_id, nsec_eh, knot_db_val_bound(val), + owner, KNOT_RRTYPE_NSEC3, new_ttl); + if (ret) return kr_error(ret); + } + + if (!exact_match) { + /* Non-existence proven, but we don't know if `name` + * is the next closer name. + * Note: we don't need to check for the sname being + * delegated away by this record, as with NSEC3 only + * *exact* match on an ancestor could do that. */ + last_nxproven_labels = name_labels; + WITH_VERBOSE(qry) { + char hash_low_txt[NSEC3_HASH_TXT_LEN + 1]; + nsec3_hash2text(owner, hash_low_txt); + VERBOSE_MSG(qry, + "=> NSEC3 depth %d: covered by %s -> TODO, new TTL %d\n", + name_labels - zname_labels, hash_low_txt, new_ttl); + } + continue; + } + + /* Exactly matched NSEC3: two cases, one after another. */ + const knot_rrset_t *nsec_rr = ans->rrsets[ans_id].set.rr; + const uint8_t *bm = knot_nsec3_bitmap(nsec_rr->rrs.rdata); + uint16_t bm_size = knot_nsec3_bitmap_len(nsec_rr->rrs.rdata); + if (kr_fails_assert(bm)) + return kr_error(EFAULT); + if (name_labels == sname_labels) { + if (kr_nsec_bitmap_nodata_check(bm, bm_size, qry->stype, + nsec_rr->owner) != 0) { + VERBOSE_MSG(qry, + "=> NSEC3 sname: match but failed type check\n"); + return ESKIP; + } + /* NODATA proven; just need to add SOA+RRSIG later */ + VERBOSE_MSG(qry, + "=> NSEC3 sname: match proved NODATA, new TTL %d\n", + new_ttl); + ans->rcode = PKT_NODATA; + return kr_ok(); + + } /* else */ + + if (kr_fails_assert(name_labels + 1 == last_nxproven_labels)) + return kr_error(EINVAL); + if (kr_nsec_children_in_zone_check(bm, bm_size) != 0) { + VERBOSE_MSG(qry, + "=> NSEC3 encloser: found but delegated (or error)\n"); + return ESKIP; + } + /* NXDOMAIN proven *except* for wildcards. */ + WITH_VERBOSE(qry) { + auto_free char *name_str = kr_dname_text(name); + VERBOSE_MSG(qry, + "=> NSEC3 encloser: confirmed as %s, new TTL %d\n", + name_str, new_ttl); + } + *clencl_labels = name_labels; + ans->rcode = PKT_NXDOMAIN; + /* Avoid repeated NSEC3 - remove either if the hashes match. + * This is very unlikely in larger zones: 1/size (per attempt). + * Well, deduplication would happen anyway when the answer + * from cache is read by kresd (internally). */ + if (unlikely(0 == memcmp(ans->rrsets[AR_NSEC].set.rr->owner + 1, + ans->rrsets[AR_CPE ].set.rr->owner + 1, + NSEC3_HASH_LEN))) { + memset(&ans->rrsets[AR_CPE], 0, sizeof(ans->rrsets[AR_CPE])); + /* LATER(optim.): perhaps check this earlier and avoid some work? */ + } + return kr_ok(); + } + + /* We've ran out of options. */ + if (last_nxproven_labels > 0) { + /* We didn't manage to prove existence of the closest encloser, + * meaning the only chance left is a *positive* wildcard record. */ + *clencl_labels = last_nxproven_labels - 1; + ans->rcode = PKT_NXDOMAIN; + /* FIXME: review */ + } + return ESKIP; +} + +int nsec3_src_synth(struct key *k, struct answer *ans, const knot_dname_t *clencl_name, + const struct kr_query *qry, struct kr_cache *cache) +{ + /* Find a previous-or-equal NSEC3 in cache covering or matching + * the source of synthesis, checking TTL etc. */ + const knot_db_val_t key = key_NSEC3_name(k, clencl_name, true, &ans->nsec_p); + if (!key.data) return kr_error(1); + WITH_VERBOSE(qry) { + char hash_txt[NSEC3_HASH_TXT_LEN + 1]; + key_NSEC3_hash2text(key, hash_txt); + VERBOSE_MSG(qry, "=> NSEC3 wildcard: hash %s\n", hash_txt); + } + knot_db_val_t val = { NULL, 0 }; + bool exact_match; + uint32_t new_ttl; + const uint8_t *hash_low; + const char *err = find_leq_NSEC3(cache, qry, key, k, &ans->nsec_p, &val, + &exact_match, &hash_low, &new_ttl); + if (err) { + VERBOSE_MSG(qry, "=> NSEC3 wildcard: %s\n", err); + return kr_ok(); + } + + /* LATER(optim.): avoid duplicities in answer. */ + + /* Basic checks OK -> materialize the data (speculatively). */ + knot_dname_t owner[KNOT_DNAME_MAXLEN]; + { + int ret = dname_wire_reconstruct(owner, k->zname, hash_low); + if (unlikely(ret)) return kr_ok(); + const struct entry_h *nsec_eh = val.data; + ret = entry2answer(ans, AR_WILD, nsec_eh, knot_db_val_bound(val), + owner, KNOT_RRTYPE_NSEC3, new_ttl); + if (ret) return kr_error(ret); + } + const knot_rrset_t *nsec_rr = ans->rrsets[AR_WILD].set.rr; + + if (!exact_match) { + /* The record proves wildcard non-existence. */ + WITH_VERBOSE(qry) { + char hash_low_txt[NSEC3_HASH_TXT_LEN + 1]; + nsec3_hash2text(owner, hash_low_txt); + VERBOSE_MSG(qry, + "=> NSEC3 wildcard: covered by %s -> TODO, new TTL %d\n", + hash_low_txt, new_ttl); + } + return AR_SOA; + } + + /* The wildcard exists. Find if it's NODATA - check type bitmap. */ + const uint8_t *bm = knot_nsec3_bitmap(nsec_rr->rrs.rdata); + uint16_t bm_size = knot_nsec3_bitmap_len(nsec_rr->rrs.rdata); + if (kr_fails_assert(bm)) + return kr_error(EFAULT); + if (kr_nsec_bitmap_nodata_check(bm, bm_size, qry->stype, nsec_rr->owner) == 0) { + /* NODATA proven; just need to add SOA+RRSIG later */ + VERBOSE_MSG(qry, "=> NSEC3 wildcard: match proved NODATA, new TTL %d\n", + new_ttl); + ans->rcode = PKT_NODATA; + return AR_SOA; + + } /* else */ + /* The data probably exists -> don't add this NSEC3 + * and (later) try to find the real wildcard data */ + VERBOSE_MSG(qry, "=> NSEC3 wildcard: should exist (or error)\n"); + ans->rcode = PKT_NOERROR; + memset(&ans->rrsets[AR_WILD], 0, sizeof(ans->rrsets[AR_WILD])); + return kr_ok(); +} + |