diff options
author | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-04-07 15:26:00 +0000 |
---|---|---|
committer | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-04-07 15:26:00 +0000 |
commit | 830407e88f9d40d954356c3754f2647f91d5c06a (patch) | |
tree | d6a0ece6feea91f3c656166dbaa884ef8a29740e /lib/cache/impl.h | |
parent | Initial commit. (diff) | |
download | knot-resolver-830407e88f9d40d954356c3754f2647f91d5c06a.tar.xz knot-resolver-830407e88f9d40d954356c3754f2647f91d5c06a.zip |
Adding upstream version 5.6.0.upstream/5.6.0upstream
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'lib/cache/impl.h')
-rw-r--r-- | lib/cache/impl.h | 439 |
1 files changed, 439 insertions, 0 deletions
diff --git a/lib/cache/impl.h b/lib/cache/impl.h new file mode 100644 index 0000000..305f36e --- /dev/null +++ b/lib/cache/impl.h @@ -0,0 +1,439 @@ +/* Copyright (C) CZ.NIC, z.s.p.o. <knot-resolver@labs.nic.cz> + * SPDX-License-Identifier: GPL-3.0-or-later + */ + +/** @file + * Header internal for cache implementation(s). + * Only LMDB works for now. + */ +#pragma once + +#include <stdbool.h> +#include <stdint.h> + +#include <libdnssec/error.h> +#include <libdnssec/nsec.h> +#include <libknot/consts.h> +#include <libknot/db/db.h> +#include <libknot/dname.h> + +#include "contrib/cleanup.h" +#include "contrib/murmurhash3/murmurhash3.h" /* hash() for nsec_p_hash() */ +#include "lib/cache/cdb_api.h" +#include "lib/resolve.h" + +/* Cache entry values - binary layout. + * + * It depends on type which is recognizable by the key. + * Code depending on the contents of the key is marked by CACHE_KEY_DEF. + * + * 'E' entry (exact hit): + * - ktype == NS: struct entry_apex - multiple types inside (NS and xNAME); + * - ktype != NS: struct entry_h + * * is_packet: uint16_t length, the rest is opaque and handled by ./entry_pkt.c + * * otherwise RRset + its RRSIG set (possibly empty). + * '1' or '3' entry (NSEC or NSEC3) + * - struct entry_h, contents is the same as for exact hit + * - flags don't make sense there + */ + +struct entry_h { + uint32_t time; /**< The time of inception. */ + uint32_t ttl; /**< TTL at inception moment. Assuming it fits into int32_t ATM. */ + uint8_t rank : 6; /**< See enum kr_rank */ + bool is_packet : 1; /**< Negative-answer packet for insecure/bogus name. */ + bool has_optout : 1; /**< Only for packets; persisted DNSSEC_OPTOUT. */ + uint8_t _pad; /**< We need even alignment for data now. */ + uint8_t data[]; +/* Well, we don't really need packing or alignment changes, + * but due to LMDB the whole structure may not be stored at an aligned address, + * and we need compilers (for non-x86) to know it to avoid SIGBUS (test: UBSAN). */ +} __attribute__ ((packed,aligned(1))); +struct entry_apex; + +/** Check basic consistency of entry_h for 'E' entries, not looking into ->data. + * (for is_packet the length of data is checked) + */ +KR_EXPORT +struct entry_h * entry_h_consistent_E(knot_db_val_t data, uint16_t type); + +struct entry_apex * entry_apex_consistent(knot_db_val_t val); + +/** Consistency check, ATM common for NSEC and NSEC3. */ +static inline struct entry_h * entry_h_consistent_NSEC(knot_db_val_t data) +{ + /* ATM it's enough to just extend the checks for exact entries. */ + const struct entry_h *eh = entry_h_consistent_E(data, KNOT_RRTYPE_NSEC); + bool ok = eh != NULL; + ok = ok && !eh->is_packet && !eh->has_optout; + return ok ? /*const-cast*/(struct entry_h *)eh : NULL; +} + +static inline struct entry_h * entry_h_consistent(knot_db_val_t data, uint16_t type) +{ + switch (type) { + case KNOT_RRTYPE_NSEC: + case KNOT_RRTYPE_NSEC3: + return entry_h_consistent_NSEC(data); + default: + return entry_h_consistent_E(data, type); + } +} + +/* nsec_p* - NSEC* chain parameters */ + +static inline int nsec_p_rdlen(const uint8_t *rdata) +{ + //TODO: do we really need the zero case? + return rdata ? 5 + rdata[4] : 0; /* rfc5155 4.2 and 3.2. */ +} +static const int NSEC_P_MAXLEN = sizeof(uint32_t) + 5 + 255; // TODO: remove?? + +/** Hash of NSEC3 parameters, used as a tag to separate different chains for same zone. */ +typedef uint32_t nsec_p_hash_t; +static inline nsec_p_hash_t nsec_p_mkHash(const uint8_t *nsec_p) +{ + kr_require(nsec_p && !(KNOT_NSEC3_FLAG_OPT_OUT & nsec_p[1])); + return hash((const char *)nsec_p, nsec_p_rdlen(nsec_p)); +} + +/** NSEC* parameters for the chain. */ +struct nsec_p { + const uint8_t *raw; /**< Pointer to raw NSEC3 parameters; NULL for NSEC. */ + nsec_p_hash_t hash; /**< Hash of `raw`, used for cache keys. */ + dnssec_nsec3_params_t libknot; /**< Format for libknot; owns malloced memory! */ +}; + + + +/** LATER(optim.): this is overshot, but struct key usage should be cheap ATM. */ +#define KR_CACHE_KEY_MAXLEN (KNOT_DNAME_MAXLEN + 100) /* CACHE_KEY_DEF */ + +struct key { + const knot_dname_t *zname; /**< current zone name (points within qry->sname) */ + uint8_t zlf_len; /**< length of current zone's lookup format */ + + /** Corresponding key type; e.g. NS for CNAME. + * Note: NSEC type is ambiguous (exact and range key). */ + uint16_t type; + /** The key data start at buf+1, and buf[0] contains some length. + * For details see key_exact* and key_NSEC* functions. */ + uint8_t buf[KR_CACHE_KEY_MAXLEN]; + /* LATER(opt.): ^^ probably change the anchoring, so that kr_dname_lf() + * doesn't need to move data after knot_dname_lf(). */ +}; + +static inline size_t key_nwz_off(const struct key *k) +{ + /* CACHE_KEY_DEF: zone name lf + 0 ('1' or '3'). + * NSEC '1' case continues just with the name within zone. */ + return k->zlf_len + 2; +} +static inline size_t key_nsec3_hash_off(const struct key *k) +{ + /* CACHE_KEY_DEF NSEC3: tag (nsec_p_hash_t) + 20 bytes NSEC3 name hash) */ + return key_nwz_off(k) + sizeof(nsec_p_hash_t); +} +/** Hash is always SHA1; I see no plans to standardize anything else. + * https://www.iana.org/assignments/dnssec-nsec3-parameters/dnssec-nsec3-parameters.xhtml#dnssec-nsec3-parameters-3 + */ +static const int NSEC3_HASH_LEN = 20, + NSEC3_HASH_TXT_LEN = 32; + +/** Finish constructing string key for for exact search. + * It's assumed that kr_dname_lf(k->buf, owner, *) had been ran. + */ +knot_db_val_t key_exact_type_maypkt(struct key *k, uint16_t type); + +/** Like key_exact_type_maypkt but with extra checks if used for RRs only. */ +static inline knot_db_val_t key_exact_type(struct key *k, uint16_t type) +{ + switch (type) { + /* Sanity check: forbidden types represented in other way(s). */ + case KNOT_RRTYPE_NSEC: + case KNOT_RRTYPE_NSEC3: + kr_assert(false); + return (knot_db_val_t){ NULL, 0 }; + } + return key_exact_type_maypkt(k, type); +} + + +/* entry_h chaining; implementation in ./entry_list.c */ + +enum { ENTRY_APEX_NSECS_CNT = 2 }; + +/** Header of 'E' entry with ktype == NS. Inside is private to ./entry_list.c + * + * We store xNAME at NS type to lower the number of searches in closest_NS(). + * CNAME is only considered for equal name, of course. + * We also store NSEC* parameters at NS type. + */ +struct entry_apex { + /* ENTRY_H_FLAGS */ + bool has_ns : 1; + bool has_cname : 1; + bool has_dname : 1; + + uint8_t pad_; /**< 1 byte + 2 bytes + x bytes would be weird; let's do 2+2+x. */ + + /** We have two slots for NSEC* parameters. + * + * This array describes how they're filled; + * values: 0: none, 1: NSEC, 3: NSEC3. + * + * Two slots are a compromise to smoothly handle normal rollovers + * (either changing NSEC3 parameters or between NSEC and NSEC3). */ + int8_t nsecs[ENTRY_APEX_NSECS_CNT]; + uint8_t data[]; + /* XXX: if not first, stamp of last being the first? + * Purpose: save cache operations if rolled the algo/params long ago. */ +}; + +/** Indices for decompressed entry_list_t. */ +enum EL { + EL_NS = ENTRY_APEX_NSECS_CNT, + EL_CNAME, + EL_DNAME, + EL_LENGTH +}; +/** Decompressed entry_apex. It's an array of unparsed entry_h references. + * Note: arrays are passed "by reference" to functions (in C99). */ +typedef knot_db_val_t entry_list_t[EL_LENGTH]; + +static inline uint16_t EL2RRTYPE(enum EL i) +{ + switch (i) { + case EL_NS: return KNOT_RRTYPE_NS; + case EL_CNAME: return KNOT_RRTYPE_CNAME; + case EL_DNAME: return KNOT_RRTYPE_DNAME; + default: kr_assert(false); return 0; + } +} + +/** There may be multiple entries within, so rewind `val` to the one we want. + * + * ATM there are multiple types only for the NS ktype - it also accommodates xNAMEs. + * \note `val->len` represents the bound of the whole list, not of a single entry. + * \note in case of ENOENT, `val` is still rewound to the beginning of the next entry. + * \return error code + * TODO: maybe get rid of this API? + */ +int entry_h_seek(knot_db_val_t *val, uint16_t type); + +/** Prepare space to insert an entry. + * + * Some checks are performed (rank, TTL), the current entry in cache is copied + * with a hole ready for the new entry (old one of the same type is cut out). + * + * \param val_new_entry The only changing parameter; ->len is read, ->data written. + * \return error code + */ +int entry_h_splice( + knot_db_val_t *val_new_entry, uint8_t rank, + const knot_db_val_t key, const uint16_t ktype, const uint16_t type, + const knot_dname_t *owner/*log only*/, + const struct kr_query *qry, struct kr_cache *cache, uint32_t timestamp); + +/** Parse an entry_apex into individual items. @return error code. */ +KR_EXPORT int entry_list_parse(const knot_db_val_t val, entry_list_t list); + +static inline size_t to_even(size_t n) +{ + return n + (n & 1); +} + +static inline int entry_list_serial_size(const entry_list_t list) +{ + int size = offsetof(struct entry_apex, data); + for (int i = 0; i < EL_LENGTH; ++i) { + size += to_even(list[i].len); + } + return size; +} + +/** Fill contents of an entry_apex. + * + * @note NULL pointers are overwritten - caller may like to fill the space later. + */ +void entry_list_memcpy(struct entry_apex *ea, entry_list_t list); + + + +/* Packet caching; implementation in ./entry_pkt.c */ + +/** Stash the packet into cache (if suitable, etc.) + * \param needs_pkt we need the packet due to not stashing some RRs; + * see stash_rrset() for details + * It assumes check_dname_for_lf(). */ +void stash_pkt(const knot_pkt_t *pkt, const struct kr_query *qry, + const struct kr_request *req, bool needs_pkt); + +/** Try answering from packet cache, given an entry_h. + * + * This assumes the TTL is OK and entry_h_consistent, but it may still return error. + * On success it handles all the rest, incl. qry->flags. + */ +int answer_from_pkt(kr_layer_t *ctx, knot_pkt_t *pkt, uint16_t type, + const struct entry_h *eh, const void *eh_bound, uint32_t new_ttl); + + +/** Record is expiring if it has less than 1% TTL (or less than 5s) */ +static inline bool is_expiring(uint32_t orig_ttl, uint32_t new_ttl) +{ + int64_t nttl = new_ttl; /* avoid potential over/under-flow */ + return 100 * (nttl - 5) < orig_ttl; +} + +/** Returns signed result so you can inspect how much stale the RR is. + * + * @param owner name for stale-serving decisions. You may pass NULL to disable stale. + * @note: NSEC* uses zone name ATM; for NSEC3 the owner may not even be knowable. + * @param type for stale-serving. + */ +int32_t get_new_ttl(const struct entry_h *entry, const struct kr_query *qry, + const knot_dname_t *owner, uint16_t type, uint32_t now); + + +/* RRset (de)materialization; implementation in ./entry_rr.c */ + +/** Size of the RR count field */ +#define KR_CACHE_RR_COUNT_SIZE sizeof(uint16_t) + +/** Compute size of serialized rdataset. NULL is accepted as empty set. */ +static inline int rdataset_dematerialize_size(const knot_rdataset_t *rds) +{ + return KR_CACHE_RR_COUNT_SIZE + (rds == NULL ? 0 : rds->size); +} + +/** Analyze the length of a dematerialized rdataset. + * Note that in the data it's KR_CACHE_RR_COUNT_SIZE and then this returned size. */ +static inline int rdataset_dematerialized_size(const uint8_t *data, uint16_t *rdataset_count) +{ + uint16_t count; + static_assert(sizeof(count) == KR_CACHE_RR_COUNT_SIZE, + "Unexpected KR_CACHE_RR_COUNT_SIZE."); + memcpy(&count, data, sizeof(count)); + const uint8_t *rdata = data + sizeof(count); + if (rdataset_count) // memcpy is safe for unaligned case (on non-x86) + memcpy(rdataset_count, &count, sizeof(count)); + for (int i = 0; i < count; ++i) { + __typeof__(((knot_rdata_t *)NULL)->len) len; // memcpy as above + memcpy(&len, rdata + offsetof(knot_rdata_t, len), sizeof(len)); + rdata += knot_rdata_size(len); + } + return rdata - (data + sizeof(count)); +} + +/** Serialize an rdataset. It may be NULL as short-hand for empty. */ +void rdataset_dematerialize(const knot_rdataset_t *rds, uint8_t * restrict data); + + +/** Partially constructed answer when gathering RRsets from cache. */ +struct answer { + int rcode; /**< PKT_NODATA, etc. */ + struct nsec_p nsec_p; /**< Don't mix different NSEC* parameters in one answer. */ + knot_mm_t *mm; /**< Allocator for rrsets */ + struct answer_rrset { + ranked_rr_array_entry_t set; /**< set+rank for the main data */ + knot_rdataset_t sig_rds; /**< RRSIG data, if any */ + } rrsets[1+1+3]; /**< see AR_ANSWER and friends; only required records are filled */ +}; +enum { + AR_ANSWER = 0, /**< Positive answer record. It might be wildcard-expanded. */ + AR_SOA, /**< SOA record. */ + AR_NSEC, /**< NSEC* covering or matching the SNAME (next closer name in NSEC3 case). */ + AR_WILD, /**< NSEC* covering or matching the source of synthesis. */ + AR_CPE, /**< NSEC3 matching the closest provable encloser. */ +}; + +/** Materialize RRset + RRSIGs into ans->rrsets[id]. + * LATER(optim.): it's slightly wasteful that we allocate knot_rrset_t for the packet + * + * \return error code. They are all bad conditions and "guarded" by kresd's assertions. + */ +int entry2answer(struct answer *ans, int id, + const struct entry_h *eh, const uint8_t *eh_bound, + const knot_dname_t *owner, uint16_t type, uint32_t new_ttl); + + +/* Preparing knot_pkt_t for cache answer from RRs; implementation in ./knot_pkt.c */ + +/** Prepare answer packet to be filled by RRs (without RR data in wire). */ +int pkt_renew(knot_pkt_t *pkt, const knot_dname_t *name, uint16_t type); + +/** Append RRset + its RRSIGs into the current section (*shallow* copy), with given rank. + * + * \note it works with empty set as well (skipped) + * \note pkt->wire is not updated in any way + * \note KNOT_CLASS_IN is assumed + * \note Whole RRsets are put into the pseudo-packet; + * normal parsed packets would only contain single-RR sets. + */ +int pkt_append(knot_pkt_t *pkt, const struct answer_rrset *rrset, uint8_t rank); + + + +/* NSEC (1) stuff. Implementation in ./nsec1.c */ + +/** Construct a string key for for NSEC (1) predecessor-search. + * \param add_wildcard Act as if the name was extended by "*." + * \note k->zlf_len is assumed to have been correctly set */ +knot_db_val_t key_NSEC1(struct key *k, const knot_dname_t *name, bool add_wildcard); + +/** Closest encloser check for NSEC (1). + * To understand the interface, see the call point. + * \param k space to store key + input: zname and zlf_len + * \return 0: success; >0: try other (NSEC3); <0: exit cache immediately. */ +int nsec1_encloser(struct key *k, struct answer *ans, + const int sname_labels, int *clencl_labels, + knot_db_val_t *cover_low_kwz, knot_db_val_t *cover_hi_kwz, + const struct kr_query *qry, struct kr_cache *cache); + +/** Source of synthesis (SS) check for NSEC (1). + * To understand the interface, see the call point. + * \return 0: continue; <0: exit cache immediately; + * AR_SOA: skip to adding SOA (SS was covered or matched for NODATA). */ +int nsec1_src_synth(struct key *k, struct answer *ans, const knot_dname_t *clencl_name, + knot_db_val_t cover_low_kwz, knot_db_val_t cover_hi_kwz, + const struct kr_query *qry, struct kr_cache *cache); + + +/* NSEC3 stuff. Implementation in ./nsec3.c */ + +/** Construct a string key for for NSEC3 predecessor-search, from an NSEC3 name. + * \note k->zlf_len is assumed to have been correctly set */ +knot_db_val_t key_NSEC3(struct key *k, const knot_dname_t *nsec3_name, + const nsec_p_hash_t nsec_p_hash); + +/** TODO. See nsec1_encloser(...) */ +int nsec3_encloser(struct key *k, struct answer *ans, + const int sname_labels, int *clencl_labels, + const struct kr_query *qry, struct kr_cache *cache); + +/** TODO. See nsec1_src_synth(...) */ +int nsec3_src_synth(struct key *k, struct answer *ans, const knot_dname_t *clencl_name, + const struct kr_query *qry, struct kr_cache *cache); + + + +#define VERBOSE_MSG(qry, ...) kr_log_q((qry), CACHE, ## __VA_ARGS__) +#define WITH_VERBOSE(qry) if (kr_log_is_debug_qry(CACHE, (qry))) + +/** Shorthand for operations on cache backend */ +#define cache_op(cache, op, ...) (cache)->api->op((cache)->db, &(cache)->stats, ## __VA_ARGS__) + + +static inline uint16_t get_uint16(const void *address) +{ + uint16_t tmp; + memcpy(&tmp, address, sizeof(tmp)); + return tmp; +} + +/** Useful pattern, especially as void-pointer arithmetic isn't standard-compliant. */ +static inline uint8_t * knot_db_val_bound(knot_db_val_t val) +{ + return (uint8_t *)val.data + val.len; +} + |