/*  Copyright (C) CZ.NIC, z.s.p.o. <knot-resolver@labs.nic.cz>
 *  SPDX-License-Identifier: GPL-3.0-or-later
 */

/** @file
 * Internal header for the cache implementation(s).
 * Only the LMDB backend works for now.
 */
#pragma once

#include <assert.h>  /* static_assert */
#include <stdbool.h>
#include <stddef.h>  /* offsetof */
#include <stdint.h>
#include <string.h>  /* memcpy */

#include <libdnssec/error.h>
#include <libdnssec/nsec.h>
#include <libknot/consts.h>
#include <libknot/db/db.h>
#include <libknot/dname.h>

#include "contrib/cleanup.h"
#include "contrib/murmurhash3/murmurhash3.h" /* hash() for nsec_p_hash() */
#include "lib/cache/cdb_api.h"
#include "lib/resolve.h"

/* Cache entry values - binary layout.
 *
 * The layout depends on the entry type, which is recognizable from the key.
 * Code depending on the contents of the key is marked by CACHE_KEY_DEF.
 *
 * 'E' entry (exact hit):
 *	- ktype == NS: struct entry_apex - multiple types inside (NS and xNAME);
 *	- ktype != NS: struct entry_h
 *	    * is_packet: uint16_t length, the rest is opaque and handled by ./entry_pkt.c
 *	    * otherwise RRset + its RRSIG set (possibly empty).
 * '1' or '3' entry (NSEC or NSEC3):
 *	- struct entry_h, with the same contents as for an exact hit;
 *	- the is_packet and has_optout flags don't apply here.
 */

struct entry_h {
	uint32_t time;	/**< The time of inception. */
	uint32_t ttl;	/**< TTL at inception moment.  Assuming it fits into int32_t ATM. */
	uint8_t  rank : 6;	/**< See enum kr_rank */
	bool is_packet : 1;	/**< Negative-answer packet for insecure/bogus name. */
	bool has_optout : 1;	/**< Only for packets; persisted DNSSEC_OPTOUT. */
	uint8_t _pad;		/**< We need even alignment for data now. */
	uint8_t data[];
/* We don't really need packing or alignment changes,
 * but due to LMDB the whole structure may be stored at an unaligned address,
 * and compilers (on strict-alignment, non-x86 platforms) need to know that
 * to avoid SIGBUS (tested by UBSAN). */
} __attribute__ ((packed,aligned(1)));
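
/* Illustrative sketch (not a real call site; variable names are hypothetical):
 * reading a stored packet according to the is_packet layout described above.
 *
 *	const struct entry_h *eh = entry_h_consistent_E(val, type);
 *	if (eh && eh->is_packet) {
 *		uint16_t pkt_len;
 *		memcpy(&pkt_len, eh->data, sizeof(pkt_len)); // unaligned-safe
 *		const uint8_t *pkt_wire = eh->data + sizeof(pkt_len);
 *		// ./entry_pkt.c interprets pkt_wire[0 .. pkt_len-1]
 *	}
 */
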
struct entry_apex;

/** Check basic consistency of entry_h for 'E' entries, without looking into ->data
 * (except that for is_packet the length of the data is checked).
 */
KR_EXPORT
struct entry_h * entry_h_consistent_E(knot_db_val_t data, uint16_t type);

struct entry_apex * entry_apex_consistent(knot_db_val_t val);

/** Consistency check, ATM common for NSEC and NSEC3. */
static inline struct entry_h * entry_h_consistent_NSEC(knot_db_val_t data)
{
	/* ATM it's enough to just extend the checks for exact entries. */
	const struct entry_h *eh = entry_h_consistent_E(data, KNOT_RRTYPE_NSEC);
	bool ok = eh != NULL;
	ok = ok && !eh->is_packet && !eh->has_optout;
	return ok ? /*const-cast*/(struct entry_h *)eh : NULL;
}

static inline struct entry_h * entry_h_consistent(knot_db_val_t data, uint16_t type)
{
	switch (type) {
	case KNOT_RRTYPE_NSEC:
	case KNOT_RRTYPE_NSEC3:
		return entry_h_consistent_NSEC(data);
	default:
		return entry_h_consistent_E(data, type);
	}
}
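
/* Illustrative usage sketch (hypothetical call site) of the checks above:
 *
 *	const struct entry_h *eh = entry_h_consistent(val, qry->stype);
 *	if (!eh)
 *		return kr_error(EILSEQ); // cache contents are unusable
 */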

/* nsec_p* - NSEC* chain parameters */

static inline int nsec_p_rdlen(const uint8_t *rdata)
{
	//TODO: do we really need the zero case?
	return rdata ? 5 + rdata[4] : 0; /* RFC 5155, sections 4.2 and 3.2 */
}
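
/* Worked example for nsec_p_rdlen(): per RFC 5155 the parameter block is
 * hash algorithm (1 byte), flags (1), iterations (2), salt length (1), salt;
 * e.g. an 8-byte salt gives rdata[4] == 8, so rdlen == 5 + 8 == 13. */
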
static const int NSEC_P_MAXLEN = sizeof(uint32_t) + 5 + 255; // TODO: remove??

/** Hash of NSEC3 parameters, used as a tag to separate different chains for the same zone. */
typedef uint32_t nsec_p_hash_t;
static inline nsec_p_hash_t nsec_p_mkHash(const uint8_t *nsec_p)
{
	kr_require(nsec_p && !(KNOT_NSEC3_FLAG_OPT_OUT & nsec_p[1]));
	return hash((const char *)nsec_p, nsec_p_rdlen(nsec_p));
}

/** NSEC* parameters for the chain. */
struct nsec_p {
	const uint8_t *raw; /**< Pointer to raw NSEC3 parameters; NULL for NSEC. */
	nsec_p_hash_t hash; /**< Hash of `raw`, used for cache keys. */
	dnssec_nsec3_params_t libknot; /**< Format for libknot; owns malloced memory! */
};



/** LATER(optim.): this is overshot, but struct key usage should be cheap ATM. */
#define KR_CACHE_KEY_MAXLEN (KNOT_DNAME_MAXLEN + 100) /* CACHE_KEY_DEF */

struct key {
	const knot_dname_t *zname; /**< current zone name (points within qry->sname) */
	uint8_t zlf_len; /**< length of current zone's lookup format */

	/** Corresponding key type; e.g. NS for CNAME.
	 * Note: NSEC type is ambiguous (exact and range key). */
	uint16_t type;
	/** The key data starts at buf+1, and buf[0] contains some length.
	 * For details see key_exact* and key_NSEC* functions. */
	uint8_t buf[KR_CACHE_KEY_MAXLEN];
	/* LATER(opt.): ^^ probably change the anchoring, so that kr_dname_lf()
	 * doesn't need to move data after knot_dname_lf(). */
};

static inline size_t key_nwz_off(const struct key *k)
{
	/* CACHE_KEY_DEF: zone name in lookup format + zero byte + tag ('1' or '3').
	 * The NSEC '1' case continues just with the name within the zone. */
	return k->zlf_len + 2;
}
static inline size_t key_nsec3_hash_off(const struct key *k)
{
	/* CACHE_KEY_DEF NSEC3: tag (nsec_p_hash_t) + 20-byte NSEC3 name hash */
	return key_nwz_off(k) + sizeof(nsec_p_hash_t);
}
/** Hash is always SHA1; I see no plans to standardize anything else.
 * https://www.iana.org/assignments/dnssec-nsec3-parameters/dnssec-nsec3-parameters.xhtml#dnssec-nsec3-parameters-3
 */
static const int NSEC3_HASH_LEN = 20,
		 NSEC3_HASH_TXT_LEN = 32;
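
/* Worked key-layout example (byte offsets within the key data; zlf_len == 7
 * is made up).  An NSEC3 key then looks like:
 *	[0..6]    zone name in lookup format
 *	[7]       zero byte
 *	[8]       '3' tag
 *	[9..12]   nsec_p_hash_t tag         -> key_nwz_off() == 9
 *	[13..32]  20-byte NSEC3 name hash   -> key_nsec3_hash_off() == 13
 */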

/** Finish constructing a string key for exact search.
 * It's assumed that kr_dname_lf(k->buf, owner, *) has been run beforehand.
 */
knot_db_val_t key_exact_type_maypkt(struct key *k, uint16_t type);

/** Like key_exact_type_maypkt() but with an extra sanity check, for use with RRs only. */
static inline knot_db_val_t key_exact_type(struct key *k, uint16_t type)
{
	switch (type) {
	/* Sanity check: forbidden types represented in other way(s). */
	case KNOT_RRTYPE_NSEC:
	case KNOT_RRTYPE_NSEC3:
		kr_assert(false);
		return (knot_db_val_t){ NULL, 0 };
	}
	return key_exact_type_maypkt(k, type);
}
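
/* Illustrative sketch (hypothetical; the struct key set-up is simplified):
 * building an exact-search key, mirroring the kr_dname_lf() assumption
 * documented above:
 *
 *	struct key k = { .zname = zname };
 *	kr_dname_lf(k.buf, owner, false);
 *	knot_db_val_t key = key_exact_type(&k, KNOT_RRTYPE_A);
 *	if (!key.data)
 *		return kr_error(EINVAL);
 */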


/* entry_h chaining; implementation in ./entry_list.c */

enum { ENTRY_APEX_NSECS_CNT = 2 };

/** Header of 'E' entry with ktype == NS.  Its inside is private to ./entry_list.c
 *
 * We store xNAME at NS type to lower the number of searches in closest_NS().
 * CNAME is only considered for equal name, of course.
 * We also store NSEC* parameters at NS type.
 */
struct entry_apex {
	/* ENTRY_H_FLAGS */
	bool has_ns : 1;
	bool has_cname : 1;
	bool has_dname : 1;

	uint8_t pad_; /**< 1 byte + 2 bytes + x bytes would be weird; let's do 2+2+x. */

	/** We have two slots for NSEC* parameters.
	 *
	 * This array describes how they're filled;
	 * values:  0: none, 1: NSEC, 3: NSEC3.
	 *
	 * Two slots are a compromise to smoothly handle normal rollovers
	 * (either changing NSEC3 parameters or between NSEC and NSEC3). */
	int8_t nsecs[ENTRY_APEX_NSECS_CNT];
	uint8_t data[];
	/* XXX: if not first, stamp of last being the first?
	 * Purpose: save cache operations if rolled the algo/params long ago. */
};

/** Indices for decompressed entry_list_t. */
enum EL {
	EL_NS = ENTRY_APEX_NSECS_CNT,
	EL_CNAME,
	EL_DNAME,
	EL_LENGTH
};
/** Decompressed entry_apex.  It's an array of unparsed entry_h references.
 * Note: arrays are passed "by reference" to functions (in C99). */
typedef knot_db_val_t entry_list_t[EL_LENGTH];

static inline uint16_t EL2RRTYPE(enum EL i)
{
	switch (i) {
	case EL_NS:	return KNOT_RRTYPE_NS;
	case EL_CNAME:	return KNOT_RRTYPE_CNAME;
	case EL_DNAME:	return KNOT_RRTYPE_DNAME;
	default:	kr_assert(false);  return 0;
	}
}

/** There may be multiple entries within, so rewind `val` to the one we want.
 *
 * ATM there are multiple types only for the NS ktype - it also accommodates xNAMEs.
 * \note `val->len` represents the bound of the whole list, not of a single entry.
 * \note in case of ENOENT, `val` is still rewound to the beginning of the next entry.
 * \return error code
 * TODO: maybe get rid of this API?
 */
int entry_h_seek(knot_db_val_t *val, uint16_t type);
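
/* Illustrative sketch (hypothetical): seeking within a multi-type 'E' value
 * and validating the chosen entry afterwards:
 *
 *	knot_db_val_t val = ...; // the whole value read for an NS-ktype key
 *	int ret = entry_h_seek(&val, type);
 *	const struct entry_h *eh = ret ? NULL : entry_h_consistent_E(val, type);
 */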

/** Prepare space to insert an entry.
 *
 * Some checks are performed (rank, TTL); the current cache entry is copied
 * with a hole ready for the new entry (the old entry of the same type is cut out).
 *
 * \param val_new_entry The only changing parameter; ->len is read, ->data written.
 * \return error code
 */
int entry_h_splice(
	knot_db_val_t *val_new_entry, uint8_t rank,
	const knot_db_val_t key, const uint16_t ktype, const uint16_t type,
	const knot_dname_t *owner/*log only*/,
	const struct kr_query *qry, struct kr_cache *cache, uint32_t timestamp);
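
/* Illustrative sketch (hypothetical) of the splice pattern: only ->len of
 * val_new_entry is set beforehand; on success ->data points into the cache,
 * and the caller then writes the serialized entry there:
 *
 *	knot_db_val_t val_new = { .data = NULL, .len = entry_size };
 *	int ret = entry_h_splice(&val_new, rank, key, ktype, type,
 *				 owner, qry, cache, timestamp);
 *	if (!ret)
 *		memcpy(val_new.data, serialized_entry, val_new.len);
 */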

/** Parse an entry_apex into individual items.  @return error code. */
KR_EXPORT int entry_list_parse(const knot_db_val_t val, entry_list_t list);
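
/* Illustrative sketch (hypothetical): decompressing an apex value and picking
 * parts out via the EL indices above:
 *
 *	entry_list_t list;
 *	int ret = entry_list_parse(val, list);
 *	if (!ret) {
 *		knot_db_val_t ns = list[EL_NS]; // unparsed entry_h reference
 *		knot_db_val_t nsec_first = list[0]; // first NSEC* parameter slot
 *	}
 */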

static inline size_t to_even(size_t n)
{
	return n + (n & 1);
}

static inline int entry_list_serial_size(const entry_list_t list)
{
	int size = offsetof(struct entry_apex, data);
	for (int i = 0; i < EL_LENGTH; ++i) {
		size += to_even(list[i].len);
	}
	return size;
}

/** Fill contents of an entry_apex.
 *
 * @note NULL pointers are overwritten - the caller may want to fill the space later.
 */
void entry_list_memcpy(struct entry_apex *ea, entry_list_t list);
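
/* Illustrative sketch (hypothetical): serializing a list back into an apex
 * entry; the size matches entry_list_serial_size() above:
 *
 *	knot_db_val_t val = { .data = NULL, .len = entry_list_serial_size(list) };
 *	// ...have the cache backend reserve val.len bytes, filling val.data...
 *	entry_list_memcpy((struct entry_apex *)val.data, list);
 */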



/* Packet caching; implementation in ./entry_pkt.c */

/** Stash the packet into the cache (if suitable, etc.).
 * It assumes check_dname_for_lf().
 * \param needs_pkt we need the packet because some RRs were not stashed;
 * 		see stash_rrset() for details */
void stash_pkt(const knot_pkt_t *pkt, const struct kr_query *qry,
		const struct kr_request *req, bool needs_pkt);

/** Try answering from packet cache, given an entry_h.
 *
 * This assumes the TTL is OK and that entry_h_consistent() has passed, but it may still return an error.
 * On success it handles all the rest, incl. qry->flags.
 */
int answer_from_pkt(kr_layer_t *ctx, knot_pkt_t *pkt, uint16_t type,
		const struct entry_h *eh, const void *eh_bound, uint32_t new_ttl);


/** A record is expiring if it has less than 1% of its original TTL remaining (or less than 5s). */
static inline bool is_expiring(uint32_t orig_ttl, uint32_t new_ttl)
{
	int64_t nttl = new_ttl; /* avoid potential over/under-flow */
	return 100 * (nttl - 5) < orig_ttl;
}
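
/* Worked example: with orig_ttl == 86400 the threshold is 86400/100 + 5 == 869,
 * so new_ttl == 600 is expiring (100 * 595 < 86400)
 * while new_ttl == 900 is not (100 * 895 > 86400). */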

/** Returns a signed result so you can inspect how stale the RR is.
 *
 * @param owner name for stale-serving decisions.  You may pass NULL to disable stale.
 * @note: NSEC* uses zone name ATM; for NSEC3 the owner may not even be knowable.
 * @param type for stale-serving.
 */
int32_t get_new_ttl(const struct entry_h *entry, const struct kr_query *qry,
                    const knot_dname_t *owner, uint16_t type, uint32_t now);
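
/* Illustrative sketch (hypothetical call site): stale-serving decisions
 * typically inspect the sign of the result:
 *
 *	int32_t new_ttl = get_new_ttl(eh, qry, qry->sname, qry->stype, now);
 *	if (new_ttl < 0)
 *		; // stale by about -new_ttl seconds; serve only if policy allows
 */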


/* RRset (de)materialization; implementation in ./entry_rr.c */

/** Size of the RR count field */
#define KR_CACHE_RR_COUNT_SIZE sizeof(uint16_t)

/** Compute size of serialized rdataset.  NULL is accepted as empty set. */
static inline int rdataset_dematerialize_size(const knot_rdataset_t *rds)
{
	return KR_CACHE_RR_COUNT_SIZE + (rds == NULL ? 0 : rds->size);
}

/** Compute the length of a dematerialized rdataset.
 * Note: in the data there are KR_CACHE_RR_COUNT_SIZE bytes (the count) followed by this returned size. */
static inline int rdataset_dematerialized_size(const uint8_t *data, uint16_t *rdataset_count)
{
	uint16_t count;
	static_assert(sizeof(count) == KR_CACHE_RR_COUNT_SIZE,
			"Unexpected KR_CACHE_RR_COUNT_SIZE.");
	memcpy(&count, data, sizeof(count));
	const uint8_t *rdata = data + sizeof(count);
	if (rdataset_count) // memcpy is safe for unaligned case (on non-x86)
		memcpy(rdataset_count, &count, sizeof(count));
	for (int i = 0; i < count; ++i) {
		__typeof__(((knot_rdata_t *)NULL)->len) len; // memcpy as above
		memcpy(&len, rdata + offsetof(knot_rdata_t, len), sizeof(len));
		rdata += knot_rdata_size(len);
	}
	return rdata - (data + sizeof(count));
}
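
/* Serialized rdataset layout, as walked by the loop above (a sketch; each
 * item's exact padded size is whatever knot_rdata_size() dictates):
 *
 *	| uint16_t count | knot_rdata_t #1 | ... | knot_rdata_t #count |
 *	                   ^ each item: its 2-byte len field, then the rdata
 */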

/** Serialize an rdataset.  NULL may be passed as shorthand for an empty set. */
void rdataset_dematerialize(const knot_rdataset_t *rds, uint8_t * restrict data);


/** Partially constructed answer when gathering RRsets from cache. */
struct answer {
	int rcode;		/**< PKT_NODATA, etc. */
	struct nsec_p nsec_p;	/**< Don't mix different NSEC* parameters in one answer. */
	knot_mm_t *mm;		/**< Allocator for rrsets */
	struct answer_rrset {
		ranked_rr_array_entry_t set;	/**< set+rank for the main data */
		knot_rdataset_t sig_rds;	/**< RRSIG data, if any */
	} rrsets[1+1+3]; /**< see AR_ANSWER and friends; only required records are filled */
};
enum {
	AR_ANSWER = 0, /**< Positive answer record.  It might be wildcard-expanded. */
	AR_SOA,  /**< SOA record. */
	AR_NSEC, /**< NSEC* covering or matching the SNAME (next closer name in NSEC3 case). */
	AR_WILD, /**< NSEC* covering or matching the source of synthesis. */
	AR_CPE,  /**< NSEC3 matching the closest provable encloser. */
};

/** Materialize RRset + RRSIGs into ans->rrsets[id].
 * LATER(optim.): it's slightly wasteful that we allocate knot_rrset_t for the packet
 *
 * \return error code.  All failures are bad conditions, "guarded" by kresd's assertions.
 */
int entry2answer(struct answer *ans, int id,
		const struct entry_h *eh, const uint8_t *eh_bound,
		const knot_dname_t *owner, uint16_t type, uint32_t new_ttl);


/* Preparing knot_pkt_t for cache answer from RRs; implementation in ./knot_pkt.c */

/** Prepare answer packet to be filled by RRs (without RR data in wire). */
int pkt_renew(knot_pkt_t *pkt, const knot_dname_t *name, uint16_t type);

/** Append RRset + its RRSIGs into the current section (*shallow* copy), with given rank.
 *
 * \note it works with empty set as well (skipped)
 * \note pkt->wire is not updated in any way
 * \note KNOT_CLASS_IN is assumed
 * \note Whole RRsets are put into the pseudo-packet;
 *       normal parsed packets would only contain single-RR sets.
 */
int pkt_append(knot_pkt_t *pkt, const struct answer_rrset *rrset, uint8_t rank);



/* NSEC (1) stuff.  Implementation in ./nsec1.c */

/** Construct a string key for NSEC (1) predecessor-search.
 * \param add_wildcard Act as if the name was extended by "*."
 * \note k->zlf_len is assumed to have been correctly set */
knot_db_val_t key_NSEC1(struct key *k, const knot_dname_t *name, bool add_wildcard);

/** Closest encloser check for NSEC (1).
 * To understand the interface, see the call point.
 * \param k	space to store key + input: zname and zlf_len
 * \return 0: success;  >0: try other (NSEC3);  <0: exit cache immediately. */
int nsec1_encloser(struct key *k, struct answer *ans,
		   const int sname_labels, int *clencl_labels,
		   knot_db_val_t *cover_low_kwz, knot_db_val_t *cover_hi_kwz,
		   const struct kr_query *qry, struct kr_cache *cache);

/** Source of synthesis (SS) check for NSEC (1).
 * To understand the interface, see the call point.
 * \return 0: continue; <0: exit cache immediately;
 * 	AR_SOA: skip to adding SOA (SS was covered or matched for NODATA). */
int nsec1_src_synth(struct key *k, struct answer *ans, const knot_dname_t *clencl_name,
		    knot_db_val_t cover_low_kwz, knot_db_val_t cover_hi_kwz,
		    const struct kr_query *qry, struct kr_cache *cache);


/* NSEC3 stuff.  Implementation in ./nsec3.c */

/** Construct a string key for NSEC3 predecessor-search, from an NSEC3 name.
 * \note k->zlf_len is assumed to have been correctly set */
knot_db_val_t key_NSEC3(struct key *k, const knot_dname_t *nsec3_name,
			const nsec_p_hash_t nsec_p_hash);

/** TODO.  See nsec1_encloser(...) */
int nsec3_encloser(struct key *k, struct answer *ans,
		   const int sname_labels, int *clencl_labels,
		   const struct kr_query *qry, struct kr_cache *cache);

/** TODO.  See nsec1_src_synth(...) */
int nsec3_src_synth(struct key *k, struct answer *ans, const knot_dname_t *clencl_name,
		    const struct kr_query *qry, struct kr_cache *cache);



#define VERBOSE_MSG(qry, ...) kr_log_q((qry), CACHE,  ## __VA_ARGS__)
#define WITH_VERBOSE(qry) if (kr_log_is_debug_qry(CACHE, (qry)))

/** Shorthand for operations on cache backend */
#define cache_op(cache, op, ...) (cache)->api->op((cache)->db, &(cache)->stats, ## __VA_ARGS__)
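
/* Example expansion (assuming the backend provides a `read` op as declared
 * in cdb_api.h):
 *	cache_op(cache, read, &key, &val, 1)
 * expands to
 *	(cache)->api->read((cache)->db, &(cache)->stats, &key, &val, 1)
 */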


static inline uint16_t get_uint16(const void *address)
{
	uint16_t tmp;
	memcpy(&tmp, address, sizeof(tmp));
	return tmp;
}

/** Useful pattern, especially as void-pointer arithmetic isn't standard-compliant. */
static inline uint8_t * knot_db_val_bound(knot_db_val_t val)
{
	return (uint8_t *)val.data + val.len;
}