diff options
Diffstat (limited to '')
-rw-r--r-- | lib/cache/entry_list.c | 293 |
1 files changed, 293 insertions, 0 deletions
diff --git a/lib/cache/entry_list.c b/lib/cache/entry_list.c new file mode 100644 index 0000000..6a5001c --- /dev/null +++ b/lib/cache/entry_list.c @@ -0,0 +1,293 @@ +/* Copyright (C) 2017 CZ.NIC, z.s.p.o. <knot-dns@labs.nic.cz> + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see <https://www.gnu.org/licenses/>. + */ + +/** @file + * Implementation of chaining in struct entry_h. Prototypes in ./impl.h + */ + +#include "lib/cache/impl.h" +#include "lib/utils.h" + + +static int entry_h_len(knot_db_val_t val); + + +void entry_list_memcpy(struct entry_apex *ea, entry_list_t list) +{ + assert(ea); + memset(ea, 0, offsetof(struct entry_apex, data)); + ea->has_ns = list[EL_NS ].len; + ea->has_cname = list[EL_CNAME ].len; + ea->has_dname = list[EL_DNAME ].len; + for (int i = 0; i < ENTRY_APEX_NSECS_CNT; ++i) { + ea->nsecs[i] = list[i].len == 0 ? 0 : + (list[i].len == 4 ? 1 : 3); + } + uint8_t *it = ea->data; + for (int i = 0; i < EL_LENGTH; ++i) { + if (list[i].data) { + memcpy(it, list[i].data, list[i].len); + /* LATER(optim.): coalesce consecutive writes? */ + } else { + list[i].data = it; + } + it += to_even(list[i].len); + } +} + +int entry_list_parse(const knot_db_val_t val, entry_list_t list) +{ + const bool ok = val.data && val.len && list; + if (!ok) { + assert(!EINVAL); + return kr_error(EINVAL); + } + /* Parse the apex itself (nsec parameters). */ + const struct entry_apex *ea = entry_apex_consistent(val); + if (!ea) { + return kr_error(EILSEQ); + } + const uint8_t *it = ea->data, + *it_bound = knot_db_val_bound(val); + for (int i = 0; i < ENTRY_APEX_NSECS_CNT; ++i) { + if (it > it_bound) { + return kr_error(EILSEQ); + } + list[i].data = (void *)it; + switch (ea->nsecs[i]) { + case 0: + list[i].len = 0; + break; + case 1: + list[i].len = sizeof(uint32_t); /* just timestamp */ + break; + case 3: { /* timestamp + NSEC3PARAM wire */ + if (it + sizeof(uint32_t) + 4 > it_bound) { + return kr_error(EILSEQ); + } + list[i].len = sizeof(uint32_t) + + nsec_p_rdlen(it + sizeof(uint32_t)); + break; + } + default: + return kr_error(EILSEQ); + }; + it += to_even(list[i].len); + } + /* Parse every entry_h. */ + for (int i = ENTRY_APEX_NSECS_CNT; i < EL_LENGTH; ++i) { + list[i].data = (void *)it; + bool has_type; + switch (i) { + case EL_NS: has_type = ea->has_ns; break; + case EL_CNAME: has_type = ea->has_cname; break; + case EL_DNAME: has_type = ea->has_dname; break; + default: assert(false); return kr_error(EINVAL); /* something very bad */ + } + if (!has_type) { + list[i].len = 0; + continue; + } + if (it >= it_bound) { + assert(!EILSEQ); + return kr_error(EILSEQ); + } + const int len = entry_h_len( + (knot_db_val_t){ .data = (void *)it, .len = it_bound - it }); + if (len < 0) { + assert(false); + return kr_error(len); + } + list[i].len = len; + it += to_even(len); + } + assert(it == it_bound); + return kr_ok(); +} + +/** Given a valid entry header, find its length (i.e. offset of the next entry). + * \param val The beginning of the data and the bound (read only). + */ +static int entry_h_len(const knot_db_val_t val) +{ + const bool ok = val.data && ((ssize_t)val.len) > 0; + if (!ok) return kr_error(EINVAL); + const struct entry_h *eh = val.data; + const uint8_t *d = eh->data; /* iterates over the data in entry */ + const uint8_t *data_bound = knot_db_val_bound(val); + if (d >= data_bound) return kr_error(EILSEQ); + if (!eh->is_packet) { /* Positive RRset + its RRsig set (may be empty). */ + int sets = 2; + while (sets-- > 0) { + d += rdataset_dematerialized_size(d); + if (d > data_bound) { + assert(!EILSEQ); + return kr_error(EILSEQ); + } + } + } else { /* A "packet" (opaque ATM). */ + uint16_t len; + if (d + sizeof(len) > data_bound) return kr_error(EILSEQ); + memcpy(&len, d, sizeof(len)); + d += 2 + to_even(len); + } + if (d > data_bound) { + assert(!EILSEQ); + return kr_error(EILSEQ); + } + return d - (uint8_t *)val.data; +} + +struct entry_apex * entry_apex_consistent(knot_db_val_t val) +{ + //XXX: check lengths, etc. + return val.data; +} + +/* See the header file. */ +int entry_h_seek(knot_db_val_t *val, uint16_t type) +{ + int i = -1; + switch (type) { + case KNOT_RRTYPE_NS: i = EL_NS; break; + case KNOT_RRTYPE_CNAME: i = EL_CNAME; break; + case KNOT_RRTYPE_DNAME: i = EL_DNAME; break; + default: return kr_ok(); + } + + entry_list_t el; + int ret = entry_list_parse(*val, el); + if (ret) return ret; + *val = el[i]; + return val->len ? kr_ok() : kr_error(ENOENT); +} + +static int cache_write_or_clear(struct kr_cache *cache, const knot_db_val_t *key, + knot_db_val_t *val, const struct kr_query *qry) +{ + int ret = cache_op(cache, write, key, val, 1); + if (!ret) return kr_ok(); + /* Clear cache if overfull. It's nontrivial to do better with LMDB. + * LATER: some garbage-collection mechanism. */ + if (ret == kr_error(ENOSPC)) { + ret = kr_cache_clear(cache); + const char *msg = "[cache] clearing because overfull, ret = %d\n"; + if (ret) { + kr_log_error(msg, ret); + } else { + kr_log_info(msg, ret); + ret = kr_error(ENOSPC); + } + return ret; + } + VERBOSE_MSG(qry, "=> failed backend write, ret = %d\n", ret); + return kr_error(ret ? ret : ENOSPC); +} + + +/* See the header file. */ +int entry_h_splice( + knot_db_val_t *val_new_entry, uint8_t rank, + const knot_db_val_t key, const uint16_t ktype, const uint16_t type, + const knot_dname_t *owner/*log only*/, + const struct kr_query *qry, struct kr_cache *cache, uint32_t timestamp) +{ + //TODO: another review, perhaps incuding the API + const bool ok = val_new_entry && val_new_entry->len > 0; + if (!ok) { + assert(!EINVAL); + return kr_error(EINVAL); + } + + int i_type; + switch (type) { + case KNOT_RRTYPE_NS: i_type = EL_NS; break; + case KNOT_RRTYPE_CNAME: i_type = EL_CNAME; break; + case KNOT_RRTYPE_DNAME: i_type = EL_DNAME; break; + default: i_type = 0; + } + + /* Get eh_orig (original entry), and also el list if multi-entry case. */ + const struct entry_h *eh_orig = NULL; + entry_list_t el; + int ret = -1; + if (!kr_rank_test(rank, KR_RANK_SECURE) || ktype == KNOT_RRTYPE_NS) { + knot_db_val_t val; + ret = cache_op(cache, read, &key, &val, 1); + if (i_type) { + if (!ret) ret = entry_list_parse(val, el); + if (ret) memset(el, 0, sizeof(el)); + val = el[i_type]; + } + /* val is on the entry, in either case (or error) */ + if (!ret) { + eh_orig = entry_h_consistent(val, type); + } + } else { + /* We want to fully overwrite the entry, so don't even read it. */ + memset(el, 0, sizeof(el)); + } + + if (!kr_rank_test(rank, KR_RANK_SECURE) && eh_orig) { + /* If equal rank was accepted, spoofing a *single* answer would be + * enough to e.g. override NS record in AUTHORITY section. + * This way they would have to hit the first answer + * (whenever TTL nears expiration). + * Stale-serving is NOT considered, but TTL 1 would be considered + * as expiring anyway, ... */ + int32_t old_ttl = get_new_ttl(eh_orig, qry, NULL, 0, timestamp); + if (old_ttl > 0 && !is_expiring(eh_orig->ttl, old_ttl) + && rank <= eh_orig->rank) { + WITH_VERBOSE(qry) { + auto_free char *type_str = kr_rrtype_text(type), + *owner_str = kr_dname_text(owner); + VERBOSE_MSG(qry, "=> not overwriting %s %s\n", + type_str, owner_str); + } + return kr_error(EEXIST); + } + } + + if (!i_type) { + /* The non-list types are trivial now. */ + return cache_write_or_clear(cache, &key, val_new_entry, qry); + } + /* Now we're in trouble. In some cases, parts of data to be written + * is an lmdb entry that may be invalidated by our write request. + * (lmdb does even in-place updates!) Therefore we copy all into a buffer. + * (We don't bother deallocating from the mempool.) + * LATER(optim.): do this only when neccessary, or perhaps another approach. + * This is also complicated by the fact that the val_new_entry part + * is to be written *afterwards* by the caller. + */ + el[i_type] = (knot_db_val_t){ + .len = val_new_entry->len, + .data = NULL, /* perhaps unclear in the entry_h_splice() API */ + }; + knot_db_val_t val = { + .len = entry_list_serial_size(el), + .data = NULL, + }; + void *buf = mm_alloc(&qry->request->pool, val.len); + entry_list_memcpy(buf, el); + ret = cache_write_or_clear(cache, &key, &val, qry); + if (ret) return kr_error(ret); + memcpy(val.data, buf, val.len); /* we also copy the "empty" space, but well... */ + val_new_entry->data = (uint8_t *)val.data + + ((uint8_t *)el[i_type].data - (uint8_t *)buf); + return kr_ok(); +} + |