/* Copyright (C) 2014-2017 CZ.NIC, z.s.p.o. This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation, either version 3 of the License, or (at your option) any later version. This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with this program. If not, see . */ #include #include #include #include #include #include #include #include #include #include "lib/resolve.h" #include "lib/layer.h" #include "lib/rplan.h" #include "lib/layer/iterate.h" #include "lib/dnssec/ta.h" #include "lib/dnssec.h" #if defined(ENABLE_COOKIES) #include "lib/cookies/control.h" #include "lib/cookies/helper.h" #include "lib/cookies/nonce.h" #else /* Define compatibility macros */ #define KNOT_EDNS_OPTION_COOKIE 10 #endif /* defined(ENABLE_COOKIES) */ #define VERBOSE_MSG(qry, ...) QRVERBOSE((qry), "resl", __VA_ARGS__) bool kr_rank_check(uint8_t rank) { switch (rank & ~KR_RANK_AUTH) { case KR_RANK_INITIAL: case KR_RANK_OMIT: case KR_RANK_TRY: case KR_RANK_INDET: case KR_RANK_BOGUS: case KR_RANK_MISMATCH: case KR_RANK_MISSING: case KR_RANK_INSECURE: case KR_RANK_SECURE: return true; default: return false; } } /** @internal Set @a yielded to all RRs with matching @a qry_uid. */ static void set_yield(ranked_rr_array_t *array, const uint32_t qry_uid, const bool yielded) { for (unsigned i = 0; i < array->len; ++i) { ranked_rr_array_entry_t *entry = array->at[i]; if (entry->qry_uid == qry_uid) { entry->yielded = yielded; } } } /** * @internal Defer execution of current query. * The current layer state and input will be pushed to a stack and resumed on next iteration. */ static int consume_yield(kr_layer_t *ctx, knot_pkt_t *pkt) { struct kr_request *req = ctx->req; size_t pkt_size = pkt->size; if (knot_pkt_has_tsig(pkt)) { pkt_size += pkt->tsig_wire.len; } knot_pkt_t *pkt_copy = knot_pkt_new(NULL, pkt_size, &req->pool); struct kr_layer_pickle *pickle = mm_alloc(&req->pool, sizeof(*pickle)); if (pickle && pkt_copy && knot_pkt_copy(pkt_copy, pkt) == 0) { struct kr_query *qry = req->current_query; pickle->api = ctx->api; pickle->state = ctx->state; pickle->pkt = pkt_copy; pickle->next = qry->deferred; qry->deferred = pickle; set_yield(&req->answ_selected, qry->uid, true); set_yield(&req->auth_selected, qry->uid, true); return kr_ok(); } return kr_error(ENOMEM); } static int begin_yield(kr_layer_t *ctx) { return kr_ok(); } static int reset_yield(kr_layer_t *ctx) { return kr_ok(); } static int finish_yield(kr_layer_t *ctx) { return kr_ok(); } static int produce_yield(kr_layer_t *ctx, knot_pkt_t *pkt) { return kr_ok(); } static int checkout_yield(kr_layer_t *ctx, knot_pkt_t *packet, struct sockaddr *dst, int type) { return kr_ok(); } static int answer_finalize_yield(kr_layer_t *ctx) { return kr_ok(); } /** @internal Macro for iterating module layers. */ #define RESUME_LAYERS(from, r, qry, func, ...) 
\
	(r)->current_query = (qry); \
	for (size_t i = (from); i < (r)->ctx->modules->len; ++i) { \
		struct kr_module *mod = (r)->ctx->modules->at[i]; \
		if (mod->layer) { \
			struct kr_layer layer = {.state = (r)->state, .api = mod->layer(mod), .req = (r)}; \
			if (layer.api && layer.api->func) { \
				(r)->state = layer.api->func(&layer, ##__VA_ARGS__); \
				if ((r)->state == KR_STATE_YIELD) { \
					func ## _yield(&layer, ##__VA_ARGS__); \
					break; \
				} \
			} \
		} \
	} /* Invalidate current query. */ \
	(r)->current_query = NULL

/** @internal Macro for starting module iteration. */
#define ITERATE_LAYERS(req, qry, func, ...) RESUME_LAYERS(0, req, qry, func, ##__VA_ARGS__)

/** @internal Find layer id matching API. */
static inline size_t layer_id(struct kr_request *req, const struct kr_layer_api *api)
{
	module_array_t *modules = req->ctx->modules;
	for (size_t i = 0; i < modules->len; ++i) {
		struct kr_module *mod = modules->at[i];
		if (mod->layer && mod->layer(mod) == api) {
			return i;
		}
	}
	return 0; /* Not found, try all. */
}

/* @internal We don't need to deal with locale here */
KR_CONST static inline bool isletter(unsigned chr)
{ return (chr | 0x20 /* tolower */) - 'a' <= 'z' - 'a'; }

/* Randomize QNAME letter case.
 * This adds 32 bits of randomness at maximum, but that's more than an average domain name length.
 * https://tools.ietf.org/html/draft-vixie-dnsext-dns0x20-00
 */
static void randomized_qname_case(knot_dname_t * restrict qname, uint32_t secret)
{
	if (secret == 0) {
		return;
	}
	assert(qname);
	const int len = knot_dname_size(qname) - 2; /* Skip first, last label. */
	for (int i = 0; i < len; ++i) {
		if (isletter(*++qname)) {
			*qname ^= ((secret >> (i & 31)) & 1) * 0x20;
		}
	}
}

/** Invalidate current NS/addr pair. */
static int invalidate_ns(struct kr_rplan *rplan, struct kr_query *qry)
{
	if (qry->ns.addr[0].ip.sa_family != AF_UNSPEC) {
		const char *addr = kr_inaddr(&qry->ns.addr[0].ip);
		int addr_len = kr_inaddr_len(&qry->ns.addr[0].ip);
		return kr_zonecut_del(&qry->zone_cut, qry->ns.name, addr, addr_len);
	} else {
		return kr_zonecut_del_all(&qry->zone_cut, qry->ns.name);
	}
}

/** This turns off QNAME minimisation if there is a non-terminal between the current zone cut and the name target.
 * It saves several minimisation steps, as the zone cut is likely the final one.
 */
static void check_empty_nonterms(struct kr_query *qry, knot_pkt_t *pkt, struct kr_cache *cache, uint32_t timestamp)
{
	// FIXME cleanup, etc.
#if 0
	if (qry->flags.NO_MINIMIZE) {
		return;
	}
	const knot_dname_t *target = qry->sname;
	const knot_dname_t *cut_name = qry->zone_cut.name;
	if (!target || !cut_name) return;

	struct kr_cache_entry *entry = NULL;
	/* @note: The non-terminal must be a direct child of the zone cut (e.g. label distance <= 2),
	 * otherwise this would risk leaking information to the parent if the NODATA TTD > zone cut TTD. */
	int labels = knot_dname_labels(target, NULL) - knot_dname_labels(cut_name, NULL);
	while (target[0] && labels > 2) {
		target = knot_wire_next_label(target, NULL);
		--labels;
	}
	for (int i = 0; i < labels; ++i) {
		int ret = kr_cache_peek(cache, KR_CACHE_PKT, target, KNOT_RRTYPE_NS, &entry, &timestamp);
		if (ret == 0) { /* Either NXDOMAIN or NODATA, start here. */
			/* @todo We could stop resolution here for NXDOMAIN, but we can't because of broken CDNs */
			qry->flags.NO_MINIMIZE = true;
			kr_make_query(qry, pkt);
			break;
		}
		assert(target[0]);
		target = knot_wire_next_label(target, NULL);
	}
	kr_cache_sync(cache);
#endif
}

static int ns_fetch_cut(struct kr_query *qry, const knot_dname_t *requested_name,
			struct kr_request *req, knot_pkt_t *pkt)
{
	/* The parent query may already have a provably insecure zone cut
	 * which is not in the cache yet. */
	struct kr_qflags pflags;
	if (qry->parent) {
		pflags = qry->parent->flags;
	}
	const bool is_insecured = qry->parent != NULL
		&& !(pflags.AWAIT_IPV4 || pflags.AWAIT_IPV6)
		&& (pflags.DNSSEC_INSECURE || pflags.DNSSEC_NODS);

	/* Want DNSSEC if it's possible to secure this name
	 * (e.g. it is covered by any TA) */
	if (is_insecured) {
		/* If the parent is insecure we don't want DNSSEC,
		 * even if the cut name is covered by a TA. */
		qry->flags.DNSSEC_WANT = false;
		qry->flags.DNSSEC_INSECURE = true;
		VERBOSE_MSG(qry, "=> going insecure because parent query is insecure\n");
	} else if (kr_ta_covers_qry(req->ctx, qry->zone_cut.name, KNOT_RRTYPE_NS)) {
		qry->flags.DNSSEC_WANT = true;
	} else {
		qry->flags.DNSSEC_WANT = false;
		VERBOSE_MSG(qry, "=> going insecure because there's no covering TA\n");
	}

	struct kr_zonecut cut_found;
	kr_zonecut_init(&cut_found, requested_name, req->rplan.pool);
	/* The cut that is found can differ from the cut that was requested.
	 * So if not already insecure,
	 * try to fetch the TA & keys even if the initial cut name is not covered by a TA. */
	bool secured = !is_insecured;
	int ret = kr_zonecut_find_cached(req->ctx, &cut_found, requested_name, qry, &secured);
	if (ret == kr_error(ENOENT)) {
		/* No cached cut found, start from SBELT
		 * and issue a priming query. */
		kr_zonecut_deinit(&cut_found);
		ret = kr_zonecut_set_sbelt(req->ctx, &qry->zone_cut);
		if (ret != 0) {
			return KR_STATE_FAIL;
		}
		VERBOSE_MSG(qry, "=> using root hints\n");
		qry->flags.AWAIT_CUT = false;
		return KR_STATE_DONE;
	} else if (ret != kr_ok()) {
		kr_zonecut_deinit(&cut_found);
		return KR_STATE_FAIL;
	}

	/* Find out the security status.
	 * Go insecure if the zone cut is provably insecure. */
	if ((qry->flags.DNSSEC_WANT) && !secured) {
		VERBOSE_MSG(qry, "=> NS is provably without DS, going insecure\n");
		qry->flags.DNSSEC_WANT = false;
		qry->flags.DNSSEC_INSECURE = true;
	}
	/* The zone cut name can change, check it again
	 * to prevent unnecessary DS & DNSKEY queries. */
	if (!(qry->flags.DNSSEC_INSECURE) &&
	    kr_ta_covers_qry(req->ctx, cut_found.name, KNOT_RRTYPE_NS)) {
		qry->flags.DNSSEC_WANT = true;
	} else {
		qry->flags.DNSSEC_WANT = false;
	}
	/* Check if any DNSKEY was found for the cached cut. */
	if (qry->flags.DNSSEC_WANT && cut_found.key == NULL &&
	    kr_zonecut_is_empty(&cut_found)) {
		/* Cut found and there are no proofs of zone insecurity.
		 * But no DNSKEY was found and no glue was fetched.
		 * We have a circular dependency - we must fetch A/AAAA
		 * from the authoritative, but we have no key to verify it. */
		kr_zonecut_deinit(&cut_found);
		if (requested_name[0] != '\0') {
			/* Not the root - try the next label. */
			return KR_STATE_CONSUME;
		}
		/* No cached cut & keys found, start from SBELT. */
		ret = kr_zonecut_set_sbelt(req->ctx, &qry->zone_cut);
		if (ret != 0) {
			return KR_STATE_FAIL;
		}
		VERBOSE_MSG(qry, "=> using root hints\n");
		qry->flags.AWAIT_CUT = false;
		return KR_STATE_DONE;
	}
	/* Use the found zone cut. */
	kr_zonecut_move(&qry->zone_cut, &cut_found);
	/* Check if there's a non-terminal between the target and the current cut. */
	struct kr_cache *cache = &req->ctx->cache;
	check_empty_nonterms(qry, pkt, cache, qry->timestamp.tv_sec);
	/* Cut found */
	return KR_STATE_PRODUCE;
}

static int ns_resolve_addr(struct kr_query *qry, struct kr_request *param)
{
	struct kr_rplan *rplan = &param->rplan;
	struct kr_context *ctx = param->ctx;

	/* Start NS queries from the root to avoid certain cases
	 * where an NS drops out of the cache and the rest is unavailable;
	 * this would lead to a dependency loop in the current zone cut.
	 * Prefer IPv6 and continue with IPv4 if not available. */
	uint16_t next_type = 0;
	if (!(qry->flags.AWAIT_IPV6) && !(ctx->options.NO_IPV6)) {
		next_type = KNOT_RRTYPE_AAAA;
		qry->flags.AWAIT_IPV6 = true;
	} else if (!(qry->flags.AWAIT_IPV4) && !(ctx->options.NO_IPV4)) {
		next_type = KNOT_RRTYPE_A;
		qry->flags.AWAIT_IPV4 = true;
		/* Hmm, no usable IPv6 then. */
		qry->ns.reputation |= KR_NS_NOIP6;
		kr_nsrep_update_rep(&qry->ns, qry->ns.reputation, ctx->cache_rep);
	}
	/* Bail out if the query is already pending or there is a dependency loop. */
	if (!next_type || kr_rplan_satisfies(qry->parent, qry->ns.name, KNOT_CLASS_IN, next_type)) {
		/* Fall back to SBELT if the root server query fails. */
		if (!next_type && qry->zone_cut.name[0] == '\0') {
			VERBOSE_MSG(qry, "=> fallback to root hints\n");
			kr_zonecut_set_sbelt(ctx, &qry->zone_cut);
			qry->flags.NO_THROTTLE = true; /* Pick even bad SBELT servers */
			return kr_error(EAGAIN);
		}
		/* Neither IPv4 nor IPv6, flag the server as unusable. */
		VERBOSE_MSG(qry, "=> unresolvable NS address, bailing out\n");
		qry->ns.reputation |= KR_NS_NOIP4 | KR_NS_NOIP6;
		kr_nsrep_update_rep(&qry->ns, qry->ns.reputation, ctx->cache_rep);
		invalidate_ns(rplan, qry);
		return kr_error(EHOSTUNREACH);
	}
	/* Push a new query to the resolution plan. */
	struct kr_query *next = kr_rplan_push(rplan, qry, qry->ns.name, KNOT_CLASS_IN, next_type);
	if (!next) {
		return kr_error(ENOMEM);
	}
	next->flags.NONAUTH = true;

	/* At the root level with no NS addresses, add a SBELT subrequest. */
	int ret = 0;
	if (qry->zone_cut.name[0] == '\0') {
		ret = kr_zonecut_set_sbelt(ctx, &next->zone_cut);
		if (ret == 0) {
			/* Copy TA and key since it's the same cut to avoid lookup. */
			kr_zonecut_copy_trust(&next->zone_cut, &qry->zone_cut);
			kr_zonecut_set_sbelt(ctx, &qry->zone_cut); /* Add SBELT to parent in case the query fails. */
			qry->flags.NO_THROTTLE = true; /* Pick even bad SBELT servers */
		}
	} else {
		next->flags.AWAIT_CUT = true;
	}
	return ret;
}

static int edns_put(knot_pkt_t *pkt, bool reclaim)
{
	if (!pkt->opt_rr) {
		return kr_ok();
	}
	if (reclaim) {
		/* Reclaim the reserved size. */
		int ret = knot_pkt_reclaim(pkt, knot_edns_wire_size(pkt->opt_rr));
		if (ret != 0) {
			return ret;
		}
	}
	/* Write to the packet. */
	assert(pkt->current == KNOT_ADDITIONAL);
	return knot_pkt_put(pkt, KNOT_COMPR_HINT_NONE, pkt->opt_rr, KNOT_PF_FREE);
}

/** Removes the last EDNS OPT RR written to the packet. */
static int edns_erase_and_reserve(knot_pkt_t *pkt)
{
	/* Nothing to be done. */
	if (!pkt || !pkt->opt_rr) {
		return 0;
	}
	/* Fail if the data are located elsewhere than at the end of the packet. */
	if (pkt->current != KNOT_ADDITIONAL || pkt->opt_rr != &pkt->rr[pkt->rrset_count - 1]) {
		return -1;
	}
	size_t len = knot_rrset_size(pkt->opt_rr);
	int16_t rr_removed = pkt->opt_rr->rrs.count;
	/* Decrease rrset counters. */
	pkt->rrset_count -= 1;
	pkt->sections[pkt->current].count -= 1;
	pkt->size -= len;
	knot_wire_add_arcount(pkt->wire, -rr_removed); /* ADDITIONAL */
	pkt->opt_rr = NULL;
	/* Reserve the freed space.
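	 * This pairs with edns_create() below: query_finalize() first erases the OPT RR
	 * left over from a previous iteration and reserves its space, then builds a fresh
	 * OPT RR; edns_put() finally writes it to the wire at the end of
	 * kr_resolve_checkout(), after the checkout layers have had a chance to modify it.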
*/ return knot_pkt_reserve(pkt, len); } static int edns_create(knot_pkt_t *pkt, knot_pkt_t *template, struct kr_request *req) { pkt->opt_rr = knot_rrset_copy(req->ctx->opt_rr, &pkt->mm); size_t wire_size = knot_edns_wire_size(pkt->opt_rr); #if defined(ENABLE_COOKIES) if (req->ctx->cookie_ctx.clnt.enabled || req->ctx->cookie_ctx.srvr.enabled) { wire_size += KR_COOKIE_OPT_MAX_LEN; } #endif /* defined(ENABLE_COOKIES) */ if (req->qsource.flags.tls) { if (req->ctx->tls_padding == -1) /* FIXME: we do not know how to reserve space for the * default padding policy, since we can't predict what * it will select. So i'm just guessing :/ */ wire_size += KNOT_EDNS_OPTION_HDRLEN + 512; if (req->ctx->tls_padding >= 2) wire_size += KNOT_EDNS_OPTION_HDRLEN + req->ctx->tls_padding; } return knot_pkt_reserve(pkt, wire_size); } static int answer_prepare(struct kr_request *req, knot_pkt_t *query) { knot_pkt_t *answer = req->answer; if (knot_pkt_init_response(answer, query) != 0) { return kr_error(ENOMEM); /* Failed to initialize answer */ } /* Handle EDNS in the query */ if (knot_pkt_has_edns(query)) { answer->opt_rr = knot_rrset_copy(req->ctx->opt_rr, &answer->mm); if (answer->opt_rr == NULL){ return kr_error(ENOMEM); } /* Set DO bit if set (DNSSEC requested). */ if (knot_pkt_has_dnssec(query)) { knot_edns_set_do(answer->opt_rr); } } return kr_ok(); } /** @return error code, ignoring if forced to truncate the packet. */ static int write_extra_records(const rr_array_t *arr, uint16_t reorder, knot_pkt_t *answer) { for (size_t i = 0; i < arr->len; ++i) { int err = knot_pkt_put_rotate(answer, 0, arr->at[i], reorder, 0); if (err != KNOT_EOK) { return err == KNOT_ESPACE ? kr_ok() : kr_error(err); } } return kr_ok(); } /** * @param all_secure optionally &&-combine security of written RRs into its value. * (i.e. if you pass a pointer to false, it will always remain) * @param all_cname optionally output if all written RRs are CNAMEs and RRSIGs of CNAMEs * @return error code, ignoring if forced to truncate the packet. */ static int write_extra_ranked_records(const ranked_rr_array_t *arr, uint16_t reorder, knot_pkt_t *answer, bool *all_secure, bool *all_cname) { const bool has_dnssec = knot_pkt_has_dnssec(answer); bool all_sec = true; bool all_cn = (all_cname != NULL); /* optim.: init as false if not needed */ int err = kr_ok(); for (size_t i = 0; i < arr->len; ++i) { ranked_rr_array_entry_t * entry = arr->at[i]; if (!entry->to_wire) { continue; } knot_rrset_t *rr = entry->rr; if (!has_dnssec) { if (rr->type != knot_pkt_qtype(answer) && knot_rrtype_is_dnssec(rr->type)) { continue; } } err = knot_pkt_put_rotate(answer, 0, rr, reorder, 0); if (err != KNOT_EOK) { if (err == KNOT_ESPACE) { err = kr_ok(); } break; } if (rr->type != KNOT_RRTYPE_RRSIG) { all_sec = all_sec && kr_rank_test(entry->rank, KR_RANK_SECURE); } all_cn = all_cn && kr_rrset_type_maysig(entry->rr) == KNOT_RRTYPE_CNAME; } if (all_secure) { *all_secure = *all_secure && all_sec; } if (all_cname) { *all_cname = all_cn; } return err; } /** @internal Add an EDNS padding RR into the answer if requested and required. 
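 *
 * Illustrative example (hypothetical figures, not derived from the code below): with
 * request->ctx->tls_padding set to a block size of 468 and an answer of roughly 200
 * bytes including the OPT RR, the response is padded up to the next multiple of 468,
 * so roughly 260 zero octets are appended. The exact count comes from
 * knot_edns_alignment_size(), which also accounts for the option header, and it is
 * capped by the advertised EDNS payload size so the padded answer never overflows it.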
*/ static int answer_padding(struct kr_request *request) { if (!request || !request->answer || !request->ctx) { assert(false); return kr_error(EINVAL); } int32_t padding = request->ctx->tls_padding; knot_pkt_t *answer = request->answer; knot_rrset_t *opt_rr = answer->opt_rr; int32_t pad_bytes = -1; if (padding == -1) { /* use the default padding policy from libknot */ pad_bytes = knot_pkt_default_padding_size(answer, opt_rr); } if (padding >= 2) { int32_t max_pad_bytes = knot_edns_get_payload(opt_rr) - (answer->size + knot_rrset_size(opt_rr)); pad_bytes = MIN(knot_edns_alignment_size(answer->size, knot_rrset_size(opt_rr), padding), max_pad_bytes); } if (pad_bytes >= 0) { uint8_t zeros[MAX(1, pad_bytes)]; memset(zeros, 0, sizeof(zeros)); int r = knot_edns_add_option(opt_rr, KNOT_EDNS_OPTION_PADDING, pad_bytes, zeros, &answer->mm); if (r != KNOT_EOK) { knot_rrset_clear(opt_rr, &answer->mm); return kr_error(r); } } return kr_ok(); } static int answer_fail(struct kr_request *request) { knot_pkt_t *answer = request->answer; int ret = kr_pkt_clear_payload(answer); knot_wire_clear_ad(answer->wire); knot_wire_clear_aa(answer->wire); knot_wire_set_rcode(answer->wire, KNOT_RCODE_SERVFAIL); if (ret == 0 && answer->opt_rr) { /* OPT in SERVFAIL response is still useful for cookies/additional info. */ knot_pkt_begin(answer, KNOT_ADDITIONAL); answer_padding(request); /* Ignore failed padding in SERVFAIL answer. */ ret = edns_put(answer, false); } return ret; } static int answer_finalize(struct kr_request *request, int state) { struct kr_rplan *rplan = &request->rplan; knot_pkt_t *answer = request->answer; /* Always set SERVFAIL for bogus answers. */ if (state == KR_STATE_FAIL && rplan->pending.len > 0) { struct kr_query *last = array_tail(rplan->pending); if ((last->flags.DNSSEC_WANT) && (last->flags.DNSSEC_BOGUS)) { return answer_fail(request); } } struct kr_query *last = rplan->resolved.len > 0 ? array_tail(rplan->resolved) : NULL; /* TODO ^^^^ this is slightly fragile */ /* AD flag. We can only change `secure` from true to false. * Be conservative. Primary approach: check ranks of all RRs in wire. * Only "negative answers" need special handling. */ bool secure = last != NULL && state == KR_STATE_DONE /*< suspicious otherwise */ && knot_pkt_qtype(answer) != KNOT_RRTYPE_RRSIG; if (last && (last->flags.STUB)) { secure = false; /* don't trust forwarding for now */ } if (last && (last->flags.DNSSEC_OPTOUT)) { VERBOSE_MSG(NULL, "AD: opt-out\n"); secure = false; /* the last answer is insecure due to opt-out */ } const uint16_t reorder = last ? last->reorder : 0; bool answ_all_cnames = false/*arbitrary*/; if (request->answ_selected.len > 0) { assert(answer->current <= KNOT_ANSWER); /* Write answer records. */ if (answer->current < KNOT_ANSWER) { knot_pkt_begin(answer, KNOT_ANSWER); } if (write_extra_ranked_records(&request->answ_selected, reorder, answer, &secure, &answ_all_cnames)) { return answer_fail(request); } } /* Write authority records. */ if (answer->current < KNOT_AUTHORITY) { knot_pkt_begin(answer, KNOT_AUTHORITY); } if (write_extra_ranked_records(&request->auth_selected, reorder, answer, &secure, NULL)) { return answer_fail(request); } /* Write additional records. 
*/ knot_pkt_begin(answer, KNOT_ADDITIONAL); if (write_extra_records(&request->additional, reorder, answer)) { return answer_fail(request); } /* Write EDNS information */ if (answer->opt_rr) { if (request->qsource.flags.tls) { if (answer_padding(request) != kr_ok()) { return answer_fail(request); } } knot_pkt_begin(answer, KNOT_ADDITIONAL); int ret = knot_pkt_put(answer, KNOT_COMPR_HINT_NONE, answer->opt_rr, KNOT_PF_FREE); if (ret != KNOT_EOK) { return answer_fail(request); } } if (!last) secure = false; /*< should be no-op, mostly documentation */ /* AD: "negative answers" need more handling. */ if (kr_response_classify(answer) != PKT_NOERROR /* Additionally check for CNAME chains that "end in NODATA", * as those would also be PKT_NOERROR. */ || (answ_all_cnames && knot_pkt_qtype(answer) != KNOT_RRTYPE_CNAME)) { secure = secure && last->flags.DNSSEC_WANT && !last->flags.DNSSEC_BOGUS && !last->flags.DNSSEC_INSECURE; } if (secure) { struct kr_query *cname_parent = last->cname_parent; while (cname_parent != NULL) { if (cname_parent->flags.DNSSEC_OPTOUT) { secure = false; break; } cname_parent = cname_parent->cname_parent; } } /* No detailed analysis ATM, just _SECURE or not. * LATER: request->rank might better be computed in validator's finish phase. */ VERBOSE_MSG(last, "AD: request%s classified as SECURE\n", secure ? "" : " NOT"); request->rank = secure ? KR_RANK_SECURE : KR_RANK_INITIAL; /* Clear AD if not secure. ATM answer has AD=1 if requested secured answer. */ if (!secure) { knot_wire_clear_ad(answer->wire); } return kr_ok(); } static int query_finalize(struct kr_request *request, struct kr_query *qry, knot_pkt_t *pkt) { int ret = 0; knot_pkt_begin(pkt, KNOT_ADDITIONAL); if (!(qry->flags.SAFEMODE)) { /* Remove any EDNS records from any previous iteration. */ ret = edns_erase_and_reserve(pkt); if (ret == 0) { ret = edns_create(pkt, request->answer, request); } if (ret == 0) { /* Stub resolution (ask for +rd and +do) */ if (qry->flags.STUB) { knot_wire_set_rd(pkt->wire); if (knot_pkt_has_dnssec(request->qsource.packet)) { knot_edns_set_do(pkt->opt_rr); } if (knot_wire_get_cd(request->qsource.packet->wire)) { knot_wire_set_cd(pkt->wire); } /* Full resolution (ask for +cd and +do) */ } else if (qry->flags.FORWARD) { knot_wire_set_rd(pkt->wire); knot_edns_set_do(pkt->opt_rr); knot_wire_set_cd(pkt->wire); } else if (qry->flags.DNSSEC_WANT) { knot_edns_set_do(pkt->opt_rr); knot_wire_set_cd(pkt->wire); } } } return ret; } int kr_resolve_begin(struct kr_request *request, struct kr_context *ctx, knot_pkt_t *answer) { /* Initialize request */ request->ctx = ctx; request->answer = answer; request->options = ctx->options; request->state = KR_STATE_CONSUME; request->current_query = NULL; array_init(request->additional); array_init(request->answ_selected); array_init(request->auth_selected); array_init(request->add_selected); request->answ_validated = false; request->auth_validated = false; request->rank = KR_RANK_INITIAL; request->trace_log = NULL; request->trace_finish = NULL; /* Expect first query */ kr_rplan_init(&request->rplan, request, &request->pool); return KR_STATE_CONSUME; } static int resolve_query(struct kr_request *request, const knot_pkt_t *packet) { struct kr_rplan *rplan = &request->rplan; const knot_dname_t *qname = knot_pkt_qname(packet); uint16_t qclass = knot_pkt_qclass(packet); uint16_t qtype = knot_pkt_qtype(packet); struct kr_query *qry = NULL; struct kr_context *ctx = request->ctx; struct kr_cookie_ctx *cookie_ctx = ctx ? 
&ctx->cookie_ctx : NULL; if (qname != NULL) { qry = kr_rplan_push(rplan, NULL, qname, qclass, qtype); } else if (cookie_ctx && cookie_ctx->srvr.enabled && knot_wire_get_qdcount(packet->wire) == 0 && knot_pkt_has_edns(packet) && knot_pkt_edns_option(packet, KNOT_EDNS_OPTION_COOKIE)) { /* Plan empty query only for cookies. */ qry = kr_rplan_push_empty(rplan, NULL); } if (!qry) { return KR_STATE_FAIL; } if (qname != NULL) { /* Deferred zone cut lookup for this query. */ qry->flags.AWAIT_CUT = true; /* Want DNSSEC if it's posible to secure this name (e.g. is covered by any TA) */ if ((knot_wire_get_ad(packet->wire) || knot_pkt_has_dnssec(packet)) && kr_ta_covers_qry(request->ctx, qname, qtype)) { qry->flags.DNSSEC_WANT = true; } } /* Initialize answer packet */ knot_pkt_t *answer = request->answer; knot_wire_set_qr(answer->wire); knot_wire_clear_aa(answer->wire); knot_wire_set_ra(answer->wire); knot_wire_set_rcode(answer->wire, KNOT_RCODE_NOERROR); assert(request->qsource.packet); if (knot_wire_get_cd(request->qsource.packet->wire)) { knot_wire_set_cd(answer->wire); } else if (qry->flags.DNSSEC_WANT) { knot_wire_set_ad(answer->wire); } /* Expect answer, pop if satisfied immediately */ ITERATE_LAYERS(request, qry, begin); if ((request->state & KR_STATE_DONE) != 0) { kr_rplan_pop(rplan, qry); } else if (qname == NULL) { /* it is an empty query which must be resolved by `begin` layer of cookie module. If query isn't resolved, fail. */ request->state = KR_STATE_FAIL; } return request->state; } KR_PURE static bool kr_inaddr_equal(const struct sockaddr *a, const struct sockaddr *b) { const int a_len = kr_inaddr_len(a); const int b_len = kr_inaddr_len(b); return a_len == b_len && memcmp(kr_inaddr(a), kr_inaddr(b), a_len) == 0; } static void update_nslist_rtt(struct kr_context *ctx, struct kr_query *qry, const struct sockaddr *src) { /* Do not track in safe mode. */ if (qry->flags.SAFEMODE) { return; } /* Calculate total resolution time from the time the query was generated. */ uint64_t elapsed = kr_now() - qry->timestamp_mono; elapsed = elapsed > UINT_MAX ? UINT_MAX : elapsed; /* NSs in the preference list prior to the one who responded will be penalised * with the RETRY timer interval. This is because we know they didn't respond * for N retries, so their RTT must be at least N * RETRY. * The NS in the preference list that responded will have RTT relative to the * time when the query was sent out, not when it was originated. */ for (size_t i = 0; i < KR_NSREP_MAXADDR; ++i) { const struct sockaddr *addr = &qry->ns.addr[i].ip; if (addr->sa_family == AF_UNSPEC) { break; } /* If this address is the source of the answer, update its RTT */ if (kr_inaddr_equal(src, addr)) { kr_nsrep_update_rtt(&qry->ns, addr, elapsed, ctx->cache_rtt, KR_NS_UPDATE); WITH_VERBOSE(qry) { char addr_str[INET6_ADDRSTRLEN]; inet_ntop(addr->sa_family, kr_inaddr(addr), addr_str, sizeof(addr_str)); VERBOSE_MSG(qry, "<= server: '%s' rtt: %"PRIu64" ms\n", addr_str, elapsed); } } else { /* Response didn't come from this IP, but we know the RTT must be at least * several RETRY timer tries, e.g. if we have addresses [a, b, c] and we have * tried [a, b] when the answer from 'a' came after 350ms, then we know * that 'b' didn't respond for at least 350 - (1 * 300) ms. We can't say that * its RTT is 50ms, but we can say that its score shouldn't be less than 50. 
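		 * In code terms: the address that answered gets kr_nsrep_update_rtt() with
		 * KR_NS_UPDATE and the elapsed time remaining at its position, other addresses
		 * get KR_NS_MAX as a lower bound, and elapsed is reduced by one retry interval
		 * (KR_CONN_RETRY) per list position until it drops below that interval, at which
		 * point nothing useful can be said about servers that were never queried.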
*/ kr_nsrep_update_rtt(&qry->ns, addr, elapsed, ctx->cache_rtt, KR_NS_MAX); WITH_VERBOSE(qry) { char addr_str[INET6_ADDRSTRLEN]; inet_ntop(addr->sa_family, kr_inaddr(addr), addr_str, sizeof(addr_str)); VERBOSE_MSG(qry, "<= server: '%s' rtt: >= %"PRIu64" ms\n", addr_str, elapsed); } } /* Subtract query start time from elapsed time */ if (elapsed < KR_CONN_RETRY) { break; } elapsed = elapsed - KR_CONN_RETRY; } } static void update_nslist_score(struct kr_request *request, struct kr_query *qry, const struct sockaddr *src, knot_pkt_t *packet) { struct kr_context *ctx = request->ctx; /* On successful answer, update preference list RTT and penalise timer */ if (request->state != KR_STATE_FAIL) { /* Update RTT information for preference list */ update_nslist_rtt(ctx, qry, src); /* Do not complete NS address resolution on soft-fail. */ const int rcode = packet ? knot_wire_get_rcode(packet->wire) : 0; if (rcode != KNOT_RCODE_SERVFAIL && rcode != KNOT_RCODE_REFUSED) { qry->flags.AWAIT_IPV6 = false; qry->flags.AWAIT_IPV4 = false; } else { /* Penalize SERVFAILs. */ kr_nsrep_update_rtt(&qry->ns, src, KR_NS_PENALTY, ctx->cache_rtt, KR_NS_ADD); } } } static bool resolution_time_exceeded(struct kr_query *qry, uint64_t now) { uint64_t resolving_time = now - qry->creation_time_mono; if (resolving_time > KR_RESOLVE_TIME_LIMIT) { WITH_VERBOSE(qry) { VERBOSE_MSG(qry, "query resolution time limit exceeded\n"); } return true; } return false; } int kr_resolve_consume(struct kr_request *request, const struct sockaddr *src, knot_pkt_t *packet) { struct kr_rplan *rplan = &request->rplan; /* Empty resolution plan, push packet as the new query */ if (packet && kr_rplan_empty(rplan)) { if (answer_prepare(request, packet) != 0) { return KR_STATE_FAIL; } return resolve_query(request, packet); } /* Different processing for network error */ struct kr_query *qry = array_tail(rplan->pending); /* Check overall resolution time */ if (resolution_time_exceeded(qry, kr_now())) { return KR_STATE_FAIL; } bool tried_tcp = (qry->flags.TCP); if (!packet || packet->size == 0) { if (tried_tcp) { request->state = KR_STATE_FAIL; } else { qry->flags.TCP = true; } } else { /* Packet cleared, derandomize QNAME. */ knot_dname_t *qname_raw = knot_pkt_qname(packet); if (qname_raw && qry->secret != 0) { randomized_qname_case(qname_raw, qry->secret); } request->state = KR_STATE_CONSUME; if (qry->flags.CACHED) { ITERATE_LAYERS(request, qry, consume, packet); } else { /* Fill in source and latency information. */ request->upstream.rtt = kr_now() - qry->timestamp_mono; request->upstream.addr = src; ITERATE_LAYERS(request, qry, consume, packet); /* Clear temporary information */ request->upstream.addr = NULL; request->upstream.rtt = 0; } } /* Track RTT for iterative answers */ if (src && !(qry->flags.CACHED)) { update_nslist_score(request, qry, src, packet); } /* Resolution failed, invalidate current NS. */ if (request->state == KR_STATE_FAIL) { invalidate_ns(rplan, qry); qry->flags.RESOLVED = false; } /* Pop query if resolved. */ if (request->state == KR_STATE_YIELD) { return KR_STATE_PRODUCE; /* Requery */ } else if (qry->flags.RESOLVED) { kr_rplan_pop(rplan, qry); } else if (!tried_tcp && (qry->flags.TCP)) { return KR_STATE_PRODUCE; /* Requery over TCP */ } else { /* Clear query flags for next attempt */ qry->flags.CACHED = false; if (!request->options.TCP) { qry->flags.TCP = false; } } ITERATE_LAYERS(request, qry, reset); /* Do not finish with bogus answer. 
*/ if (qry->flags.DNSSEC_BOGUS) { return KR_STATE_FAIL; } return kr_rplan_empty(&request->rplan) ? KR_STATE_DONE : KR_STATE_PRODUCE; } /** @internal Spawn subrequest in current zone cut (no minimization or lookup). */ static struct kr_query *zone_cut_subreq(struct kr_rplan *rplan, struct kr_query *parent, const knot_dname_t *qname, uint16_t qtype) { struct kr_query *next = kr_rplan_push(rplan, parent, qname, parent->sclass, qtype); if (!next) { return NULL; } kr_zonecut_set(&next->zone_cut, parent->zone_cut.name); if (kr_zonecut_copy(&next->zone_cut, &parent->zone_cut) != 0 || kr_zonecut_copy_trust(&next->zone_cut, &parent->zone_cut) != 0) { return NULL; } next->flags.NO_MINIMIZE = true; if (parent->flags.DNSSEC_WANT) { next->flags.DNSSEC_WANT = true; } return next; } static int forward_trust_chain_check(struct kr_request *request, struct kr_query *qry, bool resume) { struct kr_rplan *rplan = &request->rplan; map_t *trust_anchors = &request->ctx->trust_anchors; map_t *negative_anchors = &request->ctx->negative_anchors; if (qry->parent != NULL && !(qry->forward_flags.CNAME) && !(qry->flags.DNS64_MARK) && knot_dname_in_bailiwick(qry->zone_cut.name, qry->parent->zone_cut.name) >= 0) { return KR_STATE_PRODUCE; } assert(qry->flags.FORWARD); if (!trust_anchors) { qry->flags.AWAIT_CUT = false; return KR_STATE_PRODUCE; } if (qry->flags.DNSSEC_INSECURE) { qry->flags.AWAIT_CUT = false; return KR_STATE_PRODUCE; } if (qry->forward_flags.NO_MINIMIZE) { qry->flags.AWAIT_CUT = false; return KR_STATE_PRODUCE; } const knot_dname_t *start_name = qry->sname; if ((qry->flags.AWAIT_CUT) && !resume) { qry->flags.AWAIT_CUT = false; const knot_dname_t *longest_ta = kr_ta_get_longest_name(trust_anchors, qry->sname); if (longest_ta) { start_name = longest_ta; qry->zone_cut.name = knot_dname_copy(start_name, qry->zone_cut.pool); qry->flags.DNSSEC_WANT = true; } else { qry->flags.DNSSEC_WANT = false; return KR_STATE_PRODUCE; } } bool has_ta = (qry->zone_cut.trust_anchor != NULL); knot_dname_t *ta_name = (has_ta ? 
qry->zone_cut.trust_anchor->owner : NULL); bool refetch_ta = (!has_ta || !knot_dname_is_equal(qry->zone_cut.name, ta_name)); bool is_dnskey_subreq = kr_rplan_satisfies(qry, ta_name, KNOT_CLASS_IN, KNOT_RRTYPE_DNSKEY); bool refetch_key = has_ta && (!qry->zone_cut.key || !knot_dname_is_equal(ta_name, qry->zone_cut.key->owner)); if (refetch_key && !is_dnskey_subreq) { struct kr_query *next = zone_cut_subreq(rplan, qry, ta_name, KNOT_RRTYPE_DNSKEY); if (!next) { return KR_STATE_FAIL; } return KR_STATE_DONE; } int name_offset = 1; const knot_dname_t *wanted_name; bool nods, ds_req, ns_req, minimized, ns_exist; do { wanted_name = start_name; ds_req = false; ns_req = false; ns_exist = true; int cut_labels = knot_dname_labels(qry->zone_cut.name, NULL); int wanted_name_labels = knot_dname_labels(wanted_name, NULL); while (wanted_name[0] && wanted_name_labels > cut_labels + name_offset) { wanted_name = knot_wire_next_label(wanted_name, NULL); wanted_name_labels -= 1; } minimized = (wanted_name != qry->sname); for (int i = 0; i < request->rplan.resolved.len; ++i) { struct kr_query *q = request->rplan.resolved.at[i]; if (q->parent == qry && q->sclass == qry->sclass && (q->stype == KNOT_RRTYPE_DS || q->stype == KNOT_RRTYPE_NS) && knot_dname_is_equal(q->sname, wanted_name)) { if (q->stype == KNOT_RRTYPE_DS) { ds_req = true; if (q->flags.CNAME) { ns_exist = false; } else if (!(q->flags.DNSSEC_OPTOUT)) { int ret = kr_dnssec_matches_name_and_type(&request->auth_selected, q->uid, wanted_name, KNOT_RRTYPE_NS); ns_exist = (ret == kr_ok()); } } else { if (q->flags.CNAME) { ns_exist = false; } ns_req = true; } } } if (ds_req && ns_exist && !ns_req && (minimized || resume)) { struct kr_query *next = zone_cut_subreq(rplan, qry, wanted_name, KNOT_RRTYPE_NS); if (!next) { return KR_STATE_FAIL; } return KR_STATE_DONE; } if (qry->parent == NULL && (qry->flags.CNAME) && ds_req && ns_req) { return KR_STATE_PRODUCE; } /* set `nods` */ if ((qry->stype == KNOT_RRTYPE_DS) && knot_dname_is_equal(wanted_name, qry->sname)) { nods = true; } else if (resume && !ds_req) { nods = false; } else if (!minimized && qry->stype != KNOT_RRTYPE_DNSKEY) { nods = true; } else { nods = ds_req; } name_offset += 1; } while (ds_req && (ns_req || !ns_exist) && minimized); /* Disable DNSSEC if it enters NTA. */ if (kr_ta_get(negative_anchors, wanted_name)){ VERBOSE_MSG(qry, ">< negative TA, going insecure\n"); qry->flags.DNSSEC_WANT = false; } /* Enable DNSSEC if enters a new island of trust. */ bool want_secured = (qry->flags.DNSSEC_WANT) && !knot_wire_get_cd(request->qsource.packet->wire); if (!(qry->flags.DNSSEC_WANT) && !knot_wire_get_cd(request->qsource.packet->wire) && kr_ta_get(trust_anchors, wanted_name)) { qry->flags.DNSSEC_WANT = true; want_secured = true; WITH_VERBOSE(qry) { KR_DNAME_GET_STR(qname_str, wanted_name); VERBOSE_MSG(qry, ">< TA: '%s'\n", qname_str); } } if (want_secured && !qry->zone_cut.trust_anchor) { knot_rrset_t *ta_rr = kr_ta_get(trust_anchors, wanted_name); if (!ta_rr) { char name[] = "\0"; ta_rr = kr_ta_get(trust_anchors, (knot_dname_t*)name); } if (ta_rr) { qry->zone_cut.trust_anchor = knot_rrset_copy(ta_rr, qry->zone_cut.pool); } } has_ta = (qry->zone_cut.trust_anchor != NULL); ta_name = (has_ta ? 
qry->zone_cut.trust_anchor->owner : NULL); refetch_ta = (!has_ta || !knot_dname_is_equal(wanted_name, ta_name)); if (!nods && want_secured && refetch_ta) { struct kr_query *next = zone_cut_subreq(rplan, qry, wanted_name, KNOT_RRTYPE_DS); if (!next) { return KR_STATE_FAIL; } return KR_STATE_DONE; } /* Try to fetch missing DNSKEY. * Do not fetch if this is a DNSKEY subrequest to avoid circular dependency. */ is_dnskey_subreq = kr_rplan_satisfies(qry, ta_name, KNOT_CLASS_IN, KNOT_RRTYPE_DNSKEY); refetch_key = has_ta && (!qry->zone_cut.key || !knot_dname_is_equal(ta_name, qry->zone_cut.key->owner)); if (want_secured && refetch_key && !is_dnskey_subreq) { struct kr_query *next = zone_cut_subreq(rplan, qry, ta_name, KNOT_RRTYPE_DNSKEY); if (!next) { return KR_STATE_FAIL; } return KR_STATE_DONE; } return KR_STATE_PRODUCE; } /* @todo: Validator refactoring, keep this in driver for now. */ static int trust_chain_check(struct kr_request *request, struct kr_query *qry) { struct kr_rplan *rplan = &request->rplan; map_t *trust_anchors = &request->ctx->trust_anchors; map_t *negative_anchors = &request->ctx->negative_anchors; /* Disable DNSSEC if it enters NTA. */ if (kr_ta_get(negative_anchors, qry->zone_cut.name)){ VERBOSE_MSG(qry, ">< negative TA, going insecure\n"); qry->flags.DNSSEC_WANT = false; qry->flags.DNSSEC_INSECURE = true; } if (qry->flags.DNSSEC_NODS) { /* This is the next query iteration with minimized qname. * At previous iteration DS non-existance has been proven */ qry->flags.DNSSEC_NODS = false; qry->flags.DNSSEC_WANT = false; qry->flags.DNSSEC_INSECURE = true; } /* Enable DNSSEC if entering a new (or different) island of trust, * and update the TA RRset if required. */ bool want_secured = (qry->flags.DNSSEC_WANT) && !knot_wire_get_cd(request->qsource.packet->wire); knot_rrset_t *ta_rr = kr_ta_get(trust_anchors, qry->zone_cut.name); if (!knot_wire_get_cd(request->qsource.packet->wire) && ta_rr) { qry->flags.DNSSEC_WANT = true; want_secured = true; if (qry->zone_cut.trust_anchor == NULL || !knot_dname_is_equal(qry->zone_cut.trust_anchor->owner, qry->zone_cut.name)) { mm_free(qry->zone_cut.pool, qry->zone_cut.trust_anchor); qry->zone_cut.trust_anchor = knot_rrset_copy(ta_rr, qry->zone_cut.pool); WITH_VERBOSE(qry) { KR_DNAME_GET_STR(qname_str, ta_rr->owner); VERBOSE_MSG(qry, ">< TA: '%s'\n", qname_str); } } } /* Try to fetch missing DS (from above the cut). */ const bool has_ta = (qry->zone_cut.trust_anchor != NULL); const knot_dname_t *ta_name = (has_ta ? qry->zone_cut.trust_anchor->owner : NULL); const bool refetch_ta = !has_ta || !knot_dname_is_equal(qry->zone_cut.name, ta_name); if (want_secured && refetch_ta) { /* @todo we could fetch the information from the parent cut, but we don't remember that now */ struct kr_query *next = kr_rplan_push(rplan, qry, qry->zone_cut.name, qry->sclass, KNOT_RRTYPE_DS); if (!next) { return KR_STATE_FAIL; } next->flags.AWAIT_CUT = true; next->flags.DNSSEC_WANT = true; return KR_STATE_DONE; } /* Try to fetch missing DNSKEY (either missing or above current cut). * Do not fetch if this is a DNSKEY subrequest to avoid circular dependency. 
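	 * (kr_rplan_satisfies() below detects that the current query chain is itself the
	 * DNSKEY query for this trust anchor; spawning another DNSKEY subrequest from it
	 * would only recurse without making progress.)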
*/ const bool is_dnskey_subreq = kr_rplan_satisfies(qry, ta_name, KNOT_CLASS_IN, KNOT_RRTYPE_DNSKEY); const bool refetch_key = has_ta && (!qry->zone_cut.key || !knot_dname_is_equal(ta_name, qry->zone_cut.key->owner)); if (want_secured && refetch_key && !is_dnskey_subreq) { struct kr_query *next = zone_cut_subreq(rplan, qry, ta_name, KNOT_RRTYPE_DNSKEY); if (!next) { return KR_STATE_FAIL; } return KR_STATE_DONE; } return KR_STATE_PRODUCE; } /** @internal Check current zone cut status and credibility, spawn subrequests if needed. */ static int zone_cut_check(struct kr_request *request, struct kr_query *qry, knot_pkt_t *packet) /* TODO: using cache on this point in this way just isn't nice; remove in time */ { /* Stub mode, just forward and do not solve cut. */ if (qry->flags.STUB) { return KR_STATE_PRODUCE; } /* Forwarding to upstream resolver mode. * Since forwarding targets already are in qry->ns - * cut fetching is not needed. */ if (qry->flags.FORWARD) { return forward_trust_chain_check(request, qry, false); } if (!(qry->flags.AWAIT_CUT)) { /* The query was resolved from cache. * Spawn DS \ DNSKEY requests if needed and exit */ return trust_chain_check(request, qry); } /* The query wasn't resolved from cache, * now it's the time to look up closest zone cut from cache. */ struct kr_cache *cache = &request->ctx->cache; if (!kr_cache_is_open(cache)) { int ret = kr_zonecut_set_sbelt(request->ctx, &qry->zone_cut); if (ret != 0) { return KR_STATE_FAIL; } VERBOSE_MSG(qry, "=> no cache open, using root hints\n"); qry->flags.AWAIT_CUT = false; return KR_STATE_DONE; } const knot_dname_t *requested_name = qry->sname; /* If at/subdomain of parent zone cut, start from its encloser. * This is for case when we get to a dead end * (and need glue from parent), or DS refetch. */ if (qry->parent) { const knot_dname_t *parent = qry->parent->zone_cut.name; if (parent[0] != '\0' && knot_dname_in_bailiwick(qry->sname, parent) >= 0) { requested_name = knot_wire_next_label(parent, NULL); } } else if ((qry->stype == KNOT_RRTYPE_DS) && (qry->sname[0] != '\0')) { /* If this is explicit DS query, start from encloser too. */ requested_name = knot_wire_next_label(requested_name, NULL); } int state = KR_STATE_FAIL; do { state = ns_fetch_cut(qry, requested_name, request, packet); if (state == KR_STATE_DONE || state == KR_STATE_FAIL) { return state; } else if (state == KR_STATE_CONSUME) { requested_name = knot_wire_next_label(requested_name, NULL); } } while (state == KR_STATE_CONSUME); /* Update minimized QNAME if zone cut changed */ if (qry->zone_cut.name && qry->zone_cut.name[0] != '\0' && !(qry->flags.NO_MINIMIZE)) { if (kr_make_query(qry, packet) != 0) { return KR_STATE_FAIL; } } qry->flags.AWAIT_CUT = false; /* Check trust chain */ return trust_chain_check(request, qry); } int kr_resolve_produce(struct kr_request *request, struct sockaddr **dst, int *type, knot_pkt_t *packet) { struct kr_rplan *rplan = &request->rplan; unsigned ns_election_iter = 0; /* No query left for resolution */ if (kr_rplan_empty(rplan)) { return KR_STATE_FAIL; } /* If we have deferred answers, resume them. */ struct kr_query *qry = array_tail(rplan->pending); if (qry->deferred != NULL) { /* @todo: Refactoring validator, check trust chain before resuming. 
*/ int state = 0; if (((qry->flags.FORWARD) == 0) || ((qry->stype == KNOT_RRTYPE_DS) && (qry->flags.CNAME))) { state = trust_chain_check(request, qry); } else { state = forward_trust_chain_check(request, qry, true); } switch(state) { case KR_STATE_FAIL: return KR_STATE_FAIL; case KR_STATE_DONE: return KR_STATE_PRODUCE; default: break; } VERBOSE_MSG(qry, "=> resuming yielded answer\n"); struct kr_layer_pickle *pickle = qry->deferred; request->state = KR_STATE_YIELD; set_yield(&request->answ_selected, qry->uid, false); set_yield(&request->auth_selected, qry->uid, false); RESUME_LAYERS(layer_id(request, pickle->api), request, qry, consume, pickle->pkt); if (request->state != KR_STATE_YIELD) { /* No new deferred answers, take the next */ qry->deferred = pickle->next; } } else { /* Caller is interested in always tracking a zone cut, even if the answer is cached * this is normally not required, and incurrs another cache lookups for cached answer. */ if (qry->flags.ALWAYS_CUT) { if (!(qry->flags.STUB)) { switch(zone_cut_check(request, qry, packet)) { case KR_STATE_FAIL: return KR_STATE_FAIL; case KR_STATE_DONE: return KR_STATE_PRODUCE; default: break; } } } /* Resolve current query and produce dependent or finish */ request->state = KR_STATE_PRODUCE; ITERATE_LAYERS(request, qry, produce, packet); if (request->state != KR_STATE_FAIL && knot_wire_get_qr(packet->wire)) { /* Produced an answer from cache, consume it. */ qry->secret = 0; request->state = KR_STATE_CONSUME; ITERATE_LAYERS(request, qry, consume, packet); } } switch(request->state) { case KR_STATE_FAIL: return request->state; case KR_STATE_CONSUME: break; case KR_STATE_DONE: default: /* Current query is done */ if (qry->flags.RESOLVED && request->state != KR_STATE_YIELD) { kr_rplan_pop(rplan, qry); } ITERATE_LAYERS(request, qry, reset); return kr_rplan_empty(rplan) ? KR_STATE_DONE : KR_STATE_PRODUCE; } /* This query has RD=0 or is ANY, stop here. */ if (qry->stype == KNOT_RRTYPE_ANY || !knot_wire_get_rd(request->qsource.packet->wire)) { VERBOSE_MSG(qry, "=> qtype is ANY or RD=0, bail out\n"); return KR_STATE_FAIL; } /* Update zone cut, spawn new subrequests. */ if (!(qry->flags.STUB)) { int state = zone_cut_check(request, qry, packet); switch(state) { case KR_STATE_FAIL: return KR_STATE_FAIL; case KR_STATE_DONE: return KR_STATE_PRODUCE; default: break; } } ns_election: /* If the query has already selected a NS and is waiting for IPv4/IPv6 record, * elect best address only, otherwise elect a completely new NS. */ if(++ns_election_iter >= KR_ITER_LIMIT) { VERBOSE_MSG(qry, "=> couldn't converge NS selection, bail out\n"); return KR_STATE_FAIL; } const struct kr_qflags qflg = qry->flags; const bool retry = qflg.TCP || qflg.BADCOOKIE_AGAIN; if (qflg.AWAIT_IPV4 || qflg.AWAIT_IPV6) { kr_nsrep_elect_addr(qry, request->ctx); } else if (qflg.FORWARD || qflg.STUB) { kr_nsrep_sort(&qry->ns, request->ctx); if (qry->ns.score > KR_NS_MAX_SCORE) { /* At the moment all NS have bad reputation. * But there can be existing connections*/ VERBOSE_MSG(qry, "=> no valid NS left\n"); return KR_STATE_FAIL; } } else if (!qry->ns.name || !retry) { /* Keep NS when requerying/stub/badcookie. */ /* Root DNSKEY must be fetched from the hints to avoid chicken and egg problem. 
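		 * (Nothing can be validated before the root keys are known, so the query for
		 * the '.' DNSKEY cannot depend on previously validated data; the zone cut is
		 * therefore reset to the built-in root hints and throttling is disabled.)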
*/ if (qry->sname[0] == '\0' && qry->stype == KNOT_RRTYPE_DNSKEY) { kr_zonecut_set_sbelt(request->ctx, &qry->zone_cut); qry->flags.NO_THROTTLE = true; /* Pick even bad SBELT servers */ } kr_nsrep_elect(qry, request->ctx); if (qry->ns.score > KR_NS_MAX_SCORE) { if (kr_zonecut_is_empty(&qry->zone_cut)) { VERBOSE_MSG(qry, "=> no NS with an address\n"); } else { VERBOSE_MSG(qry, "=> no valid NS left\n"); } if (!qry->flags.NO_NS_FOUND) { qry->flags.NO_NS_FOUND = true; } else { ITERATE_LAYERS(request, qry, reset); kr_rplan_pop(rplan, qry); } return KR_STATE_PRODUCE; } } /* Resolve address records */ if (qry->ns.addr[0].ip.sa_family == AF_UNSPEC) { int ret = ns_resolve_addr(qry, request); if (ret != 0) { qry->flags.AWAIT_IPV6 = false; qry->flags.AWAIT_IPV4 = false; qry->flags.TCP = false; qry->ns.name = NULL; goto ns_election; /* Must try different NS */ } ITERATE_LAYERS(request, qry, reset); return KR_STATE_PRODUCE; } /* Randomize query case (if not in safe mode or turned off) */ qry->secret = (qry->flags.SAFEMODE || qry->flags.NO_0X20) ? 0 : kr_rand_bytes(sizeof(qry->secret)); knot_dname_t *qname_raw = knot_pkt_qname(packet); randomized_qname_case(qname_raw, qry->secret); /* * Additional query is going to be finalized when calling * kr_resolve_checkout(). */ qry->timestamp_mono = kr_now(); *dst = &qry->ns.addr[0].ip; *type = (qry->flags.TCP) ? SOCK_STREAM : SOCK_DGRAM; return request->state; } #if defined(ENABLE_COOKIES) /** Update DNS cookie data in packet. */ static bool outbound_request_update_cookies(struct kr_request *req, const struct sockaddr *src, const struct sockaddr *dst) { assert(req); /* RFC7873 4.1 strongly requires server address. */ if (!dst) { return false; } struct kr_cookie_settings *clnt_sett = &req->ctx->cookie_ctx.clnt; /* Cookies disabled or packet has no EDNS section. */ if (!clnt_sett->enabled) { return true; } /* * RFC7873 4.1 recommends using also the client address. The matter is * also discussed in section 6. */ kr_request_put_cookie(&clnt_sett->current, req->ctx->cache_cookie, src, dst, req); return true; } #endif /* defined(ENABLE_COOKIES) */ int kr_resolve_checkout(struct kr_request *request, const struct sockaddr *src, struct sockaddr *dst, int type, knot_pkt_t *packet) { /* @todo: Update documentation if this function becomes approved. */ struct kr_rplan *rplan = &request->rplan; if (knot_wire_get_qr(packet->wire) != 0) { return kr_ok(); } /* No query left for resolution */ if (kr_rplan_empty(rplan)) { return kr_error(EINVAL); } struct kr_query *qry = array_tail(rplan->pending); #if defined(ENABLE_COOKIES) /* Update DNS cookies in request. */ if (type == SOCK_DGRAM) { /* @todo: Add cookies also over TCP? */ /* * The actual server IP address is needed before generating the * actual cookie. If we don't know the server address then we * also don't know the actual cookie size. */ if (!outbound_request_update_cookies(request, src, dst)) { return kr_error(EINVAL); } } #endif /* defined(ENABLE_COOKIES) */ int ret = query_finalize(request, qry, packet); if (ret != 0) { return kr_error(EINVAL); } /* Track changes in minimization secret to enable/disable minimization */ uint32_t old_minimization_secret = qry->secret; /* Run the checkout layers and cancel on failure. * The checkout layer doesn't persist the state, so canceled subrequests * don't affect the resolution or rest of the processing. 
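	 *
	 * A module can therefore veto a single outgoing packet from its checkout callback
	 * without derailing the whole request. A minimal sketch of such a callback
	 * (hypothetical module code, not part of this file), using only the checkout
	 * signature already shown above for checkout_yield():
	 *
	 *   static int my_checkout(kr_layer_t *ctx, knot_pkt_t *pkt,
	 *                          struct sockaddr *dst, int type)
	 *   {
	 *       if (dst && dst->sa_family == AF_INET6) {
	 *           return KR_STATE_FAIL;  // cancel just this checkout; the driver restores state
	 *       }
	 *       return ctx->state;
	 *   }
	 */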
*/ int state = request->state; ITERATE_LAYERS(request, qry, checkout, packet, dst, type); if (request->state == KR_STATE_FAIL) { request->state = state; /* Restore */ return kr_error(ECANCELED); } /* Randomize query case (if secret changed) */ knot_dname_t *qname = (knot_dname_t *)knot_pkt_qname(packet); if (qry->secret != old_minimization_secret) { randomized_qname_case(qname, qry->secret); } /* Write down OPT unless in safemode */ if (!(qry->flags.SAFEMODE)) { ret = edns_put(packet, true); if (ret != 0) { return kr_error(EINVAL); } } WITH_VERBOSE(qry) { KR_DNAME_GET_STR(qname_str, knot_pkt_qname(packet)); KR_DNAME_GET_STR(zonecut_str, qry->zone_cut.name); KR_RRTYPE_GET_STR(type_str, knot_pkt_qtype(packet)); for (size_t i = 0; i < KR_NSREP_MAXADDR; ++i) { struct sockaddr *addr = &qry->ns.addr[i].ip; if (addr->sa_family == AF_UNSPEC) { break; } if (!kr_inaddr_equal(dst, addr)) { continue; } const char *ns_str = kr_straddr(addr); VERBOSE_MSG(qry, "=> id: '%05u' querying: '%s' score: %u zone cut: '%s' " "qname: '%s' qtype: '%s' proto: '%s'\n", qry->id, ns_str ? ns_str : "", qry->ns.score, zonecut_str, qname_str, type_str, (qry->flags.TCP) ? "tcp" : "udp"); break; }} return kr_ok(); } int kr_resolve_finish(struct kr_request *request, int state) { /* Finalize answer and construct wire-buffer. */ ITERATE_LAYERS(request, NULL, answer_finalize); if (request->state == KR_STATE_FAIL) { state = KR_STATE_FAIL; } else if (answer_finalize(request, state) != 0) { state = KR_STATE_FAIL; } /* Error during processing, internal failure */ if (state != KR_STATE_DONE) { knot_pkt_t *answer = request->answer; if (knot_wire_get_rcode(answer->wire) == KNOT_RCODE_NOERROR) { knot_wire_clear_ad(answer->wire); knot_wire_clear_aa(answer->wire); knot_wire_set_rcode(answer->wire, KNOT_RCODE_SERVFAIL); } } request->state = state; ITERATE_LAYERS(request, NULL, finish); #ifndef NOVERBOSELOG struct kr_rplan *rplan = &request->rplan; struct kr_query *last = kr_rplan_last(rplan); VERBOSE_MSG(last, "finished: %d, queries: %zu, mempool: %zu B\n", request->state, rplan->resolved.len, (size_t) mp_total_size(request->pool.ctx)); #endif /* Trace request finish */ if (request->trace_finish) { request->trace_finish(request); } /* Uninstall all tracepoints */ request->trace_finish = NULL; request->trace_log = NULL; return KR_STATE_DONE; } struct kr_rplan *kr_resolve_plan(struct kr_request *request) { if (request) { return &request->rplan; } return NULL; } knot_mm_t *kr_resolve_pool(struct kr_request *request) { if (request) { return &request->pool; } return NULL; } #undef VERBOSE_MSG
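
/*
 * Usage sketch (illustrative only, compiled out): one way a caller could drive a
 * request through the functions defined above. It assumes an already initialized
 * struct kr_context; the helpers send_to() and recv_from() are hypothetical
 * stand-ins for the caller's network I/O, and the memory-pool setup mirrors the
 * mp_*() allocator this file already relies on (see mp_total_size() above).
 */
#if 0
static int resolve_blocking(struct kr_context *ctx, knot_pkt_t *query, knot_pkt_t *answer)
{
	struct kr_request req = {
		.pool = {
			.ctx = mp_new(4096),
			.alloc = (knot_mm_alloc_t) mp_alloc,
		},
	};
	int state = kr_resolve_begin(&req, ctx, answer);
	state = kr_resolve_consume(&req, NULL, query); /* Plan the initial query. */
	while (state == KR_STATE_PRODUCE) {
		struct sockaddr *dst = NULL;
		int sock_type = -1;
		knot_pkt_t *pkt = knot_pkt_new(NULL, KNOT_WIRE_MAX_PKTSIZE, &req.pool);
		state = kr_resolve_produce(&req, &dst, &sock_type, pkt);
		if (state == KR_STATE_PRODUCE) {
			continue; /* A subrequest was planned; produce again. */
		}
		if (state != KR_STATE_CONSUME) {
			break; /* KR_STATE_DONE or KR_STATE_FAIL. */
		}
		/* Finalize the outbound packet (EDNS, 0x20 case, cookies) and send it. */
		if (kr_resolve_checkout(&req, NULL, dst, sock_type, pkt) != 0) {
			state = KR_STATE_PRODUCE; /* This checkout was canceled; plan another. */
			continue;
		}
		send_to(dst, sock_type, pkt);
		knot_pkt_t *reply = recv_from(dst, &req.pool);
		state = kr_resolve_consume(&req, dst, reply);
	}
	kr_resolve_finish(&req, state);
	mp_delete(req.pool.ctx);
	return state == KR_STATE_DONE ? 0 : -1;
}
#endif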