diff options
author | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-09-12 04:45:08 +0000 |
---|---|---|
committer | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-09-12 04:45:08 +0000 |
commit | 3ade2b375d3e928a06a39bb5ce48e59ea054f9c8 (patch) | |
tree | 23c9115f88363ed22bc4afbeb4901994d036f189 /src/libknot | |
parent | Releasing progress-linux version 3.3.9-1~progress7.99u1. (diff) | |
download | knot-3ade2b375d3e928a06a39bb5ce48e59ea054f9c8.tar.xz knot-3ade2b375d3e928a06a39bb5ce48e59ea054f9c8.zip |
Merging upstream version 3.4.0.
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to '')
31 files changed, 1650 insertions, 795 deletions
diff --git a/src/libknot/Makefile.inc b/src/libknot/Makefile.inc index f62d836..d09ff55 100755 --- a/src/libknot/Makefile.inc +++ b/src/libknot/Makefile.inc @@ -37,6 +37,8 @@ nobase_include_libknot_HEADERS = \ libknot/packet/wire.h \ libknot/probe/data.h \ libknot/probe/probe.h \ + libknot/quic/tls.h \ + libknot/quic/tls_common.h \ libknot/rdata.h \ libknot/rdataset.h \ libknot/rrset-dump.h \ @@ -78,6 +80,8 @@ libknot_la_SOURCES = \ libknot/packet/rrset-wire.c \ libknot/probe/data.c \ libknot/probe/probe.c \ + libknot/quic/tls.c \ + libknot/quic/tls_common.c \ libknot/rdataset.c \ libknot/rrset-dump.c \ libknot/rrset.c \ diff --git a/src/libknot/attribute.h b/src/libknot/attribute.h index 525aef3..ce464fc 100644 --- a/src/libknot/attribute.h +++ b/src/libknot/attribute.h @@ -1,4 +1,4 @@ -/* Copyright (C) 2021 CZ.NIC, z.s.p.o. <knot-dns@labs.nic.cz> +/* Copyright (C) 2024 CZ.NIC, z.s.p.o. <knot-dns@labs.nic.cz> This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -35,17 +35,19 @@ /*! \brief GNU C function attributes. */ #if __GNUC__ >= 3 -#define _pure_ __attribute__ ((pure)) -#define _const_ __attribute__ ((const)) -#define _noreturn_ __attribute__ ((noreturn)) -#define _malloc_ __attribute__ ((malloc)) -#define _mustcheck_ __attribute__ ((warn_unused_result)) +#define _pure_ __attribute__ ((pure)) +#define _const_ __attribute__ ((const)) +#define _noreturn_ __attribute__ ((noreturn)) +#define _malloc_ __attribute__ ((malloc)) +#define _mustcheck_ __attribute__ ((warn_unused_result)) +#define _nonnull_(...) __attribute__ ((nonnull(__VA_ARGS__))) #else #define _pure_ #define _const_ #define _noreturn_ #define _malloc_ #define _mustcheck_ +#define _nonnull_ #endif /*! @} */ diff --git a/src/libknot/control/control.c b/src/libknot/control/control.c index 671896f..8cddd5d 100644 --- a/src/libknot/control/control.c +++ b/src/libknot/control/control.c @@ -36,9 +36,6 @@ #define CTL_BUFF_SIZE (256 * 1024) #endif -/*! Listen backlog size. */ -#define DEFAULT_LISTEN_BACKLOG 5 - /*! Default socket operations timeout in milliseconds. */ #define DEFAULT_TIMEOUT (30 * 1000) @@ -166,6 +163,18 @@ knot_ctl_t* knot_ctl_alloc(void) } _public_ +knot_ctl_t* knot_ctl_clone(knot_ctl_t *ctx) +{ + knot_ctl_t *res = knot_ctl_alloc(); + if (res != NULL) { + res->timeout = ctx->timeout; + res->sock = ctx->sock; + ctx->sock = -1; + } + return res; +} + +_public_ void knot_ctl_free(knot_ctl_t *ctx) { if (ctx == NULL) { @@ -194,13 +203,7 @@ void knot_ctl_set_timeout(knot_ctl_t *ctx, int timeout_ms) } _public_ -int knot_ctl_bind(knot_ctl_t *ctx, const char *path) -{ - return knot_ctl_bind2(ctx, path, DEFAULT_LISTEN_BACKLOG); -} - -_public_ -int knot_ctl_bind2(knot_ctl_t *ctx, const char *path, unsigned backlog) +int knot_ctl_bind(knot_ctl_t *ctx, const char *path, unsigned backlog) { if (ctx == NULL || path == NULL) { return KNOT_EINVAL; diff --git a/src/libknot/control/control.h b/src/libknot/control/control.h index 8ab1e10..f110e3f 100644 --- a/src/libknot/control/control.h +++ b/src/libknot/control/control.h @@ -65,6 +65,18 @@ typedef struct knot_ctl knot_ctl_t; knot_ctl_t* knot_ctl_alloc(void); /*! + * \brief Allocates a control context based on an existing one. + * + * \param[in,out] ctx Original control context. + * + * \note The listen_sock is kept at the original, the current sock is taken + * by the clone and RESET in the original! + * + * \return Control context or NULL. + */ +knot_ctl_t* knot_ctl_clone(knot_ctl_t *ctx); + +/*! * Deallocates a control context. * * \param[in] ctx Control context. @@ -86,17 +98,13 @@ void knot_ctl_set_timeout(knot_ctl_t *ctx, int timeout_ms); * * \note Server operation. * - * \param[in] ctx Control context. - * \param[in] path Control UNIX socket path. + * \param[in] ctx Control context. + * \param[in] path Control UNIX socket path. + * \param[in] backlog Socket listen backlog size. * * \return Error code, KNOT_EOK if successful. */ -int knot_ctl_bind(knot_ctl_t *ctx, const char *path); - -/*! - * Same as knot_ctl_bind() with socket backlog specification. - */ -int knot_ctl_bind2(knot_ctl_t *ctx, const char *path, unsigned backlog); +int knot_ctl_bind(knot_ctl_t *ctx, const char *path, unsigned backlog); /*! * Unbinds a control socket. diff --git a/src/libknot/dname.c b/src/libknot/dname.c index 31b8a5f..d166f8d 100644 --- a/src/libknot/dname.c +++ b/src/libknot/dname.c @@ -1,4 +1,4 @@ -/* Copyright (C) 2022 CZ.NIC, z.s.p.o. <knot-dns@labs.nic.cz> +/* Copyright (C) 2024 CZ.NIC, z.s.p.o. <knot-dns@labs.nic.cz> This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -62,11 +62,11 @@ static int dname_align(const uint8_t **d1, uint8_t d1_labels, assert(d1 && d2); for (unsigned j = d1_labels; j < d2_labels; ++j) { - *d2 = knot_wire_next_label(*d2, NULL); + *d2 = knot_dname_next_label(*d2); } for (unsigned j = d2_labels; j < d1_labels; ++j) { - *d1 = knot_wire_next_label(*d1, NULL); + *d1 = knot_dname_next_label(*d1); } return (d1_labels < d2_labels) ? d1_labels : d2_labels; @@ -190,7 +190,7 @@ _public_ int knot_dname_unpack(uint8_t *dst, const knot_dname_t *src, size_t maxlen, const uint8_t *pkt) { - if (dst == NULL || src == NULL) { + if (dst == NULL || src == NULL || pkt == NULL) { return KNOT_EINVAL; } @@ -533,7 +533,7 @@ size_t knot_dname_size(const knot_dname_t *name) _public_ size_t knot_dname_realsize(const knot_dname_t *name, const uint8_t *pkt) { - if (name == NULL) { + if (name == NULL || pkt == NULL) { return 0; } @@ -573,8 +573,8 @@ size_t knot_dname_matched_labels(const knot_dname_t *d1, const knot_dname_t *d2) } /* Next label. */ - d1 = knot_wire_next_label(d1, NULL); - d2 = knot_wire_next_label(d2, NULL); + d1 = knot_dname_next_label(d1); + d2 = knot_dname_next_label(d2); --common; } @@ -596,7 +596,7 @@ knot_dname_t *knot_dname_replace_suffix(const knot_dname_t *name, unsigned label } size_t prefix_lbs = dname_lbs - labels; - size_t prefix_len = knot_dname_prefixlen(name, prefix_lbs, NULL); + size_t prefix_len = knot_dname_prefixlen(name, prefix_lbs); size_t suffix_len = knot_dname_size(suffix); if (prefix_len == 0 || suffix_len == 0) { return NULL; @@ -614,7 +614,7 @@ knot_dname_t *knot_dname_replace_suffix(const knot_dname_t *name, unsigned label while (prefix_lbs > 0) { memcpy(dst, name, *name + 1); dst += *name + 1; - name = knot_wire_next_label(name, NULL); + name = knot_dname_next_label(name); --prefix_lbs; } @@ -622,7 +622,7 @@ knot_dname_t *knot_dname_replace_suffix(const knot_dname_t *name, unsigned label while (*suffix != '\0') { memcpy(dst, suffix, *suffix + 1); dst += *suffix + 1; - suffix = knot_wire_next_label(suffix, NULL); + suffix = knot_dname_next_label(suffix); } *dst = '\0'; @@ -684,8 +684,8 @@ inline static bool dname_is_equal(const knot_dname_t *d1, const knot_dname_t *d2 while (*d1 != '\0' || *d2 != '\0') { if (label_is_equal(d1, d2, no_case)) { - d1 = knot_wire_next_label(d1, NULL); - d2 = knot_wire_next_label(d2, NULL); + d1 = knot_dname_next_label(d1); + d2 = knot_dname_next_label(d2); } else { return false; } @@ -707,7 +707,7 @@ bool knot_dname_is_case_equal(const knot_dname_t *d1, const knot_dname_t *d2) } _public_ -size_t knot_dname_prefixlen(const uint8_t *name, unsigned nlabels, const uint8_t *pkt) +size_t knot_dname_prefixlen(const uint8_t *name, unsigned nlabels) { if (name == NULL) { return 0; @@ -718,13 +718,10 @@ size_t knot_dname_prefixlen(const uint8_t *name, unsigned nlabels, const uint8_t return 0; } - /* Seek first real label occurrence. */ - name = knot_wire_seek_label(name, pkt); - size_t len = 0; while (*name != '\0') { len += *name + 1; - name = knot_wire_next_label(name, pkt); + name = knot_dname_next_label(name); if (--nlabels == 0) { /* Count N first labels only. */ break; } @@ -743,7 +740,8 @@ size_t knot_dname_labels(const uint8_t *name, const uint8_t *pkt) size_t count = 0; while (*name != '\0') { ++count; - name = knot_wire_next_label(name, pkt); + name = (pkt == NULL) ? knot_dname_next_label(name) : + knot_wire_next_label(name, pkt); if (name == NULL) { return 0; } @@ -794,7 +792,7 @@ int knot_dname_in_bailiwick(const knot_dname_t *name, const knot_dname_t *bailiw } for (int i = 0; i < label_diff; ++i) { - name = knot_wire_next_label(name, NULL); + name = knot_dname_next_label(name); } return knot_dname_is_equal(name, bailiwick) ? label_diff : KNOT_EOUTOFZONE; diff --git a/src/libknot/dname.h b/src/libknot/dname.h index 5733de9..6d72f90 100644 --- a/src/libknot/dname.h +++ b/src/libknot/dname.h @@ -1,4 +1,4 @@ -/* Copyright (C) 2022 CZ.NIC, z.s.p.o. <knot-dns@labs.nic.cz> +/* Copyright (C) 2024 CZ.NIC, z.s.p.o. <knot-dns@labs.nic.cz> This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -48,7 +48,7 @@ typedef char knot_dname_txt_storage_t[KNOT_DNAME_TXT_MAXLEN + 1]; * * \param name Name on the wire. * \param endp Name boundary. - * \param pkt Wire. + * \param pkt Wire (can be NULL if not compressed). * * \retval (compressed) size of the domain name. * \retval KNOT_EINVAL @@ -195,7 +195,7 @@ size_t knot_dname_size(const knot_dname_t *name); * \brief Returns full size of the given domain name (expanded compression ptrs). * * \param name Domain name to get the size of. - * \param pkt Related packet (or NULL if unpacked) + * \param pkt Related packet. * * \retval size of the domain name. * \retval 0 if invalid argument. @@ -295,12 +295,11 @@ bool knot_dname_is_case_equal(const knot_dname_t *d1, const knot_dname_t *d2); * * \param name Domain name. * \param nlabels First N labels. - * \param pkt Related packet (or NULL if not compressed). * * \return Length of the prefix. */ _pure_ -size_t knot_dname_prefixlen(const uint8_t *name, unsigned nlabels, const uint8_t *pkt); +size_t knot_dname_prefixlen(const uint8_t *name, unsigned nlabels); /*! * \brief Return number of labels in the domain name. diff --git a/src/libknot/errcode.h b/src/libknot/errcode.h index 6ec4a94..3ee326c 100644 --- a/src/libknot/errcode.h +++ b/src/libknot/errcode.h @@ -110,6 +110,7 @@ enum knot_error { KNOT_EBADCERTKEY, KNOT_EFACCES, KNOT_EBACKUPDATA, + KNOT_ECPUCOMPAT, KNOT_GENERAL_ERROR = -900, @@ -121,6 +122,7 @@ enum knot_error { KNOT_NET_EADDR, KNOT_NET_ESOCKET, KNOT_NET_ECONNECT, + KNOT_NET_EHSHAKE, KNOT_NET_ESEND, KNOT_NET_ERECV, KNOT_NET_ETIMEOUT, @@ -176,6 +178,7 @@ enum knot_error { KNOT_NO_PUBLIC_KEY, KNOT_NO_PRIVATE_KEY, KNOT_NO_READY_KEY, + KNOT_ESOON_EXPIRE, KNOT_DNSSEC_EKEYTAG_LIMIT, KNOT_DNSSEC_EXTRA_NSEC, diff --git a/src/libknot/error.c b/src/libknot/error.c index ae9b973..59f0a7a 100644 --- a/src/libknot/error.c +++ b/src/libknot/error.c @@ -109,6 +109,7 @@ static const struct error errors[] = { { KNOT_EBADCERTKEY, "unknown certificate key" }, { KNOT_EFACCES, "file permission denied" }, { KNOT_EBACKUPDATA, "requested data not in backup" }, + { KNOT_ECPUCOMPAT, "incompatible CPU architecture" }, { KNOT_GENERAL_ERROR, "unknown general error" }, @@ -120,6 +121,7 @@ static const struct error errors[] = { { KNOT_NET_EADDR, "bad address or host name" }, { KNOT_NET_ESOCKET, "can't create socket" }, { KNOT_NET_ECONNECT, "can't connect" }, + { KNOT_NET_EHSHAKE, "handshake failed" }, { KNOT_NET_ESEND, "can't send data" }, { KNOT_NET_ERECV, "can't receive data" }, { KNOT_NET_ETIMEOUT, "network timeout" }, @@ -175,6 +177,7 @@ static const struct error errors[] = { { KNOT_NO_PUBLIC_KEY, "no public key" }, { KNOT_NO_PRIVATE_KEY, "no private key" }, { KNOT_NO_READY_KEY, "no key ready for submission" }, + { KNOT_ESOON_EXPIRE, "oncoming RRSIG expiration" }, { KNOT_DNSSEC_EKEYTAG_LIMIT, "many keys with equal keytag" }, { KNOT_DNSSEC_EXTRA_NSEC, "superfluous NSEC(3)" }, diff --git a/src/libknot/packet/pkt.h b/src/libknot/packet/pkt.h index 383f55e..da69c8c 100644 --- a/src/libknot/packet/pkt.h +++ b/src/libknot/packet/pkt.h @@ -52,7 +52,6 @@ enum { KNOT_PF_NOCANON = 1 << 5, /*!< Don't canonicalize rrsets during parsing. */ KNOT_PF_ORIGTTL = 1 << 6, /*!< Write RRSIGs with their original TTL. */ KNOT_PF_SOAMINTTL = 1 << 7, /*!< Write SOA with its minimum-ttl as TTL. */ - KNOT_PF_BUFENOUGH = 1 << 8, /*!< The output buffer is big enough for the output. */ }; typedef struct knot_pkt knot_pkt_t; diff --git a/src/libknot/packet/rrset-wire.c b/src/libknot/packet/rrset-wire.c index ef3a068..1d4f78e 100644 --- a/src/libknot/packet/rrset-wire.c +++ b/src/libknot/packet/rrset-wire.c @@ -73,6 +73,7 @@ static bool dname_equal_wire(const knot_dname_t *d1, const knot_dname_t *d2, { assert(d1); assert(d2); + assert(wire); d2 = knot_wire_seek_label(d2, wire); @@ -80,7 +81,7 @@ static bool dname_equal_wire(const knot_dname_t *d1, const knot_dname_t *d2, if (!label_is_equal(d1, d2)) { return false; } - d1 = knot_wire_next_label(d1, NULL); + d1 = knot_dname_next_label(d1); d2 = knot_wire_next_label(d2, wire); } @@ -170,7 +171,7 @@ static int write_rdata_naptr_header(const uint8_t **src, size_t *src_avail, written += (len); \ } -#define CHECK_NEXT_LABEL(res) \ +#define CHECK_WIRE_NEXT_LABEL(res) \ if (res == NULL) { return KNOT_EINVAL; } /*! @@ -201,7 +202,7 @@ static int compr_put_dname(const knot_dname_t *dname, uint8_t *dst, uint16_t max int suffix_labels = compr->suffix.labels; while (suffix_labels > name_labels) { suffix = knot_wire_next_label(suffix, compr->wire); - CHECK_NEXT_LABEL(suffix); + CHECK_WIRE_NEXT_LABEL(suffix); --suffix_labels; } @@ -210,8 +211,7 @@ static int compr_put_dname(const knot_dname_t *dname, uint8_t *dst, uint16_t max uint16_t written = 0; while (name_labels > suffix_labels) { WRITE_LABEL(dst, written, dname, max, (*dname + 1)); - dname = knot_wire_next_label(dname, NULL); - CHECK_NEXT_LABEL(dname); + dname = knot_dname_next_label(dname); --name_labels; } @@ -221,10 +221,9 @@ static int compr_put_dname(const knot_dname_t *dname, uint8_t *dst, uint16_t max const knot_dname_t *compr_ptr = suffix; while (dname[0] != '\0') { // Next labels. - const knot_dname_t *next_dname = knot_wire_next_label(dname, NULL); - CHECK_NEXT_LABEL(next_dname); + const knot_dname_t *next_dname = knot_dname_next_label(dname); const knot_dname_t *next_suffix = knot_wire_next_label(suffix, compr->wire); - CHECK_NEXT_LABEL(next_suffix); + CHECK_WIRE_NEXT_LABEL(next_suffix); // Two labels match, extend suffix length. if (!label_is_equal(dname, suffix)) { @@ -495,7 +494,7 @@ static int write_rr(const knot_rrset_t *rrset, uint16_t rrset_index, _public_ int knot_rrset_to_wire_extra(const knot_rrset_t *rrset, uint8_t *wire, - uint16_t max_size, uint16_t rotate, + uint32_t max_size, uint16_t rotate, knot_compr_t *compr, uint16_t flags) { if (rrset == NULL || wire == NULL) { @@ -511,11 +510,6 @@ int knot_rrset_to_wire_extra(const knot_rrset_t *rrset, uint8_t *wire, uint8_t *write = wire; size_t capacity = max_size; - // FIXME remove this and make the max_size parameter uint32_t in next major libknot release! - if ((flags & KNOT_PF_BUFENOUGH)) { - capacity = SIZE_MAX; - } - uint16_t count = rrset->rrs.count; knot_rdata_t *rdata = rotate > 1 ? knot_rdataset_at(&rrset->rrs, rotate - 1) : rrset->rrs.rdata; for (int i = rotate; i < count + rotate; i++) { diff --git a/src/libknot/packet/rrset-wire.h b/src/libknot/packet/rrset-wire.h index 3be0cba..3247df8 100644 --- a/src/libknot/packet/rrset-wire.h +++ b/src/libknot/packet/rrset-wire.h @@ -1,4 +1,4 @@ -/* Copyright (C) 2018 CZ.NIC, z.s.p.o. <knot-dns@labs.nic.cz> +/* Copyright (C) 2023 CZ.NIC, z.s.p.o. <knot-dns@labs.nic.cz> This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -41,12 +41,12 @@ * \return Output size, negative number on error (KNOT_E*). */ int knot_rrset_to_wire_extra(const knot_rrset_t *rrset, uint8_t *wire, - uint16_t max_size, uint16_t rotate, + uint32_t max_size, uint16_t rotate, knot_compr_t *compr, uint16_t flags); /*! \brief Same as knot_rrset_to_wire_extra but without rrset rotation and flags. */ static inline int knot_rrset_to_wire(const knot_rrset_t *rrset, uint8_t *wire, - uint16_t max_size, knot_compr_t *compr) + uint32_t max_size, knot_compr_t *compr) { return knot_rrset_to_wire_extra(rrset, wire, max_size, 0, compr, 0); } diff --git a/src/libknot/packet/wire.h b/src/libknot/packet/wire.h index 630cd83..cff4b21 100644 --- a/src/libknot/packet/wire.h +++ b/src/libknot/packet/wire.h @@ -1025,13 +1025,11 @@ static inline uint16_t knot_wire_get_pointer(const uint8_t *pos) return (knot_wire_read_u16(pos) - KNOT_WIRE_PTR_BASE); // Return offset. } -_pure_ _mustcheck_ +_pure_ _mustcheck_ _nonnull_(2) static inline const uint8_t *knot_wire_seek_label(const uint8_t *lp, const uint8_t *wire) { + assert(wire); while (knot_wire_is_pointer(lp)) { - if (!wire) { - return NULL; - } const uint8_t *new_lp = wire + knot_wire_get_pointer(lp); if (new_lp >= lp) { assert(0); @@ -1042,12 +1040,21 @@ static inline const uint8_t *knot_wire_seek_label(const uint8_t *lp, const uint8 return lp; } -_pure_ _mustcheck_ +_pure_ _mustcheck_ _nonnull_(1, 2) static inline const uint8_t *knot_wire_next_label(const uint8_t *lp, const uint8_t *wire) { - if (!lp || !lp[0]) /* No label after final label. */ - return NULL; + assert(lp); + assert(lp[0] > 0); // Not a terminal label. return knot_wire_seek_label(lp + (lp[0] + sizeof(uint8_t)), wire); } +_pure_ _mustcheck_ _nonnull_(1) +static inline const uint8_t *knot_dname_next_label(const uint8_t *lp) +{ + assert(lp); + assert(lp[0] > 0); // Not a terminal label. + assert(!knot_wire_is_pointer(lp)); + return lp + (lp[0] + sizeof(uint8_t)); +} + /*! @} */ diff --git a/src/libknot/quic/quic.c b/src/libknot/quic/quic.c index f9d1d1d..4eb84c3 100644 --- a/src/libknot/quic/quic.c +++ b/src/libknot/quic/quic.c @@ -1,4 +1,4 @@ -/* Copyright (C) 2023 CZ.NIC, z.s.p.o. <knot-dns@labs.nic.cz> +/* Copyright (C) 2024 CZ.NIC, z.s.p.o. <knot-dns@labs.nic.cz> This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -33,7 +33,6 @@ #include "contrib/macros.h" #include "contrib/sockaddr.h" -#include "contrib/string.h" #include "contrib/ucw/lists.h" #include "libknot/endian.h" #include "libdnssec/error.h" @@ -58,19 +57,6 @@ #define TLS_CALLBACK_ERR (-1) -const gnutls_datum_t doq_alpn = { - (unsigned char *)"doq", 3 -}; - -typedef struct knot_quic_creds { - gnutls_certificate_credentials_t tls_cert; - gnutls_anti_replay_t tls_anti_replay; - gnutls_datum_t tls_ticket_key; - bool peer; - uint8_t peer_pin_len; - uint8_t peer_pin[]; -} knot_quic_creds_t; - typedef struct knot_quic_session { node_t n; gnutls_datum_t tls_session; @@ -153,223 +139,6 @@ session_free: return ret; } -static int tls_anti_replay_db_add_func(void *dbf, time_t exp_time, - const gnutls_datum_t *key, - const gnutls_datum_t *data) -{ - return 0; -} - -static void tls_session_ticket_key_free(gnutls_datum_t *ticket) -{ - gnutls_memset(ticket->data, 0, ticket->size); - gnutls_free(ticket->data); -} - -static int self_key(gnutls_x509_privkey_t *privkey, const char *key_file) -{ - gnutls_datum_t data = { 0 }; - - int ret = gnutls_x509_privkey_init(privkey); - if (ret != GNUTLS_E_SUCCESS) { - return ret; - } - - int fd = open(key_file, O_RDONLY); - if (fd != -1) { - struct stat stat; - if (fstat(fd, &stat) != 0 || - (data.data = gnutls_malloc(stat.st_size)) == NULL || - read(fd, data.data, stat.st_size) != stat.st_size) { - ret = GNUTLS_E_KEYFILE_ERROR; - goto finish; - } - - data.size = stat.st_size; - ret = gnutls_x509_privkey_import_pkcs8(*privkey, &data, GNUTLS_X509_FMT_PEM, - NULL, GNUTLS_PKCS_PLAIN); - if (ret != GNUTLS_E_SUCCESS) { - goto finish; - } - } else { - ret = gnutls_x509_privkey_generate(*privkey, GNUTLS_PK_EDDSA_ED25519, - GNUTLS_CURVE_TO_BITS(GNUTLS_ECC_CURVE_ED25519), 0); - if (ret != GNUTLS_E_SUCCESS) { - goto finish; - } - - ret = gnutls_x509_privkey_export2_pkcs8(*privkey, GNUTLS_X509_FMT_PEM, NULL, - GNUTLS_PKCS_PLAIN, &data); - if (ret != GNUTLS_E_SUCCESS || - (fd = open(key_file, O_WRONLY | O_CREAT, 0600)) == -1 || - write(fd, data.data, data.size) != data.size) { - ret = GNUTLS_E_KEYFILE_ERROR; - goto finish; - } - } - -finish: - close(fd); - gnutls_free(data.data); - if (ret != GNUTLS_E_SUCCESS) { - gnutls_x509_privkey_deinit(*privkey); - *privkey = NULL; - } - return ret; -} - -static int self_signed_cert(gnutls_certificate_credentials_t tls_cert, - const char *key_file) -{ - gnutls_x509_privkey_t privkey = NULL; - gnutls_x509_crt_t cert = NULL; - - char *hostname = sockaddr_hostname(); - if (hostname == NULL) { - return GNUTLS_E_MEMORY_ERROR; - } - - int ret; - uint8_t serial[16]; - gnutls_rnd(GNUTLS_RND_NONCE, serial, sizeof(serial)); - // Clear the left-most bit to be a positive number (two's complement form). - serial[0] &= 0x7F; - -#define CHK(cmd) if ((ret = (cmd)) != GNUTLS_E_SUCCESS) { goto finish; } -#define NOW_DAYS(days) (time(NULL) + 24 * 3600 * (days)) - - CHK(self_key(&privkey, key_file)); - - CHK(gnutls_x509_crt_init(&cert)); - CHK(gnutls_x509_crt_set_version(cert, 3)); - CHK(gnutls_x509_crt_set_serial(cert, serial, sizeof(serial))); - CHK(gnutls_x509_crt_set_activation_time(cert, NOW_DAYS(-1))); - CHK(gnutls_x509_crt_set_expiration_time(cert, NOW_DAYS(10 * 365))); - CHK(gnutls_x509_crt_set_dn_by_oid(cert, GNUTLS_OID_X520_COMMON_NAME, 0, - hostname, strlen(hostname))); - CHK(gnutls_x509_crt_set_key(cert, privkey)); - CHK(gnutls_x509_crt_sign2(cert, cert, privkey, GNUTLS_DIG_SHA512, 0)); - - ret = gnutls_certificate_set_x509_key(tls_cert, &cert, 1, privkey); - -finish: - free(hostname); - gnutls_x509_crt_deinit(cert); - gnutls_x509_privkey_deinit(privkey); - - return ret; -} - -_public_ -struct knot_quic_creds *knot_quic_init_creds(const char *cert_file, - const char *key_file) -{ - knot_quic_creds_t *creds = calloc(1, sizeof(*creds)); - if (creds == NULL) { - return NULL; - } - - int ret = gnutls_certificate_allocate_credentials(&creds->tls_cert); - if (ret != GNUTLS_E_SUCCESS) { - goto fail; - } - - ret = gnutls_anti_replay_init(&creds->tls_anti_replay); - if (ret != GNUTLS_E_SUCCESS) { - goto fail; - } - gnutls_anti_replay_set_add_function(creds->tls_anti_replay, tls_anti_replay_db_add_func); - gnutls_anti_replay_set_ptr(creds->tls_anti_replay, NULL); - - if (cert_file != NULL) { - ret = gnutls_certificate_set_x509_key_file(creds->tls_cert, - cert_file, key_file, - GNUTLS_X509_FMT_PEM); - } else { - ret = self_signed_cert(creds->tls_cert, key_file); - } - if (ret != GNUTLS_E_SUCCESS) { - goto fail; - } - - ret = gnutls_session_ticket_key_generate(&creds->tls_ticket_key); - if (ret != GNUTLS_E_SUCCESS) { - goto fail; - } - - return creds; -fail: - knot_quic_free_creds(creds); - return NULL; -} - -_public_ -struct knot_quic_creds *knot_quic_init_creds_peer(const struct knot_quic_creds *local_creds, - const uint8_t *peer_pin, - uint8_t peer_pin_len) -{ - knot_quic_creds_t *creds = calloc(1, sizeof(*creds) + peer_pin_len); - if (creds == NULL) { - return NULL; - } - - if (local_creds != NULL) { - creds->peer = true; - creds->tls_cert = local_creds->tls_cert; - } else { - int ret = gnutls_certificate_allocate_credentials(&creds->tls_cert); - if (ret != GNUTLS_E_SUCCESS) { - free(creds); - return NULL; - } - } - - if (peer_pin_len > 0 && peer_pin != NULL) { - memcpy(creds->peer_pin, peer_pin, peer_pin_len); - creds->peer_pin_len = peer_pin_len; - } - - return creds; -} - -_public_ -int knot_quic_creds_cert(struct knot_quic_creds *creds, struct gnutls_x509_crt_int **cert) -{ - if (creds == NULL || cert == NULL) { - return KNOT_EINVAL; - } - - gnutls_x509_crt_t *certs; - unsigned cert_count; - int ret = gnutls_certificate_get_x509_crt(creds->tls_cert, 0, &certs, &cert_count); - if (ret == GNUTLS_E_SUCCESS) { - if (cert_count == 0) { - gnutls_x509_crt_deinit(*certs); - return KNOT_ENOENT; - } - *cert = *certs; - free(certs); - } - return ret; -} - -_public_ -void knot_quic_free_creds(struct knot_quic_creds *creds) -{ - if (creds == NULL) { - return; - } - - if (!creds->peer && creds->tls_cert != NULL) { - gnutls_certificate_free_credentials(creds->tls_cert); - } - gnutls_anti_replay_deinit(creds->tls_anti_replay); - if (creds->tls_ticket_key.data != NULL) { - tls_session_ticket_key_free(&creds->tls_ticket_key); - } - free(creds); -} - static ngtcp2_conn *get_conn(ngtcp2_crypto_conn_ref *conn_ref) { return ((knot_quic_conn_t *)conn_ref->user_data)->conn; @@ -377,51 +146,31 @@ static ngtcp2_conn *get_conn(ngtcp2_crypto_conn_ref *conn_ref) static int tls_init_conn_session(knot_quic_conn_t *conn, bool server) { - if (gnutls_init(&conn->tls_session, (server ? GNUTLS_SERVER : GNUTLS_CLIENT) | - GNUTLS_ENABLE_EARLY_DATA | GNUTLS_NO_AUTO_SEND_TICKET | - GNUTLS_NO_END_OF_EARLY_DATA) != GNUTLS_E_SUCCESS) { - return TLS_CALLBACK_ERR; - } - - gnutls_certificate_send_x509_rdn_sequence(conn->tls_session, 1); - gnutls_certificate_server_set_request(conn->tls_session, GNUTLS_CERT_REQUEST); - - if (gnutls_priority_set_direct(conn->tls_session, QUIC_PRIORITIES, - NULL) != GNUTLS_E_SUCCESS) { + int ret = knot_tls_session(&conn->tls_session, conn->quic_table->creds, + conn->quic_table->priority, "\x03""doq", + true, server); + if (ret != KNOT_EOK) { return TLS_CALLBACK_ERR; } - if (server && gnutls_session_ticket_enable_server(conn->tls_session, - &conn->quic_table->creds->tls_ticket_key) != GNUTLS_E_SUCCESS) { - return TLS_CALLBACK_ERR; + if (server) { + ret = ngtcp2_crypto_gnutls_configure_server_session(conn->tls_session); + } else { + ret = ngtcp2_crypto_gnutls_configure_client_session(conn->tls_session); } - - int ret = ngtcp2_crypto_gnutls_configure_server_session(conn->tls_session); - if (ret != 0) { + if (ret != NGTCP2_NO_ERROR) { return TLS_CALLBACK_ERR; } - gnutls_record_set_max_early_data_size(conn->tls_session, 0xffffffffu); - conn->conn_ref = (nc_conn_ref_placeholder_t) { .get_conn = get_conn, .user_data = conn }; - _Static_assert(sizeof(nc_conn_ref_placeholder_t) == sizeof(ngtcp2_crypto_conn_ref), "invalid placeholder for conn_ref"); + _Static_assert(sizeof(nc_conn_ref_placeholder_t) == sizeof(ngtcp2_crypto_conn_ref), + "invalid placeholder for conn_ref"); gnutls_session_set_ptr(conn->tls_session, &conn->conn_ref); - if (server) { - gnutls_anti_replay_enable(conn->tls_session, conn->quic_table->creds->tls_anti_replay); - - } - if (gnutls_credentials_set(conn->tls_session, GNUTLS_CRD_CERTIFICATE, - conn->quic_table->creds->tls_cert) != GNUTLS_E_SUCCESS) { - return TLS_CALLBACK_ERR; - } - - gnutls_alpn_set_protocols(conn->tls_session, &doq_alpn, 1, GNUTLS_ALPN_MANDATORY); - ngtcp2_conn_set_tls_native_handle(conn->conn, conn->tls_session); return KNOT_EOK; @@ -477,54 +226,6 @@ uint16_t knot_quic_conn_local_port(knot_quic_conn_t *conn) return ((const struct sockaddr_in6 *)path->local.addr)->sin6_port; } -_public_ -void knot_quic_conn_pin(knot_quic_conn_t *conn, uint8_t *pin, size_t *pin_size, bool local) -{ - if (conn == NULL) { - goto error; - } - - const gnutls_datum_t *data = NULL; - if (local) { - data = gnutls_certificate_get_ours(conn->tls_session); - } else { - unsigned count = 0; - data = gnutls_certificate_get_peers(conn->tls_session, &count); - if (count == 0) { - goto error; - } - } - if (data == NULL) { - goto error; - } - - gnutls_x509_crt_t cert; - int ret = gnutls_x509_crt_init(&cert); - if (ret != GNUTLS_E_SUCCESS) { - goto error; - } - - ret = gnutls_x509_crt_import(cert, data, GNUTLS_X509_FMT_DER); - if (ret != GNUTLS_E_SUCCESS) { - gnutls_x509_crt_deinit(cert); - goto error; - } - - ret = gnutls_x509_crt_get_key_id(cert, GNUTLS_KEYID_USE_SHA256, pin, pin_size); - if (ret != GNUTLS_E_SUCCESS) { - gnutls_x509_crt_deinit(cert); - goto error; - } - - gnutls_x509_crt_deinit(cert); - - return; -error: - if (pin_size != NULL) { - *pin_size = 0; - } -} - static void knot_quic_rand_cb(uint8_t *dest, size_t destlen, const ngtcp2_rand_ctx *rand_ctx) { (void)rand_ctx; @@ -602,18 +303,8 @@ static int handshake_completed_cb(ngtcp2_conn *conn, void *user_data) ctx->flags |= KNOT_QUIC_CONN_HANDSHAKE_DONE; if (!ngtcp2_conn_is_server(conn)) { - knot_quic_creds_t *creds = ctx->quic_table->creds; - if (creds->peer_pin_len == 0) { - return 0; - } - uint8_t pin[KNOT_QUIC_PIN_LEN]; - size_t pin_size = sizeof(pin); - knot_quic_conn_pin(ctx, pin, &pin_size, false); - if (pin_size != creds->peer_pin_len || - const_time_memcmp(pin, creds->peer_pin, pin_size) != 0) { - return NGTCP2_ERR_CALLBACK_FAILURE; - } - return 0; + return knot_tls_pin_check(ctx->tls_session, ctx->quic_table->creds) + == KNOT_EOK ? 0 : NGTCP2_ERR_CALLBACK_FAILURE; } if (gnutls_session_ticket_send(ctx->tls_session, 1, 0) != GNUTLS_E_SUCCESS) { @@ -945,6 +636,10 @@ int knot_quic_handle(knot_quic_table_t *table, knot_quic_reply_t *reply, goto finish; } + if (conn != NULL && (conn->flags & KNOT_QUIC_CONN_BLOCKED)) { + return KNOT_EOK; + } + ngtcp2_path path; path.remote.addr = (struct sockaddr *)reply->ip_rem; path.remote.addrlen = addr_len((struct sockaddr_in6 *)reply->ip_rem); @@ -1249,6 +944,8 @@ int knot_quic_send(knot_quic_table_t *quic_table, knot_quic_conn_t *conn, return KNOT_EINVAL; } else if (reply->handle_ret < 0) { return reply->handle_ret; + } else if ((conn->flags & KNOT_QUIC_CONN_BLOCKED) && !(flags & KNOT_QUIC_SEND_IGNORE_BLOCKED)) { + return KNOT_EOK; } else if (reply->handle_ret > 0) { return send_special(quic_table, reply, conn); } else if (conn == NULL) { diff --git a/src/libknot/quic/quic.h b/src/libknot/quic/quic.h index 29a02e0..b4acb33 100644 --- a/src/libknot/quic/quic.h +++ b/src/libknot/quic/quic.h @@ -1,4 +1,4 @@ -/* Copyright (C) 2023 CZ.NIC, z.s.p.o. <knot-dns@labs.nic.cz> +/* Copyright (C) 2024 CZ.NIC, z.s.p.o. <knot-dns@labs.nic.cz> This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -29,20 +29,18 @@ #include <netinet/in.h> #include "libknot/quic/quic_conn.h" - -#define KNOT_QUIC_PIN_LEN 32 +#include "libknot/quic/tls_common.h" #define KNOT_QUIC_HANDLE_RET_CLOSE 2000 // RFC 9250 #define KNOT_QUIC_ERR_EXCESSIVE_LOAD 0x4 -struct gnutls_x509_crt_int; -struct knot_quic_creds; struct knot_quic_session; typedef enum { KNOT_QUIC_SEND_IGNORE_LASTBYTE = (1 << 0), + KNOT_QUIC_SEND_IGNORE_BLOCKED = (1 << 1), } knot_quic_send_flag_t; typedef struct knot_quic_reply { @@ -87,45 +85,6 @@ struct knot_quic_session *knot_quic_session_save(knot_quic_conn_t *conn); int knot_quic_session_load(knot_quic_conn_t *conn, struct knot_quic_session *session); /*! - * \brief Init server TLS certificate for DoQ. - * - * \param cert_file X509 certificate PEM file path/name (NULL if auto-generated). - * \param key_file Key PEM file path/name. - * - * \return Initialized creds. - */ -struct knot_quic_creds *knot_quic_init_creds(const char *cert_file, - const char *key_file); - -/*! - * \brief Init peer TLS certificate for DoQ. - * - * \param local_creds Local credentials if server. - * \param peer_pin Optional peer certificate pin to check. - * \param peer_pin_len Length of the peer pin. Set 0 if not specified. - * - * \return Initialized creds. - */ -struct knot_quic_creds *knot_quic_init_creds_peer(const struct knot_quic_creds *local_creds, - const uint8_t *peer_pin, - uint8_t peer_pin_len); - -/*! - * \brief Gets the certificate from credentials. - * - * \param creds TLS credentials. - * \param cert Output certificate. - * - * \return KNOT_E* - */ -int knot_quic_creds_cert(struct knot_quic_creds *creds, struct gnutls_x509_crt_int **cert); - -/*! - * \brief Deinit server TLS certificate for DoQ. - */ -void knot_quic_free_creds(struct knot_quic_creds *creds); - -/*! * \brief Returns timeout value for the connection. */ uint64_t quic_conn_get_timeout(knot_quic_conn_t *conn); @@ -156,18 +115,6 @@ uint32_t knot_quic_conn_rtt(knot_quic_conn_t *conn); uint16_t knot_quic_conn_local_port(knot_quic_conn_t *conn); /*! - * \brief Gets local or remote certificate pin. - * - * \note Zero output pin_size value means no certificate available or error. - * - * \param conn QUIC connection. - * \param pin Output certificate pin. - * \param pin_size Input size of the storage / output size of the stored pin. - * \param local Local or remote certificate indication. - */ -void knot_quic_conn_pin(knot_quic_conn_t *conn, uint8_t *pin, size_t *pin_size, bool local); - -/*! * \brief Create new outgoing QUIC connection. * * \param table QUIC connections table to be added to. diff --git a/src/libknot/quic/quic_conn.c b/src/libknot/quic/quic_conn.c index 6616573..1a3b9df 100644 --- a/src/libknot/quic/quic_conn.c +++ b/src/libknot/quic/quic_conn.c @@ -1,4 +1,4 @@ -/* Copyright (C) 2023 CZ.NIC, z.s.p.o. <knot-dns@labs.nic.cz> +/* Copyright (C) 2024 CZ.NIC, z.s.p.o. <knot-dns@labs.nic.cz> This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -29,6 +29,7 @@ #include "libdnssec/random.h" #include "libknot/attribute.h" #include "libknot/error.h" +#include "libknot/quic/tls_common.h" #include "libknot/quic/quic.h" #include "libknot/xdp/tcp_iobuf.h" #include "libknot/wire.h" @@ -45,7 +46,7 @@ static int cmp_expiry_heap_nodes(void *c1, void *c2) _public_ knot_quic_table_t *knot_quic_table_new(size_t max_conns, size_t max_ibufs, size_t max_obufs, - size_t udp_payload, struct knot_quic_creds *creds) + size_t udp_payload, struct knot_creds *creds) { size_t table_size = max_conns * BUCKETS_PER_CONNS; @@ -61,9 +62,17 @@ knot_quic_table_t *knot_quic_table_new(size_t max_conns, size_t max_ibufs, size_ res->obufs_max = max_obufs; res->udp_payload_limit = udp_payload; + int ret = gnutls_priority_init2(&res->priority, KNOT_TLS_PRIORITIES, NULL, + GNUTLS_PRIORITY_INIT_DEF_APPEND); + if (ret != GNUTLS_E_SUCCESS) { + free(res); + return NULL; + } + res->expiry_heap = malloc(sizeof(struct heap)); if (res->expiry_heap == NULL || !heap_init(res->expiry_heap, cmp_expiry_heap_nodes, 0)) { free(res->expiry_heap); + gnutls_priority_deinit(res->priority); free(res); return NULL; } @@ -92,6 +101,7 @@ void knot_quic_table_free(knot_quic_table_t *table) assert(table->ibufs_size == 0); assert(table->obufs_size == 0); + gnutls_priority_deinit(table->priority); heap_deinit(table->expiry_heap); free(table->expiry_heap); free(table); @@ -118,7 +128,9 @@ void knot_quic_table_sweep(knot_quic_table_t *table, struct knot_quic_reply *swe while (!EMPTY_HEAP(table->expiry_heap)) { knot_quic_conn_t *c = *(knot_quic_conn_t **)HHEAD(table->expiry_heap); - if (table->usage > table->max_conns) { + if ((c->flags & KNOT_QUIC_CONN_BLOCKED)) { + break; // highly inprobable + } else if (table->usage > table->max_conns) { knot_sweep_stats_incr(stats, KNOT_SWEEP_CTR_LIMIT_CONN); send_excessive_load(c, sweep_reply, table); knot_quic_table_rem(c, table); @@ -476,7 +488,7 @@ uint8_t *knot_quic_stream_add_data(knot_quic_conn_t *conn, int64_t stream_id, add_tail((list_t *)&s->outbufs, (node_t *)obuf); s->obufs_size += obuf->len; conn->obufs_size += obuf->len; - conn->quic_table->obufs_size += obuf->len; + ATOMIC_ADD(conn->quic_table->obufs_size, obuf->len); return obuf->buf + prefix; } @@ -497,7 +509,7 @@ void knot_quic_stream_ack_data(knot_quic_conn_t *conn, int64_t stream_id, assert(HEAD(*obs) != first); // help CLANG analyzer understand what rem_node did and that further usage of HEAD(*obs) is safe s->obufs_size -= first->len; conn->obufs_size -= first->len; - conn->quic_table->obufs_size -= first->len; + ATOMIC_SUB(conn->quic_table->obufs_size, first->len); s->first_offset += first->len; free(first); if (s->unsent_obuf == first) { @@ -556,6 +568,19 @@ void knot_quic_stream_mark_sent(knot_quic_conn_t *conn, int64_t stream_id, } _public_ +void knot_quic_conn_block(knot_quic_conn_t *conn, bool block) +{ + if (block) { + conn->flags |= KNOT_QUIC_CONN_BLOCKED; + conn->next_expiry = UINT64_MAX; + conn_heap_reschedule(conn, conn->quic_table); + } else { + conn->flags &= ~KNOT_QUIC_CONN_BLOCKED; + quic_conn_mark_used(conn, conn->quic_table); + } +} + +_public_ void knot_quic_cleanup(knot_quic_conn_t *conns[], size_t n_conns) { for (size_t i = 0; i < n_conns; i++) { diff --git a/src/libknot/quic/quic_conn.h b/src/libknot/quic/quic_conn.h index 64ead51..49e0631 100644 --- a/src/libknot/quic/quic_conn.h +++ b/src/libknot/quic/quic_conn.h @@ -1,4 +1,4 @@ -/* Copyright (C) 2023 CZ.NIC, z.s.p.o. <knot-dns@labs.nic.cz> +/* Copyright (C) 2024 CZ.NIC, z.s.p.o. <knot-dns@labs.nic.cz> This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -29,10 +29,13 @@ #include <stdint.h> #include <sys/uio.h> +#include "contrib/atomic.h" + #define MAX_STREAMS_PER_CONN 10 // this limits the number of un-finished streams per conn (i.e. if response has been recvd with FIN, it doesn't count) +struct gnutls_priority_st; struct ngtcp2_cid; // declaration taken from wherever in ngtcp2 -struct knot_quic_creds; +struct knot_creds; struct knot_quic_reply; struct knot_sweep_stats; @@ -70,6 +73,7 @@ typedef struct { typedef enum { KNOT_QUIC_CONN_HANDSHAKE_DONE = (1 << 0), KNOT_QUIC_CONN_SESSION_TAKEN = (1 << 1), + KNOT_QUIC_CONN_BLOCKED = (1 << 2), } knot_quic_conn_flag_t; typedef struct knot_quic_conn { @@ -111,12 +115,13 @@ typedef struct knot_quic_table { size_t ibufs_max; size_t obufs_max; size_t ibufs_size; - size_t obufs_size; + knot_atomic_size_t obufs_size; size_t udp_payload_limit; // for simplicity not distinguishing IPv4/6 void (*log_cb)(const char *); const char *qlog_dir; uint64_t hash_secret[4]; - struct knot_quic_creds *creds; + struct knot_creds *creds; + struct gnutls_priority_st *priority; struct heap *expiry_heap; knot_quic_cid_t *conns[]; } knot_quic_table_t; @@ -133,7 +138,7 @@ typedef struct knot_quic_table { * \return Allocated table, or NULL. */ knot_quic_table_t *knot_quic_table_new(size_t max_conns, size_t max_ibufs, size_t max_obufs, - size_t udp_payload, struct knot_quic_creds *creds); + size_t udp_payload, struct knot_creds *creds); /*! * \brief Free QUIC table including its contents. @@ -306,6 +311,11 @@ void knot_quic_stream_mark_sent(knot_quic_conn_t *conn, int64_t stream_id, size_t amount_sent); /*! + * \brief (Un)block the connection for incoming/outgoing traffic and sweep. + */ +void knot_quic_conn_block(knot_quic_conn_t *conn, bool block); + +/*! * \brief Free rest of resources of closed conns. * * \param conns Array with recently used conns (possibly NULLs). diff --git a/src/libknot/quic/tls.c b/src/libknot/quic/tls.c new file mode 100644 index 0000000..01172df --- /dev/null +++ b/src/libknot/quic/tls.c @@ -0,0 +1,262 @@ +/* Copyright (C) 2024 CZ.NIC, z.s.p.o. <knot-dns@labs.nic.cz> + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see <https://www.gnu.org/licenses/>. + */ + +#include <arpa/inet.h> +#include <assert.h> +#include <gnutls/crypto.h> +#include <gnutls/gnutls.h> +#include <stdlib.h> +#include <sys/types.h> +#include <sys/socket.h> + +#include "libknot/quic/tls.h" + +#include "contrib/macros.h" +#include "contrib/time.h" +#include "libknot/attribute.h" +#include "libknot/error.h" +#include "libknot/quic/tls_common.h" + +_public_ +knot_tls_ctx_t *knot_tls_ctx_new(struct knot_creds *creds, unsigned io_timeout, + unsigned hs_timeout, bool server) +{ + knot_tls_ctx_t *res = calloc(1, sizeof(*res)); + if (res == NULL) { + return NULL; + } + + res->creds = creds; + res->handshake_timeout = hs_timeout; + res->io_timeout = io_timeout; + res->server = server; + + int ret = gnutls_priority_init2(&res->priority, KNOT_TLS_PRIORITIES, NULL, + GNUTLS_PRIORITY_INIT_DEF_APPEND); + if (ret != GNUTLS_E_SUCCESS) { + free(res); + return NULL; + } + + return res; +} + +_public_ +void knot_tls_ctx_free(knot_tls_ctx_t *ctx) +{ + if (ctx != NULL) { + gnutls_priority_deinit(ctx->priority); + free(ctx); + } +} + +_public_ +knot_tls_conn_t *knot_tls_conn_new(knot_tls_ctx_t *ctx, int sock_fd) +{ + knot_tls_conn_t *res = calloc(1, sizeof(*res)); + if (res == NULL) { + return NULL; + } + res->ctx = ctx; + res->fd = sock_fd; + + int ret = knot_tls_session(&res->session, ctx->creds, ctx->priority, + "\x03""dot", false, ctx->server); + if (ret != KNOT_EOK) { + goto fail; + } + + gnutls_transport_set_int(res->session, sock_fd); // Use internal recv/send/poll. + gnutls_handshake_set_timeout(res->session, ctx->handshake_timeout); + + return res; +fail: + gnutls_deinit(res->session); + free(res); + return NULL; +} + +_public_ +void knot_tls_conn_del(knot_tls_conn_t *conn) +{ + if (conn != NULL && conn->fd_clones_count-- < 1) { + gnutls_deinit(conn->session); + free(conn); + } +} + +_public_ +int knot_tls_handshake(knot_tls_conn_t *conn, bool oneshot) +{ + if (conn->flags & (KNOT_TLS_CONN_HANDSHAKE_DONE | KNOT_TLS_CONN_BLOCKED)) { + return KNOT_EOK; + } + + /* Check if NB socket is writeable. */ + int opt; + socklen_t opt_len = sizeof(opt); + int ret = getsockopt(conn->fd, SOL_SOCKET, SO_ERROR, &opt, &opt_len); + if (ret < 0 || opt == ECONNREFUSED) { + return KNOT_NET_ECONNECT; + } + + gnutls_record_set_timeout(conn->session, conn->ctx->io_timeout); + do { + ret = gnutls_handshake(conn->session); + } while (!oneshot && ret < 0 && gnutls_error_is_fatal(ret) == 0); + + switch (ret) { + case GNUTLS_E_SUCCESS: + conn->flags |= KNOT_TLS_CONN_HANDSHAKE_DONE; + return knot_tls_pin_check(conn->session, conn->ctx->creds); + case GNUTLS_E_TIMEDOUT: + return KNOT_NET_ETIMEOUT; + default: + if (gnutls_error_is_fatal(ret) == 0) { + return KNOT_EAGAIN; + } else { + return KNOT_NET_EHSHAKE; + } + } +} + +#define TIMEOUT_CTX_INIT \ + struct timespec begin, end; \ + if (*timeout_ptr > 0) { \ + clock_gettime(CLOCK_MONOTONIC, &begin); \ + } + +#define TIMEOUT_CTX_UPDATE \ + if (*timeout_ptr > 0) { \ + clock_gettime(CLOCK_MONOTONIC, &end); \ + int running_ms = time_diff_ms(&begin, &end); \ + *timeout_ptr = MAX(*timeout_ptr - running_ms, 0); \ + } + +static ssize_t recv_data(knot_tls_conn_t *conn, void *data, size_t size, int *timeout_ptr) +{ + gnutls_record_set_timeout(conn->session, *timeout_ptr); + + size_t total = 0; + ssize_t res; + while (total < size) { + TIMEOUT_CTX_INIT + res = gnutls_record_recv(conn->session, data + total, size - total); + if (res > 0) { + total += res; + } else if (res == 0) { + return KNOT_ECONNRESET; + } else if (gnutls_error_is_fatal(res) != 0) { + return KNOT_NET_ERECV; + } + TIMEOUT_CTX_UPDATE + gnutls_record_set_timeout(conn->session, *timeout_ptr); + } + + assert(total == size); + return size; +} + +_public_ +ssize_t knot_tls_recv_dns(knot_tls_conn_t *conn, void *data, size_t size) +{ + if (conn == NULL || data == NULL) { + return KNOT_EINVAL; + } + + if (conn->flags & KNOT_TLS_CONN_BLOCKED) { + return 0; + } + + ssize_t ret = knot_tls_handshake(conn, false); + if (ret != KNOT_EOK) { + return ret; + } + + int timeout = conn->ctx->io_timeout; + + uint16_t msg_len; + ret = recv_data(conn, &msg_len, sizeof(msg_len), &timeout); + if (ret != sizeof(msg_len)) { + return ret; + } + + msg_len = ntohs(msg_len); + if (size < msg_len) { + return KNOT_ESPACE; + } + + ret = recv_data(conn, data, msg_len, &timeout); + if (ret != size) { + return ret; + } + + return msg_len; +} + +_public_ +ssize_t knot_tls_send_dns(knot_tls_conn_t *conn, void *data, size_t size) +{ + if (conn == NULL || data == NULL || size > UINT16_MAX) { + return KNOT_EINVAL; + } + + ssize_t res = knot_tls_handshake(conn, false); + if (res != KNOT_EOK) { + return res; + } + + // Enable data buffering. + gnutls_record_cork(conn->session); + + uint16_t msg_len = htons(size); + res = gnutls_record_send(conn->session, &msg_len, sizeof(msg_len)); + if (res != sizeof(msg_len)) { + return KNOT_NET_ESEND; + } + + res = gnutls_record_send(conn->session, data, size); + if (res != size) { + return KNOT_NET_ESEND; + } + + int timeout = conn->ctx->io_timeout, *timeout_ptr = &timeout; + gnutls_record_set_timeout(conn->session, timeout); + + // Send the buffered data. + while (gnutls_record_check_corked(conn->session) > 0) { + TIMEOUT_CTX_INIT + int ret = gnutls_record_uncork(conn->session, 0); + if (ret < 0 && gnutls_error_is_fatal(ret) != 0) { + return ret == GNUTLS_E_TIMEDOUT ? KNOT_ETIMEOUT : + KNOT_NET_ESEND; + } + TIMEOUT_CTX_UPDATE + gnutls_record_set_timeout(conn->session, timeout); + } + + return size; +} + +_public_ +void knot_tls_conn_block(knot_tls_conn_t *conn, bool block) +{ + if (block) { + conn->flags |= KNOT_TLS_CONN_BLOCKED; + } else { + conn->flags &= ~KNOT_TLS_CONN_BLOCKED; + } +} diff --git a/src/libknot/quic/tls.h b/src/libknot/quic/tls.h new file mode 100644 index 0000000..7801ca8 --- /dev/null +++ b/src/libknot/quic/tls.h @@ -0,0 +1,135 @@ +/* Copyright (C) 2024 CZ.NIC, z.s.p.o. <knot-dns@labs.nic.cz> + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see <https://www.gnu.org/licenses/>. + */ + +/*! + * \file + * + * \brief Pure TLS functionality. + * + * \addtogroup quic + * @{ + */ + +#pragma once + +#include <stdbool.h> +#include <stdint.h> +#include <sys/types.h> + +struct gnutls_priority_st; + +typedef enum { + KNOT_TLS_CONN_HANDSHAKE_DONE = (1 << 0), + KNOT_TLS_CONN_SESSION_TAKEN = (1 << 1), // unused, to be implemeted later + KNOT_TLS_CONN_BLOCKED = (1 << 2), +} knot_tls_conn_flag_t; + +typedef struct knot_tls_ctx { + struct knot_creds *creds; + struct gnutls_priority_st *priority; + unsigned handshake_timeout; + unsigned io_timeout; + bool server; +} knot_tls_ctx_t; + +typedef struct knot_tls_conn { + struct gnutls_session_int *session; + struct knot_tls_ctx *ctx; + int fd; + unsigned fd_clones_count; + knot_tls_conn_flag_t flags; +} knot_tls_conn_t; + +/*! + * \brief Initialize DoT answering context. + * + * \param creds Certificate credentials. + * \param io_timeout Connections' IO-timeout (in milliseconds). + * \param hs_timeout Handshake timeout (in milliseconds). + * \param server Server context (otherwise client). + * + * \return Initialized context or NULL. + */ +knot_tls_ctx_t *knot_tls_ctx_new(struct knot_creds *creds, unsigned io_timeout, + unsigned hs_timeout, bool server); + +/*! + * \brief Free DoT answering context. + */ +void knot_tls_ctx_free(knot_tls_ctx_t *ctx); + +/*! + * \brief Initialize DoT connection. + * + * \param ctx DoT answering context. + * \param sock_fd Opened TCP connection socket. + * + * \return Connection struct or NULL. + */ +knot_tls_conn_t *knot_tls_conn_new(knot_tls_ctx_t *ctx, int sock_fd); + +/*! + * \brief Free DoT connection struct. + * + * \note Doesn't close the TCP connection socket. + */ +void knot_tls_conn_del(knot_tls_conn_t *conn); + +/*! + * \brief Perform the TLS handshake (via gnutls_handshake()). + * + * \note This is also done by the recv/send functions. + * + * \param conn DoT connection. + * \param oneshot If set, don't wait untill the handshake is finished. + * + * \retval KNOT_EOK Handshake successfully finished. + * \retval KNOT_EGAIN Handshake not finished, call me again. + * \retval KNOT_NET_EHSHAKE Handshake error. + * \retval KNOT_NET_ECONNECT Socket not connected. + */ +int knot_tls_handshake(knot_tls_conn_t *conn, bool oneshot); + +/*! + * \brief Receive a size-word-prefixed DNS message. + * + * \param conn DoT connection. + * \param data Destination buffer. + * \param size Maximum buffer size. + * + * \return Either the DNS message size received or negative error code. + * + * \note The two-byte-size-prefix is stripped upon reception, not stored to the buffer. + */ +ssize_t knot_tls_recv_dns(knot_tls_conn_t *conn, void *data, size_t size); + +/*! + * \brief Send a size-word-prefixed DNS message. + * + * \param conn DoT connection. + * \param data DNS payload. + * \param size Payload size. + * + * \return Either exactly 'size' or a negative error code. + */ +ssize_t knot_tls_send_dns(knot_tls_conn_t *conn, void *data, size_t size); + +/*! + * \brief Set or unset the conection's BLOCKED flag. + */ +void knot_tls_conn_block(knot_tls_conn_t *conn, bool block); + +/*! @} */ diff --git a/src/libknot/quic/tls_common.c b/src/libknot/quic/tls_common.c new file mode 100644 index 0000000..d1647d8 --- /dev/null +++ b/src/libknot/quic/tls_common.c @@ -0,0 +1,472 @@ +/* Copyright (C) 2024 CZ.NIC, z.s.p.o. <knot-dns@labs.nic.cz> + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see <https://www.gnu.org/licenses/>. + */ + +#include <fcntl.h> +#include <gnutls/crypto.h> +#include <gnutls/gnutls.h> +#include <gnutls/x509.h> +#include <stdlib.h> +#include <string.h> +#include <sys/stat.h> +#include <time.h> +#include <unistd.h> + +#include "libknot/quic/tls_common.h" + +#include "contrib/atomic.h" +#include "contrib/sockaddr.h" +#include "contrib/string.h" +#include "libknot/attribute.h" +#include "libknot/error.h" + +typedef struct knot_creds { + knot_atomic_ptr_t cert_creds; // Current credentials. + gnutls_certificate_credentials_t cert_creds_prev; // Previous credentials (for pending connections). + gnutls_anti_replay_t tls_anti_replay; + gnutls_datum_t tls_ticket_key; + bool peer; + uint8_t peer_pin_len; + uint8_t peer_pin[]; +} knot_creds_t; + +static int tls_anti_replay_db_add_func(void *dbf, time_t exp_time, + const gnutls_datum_t *key, + const gnutls_datum_t *data) +{ + return 0; +} + +static void tls_session_ticket_key_free(gnutls_datum_t *ticket) +{ + memzero(ticket->data, ticket->size); + gnutls_free(ticket->data); +} + +static int self_key(gnutls_x509_privkey_t *privkey, const char *key_file) +{ + gnutls_datum_t data = { 0 }; + + int ret = gnutls_x509_privkey_init(privkey); + if (ret != GNUTLS_E_SUCCESS) { + return ret; + } + + int fd = open(key_file, O_RDONLY); + if (fd != -1) { + struct stat stat; + if (fstat(fd, &stat) != 0 || + (data.data = gnutls_malloc(stat.st_size)) == NULL || + read(fd, data.data, stat.st_size) != stat.st_size) { + ret = GNUTLS_E_KEYFILE_ERROR; + goto finish; + } + + data.size = stat.st_size; + ret = gnutls_x509_privkey_import_pkcs8(*privkey, &data, GNUTLS_X509_FMT_PEM, + NULL, GNUTLS_PKCS_PLAIN); + if (ret != GNUTLS_E_SUCCESS) { + goto finish; + } + } else { + ret = gnutls_x509_privkey_generate(*privkey, GNUTLS_PK_EDDSA_ED25519, + GNUTLS_CURVE_TO_BITS(GNUTLS_ECC_CURVE_ED25519), 0); + if (ret != GNUTLS_E_SUCCESS) { + goto finish; + } + + ret = gnutls_x509_privkey_export2_pkcs8(*privkey, GNUTLS_X509_FMT_PEM, NULL, + GNUTLS_PKCS_PLAIN, &data); + if (ret != GNUTLS_E_SUCCESS || + (fd = open(key_file, O_WRONLY | O_CREAT, 0600)) == -1 || + write(fd, data.data, data.size) != data.size) { + ret = GNUTLS_E_KEYFILE_ERROR; + goto finish; + } + } + +finish: + if (fd > -1) { + close(fd); + } + gnutls_free(data.data); + if (ret != GNUTLS_E_SUCCESS) { + gnutls_x509_privkey_deinit(*privkey); + *privkey = NULL; + } + return ret; +} + +static int self_signed_cert(gnutls_certificate_credentials_t tls_cert, + const char *key_file) +{ + gnutls_x509_privkey_t privkey = NULL; + gnutls_x509_crt_t cert = NULL; + + char *hostname = sockaddr_hostname(); + if (hostname == NULL) { + return GNUTLS_E_MEMORY_ERROR; + } + + int ret; + uint8_t serial[16]; + gnutls_rnd(GNUTLS_RND_NONCE, serial, sizeof(serial)); + // Clear the left-most bit to be a positive number (two's complement form). + serial[0] &= 0x7F; + +#define CHK(cmd) if ((ret = (cmd)) != GNUTLS_E_SUCCESS) { goto finish; } +#define NOW_DAYS(days) (time(NULL) + 24 * 3600 * (days)) + + CHK(self_key(&privkey, key_file)); + + CHK(gnutls_x509_crt_init(&cert)); + CHK(gnutls_x509_crt_set_version(cert, 3)); + CHK(gnutls_x509_crt_set_serial(cert, serial, sizeof(serial))); + CHK(gnutls_x509_crt_set_activation_time(cert, NOW_DAYS(-1))); + CHK(gnutls_x509_crt_set_expiration_time(cert, NOW_DAYS(10 * 365))); + CHK(gnutls_x509_crt_set_dn_by_oid(cert, GNUTLS_OID_X520_COMMON_NAME, 0, + hostname, strlen(hostname))); + CHK(gnutls_x509_crt_set_key(cert, privkey)); + CHK(gnutls_x509_crt_sign2(cert, cert, privkey, GNUTLS_DIG_SHA512, 0)); + + ret = gnutls_certificate_set_x509_key(tls_cert, &cert, 1, privkey); + +finish: + free(hostname); + gnutls_x509_crt_deinit(cert); + gnutls_x509_privkey_deinit(privkey); + + return ret; +} + +_public_ +struct knot_creds *knot_creds_init(const char *key_file, const char *cert_file) +{ + knot_creds_t *creds = calloc(1, sizeof(*creds)); + if (creds == NULL) { + return NULL; + } + + int ret = knot_creds_update(creds, key_file, cert_file); + if (ret != KNOT_EOK) { + goto fail; + } + + ret = gnutls_anti_replay_init(&creds->tls_anti_replay); + if (ret != GNUTLS_E_SUCCESS) { + goto fail; + } + gnutls_anti_replay_set_add_function(creds->tls_anti_replay, tls_anti_replay_db_add_func); + gnutls_anti_replay_set_ptr(creds->tls_anti_replay, NULL); + + ret = gnutls_session_ticket_key_generate(&creds->tls_ticket_key); + if (ret != GNUTLS_E_SUCCESS) { + goto fail; + } + + return creds; +fail: + knot_creds_free(creds); + return NULL; +} + +_public_ +struct knot_creds *knot_creds_init_peer(const struct knot_creds *local_creds, + const uint8_t *peer_pin, + uint8_t peer_pin_len) +{ + knot_creds_t *creds = calloc(1, sizeof(*creds) + peer_pin_len); + if (creds == NULL) { + return NULL; + } + + if (local_creds != NULL) { + creds->peer = true; + creds->cert_creds = ATOMIC_GET(local_creds->cert_creds); + } else { + gnutls_certificate_credentials_t new_creds; + int ret = gnutls_certificate_allocate_credentials(&new_creds); + if (ret != GNUTLS_E_SUCCESS) { + free(creds); + return NULL; + } + creds->cert_creds = new_creds; + } + + if (peer_pin_len > 0 && peer_pin != NULL) { + memcpy(creds->peer_pin, peer_pin, peer_pin_len); + creds->peer_pin_len = peer_pin_len; + } + + return creds; +} + +static int creds_cert(gnutls_certificate_credentials_t creds, + struct gnutls_x509_crt_int **cert) +{ + gnutls_x509_crt_t *certs; + unsigned cert_count; + int ret = gnutls_certificate_get_x509_crt(creds, 0, &certs, &cert_count); + if (ret == GNUTLS_E_SUCCESS) { + if (cert_count == 0) { + gnutls_x509_crt_deinit(*certs); + return KNOT_ENOENT; + } + *cert = *certs; + free(certs); + return KNOT_EOK; + } + return KNOT_ERROR; +} + +static int creds_changed(gnutls_certificate_credentials_t creds, + gnutls_certificate_credentials_t prev, + bool self_cert, bool *changed) +{ + if (creds == NULL || prev == NULL) { + *changed = true; + return KNOT_EOK; + } + + gnutls_x509_crt_t cert = NULL, cert_prev = NULL; + + int ret = creds_cert(creds, &cert); + if (ret != KNOT_EOK) { + goto failed; + } + ret = creds_cert(prev, &cert_prev); + if (ret != KNOT_EOK) { + goto failed; + } + + if (self_cert) { + uint8_t pin[KNOT_TLS_PIN_LEN], pin_prev[KNOT_TLS_PIN_LEN]; + size_t pin_size = sizeof(pin), pin_prev_size = sizeof(pin_prev); + + ret = gnutls_x509_crt_get_key_id(cert, GNUTLS_KEYID_USE_SHA256, + pin, &pin_size); + if (ret != KNOT_EOK) { + goto failed; + } + ret = gnutls_x509_crt_get_key_id(cert_prev, GNUTLS_KEYID_USE_SHA256, + pin_prev, &pin_prev_size); + if (ret != KNOT_EOK) { + goto failed; + } + + *changed = (pin_size != pin_prev_size) || + memcmp(pin, pin_prev, pin_size) != 0; + } else { + *changed = (gnutls_x509_crt_equals(cert, cert_prev) == 0); + } + + ret = KNOT_EOK; +failed: + gnutls_x509_crt_deinit(cert); + gnutls_x509_crt_deinit(cert_prev); + + return ret; +} + +_public_ +int knot_creds_update(struct knot_creds *creds, const char *key_file, const char *cert_file) +{ + if (creds == NULL || key_file == NULL) { + return KNOT_EINVAL; + } + + gnutls_certificate_credentials_t new_creds; + int ret = gnutls_certificate_allocate_credentials(&new_creds); + if (ret != GNUTLS_E_SUCCESS) { + return KNOT_ENOMEM; + } + + if (cert_file != NULL) { + ret = gnutls_certificate_set_x509_key_file(new_creds, + cert_file, key_file, + GNUTLS_X509_FMT_PEM); + } else { + ret = self_signed_cert(new_creds, key_file); + } + if (ret != GNUTLS_E_SUCCESS) { + gnutls_certificate_free_credentials(new_creds); + return KNOT_EFILE; + } + + bool changed = false; + ret = creds_changed(new_creds, ATOMIC_GET(creds->cert_creds), + cert_file == NULL, &changed); + if (ret != KNOT_EOK) { + gnutls_certificate_free_credentials(new_creds); + return ret; + } + + if (changed) { + if (creds->cert_creds_prev != NULL) { + gnutls_certificate_free_credentials(creds->cert_creds_prev); + } + creds->cert_creds_prev = ATOMIC_XCHG(creds->cert_creds, new_creds); + } else { + gnutls_certificate_free_credentials(new_creds); + } + + return KNOT_EOK; +} + +_public_ +int knot_creds_cert(struct knot_creds *creds, struct gnutls_x509_crt_int **cert) +{ + if (creds == NULL || cert == NULL) { + return KNOT_EINVAL; + } + + return creds_cert(ATOMIC_GET(creds->cert_creds), cert); +} + +_public_ +void knot_creds_free(struct knot_creds *creds) +{ + if (creds == NULL) { + return; + } + + if (!creds->peer && creds->cert_creds != NULL) { + gnutls_certificate_free_credentials(creds->cert_creds); + if (creds->cert_creds_prev != NULL) { + gnutls_certificate_free_credentials(creds->cert_creds_prev); + } + } + gnutls_anti_replay_deinit(creds->tls_anti_replay); + if (creds->tls_ticket_key.data != NULL) { + tls_session_ticket_key_free(&creds->tls_ticket_key); + } + free(creds); +} + +_public_ +int knot_tls_session(struct gnutls_session_int **session, + struct knot_creds *creds, + struct gnutls_priority_st *priority, + const char *alpn, + bool early_data, + bool server) +{ + if (session == NULL || creds == NULL || priority == NULL || alpn == NULL) { + return KNOT_EINVAL; + } + + gnutls_init_flags_t flags = GNUTLS_NO_SIGNAL; + if (early_data) { + flags |= GNUTLS_ENABLE_EARLY_DATA; +#ifdef ENABLE_QUIC // Next flags aren't available in older GnuTLS versions. + flags |= GNUTLS_NO_AUTO_SEND_TICKET | GNUTLS_NO_END_OF_EARLY_DATA; +#endif + } + + int ret = gnutls_init(session, (server ? GNUTLS_SERVER : GNUTLS_CLIENT) | flags); + if (ret == GNUTLS_E_SUCCESS) { + gnutls_certificate_send_x509_rdn_sequence(*session, 1); + gnutls_certificate_server_set_request(*session, GNUTLS_CERT_REQUEST); + ret = gnutls_priority_set(*session, priority); + } + if (server && ret == GNUTLS_E_SUCCESS) { + ret = gnutls_session_ticket_enable_server(*session, &creds->tls_ticket_key); + } + if (ret == GNUTLS_E_SUCCESS) { + const gnutls_datum_t alpn_datum = { (void *)&alpn[1], alpn[0] }; + gnutls_alpn_set_protocols(*session, &alpn_datum, 1, GNUTLS_ALPN_MANDATORY); + if (early_data) { + gnutls_record_set_max_early_data_size(*session, 0xffffffffu); + } + if (server) { + gnutls_anti_replay_enable(*session, creds->tls_anti_replay); + } + ret = gnutls_credentials_set(*session, GNUTLS_CRD_CERTIFICATE, + ATOMIC_GET(creds->cert_creds)); + } + if (ret != GNUTLS_E_SUCCESS) { + gnutls_deinit(*session); + *session = NULL; + } + return ret == GNUTLS_E_SUCCESS ? KNOT_EOK : KNOT_ERROR; +} + +_public_ +void knot_tls_pin(struct gnutls_session_int *session, uint8_t *pin, + size_t *pin_size, bool local) +{ + if (session == NULL) { + goto error; + } + + const gnutls_datum_t *data = NULL; + if (local) { + data = gnutls_certificate_get_ours(session); + } else { + unsigned count = 0; + data = gnutls_certificate_get_peers(session, &count); + if (count == 0) { + goto error; + } + } + if (data == NULL) { + goto error; + } + + gnutls_x509_crt_t cert; + int ret = gnutls_x509_crt_init(&cert); + if (ret != GNUTLS_E_SUCCESS) { + goto error; + } + + ret = gnutls_x509_crt_import(cert, data, GNUTLS_X509_FMT_DER); + if (ret != GNUTLS_E_SUCCESS) { + gnutls_x509_crt_deinit(cert); + goto error; + } + + ret = gnutls_x509_crt_get_key_id(cert, GNUTLS_KEYID_USE_SHA256, pin, pin_size); + if (ret != GNUTLS_E_SUCCESS) { + gnutls_x509_crt_deinit(cert); + goto error; + } + + gnutls_x509_crt_deinit(cert); + + return; +error: + if (pin_size != NULL) { + *pin_size = 0; + } +} + +_public_ +int knot_tls_pin_check(struct gnutls_session_int *session, + struct knot_creds *creds) +{ + if (creds->peer_pin_len == 0) { + return KNOT_EOK; + } + + uint8_t pin[KNOT_TLS_PIN_LEN]; + size_t pin_size = sizeof(pin); + knot_tls_pin(session, pin, &pin_size, false); + if (pin_size != creds->peer_pin_len || + const_time_memcmp(pin, creds->peer_pin, pin_size) != 0) { + return KNOT_EBADCERTKEY; + } + + return KNOT_EOK; +} diff --git a/src/libknot/quic/tls_common.h b/src/libknot/quic/tls_common.h new file mode 100644 index 0000000..934f256 --- /dev/null +++ b/src/libknot/quic/tls_common.h @@ -0,0 +1,134 @@ +/* Copyright (C) 2024 CZ.NIC, z.s.p.o. <knot-dns@labs.nic.cz> + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see <https://www.gnu.org/licenses/>. + */ + +/*! + * \file + * + * \brief Credentials handling common to QUIC and TLS. + * + * \addtogroup quic + * @{ + */ + +#pragma once + +#include <stdbool.h> +#include <stddef.h> +#include <stdint.h> + +#define KNOT_TLS_PIN_LEN 32 +#define KNOT_TLS_PRIORITIES "-VERS-ALL:+VERS-TLS1.3:" \ + "-GROUP-ALL:+GROUP-X25519:+GROUP-SECP256R1:" \ + "+GROUP-SECP384R1:+GROUP-SECP521R1" + +struct gnutls_priority_st; +struct gnutls_session_int; +struct gnutls_x509_crt_int; +struct knot_creds; + +/*! + * \brief Init server TLS key and certificate for DoQ. + * + * \param key_file Key PEM file path/name. + * \param cert_file X509 certificate PEM file path/name (NULL if auto-generated). + * + * \return Initialized creds. + */ +struct knot_creds *knot_creds_init(const char *key_file, const char *cert_file); + +/*! + * \brief Init peer TLS key and certificate for DoQ. + * + * \param local_creds Local credentials if server. + * \param peer_pin Optional peer certificate pin to check. + * \param peer_pin_len Length of the peer pin. Set 0 if not specified. + * + * \return Initialized creds. + */ +struct knot_creds *knot_creds_init_peer(const struct knot_creds *local_creds, + const uint8_t *peer_pin, + uint8_t peer_pin_len); + +/*! + * \brief Load new server TLS key and certificate for DoQ. + * + * \param creds Server credentials where key/cert pair will be updated. + * \param key_file Key PEM file path/name. + * \param cert_file X509 certificate PEM file path/name (NULL if auto-generated). + * + * \return KNOT_E* + */ +int knot_creds_update(struct knot_creds *creds, const char *key_file, const char *cert_file); + +/*! + * \brief Gets the certificate from credentials. + * + * \param creds TLS credentials. + * \param cert Output certificate. + * + * \return KNOT_E* + */ +int knot_creds_cert(struct knot_creds *creds, struct gnutls_x509_crt_int **cert); + +/*! + * \brief Deinit server TLS certificate for DoQ. + */ +void knot_creds_free(struct knot_creds *creds); + +/*! + * \brief Initialize GnuTLS session with credentials, ALPN, etc. + * + * \param session Out: initialized GnuTLS session struct. + * \param creds Certificate credentials. + * \param priority Session priority configuration. + * \param alpn ALPN string, first byte is the string length. + * \param early_data Allow early data. + * \param server Should be server session (otherwise client). + * + * \return KNOT_E* + */ +int knot_tls_session(struct gnutls_session_int **session, + struct knot_creds *creds, + struct gnutls_priority_st *priority, + const char *alpn, + bool early_data, + bool server); + +/*! + * \brief Gets local or remote certificate pin. + * + * \note Zero output pin_size value means no certificate available or error. + * + * \param session TLS connection. + * \param pin Output certificate pin. + * \param pin_size Input size of the storage / output size of the stored pin. + * \param local Local or remote certificate indication. + */ +void knot_tls_pin(struct gnutls_session_int *session, uint8_t *pin, + size_t *pin_size, bool local); + +/*! + * \brief Checks remote certificate pin in the session against credentials. + * + * \param session TLS connection. + * \param creds TLS credentials. + * + * \return KNOT_EOK or KNOT_EBADCERTKEY + */ +int knot_tls_pin_check(struct gnutls_session_int *session, + struct knot_creds *creds); + +/*! @} */ diff --git a/src/libknot/rrset-dump.c b/src/libknot/rrset-dump.c index ea406ca..9fac99d 100644 --- a/src/libknot/rrset-dump.c +++ b/src/libknot/rrset-dump.c @@ -508,10 +508,46 @@ static void wire_len_data_encode_to_str(rrset_dump_params_t *p, } } -static void wire_data_omit(rrset_dump_params_t *p) +static void wire_data_omit(rrset_dump_params_t *p, + const size_t len_len, + const bool print_len) { CHECK_PRET + size_t in_len; + + // First len_len bytes are data length. + CHECK_INMAX(len_len) + + // Read data length. + switch (len_len) { + case 0: + in_len = p->in_max; + break; + case 2: + in_len = knot_wire_read_u16(p->in); + break; + default: + p->ret = -1; + return; + } + + // If required print data length. + if (print_len == true && len_len != 0) { + assert(len_len == 2); + wire_num16_to_str(p); + CHECK_PRET + + // If something follows, print one space character. + if (in_len > 0) { + dump_string(p, " "); + CHECK_PRET + } + } else { + p->in += len_len; + p->in_max -= len_len; + } + const char *omit_message = "[omitted]"; const size_t omlen = strlen(omit_message); @@ -527,8 +563,8 @@ static void wire_data_omit(rrset_dump_params_t *p) STRING_TERMINATION - p->in += p->in_max; - p->in_max = 0; + p->in += in_len; + p->in_max -= in_len; } static void wire_dnskey_to_tag(rrset_dump_params_t *p) @@ -1735,6 +1771,10 @@ static void dnskey_info(const uint8_t *rdata, #define DUMP_TYPE wire_type_to_str(p); CHECK_RET(p); #define DUMP_HEX wire_data_encode_to_str(p, &hex_encode, \ &hex_encode_alloc); CHECK_RET(p); +#define DUMP_OMIT wire_data_omit(p, 0, false); CHECK_RET(p); +#define DUMP_HEX_OMIT if (p->style->hide_crypto) { DUMP_OMIT; } \ + else if (p->style->wrap) { WRAP_INIT; DUMP_HEX; WRAP_END; } \ + else { DUMP_HEX; } #define DUMP_BASE64 wire_data_encode_to_str(p, &knot_base64_encode, \ &knot_base64_encode_alloc); CHECK_RET(p); #define DUMP_HASH wire_len_data_encode_to_str(p, &knot_base32hex_encode, \ @@ -1743,9 +1783,9 @@ static void dnskey_info(const uint8_t *rdata, 1, false, "-"); CHECK_RET(p); #define DUMP_TSIG_DGST wire_len_data_encode_to_str(p, &knot_base64_encode, \ 2, true, ""); CHECK_RET(p); +#define DUMP_TSIG_OMIT wire_data_omit(p, 2, true); CHECK_RET(p); #define DUMP_TSIG_DATA wire_len_data_encode_to_str(p, &num48_encode, \ 2, true, ""); CHECK_RET(p); -#define DUMP_OMIT wire_data_omit(p); CHECK_RET(p); #define DUMP_KEY_OMIT wire_dnskey_to_tag(p); CHECK_RET(p); #define DUMP_TEXT wire_text_to_str1(p, true, false); CHECK_RET(p); #define DUMP_LONG_TEXT wire_text_to_str(p, p->in_max, NULL, true, false); CHECK_RET(p); @@ -1923,16 +1963,17 @@ static int dump_naptr(DUMP_PARAMS) static int dump_cert(DUMP_PARAMS) { - if (p->style->wrap) { - DUMP_NUM16; DUMP_SPACE; - DUMP_NUM16; DUMP_SPACE; - DUMP_NUM8; DUMP_SPACE; WRAP_INIT; + DUMP_NUM16; DUMP_SPACE; + DUMP_NUM16; DUMP_SPACE; + DUMP_NUM8; DUMP_SPACE; + + if (p->style->hide_crypto) { + DUMP_OMIT; + } else if (p->style->wrap) { + WRAP_INIT; DUMP_BASE64; WRAP_END; } else { - DUMP_NUM16; DUMP_SPACE; - DUMP_NUM16; DUMP_SPACE; - DUMP_NUM8; DUMP_SPACE; DUMP_BASE64; } @@ -1982,34 +2023,19 @@ static int dump_apl(DUMP_PARAMS) static int dump_ds(DUMP_PARAMS) { - if (p->style->wrap) { - DUMP_NUM16; DUMP_SPACE; - DUMP_NUM8; DUMP_SPACE; - DUMP_NUM8; DUMP_SPACE; WRAP_INIT; - DUMP_HEX; - WRAP_END; - } else { - DUMP_NUM16; DUMP_SPACE; - DUMP_NUM8; DUMP_SPACE; - DUMP_NUM8; DUMP_SPACE; - DUMP_HEX; - } + DUMP_NUM16; DUMP_SPACE; + DUMP_NUM8; DUMP_SPACE; + DUMP_NUM8; DUMP_SPACE; + DUMP_HEX_OMIT; DUMP_END; } static int dump_sshfp(DUMP_PARAMS) { - if (p->style->wrap) { - DUMP_NUM8; DUMP_SPACE; - DUMP_NUM8; DUMP_SPACE; WRAP_INIT; - DUMP_HEX; - WRAP_END; - } else { - DUMP_NUM8; DUMP_SPACE; - DUMP_NUM8; DUMP_SPACE; - DUMP_HEX; - } + DUMP_NUM8; DUMP_SPACE; + DUMP_NUM8; DUMP_SPACE; + DUMP_HEX_OMIT; DUMP_END; } @@ -2067,7 +2093,9 @@ static int dump_nsec(DUMP_PARAMS) static int dump_dhcid(DUMP_PARAMS) { - if (p->style->wrap) { + if (p->style->hide_crypto) { + DUMP_OMIT; + } else if (p->style->wrap) { WRAP_INIT; DUMP_BASE64; WRAP_END; @@ -2112,18 +2140,10 @@ static int dump_nsec3param(DUMP_PARAMS) static int dump_tlsa(DUMP_PARAMS) { - if (p->style->wrap) { - DUMP_NUM8; DUMP_SPACE; - DUMP_NUM8; DUMP_SPACE; - DUMP_NUM8; DUMP_SPACE; WRAP_INIT; - DUMP_HEX; - WRAP_END; - } else { - DUMP_NUM8; DUMP_SPACE; - DUMP_NUM8; DUMP_SPACE; - DUMP_NUM8; DUMP_SPACE; - DUMP_HEX; - } + DUMP_NUM8; DUMP_SPACE; + DUMP_NUM8; DUMP_SPACE; + DUMP_NUM8; DUMP_SPACE; + DUMP_HEX_OMIT; DUMP_END; } @@ -2139,18 +2159,10 @@ static int dump_csync(DUMP_PARAMS) static int dump_zonemd(DUMP_PARAMS) { - if (p->style->wrap) { - DUMP_NUM32; DUMP_SPACE; - DUMP_NUM8; DUMP_SPACE; - DUMP_NUM8; DUMP_SPACE; WRAP_INIT; - DUMP_HEX; - WRAP_END; - } else { - DUMP_NUM32; DUMP_SPACE; - DUMP_NUM8; DUMP_SPACE; - DUMP_NUM8; DUMP_SPACE; - DUMP_HEX; - } + DUMP_NUM32; DUMP_SPACE; + DUMP_NUM8; DUMP_SPACE; + DUMP_NUM8; DUMP_SPACE; + DUMP_HEX_OMIT; DUMP_END; } @@ -2184,7 +2196,11 @@ static int dump_tsig(DUMP_PARAMS) DUMP_DNAME; DUMP_SPACE; DUMP_NUM48; DUMP_SPACE; DUMP_NUM16; DUMP_SPACE; WRAP_INIT; - DUMP_TSIG_DGST; WRAP_LINE; + if (p->style->hide_crypto) { + DUMP_TSIG_OMIT; WRAP_LINE; + } else { + DUMP_TSIG_DGST; WRAP_LINE; + } DUMP_NUM16; DUMP_SPACE; DUMP_TSIG_RCODE; DUMP_SPACE; DUMP_TSIG_DATA; @@ -2193,7 +2209,11 @@ static int dump_tsig(DUMP_PARAMS) DUMP_DNAME; DUMP_SPACE; DUMP_NUM48; DUMP_SPACE; DUMP_NUM16; DUMP_SPACE; - DUMP_TSIG_DGST; DUMP_SPACE; + if (p->style->hide_crypto) { + DUMP_TSIG_OMIT; DUMP_SPACE; + } else { + DUMP_TSIG_DGST; DUMP_SPACE; + } DUMP_NUM16; DUMP_SPACE; DUMP_TSIG_RCODE; DUMP_SPACE; DUMP_TSIG_DATA; diff --git a/src/libknot/rrtype/tsig.c b/src/libknot/rrtype/tsig.c index 83f8436..0002963 100644 --- a/src/libknot/rrtype/tsig.c +++ b/src/libknot/rrtype/tsig.c @@ -245,7 +245,11 @@ int knot_tsig_rdata_set_other_data(knot_rrset_t *tsig, uint16_t len, _public_ const knot_dname_t *knot_tsig_rdata_alg_name(const knot_rrset_t *tsig) { - return knot_rdataset_at(&tsig->rrs, 0)->data; + const knot_rdata_t *rr_data = knot_rdataset_at(&tsig->rrs, 0); + if (!rr_data) { + return NULL; + } + return rr_data->data; } _public_ diff --git a/src/libknot/version.h b/src/libknot/version.h index 38e0b74..8e21f49 100644 --- a/src/libknot/version.h +++ b/src/libknot/version.h @@ -17,8 +17,8 @@ #pragma once #define KNOT_VERSION_MAJOR 3 -#define KNOT_VERSION_MINOR 3 -#define KNOT_VERSION_PATCH 0x09 +#define KNOT_VERSION_MINOR 4 +#define KNOT_VERSION_PATCH 0x00 #define KNOT_VERSION_HEX ((KNOT_VERSION_MAJOR << 16) | \ (KNOT_VERSION_MINOR << 8) | \ diff --git a/src/libknot/xdp/Makefile.in b/src/libknot/xdp/Makefile.in index 8aa77ce..f065f3e 100644 --- a/src/libknot/xdp/Makefile.in +++ b/src/libknot/xdp/Makefile.in @@ -270,6 +270,8 @@ infodir = @infodir@ install_sh = @install_sh@ libbpf_CFLAGS = @libbpf_CFLAGS@ libbpf_LIBS = @libbpf_LIBS@ +libdbus_CFLAGS = @libdbus_CFLAGS@ +libdbus_LIBS = @libdbus_LIBS@ libdir = @libdir@ libdnssec_SONAME = @libdnssec_SONAME@ libdnssec_SOVERSION = @libdnssec_SOVERSION@ @@ -281,8 +283,6 @@ libfstrm_CFLAGS = @libfstrm_CFLAGS@ libfstrm_LIBS = @libfstrm_LIBS@ libidn2_CFLAGS = @libidn2_CFLAGS@ libidn2_LIBS = @libidn2_LIBS@ -libidn_CFLAGS = @libidn_CFLAGS@ -libidn_LIBS = @libidn_LIBS@ libknot_SONAME = @libknot_SONAME@ libknot_SOVERSION = @libknot_SOVERSION@ libknot_VERSION_INFO = @libknot_VERSION_INFO@ @@ -300,7 +300,6 @@ libprotobuf_c_CFLAGS = @libprotobuf_c_CFLAGS@ libprotobuf_c_LIBS = @libprotobuf_c_LIBS@ liburcu_CFLAGS = @liburcu_CFLAGS@ liburcu_LIBS = @liburcu_LIBS@ -liburcu_PKGCONFIG = @liburcu_PKGCONFIG@ libxdp_CFLAGS = @libxdp_CFLAGS@ libxdp_LIBS = @libxdp_LIBS@ libzscanner_SONAME = @libzscanner_SONAME@ diff --git a/src/libknot/xdp/bpf-user.h b/src/libknot/xdp/bpf-user.h index 37aac61..b76c9d6 100644 --- a/src/libknot/xdp/bpf-user.h +++ b/src/libknot/xdp/bpf-user.h @@ -1,4 +1,4 @@ -/* Copyright (C) 2022 CZ.NIC, z.s.p.o. <knot-dns@labs.nic.cz> +/* Copyright (C) 2024 CZ.NIC, z.s.p.o. <knot-dns@labs.nic.cz> This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -60,8 +60,10 @@ struct kxsk_umem { /*! The memory frames. */ struct umem_frame *frames; + /*! Size of RX and TX rings. */ + uint16_t ring_size; /*! The number of free frames (for TX). */ - uint32_t tx_free_count; + uint16_t tx_free_count; /*! Stack of indices of the free frames (for TX). */ uint16_t tx_free_indices[]; }; @@ -82,15 +84,15 @@ struct knot_xdp_socket { /*! If non-NULL, it's a mocked socket with this send function. */ int (*send_mock)(struct knot_xdp_socket *, const knot_xdp_msg_t[], uint32_t, uint32_t *); - /*! The kernel has to be woken up by a syscall indication. */ - bool kernel_needs_wakeup; - /*! The limit of frame size. */ unsigned frame_limit; /*! Mapping of interface indices to VLAN tags. */ uint16_t *vlan_map; uint16_t vlan_map_max; + + /*! Enabled preferred busy polling. */ + bool busy_poll; }; /*! diff --git a/src/libknot/xdp/tcp.c b/src/libknot/xdp/tcp.c index eae73a9..d219db9 100644 --- a/src/libknot/xdp/tcp.c +++ b/src/libknot/xdp/tcp.c @@ -1,4 +1,4 @@ -/* Copyright (C) 2023 CZ.NIC, z.s.p.o. <knot-dns@labs.nic.cz> +/* Copyright (C) 2024 CZ.NIC, z.s.p.o. <knot-dns@labs.nic.cz> This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -287,172 +287,162 @@ static void conn_update(knot_tcp_conn_t *conn, const knot_xdp_msg_t *msg) } _public_ -int knot_tcp_recv(knot_tcp_relay_t *relays, knot_xdp_msg_t msgs[], uint32_t msg_count, +int knot_tcp_recv(knot_tcp_relay_t *relay, knot_xdp_msg_t *msg, knot_tcp_table_t *tcp_table, knot_tcp_table_t *syn_table, knot_tcp_ignore_t ignore) { - if (msg_count == 0) { - return KNOT_EOK; - } - if (relays == NULL || msgs == NULL || tcp_table == NULL) { + if (relay == NULL || msg == NULL || tcp_table == NULL) { return KNOT_EINVAL; } - memset(relays, 0, msg_count * sizeof(*relays)); + memset(relay, 0, sizeof(*relay)); - knot_tcp_relay_t *relay = relays; int ret = KNOT_EOK; - for (knot_xdp_msg_t *msg = msgs; msg != msgs + msg_count && ret == KNOT_EOK; msg++) { - if (!(msg->flags & KNOT_XDP_MSG_TCP)) { - continue; - } + if (!(msg->flags & KNOT_XDP_MSG_TCP)) { + return KNOT_EOK; + } - uint64_t conn_hash = 0; - knot_tcp_conn_t **pconn = tcp_table_lookup(&msg->ip_from, &msg->ip_to, - &conn_hash, tcp_table); - knot_tcp_conn_t *conn = *pconn; - bool seq_ack_match = check_seq_ack(msg, conn); - if (seq_ack_match) { - assert(conn->mss != 0); - conn_update(conn, msg); - - rem_align_pointers(conn, tcp_table); - rem_node(tcp_conn_node(conn)); - add_tail(tcp_table_timeout(tcp_table), tcp_conn_node(conn)); - - if (msg->flags & KNOT_XDP_MSG_ACK) { - conn->acked = msg->ackno; - knot_tcp_outbufs_ack(&conn->outbufs, msg->ackno, &tcp_table->outbufs_total); - } + uint64_t conn_hash = 0; + knot_tcp_conn_t **pconn = tcp_table_lookup(&msg->ip_from, &msg->ip_to, + &conn_hash, tcp_table); + knot_tcp_conn_t *conn = *pconn; + bool seq_ack_match = check_seq_ack(msg, conn); + if (seq_ack_match) { + assert(conn->mss != 0); + conn_update(conn, msg); + + rem_align_pointers(conn, tcp_table); + rem_node(tcp_conn_node(conn)); + add_tail(tcp_table_timeout(tcp_table), tcp_conn_node(conn)); + + if (msg->flags & KNOT_XDP_MSG_ACK) { + conn->acked = msg->ackno; + knot_tcp_outbufs_ack(&conn->outbufs, msg->ackno, &tcp_table->outbufs_total); } + } - relay->msg = msg; - relay->conn = conn; + relay->msg = msg; + relay->conn = conn; - // process incoming data - if (seq_ack_match && (msg->flags & KNOT_XDP_MSG_ACK) && msg->payload.iov_len > 0) { - if (!(ignore & XDP_TCP_IGNORE_DATA_ACK)) { - relay->auto_answer = KNOT_XDP_MSG_ACK; - } - ret = knot_tcp_inbufs_upd(&conn->inbuf, msg->payload, false, - &relay->inbf, &tcp_table->inbufs_total); - if (ret != KNOT_EOK) { - break; - } - if (conn->inbuf.iov_len > 0 && tcp_table->next_ibuf == NULL) { - tcp_table->next_ibuf = conn; - } + // process incoming data + if (seq_ack_match && (msg->flags & KNOT_XDP_MSG_ACK) && msg->payload.iov_len > 0) { + if (!(ignore & XDP_TCP_IGNORE_DATA_ACK)) { + relay->auto_answer = KNOT_XDP_MSG_ACK; + } + ret = knot_tcp_inbufs_upd(&conn->inbuf, msg->payload, false, + &relay->inbf, &tcp_table->inbufs_total); + if (ret != KNOT_EOK) { + return ret; + } + if (conn->inbuf.iov_len > 0 && tcp_table->next_ibuf == NULL) { + tcp_table->next_ibuf = conn; } + } - // process TCP connection state - switch (msg->flags & (KNOT_XDP_MSG_SYN | KNOT_XDP_MSG_ACK | - KNOT_XDP_MSG_FIN | KNOT_XDP_MSG_RST)) { - case KNOT_XDP_MSG_SYN: - case (KNOT_XDP_MSG_SYN | KNOT_XDP_MSG_ACK): - if (conn == NULL) { - bool synack = (msg->flags & KNOT_XDP_MSG_ACK); - - knot_tcp_table_t *add_table = tcp_table; - if (syn_table != NULL) { - if (synack) { - break; // creating conn based on SYN+ACK is only for kxdpgun, disallow in knotd - } - add_table = syn_table; - if (*tcp_table_lookup(&msg->ip_from, &msg->ip_to, &conn_hash, syn_table) != NULL) { - break; - } - } + // process TCP connection state + switch (msg->flags & (KNOT_XDP_MSG_SYN | KNOT_XDP_MSG_ACK | + KNOT_XDP_MSG_FIN | KNOT_XDP_MSG_RST)) { + case KNOT_XDP_MSG_SYN: + case (KNOT_XDP_MSG_SYN | KNOT_XDP_MSG_ACK): + if (conn == NULL) { + bool synack = (msg->flags & KNOT_XDP_MSG_ACK); - ret = tcp_table_add(msg, conn_hash, add_table, &relay->conn); - if (ret == KNOT_EOK) { - relay->action = synack ? XDP_TCP_ESTABLISH : XDP_TCP_SYN; - if (!(ignore & XDP_TCP_IGNORE_ESTABLISH)) { - relay->auto_answer = synack ? KNOT_XDP_MSG_ACK : (KNOT_XDP_MSG_SYN | KNOT_XDP_MSG_ACK); - } - - conn = relay->conn; - conn->state = synack ? XDP_TCP_NORMAL: XDP_TCP_ESTABLISHING; - conn->mss = MAX(msg->mss, 536); // minimal MSS, most importantly not zero! - conn->window_scale = msg->win_scale; - conn_update(conn, msg); - if (!synack) { - conn->acked = dnssec_random_uint32_t(); - conn->ackno = conn->acked; - } + knot_tcp_table_t *add_table = tcp_table; + if (syn_table != NULL) { + if (synack) { + break; // creating conn based on SYN+ACK is only for kxdpgun, disallow in knotd + } + add_table = syn_table; + if (*tcp_table_lookup(&msg->ip_from, &msg->ip_to, &conn_hash, syn_table) != NULL) { + break; } - } else { - relay->auto_answer = KNOT_XDP_MSG_ACK; } - break; - case KNOT_XDP_MSG_ACK: - if (!seq_ack_match) { - if (syn_table != NULL && msg->payload.iov_len == 0 && conn == NULL && - (pconn = tcp_table_lookup(&msg->ip_from, &msg->ip_to, &conn_hash, syn_table)) != NULL && - (conn = *pconn) != NULL && check_seq_ack(msg, conn)) { - // move conn from syn_table to tcp_table - tcp_table_remove(pconn, syn_table); - tcp_table_insert(conn, conn_hash, tcp_table); - relay->conn = conn; - relay->action = XDP_TCP_ESTABLISH; - conn->state = XDP_TCP_NORMAL; - conn_update(conn, msg); + + ret = tcp_table_add(msg, conn_hash, add_table, &relay->conn); + if (ret == KNOT_EOK) { + relay->action = synack ? XDP_TCP_ESTABLISH : XDP_TCP_SYN; + if (!(ignore & XDP_TCP_IGNORE_ESTABLISH)) { + relay->auto_answer = synack ? KNOT_XDP_MSG_ACK : (KNOT_XDP_MSG_SYN | KNOT_XDP_MSG_ACK); } - } else { - switch (conn->state) { - case XDP_TCP_NORMAL: - case XDP_TCP_CLOSING1: // just a mess, ignore - break; - case XDP_TCP_ESTABLISHING: - conn->state = XDP_TCP_NORMAL; - relay->action = XDP_TCP_ESTABLISH; - break; - case XDP_TCP_CLOSING2: - if (msg->payload.iov_len == 0) { // otherwise ignore close - tcp_table_remove(pconn, tcp_table); - relay->answer = XDP_TCP_FREE; - } - break; + + conn = relay->conn; + conn->state = synack ? XDP_TCP_NORMAL: XDP_TCP_ESTABLISHING; + conn->mss = MAX(msg->mss, 536); // minimal MSS, most importantly not zero! + conn->window_scale = msg->win_scale; + conn_update(conn, msg); + if (!synack) { + conn->acked = dnssec_random_uint32_t(); + conn->ackno = conn->acked; } } - break; - case (KNOT_XDP_MSG_FIN | KNOT_XDP_MSG_ACK): - if (ignore & XDP_TCP_IGNORE_FIN) { - break; + } else { + relay->auto_answer = KNOT_XDP_MSG_ACK; + } + break; + case KNOT_XDP_MSG_ACK: + if (!seq_ack_match) { + if (syn_table != NULL && msg->payload.iov_len == 0 && conn == NULL && + (pconn = tcp_table_lookup(&msg->ip_from, &msg->ip_to, &conn_hash, syn_table)) != NULL && + (conn = *pconn) != NULL && check_seq_ack(msg, conn)) { + // move conn from syn_table to tcp_table + tcp_table_remove(pconn, syn_table); + tcp_table_insert(conn, conn_hash, tcp_table); + relay->conn = conn; + relay->action = XDP_TCP_ESTABLISH; + conn->state = XDP_TCP_NORMAL; + conn_update(conn, msg); } - if (!seq_ack_match) { - if (conn != NULL) { - relay->auto_answer = KNOT_XDP_MSG_RST; - relay->auto_seqno = msg->ackno; - } // else ignore. It would be better and possible, but no big value for the price of CPU. - } else { - if (conn->state == XDP_TCP_CLOSING1) { - relay->action = XDP_TCP_CLOSE; - relay->auto_answer = KNOT_XDP_MSG_ACK; - relay->answer = XDP_TCP_FREE; + } else { + switch (conn->state) { + case XDP_TCP_NORMAL: + case XDP_TCP_CLOSING1: // just a mess, ignore + break; + case XDP_TCP_ESTABLISHING: + conn->state = XDP_TCP_NORMAL; + relay->action = XDP_TCP_ESTABLISH; + break; + case XDP_TCP_CLOSING2: + if (msg->payload.iov_len == 0) { // otherwise ignore close tcp_table_remove(pconn, tcp_table); - } else if (msg->payload.iov_len == 0) { // otherwise ignore FIN - relay->action = XDP_TCP_CLOSE; - relay->auto_answer = KNOT_XDP_MSG_FIN | KNOT_XDP_MSG_ACK; - conn->state = XDP_TCP_CLOSING2; + relay->answer = XDP_TCP_FREE; } + break; } + } + break; + case (KNOT_XDP_MSG_FIN | KNOT_XDP_MSG_ACK): + if (ignore & XDP_TCP_IGNORE_FIN) { break; - case KNOT_XDP_MSG_RST: - if (conn != NULL && msg->seqno == conn->seqno) { - relay->action = XDP_TCP_RESET; - tcp_table_remove(pconn, tcp_table); - relay->answer = XDP_TCP_FREE; - } else if (conn != NULL) { + } + if (!seq_ack_match) { + if (conn != NULL) { + relay->auto_answer = KNOT_XDP_MSG_RST; + relay->auto_seqno = msg->ackno; + } // else ignore. It would be better and possible, but no big value for the price of CPU. + } else { + if (conn->state == XDP_TCP_CLOSING1) { + relay->action = XDP_TCP_CLOSE; relay->auto_answer = KNOT_XDP_MSG_ACK; + relay->answer = XDP_TCP_FREE; + tcp_table_remove(pconn, tcp_table); + } else if (msg->payload.iov_len == 0) { // otherwise ignore FIN + relay->action = XDP_TCP_CLOSE; + relay->auto_answer = KNOT_XDP_MSG_FIN | KNOT_XDP_MSG_ACK; + conn->state = XDP_TCP_CLOSING2; } - break; - default: - break; } - - if (!knot_tcp_relay_empty(relay)) { - relay++; + break; + case KNOT_XDP_MSG_RST: + if (conn != NULL && msg->seqno == conn->seqno) { + relay->action = XDP_TCP_RESET; + tcp_table_remove(pconn, tcp_table); + relay->answer = XDP_TCP_FREE; + } else if (conn != NULL) { + relay->auto_answer = KNOT_XDP_MSG_ACK; } + break; + default: + break; } return ret; diff --git a/src/libknot/xdp/tcp.h b/src/libknot/xdp/tcp.h index 09fe652..39a30fd 100644 --- a/src/libknot/xdp/tcp.h +++ b/src/libknot/xdp/tcp.h @@ -1,4 +1,4 @@ -/* Copyright (C) 2023 CZ.NIC, z.s.p.o. <knot-dns@labs.nic.cz> +/* Copyright (C) 2024 CZ.NIC, z.s.p.o. <knot-dns@labs.nic.cz> This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -148,18 +148,19 @@ knot_tcp_table_t *knot_tcp_table_new(size_t size, knot_tcp_table_t *secret_share void knot_tcp_table_free(knot_tcp_table_t *table); /*! - * \brief Process received packets, prepare automatic responses (e.g. ACK), pick incoming data. + * \brief Process received packet, prepare automatic response (e.g. ACK), pick incoming data. * - * \param relays Out: relays to be filled with message/connection details. - * \param msgs Packets received by knot_xdp_recv(). - * \param msg_count Number of received packets. + * \param relay Out: relay to be filled with message/connection details. + * \param msg Packet received by knot_xdp_recv(). * \param tcp_table Table of TCP connections. * \param syn_table Optional: extra table for handling partially established connections. * \param ignore Ignore specific TCP packets indication. * + * \note resulting relay might be knot_tcp_relay_empty() + * * \return KNOT_E* */ -int knot_tcp_recv(knot_tcp_relay_t *relays, knot_xdp_msg_t msgs[], uint32_t msg_count, +int knot_tcp_recv(knot_tcp_relay_t *relay, knot_xdp_msg_t *msg, knot_tcp_table_t *tcp_table, knot_tcp_table_t *syn_table, knot_tcp_ignore_t ignore); diff --git a/src/libknot/xdp/xdp.c b/src/libknot/xdp/xdp.c index 8286884..132f5c4 100644 --- a/src/libknot/xdp/xdp.c +++ b/src/libknot/xdp/xdp.c @@ -1,4 +1,4 @@ -/* Copyright (C) 2023 CZ.NIC, z.s.p.o. <knot-dns@labs.nic.cz> +/* Copyright (C) 2024 CZ.NIC, z.s.p.o. <knot-dns@labs.nic.cz> This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -38,63 +38,68 @@ #include "contrib/net.h" #define FRAME_SIZE 2048 - -#define FRAME_COUNT_TX 2048 -#define FRAME_COUNT_RX 2048 -#define FRAME_COUNT (FRAME_COUNT_TX + FRAME_COUNT_RX) - -#define RING_LEN_TX FRAME_COUNT_TX -#define RING_LEN_CQ FRAME_COUNT_TX -#define RING_LEN_RX FRAME_COUNT_RX -/* It's recommended that the FQ ring size >= HW RX ring size + AF_XDP RX ring size. */ -#define RING_LEN_FQ (2 * FRAME_COUNT_RX) - -#define ALLOC_RETRY_NUM 15 -#define ALLOC_RETRY_DELAY 20 // In nanoseconds. - -/* With recent compilers we statically check #defines for settings that - * get refused by AF_XDP drivers (in current versions, at least). */ -#if (__STDC_VERSION__ >= 201112L) -#define IS_POWER_OF_2(n) (((n) & (n - 1)) == 0) -_Static_assert((FRAME_SIZE == 4096 || FRAME_SIZE == 2048) - && IS_POWER_OF_2(RING_LEN_TX) && IS_POWER_OF_2(RING_LEN_RX) - && IS_POWER_OF_2(RING_LEN_CQ) && IS_POWER_OF_2(RING_LEN_FQ) - && FRAME_COUNT_TX <= (1 << 16) /* see tx_free_indices */ - , "Incorrect #define combination for AF_XDP."); -#endif +#define DEFAULT_RING_SIZE 2048 +#define RETRY_DELAY 20 // In nanoseconds. struct umem_frame { uint8_t bytes[FRAME_SIZE]; }; -static int configure_xsk_umem(struct kxsk_umem **out_umem, bool extra_frames) +static bool valid_config(const knot_xdp_config_t *config) +{ + if (FRAME_SIZE != 2048 && FRAME_SIZE != 4096) { + return false; + } + + if (config == NULL) { + return true; + } + + if ((config->ring_size & (config->ring_size - 1)) != 0) { + return false; + } + + return true; +} + +static uint32_t ring_size(const knot_xdp_config_t *config) +{ + return config != NULL ? config->ring_size : DEFAULT_RING_SIZE; +} + +static int configure_xsk_umem(struct kxsk_umem **out_umem, uint32_t ring_size) { /* Allocate memory and call driver to create the UMEM. */ struct kxsk_umem *umem = calloc(1, offsetof(struct kxsk_umem, tx_free_indices) - + sizeof(umem->tx_free_indices[0]) * FRAME_COUNT_TX); + + sizeof(umem->tx_free_indices[0]) * ring_size); if (umem == NULL) { return KNOT_ENOMEM; } + umem->ring_size = ring_size; - size_t frame_count = FRAME_COUNT + (extra_frames ? FRAME_COUNT_RX : 0); + /* It's recommended that the FQ ring size >= HW RX ring size + AF_XDP RX ring size. + * However, the performance is better if FQ size == AF_XDP RX size. */ + const uint32_t FQ_SIZE = umem->ring_size; + const uint32_t CQ_SIZE = umem->ring_size; + const uint32_t FRAMES = FQ_SIZE + CQ_SIZE; int ret = posix_memalign((void **)&umem->frames, getpagesize(), - FRAME_SIZE * frame_count); + FRAME_SIZE * FRAMES); if (ret != 0) { free(umem); return KNOT_ENOMEM; } - const struct xsk_umem_config config = { - .fill_size = RING_LEN_FQ, - .comp_size = RING_LEN_CQ, + const struct xsk_umem_config umem_config = { + .fill_size = FQ_SIZE, + .comp_size = CQ_SIZE, .frame_size = FRAME_SIZE, .frame_headroom = KNOT_XDP_PKT_ALIGNMENT, }; - ret = xsk_umem__create(&umem->umem, umem->frames, FRAME_SIZE * frame_count, - &umem->fq, &umem->cq, &config); + ret = xsk_umem__create(&umem->umem, umem->frames, FRAME_SIZE * FRAMES, + &umem->fq, &umem->cq, &umem_config); if (ret != KNOT_EOK) { free(umem->frames); free(umem); @@ -103,23 +108,23 @@ static int configure_xsk_umem(struct kxsk_umem **out_umem, bool extra_frames) *out_umem = umem; /* Designate the starting chunk of buffers for TX, and put them onto the stack. */ - umem->tx_free_count = FRAME_COUNT_TX; - for (uint32_t i = 0; i < FRAME_COUNT_TX; ++i) { + umem->tx_free_count = CQ_SIZE; + for (uint32_t i = 0; i < CQ_SIZE; ++i) { umem->tx_free_indices[i] = i; } /* Designate the rest of buffers for RX, and pass them to the driver. */ uint32_t idx = 0; - ret = xsk_ring_prod__reserve(&umem->fq, frame_count - FRAME_COUNT_TX, &idx); - if (ret != frame_count - FRAME_COUNT_TX) { + ret = xsk_ring_prod__reserve(&umem->fq, FQ_SIZE, &idx); + if (ret != FQ_SIZE) { assert(0); return KNOT_ERROR; } assert(idx == 0); - for (uint32_t i = FRAME_COUNT_TX; i < frame_count; ++i) { + for (uint32_t i = CQ_SIZE; i < CQ_SIZE + FQ_SIZE; ++i) { *xsk_ring_prod__fill_addr(&umem->fq, idx++) = i * FRAME_SIZE; } - xsk_ring_prod__submit(&umem->fq, frame_count - FRAME_COUNT_TX); + xsk_ring_prod__submit(&umem->fq, FQ_SIZE); return KNOT_EOK; } @@ -131,6 +136,33 @@ static void deconfigure_xsk_umem(struct kxsk_umem *umem) free(umem); } +static int enable_busypoll(int socket, unsigned timeout_us, unsigned budget) +{ +#if defined(SO_PREFER_BUSY_POLL) && defined(SO_BUSY_POLL_BUDGET) + int opt_val = 1; + if (setsockopt(socket, SOL_SOCKET, SO_PREFER_BUSY_POLL, + &opt_val, sizeof(opt_val)) != 0) { + return knot_map_errno(); + } + + opt_val = timeout_us; + if (setsockopt(socket, SOL_SOCKET, SO_BUSY_POLL, + &opt_val, sizeof(opt_val)) != 0) { + return knot_map_errno(); + } + + opt_val = budget; + if (setsockopt(socket, SOL_SOCKET, SO_BUSY_POLL_BUDGET, + &opt_val, sizeof(opt_val)) != 0) { + return knot_map_errno(); + } + + return KNOT_EOK; +#else + return KNOT_ENOTSUP; +#endif +} + static int configure_xsk_socket(struct kxsk_umem *umem, const struct kxsk_iface *iface, knot_xdp_socket_t **out_sock, @@ -143,14 +175,14 @@ static int configure_xsk_socket(struct kxsk_umem *umem, xsk_info->iface = iface; xsk_info->umem = umem; - uint16_t bind_flags = 0; + uint16_t bind_flags = XDP_USE_NEED_WAKEUP; if (config != NULL && config->force_copy) { bind_flags |= XDP_COPY; } const struct xsk_socket_config sock_conf = { - .tx_size = RING_LEN_TX, - .rx_size = RING_LEN_RX, + .tx_size = umem->ring_size, + .rx_size = umem->ring_size, .libbpf_flags = XSK_LIBBPF_FLAGS__INHIBIT_PROG_LOAD, .bind_flags = bind_flags, }; @@ -163,6 +195,17 @@ static int configure_xsk_socket(struct kxsk_umem *umem, return ret; } + if (config != NULL && config->busy_poll_budget > 0) { + ret = enable_busypoll(xsk_socket__fd(xsk_info->xsk), + config->busy_poll_timeout, config->busy_poll_budget); + if (ret != KNOT_EOK) { + xsk_socket__delete(xsk_info->xsk); + free(xsk_info); + return ret; + } + xsk_info->busy_poll = true; + } + *out_sock = xsk_info; return KNOT_EOK; } @@ -172,7 +215,7 @@ int knot_xdp_init(knot_xdp_socket_t **socket, const char *if_name, int if_queue, knot_xdp_filter_flag_t flags, uint16_t udp_port, uint16_t quic_port, knot_xdp_load_bpf_t load_bpf, const knot_xdp_config_t *xdp_config) { - if (socket == NULL || if_name == NULL || + if (socket == NULL || if_name == NULL || !valid_config(xdp_config) || (udp_port == quic_port && (flags & KNOT_XDP_FILTER_UDP) && (flags & KNOT_XDP_FILTER_QUIC)) || (flags & (KNOT_XDP_FILTER_UDP | KNOT_XDP_FILTER_TCP | KNOT_XDP_FILTER_QUIC)) == 0) { return KNOT_EINVAL; @@ -187,7 +230,7 @@ int knot_xdp_init(knot_xdp_socket_t **socket, const char *if_name, int if_queue, /* Initialize shared packet_buffer for umem usage. */ struct kxsk_umem *umem = NULL; - ret = configure_xsk_umem(&umem, xdp_config->extra_frames); + ret = configure_xsk_umem(&umem, ring_size(xdp_config)); if (ret != KNOT_EOK) { kxsk_iface_free(iface); return ret; @@ -266,7 +309,7 @@ static void tx_free_relative(struct kxsk_umem *umem, uint64_t addr_relative) { /* The address may not point to *start* of buffer, but `/` solves that. */ uint64_t index = addr_relative / FRAME_SIZE; - assert(index < FRAME_COUNT); + assert(index < umem->ring_size); umem->tx_free_indices[umem->tx_free_count++] = index; } @@ -285,7 +328,7 @@ void knot_xdp_send_prepare(knot_xdp_socket_t *socket) if (completed == 0) { return; } - assert(umem->tx_free_count + completed <= FRAME_COUNT_TX); + assert(umem->tx_free_count + completed <= umem->ring_size); for (uint32_t i = 0; i < completed; ++i) { uint64_t addr_relative = *xsk_ring_cons__comp_addr(cq, idx++); @@ -301,12 +344,13 @@ static struct umem_frame *alloc_tx_frame(knot_xdp_socket_t *socket) return malloc(sizeof(struct umem_frame)); } - const struct timespec delay = { .tv_nsec = ALLOC_RETRY_DELAY }; struct kxsk_umem *umem = socket->umem; - for (int i = 0; unlikely(umem->tx_free_count == 0); i++) { - if (i == ALLOC_RETRY_NUM) { - return NULL; + const struct timespec delay = { .tv_nsec = RETRY_DELAY }; + while (unlikely(umem->tx_free_count == 0)) { + if (socket->busy_poll || xsk_ring_prod__needs_wakeup(&socket->tx)) { + (void)sendto(xsk_socket__fd(socket->xsk), NULL, 0, + MSG_DONTWAIT, NULL, 0); } nanosleep(&delay, NULL); knot_xdp_send_prepare(socket); @@ -381,9 +425,7 @@ int knot_xdp_send(knot_xdp_socket_t *socket, const knot_xdp_msg_t msgs[], } if (unlikely(socket->send_mock != NULL)) { int ret = socket->send_mock(socket, msgs, count, sent); - for (uint32_t i = 0; i < count; ++i) { - free_unsent(socket, &msgs[i]); - } + knot_xdp_send_free(socket, msgs, count); return ret; } @@ -393,12 +435,13 @@ int knot_xdp_send(knot_xdp_socket_t *socket, const knot_xdp_msg_t msgs[], * and the API doesn't allow "cancelling reservations". * Therefore we handle `socket->tx.cached_prod` by hand. */ - if (xsk_prod_nb_free(&socket->tx, count) < count) { - /* This situation was sometimes observed in the emulated XDP mode. */ - for (uint32_t i = 0; i < count; ++i) { - free_unsent(socket, &msgs[i]); + const struct timespec delay = { .tv_nsec = RETRY_DELAY }; + while (unlikely(xsk_prod_nb_free(&socket->tx, count) < count)) { + if (socket->busy_poll || xsk_ring_prod__needs_wakeup(&socket->tx)) { + (void)sendto(xsk_socket__fd(socket->xsk), NULL, 0, + MSG_DONTWAIT, NULL, 0); } - return KNOT_ENOBUFS; + nanosleep(&delay, NULL); } uint32_t idx = socket->tx.cached_prod; @@ -425,7 +468,6 @@ int knot_xdp_send(knot_xdp_socket_t *socket, const knot_xdp_msg_t msgs[], assert(*sent <= count); socket->tx.cached_prod = idx; xsk_ring_prod__submit(&socket->tx, *sent); - socket->kernel_needs_wakeup = true; return KNOT_EOK; } @@ -446,34 +488,19 @@ int knot_xdp_send_finish(knot_xdp_socket_t *socket) return KNOT_EINVAL; } - /* Trigger sending queued packets. */ - if (!socket->kernel_needs_wakeup) { + if (!socket->busy_poll && !xsk_ring_prod__needs_wakeup(&socket->tx)) { return KNOT_EOK; } int ret = sendto(xsk_socket__fd(socket->xsk), NULL, 0, MSG_DONTWAIT, NULL, 0); - const bool is_ok = (ret >= 0); - // List of "safe" errors taken from - // https://github.com/torvalds/linux/blame/master/samples/bpf/xdpsock_user.c - const bool is_again = !is_ok && (errno == ENOBUFS || errno == EAGAIN - || errno == EBUSY || errno == ENETDOWN); - // Some of the !is_ok cases are a little unclear - what to do about the syscall, - // including how caller of _sendmsg_finish() should react. - if (is_ok || !is_again) { - socket->kernel_needs_wakeup = false; - } - if (is_again) { - return KNOT_EAGAIN; - } else if (is_ok) { + if (ret >= 0) { return KNOT_EOK; + } else if (errno == ENOBUFS || errno == EAGAIN || errno == EBUSY || + errno == ENETDOWN) { + return KNOT_EAGAIN; } else { return -errno; } - /* This syscall might be avoided with a newer kernel feature (>= 5.4): - https://www.kernel.org/doc/html/latest/networking/af_xdp.html#xdp-use-need-wakeup-bind-flag - Unfortunately it's not easy to continue supporting older kernels - when using this feature on newer ones. - */ } _public_ @@ -518,7 +545,7 @@ int knot_xdp_recv(knot_xdp_socket_t *socket, knot_xdp_msg_t msgs[], static uint8_t *msg_uframe_ptr(const knot_xdp_msg_t *msg) { - return NULL + ((msg->payload.iov_base - NULL) & ~(FRAME_SIZE - 1)); + return (uint8_t *)((uintptr_t)msg->payload.iov_base & ~(FRAME_SIZE - 1)); } _public_ @@ -529,30 +556,32 @@ void knot_xdp_recv_finish(knot_xdp_socket_t *socket, const knot_xdp_msg_t msgs[] return; } - const struct timespec delay = { .tv_nsec = ALLOC_RETRY_DELAY }; - struct kxsk_umem *const umem = socket->umem; struct xsk_ring_prod *const fq = &umem->fq; uint32_t idx = 0; - uint32_t reserved = xsk_ring_prod__reserve(fq, count, &idx); - for (int i = 0; unlikely(reserved < count); i++) { - if (i == ALLOC_RETRY_NUM) { - return; + const struct timespec delay = { .tv_nsec = RETRY_DELAY }; + while (unlikely(xsk_ring_prod__reserve(fq, count, &idx) != count)) { + if (socket->busy_poll || xsk_ring_prod__needs_wakeup(fq)) { + (void)recvfrom(xsk_socket__fd(socket->xsk), NULL, 0, + MSG_DONTWAIT, NULL, NULL); } nanosleep(&delay, NULL); - reserved = xsk_ring_prod__reserve(fq, count, &idx); } - for (uint32_t i = 0; i < reserved; ++i) { + for (uint32_t i = 0; i < count; ++i) { uint8_t *uframe_p = msg_uframe_ptr(&msgs[i]); uint64_t offset = uframe_p - umem->frames->bytes; *xsk_ring_prod__fill_addr(fq, idx++) = offset; } - xsk_ring_prod__submit(fq, reserved); + xsk_ring_prod__submit(fq, count); + // recvfrom() here slightly worsens the performance, poll is called later anyway. } +// The number of busy frames +#define RING_BUSY(ring) ((*(ring)->producer - *(ring)->consumer) & (ring)->mask) + _public_ void knot_xdp_socket_info(const knot_xdp_socket_t *socket, FILE *file) { @@ -560,10 +589,6 @@ void knot_xdp_socket_info(const knot_xdp_socket_t *socket, FILE *file) return; } - // The number of busy frames - #define RING_BUSY(ring) \ - ((*(ring)->producer - *(ring)->consumer) & (ring)->mask) - #define RING_PRINFO(name, ring) \ fprintf(file, "Ring %s: size %4d, busy %4d (prod %4d, cons %4d)\n", \ name, (unsigned)(ring)->size, \ @@ -571,11 +596,11 @@ void knot_xdp_socket_info(const knot_xdp_socket_t *socket, FILE *file) (unsigned)*(ring)->producer, (unsigned)*(ring)->consumer) const int rx_busyf = RING_BUSY(&socket->umem->fq) + RING_BUSY(&socket->rx); - fprintf(file, "\nLOST RX frames: %4d", (int)(FRAME_COUNT_RX - rx_busyf)); + fprintf(file, "\nLOST RX frames: %4d", (int)(socket->umem->ring_size - rx_busyf)); const int tx_busyf = RING_BUSY(&socket->umem->cq) + RING_BUSY(&socket->tx); const int tx_freef = socket->umem->tx_free_count; - fprintf(file, "\nLOST TX frames: %4d\n", (int)(FRAME_COUNT_TX - tx_busyf - tx_freef)); + fprintf(file, "\nLOST TX frames: %4d\n", (int)(socket->umem->ring_size - tx_busyf - tx_freef)); RING_PRINFO("FQ", &socket->umem->fq); RING_PRINFO("RX", &socket->rx); @@ -583,3 +608,39 @@ void knot_xdp_socket_info(const knot_xdp_socket_t *socket, FILE *file) RING_PRINFO("CQ", &socket->umem->cq); fprintf(file, "TX free frames: %4d\n", tx_freef); } + +_public_ +int knot_xdp_socket_stats(knot_xdp_socket_t *socket, knot_xdp_stats_t *stats) +{ + if (socket == NULL || stats == NULL) { + return KNOT_EINVAL; + } + + memset(stats, 0, sizeof(*stats)); + + stats->if_name = socket->iface->if_name; + stats->if_index = socket->iface->if_index; + stats->if_queue = socket->iface->if_queue; + + struct xdp_statistics xdp_stats; + socklen_t optlen = sizeof(xdp_stats); + + int fd = knot_xdp_socket_fd(socket); + int ret = getsockopt(fd, SOL_XDP, XDP_STATISTICS, &xdp_stats, &optlen); + if (ret != 0) { + return knot_map_errno(); + } else if (optlen != sizeof(xdp_stats)) { + return KNOT_EINVAL; + } + + size_t common_size = MIN(sizeof(xdp_stats), sizeof(stats->socket)); + memcpy(&stats->socket, &xdp_stats, common_size); + + stats->rings.tx_busy = socket->umem->ring_size - socket->umem->tx_free_count; + stats->rings.fq_fill = RING_BUSY(&socket->umem->fq); + stats->rings.rx_fill = RING_BUSY(&socket->rx); + stats->rings.tx_fill = RING_BUSY(&socket->tx); + stats->rings.cq_fill = RING_BUSY(&socket->umem->cq); + + return KNOT_EOK; +} diff --git a/src/libknot/xdp/xdp.h b/src/libknot/xdp/xdp.h index 6c8bb1e..5944d44 100644 --- a/src/libknot/xdp/xdp.h +++ b/src/libknot/xdp/xdp.h @@ -1,4 +1,4 @@ -/* Copyright (C) 2023 CZ.NIC, z.s.p.o. <knot-dns@labs.nic.cz> +/* Copyright (C) 2024 CZ.NIC, z.s.p.o. <knot-dns@labs.nic.cz> This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -53,14 +53,54 @@ typedef struct knot_xdp_socket knot_xdp_socket_t; /*! \brief Configuration of XDP socket. */ struct knot_xdp_config { - bool force_generic; /*!< Use generic XDP mode (avoid driver/hadrware implementation). */ - bool force_copy; /*!< Force copying packet data between kernel and user-space (avoid zero-copy). */ - bool extra_frames; /*!< Extra FQ frames. */ + uint16_t ring_size; /*!< Size of RX and TX rings (must be power of 2). */ + bool force_generic; /*!< Use generic XDP mode (avoid driver/hardware implementation). */ + bool force_copy; /*!< Force copying packet data between kernel and user-space (avoid zero-copy). */ + unsigned busy_poll_timeout; /*!< Preferred busy poll budget (0 means disabled). */ + unsigned busy_poll_budget; /*!< Preferred busy poll timeout (in microseconds) . */ }; /*! \brief Configuration of XDP socket. */ typedef struct knot_xdp_config knot_xdp_config_t; +/*! \brief Various statistics of an XDP socket (optimally kernel >=5.9). */ +typedef struct { + /*! Interface name. */ + const char *if_name; + /*! Interface name index (derived from ifname). */ + int if_index; + /*! Network card queue id. */ + unsigned if_queue; + /*! Counters (xdp_statistics) retrieved from the kernel via XDP_STATISTICS. */ + struct { + /*! Dropped for other reasons. */ + uint64_t rx_dropped; + /*! Dropped due to invalid descriptor. */ + uint64_t rx_invalid; + /*! Dropped due to invalid descriptor. */ + uint64_t tx_invalid; + /*! Dropped due to rx ring being full. */ + uint64_t rx_full; + /*! Failed to retrieve item from fill ring. */ + uint64_t fq_empty; + /*! Failed to retrieve item from tx ring. */ + uint64_t tx_empty; + } socket; + /*! States of rings of the XDP socket. */ + struct { + /*! Busy TX buffers. */ + uint16_t tx_busy; + /*! Free buffers to consume from FQ ring. */ + uint16_t fq_fill; + /*! Pending buffers in TX ring. */ + uint16_t rx_fill; + /*! Pending buffers in RX ring. */ + uint16_t tx_fill; + /*! Pending buffers in CQ ring. */ + uint16_t cq_fill; + } rings; +} knot_xdp_stats_t; + /*! * \brief Initialize XDP socket. * @@ -196,4 +236,14 @@ void knot_xdp_recv_finish(knot_xdp_socket_t *socket, const knot_xdp_msg_t msgs[] */ void knot_xdp_socket_info(const knot_xdp_socket_t *socket, FILE *file); +/*! + * \brief Gets various statistics of the XDP socket. + * + * \param socket XDP socket. + * \param stats Output structure. + * + * \return KNOT_E* + */ +int knot_xdp_socket_stats(knot_xdp_socket_t *socket, knot_xdp_stats_t *stats); + /*! @} */ diff --git a/src/libknot/yparser/ypschema.h b/src/libknot/yparser/ypschema.h index 57ced72..7fca93e 100644 --- a/src/libknot/yparser/ypschema.h +++ b/src/libknot/yparser/ypschema.h @@ -130,6 +130,8 @@ typedef union { int64_t dflt; /*! Possible unit type. */ yp_style_t unit; + /*! Alternative default value. */ + int64_t dflt_alt; } i; /*! Boolean variables. */ struct { diff --git a/src/libknot/yparser/yptrafo.c b/src/libknot/yparser/yptrafo.c index 60b3717..764a5d1 100644 --- a/src/libknot/yparser/yptrafo.c +++ b/src/libknot/yparser/yptrafo.c @@ -1,4 +1,4 @@ -/* Copyright (C) 2023 CZ.NIC, z.s.p.o. <knot-dns@labs.nic.cz> +/* Copyright (C) 2024 CZ.NIC, z.s.p.o. <knot-dns@labs.nic.cz> This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -31,25 +31,31 @@ #include "contrib/wire_ctx.h" enum { - UNIT_BYTE = 'B', - UNIT_KILO = 'K', - UNIT_MEGA = 'M', - UNIT_GIGA = 'G', - UNIT_SEC = 's', - UNIT_MIN = 'm', - UNIT_HOUR = 'h', - UNIT_DAY = 'd' + UNIT_BYTE = 'B', + UNIT_KILO = 'K', + UNIT_MEGA = 'M', + UNIT_GIGA = 'G', + UNIT_SEC = 's', + UNIT_MIN = 'm', + UNIT_HOUR = 'h', + UNIT_DAY = 'd', + UNIT_WEEK = 'w', + UNIT_MONTH = 'M', + UNIT_YEAR = 'y', }; enum { - MULTI_BYTE = 1, - MULTI_KILO = 1024, - MULTI_MEGA = 1024 * 1024, - MULTI_GIGA = 1024 * 1024 * 1024, - MULTI_SEC = 1, - MULTI_MIN = 60, - MULTI_HOUR = 3600, - MULTI_DAY = 24 * 3600 + MULTI_BYTE = 1, + MULTI_KILO = 1024, + MULTI_MEGA = 1024 * 1024, + MULTI_GIGA = 1024 * 1024 * 1024, + MULTI_SEC = 1, + MULTI_MIN = 60, + MULTI_HOUR = 3600, + MULTI_DAY = 24 * 3600, + MULTI_WEEK = MULTI_DAY * 7, + MULTI_MONTH = MULTI_DAY * 30, + MULTI_YEAR = MULTI_DAY * 365, }; static wire_ctx_t copy_in( @@ -186,6 +192,15 @@ static int remove_unit( case UNIT_DAY: multiplier = MULTI_DAY; break; + case UNIT_WEEK: + multiplier = MULTI_WEEK; + break; + case UNIT_MONTH: + multiplier = MULTI_MONTH; + break; + case UNIT_YEAR: + multiplier = MULTI_YEAR; + break; default: return KNOT_EINVAL; } @@ -295,9 +310,18 @@ static void add_unit( } else if (*number < MULTI_DAY) { multiplier = MULTI_HOUR; new_unit = UNIT_HOUR; - } else { + } else if (*number < MULTI_WEEK) { multiplier = MULTI_DAY; new_unit = UNIT_DAY; + } else if (*number < MULTI_MONTH) { + multiplier = MULTI_WEEK; + new_unit = UNIT_WEEK; + } else if (*number < MULTI_YEAR) { + multiplier = MULTI_MONTH; + new_unit = UNIT_MONTH; + } else { + multiplier = MULTI_YEAR; + new_unit = UNIT_YEAR; } } |