summaryrefslogtreecommitdiffstats
path: root/src/libknot/quic
diff options
context:
space:
mode:
Diffstat (limited to 'src/libknot/quic')
-rw-r--r--src/libknot/quic/quic.c1294
-rw-r--r--src/libknot/quic/quic.h213
-rw-r--r--src/libknot/quic/quic_conn.c577
-rw-r--r--src/libknot/quic/quic_conn.h326
4 files changed, 2410 insertions, 0 deletions
diff --git a/src/libknot/quic/quic.c b/src/libknot/quic/quic.c
new file mode 100644
index 0000000..5e447e7
--- /dev/null
+++ b/src/libknot/quic/quic.c
@@ -0,0 +1,1294 @@
+/* Copyright (C) 2023 CZ.NIC, z.s.p.o. <knot-dns@labs.nic.cz>
+
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program. If not, see <https://www.gnu.org/licenses/>.
+ */
+
+#include <assert.h>
+#include <fcntl.h>
+#include <gnutls/gnutls.h>
+#include <gnutls/crypto.h>
+#include <gnutls/x509.h>
+#include <ngtcp2/ngtcp2.h>
+#include <ngtcp2/ngtcp2_crypto.h>
+#include <ngtcp2/ngtcp2_crypto_gnutls.h>
+#include <stdbool.h>
+#include <stdio.h>
+#include <string.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <time.h>
+
+#include "libknot/quic/quic.h"
+
+#include "contrib/macros.h"
+#include "contrib/sockaddr.h"
+#include "contrib/string.h"
+#include "contrib/ucw/lists.h"
+#include "libknot/endian.h"
+#include "libdnssec/error.h"
+#include "libdnssec/random.h"
+#include "libknot/attribute.h"
+#include "libknot/endian.h"
+#include "libknot/error.h"
+#include "libknot/wire.h"
+
+#define SERVER_DEFAULT_SCIDLEN 18
+
+#define QUIC_DEFAULT_VERSION "-VERS-ALL:+VERS-TLS1.3"
+#define QUIC_DEFAULT_GROUPS "-GROUP-ALL:+GROUP-X25519:+GROUP-SECP256R1:+GROUP-SECP384R1:+GROUP-SECP521R1"
+#define QUIC_PRIORITIES "%DISABLE_TLS13_COMPAT_MODE:NORMAL:"QUIC_DEFAULT_VERSION":"QUIC_DEFAULT_GROUPS
+
+#define QUIC_SEND_VERSION_NEGOTIATION NGTCP2_ERR_VERSION_NEGOTIATION
+#define QUIC_SEND_RETRY NGTCP2_ERR_RETRY
+#define QUIC_SEND_STATELESS_RESET (-NGTCP2_STATELESS_RESET_TOKENLEN)
+#define QUIC_SEND_CONN_CLOSE (-KNOT_QUIC_HANDLE_RET_CLOSE)
+#define QUIC_SEND_EXCESSIVE_LOAD (-KNOT_QUIC_ERR_EXCESSIVE_LOAD)
+
+#define TLS_CALLBACK_ERR (-1)
+
+const gnutls_datum_t doq_alpn = {
+ (unsigned char *)"doq", 3
+};
+
+typedef struct knot_quic_creds {
+ gnutls_certificate_credentials_t tls_cert;
+ gnutls_anti_replay_t tls_anti_replay;
+ gnutls_datum_t tls_ticket_key;
+ bool peer;
+ uint8_t peer_pin_len;
+ uint8_t peer_pin[];
+} knot_quic_creds_t;
+
+typedef struct knot_quic_session {
+ node_t n;
+ gnutls_datum_t tls_session;
+ size_t quic_params_len;
+ uint8_t quic_params[sizeof(ngtcp2_transport_params)];
+} knot_quic_session_t;
+
+static unsigned addr_len(const struct sockaddr_in6 *ss)
+{
+ return (ss->sin6_family == AF_INET6 ?
+ sizeof(struct sockaddr_in6) : sizeof(struct sockaddr_in));
+}
+
+_public_
+bool knot_quic_session_available(knot_quic_conn_t *conn)
+{
+ return conn != NULL && !(conn->flags & KNOT_QUIC_CONN_SESSION_TAKEN) &&
+ (gnutls_session_get_flags(conn->tls_session) & GNUTLS_SFLAGS_SESSION_TICKET);
+}
+
+_public_
+struct knot_quic_session *knot_quic_session_save(knot_quic_conn_t *conn)
+{
+ if (!knot_quic_session_available(conn)) {
+ return NULL;
+ }
+
+ knot_quic_session_t *session = malloc(sizeof(*session));
+ if (session == NULL) {
+ return NULL;
+ }
+
+ int ret = gnutls_session_get_data2(conn->tls_session, &session->tls_session);
+ if (ret != GNUTLS_E_SUCCESS) {
+ free(session);
+ return NULL;
+ }
+ conn->flags |= KNOT_QUIC_CONN_SESSION_TAKEN;
+
+ ngtcp2_ssize ret2 =
+ ngtcp2_conn_encode_0rtt_transport_params(conn->conn, session->quic_params,
+ sizeof(session->quic_params));
+ if (ret2 < 0) {
+ free(session);
+ return NULL;
+ }
+ session->quic_params_len = ret2;
+
+ return session;
+}
+
+_public_
+int knot_quic_session_load(knot_quic_conn_t *conn, struct knot_quic_session *session)
+{
+ if (session == NULL) {
+ return KNOT_EINVAL;
+ }
+
+ int ret = KNOT_EOK;
+ if (conn == NULL) { // Just cleanup the session.
+ goto session_free;
+ }
+
+ ret = gnutls_session_set_data(conn->tls_session, session->tls_session.data,
+ session->tls_session.size);
+ if (ret != GNUTLS_E_SUCCESS) {
+ ret = KNOT_ERROR;
+ goto session_free;
+ }
+
+ ret = ngtcp2_conn_decode_and_set_0rtt_transport_params(conn->conn, session->quic_params,
+ session->quic_params_len);
+ if (ret != 0) {
+ ret = KNOT_ERROR;
+ }
+
+session_free:
+ gnutls_free(session->tls_session.data);
+ free(session);
+ return ret;
+}
+
+static int tls_anti_replay_db_add_func(void *dbf, time_t exp_time,
+ const gnutls_datum_t *key,
+ const gnutls_datum_t *data)
+{
+ return 0;
+}
+
+static void tls_session_ticket_key_free(gnutls_datum_t *ticket)
+{
+ gnutls_memset(ticket->data, 0, ticket->size);
+ gnutls_free(ticket->data);
+}
+
+static int self_key(gnutls_x509_privkey_t *privkey, const char *key_file)
+{
+ gnutls_datum_t data = { 0 };
+
+ int ret = gnutls_x509_privkey_init(privkey);
+ if (ret != GNUTLS_E_SUCCESS) {
+ return ret;
+ }
+
+ int fd = open(key_file, O_RDONLY);
+ if (fd != -1) {
+ struct stat stat;
+ if (fstat(fd, &stat) != 0 ||
+ (data.data = gnutls_malloc(stat.st_size)) == NULL ||
+ read(fd, data.data, stat.st_size) != stat.st_size) {
+ ret = GNUTLS_E_KEYFILE_ERROR;
+ goto finish;
+ }
+
+ data.size = stat.st_size;
+ ret = gnutls_x509_privkey_import_pkcs8(*privkey, &data, GNUTLS_X509_FMT_PEM,
+ NULL, GNUTLS_PKCS_PLAIN);
+ if (ret != GNUTLS_E_SUCCESS) {
+ goto finish;
+ }
+ } else {
+ ret = gnutls_x509_privkey_generate(*privkey, GNUTLS_PK_EDDSA_ED25519,
+ GNUTLS_CURVE_TO_BITS(GNUTLS_ECC_CURVE_ED25519), 0);
+ if (ret != GNUTLS_E_SUCCESS) {
+ goto finish;
+ }
+
+ ret = gnutls_x509_privkey_export2_pkcs8(*privkey, GNUTLS_X509_FMT_PEM, NULL,
+ GNUTLS_PKCS_PLAIN, &data);
+ if (ret != GNUTLS_E_SUCCESS ||
+ (fd = open(key_file, O_WRONLY | O_CREAT, 0600)) == -1 ||
+ write(fd, data.data, data.size) != data.size) {
+ ret = GNUTLS_E_KEYFILE_ERROR;
+ goto finish;
+ }
+ }
+
+finish:
+ close(fd);
+ gnutls_free(data.data);
+ if (ret != GNUTLS_E_SUCCESS) {
+ gnutls_x509_privkey_deinit(*privkey);
+ *privkey = NULL;
+ }
+ return ret;
+}
+
+static int self_signed_cert(gnutls_certificate_credentials_t tls_cert,
+ const char *key_file)
+{
+ gnutls_x509_privkey_t privkey = NULL;
+ gnutls_x509_crt_t cert = NULL;
+
+ char *hostname = sockaddr_hostname();
+ if (hostname == NULL) {
+ return GNUTLS_E_MEMORY_ERROR;
+ }
+
+ int ret;
+ uint8_t serial[16];
+ gnutls_rnd(GNUTLS_RND_NONCE, serial, sizeof(serial));
+ // Clear the left-most bit to be a positive number (two's complement form).
+ serial[0] &= 0x7F;
+
+#define CHK(cmd) if ((ret = (cmd)) != GNUTLS_E_SUCCESS) { goto finish; }
+#define NOW_DAYS(days) (time(NULL) + 24 * 3600 * (days))
+
+ CHK(self_key(&privkey, key_file));
+
+ CHK(gnutls_x509_crt_init(&cert));
+ CHK(gnutls_x509_crt_set_version(cert, 3));
+ CHK(gnutls_x509_crt_set_serial(cert, serial, sizeof(serial)));
+ CHK(gnutls_x509_crt_set_activation_time(cert, NOW_DAYS(-1)));
+ CHK(gnutls_x509_crt_set_expiration_time(cert, NOW_DAYS(10 * 365)));
+ CHK(gnutls_x509_crt_set_dn_by_oid(cert, GNUTLS_OID_X520_COMMON_NAME, 0,
+ hostname, strlen(hostname)));
+ CHK(gnutls_x509_crt_set_key(cert, privkey));
+ CHK(gnutls_x509_crt_sign2(cert, cert, privkey, GNUTLS_DIG_SHA512, 0));
+
+ ret = gnutls_certificate_set_x509_key(tls_cert, &cert, 1, privkey);
+
+finish:
+ free(hostname);
+ gnutls_x509_crt_deinit(cert);
+ gnutls_x509_privkey_deinit(privkey);
+
+ return ret;
+}
+
+_public_
+struct knot_quic_creds *knot_quic_init_creds(const char *cert_file,
+ const char *key_file)
+{
+ knot_quic_creds_t *creds = calloc(1, sizeof(*creds));
+ if (creds == NULL) {
+ return NULL;
+ }
+
+ int ret = gnutls_certificate_allocate_credentials(&creds->tls_cert);
+ if (ret != GNUTLS_E_SUCCESS) {
+ goto fail;
+ }
+
+ ret = gnutls_anti_replay_init(&creds->tls_anti_replay);
+ if (ret != GNUTLS_E_SUCCESS) {
+ goto fail;
+ }
+ gnutls_anti_replay_set_add_function(creds->tls_anti_replay, tls_anti_replay_db_add_func);
+ gnutls_anti_replay_set_ptr(creds->tls_anti_replay, NULL);
+
+ if (cert_file != NULL) {
+ ret = gnutls_certificate_set_x509_key_file(creds->tls_cert,
+ cert_file, key_file,
+ GNUTLS_X509_FMT_PEM);
+ } else {
+ ret = self_signed_cert(creds->tls_cert, key_file);
+ }
+ if (ret != GNUTLS_E_SUCCESS) {
+ goto fail;
+ }
+
+ ret = gnutls_session_ticket_key_generate(&creds->tls_ticket_key);
+ if (ret != GNUTLS_E_SUCCESS) {
+ goto fail;
+ }
+
+ return creds;
+fail:
+ knot_quic_free_creds(creds);
+ return NULL;
+}
+
+_public_
+struct knot_quic_creds *knot_quic_init_creds_peer(const struct knot_quic_creds *local_creds,
+ const uint8_t *peer_pin,
+ uint8_t peer_pin_len)
+{
+ knot_quic_creds_t *creds = calloc(1, sizeof(*creds) + peer_pin_len);
+ if (creds == NULL) {
+ return NULL;
+ }
+
+ if (local_creds != NULL) {
+ creds->peer = true;
+ creds->tls_cert = local_creds->tls_cert;
+ } else {
+ int ret = gnutls_certificate_allocate_credentials(&creds->tls_cert);
+ if (ret != GNUTLS_E_SUCCESS) {
+ free(creds);
+ return NULL;
+ }
+ }
+
+ if (peer_pin_len > 0 && peer_pin != NULL) {
+ memcpy(creds->peer_pin, peer_pin, peer_pin_len);
+ creds->peer_pin_len = peer_pin_len;
+ }
+
+ return creds;
+}
+
+_public_
+int knot_quic_creds_cert(struct knot_quic_creds *creds, struct gnutls_x509_crt_int **cert)
+{
+ if (creds == NULL || cert == NULL) {
+ return KNOT_EINVAL;
+ }
+
+ gnutls_x509_crt_t *certs;
+ unsigned cert_count;
+ int ret = gnutls_certificate_get_x509_crt(creds->tls_cert, 0, &certs, &cert_count);
+ if (ret == GNUTLS_E_SUCCESS) {
+ if (cert_count == 0) {
+ gnutls_x509_crt_deinit(*certs);
+ return KNOT_ENOENT;
+ }
+ *cert = *certs;
+ free(certs);
+ }
+ return ret;
+}
+
+_public_
+void knot_quic_free_creds(struct knot_quic_creds *creds)
+{
+ if (creds == NULL) {
+ return;
+ }
+
+ if (!creds->peer && creds->tls_cert != NULL) {
+ gnutls_certificate_free_credentials(creds->tls_cert);
+ }
+ gnutls_anti_replay_deinit(creds->tls_anti_replay);
+ if (creds->tls_ticket_key.data != NULL) {
+ tls_session_ticket_key_free(&creds->tls_ticket_key);
+ }
+ free(creds);
+}
+
+static ngtcp2_conn *get_conn(ngtcp2_crypto_conn_ref *conn_ref)
+{
+ return ((knot_quic_conn_t *)conn_ref->user_data)->conn;
+}
+
+static int tls_init_conn_session(knot_quic_conn_t *conn, bool server)
+{
+ if (gnutls_init(&conn->tls_session, (server ? GNUTLS_SERVER : GNUTLS_CLIENT) |
+ GNUTLS_ENABLE_EARLY_DATA | GNUTLS_NO_AUTO_SEND_TICKET |
+ GNUTLS_NO_END_OF_EARLY_DATA) != GNUTLS_E_SUCCESS) {
+ return TLS_CALLBACK_ERR;
+ }
+
+ gnutls_certificate_send_x509_rdn_sequence(conn->tls_session, 1);
+ gnutls_certificate_server_set_request(conn->tls_session, GNUTLS_CERT_REQUEST);
+
+ if (gnutls_priority_set_direct(conn->tls_session, QUIC_PRIORITIES,
+ NULL) != GNUTLS_E_SUCCESS) {
+ return TLS_CALLBACK_ERR;
+ }
+
+ if (server && gnutls_session_ticket_enable_server(conn->tls_session,
+ &conn->quic_table->creds->tls_ticket_key) != GNUTLS_E_SUCCESS) {
+ return TLS_CALLBACK_ERR;
+ }
+
+ int ret = ngtcp2_crypto_gnutls_configure_server_session(conn->tls_session);
+ if (ret != 0) {
+ return TLS_CALLBACK_ERR;
+ }
+
+ gnutls_record_set_max_early_data_size(conn->tls_session, 0xffffffffu);
+
+ conn->conn_ref = (nc_conn_ref_placeholder_t) {
+ .get_conn = get_conn,
+ .user_data = conn
+ };
+
+ _Static_assert(sizeof(nc_conn_ref_placeholder_t) == sizeof(ngtcp2_crypto_conn_ref), "invalid placeholder for conn_ref");
+ gnutls_session_set_ptr(conn->tls_session, &conn->conn_ref);
+
+ if (server) {
+ gnutls_anti_replay_enable(conn->tls_session, conn->quic_table->creds->tls_anti_replay);
+
+ }
+ if (gnutls_credentials_set(conn->tls_session, GNUTLS_CRD_CERTIFICATE,
+ conn->quic_table->creds->tls_cert) != GNUTLS_E_SUCCESS) {
+ return TLS_CALLBACK_ERR;
+ }
+
+ gnutls_alpn_set_protocols(conn->tls_session, &doq_alpn, 1, GNUTLS_ALPN_MANDATORY);
+
+ ngtcp2_conn_set_tls_native_handle(conn->conn, conn->tls_session);
+
+ return KNOT_EOK;
+}
+
+static uint64_t get_timestamp(void)
+{
+ struct timespec ts;
+ if (clock_gettime(CLOCK_MONOTONIC, &ts) != 0) {
+ assert(0);
+ }
+
+ return (uint64_t)ts.tv_sec * NGTCP2_SECONDS + (uint64_t)ts.tv_nsec;
+}
+
+uint64_t quic_conn_get_timeout(knot_quic_conn_t *conn)
+{
+ return ngtcp2_conn_get_expiry(conn->conn);
+}
+
+bool quic_conn_timeout(knot_quic_conn_t *conn, uint64_t *now)
+{
+ if (*now == 0) {
+ *now = get_timestamp();
+ }
+ return *now > quic_conn_get_timeout(conn);
+}
+
+_public_
+int64_t knot_quic_conn_next_timeout(knot_quic_conn_t *conn)
+{
+ return (((int64_t)quic_conn_get_timeout(conn) - (int64_t)get_timestamp()) / 1000000L);
+}
+
+_public_
+int knot_quic_hanle_expiry(knot_quic_conn_t *conn)
+{
+ return ngtcp2_conn_handle_expiry(conn->conn, get_timestamp()) == NGTCP2_NO_ERROR ? KNOT_EOK : KNOT_ECONN;
+}
+
+_public_
+uint32_t knot_quic_conn_rtt(knot_quic_conn_t *conn)
+{
+ ngtcp2_conn_info info = { 0 };
+ ngtcp2_conn_get_conn_info(conn->conn, &info);
+ return info.smoothed_rtt / 1000; // nanosec --> usec
+}
+
+_public_
+uint16_t knot_quic_conn_local_port(knot_quic_conn_t *conn)
+{
+ const ngtcp2_path *path = ngtcp2_conn_get_path(conn->conn);
+ return ((const struct sockaddr_in6 *)path->local.addr)->sin6_port;
+}
+
+_public_
+void knot_quic_conn_pin(knot_quic_conn_t *conn, uint8_t *pin, size_t *pin_size, bool local)
+{
+ if (conn == NULL) {
+ goto error;
+ }
+
+ const gnutls_datum_t *data = NULL;
+ if (local) {
+ data = gnutls_certificate_get_ours(conn->tls_session);
+ } else {
+ unsigned count = 0;
+ data = gnutls_certificate_get_peers(conn->tls_session, &count);
+ if (count == 0) {
+ goto error;
+ }
+ }
+ if (data == NULL) {
+ goto error;
+ }
+
+ gnutls_x509_crt_t cert;
+ int ret = gnutls_x509_crt_init(&cert);
+ if (ret != GNUTLS_E_SUCCESS) {
+ goto error;
+ }
+
+ ret = gnutls_x509_crt_import(cert, data, GNUTLS_X509_FMT_DER);
+ if (ret != GNUTLS_E_SUCCESS) {
+ gnutls_x509_crt_deinit(cert);
+ goto error;
+ }
+
+ ret = gnutls_x509_crt_get_key_id(cert, GNUTLS_KEYID_USE_SHA256, pin, pin_size);
+ if (ret != GNUTLS_E_SUCCESS) {
+ gnutls_x509_crt_deinit(cert);
+ goto error;
+ }
+
+ gnutls_x509_crt_deinit(cert);
+
+ return;
+error:
+ if (pin_size != NULL) {
+ *pin_size = 0;
+ }
+}
+
+static void knot_quic_rand_cb(uint8_t *dest, size_t destlen, const ngtcp2_rand_ctx *rand_ctx)
+{
+ (void)rand_ctx;
+ dnssec_random_buffer(dest, destlen);
+}
+
+static void init_random_cid(ngtcp2_cid *cid, size_t len)
+{
+ if (len == 0) {
+ len = SERVER_DEFAULT_SCIDLEN;
+ }
+
+ if (dnssec_random_buffer(cid->data, len) != DNSSEC_EOK) {
+ cid->datalen = 0;
+ } else {
+ cid->datalen = len;
+ }
+}
+
+static bool init_unique_cid(ngtcp2_cid *cid, size_t len, knot_quic_table_t *table)
+{
+ do {
+ if (init_random_cid(cid, len), cid->datalen == 0) {
+ return false;
+ }
+ } while (quic_table_lookup(cid, table) != NULL);
+ return true;
+}
+
+static int get_new_connection_id(ngtcp2_conn *conn, ngtcp2_cid *cid,
+ uint8_t *token, size_t cidlen,
+ void *user_data)
+{
+ knot_quic_conn_t *ctx = (knot_quic_conn_t *)user_data;
+ assert(ctx->conn == conn);
+
+ if (!init_unique_cid(cid, cidlen, ctx->quic_table)) {
+ return NGTCP2_ERR_CALLBACK_FAILURE;
+ }
+
+ knot_quic_cid_t **addto = quic_table_insert(ctx, cid, ctx->quic_table);
+ (void)addto;
+
+ if (token != NULL &&
+ ngtcp2_crypto_generate_stateless_reset_token(
+ token, (uint8_t *)ctx->quic_table->hash_secret,
+ sizeof(ctx->quic_table->hash_secret), cid) != 0) {
+ return NGTCP2_ERR_CALLBACK_FAILURE;
+ }
+
+ return 0;
+}
+
+static int remove_connection_id(ngtcp2_conn *conn, const ngtcp2_cid *cid,
+ void *user_data)
+{
+ knot_quic_conn_t *ctx = (knot_quic_conn_t *)user_data;
+ assert(ctx->conn == conn);
+
+ knot_quic_cid_t **torem = quic_table_lookup2(cid, ctx->quic_table);
+ if (torem != NULL) {
+ assert((*torem)->conn == ctx);
+ quic_table_rem2(torem, ctx->quic_table);
+ }
+
+ return 0;
+}
+
+static int handshake_completed_cb(ngtcp2_conn *conn, void *user_data)
+{
+ knot_quic_conn_t *ctx = (knot_quic_conn_t *)user_data;
+ assert(ctx->conn == conn);
+
+ assert(!(ctx->flags & KNOT_QUIC_CONN_HANDSHAKE_DONE));
+ ctx->flags |= KNOT_QUIC_CONN_HANDSHAKE_DONE;
+
+ if (!ngtcp2_conn_is_server(conn)) {
+ knot_quic_creds_t *creds = ctx->quic_table->creds;
+ if (creds->peer_pin_len == 0) {
+ return 0;
+ }
+ uint8_t pin[KNOT_QUIC_PIN_LEN];
+ size_t pin_size = sizeof(pin);
+ knot_quic_conn_pin(ctx, pin, &pin_size, false);
+ if (pin_size != creds->peer_pin_len ||
+ const_time_memcmp(pin, creds->peer_pin, pin_size) != 0) {
+ return NGTCP2_ERR_CALLBACK_FAILURE;
+ }
+ return 0;
+ }
+
+ if (gnutls_session_ticket_send(ctx->tls_session, 1, 0) != GNUTLS_E_SUCCESS) {
+ return TLS_CALLBACK_ERR;
+ }
+
+ uint8_t token[NGTCP2_CRYPTO_MAX_REGULAR_TOKENLEN];
+ ngtcp2_path path = *ngtcp2_conn_get_path(ctx->conn);
+ uint64_t ts = get_timestamp();
+ ngtcp2_ssize tokenlen = ngtcp2_crypto_generate_regular_token(token,
+ (uint8_t *)ctx->quic_table->hash_secret,
+ sizeof(ctx->quic_table->hash_secret),
+ path.remote.addr, path.remote.addrlen, ts);
+ if (tokenlen < 0) {
+ return NGTCP2_ERR_CALLBACK_FAILURE;
+ }
+
+ if (ngtcp2_conn_submit_new_token(ctx->conn, token, tokenlen) != 0) {
+ return NGTCP2_ERR_CALLBACK_FAILURE;
+ }
+
+ return 0;
+}
+
+static int recv_stream_data(ngtcp2_conn *conn, uint32_t flags,
+ int64_t stream_id, uint64_t offset,
+ const uint8_t *data, size_t datalen,
+ void *user_data, void *stream_user_data)
+{
+ (void)(stream_user_data); // always NULL
+ (void)(offset); // QUIC shall ensure that data arrive in-order
+
+ knot_quic_conn_t *ctx = (knot_quic_conn_t *)user_data;
+ assert(ctx->conn == conn);
+
+ int ret = knot_quic_stream_recv_data(ctx, stream_id, data, datalen,
+ (flags & NGTCP2_STREAM_DATA_FLAG_FIN));
+
+ return ret == KNOT_EOK ? 0 : NGTCP2_ERR_CALLBACK_FAILURE;
+}
+
+static int acked_stream_data_offset_cb(ngtcp2_conn *conn, int64_t stream_id,
+ uint64_t offset, uint64_t datalen,
+ void *user_data, void *stream_user_data)
+{
+ knot_quic_conn_t *ctx = (knot_quic_conn_t *)user_data;
+
+ bool keep = !ngtcp2_conn_is_server(conn); // kxdpgun: await incomming reply after query sent&acked
+
+ knot_quic_stream_ack_data(ctx, stream_id, offset + datalen, keep);
+
+ return 0;
+}
+
+static int stream_closed(ngtcp2_conn *conn, uint32_t flags, int64_t stream_id,
+ uint64_t app_error_code, void *user_data, void *stream_user_data)
+{
+ knot_quic_conn_t *ctx = (knot_quic_conn_t *)user_data;
+ assert(ctx->conn == conn);
+
+ // NOTE possible error is stored in (flags & NGTCP2_STREAM_CLOSE_FLAG_APP_ERROR_CODE_SET)
+
+ bool keep = !ngtcp2_conn_is_server(conn); // kxdpgun: process incomming reply after recvd&closed
+ if (!keep) {
+ knot_quic_conn_stream_free(ctx, stream_id);
+ }
+ return 0;
+}
+
+static int recv_stateless_rst(ngtcp2_conn *conn, const ngtcp2_pkt_stateless_reset *sr,
+ void *user_data)
+{
+ // NOTE server can't receive stateless resets, only client
+
+ // ngtcp2 verified stateless reset token already
+ (void)(sr);
+
+ knot_quic_conn_t *ctx = (knot_quic_conn_t *)user_data;
+ assert(ctx->conn == conn);
+
+ knot_quic_table_rem(ctx, ctx->quic_table);
+ knot_quic_cleanup(&ctx, 1);
+
+ return 0;
+}
+
+static int recv_stream_rst(ngtcp2_conn *conn, int64_t stream_id, uint64_t final_size,
+ uint64_t app_error_code, void *user_data, void *stream_user_data)
+{
+ (void)final_size;
+ return stream_closed(conn, NGTCP2_STREAM_CLOSE_FLAG_APP_ERROR_CODE_SET,
+ stream_id, app_error_code, user_data, stream_user_data);
+}
+
+static void user_printf(void *user_data, const char *format, ...)
+{
+ knot_quic_conn_t *ctx = (knot_quic_conn_t *)user_data;
+ if (ctx->quic_table->log_cb != NULL) {
+ char buf[256];
+ va_list args;
+ va_start(args, format);
+ vsnprintf(buf, sizeof(buf), format, args);
+ va_end(args);
+ ctx->quic_table->log_cb(buf);
+ }
+}
+
+static void hex_encode(const uint8_t *in, const uint32_t in_len, char *out)
+{
+ static const char hex[] = "0123456789abcdef";
+
+ for (uint32_t i = 0; i < in_len; i++) {
+ out[2 * i] = hex[in[i] / 16];
+ out[2 * i + 1] = hex[in[i] % 16];
+ }
+}
+
+static void user_qlog(void *user_data, uint32_t flags, const void *data, size_t datalen)
+{
+ knot_quic_conn_t *ctx = (knot_quic_conn_t *)user_data;
+ if (ctx->quic_table->qlog_dir != NULL) {
+ if (ctx->qlog_fd < 0) {
+ const ngtcp2_cid *cid = ngtcp2_conn_get_client_initial_dcid(ctx->conn);
+ if (cid->datalen == 0) {
+ cid = ngtcp2_conn_get_dcid(ctx->conn);
+ }
+ unsigned qlog_dir_len = strlen(ctx->quic_table->qlog_dir);
+ unsigned qlog_name_len = qlog_dir_len + 2 * cid->datalen + 7;
+ char qlog_name[qlog_name_len];
+ memcpy(qlog_name, ctx->quic_table->qlog_dir, qlog_dir_len);
+ qlog_name[qlog_dir_len] = '/';
+ hex_encode(cid->data, cid->datalen, qlog_name + qlog_dir_len + 1);
+ memcpy(qlog_name + qlog_name_len - 6, ".qlog", 6);
+
+ ctx->qlog_fd = open(qlog_name, O_CREAT | O_WRONLY | O_APPEND, 0666);
+ }
+ if (ctx->qlog_fd >= 0) { // othewise silently skip
+ _unused_ ssize_t unused = write(ctx->qlog_fd, data, datalen);
+ if (flags & NGTCP2_QLOG_WRITE_FLAG_FIN) {
+ close(ctx->qlog_fd);
+ ctx->qlog_fd = -1;
+ }
+ }
+ }
+}
+
+static int conn_new(ngtcp2_conn **pconn, const ngtcp2_path *path, const ngtcp2_cid *scid,
+ const ngtcp2_cid *dcid, const ngtcp2_cid *odcid, uint32_t version,
+ uint64_t now, uint64_t idle_timeout_ns,
+ knot_quic_conn_t *qconn, bool server, bool retry_sent)
+{
+ knot_quic_table_t *qtable = qconn->quic_table;
+
+ // I. CALLBACKS
+ const ngtcp2_callbacks callbacks = {
+ ngtcp2_crypto_client_initial_cb,
+ ngtcp2_crypto_recv_client_initial_cb,
+ ngtcp2_crypto_recv_crypto_data_cb,
+ handshake_completed_cb,
+ NULL, // recv_version_negotiation not needed on server, nor kxdpgun
+ ngtcp2_crypto_encrypt_cb,
+ ngtcp2_crypto_decrypt_cb,
+ ngtcp2_crypto_hp_mask_cb,
+ recv_stream_data,
+ acked_stream_data_offset_cb,
+ NULL, // stream_opened
+ stream_closed,
+ recv_stateless_rst,
+ ngtcp2_crypto_recv_retry_cb,
+ NULL, // extend_max_streams_bidi
+ NULL, // extend_max_streams_uni
+ knot_quic_rand_cb,
+ get_new_connection_id,
+ remove_connection_id,
+ ngtcp2_crypto_update_key_cb,
+ NULL, // path_validation,
+ NULL, // select_preferred_addr
+ recv_stream_rst,
+ NULL, // extend_max_remote_streams_bidi, might be useful to some allocation optimizations?
+ NULL, // extend_max_remote_streams_uni
+ NULL, // extend_max_stream_data,
+ NULL, // dcid_status
+ NULL, // handshake_confirmed
+ NULL, // recv_new_token
+ ngtcp2_crypto_delete_crypto_aead_ctx_cb,
+ ngtcp2_crypto_delete_crypto_cipher_ctx_cb,
+ NULL, // recv_datagram
+ NULL, // ack_datagram
+ NULL, // lost_datagram
+ ngtcp2_crypto_get_path_challenge_data_cb,
+ NULL, // stream_stop_sending
+ ngtcp2_crypto_version_negotiation_cb,
+ NULL, // recv_rx_key
+ NULL // recv_tx_key
+ };
+
+ // II. SETTINGS
+ ngtcp2_settings settings;
+ ngtcp2_settings_default(&settings);
+ settings.initial_ts = now;
+ if (qtable->log_cb != NULL) {
+ settings.log_printf = user_printf;
+ }
+ if (qtable->qlog_dir != NULL) {
+ settings.qlog_write = user_qlog;
+ }
+ if (qtable->udp_payload_limit != 0) {
+ settings.max_tx_udp_payload_size = qtable->udp_payload_limit;
+ }
+
+ settings.handshake_timeout = idle_timeout_ns; // NOTE setting handshake timeout to idle_timeout for simplicity
+ settings.no_pmtud = true;
+
+ // III. PARAMS
+ ngtcp2_transport_params params;
+ ngtcp2_transport_params_default(&params);
+
+ params.disable_active_migration = true;
+ params.initial_max_streams_uni = 0;
+ params.initial_max_streams_bidi = 1024;
+ params.initial_max_stream_data_bidi_local = NGTCP2_MAX_VARINT;
+ params.initial_max_stream_data_bidi_remote = 102400;
+ params.initial_max_data = NGTCP2_MAX_VARINT;
+
+ params.max_idle_timeout = idle_timeout_ns;
+ // params.stateless_reset_token_present = 1;
+ // params.active_connection_id_limit = 7;
+ if (odcid != NULL) {
+ params.original_dcid = *odcid;
+ params.original_dcid_present = true;
+ }
+
+ if (retry_sent) {
+ assert(scid);
+ params.retry_scid_present = 1;
+ params.retry_scid = *scid;
+ }
+ if (dnssec_random_buffer(params.stateless_reset_token, NGTCP2_STATELESS_RESET_TOKENLEN) != DNSSEC_EOK) {
+ return KNOT_ERROR;
+ }
+
+ if (server) {
+ return ngtcp2_conn_server_new(pconn, dcid, scid, path, version, &callbacks,
+ &settings, &params, NULL, qconn);
+ } else {
+ return ngtcp2_conn_client_new(pconn, dcid, scid, path, version, &callbacks,
+ &settings, &params, NULL, qconn);
+ }
+}
+
+_public_
+int knot_quic_client(knot_quic_table_t *table, struct sockaddr_in6 *dest,
+ struct sockaddr_in6 *via, const char *server_name,
+ knot_quic_conn_t **out_conn)
+{
+ ngtcp2_cid scid = { 0 }, dcid = { 0 };
+ uint64_t now = get_timestamp();
+
+ if (table == NULL || dest == NULL || via == NULL || out_conn == NULL) {
+ return KNOT_EINVAL;
+ }
+
+ init_random_cid(&scid, 0);
+ init_random_cid(&dcid, 0);
+
+ knot_quic_conn_t *conn = quic_table_add(NULL, &dcid, table);
+ if (conn == NULL) {
+ return ENOMEM;
+ }
+
+ ngtcp2_path path;
+ path.remote.addr = (struct sockaddr *)dest;
+ path.remote.addrlen = addr_len((const struct sockaddr_in6 *)dest);
+ path.local.addr = (struct sockaddr *)via;
+ path.local.addrlen = addr_len((const struct sockaddr_in6 *)via);
+
+ int ret = conn_new(&conn->conn, &path, &dcid, &scid, NULL, NGTCP2_PROTO_VER_V1, now,
+ 5000000000L, conn, false, false);
+ if (ret == KNOT_EOK) {
+ ret = tls_init_conn_session(conn, false);
+ }
+ if (ret == KNOT_EOK && server_name != NULL) {
+ ret = gnutls_server_name_set(conn->tls_session, GNUTLS_NAME_DNS,
+ server_name, strlen(server_name));
+ }
+ if (ret != KNOT_EOK) {
+ knot_quic_table_rem(conn, table);
+ knot_quic_cleanup(&conn, 1);
+ return ret;
+ }
+
+ *out_conn = conn;
+ return KNOT_EOK;
+}
+
+_public_
+int knot_quic_handle(knot_quic_table_t *table, knot_quic_reply_t *reply,
+ uint64_t idle_timeout, knot_quic_conn_t **out_conn)
+{
+ *out_conn = NULL;
+ if (table == NULL || reply == NULL || out_conn == NULL) {
+ return KNOT_EINVAL;
+ }
+
+ ngtcp2_version_cid decoded_cids = { 0 };
+ ngtcp2_cid scid = { 0 }, dcid = { 0 }, odcid = { 0 };
+ uint64_t now = get_timestamp();
+ if (reply->in_payload->iov_len < 1) {
+ reply->handle_ret = KNOT_EOK;
+ return KNOT_EOK;
+ }
+ int ret = ngtcp2_pkt_decode_version_cid(&decoded_cids,
+ reply->in_payload->iov_base,
+ reply->in_payload->iov_len,
+ SERVER_DEFAULT_SCIDLEN);
+ if (ret == NGTCP2_ERR_VERSION_NEGOTIATION) {
+ ret = -QUIC_SEND_VERSION_NEGOTIATION;
+ goto finish;
+ } else if (ret != NGTCP2_NO_ERROR) {
+ goto finish;
+ }
+ ngtcp2_cid_init(&dcid, decoded_cids.dcid, decoded_cids.dcidlen);
+ ngtcp2_cid_init(&scid, decoded_cids.scid, decoded_cids.scidlen);
+
+ knot_quic_conn_t *conn = quic_table_lookup(&dcid, table);
+
+ if (decoded_cids.version == 0 /* short header */ && conn == NULL) {
+ ret = KNOT_EOK; // NOOP
+ goto finish;
+ }
+
+ ngtcp2_path path;
+ path.remote.addr = (struct sockaddr *)reply->ip_rem;
+ path.remote.addrlen = addr_len((struct sockaddr_in6 *)reply->ip_rem);
+ path.local.addr = (struct sockaddr *)reply->ip_loc;
+ path.local.addrlen = addr_len((struct sockaddr_in6 *)reply->ip_loc);
+
+ if (conn == NULL) {
+ // new conn
+
+ ngtcp2_pkt_hd header = { 0 };
+ ret = ngtcp2_accept(&header, reply->in_payload->iov_base,
+ reply->in_payload->iov_len);
+ if (ret == NGTCP2_ERR_RETRY) {
+ ret = -QUIC_SEND_RETRY;
+ goto finish;
+ } else if (ret != NGTCP2_NO_ERROR) { // discard packet
+ ret = KNOT_EOK;
+ goto finish;
+ }
+
+ assert(header.type == NGTCP2_PKT_INITIAL);
+ if (header.tokenlen == 0 && quic_require_retry(table)) {
+ ret = -QUIC_SEND_RETRY;
+ goto finish;
+ }
+
+ if (header.tokenlen > 0) {
+ ret = ngtcp2_crypto_verify_retry_token(
+ &odcid, header.token, header.tokenlen,
+ (const uint8_t *)table->hash_secret,
+ sizeof(table->hash_secret), header.version,
+ (const struct sockaddr *)reply->ip_rem,
+ addr_len((struct sockaddr_in6 *)reply->ip_rem),
+ &dcid, idle_timeout, now // NOTE setting retry token validity to idle_timeout for simplicity
+ );
+ if (ret != 0) {
+ ret = KNOT_EOK;
+ goto finish;
+ }
+ } else {
+ memcpy(&odcid, &dcid, sizeof(odcid));
+ }
+
+ // server chooses his CID to his liking
+ if (!init_unique_cid(&dcid, 0, table)) {
+ ret = KNOT_ERROR;
+ goto finish;
+ }
+
+ conn = quic_table_add(NULL, &dcid, table);
+ if (conn == NULL) {
+ ret = KNOT_ENOMEM;
+ goto finish;
+ }
+
+ ret = conn_new(&conn->conn, &path, &dcid, &scid, &odcid, decoded_cids.version,
+ now, idle_timeout, conn, true, header.tokenlen > 0);
+ if (ret >= 0) {
+ ret = tls_init_conn_session(conn, true);
+ }
+ if (ret < 0) {
+ knot_quic_table_rem(conn, table);
+ *out_conn = conn; // we need knot_quic_cleanup() by the caller afterwards
+ goto finish;
+ }
+ }
+
+ ngtcp2_pkt_info pi = { .ecn = reply->ecn, };
+
+ ret = ngtcp2_conn_read_pkt(conn->conn, &path, &pi, reply->in_payload->iov_base,
+ reply->in_payload->iov_len, now);
+
+ *out_conn = conn;
+ if (ret == NGTCP2_ERR_DRAINING) { // received CONNECTION_CLOSE from the counterpart
+ knot_quic_table_rem(conn, table);
+ ret = KNOT_EOK;
+ goto finish;
+ } else if (ngtcp2_err_is_fatal(ret)) { // connection doomed
+ if (ret == NGTCP2_ERR_CALLBACK_FAILURE) {
+ ret = KNOT_EBADCERTKEY;
+ } else {
+ ret = KNOT_ECONN;
+ }
+ knot_quic_table_rem(conn, table);
+ goto finish;
+ } else if (ret != NGTCP2_NO_ERROR) { // non-fatal error, discard packet
+ ret = KNOT_EOK;
+ goto finish;
+ }
+
+ quic_conn_mark_used(conn, table);
+
+ ret = KNOT_EOK;
+finish:
+ reply->handle_ret = ret;
+ return ret;
+}
+
+static bool stream_exists(knot_quic_conn_t *conn, int64_t stream_id)
+{
+ // TRICK, we never use stream_user_data
+ return (ngtcp2_conn_set_stream_user_data(conn->conn, stream_id, NULL) == NGTCP2_NO_ERROR);
+}
+
+static int send_stream(knot_quic_table_t *quic_table, knot_quic_reply_t *rpl,
+ knot_quic_conn_t *relay, int64_t stream_id,
+ uint8_t *data, size_t len, bool fin, ngtcp2_ssize *sent)
+{
+ (void)quic_table;
+ assert(stream_id >= 0 || (data == NULL && len == 0));
+
+ while (stream_id >= 0 && !stream_exists(relay, stream_id)) {
+ int64_t opened = 0;
+ int ret = ngtcp2_conn_open_bidi_stream(relay->conn, &opened, NULL);
+ if (ret != KNOT_EOK) {
+ return ret;
+ }
+ assert((bool)(opened == stream_id) == stream_exists(relay, stream_id));
+ }
+
+ int ret = rpl->alloc_reply(rpl);
+ if (ret != KNOT_EOK) {
+ return ret;
+ }
+
+ uint32_t fl = ((stream_id >= 0 && fin) ? NGTCP2_WRITE_STREAM_FLAG_FIN :
+ NGTCP2_WRITE_STREAM_FLAG_NONE);
+ ngtcp2_vec vec = { .base = data, .len = len };
+ ngtcp2_pkt_info pi = { 0 };
+
+ struct sockaddr_storage path_loc = { 0 }, path_rem = { 0 };
+ ngtcp2_path path = { .local = { .addr = (struct sockaddr *)&path_loc, .addrlen = sizeof(path_loc) },
+ .remote = { .addr = (struct sockaddr *)&path_rem, .addrlen = sizeof(path_rem) },
+ .user_data = NULL };
+ bool find_path = (rpl->ip_rem == NULL);
+ assert(find_path == (bool)(rpl->ip_loc == NULL));
+
+ ret = ngtcp2_conn_writev_stream(relay->conn, find_path ? &path : NULL, &pi,
+ rpl->out_payload->iov_base, rpl->out_payload->iov_len,
+ sent, fl, stream_id, &vec,
+ (stream_id >= 0 ? 1 : 0), get_timestamp());
+ if (ret <= 0) {
+ rpl->free_reply(rpl);
+ return ret;
+ }
+ if (*sent < 0) {
+ *sent = 0;
+ }
+
+ rpl->out_payload->iov_len = ret;
+ rpl->ecn = pi.ecn;
+ if (find_path) {
+ rpl->ip_loc = &path_loc;
+ rpl->ip_rem = &path_rem;
+ }
+ ret = rpl->send_reply(rpl);
+ if (find_path) {
+ rpl->ip_loc = NULL;
+ rpl->ip_rem = NULL;
+ }
+ if (ret == KNOT_EOK) {
+ return 1;
+ }
+ return ret;
+}
+
+static int send_special(knot_quic_table_t *quic_table, knot_quic_reply_t *rpl,
+ knot_quic_conn_t *relay /* only for connection close */)
+{
+ int ret = rpl->alloc_reply(rpl);
+ if (ret != KNOT_EOK) {
+ return ret;
+ }
+
+ uint64_t now = get_timestamp();
+ ngtcp2_version_cid decoded_cids = { 0 };
+ ngtcp2_cid scid = { 0 }, dcid = { 0 };
+ int dvc_ret = NGTCP2_ERR_FATAL;
+
+ if ((rpl->handle_ret == -QUIC_SEND_VERSION_NEGOTIATION ||
+ rpl->handle_ret == -QUIC_SEND_RETRY) &&
+ rpl->in_payload != NULL && rpl->in_payload->iov_len > 0) {
+ dvc_ret = ngtcp2_pkt_decode_version_cid(
+ &decoded_cids, rpl->in_payload->iov_base,
+ rpl->in_payload->iov_len, SERVER_DEFAULT_SCIDLEN);
+ }
+
+ uint8_t rnd = 0;
+ dnssec_random_buffer(&rnd, sizeof(rnd));
+ uint32_t supported_quic[1] = { NGTCP2_PROTO_VER_V1 };
+ ngtcp2_cid new_dcid;
+ uint8_t retry_token[NGTCP2_CRYPTO_MAX_RETRY_TOKENLEN];
+ uint8_t stateless_reset_token[NGTCP2_STATELESS_RESET_TOKENLEN];
+ uint8_t sreset_rand[NGTCP2_MIN_STATELESS_RESET_RANDLEN];
+ dnssec_random_buffer(sreset_rand, sizeof(sreset_rand));
+ ngtcp2_ccerr ccerr;
+ ngtcp2_ccerr_default(&ccerr);
+ ngtcp2_pkt_info pi = { 0 };
+
+ struct sockaddr_storage path_loc = { 0 }, path_rem = { 0 };
+ ngtcp2_path path = { .local = { .addr = (struct sockaddr *)&path_loc, .addrlen = sizeof(path_loc) },
+ .remote = { .addr = (struct sockaddr *)&path_rem, .addrlen = sizeof(path_rem) },
+ .user_data = NULL };
+ bool find_path = (rpl->ip_rem == NULL);
+ assert(find_path == (bool)(rpl->ip_loc == NULL));
+ assert(!find_path || rpl->handle_ret == -QUIC_SEND_EXCESSIVE_LOAD);
+
+ switch (rpl->handle_ret) {
+ case -QUIC_SEND_VERSION_NEGOTIATION:
+ if (dvc_ret != NGTCP2_ERR_VERSION_NEGOTIATION) {
+ rpl->free_reply(rpl);
+ return KNOT_ERROR;
+ }
+ ret = ngtcp2_pkt_write_version_negotiation(
+ rpl->out_payload->iov_base, rpl->out_payload->iov_len,
+ rnd, decoded_cids.scid, decoded_cids.scidlen, decoded_cids.dcid,
+ decoded_cids.dcidlen, supported_quic,
+ sizeof(supported_quic) / sizeof(*supported_quic)
+ );
+ break;
+ case -QUIC_SEND_RETRY:
+ ngtcp2_cid_init(&dcid, decoded_cids.dcid, decoded_cids.dcidlen);
+ ngtcp2_cid_init(&scid, decoded_cids.scid, decoded_cids.scidlen);
+
+ init_random_cid(&new_dcid, 0);
+
+ ret = ngtcp2_crypto_generate_retry_token(
+ retry_token, (const uint8_t *)quic_table->hash_secret,
+ sizeof(quic_table->hash_secret), decoded_cids.version,
+ (const struct sockaddr *)rpl->ip_rem, sockaddr_len(rpl->ip_rem),
+ &new_dcid, &dcid, now
+ );
+
+ if (ret >= 0) {
+ ret = ngtcp2_crypto_write_retry(
+ rpl->out_payload->iov_base, rpl->out_payload->iov_len,
+ decoded_cids.version, &scid, &new_dcid, &dcid,
+ retry_token, ret
+ );
+ }
+ break;
+ case -QUIC_SEND_STATELESS_RESET:
+ ret = ngtcp2_pkt_write_stateless_reset(
+ rpl->out_payload->iov_base, rpl->out_payload->iov_len,
+ stateless_reset_token, sreset_rand, sizeof(sreset_rand)
+ );
+ break;
+ case -QUIC_SEND_CONN_CLOSE:
+ ret = ngtcp2_conn_write_connection_close(
+ relay->conn, NULL, &pi, rpl->out_payload->iov_base,
+ rpl->out_payload->iov_len, &ccerr, now
+ );
+ break;
+ case -QUIC_SEND_EXCESSIVE_LOAD:
+ ccerr.type = NGTCP2_CCERR_TYPE_APPLICATION;
+ ccerr.error_code = KNOT_QUIC_ERR_EXCESSIVE_LOAD;
+ ret = ngtcp2_conn_write_connection_close(
+ relay->conn, find_path ? &path : NULL, &pi, rpl->out_payload->iov_base,
+ rpl->out_payload->iov_len, &ccerr, now
+ );
+ break;
+ default:
+ ret = KNOT_EINVAL;
+ break;
+ }
+
+ if (ret < 0) {
+ rpl->free_reply(rpl);
+ } else {
+ rpl->out_payload->iov_len = ret;
+ rpl->ecn = pi.ecn;
+ if (find_path) {
+ rpl->ip_loc = &path_loc;
+ rpl->ip_rem = &path_rem;
+ }
+ ret = rpl->send_reply(rpl);
+ if (find_path) {
+ rpl->ip_loc = NULL;
+ rpl->ip_rem = NULL;
+ }
+ }
+ return ret;
+}
+
+_public_
+int knot_quic_send(knot_quic_table_t *quic_table, knot_quic_conn_t *conn,
+ knot_quic_reply_t *reply, unsigned max_msgs,
+ knot_quic_send_flag_t flags)
+{
+ if (quic_table == NULL || conn == NULL || reply == NULL) {
+ return KNOT_EINVAL;
+ } else if (reply->handle_ret < 0) {
+ return reply->handle_ret;
+ } else if (reply->handle_ret > 0) {
+ return send_special(quic_table, reply, conn);
+ } else if (conn == NULL) {
+ return KNOT_EINVAL;
+ } else if (conn->conn == NULL) {
+ return KNOT_EOK;
+ }
+
+ if (!(conn->flags & KNOT_QUIC_CONN_HANDSHAKE_DONE)) {
+ max_msgs = 1;
+ }
+
+ unsigned sent_msgs = 0, stream_msgs = 0, ignore_last = ((flags & KNOT_QUIC_SEND_IGNORE_LASTBYTE) ? 1 : 0);
+ int ret = 1;
+ for (int64_t si = 0; si < conn->streams_count && sent_msgs < max_msgs; /* NO INCREMENT */) {
+ int64_t stream_id = 4 * (conn->streams_first + si);
+
+ ngtcp2_ssize sent = 0;
+ size_t uf = conn->streams[si].unsent_offset;
+ knot_quic_obuf_t *uo = conn->streams[si].unsent_obuf;
+ if (uo == NULL) {
+ si++;
+ continue;
+ }
+
+ bool fin = (((node_t *)uo->node.next)->next == NULL) && ignore_last == 0;
+ ret = send_stream(quic_table, reply, conn, stream_id,
+ uo->buf + uf, uo->len - uf - ignore_last,
+ fin, &sent);
+ if (ret < 0) {
+ return ret;
+ }
+
+ sent_msgs++;
+ stream_msgs++;
+ if (sent > 0 && ignore_last > 0) {
+ sent++;
+ }
+ if (sent > 0) {
+ knot_quic_stream_mark_sent(conn, stream_id, sent);
+ }
+
+ if (stream_msgs >= max_msgs / conn->streams_count) {
+ stream_msgs = 0;
+ si++; // if this stream is sending too much, give chance to other streams
+ }
+ }
+
+ while (ret == 1) {
+ ngtcp2_ssize unused = 0;
+ ret = send_stream(quic_table, reply, conn, -1, NULL, 0, false, &unused);
+ }
+
+ return ret;
+}
diff --git a/src/libknot/quic/quic.h b/src/libknot/quic/quic.h
new file mode 100644
index 0000000..29a02e0
--- /dev/null
+++ b/src/libknot/quic/quic.h
@@ -0,0 +1,213 @@
+/* Copyright (C) 2023 CZ.NIC, z.s.p.o. <knot-dns@labs.nic.cz>
+
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program. If not, see <https://www.gnu.org/licenses/>.
+ */
+
+/*!
+ * \file
+ *
+ * \brief General QUIC functionality.
+ *
+ * \addtogroup quic
+ * @{
+ */
+
+#pragma once
+
+#include <sys/types.h>
+#include <netinet/in.h>
+
+#include "libknot/quic/quic_conn.h"
+
+#define KNOT_QUIC_PIN_LEN 32
+
+#define KNOT_QUIC_HANDLE_RET_CLOSE 2000
+
+// RFC 9250
+#define KNOT_QUIC_ERR_EXCESSIVE_LOAD 0x4
+
+struct gnutls_x509_crt_int;
+struct knot_quic_creds;
+struct knot_quic_session;
+
+typedef enum {
+ KNOT_QUIC_SEND_IGNORE_LASTBYTE = (1 << 0),
+} knot_quic_send_flag_t;
+
+typedef struct knot_quic_reply {
+ const struct sockaddr_storage *ip_rem;
+ const struct sockaddr_storage *ip_loc;
+ struct iovec *in_payload;
+ struct iovec *out_payload;
+ void *in_ctx;
+ void *out_ctx;
+
+ void *sock;
+ int handle_ret;
+ uint8_t ecn;
+
+ int (*alloc_reply)(struct knot_quic_reply *);
+ int (*send_reply)(struct knot_quic_reply *);
+ void (*free_reply)(struct knot_quic_reply *);
+} knot_quic_reply_t;
+
+/*!
+ * \brief Check if session ticket can be taken out of this connection.
+ */
+bool knot_quic_session_available(knot_quic_conn_t *conn);
+
+/*!
+ * \brief Gets data needed for session resumption.
+ *
+ * \param conn QUIC connection.
+ *
+ * \return QUIC session context.
+ */
+struct knot_quic_session *knot_quic_session_save(knot_quic_conn_t *conn);
+
+/*!
+ * \brief Loads data needed for session resumption.
+ *
+ * \param conn QUIC connection.
+ * \param session QUIC session context.
+ *
+ * \return KNOT_E*
+ */
+int knot_quic_session_load(knot_quic_conn_t *conn, struct knot_quic_session *session);
+
+/*!
+ * \brief Init server TLS certificate for DoQ.
+ *
+ * \param cert_file X509 certificate PEM file path/name (NULL if auto-generated).
+ * \param key_file Key PEM file path/name.
+ *
+ * \return Initialized creds.
+ */
+struct knot_quic_creds *knot_quic_init_creds(const char *cert_file,
+ const char *key_file);
+
+/*!
+ * \brief Init peer TLS certificate for DoQ.
+ *
+ * \param local_creds Local credentials if server.
+ * \param peer_pin Optional peer certificate pin to check.
+ * \param peer_pin_len Length of the peer pin. Set 0 if not specified.
+ *
+ * \return Initialized creds.
+ */
+struct knot_quic_creds *knot_quic_init_creds_peer(const struct knot_quic_creds *local_creds,
+ const uint8_t *peer_pin,
+ uint8_t peer_pin_len);
+
+/*!
+ * \brief Gets the certificate from credentials.
+ *
+ * \param creds TLS credentials.
+ * \param cert Output certificate.
+ *
+ * \return KNOT_E*
+ */
+int knot_quic_creds_cert(struct knot_quic_creds *creds, struct gnutls_x509_crt_int **cert);
+
+/*!
+ * \brief Deinit server TLS certificate for DoQ.
+ */
+void knot_quic_free_creds(struct knot_quic_creds *creds);
+
+/*!
+ * \brief Returns timeout value for the connection.
+ */
+uint64_t quic_conn_get_timeout(knot_quic_conn_t *conn);
+
+/*!
+ * \brief Check if connection timed out due to inactivity.
+ *
+ * \param conn QUIC connection.
+ * \param now In/out: current monotonic time. Use zero first and reuse for
+ * next calls for optimization.
+ *
+ * \return True if the connection timed out idle.
+ */
+bool quic_conn_timeout(knot_quic_conn_t *conn, uint64_t *now);
+
+int64_t knot_quic_conn_next_timeout(knot_quic_conn_t *conn);
+
+int knot_quic_hanle_expiry(knot_quic_conn_t *conn);
+
+/*!
+ * \brief Returns measured connection RTT in usecs.
+ */
+uint32_t knot_quic_conn_rtt(knot_quic_conn_t *conn);
+
+/*!
+ * \brief Returns the port from local-address of given conn IN BIG ENDIAN.
+ */
+uint16_t knot_quic_conn_local_port(knot_quic_conn_t *conn);
+
+/*!
+ * \brief Gets local or remote certificate pin.
+ *
+ * \note Zero output pin_size value means no certificate available or error.
+ *
+ * \param conn QUIC connection.
+ * \param pin Output certificate pin.
+ * \param pin_size Input size of the storage / output size of the stored pin.
+ * \param local Local or remote certificate indication.
+ */
+void knot_quic_conn_pin(knot_quic_conn_t *conn, uint8_t *pin, size_t *pin_size, bool local);
+
+/*!
+ * \brief Create new outgoing QUIC connection.
+ *
+ * \param table QUIC connections table to be added to.
+ * \param dest Destination IP address.
+ * \param via Source IP address.
+ * \param server_name Optional server name.
+ * \param out_conn Out: new connection.
+ *
+ * \return KNOT_E*
+ */
+int knot_quic_client(knot_quic_table_t *table, struct sockaddr_in6 *dest,
+ struct sockaddr_in6 *via, const char *server_name,
+ knot_quic_conn_t **out_conn);
+
+/*!
+ * \brief Handle incoming QUIC packet.
+ *
+ * \param table QUIC connectoins table.
+ * \param reply Incoming packet info.
+ * \param idle_timeout Configured idle timeout for connections (in nanoseconds).
+ * \param out_conn Out: QUIC connection that this packet belongs to.
+ *
+ * \return KNOT_E* or -QUIC_SEND_*
+ */
+int knot_quic_handle(knot_quic_table_t *table, knot_quic_reply_t *reply,
+ uint64_t idle_timeout, knot_quic_conn_t **out_conn);
+
+/*!
+ * \brief Send outgoing QUIC packet(s) for a connection.
+ *
+ * \param quic_table QUIC connection table.
+ * \param conn QUIC connection.
+ * \param reply Incoming/outgoing packet info.
+ * \param max_msgs Maxmimum packets to be sent.
+ * \param flags Various options for special use-cases.
+ *
+ * \return KNOT_E*
+ */
+int knot_quic_send(knot_quic_table_t *quic_table, knot_quic_conn_t *conn,
+ knot_quic_reply_t *reply, unsigned max_msgs,
+ knot_quic_send_flag_t flags);
+
+/*! @} */
diff --git a/src/libknot/quic/quic_conn.c b/src/libknot/quic/quic_conn.c
new file mode 100644
index 0000000..6616573
--- /dev/null
+++ b/src/libknot/quic/quic_conn.c
@@ -0,0 +1,577 @@
+/* Copyright (C) 2023 CZ.NIC, z.s.p.o. <knot-dns@labs.nic.cz>
+
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program. If not, see <https://www.gnu.org/licenses/>.
+ */
+
+#include <assert.h>
+#include <gnutls/gnutls.h>
+#include <ngtcp2/ngtcp2.h>
+#include <stdio.h>
+#include <string.h>
+
+#include "libknot/quic/quic_conn.h"
+
+#include "contrib/macros.h"
+#include "contrib/openbsd/siphash.h"
+#include "contrib/ucw/heap.h"
+#include "contrib/ucw/lists.h"
+#include "libdnssec/random.h"
+#include "libknot/attribute.h"
+#include "libknot/error.h"
+#include "libknot/quic/quic.h"
+#include "libknot/xdp/tcp_iobuf.h"
+#include "libknot/wire.h"
+
+#define STREAM_INCR 4 // DoQ only uses client-initiated bi-directional streams, so stream IDs increment by four
+#define BUCKETS_PER_CONNS 8 // Each connecion has several dCIDs, and each CID takes one hash table bucket.
+
+static int cmp_expiry_heap_nodes(void *c1, void *c2)
+{
+ if (((knot_quic_conn_t *)c1)->next_expiry < ((knot_quic_conn_t *)c2)->next_expiry) return -1;
+ if (((knot_quic_conn_t *)c1)->next_expiry > ((knot_quic_conn_t *)c2)->next_expiry) return 1;
+ return 0;
+}
+
+_public_
+knot_quic_table_t *knot_quic_table_new(size_t max_conns, size_t max_ibufs, size_t max_obufs,
+ size_t udp_payload, struct knot_quic_creds *creds)
+{
+ size_t table_size = max_conns * BUCKETS_PER_CONNS;
+
+ knot_quic_table_t *res = calloc(1, sizeof(*res) + table_size * sizeof(res->conns[0]));
+ if (res == NULL || creds == NULL) {
+ free(res);
+ return NULL;
+ }
+
+ res->size = table_size;
+ res->max_conns = max_conns;
+ res->ibufs_max = max_ibufs;
+ res->obufs_max = max_obufs;
+ res->udp_payload_limit = udp_payload;
+
+ res->expiry_heap = malloc(sizeof(struct heap));
+ if (res->expiry_heap == NULL || !heap_init(res->expiry_heap, cmp_expiry_heap_nodes, 0)) {
+ free(res->expiry_heap);
+ free(res);
+ return NULL;
+ }
+
+ res->creds = creds;
+
+ res->hash_secret[0] = dnssec_random_uint64_t();
+ res->hash_secret[1] = dnssec_random_uint64_t();
+ res->hash_secret[2] = dnssec_random_uint64_t();
+ res->hash_secret[3] = dnssec_random_uint64_t();
+
+ return res;
+}
+
+_public_
+void knot_quic_table_free(knot_quic_table_t *table)
+{
+ if (table != NULL) {
+ while (!EMPTY_HEAP(table->expiry_heap)) {
+ knot_quic_conn_t *c = *(knot_quic_conn_t **)HHEAD(table->expiry_heap);
+ knot_quic_table_rem(c, table);
+ knot_quic_cleanup(&c, 1);
+ }
+ assert(table->usage == 0);
+ assert(table->pointers == 0);
+ assert(table->ibufs_size == 0);
+ assert(table->obufs_size == 0);
+
+ heap_deinit(table->expiry_heap);
+ free(table->expiry_heap);
+ free(table);
+ }
+}
+
+static void send_excessive_load(knot_quic_conn_t *conn, struct knot_quic_reply *reply,
+ knot_quic_table_t *table)
+{
+ if (reply != NULL) {
+ reply->handle_ret = KNOT_QUIC_ERR_EXCESSIVE_LOAD;
+ (void)knot_quic_send(table, conn, reply, 0, 0);
+ }
+}
+
+_public_
+void knot_quic_table_sweep(knot_quic_table_t *table, struct knot_quic_reply *sweep_reply,
+ struct knot_sweep_stats *stats)
+{
+ uint64_t now = 0;
+ if (table == NULL || stats == NULL) {
+ return;
+ }
+
+ while (!EMPTY_HEAP(table->expiry_heap)) {
+ knot_quic_conn_t *c = *(knot_quic_conn_t **)HHEAD(table->expiry_heap);
+ if (table->usage > table->max_conns) {
+ knot_sweep_stats_incr(stats, KNOT_SWEEP_CTR_LIMIT_CONN);
+ send_excessive_load(c, sweep_reply, table);
+ knot_quic_table_rem(c, table);
+ } else if (table->obufs_size > table->obufs_max) {
+ knot_sweep_stats_incr(stats, KNOT_SWEEP_CTR_LIMIT_OBUF);
+ send_excessive_load(c, sweep_reply, table);
+ knot_quic_table_rem(c, table);
+ } else if (table->ibufs_size > table->ibufs_max) {
+ knot_sweep_stats_incr(stats, KNOT_SWEEP_CTR_LIMIT_IBUF);
+ send_excessive_load(c, sweep_reply, table);
+ knot_quic_table_rem(c, table);
+ } else if (quic_conn_timeout(c, &now)) {
+ int ret = ngtcp2_conn_handle_expiry(c->conn, now);
+ if (ret != NGTCP2_NO_ERROR) { // usually NGTCP2_ERR_IDLE_CLOSE or NGTCP2_ERR_HANDSHAKE_TIMEOUT
+ knot_sweep_stats_incr(stats, KNOT_SWEEP_CTR_TIMEOUT);
+ knot_quic_table_rem(c, table);
+ } else {
+ if (sweep_reply != NULL) {
+ sweep_reply->handle_ret = KNOT_EOK;
+ (void)knot_quic_send(table, c, sweep_reply, 0, 0);
+ }
+ quic_conn_mark_used(c, table);
+ }
+ }
+ knot_quic_cleanup(&c, 1);
+
+ if (*(knot_quic_conn_t **)HHEAD(table->expiry_heap) == c) { // HHEAD already handled, NOOP, avoid infinite loop
+ break;
+ }
+ }
+}
+
+static uint64_t cid2hash(const ngtcp2_cid *cid, knot_quic_table_t *table)
+{
+ SIPHASH_CTX ctx;
+ SipHash24_Init(&ctx, (const SIPHASH_KEY *)(table->hash_secret));
+ SipHash24_Update(&ctx, cid->data, MIN(cid->datalen, 8));
+ uint64_t ret = SipHash24_End(&ctx);
+ return ret;
+}
+
+knot_quic_cid_t **quic_table_insert(knot_quic_conn_t *conn, const ngtcp2_cid *cid,
+ knot_quic_table_t *table)
+{
+ uint64_t hash = cid2hash(cid, table);
+
+ knot_quic_cid_t *cidobj = malloc(sizeof(*cidobj));
+ if (cidobj == NULL) {
+ return NULL;
+ }
+ _Static_assert(sizeof(*cid) <= sizeof(cidobj->cid_placeholder), "insufficient placeholder for CID struct");
+ memcpy(cidobj->cid_placeholder, cid, sizeof(*cid));
+ cidobj->conn = conn;
+
+ knot_quic_cid_t **addto = table->conns + (hash % table->size);
+
+ cidobj->next = *addto;
+ *addto = cidobj;
+ table->pointers++;
+
+ return addto;
+}
+
+knot_quic_conn_t *quic_table_add(ngtcp2_conn *ngconn, const ngtcp2_cid *cid,
+ knot_quic_table_t *table)
+{
+ knot_quic_conn_t *conn = calloc(1, sizeof(*conn));
+ if (conn == NULL) {
+ return NULL;
+ }
+
+ conn->conn = ngconn;
+ conn->quic_table = table;
+ conn->stream_inprocess = -1;
+ conn->qlog_fd = -1;
+
+ conn->next_expiry = UINT64_MAX;
+ if (!heap_insert(table->expiry_heap, (heap_val_t *)conn)) {
+ free(conn);
+ return NULL;
+ }
+
+ knot_quic_cid_t **addto = quic_table_insert(conn, cid, table);
+ if (addto == NULL) {
+ heap_delete(table->expiry_heap, heap_find(table->expiry_heap, (heap_val_t *)conn));
+ free(conn);
+ return NULL;
+ }
+ table->usage++;
+
+ return conn;
+}
+
+knot_quic_cid_t **quic_table_lookup2(const ngtcp2_cid *cid, knot_quic_table_t *table)
+{
+ uint64_t hash = cid2hash(cid, table);
+
+ knot_quic_cid_t **res = table->conns + (hash % table->size);
+ while (*res != NULL && !ngtcp2_cid_eq(cid, (const ngtcp2_cid *)(*res)->cid_placeholder)) {
+ res = &(*res)->next;
+ }
+ return res;
+}
+
+knot_quic_conn_t *quic_table_lookup(const ngtcp2_cid *cid, knot_quic_table_t *table)
+{
+ knot_quic_cid_t **pcid = quic_table_lookup2(cid, table);
+ assert(pcid != NULL);
+ return *pcid == NULL ? NULL : (*pcid)->conn;
+}
+
+static void conn_heap_reschedule(knot_quic_conn_t *conn, knot_quic_table_t *table)
+{
+ heap_replace(table->expiry_heap, heap_find(table->expiry_heap, (heap_val_t *)conn), (heap_val_t *)conn);
+}
+
+void quic_conn_mark_used(knot_quic_conn_t *conn, knot_quic_table_t *table)
+{
+ conn->next_expiry = quic_conn_get_timeout(conn);
+ conn_heap_reschedule(conn, table);
+}
+
+void quic_table_rem2(knot_quic_cid_t **pcid, knot_quic_table_t *table)
+{
+ knot_quic_cid_t *cid = *pcid;
+ *pcid = cid->next;
+ free(cid);
+ table->pointers--;
+}
+
+_public_
+void knot_quic_conn_stream_free(knot_quic_conn_t *conn, int64_t stream_id)
+{
+ knot_quic_stream_t *s = knot_quic_conn_get_stream(conn, stream_id, false);
+ if (s != NULL && s->inbuf.iov_len > 0) {
+ free(s->inbuf.iov_base);
+ conn->ibufs_size -= buffer_alloc_size(s->inbuf.iov_len);
+ conn->quic_table->ibufs_size -= buffer_alloc_size(s->inbuf.iov_len);
+ memset(&s->inbuf, 0, sizeof(s->inbuf));
+ }
+ while (s != NULL && s->inbufs != NULL) {
+ void *tofree = s->inbufs;
+ s->inbufs = s->inbufs->next;
+ free(tofree);
+ }
+ knot_quic_stream_ack_data(conn, stream_id, SIZE_MAX, false);
+}
+
+_public_
+void knot_quic_table_rem(knot_quic_conn_t *conn, knot_quic_table_t *table)
+{
+ if (conn == NULL || conn->conn == NULL || table == NULL) {
+ return;
+ }
+
+ if (conn->streams_count == -1) { // kxdpgun special
+ conn->streams_count = 1;
+ }
+ for (ssize_t i = conn->streams_count - 1; i >= 0; i--) {
+ knot_quic_conn_stream_free(conn, (i + conn->streams_first) * 4);
+ }
+ assert(conn->streams_count <= 0);
+ assert(conn->obufs_size == 0);
+
+ size_t num_scid = ngtcp2_conn_get_scid(conn->conn, NULL);
+ ngtcp2_cid *scids = calloc(num_scid, sizeof(*scids));
+ ngtcp2_conn_get_scid(conn->conn, scids);
+
+ for (size_t i = 0; i < num_scid; i++) {
+ knot_quic_cid_t **pcid = quic_table_lookup2(&scids[i], table);
+ assert(pcid != NULL);
+ if (*pcid == NULL) {
+ continue;
+ }
+ assert((*pcid)->conn == conn);
+ quic_table_rem2(pcid, table);
+ }
+
+ int pos = heap_find(table->expiry_heap, (heap_val_t *)conn);
+ heap_delete(table->expiry_heap, pos);
+
+ free(scids);
+
+ gnutls_deinit(conn->tls_session);
+ ngtcp2_conn_del(conn->conn);
+ conn->conn = NULL;
+
+ table->usage--;
+}
+
+_public_
+knot_quic_stream_t *knot_quic_conn_get_stream(knot_quic_conn_t *conn,
+ int64_t stream_id, bool create)
+{
+ if (stream_id % 4 != 0 || conn == NULL) {
+ return NULL;
+ }
+ stream_id /= 4;
+
+ if (conn->streams_first > stream_id) {
+ return NULL;
+ }
+ if (conn->streams_count > stream_id - conn->streams_first) {
+ return &conn->streams[stream_id - conn->streams_first];
+ }
+
+ if (create) {
+ size_t new_streams_count;
+ knot_quic_stream_t *new_streams;
+
+ if (conn->streams_count == 0) {
+ new_streams = malloc(sizeof(new_streams[0]));
+ if (new_streams == NULL) {
+ return NULL;
+ }
+ new_streams_count = 1;
+ conn->streams_first = stream_id;
+ } else {
+ new_streams_count = stream_id + 1 - conn->streams_first;
+ if (new_streams_count > MAX_STREAMS_PER_CONN) {
+ return NULL;
+ }
+ new_streams = realloc(conn->streams, new_streams_count * sizeof(*new_streams));
+ if (new_streams == NULL) {
+ return NULL;
+ }
+ }
+
+ for (knot_quic_stream_t *si = new_streams;
+ si < new_streams + conn->streams_count; si++) {
+ if (si->obufs_size == 0) {
+ init_list((list_t *)&si->outbufs);
+ } else {
+ fix_list((list_t *)&si->outbufs);
+ }
+ }
+
+ for (knot_quic_stream_t *si = new_streams + conn->streams_count;
+ si < new_streams + new_streams_count; si++) {
+ memset(si, 0, sizeof(*si));
+ init_list((list_t *)&si->outbufs);
+ }
+ conn->streams = new_streams;
+ conn->streams_count = new_streams_count;
+
+ return &conn->streams[stream_id - conn->streams_first];
+ }
+ return NULL;
+}
+
+_public_
+knot_quic_stream_t *knot_quic_conn_new_stream(knot_quic_conn_t *conn)
+{
+ int64_t new_id = (conn->streams_first + conn->streams_count) * 4;
+ return knot_quic_conn_get_stream(conn, new_id, true);
+}
+
+static void stream_inprocess(knot_quic_conn_t *conn, knot_quic_stream_t *stream)
+{
+ int16_t idx = stream - conn->streams;
+ assert(idx >= 0);
+ assert(idx < conn->streams_count);
+ if (conn->stream_inprocess < 0 || conn->stream_inprocess > idx) {
+ conn->stream_inprocess = idx;
+ }
+}
+
+static void stream_outprocess(knot_quic_conn_t *conn, knot_quic_stream_t *stream)
+{
+ if (stream != &conn->streams[conn->stream_inprocess]) {
+ return;
+ }
+
+ for (int16_t idx = conn->stream_inprocess + 1; idx < conn->streams_count; idx++) {
+ stream = &conn->streams[idx];
+ if (stream->inbufs != NULL) {
+ conn->stream_inprocess = stream - conn->streams;
+ return;
+ }
+ }
+ conn->stream_inprocess = -1;
+}
+
+int knot_quic_stream_recv_data(knot_quic_conn_t *conn, int64_t stream_id,
+ const uint8_t *data, size_t len, bool fin)
+{
+ if (len == 0 || conn == NULL || data == NULL) {
+ return KNOT_EINVAL;
+ }
+
+ knot_quic_stream_t *stream = knot_quic_conn_get_stream(conn, stream_id, true);
+ if (stream == NULL) {
+ return KNOT_ENOENT;
+ }
+
+ struct iovec in = { (void *)data, len };
+ ssize_t prev_ibufs_size = conn->ibufs_size;
+ int ret = knot_tcp_inbufs_upd(&stream->inbuf, in, true,
+ &stream->inbufs, &conn->ibufs_size);
+ conn->quic_table->ibufs_size += (ssize_t)conn->ibufs_size - prev_ibufs_size;
+ if (ret != KNOT_EOK) {
+ return ret;
+ }
+
+ if (fin && stream->inbufs == NULL) {
+ return KNOT_ESEMCHECK;
+ }
+
+ if (stream->inbufs != NULL) {
+ stream_inprocess(conn, stream);
+ }
+ return KNOT_EOK;
+}
+
+_public_
+knot_quic_stream_t *knot_quic_stream_get_process(knot_quic_conn_t *conn,
+ int64_t *stream_id)
+{
+ if (conn == NULL || conn->stream_inprocess < 0) {
+ return NULL;
+ }
+
+ knot_quic_stream_t *stream = &conn->streams[conn->stream_inprocess];
+ *stream_id = (conn->streams_first + conn->stream_inprocess) * 4;
+ stream_outprocess(conn, stream);
+ return stream;
+}
+
+_public_
+uint8_t *knot_quic_stream_add_data(knot_quic_conn_t *conn, int64_t stream_id,
+ uint8_t *data, size_t len)
+{
+ knot_quic_stream_t *s = knot_quic_conn_get_stream(conn, stream_id, true);
+ if (s == NULL) {
+ return NULL;
+ }
+
+ size_t prefix = sizeof(uint16_t);
+
+ knot_quic_obuf_t *obuf = malloc(sizeof(*obuf) + prefix + len);
+ if (obuf == NULL) {
+ return NULL;
+ }
+
+ obuf->len = len + prefix;
+ knot_wire_write_u16(obuf->buf, len);
+ if (data != NULL) {
+ memcpy(obuf->buf + prefix, data, len);
+ }
+
+ list_t *list = (list_t *)&s->outbufs;
+ if (EMPTY_LIST(*list)) {
+ s->unsent_obuf = obuf;
+ }
+ add_tail((list_t *)&s->outbufs, (node_t *)obuf);
+ s->obufs_size += obuf->len;
+ conn->obufs_size += obuf->len;
+ conn->quic_table->obufs_size += obuf->len;
+
+ return obuf->buf + prefix;
+}
+
+void knot_quic_stream_ack_data(knot_quic_conn_t *conn, int64_t stream_id,
+ size_t end_acked, bool keep_stream)
+{
+ knot_quic_stream_t *s = knot_quic_conn_get_stream(conn, stream_id, false);
+ if (s == NULL) {
+ return;
+ }
+
+ list_t *obs = (list_t *)&s->outbufs;
+
+ knot_quic_obuf_t *first;
+ while (!EMPTY_LIST(*obs) && end_acked >= (first = HEAD(*obs))->len + s->first_offset) {
+ rem_node((node_t *)first);
+ assert(HEAD(*obs) != first); // help CLANG analyzer understand what rem_node did and that further usage of HEAD(*obs) is safe
+ s->obufs_size -= first->len;
+ conn->obufs_size -= first->len;
+ conn->quic_table->obufs_size -= first->len;
+ s->first_offset += first->len;
+ free(first);
+ if (s->unsent_obuf == first) {
+ s->unsent_obuf = EMPTY_LIST(*obs) ? NULL : HEAD(*obs);
+ s->unsent_offset = 0;
+ }
+ }
+
+ if (EMPTY_LIST(*obs) && !keep_stream) {
+ stream_outprocess(conn, s);
+ memset(s, 0, sizeof(*s));
+ init_list((list_t *)&s->outbufs);
+ while (s = &conn->streams[0], s->inbuf.iov_len == 0 && s->inbufs == NULL && s->obufs_size == 0) {
+ assert(conn->streams_count > 0);
+ conn->streams_count--;
+
+ if (conn->streams_count == 0) {
+ free(conn->streams);
+ conn->streams = 0;
+ conn->streams_first = 0;
+ break;
+ } else {
+ conn->streams_first++;
+ conn->stream_inprocess--;
+ memmove(s, s + 1, sizeof(*s) * conn->streams_count);
+ // possible realloc to shrink allocated space, but probably useless
+ for (knot_quic_stream_t *si = s; si < s + conn->streams_count; si++) {
+ if (si->obufs_size == 0) {
+ init_list((list_t *)&si->outbufs);
+ } else {
+ fix_list((list_t *)&si->outbufs);
+ }
+ }
+ }
+ }
+ }
+}
+
+void knot_quic_stream_mark_sent(knot_quic_conn_t *conn, int64_t stream_id,
+ size_t amount_sent)
+{
+ knot_quic_stream_t *s = knot_quic_conn_get_stream(conn, stream_id, false);
+ if (s == NULL) {
+ return;
+ }
+
+ s->unsent_offset += amount_sent;
+ assert(s->unsent_offset <= s->unsent_obuf->len);
+ if (s->unsent_offset == s->unsent_obuf->len) {
+ s->unsent_offset = 0;
+ s->unsent_obuf = (knot_quic_obuf_t *)s->unsent_obuf->node.next;
+ if (s->unsent_obuf->node.next == NULL) { // already behind the tail of list
+ s->unsent_obuf = NULL;
+ }
+ }
+}
+
+_public_
+void knot_quic_cleanup(knot_quic_conn_t *conns[], size_t n_conns)
+{
+ for (size_t i = 0; i < n_conns; i++) {
+ if (conns[i] != NULL && conns[i]->conn == NULL) {
+ free(conns[i]);
+ for (size_t j = i + 1; j < n_conns; j++) {
+ if (conns[j] == conns[i]) {
+ conns[j] = NULL;
+ }
+ }
+ }
+ }
+}
+
+bool quic_require_retry(knot_quic_table_t *table)
+{
+ (void)table;
+ return false;
+}
diff --git a/src/libknot/quic/quic_conn.h b/src/libknot/quic/quic_conn.h
new file mode 100644
index 0000000..64ead51
--- /dev/null
+++ b/src/libknot/quic/quic_conn.h
@@ -0,0 +1,326 @@
+/* Copyright (C) 2023 CZ.NIC, z.s.p.o. <knot-dns@labs.nic.cz>
+
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program. If not, see <https://www.gnu.org/licenses/>.
+ */
+
+/*!
+ * \file
+ *
+ * \brief QUIC connection management.
+ *
+ * \addtogroup quic
+ * @{
+ */
+
+#pragma once
+
+#include <stdbool.h>
+#include <stdint.h>
+#include <sys/uio.h>
+
+#define MAX_STREAMS_PER_CONN 10 // this limits the number of un-finished streams per conn (i.e. if response has been recvd with FIN, it doesn't count)
+
+struct ngtcp2_cid; // declaration taken from wherever in ngtcp2
+struct knot_quic_creds;
+struct knot_quic_reply;
+struct knot_sweep_stats;
+
+// those are equivalent to contrib/ucw/lists.h , just must not be included.
+typedef struct knot_quic_ucw_node {
+ struct knot_quic_ucw_node *next, *prev;
+} knot_quic_ucw_node_t;
+typedef struct knot_quic_ucw_list {
+ knot_quic_ucw_node_t head, tail;
+} knot_quic_ucw_list_t;
+
+typedef struct {
+ void *get_conn;
+ void *user_data;
+} nc_conn_ref_placeholder_t;
+
+typedef struct {
+ knot_quic_ucw_node_t node;
+ size_t len;
+ uint8_t buf[];
+} knot_quic_obuf_t;
+
+typedef struct {
+ struct iovec inbuf;
+ struct knot_tcp_inbufs_upd_res *inbufs;
+ size_t firstib_consumed;
+ knot_quic_ucw_list_t outbufs;
+ size_t obufs_size;
+
+ knot_quic_obuf_t *unsent_obuf;
+ size_t first_offset;
+ size_t unsent_offset;
+} knot_quic_stream_t;
+
+typedef enum {
+ KNOT_QUIC_CONN_HANDSHAKE_DONE = (1 << 0),
+ KNOT_QUIC_CONN_SESSION_TAKEN = (1 << 1),
+} knot_quic_conn_flag_t;
+
+typedef struct knot_quic_conn {
+ int heap_node_placeholder; // MUST be first field of the struct
+ uint64_t next_expiry;
+
+ nc_conn_ref_placeholder_t conn_ref; // placeholder for internal struct ngtcp2_crypto_conn_ref
+
+ struct ngtcp2_conn *conn;
+
+ struct gnutls_session_int *tls_session;
+
+ knot_quic_stream_t *streams;
+ int16_t streams_count; // number of allocated streams structures
+ int16_t stream_inprocess; // index of first stream that has complete incomming data to be processed (aka inbuf_fin)
+ knot_quic_conn_flag_t flags;
+ int qlog_fd;
+ int64_t streams_first; // stream_id/4 of first allocated stream
+ size_t ibufs_size;
+ size_t obufs_size;
+
+ struct knot_quic_table *quic_table;
+
+ struct knot_quic_conn *next;
+} knot_quic_conn_t;
+
+typedef struct knot_quic_cid {
+ uint8_t cid_placeholder[32];
+ knot_quic_conn_t *conn;
+ struct knot_quic_cid *next;
+} knot_quic_cid_t;
+
+typedef struct knot_quic_table {
+ uint32_t flags; // unused yet
+ size_t size;
+ size_t usage;
+ size_t pointers;
+ size_t max_conns;
+ size_t ibufs_max;
+ size_t obufs_max;
+ size_t ibufs_size;
+ size_t obufs_size;
+ size_t udp_payload_limit; // for simplicity not distinguishing IPv4/6
+ void (*log_cb)(const char *);
+ const char *qlog_dir;
+ uint64_t hash_secret[4];
+ struct knot_quic_creds *creds;
+ struct heap *expiry_heap;
+ knot_quic_cid_t *conns[];
+} knot_quic_table_t;
+
+/*!
+ * \brief Allocate QUIC connections hash table.
+ *
+ * \param max_conns Maximum nuber of connections.
+ * \param max_ibufs Maximum size of buffers for fragmented incomming DNS msgs.
+ * \param max_obufs Maximum size of buffers for un-ACKed outgoing data.
+ * \param udp_payload Maximum UDP payload size (both IPv4 and 6).
+ * \param creds QUIC crypto context..
+ *
+ * \return Allocated table, or NULL.
+ */
+knot_quic_table_t *knot_quic_table_new(size_t max_conns, size_t max_ibufs, size_t max_obufs,
+ size_t udp_payload, struct knot_quic_creds *creds);
+
+/*!
+ * \brief Free QUIC table including its contents.
+ *
+ * \param table Table to be freed.
+ */
+void knot_quic_table_free(knot_quic_table_t *table);
+
+/*!
+ * \brief Close timed out connections and some oldest ones if table full.
+ *
+ * \param table QUIC table to be cleaned up.
+ * \param sweep_reply Optional: reply structure to send sweep-initiated packets to the client.
+ * \param stats Out: sweep statistics.
+ */
+void knot_quic_table_sweep(knot_quic_table_t *table, struct knot_quic_reply *sweep_reply,
+ struct knot_sweep_stats *stats);
+
+/*!
+ * \brief Add new connection/CID link to table.
+ *
+ * \param conn QUIC connection linked.
+ * \param cid New CID to be added.
+ * \param table QUIC table to be modified.
+ *
+ * \return Pointer on the CID reference in table, or NULL.
+ */
+knot_quic_cid_t **quic_table_insert(knot_quic_conn_t *conn,
+ const struct ngtcp2_cid *cid,
+ knot_quic_table_t *table);
+
+/*!
+ * \brief Add new connection to the table, allocating conn struct.
+ *
+ * \param ngconn Ngtcp2 conn struct.
+ * \param cid CID to be linked (usually oscid for server).
+ * \param table QUIC table to be modified.
+ *
+ * \return Allocated (and linked) Knot conn struct, or NULL.
+ */
+knot_quic_conn_t *quic_table_add(struct ngtcp2_conn *ngconn,
+ const struct ngtcp2_cid *cid,
+ knot_quic_table_t *table);
+
+/*!
+ * \brief Lookup connection/CID link in table.
+ *
+ * \param cid CID to be searched for.
+ * \param table QUIC table.
+ *
+ * \return Pointer on the CID reference in table, or NULL.
+ */
+knot_quic_cid_t **quic_table_lookup2(const struct ngtcp2_cid *cid,
+ knot_quic_table_t *table);
+
+/*!
+ * \brief Lookup QUIC connection in table.
+ *
+ * \param cid CID to be searched for.
+ * \param table QUIC table.
+ *
+ * \return Connection that the CID belongs to, or NULL.
+ */
+knot_quic_conn_t *quic_table_lookup(const struct ngtcp2_cid *cid,
+ knot_quic_table_t *table);
+
+/*!
+ * \brief Re-schedule connection expiry timer.
+ */
+void quic_conn_mark_used(knot_quic_conn_t *conn, knot_quic_table_t *table);
+
+/*!
+ * \brief Remove connection/CID link from table.
+ *
+ * \param pcid CID to be removed.
+ * \param table QUIC table.
+ */
+void quic_table_rem2(knot_quic_cid_t **pcid, knot_quic_table_t *table);
+
+/*!
+ * \brief Remove specified stream from QUIC connection, freeing all buffers.
+ *
+ * \param conn QUIC connection to remove from.
+ * \param stream_id Stream QUIC ID.
+ */
+void knot_quic_conn_stream_free(knot_quic_conn_t *conn, int64_t stream_id);
+
+/*!
+ * \brief Remove and deinitialize connection completely.
+ *
+ * \param conn Connection to be removed.
+ * \param table Table to remove from.
+ */
+void knot_quic_table_rem(knot_quic_conn_t *conn, knot_quic_table_t *table);
+
+/*!
+ * \brief Fetch or initialize a QUIC stream.
+ *
+ * \param conn QUIC connection.
+ * \param stream_id Stream QUIC ID.
+ * \param create Trigger stream creation if not exists.
+ *
+ * \return Stream or NULL.
+ */
+knot_quic_stream_t *knot_quic_conn_get_stream(knot_quic_conn_t *conn,
+ int64_t stream_id, bool create);
+
+/*!
+ * \brief Create a new, subsequent stream.
+ */
+knot_quic_stream_t *knot_quic_conn_new_stream(knot_quic_conn_t *conn);
+
+/*!
+ * \brief Process incomming stream data to stream structure.
+ *
+ * \param conn QUIC connection that has received data.
+ * \param stream_id Stream QUIC ID of the incomming data.
+ * \param data Incomming payload data.
+ * \param len Incomming payload data length.
+ * \param fin FIN flag set for incomming data.
+ *
+ * \return KNOT_E*
+ */
+int knot_quic_stream_recv_data(knot_quic_conn_t *conn, int64_t stream_id,
+ const uint8_t *data, size_t len, bool fin);
+
+/*!
+ * \brief Get next stream which has pending incomming data to be processed.
+ *
+ * \param conn QUIC connection.
+ * \param stream_id Out: strem QUIC ID of the returned stream.
+ *
+ * \return Stream with incomming data.
+ */
+knot_quic_stream_t *knot_quic_stream_get_process(knot_quic_conn_t *conn,
+ int64_t *stream_id);
+
+/*!
+ * \brief Add outgiong data to the stream for sending.
+ *
+ * \param conn QUIC connection that shall send data.
+ * \param stream_id Stream ID for outgoing data.
+ * \param data Data payload.
+ * \param len Data payload length.
+ *
+ * \return NULL if error, or pinter at the data in outgiong buffer.
+ */
+uint8_t *knot_quic_stream_add_data(knot_quic_conn_t *conn, int64_t stream_id,
+ uint8_t *data, size_t len);
+
+/*!
+ * \brief Mark outgiong data as acknowledged after ACK received.
+ *
+ * \param conn QUIC connection that received ACK.
+ * \param stream_id Stream ID of ACKed data.
+ * \param end_acked Offset of ACKed data + ACKed length.
+ * \param keep_stream Don't free the stream even when ACKed all outgoing data.
+ */
+void knot_quic_stream_ack_data(knot_quic_conn_t *conn, int64_t stream_id,
+ size_t end_acked, bool keep_stream);
+
+/*!
+ * \brief Mark outgoing data as sent.
+ *
+ * \param conn QUIC connection that sent data.
+ * \param stream_id Stream ID of sent data.
+ * \param amount_sent Length of sent data.
+ */
+void knot_quic_stream_mark_sent(knot_quic_conn_t *conn, int64_t stream_id,
+ size_t amount_sent);
+
+/*!
+ * \brief Free rest of resources of closed conns.
+ *
+ * \param conns Array with recently used conns (possibly NULLs).
+ * \param n_conns Size of the array.
+ */
+void knot_quic_cleanup(knot_quic_conn_t *conns[], size_t n_conns);
+
+/*!
+ * \brief Toggle sending Retry packet as a reaction to Initial packet of new connection.
+ *
+ * \param table Connection table.
+ *
+ * \return True if instead of continuing handshake, Retry packet shall be sent
+ * to verify counterpart's address.
+ */
+bool quic_require_retry(knot_quic_table_t *table);
+
+/*! @} */