diff options
Diffstat (limited to 'src/libknot/quic')
-rw-r--r-- | src/libknot/quic/quic.c | 1294 | ||||
-rw-r--r-- | src/libknot/quic/quic.h | 213 | ||||
-rw-r--r-- | src/libknot/quic/quic_conn.c | 577 | ||||
-rw-r--r-- | src/libknot/quic/quic_conn.h | 326 |
4 files changed, 2410 insertions, 0 deletions
diff --git a/src/libknot/quic/quic.c b/src/libknot/quic/quic.c new file mode 100644 index 0000000..5e447e7 --- /dev/null +++ b/src/libknot/quic/quic.c @@ -0,0 +1,1294 @@ +/* Copyright (C) 2023 CZ.NIC, z.s.p.o. <knot-dns@labs.nic.cz> + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see <https://www.gnu.org/licenses/>. + */ + +#include <assert.h> +#include <fcntl.h> +#include <gnutls/gnutls.h> +#include <gnutls/crypto.h> +#include <gnutls/x509.h> +#include <ngtcp2/ngtcp2.h> +#include <ngtcp2/ngtcp2_crypto.h> +#include <ngtcp2/ngtcp2_crypto_gnutls.h> +#include <stdbool.h> +#include <stdio.h> +#include <string.h> +#include <sys/types.h> +#include <sys/stat.h> +#include <time.h> + +#include "libknot/quic/quic.h" + +#include "contrib/macros.h" +#include "contrib/sockaddr.h" +#include "contrib/string.h" +#include "contrib/ucw/lists.h" +#include "libknot/endian.h" +#include "libdnssec/error.h" +#include "libdnssec/random.h" +#include "libknot/attribute.h" +#include "libknot/endian.h" +#include "libknot/error.h" +#include "libknot/wire.h" + +#define SERVER_DEFAULT_SCIDLEN 18 + +#define QUIC_DEFAULT_VERSION "-VERS-ALL:+VERS-TLS1.3" +#define QUIC_DEFAULT_GROUPS "-GROUP-ALL:+GROUP-X25519:+GROUP-SECP256R1:+GROUP-SECP384R1:+GROUP-SECP521R1" +#define QUIC_PRIORITIES "%DISABLE_TLS13_COMPAT_MODE:NORMAL:"QUIC_DEFAULT_VERSION":"QUIC_DEFAULT_GROUPS + +#define QUIC_SEND_VERSION_NEGOTIATION NGTCP2_ERR_VERSION_NEGOTIATION +#define QUIC_SEND_RETRY 
NGTCP2_ERR_RETRY +#define QUIC_SEND_STATELESS_RESET (-NGTCP2_STATELESS_RESET_TOKENLEN) +#define QUIC_SEND_CONN_CLOSE (-KNOT_QUIC_HANDLE_RET_CLOSE) +#define QUIC_SEND_EXCESSIVE_LOAD (-KNOT_QUIC_ERR_EXCESSIVE_LOAD) + +#define TLS_CALLBACK_ERR (-1) + +const gnutls_datum_t doq_alpn = { + (unsigned char *)"doq", 3 +}; + +typedef struct knot_quic_creds { + gnutls_certificate_credentials_t tls_cert; + gnutls_anti_replay_t tls_anti_replay; + gnutls_datum_t tls_ticket_key; + bool peer; + uint8_t peer_pin_len; + uint8_t peer_pin[]; +} knot_quic_creds_t; + +typedef struct knot_quic_session { + node_t n; + gnutls_datum_t tls_session; + size_t quic_params_len; + uint8_t quic_params[sizeof(ngtcp2_transport_params)]; +} knot_quic_session_t; + +static unsigned addr_len(const struct sockaddr_in6 *ss) +{ + return (ss->sin6_family == AF_INET6 ? + sizeof(struct sockaddr_in6) : sizeof(struct sockaddr_in)); +} + +_public_ +bool knot_quic_session_available(knot_quic_conn_t *conn) +{ + return conn != NULL && !(conn->flags & KNOT_QUIC_CONN_SESSION_TAKEN) && + (gnutls_session_get_flags(conn->tls_session) & GNUTLS_SFLAGS_SESSION_TICKET); +} + +_public_ +struct knot_quic_session *knot_quic_session_save(knot_quic_conn_t *conn) +{ + if (!knot_quic_session_available(conn)) { + return NULL; + } + + knot_quic_session_t *session = malloc(sizeof(*session)); + if (session == NULL) { + return NULL; + } + + int ret = gnutls_session_get_data2(conn->tls_session, &session->tls_session); + if (ret != GNUTLS_E_SUCCESS) { + free(session); + return NULL; + } + conn->flags |= KNOT_QUIC_CONN_SESSION_TAKEN; + + ngtcp2_ssize ret2 = + ngtcp2_conn_encode_0rtt_transport_params(conn->conn, session->quic_params, + sizeof(session->quic_params)); + if (ret2 < 0) { + free(session); + return NULL; + } + session->quic_params_len = ret2; + + return session; +} + +_public_ +int knot_quic_session_load(knot_quic_conn_t *conn, struct knot_quic_session *session) +{ + if (session == NULL) { + return KNOT_EINVAL; + } + + int ret = 
KNOT_EOK; + if (conn == NULL) { // Just cleanup the session. + goto session_free; + } + + ret = gnutls_session_set_data(conn->tls_session, session->tls_session.data, + session->tls_session.size); + if (ret != GNUTLS_E_SUCCESS) { + ret = KNOT_ERROR; + goto session_free; + } + + ret = ngtcp2_conn_decode_and_set_0rtt_transport_params(conn->conn, session->quic_params, + session->quic_params_len); + if (ret != 0) { + ret = KNOT_ERROR; + } + +session_free: + gnutls_free(session->tls_session.data); + free(session); + return ret; +} + +static int tls_anti_replay_db_add_func(void *dbf, time_t exp_time, + const gnutls_datum_t *key, + const gnutls_datum_t *data) +{ + return 0; +} + +static void tls_session_ticket_key_free(gnutls_datum_t *ticket) +{ + gnutls_memset(ticket->data, 0, ticket->size); + gnutls_free(ticket->data); +} + +static int self_key(gnutls_x509_privkey_t *privkey, const char *key_file) +{ + gnutls_datum_t data = { 0 }; + + int ret = gnutls_x509_privkey_init(privkey); + if (ret != GNUTLS_E_SUCCESS) { + return ret; + } + + int fd = open(key_file, O_RDONLY); + if (fd != -1) { + struct stat stat; + if (fstat(fd, &stat) != 0 || + (data.data = gnutls_malloc(stat.st_size)) == NULL || + read(fd, data.data, stat.st_size) != stat.st_size) { + ret = GNUTLS_E_KEYFILE_ERROR; + goto finish; + } + + data.size = stat.st_size; + ret = gnutls_x509_privkey_import_pkcs8(*privkey, &data, GNUTLS_X509_FMT_PEM, + NULL, GNUTLS_PKCS_PLAIN); + if (ret != GNUTLS_E_SUCCESS) { + goto finish; + } + } else { + ret = gnutls_x509_privkey_generate(*privkey, GNUTLS_PK_EDDSA_ED25519, + GNUTLS_CURVE_TO_BITS(GNUTLS_ECC_CURVE_ED25519), 0); + if (ret != GNUTLS_E_SUCCESS) { + goto finish; + } + + ret = gnutls_x509_privkey_export2_pkcs8(*privkey, GNUTLS_X509_FMT_PEM, NULL, + GNUTLS_PKCS_PLAIN, &data); + if (ret != GNUTLS_E_SUCCESS || + (fd = open(key_file, O_WRONLY | O_CREAT, 0600)) == -1 || + write(fd, data.data, data.size) != data.size) { + ret = GNUTLS_E_KEYFILE_ERROR; + goto finish; + } + } + 
+finish: + close(fd); + gnutls_free(data.data); + if (ret != GNUTLS_E_SUCCESS) { + gnutls_x509_privkey_deinit(*privkey); + *privkey = NULL; + } + return ret; +} + +static int self_signed_cert(gnutls_certificate_credentials_t tls_cert, + const char *key_file) +{ + gnutls_x509_privkey_t privkey = NULL; + gnutls_x509_crt_t cert = NULL; + + char *hostname = sockaddr_hostname(); + if (hostname == NULL) { + return GNUTLS_E_MEMORY_ERROR; + } + + int ret; + uint8_t serial[16]; + gnutls_rnd(GNUTLS_RND_NONCE, serial, sizeof(serial)); + // Clear the left-most bit to be a positive number (two's complement form). + serial[0] &= 0x7F; + +#define CHK(cmd) if ((ret = (cmd)) != GNUTLS_E_SUCCESS) { goto finish; } +#define NOW_DAYS(days) (time(NULL) + 24 * 3600 * (days)) + + CHK(self_key(&privkey, key_file)); + + CHK(gnutls_x509_crt_init(&cert)); + CHK(gnutls_x509_crt_set_version(cert, 3)); + CHK(gnutls_x509_crt_set_serial(cert, serial, sizeof(serial))); + CHK(gnutls_x509_crt_set_activation_time(cert, NOW_DAYS(-1))); + CHK(gnutls_x509_crt_set_expiration_time(cert, NOW_DAYS(10 * 365))); + CHK(gnutls_x509_crt_set_dn_by_oid(cert, GNUTLS_OID_X520_COMMON_NAME, 0, + hostname, strlen(hostname))); + CHK(gnutls_x509_crt_set_key(cert, privkey)); + CHK(gnutls_x509_crt_sign2(cert, cert, privkey, GNUTLS_DIG_SHA512, 0)); + + ret = gnutls_certificate_set_x509_key(tls_cert, &cert, 1, privkey); + +finish: + free(hostname); + gnutls_x509_crt_deinit(cert); + gnutls_x509_privkey_deinit(privkey); + + return ret; +} + +_public_ +struct knot_quic_creds *knot_quic_init_creds(const char *cert_file, + const char *key_file) +{ + knot_quic_creds_t *creds = calloc(1, sizeof(*creds)); + if (creds == NULL) { + return NULL; + } + + int ret = gnutls_certificate_allocate_credentials(&creds->tls_cert); + if (ret != GNUTLS_E_SUCCESS) { + goto fail; + } + + ret = gnutls_anti_replay_init(&creds->tls_anti_replay); + if (ret != GNUTLS_E_SUCCESS) { + goto fail; + } + 
gnutls_anti_replay_set_add_function(creds->tls_anti_replay, tls_anti_replay_db_add_func); + gnutls_anti_replay_set_ptr(creds->tls_anti_replay, NULL); + + if (cert_file != NULL) { + ret = gnutls_certificate_set_x509_key_file(creds->tls_cert, + cert_file, key_file, + GNUTLS_X509_FMT_PEM); + } else { + ret = self_signed_cert(creds->tls_cert, key_file); + } + if (ret != GNUTLS_E_SUCCESS) { + goto fail; + } + + ret = gnutls_session_ticket_key_generate(&creds->tls_ticket_key); + if (ret != GNUTLS_E_SUCCESS) { + goto fail; + } + + return creds; +fail: + knot_quic_free_creds(creds); + return NULL; +} + +_public_ +struct knot_quic_creds *knot_quic_init_creds_peer(const struct knot_quic_creds *local_creds, + const uint8_t *peer_pin, + uint8_t peer_pin_len) +{ + knot_quic_creds_t *creds = calloc(1, sizeof(*creds) + peer_pin_len); + if (creds == NULL) { + return NULL; + } + + if (local_creds != NULL) { + creds->peer = true; + creds->tls_cert = local_creds->tls_cert; + } else { + int ret = gnutls_certificate_allocate_credentials(&creds->tls_cert); + if (ret != GNUTLS_E_SUCCESS) { + free(creds); + return NULL; + } + } + + if (peer_pin_len > 0 && peer_pin != NULL) { + memcpy(creds->peer_pin, peer_pin, peer_pin_len); + creds->peer_pin_len = peer_pin_len; + } + + return creds; +} + +_public_ +int knot_quic_creds_cert(struct knot_quic_creds *creds, struct gnutls_x509_crt_int **cert) +{ + if (creds == NULL || cert == NULL) { + return KNOT_EINVAL; + } + + gnutls_x509_crt_t *certs; + unsigned cert_count; + int ret = gnutls_certificate_get_x509_crt(creds->tls_cert, 0, &certs, &cert_count); + if (ret == GNUTLS_E_SUCCESS) { + if (cert_count == 0) { + gnutls_x509_crt_deinit(*certs); + return KNOT_ENOENT; + } + *cert = *certs; + free(certs); + } + return ret; +} + +_public_ +void knot_quic_free_creds(struct knot_quic_creds *creds) +{ + if (creds == NULL) { + return; + } + + if (!creds->peer && creds->tls_cert != NULL) { + gnutls_certificate_free_credentials(creds->tls_cert); + } + 
gnutls_anti_replay_deinit(creds->tls_anti_replay); + if (creds->tls_ticket_key.data != NULL) { + tls_session_ticket_key_free(&creds->tls_ticket_key); + } + free(creds); +} + +static ngtcp2_conn *get_conn(ngtcp2_crypto_conn_ref *conn_ref) +{ + return ((knot_quic_conn_t *)conn_ref->user_data)->conn; +} + +static int tls_init_conn_session(knot_quic_conn_t *conn, bool server) +{ + if (gnutls_init(&conn->tls_session, (server ? GNUTLS_SERVER : GNUTLS_CLIENT) | + GNUTLS_ENABLE_EARLY_DATA | GNUTLS_NO_AUTO_SEND_TICKET | + GNUTLS_NO_END_OF_EARLY_DATA) != GNUTLS_E_SUCCESS) { + return TLS_CALLBACK_ERR; + } + + gnutls_certificate_send_x509_rdn_sequence(conn->tls_session, 1); + gnutls_certificate_server_set_request(conn->tls_session, GNUTLS_CERT_REQUEST); + + if (gnutls_priority_set_direct(conn->tls_session, QUIC_PRIORITIES, + NULL) != GNUTLS_E_SUCCESS) { + return TLS_CALLBACK_ERR; + } + + if (server && gnutls_session_ticket_enable_server(conn->tls_session, + &conn->quic_table->creds->tls_ticket_key) != GNUTLS_E_SUCCESS) { + return TLS_CALLBACK_ERR; + } + + int ret = ngtcp2_crypto_gnutls_configure_server_session(conn->tls_session); + if (ret != 0) { + return TLS_CALLBACK_ERR; + } + + gnutls_record_set_max_early_data_size(conn->tls_session, 0xffffffffu); + + conn->conn_ref = (nc_conn_ref_placeholder_t) { + .get_conn = get_conn, + .user_data = conn + }; + + _Static_assert(sizeof(nc_conn_ref_placeholder_t) == sizeof(ngtcp2_crypto_conn_ref), "invalid placeholder for conn_ref"); + gnutls_session_set_ptr(conn->tls_session, &conn->conn_ref); + + if (server) { + gnutls_anti_replay_enable(conn->tls_session, conn->quic_table->creds->tls_anti_replay); + + } + if (gnutls_credentials_set(conn->tls_session, GNUTLS_CRD_CERTIFICATE, + conn->quic_table->creds->tls_cert) != GNUTLS_E_SUCCESS) { + return TLS_CALLBACK_ERR; + } + + gnutls_alpn_set_protocols(conn->tls_session, &doq_alpn, 1, GNUTLS_ALPN_MANDATORY); + + ngtcp2_conn_set_tls_native_handle(conn->conn, conn->tls_session); + + return 
KNOT_EOK; +} + +static uint64_t get_timestamp(void) +{ + struct timespec ts; + if (clock_gettime(CLOCK_MONOTONIC, &ts) != 0) { + assert(0); + } + + return (uint64_t)ts.tv_sec * NGTCP2_SECONDS + (uint64_t)ts.tv_nsec; +} + +uint64_t quic_conn_get_timeout(knot_quic_conn_t *conn) +{ + return ngtcp2_conn_get_expiry(conn->conn); +} + +bool quic_conn_timeout(knot_quic_conn_t *conn, uint64_t *now) +{ + if (*now == 0) { + *now = get_timestamp(); + } + return *now > quic_conn_get_timeout(conn); +} + +_public_ +int64_t knot_quic_conn_next_timeout(knot_quic_conn_t *conn) +{ + return (((int64_t)quic_conn_get_timeout(conn) - (int64_t)get_timestamp()) / 1000000L); +} + +_public_ +int knot_quic_hanle_expiry(knot_quic_conn_t *conn) +{ + return ngtcp2_conn_handle_expiry(conn->conn, get_timestamp()) == NGTCP2_NO_ERROR ? KNOT_EOK : KNOT_ECONN; +} + +_public_ +uint32_t knot_quic_conn_rtt(knot_quic_conn_t *conn) +{ + ngtcp2_conn_info info = { 0 }; + ngtcp2_conn_get_conn_info(conn->conn, &info); + return info.smoothed_rtt / 1000; // nanosec --> usec +} + +_public_ +uint16_t knot_quic_conn_local_port(knot_quic_conn_t *conn) +{ + const ngtcp2_path *path = ngtcp2_conn_get_path(conn->conn); + return ((const struct sockaddr_in6 *)path->local.addr)->sin6_port; +} + +_public_ +void knot_quic_conn_pin(knot_quic_conn_t *conn, uint8_t *pin, size_t *pin_size, bool local) +{ + if (conn == NULL) { + goto error; + } + + const gnutls_datum_t *data = NULL; + if (local) { + data = gnutls_certificate_get_ours(conn->tls_session); + } else { + unsigned count = 0; + data = gnutls_certificate_get_peers(conn->tls_session, &count); + if (count == 0) { + goto error; + } + } + if (data == NULL) { + goto error; + } + + gnutls_x509_crt_t cert; + int ret = gnutls_x509_crt_init(&cert); + if (ret != GNUTLS_E_SUCCESS) { + goto error; + } + + ret = gnutls_x509_crt_import(cert, data, GNUTLS_X509_FMT_DER); + if (ret != GNUTLS_E_SUCCESS) { + gnutls_x509_crt_deinit(cert); + goto error; + } + + ret = 
gnutls_x509_crt_get_key_id(cert, GNUTLS_KEYID_USE_SHA256, pin, pin_size); + if (ret != GNUTLS_E_SUCCESS) { + gnutls_x509_crt_deinit(cert); + goto error; + } + + gnutls_x509_crt_deinit(cert); + + return; +error: + if (pin_size != NULL) { + *pin_size = 0; + } +} + +static void knot_quic_rand_cb(uint8_t *dest, size_t destlen, const ngtcp2_rand_ctx *rand_ctx) +{ + (void)rand_ctx; + dnssec_random_buffer(dest, destlen); +} + +static void init_random_cid(ngtcp2_cid *cid, size_t len) +{ + if (len == 0) { + len = SERVER_DEFAULT_SCIDLEN; + } + + if (dnssec_random_buffer(cid->data, len) != DNSSEC_EOK) { + cid->datalen = 0; + } else { + cid->datalen = len; + } +} + +static bool init_unique_cid(ngtcp2_cid *cid, size_t len, knot_quic_table_t *table) +{ + do { + if (init_random_cid(cid, len), cid->datalen == 0) { + return false; + } + } while (quic_table_lookup(cid, table) != NULL); + return true; +} + +static int get_new_connection_id(ngtcp2_conn *conn, ngtcp2_cid *cid, + uint8_t *token, size_t cidlen, + void *user_data) +{ + knot_quic_conn_t *ctx = (knot_quic_conn_t *)user_data; + assert(ctx->conn == conn); + + if (!init_unique_cid(cid, cidlen, ctx->quic_table)) { + return NGTCP2_ERR_CALLBACK_FAILURE; + } + + knot_quic_cid_t **addto = quic_table_insert(ctx, cid, ctx->quic_table); + (void)addto; + + if (token != NULL && + ngtcp2_crypto_generate_stateless_reset_token( + token, (uint8_t *)ctx->quic_table->hash_secret, + sizeof(ctx->quic_table->hash_secret), cid) != 0) { + return NGTCP2_ERR_CALLBACK_FAILURE; + } + + return 0; +} + +static int remove_connection_id(ngtcp2_conn *conn, const ngtcp2_cid *cid, + void *user_data) +{ + knot_quic_conn_t *ctx = (knot_quic_conn_t *)user_data; + assert(ctx->conn == conn); + + knot_quic_cid_t **torem = quic_table_lookup2(cid, ctx->quic_table); + if (torem != NULL) { + assert((*torem)->conn == ctx); + quic_table_rem2(torem, ctx->quic_table); + } + + return 0; +} + +static int handshake_completed_cb(ngtcp2_conn *conn, void *user_data) +{ + 
knot_quic_conn_t *ctx = (knot_quic_conn_t *)user_data; + assert(ctx->conn == conn); + + assert(!(ctx->flags & KNOT_QUIC_CONN_HANDSHAKE_DONE)); + ctx->flags |= KNOT_QUIC_CONN_HANDSHAKE_DONE; + + if (!ngtcp2_conn_is_server(conn)) { + knot_quic_creds_t *creds = ctx->quic_table->creds; + if (creds->peer_pin_len == 0) { + return 0; + } + uint8_t pin[KNOT_QUIC_PIN_LEN]; + size_t pin_size = sizeof(pin); + knot_quic_conn_pin(ctx, pin, &pin_size, false); + if (pin_size != creds->peer_pin_len || + const_time_memcmp(pin, creds->peer_pin, pin_size) != 0) { + return NGTCP2_ERR_CALLBACK_FAILURE; + } + return 0; + } + + if (gnutls_session_ticket_send(ctx->tls_session, 1, 0) != GNUTLS_E_SUCCESS) { + return TLS_CALLBACK_ERR; + } + + uint8_t token[NGTCP2_CRYPTO_MAX_REGULAR_TOKENLEN]; + ngtcp2_path path = *ngtcp2_conn_get_path(ctx->conn); + uint64_t ts = get_timestamp(); + ngtcp2_ssize tokenlen = ngtcp2_crypto_generate_regular_token(token, + (uint8_t *)ctx->quic_table->hash_secret, + sizeof(ctx->quic_table->hash_secret), + path.remote.addr, path.remote.addrlen, ts); + if (tokenlen < 0) { + return NGTCP2_ERR_CALLBACK_FAILURE; + } + + if (ngtcp2_conn_submit_new_token(ctx->conn, token, tokenlen) != 0) { + return NGTCP2_ERR_CALLBACK_FAILURE; + } + + return 0; +} + +static int recv_stream_data(ngtcp2_conn *conn, uint32_t flags, + int64_t stream_id, uint64_t offset, + const uint8_t *data, size_t datalen, + void *user_data, void *stream_user_data) +{ + (void)(stream_user_data); // always NULL + (void)(offset); // QUIC shall ensure that data arrive in-order + + knot_quic_conn_t *ctx = (knot_quic_conn_t *)user_data; + assert(ctx->conn == conn); + + int ret = knot_quic_stream_recv_data(ctx, stream_id, data, datalen, + (flags & NGTCP2_STREAM_DATA_FLAG_FIN)); + + return ret == KNOT_EOK ? 
0 : NGTCP2_ERR_CALLBACK_FAILURE; +} + +static int acked_stream_data_offset_cb(ngtcp2_conn *conn, int64_t stream_id, + uint64_t offset, uint64_t datalen, + void *user_data, void *stream_user_data) +{ + knot_quic_conn_t *ctx = (knot_quic_conn_t *)user_data; + + bool keep = !ngtcp2_conn_is_server(conn); // kxdpgun: await incomming reply after query sent&acked + + knot_quic_stream_ack_data(ctx, stream_id, offset + datalen, keep); + + return 0; +} + +static int stream_closed(ngtcp2_conn *conn, uint32_t flags, int64_t stream_id, + uint64_t app_error_code, void *user_data, void *stream_user_data) +{ + knot_quic_conn_t *ctx = (knot_quic_conn_t *)user_data; + assert(ctx->conn == conn); + + // NOTE possible error is stored in (flags & NGTCP2_STREAM_CLOSE_FLAG_APP_ERROR_CODE_SET) + + bool keep = !ngtcp2_conn_is_server(conn); // kxdpgun: process incomming reply after recvd&closed + if (!keep) { + knot_quic_conn_stream_free(ctx, stream_id); + } + return 0; +} + +static int recv_stateless_rst(ngtcp2_conn *conn, const ngtcp2_pkt_stateless_reset *sr, + void *user_data) +{ + // NOTE server can't receive stateless resets, only client + + // ngtcp2 verified stateless reset token already + (void)(sr); + + knot_quic_conn_t *ctx = (knot_quic_conn_t *)user_data; + assert(ctx->conn == conn); + + knot_quic_table_rem(ctx, ctx->quic_table); + knot_quic_cleanup(&ctx, 1); + + return 0; +} + +static int recv_stream_rst(ngtcp2_conn *conn, int64_t stream_id, uint64_t final_size, + uint64_t app_error_code, void *user_data, void *stream_user_data) +{ + (void)final_size; + return stream_closed(conn, NGTCP2_STREAM_CLOSE_FLAG_APP_ERROR_CODE_SET, + stream_id, app_error_code, user_data, stream_user_data); +} + +static void user_printf(void *user_data, const char *format, ...) 
+{ + knot_quic_conn_t *ctx = (knot_quic_conn_t *)user_data; + if (ctx->quic_table->log_cb != NULL) { + char buf[256]; + va_list args; + va_start(args, format); + vsnprintf(buf, sizeof(buf), format, args); + va_end(args); + ctx->quic_table->log_cb(buf); + } +} + +static void hex_encode(const uint8_t *in, const uint32_t in_len, char *out) +{ + static const char hex[] = "0123456789abcdef"; + + for (uint32_t i = 0; i < in_len; i++) { + out[2 * i] = hex[in[i] / 16]; + out[2 * i + 1] = hex[in[i] % 16]; + } +} + +static void user_qlog(void *user_data, uint32_t flags, const void *data, size_t datalen) +{ + knot_quic_conn_t *ctx = (knot_quic_conn_t *)user_data; + if (ctx->quic_table->qlog_dir != NULL) { + if (ctx->qlog_fd < 0) { + const ngtcp2_cid *cid = ngtcp2_conn_get_client_initial_dcid(ctx->conn); + if (cid->datalen == 0) { + cid = ngtcp2_conn_get_dcid(ctx->conn); + } + unsigned qlog_dir_len = strlen(ctx->quic_table->qlog_dir); + unsigned qlog_name_len = qlog_dir_len + 2 * cid->datalen + 7; + char qlog_name[qlog_name_len]; + memcpy(qlog_name, ctx->quic_table->qlog_dir, qlog_dir_len); + qlog_name[qlog_dir_len] = '/'; + hex_encode(cid->data, cid->datalen, qlog_name + qlog_dir_len + 1); + memcpy(qlog_name + qlog_name_len - 6, ".qlog", 6); + + ctx->qlog_fd = open(qlog_name, O_CREAT | O_WRONLY | O_APPEND, 0666); + } + if (ctx->qlog_fd >= 0) { // othewise silently skip + _unused_ ssize_t unused = write(ctx->qlog_fd, data, datalen); + if (flags & NGTCP2_QLOG_WRITE_FLAG_FIN) { + close(ctx->qlog_fd); + ctx->qlog_fd = -1; + } + } + } +} + +static int conn_new(ngtcp2_conn **pconn, const ngtcp2_path *path, const ngtcp2_cid *scid, + const ngtcp2_cid *dcid, const ngtcp2_cid *odcid, uint32_t version, + uint64_t now, uint64_t idle_timeout_ns, + knot_quic_conn_t *qconn, bool server, bool retry_sent) +{ + knot_quic_table_t *qtable = qconn->quic_table; + + // I. 
CALLBACKS + const ngtcp2_callbacks callbacks = { + ngtcp2_crypto_client_initial_cb, + ngtcp2_crypto_recv_client_initial_cb, + ngtcp2_crypto_recv_crypto_data_cb, + handshake_completed_cb, + NULL, // recv_version_negotiation not needed on server, nor kxdpgun + ngtcp2_crypto_encrypt_cb, + ngtcp2_crypto_decrypt_cb, + ngtcp2_crypto_hp_mask_cb, + recv_stream_data, + acked_stream_data_offset_cb, + NULL, // stream_opened + stream_closed, + recv_stateless_rst, + ngtcp2_crypto_recv_retry_cb, + NULL, // extend_max_streams_bidi + NULL, // extend_max_streams_uni + knot_quic_rand_cb, + get_new_connection_id, + remove_connection_id, + ngtcp2_crypto_update_key_cb, + NULL, // path_validation, + NULL, // select_preferred_addr + recv_stream_rst, + NULL, // extend_max_remote_streams_bidi, might be useful to some allocation optimizations? + NULL, // extend_max_remote_streams_uni + NULL, // extend_max_stream_data, + NULL, // dcid_status + NULL, // handshake_confirmed + NULL, // recv_new_token + ngtcp2_crypto_delete_crypto_aead_ctx_cb, + ngtcp2_crypto_delete_crypto_cipher_ctx_cb, + NULL, // recv_datagram + NULL, // ack_datagram + NULL, // lost_datagram + ngtcp2_crypto_get_path_challenge_data_cb, + NULL, // stream_stop_sending + ngtcp2_crypto_version_negotiation_cb, + NULL, // recv_rx_key + NULL // recv_tx_key + }; + + // II. SETTINGS + ngtcp2_settings settings; + ngtcp2_settings_default(&settings); + settings.initial_ts = now; + if (qtable->log_cb != NULL) { + settings.log_printf = user_printf; + } + if (qtable->qlog_dir != NULL) { + settings.qlog_write = user_qlog; + } + if (qtable->udp_payload_limit != 0) { + settings.max_tx_udp_payload_size = qtable->udp_payload_limit; + } + + settings.handshake_timeout = idle_timeout_ns; // NOTE setting handshake timeout to idle_timeout for simplicity + settings.no_pmtud = true; + + // III. 
PARAMS + ngtcp2_transport_params params; + ngtcp2_transport_params_default(¶ms); + + params.disable_active_migration = true; + params.initial_max_streams_uni = 0; + params.initial_max_streams_bidi = 1024; + params.initial_max_stream_data_bidi_local = NGTCP2_MAX_VARINT; + params.initial_max_stream_data_bidi_remote = 102400; + params.initial_max_data = NGTCP2_MAX_VARINT; + + params.max_idle_timeout = idle_timeout_ns; + // params.stateless_reset_token_present = 1; + // params.active_connection_id_limit = 7; + if (odcid != NULL) { + params.original_dcid = *odcid; + params.original_dcid_present = true; + } + + if (retry_sent) { + assert(scid); + params.retry_scid_present = 1; + params.retry_scid = *scid; + } + if (dnssec_random_buffer(params.stateless_reset_token, NGTCP2_STATELESS_RESET_TOKENLEN) != DNSSEC_EOK) { + return KNOT_ERROR; + } + + if (server) { + return ngtcp2_conn_server_new(pconn, dcid, scid, path, version, &callbacks, + &settings, ¶ms, NULL, qconn); + } else { + return ngtcp2_conn_client_new(pconn, dcid, scid, path, version, &callbacks, + &settings, ¶ms, NULL, qconn); + } +} + +_public_ +int knot_quic_client(knot_quic_table_t *table, struct sockaddr_in6 *dest, + struct sockaddr_in6 *via, const char *server_name, + knot_quic_conn_t **out_conn) +{ + ngtcp2_cid scid = { 0 }, dcid = { 0 }; + uint64_t now = get_timestamp(); + + if (table == NULL || dest == NULL || via == NULL || out_conn == NULL) { + return KNOT_EINVAL; + } + + init_random_cid(&scid, 0); + init_random_cid(&dcid, 0); + + knot_quic_conn_t *conn = quic_table_add(NULL, &dcid, table); + if (conn == NULL) { + return ENOMEM; + } + + ngtcp2_path path; + path.remote.addr = (struct sockaddr *)dest; + path.remote.addrlen = addr_len((const struct sockaddr_in6 *)dest); + path.local.addr = (struct sockaddr *)via; + path.local.addrlen = addr_len((const struct sockaddr_in6 *)via); + + int ret = conn_new(&conn->conn, &path, &dcid, &scid, NULL, NGTCP2_PROTO_VER_V1, now, + 5000000000L, conn, false, false); + if 
(ret == KNOT_EOK) { + ret = tls_init_conn_session(conn, false); + } + if (ret == KNOT_EOK && server_name != NULL) { + ret = gnutls_server_name_set(conn->tls_session, GNUTLS_NAME_DNS, + server_name, strlen(server_name)); + } + if (ret != KNOT_EOK) { + knot_quic_table_rem(conn, table); + knot_quic_cleanup(&conn, 1); + return ret; + } + + *out_conn = conn; + return KNOT_EOK; +} + +_public_ +int knot_quic_handle(knot_quic_table_t *table, knot_quic_reply_t *reply, + uint64_t idle_timeout, knot_quic_conn_t **out_conn) +{ + *out_conn = NULL; + if (table == NULL || reply == NULL || out_conn == NULL) { + return KNOT_EINVAL; + } + + ngtcp2_version_cid decoded_cids = { 0 }; + ngtcp2_cid scid = { 0 }, dcid = { 0 }, odcid = { 0 }; + uint64_t now = get_timestamp(); + if (reply->in_payload->iov_len < 1) { + reply->handle_ret = KNOT_EOK; + return KNOT_EOK; + } + int ret = ngtcp2_pkt_decode_version_cid(&decoded_cids, + reply->in_payload->iov_base, + reply->in_payload->iov_len, + SERVER_DEFAULT_SCIDLEN); + if (ret == NGTCP2_ERR_VERSION_NEGOTIATION) { + ret = -QUIC_SEND_VERSION_NEGOTIATION; + goto finish; + } else if (ret != NGTCP2_NO_ERROR) { + goto finish; + } + ngtcp2_cid_init(&dcid, decoded_cids.dcid, decoded_cids.dcidlen); + ngtcp2_cid_init(&scid, decoded_cids.scid, decoded_cids.scidlen); + + knot_quic_conn_t *conn = quic_table_lookup(&dcid, table); + + if (decoded_cids.version == 0 /* short header */ && conn == NULL) { + ret = KNOT_EOK; // NOOP + goto finish; + } + + ngtcp2_path path; + path.remote.addr = (struct sockaddr *)reply->ip_rem; + path.remote.addrlen = addr_len((struct sockaddr_in6 *)reply->ip_rem); + path.local.addr = (struct sockaddr *)reply->ip_loc; + path.local.addrlen = addr_len((struct sockaddr_in6 *)reply->ip_loc); + + if (conn == NULL) { + // new conn + + ngtcp2_pkt_hd header = { 0 }; + ret = ngtcp2_accept(&header, reply->in_payload->iov_base, + reply->in_payload->iov_len); + if (ret == NGTCP2_ERR_RETRY) { + ret = -QUIC_SEND_RETRY; + goto finish; + } else if 
(ret != NGTCP2_NO_ERROR) { // discard packet + ret = KNOT_EOK; + goto finish; + } + + assert(header.type == NGTCP2_PKT_INITIAL); + if (header.tokenlen == 0 && quic_require_retry(table)) { + ret = -QUIC_SEND_RETRY; + goto finish; + } + + if (header.tokenlen > 0) { + ret = ngtcp2_crypto_verify_retry_token( + &odcid, header.token, header.tokenlen, + (const uint8_t *)table->hash_secret, + sizeof(table->hash_secret), header.version, + (const struct sockaddr *)reply->ip_rem, + addr_len((struct sockaddr_in6 *)reply->ip_rem), + &dcid, idle_timeout, now // NOTE setting retry token validity to idle_timeout for simplicity + ); + if (ret != 0) { + ret = KNOT_EOK; + goto finish; + } + } else { + memcpy(&odcid, &dcid, sizeof(odcid)); + } + + // server chooses his CID to his liking + if (!init_unique_cid(&dcid, 0, table)) { + ret = KNOT_ERROR; + goto finish; + } + + conn = quic_table_add(NULL, &dcid, table); + if (conn == NULL) { + ret = KNOT_ENOMEM; + goto finish; + } + + ret = conn_new(&conn->conn, &path, &dcid, &scid, &odcid, decoded_cids.version, + now, idle_timeout, conn, true, header.tokenlen > 0); + if (ret >= 0) { + ret = tls_init_conn_session(conn, true); + } + if (ret < 0) { + knot_quic_table_rem(conn, table); + *out_conn = conn; // we need knot_quic_cleanup() by the caller afterwards + goto finish; + } + } + + ngtcp2_pkt_info pi = { .ecn = reply->ecn, }; + + ret = ngtcp2_conn_read_pkt(conn->conn, &path, &pi, reply->in_payload->iov_base, + reply->in_payload->iov_len, now); + + *out_conn = conn; + if (ret == NGTCP2_ERR_DRAINING) { // received CONNECTION_CLOSE from the counterpart + knot_quic_table_rem(conn, table); + ret = KNOT_EOK; + goto finish; + } else if (ngtcp2_err_is_fatal(ret)) { // connection doomed + if (ret == NGTCP2_ERR_CALLBACK_FAILURE) { + ret = KNOT_EBADCERTKEY; + } else { + ret = KNOT_ECONN; + } + knot_quic_table_rem(conn, table); + goto finish; + } else if (ret != NGTCP2_NO_ERROR) { // non-fatal error, discard packet + ret = KNOT_EOK; + goto finish; + } 
+ + quic_conn_mark_used(conn, table); + + ret = KNOT_EOK; +finish: + reply->handle_ret = ret; + return ret; +} + +static bool stream_exists(knot_quic_conn_t *conn, int64_t stream_id) +{ + // TRICK, we never use stream_user_data + return (ngtcp2_conn_set_stream_user_data(conn->conn, stream_id, NULL) == NGTCP2_NO_ERROR); +} + +static int send_stream(knot_quic_table_t *quic_table, knot_quic_reply_t *rpl, + knot_quic_conn_t *relay, int64_t stream_id, + uint8_t *data, size_t len, bool fin, ngtcp2_ssize *sent) +{ + (void)quic_table; + assert(stream_id >= 0 || (data == NULL && len == 0)); + + while (stream_id >= 0 && !stream_exists(relay, stream_id)) { + int64_t opened = 0; + int ret = ngtcp2_conn_open_bidi_stream(relay->conn, &opened, NULL); + if (ret != KNOT_EOK) { + return ret; + } + assert((bool)(opened == stream_id) == stream_exists(relay, stream_id)); + } + + int ret = rpl->alloc_reply(rpl); + if (ret != KNOT_EOK) { + return ret; + } + + uint32_t fl = ((stream_id >= 0 && fin) ? NGTCP2_WRITE_STREAM_FLAG_FIN : + NGTCP2_WRITE_STREAM_FLAG_NONE); + ngtcp2_vec vec = { .base = data, .len = len }; + ngtcp2_pkt_info pi = { 0 }; + + struct sockaddr_storage path_loc = { 0 }, path_rem = { 0 }; + ngtcp2_path path = { .local = { .addr = (struct sockaddr *)&path_loc, .addrlen = sizeof(path_loc) }, + .remote = { .addr = (struct sockaddr *)&path_rem, .addrlen = sizeof(path_rem) }, + .user_data = NULL }; + bool find_path = (rpl->ip_rem == NULL); + assert(find_path == (bool)(rpl->ip_loc == NULL)); + + ret = ngtcp2_conn_writev_stream(relay->conn, find_path ? &path : NULL, &pi, + rpl->out_payload->iov_base, rpl->out_payload->iov_len, + sent, fl, stream_id, &vec, + (stream_id >= 0 ? 
1 : 0), get_timestamp()); + if (ret <= 0) { + rpl->free_reply(rpl); + return ret; + } + if (*sent < 0) { + *sent = 0; + } + + rpl->out_payload->iov_len = ret; + rpl->ecn = pi.ecn; + if (find_path) { + rpl->ip_loc = &path_loc; + rpl->ip_rem = &path_rem; + } + ret = rpl->send_reply(rpl); + if (find_path) { + rpl->ip_loc = NULL; + rpl->ip_rem = NULL; + } + if (ret == KNOT_EOK) { + return 1; + } + return ret; +} + +static int send_special(knot_quic_table_t *quic_table, knot_quic_reply_t *rpl, + knot_quic_conn_t *relay /* only for connection close */) +{ + int ret = rpl->alloc_reply(rpl); + if (ret != KNOT_EOK) { + return ret; + } + + uint64_t now = get_timestamp(); + ngtcp2_version_cid decoded_cids = { 0 }; + ngtcp2_cid scid = { 0 }, dcid = { 0 }; + int dvc_ret = NGTCP2_ERR_FATAL; + + if ((rpl->handle_ret == -QUIC_SEND_VERSION_NEGOTIATION || + rpl->handle_ret == -QUIC_SEND_RETRY) && + rpl->in_payload != NULL && rpl->in_payload->iov_len > 0) { + dvc_ret = ngtcp2_pkt_decode_version_cid( + &decoded_cids, rpl->in_payload->iov_base, + rpl->in_payload->iov_len, SERVER_DEFAULT_SCIDLEN); + } + + uint8_t rnd = 0; + dnssec_random_buffer(&rnd, sizeof(rnd)); + uint32_t supported_quic[1] = { NGTCP2_PROTO_VER_V1 }; + ngtcp2_cid new_dcid; + uint8_t retry_token[NGTCP2_CRYPTO_MAX_RETRY_TOKENLEN]; + uint8_t stateless_reset_token[NGTCP2_STATELESS_RESET_TOKENLEN]; + uint8_t sreset_rand[NGTCP2_MIN_STATELESS_RESET_RANDLEN]; + dnssec_random_buffer(sreset_rand, sizeof(sreset_rand)); + ngtcp2_ccerr ccerr; + ngtcp2_ccerr_default(&ccerr); + ngtcp2_pkt_info pi = { 0 }; + + struct sockaddr_storage path_loc = { 0 }, path_rem = { 0 }; + ngtcp2_path path = { .local = { .addr = (struct sockaddr *)&path_loc, .addrlen = sizeof(path_loc) }, + .remote = { .addr = (struct sockaddr *)&path_rem, .addrlen = sizeof(path_rem) }, + .user_data = NULL }; + bool find_path = (rpl->ip_rem == NULL); + assert(find_path == (bool)(rpl->ip_loc == NULL)); + assert(!find_path || rpl->handle_ret == 
-QUIC_SEND_EXCESSIVE_LOAD); + + switch (rpl->handle_ret) { + case -QUIC_SEND_VERSION_NEGOTIATION: + if (dvc_ret != NGTCP2_ERR_VERSION_NEGOTIATION) { + rpl->free_reply(rpl); + return KNOT_ERROR; + } + ret = ngtcp2_pkt_write_version_negotiation( + rpl->out_payload->iov_base, rpl->out_payload->iov_len, + rnd, decoded_cids.scid, decoded_cids.scidlen, decoded_cids.dcid, + decoded_cids.dcidlen, supported_quic, + sizeof(supported_quic) / sizeof(*supported_quic) + ); + break; + case -QUIC_SEND_RETRY: + ngtcp2_cid_init(&dcid, decoded_cids.dcid, decoded_cids.dcidlen); + ngtcp2_cid_init(&scid, decoded_cids.scid, decoded_cids.scidlen); + + init_random_cid(&new_dcid, 0); + + ret = ngtcp2_crypto_generate_retry_token( + retry_token, (const uint8_t *)quic_table->hash_secret, + sizeof(quic_table->hash_secret), decoded_cids.version, + (const struct sockaddr *)rpl->ip_rem, sockaddr_len(rpl->ip_rem), + &new_dcid, &dcid, now + ); + + if (ret >= 0) { + ret = ngtcp2_crypto_write_retry( + rpl->out_payload->iov_base, rpl->out_payload->iov_len, + decoded_cids.version, &scid, &new_dcid, &dcid, + retry_token, ret + ); + } + break; + case -QUIC_SEND_STATELESS_RESET: + ret = ngtcp2_pkt_write_stateless_reset( + rpl->out_payload->iov_base, rpl->out_payload->iov_len, + stateless_reset_token, sreset_rand, sizeof(sreset_rand) + ); + break; + case -QUIC_SEND_CONN_CLOSE: + ret = ngtcp2_conn_write_connection_close( + relay->conn, NULL, &pi, rpl->out_payload->iov_base, + rpl->out_payload->iov_len, &ccerr, now + ); + break; + case -QUIC_SEND_EXCESSIVE_LOAD: + ccerr.type = NGTCP2_CCERR_TYPE_APPLICATION; + ccerr.error_code = KNOT_QUIC_ERR_EXCESSIVE_LOAD; + ret = ngtcp2_conn_write_connection_close( + relay->conn, find_path ? 
&path : NULL, &pi, rpl->out_payload->iov_base, + rpl->out_payload->iov_len, &ccerr, now + ); + break; + default: + ret = KNOT_EINVAL; + break; + } + + if (ret < 0) { + rpl->free_reply(rpl); + } else { + rpl->out_payload->iov_len = ret; + rpl->ecn = pi.ecn; + if (find_path) { + rpl->ip_loc = &path_loc; + rpl->ip_rem = &path_rem; + } + ret = rpl->send_reply(rpl); + if (find_path) { + rpl->ip_loc = NULL; + rpl->ip_rem = NULL; + } + } + return ret; +} + +_public_ +int knot_quic_send(knot_quic_table_t *quic_table, knot_quic_conn_t *conn, + knot_quic_reply_t *reply, unsigned max_msgs, + knot_quic_send_flag_t flags) +{ + if (quic_table == NULL || conn == NULL || reply == NULL) { + return KNOT_EINVAL; + } else if (reply->handle_ret < 0) { + return reply->handle_ret; + } else if (reply->handle_ret > 0) { + return send_special(quic_table, reply, conn); + } else if (conn == NULL) { + return KNOT_EINVAL; + } else if (conn->conn == NULL) { + return KNOT_EOK; + } + + if (!(conn->flags & KNOT_QUIC_CONN_HANDSHAKE_DONE)) { + max_msgs = 1; + } + + unsigned sent_msgs = 0, stream_msgs = 0, ignore_last = ((flags & KNOT_QUIC_SEND_IGNORE_LASTBYTE) ? 
1 : 0); + int ret = 1; + for (int64_t si = 0; si < conn->streams_count && sent_msgs < max_msgs; /* NO INCREMENT */) { + int64_t stream_id = 4 * (conn->streams_first + si); + + ngtcp2_ssize sent = 0; + size_t uf = conn->streams[si].unsent_offset; + knot_quic_obuf_t *uo = conn->streams[si].unsent_obuf; + if (uo == NULL) { + si++; + continue; + } + + bool fin = (((node_t *)uo->node.next)->next == NULL) && ignore_last == 0; + ret = send_stream(quic_table, reply, conn, stream_id, + uo->buf + uf, uo->len - uf - ignore_last, + fin, &sent); + if (ret < 0) { + return ret; + } + + sent_msgs++; + stream_msgs++; + if (sent > 0 && ignore_last > 0) { + sent++; + } + if (sent > 0) { + knot_quic_stream_mark_sent(conn, stream_id, sent); + } + + if (stream_msgs >= max_msgs / conn->streams_count) { + stream_msgs = 0; + si++; // if this stream is sending too much, give chance to other streams + } + } + + while (ret == 1) { + ngtcp2_ssize unused = 0; + ret = send_stream(quic_table, reply, conn, -1, NULL, 0, false, &unused); + } + + return ret; +} diff --git a/src/libknot/quic/quic.h b/src/libknot/quic/quic.h new file mode 100644 index 0000000..29a02e0 --- /dev/null +++ b/src/libknot/quic/quic.h @@ -0,0 +1,213 @@ +/* Copyright (C) 2023 CZ.NIC, z.s.p.o. <knot-dns@labs.nic.cz> + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see <https://www.gnu.org/licenses/>. + */ + +/*! + * \file + * + * \brief General QUIC functionality. 
+ * + * \addtogroup quic + * @{ + */ + +#pragma once + +#include <sys/types.h> +#include <netinet/in.h> + +#include "libknot/quic/quic_conn.h" + +#define KNOT_QUIC_PIN_LEN 32 + +#define KNOT_QUIC_HANDLE_RET_CLOSE 2000 + +// RFC 9250 +#define KNOT_QUIC_ERR_EXCESSIVE_LOAD 0x4 + +struct gnutls_x509_crt_int; +struct knot_quic_creds; +struct knot_quic_session; + +typedef enum { + KNOT_QUIC_SEND_IGNORE_LASTBYTE = (1 << 0), +} knot_quic_send_flag_t; + +typedef struct knot_quic_reply { + const struct sockaddr_storage *ip_rem; + const struct sockaddr_storage *ip_loc; + struct iovec *in_payload; + struct iovec *out_payload; + void *in_ctx; + void *out_ctx; + + void *sock; + int handle_ret; + uint8_t ecn; + + int (*alloc_reply)(struct knot_quic_reply *); + int (*send_reply)(struct knot_quic_reply *); + void (*free_reply)(struct knot_quic_reply *); +} knot_quic_reply_t; + +/*! + * \brief Check if session ticket can be taken out of this connection. + */ +bool knot_quic_session_available(knot_quic_conn_t *conn); + +/*! + * \brief Gets data needed for session resumption. + * + * \param conn QUIC connection. + * + * \return QUIC session context. + */ +struct knot_quic_session *knot_quic_session_save(knot_quic_conn_t *conn); + +/*! + * \brief Loads data needed for session resumption. + * + * \param conn QUIC connection. + * \param session QUIC session context. + * + * \return KNOT_E* + */ +int knot_quic_session_load(knot_quic_conn_t *conn, struct knot_quic_session *session); + +/*! + * \brief Init server TLS certificate for DoQ. + * + * \param cert_file X509 certificate PEM file path/name (NULL if auto-generated). + * \param key_file Key PEM file path/name. + * + * \return Initialized creds. + */ +struct knot_quic_creds *knot_quic_init_creds(const char *cert_file, + const char *key_file); + +/*! + * \brief Init peer TLS certificate for DoQ. + * + * \param local_creds Local credentials if server. + * \param peer_pin Optional peer certificate pin to check. 
+ * \param peer_pin_len Length of the peer pin. Set 0 if not specified. + * + * \return Initialized creds. + */ +struct knot_quic_creds *knot_quic_init_creds_peer(const struct knot_quic_creds *local_creds, + const uint8_t *peer_pin, + uint8_t peer_pin_len); + +/*! + * \brief Gets the certificate from credentials. + * + * \param creds TLS credentials. + * \param cert Output certificate. + * + * \return KNOT_E* + */ +int knot_quic_creds_cert(struct knot_quic_creds *creds, struct gnutls_x509_crt_int **cert); + +/*! + * \brief Deinit server TLS certificate for DoQ. + */ +void knot_quic_free_creds(struct knot_quic_creds *creds); + +/*! + * \brief Returns timeout value for the connection. + */ +uint64_t quic_conn_get_timeout(knot_quic_conn_t *conn); + +/*! + * \brief Check if connection timed out due to inactivity. + * + * \param conn QUIC connection. + * \param now In/out: current monotonic time. Use zero first and reuse for + * next calls for optimization. + * + * \return True if the connection timed out idle. + */ +bool quic_conn_timeout(knot_quic_conn_t *conn, uint64_t *now); + +int64_t knot_quic_conn_next_timeout(knot_quic_conn_t *conn); + +int knot_quic_hanle_expiry(knot_quic_conn_t *conn); + +/*! + * \brief Returns measured connection RTT in usecs. + */ +uint32_t knot_quic_conn_rtt(knot_quic_conn_t *conn); + +/*! + * \brief Returns the port from local-address of given conn IN BIG ENDIAN. + */ +uint16_t knot_quic_conn_local_port(knot_quic_conn_t *conn); + +/*! + * \brief Gets local or remote certificate pin. + * + * \note Zero output pin_size value means no certificate available or error. + * + * \param conn QUIC connection. + * \param pin Output certificate pin. + * \param pin_size Input size of the storage / output size of the stored pin. + * \param local Local or remote certificate indication. + */ +void knot_quic_conn_pin(knot_quic_conn_t *conn, uint8_t *pin, size_t *pin_size, bool local); + +/*! + * \brief Create new outgoing QUIC connection. 
+ * + * \param table QUIC connections table to be added to. + * \param dest Destination IP address. + * \param via Source IP address. + * \param server_name Optional server name. + * \param out_conn Out: new connection. + * + * \return KNOT_E* + */ +int knot_quic_client(knot_quic_table_t *table, struct sockaddr_in6 *dest, + struct sockaddr_in6 *via, const char *server_name, + knot_quic_conn_t **out_conn); + +/*! + * \brief Handle incoming QUIC packet. + * + * \param table QUIC connectoins table. + * \param reply Incoming packet info. + * \param idle_timeout Configured idle timeout for connections (in nanoseconds). + * \param out_conn Out: QUIC connection that this packet belongs to. + * + * \return KNOT_E* or -QUIC_SEND_* + */ +int knot_quic_handle(knot_quic_table_t *table, knot_quic_reply_t *reply, + uint64_t idle_timeout, knot_quic_conn_t **out_conn); + +/*! + * \brief Send outgoing QUIC packet(s) for a connection. + * + * \param quic_table QUIC connection table. + * \param conn QUIC connection. + * \param reply Incoming/outgoing packet info. + * \param max_msgs Maxmimum packets to be sent. + * \param flags Various options for special use-cases. + * + * \return KNOT_E* + */ +int knot_quic_send(knot_quic_table_t *quic_table, knot_quic_conn_t *conn, + knot_quic_reply_t *reply, unsigned max_msgs, + knot_quic_send_flag_t flags); + +/*! @} */ diff --git a/src/libknot/quic/quic_conn.c b/src/libknot/quic/quic_conn.c new file mode 100644 index 0000000..6616573 --- /dev/null +++ b/src/libknot/quic/quic_conn.c @@ -0,0 +1,577 @@ +/* Copyright (C) 2023 CZ.NIC, z.s.p.o. <knot-dns@labs.nic.cz> + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. 
+ + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see <https://www.gnu.org/licenses/>. + */ + +#include <assert.h> +#include <gnutls/gnutls.h> +#include <ngtcp2/ngtcp2.h> +#include <stdio.h> +#include <string.h> + +#include "libknot/quic/quic_conn.h" + +#include "contrib/macros.h" +#include "contrib/openbsd/siphash.h" +#include "contrib/ucw/heap.h" +#include "contrib/ucw/lists.h" +#include "libdnssec/random.h" +#include "libknot/attribute.h" +#include "libknot/error.h" +#include "libknot/quic/quic.h" +#include "libknot/xdp/tcp_iobuf.h" +#include "libknot/wire.h" + +#define STREAM_INCR 4 // DoQ only uses client-initiated bi-directional streams, so stream IDs increment by four +#define BUCKETS_PER_CONNS 8 // Each connecion has several dCIDs, and each CID takes one hash table bucket. 
+ +static int cmp_expiry_heap_nodes(void *c1, void *c2) +{ + if (((knot_quic_conn_t *)c1)->next_expiry < ((knot_quic_conn_t *)c2)->next_expiry) return -1; + if (((knot_quic_conn_t *)c1)->next_expiry > ((knot_quic_conn_t *)c2)->next_expiry) return 1; + return 0; +} + +_public_ +knot_quic_table_t *knot_quic_table_new(size_t max_conns, size_t max_ibufs, size_t max_obufs, + size_t udp_payload, struct knot_quic_creds *creds) +{ + size_t table_size = max_conns * BUCKETS_PER_CONNS; + + knot_quic_table_t *res = calloc(1, sizeof(*res) + table_size * sizeof(res->conns[0])); + if (res == NULL || creds == NULL) { + free(res); + return NULL; + } + + res->size = table_size; + res->max_conns = max_conns; + res->ibufs_max = max_ibufs; + res->obufs_max = max_obufs; + res->udp_payload_limit = udp_payload; + + res->expiry_heap = malloc(sizeof(struct heap)); + if (res->expiry_heap == NULL || !heap_init(res->expiry_heap, cmp_expiry_heap_nodes, 0)) { + free(res->expiry_heap); + free(res); + return NULL; + } + + res->creds = creds; + + res->hash_secret[0] = dnssec_random_uint64_t(); + res->hash_secret[1] = dnssec_random_uint64_t(); + res->hash_secret[2] = dnssec_random_uint64_t(); + res->hash_secret[3] = dnssec_random_uint64_t(); + + return res; +} + +_public_ +void knot_quic_table_free(knot_quic_table_t *table) +{ + if (table != NULL) { + while (!EMPTY_HEAP(table->expiry_heap)) { + knot_quic_conn_t *c = *(knot_quic_conn_t **)HHEAD(table->expiry_heap); + knot_quic_table_rem(c, table); + knot_quic_cleanup(&c, 1); + } + assert(table->usage == 0); + assert(table->pointers == 0); + assert(table->ibufs_size == 0); + assert(table->obufs_size == 0); + + heap_deinit(table->expiry_heap); + free(table->expiry_heap); + free(table); + } +} + +static void send_excessive_load(knot_quic_conn_t *conn, struct knot_quic_reply *reply, + knot_quic_table_t *table) +{ + if (reply != NULL) { + reply->handle_ret = KNOT_QUIC_ERR_EXCESSIVE_LOAD; + (void)knot_quic_send(table, conn, reply, 0, 0); + } +} + +_public_ 
+void knot_quic_table_sweep(knot_quic_table_t *table, struct knot_quic_reply *sweep_reply, + struct knot_sweep_stats *stats) +{ + uint64_t now = 0; + if (table == NULL || stats == NULL) { + return; + } + + while (!EMPTY_HEAP(table->expiry_heap)) { + knot_quic_conn_t *c = *(knot_quic_conn_t **)HHEAD(table->expiry_heap); + if (table->usage > table->max_conns) { + knot_sweep_stats_incr(stats, KNOT_SWEEP_CTR_LIMIT_CONN); + send_excessive_load(c, sweep_reply, table); + knot_quic_table_rem(c, table); + } else if (table->obufs_size > table->obufs_max) { + knot_sweep_stats_incr(stats, KNOT_SWEEP_CTR_LIMIT_OBUF); + send_excessive_load(c, sweep_reply, table); + knot_quic_table_rem(c, table); + } else if (table->ibufs_size > table->ibufs_max) { + knot_sweep_stats_incr(stats, KNOT_SWEEP_CTR_LIMIT_IBUF); + send_excessive_load(c, sweep_reply, table); + knot_quic_table_rem(c, table); + } else if (quic_conn_timeout(c, &now)) { + int ret = ngtcp2_conn_handle_expiry(c->conn, now); + if (ret != NGTCP2_NO_ERROR) { // usually NGTCP2_ERR_IDLE_CLOSE or NGTCP2_ERR_HANDSHAKE_TIMEOUT + knot_sweep_stats_incr(stats, KNOT_SWEEP_CTR_TIMEOUT); + knot_quic_table_rem(c, table); + } else { + if (sweep_reply != NULL) { + sweep_reply->handle_ret = KNOT_EOK; + (void)knot_quic_send(table, c, sweep_reply, 0, 0); + } + quic_conn_mark_used(c, table); + } + } + knot_quic_cleanup(&c, 1); + + if (*(knot_quic_conn_t **)HHEAD(table->expiry_heap) == c) { // HHEAD already handled, NOOP, avoid infinite loop + break; + } + } +} + +static uint64_t cid2hash(const ngtcp2_cid *cid, knot_quic_table_t *table) +{ + SIPHASH_CTX ctx; + SipHash24_Init(&ctx, (const SIPHASH_KEY *)(table->hash_secret)); + SipHash24_Update(&ctx, cid->data, MIN(cid->datalen, 8)); + uint64_t ret = SipHash24_End(&ctx); + return ret; +} + +knot_quic_cid_t **quic_table_insert(knot_quic_conn_t *conn, const ngtcp2_cid *cid, + knot_quic_table_t *table) +{ + uint64_t hash = cid2hash(cid, table); + + knot_quic_cid_t *cidobj = malloc(sizeof(*cidobj)); + 
if (cidobj == NULL) { + return NULL; + } + _Static_assert(sizeof(*cid) <= sizeof(cidobj->cid_placeholder), "insufficient placeholder for CID struct"); + memcpy(cidobj->cid_placeholder, cid, sizeof(*cid)); + cidobj->conn = conn; + + knot_quic_cid_t **addto = table->conns + (hash % table->size); + + cidobj->next = *addto; + *addto = cidobj; + table->pointers++; + + return addto; +} + +knot_quic_conn_t *quic_table_add(ngtcp2_conn *ngconn, const ngtcp2_cid *cid, + knot_quic_table_t *table) +{ + knot_quic_conn_t *conn = calloc(1, sizeof(*conn)); + if (conn == NULL) { + return NULL; + } + + conn->conn = ngconn; + conn->quic_table = table; + conn->stream_inprocess = -1; + conn->qlog_fd = -1; + + conn->next_expiry = UINT64_MAX; + if (!heap_insert(table->expiry_heap, (heap_val_t *)conn)) { + free(conn); + return NULL; + } + + knot_quic_cid_t **addto = quic_table_insert(conn, cid, table); + if (addto == NULL) { + heap_delete(table->expiry_heap, heap_find(table->expiry_heap, (heap_val_t *)conn)); + free(conn); + return NULL; + } + table->usage++; + + return conn; +} + +knot_quic_cid_t **quic_table_lookup2(const ngtcp2_cid *cid, knot_quic_table_t *table) +{ + uint64_t hash = cid2hash(cid, table); + + knot_quic_cid_t **res = table->conns + (hash % table->size); + while (*res != NULL && !ngtcp2_cid_eq(cid, (const ngtcp2_cid *)(*res)->cid_placeholder)) { + res = &(*res)->next; + } + return res; +} + +knot_quic_conn_t *quic_table_lookup(const ngtcp2_cid *cid, knot_quic_table_t *table) +{ + knot_quic_cid_t **pcid = quic_table_lookup2(cid, table); + assert(pcid != NULL); + return *pcid == NULL ? 
NULL : (*pcid)->conn; +} + +static void conn_heap_reschedule(knot_quic_conn_t *conn, knot_quic_table_t *table) +{ + heap_replace(table->expiry_heap, heap_find(table->expiry_heap, (heap_val_t *)conn), (heap_val_t *)conn); +} + +void quic_conn_mark_used(knot_quic_conn_t *conn, knot_quic_table_t *table) +{ + conn->next_expiry = quic_conn_get_timeout(conn); + conn_heap_reschedule(conn, table); +} + +void quic_table_rem2(knot_quic_cid_t **pcid, knot_quic_table_t *table) +{ + knot_quic_cid_t *cid = *pcid; + *pcid = cid->next; + free(cid); + table->pointers--; +} + +_public_ +void knot_quic_conn_stream_free(knot_quic_conn_t *conn, int64_t stream_id) +{ + knot_quic_stream_t *s = knot_quic_conn_get_stream(conn, stream_id, false); + if (s != NULL && s->inbuf.iov_len > 0) { + free(s->inbuf.iov_base); + conn->ibufs_size -= buffer_alloc_size(s->inbuf.iov_len); + conn->quic_table->ibufs_size -= buffer_alloc_size(s->inbuf.iov_len); + memset(&s->inbuf, 0, sizeof(s->inbuf)); + } + while (s != NULL && s->inbufs != NULL) { + void *tofree = s->inbufs; + s->inbufs = s->inbufs->next; + free(tofree); + } + knot_quic_stream_ack_data(conn, stream_id, SIZE_MAX, false); +} + +_public_ +void knot_quic_table_rem(knot_quic_conn_t *conn, knot_quic_table_t *table) +{ + if (conn == NULL || conn->conn == NULL || table == NULL) { + return; + } + + if (conn->streams_count == -1) { // kxdpgun special + conn->streams_count = 1; + } + for (ssize_t i = conn->streams_count - 1; i >= 0; i--) { + knot_quic_conn_stream_free(conn, (i + conn->streams_first) * 4); + } + assert(conn->streams_count <= 0); + assert(conn->obufs_size == 0); + + size_t num_scid = ngtcp2_conn_get_scid(conn->conn, NULL); + ngtcp2_cid *scids = calloc(num_scid, sizeof(*scids)); + ngtcp2_conn_get_scid(conn->conn, scids); + + for (size_t i = 0; i < num_scid; i++) { + knot_quic_cid_t **pcid = quic_table_lookup2(&scids[i], table); + assert(pcid != NULL); + if (*pcid == NULL) { + continue; + } + assert((*pcid)->conn == conn); + 
quic_table_rem2(pcid, table); + } + + int pos = heap_find(table->expiry_heap, (heap_val_t *)conn); + heap_delete(table->expiry_heap, pos); + + free(scids); + + gnutls_deinit(conn->tls_session); + ngtcp2_conn_del(conn->conn); + conn->conn = NULL; + + table->usage--; +} + +_public_ +knot_quic_stream_t *knot_quic_conn_get_stream(knot_quic_conn_t *conn, + int64_t stream_id, bool create) +{ + if (stream_id % 4 != 0 || conn == NULL) { + return NULL; + } + stream_id /= 4; + + if (conn->streams_first > stream_id) { + return NULL; + } + if (conn->streams_count > stream_id - conn->streams_first) { + return &conn->streams[stream_id - conn->streams_first]; + } + + if (create) { + size_t new_streams_count; + knot_quic_stream_t *new_streams; + + if (conn->streams_count == 0) { + new_streams = malloc(sizeof(new_streams[0])); + if (new_streams == NULL) { + return NULL; + } + new_streams_count = 1; + conn->streams_first = stream_id; + } else { + new_streams_count = stream_id + 1 - conn->streams_first; + if (new_streams_count > MAX_STREAMS_PER_CONN) { + return NULL; + } + new_streams = realloc(conn->streams, new_streams_count * sizeof(*new_streams)); + if (new_streams == NULL) { + return NULL; + } + } + + for (knot_quic_stream_t *si = new_streams; + si < new_streams + conn->streams_count; si++) { + if (si->obufs_size == 0) { + init_list((list_t *)&si->outbufs); + } else { + fix_list((list_t *)&si->outbufs); + } + } + + for (knot_quic_stream_t *si = new_streams + conn->streams_count; + si < new_streams + new_streams_count; si++) { + memset(si, 0, sizeof(*si)); + init_list((list_t *)&si->outbufs); + } + conn->streams = new_streams; + conn->streams_count = new_streams_count; + + return &conn->streams[stream_id - conn->streams_first]; + } + return NULL; +} + +_public_ +knot_quic_stream_t *knot_quic_conn_new_stream(knot_quic_conn_t *conn) +{ + int64_t new_id = (conn->streams_first + conn->streams_count) * 4; + return knot_quic_conn_get_stream(conn, new_id, true); +} + +static void 
stream_inprocess(knot_quic_conn_t *conn, knot_quic_stream_t *stream) +{ + int16_t idx = stream - conn->streams; + assert(idx >= 0); + assert(idx < conn->streams_count); + if (conn->stream_inprocess < 0 || conn->stream_inprocess > idx) { + conn->stream_inprocess = idx; + } +} + +static void stream_outprocess(knot_quic_conn_t *conn, knot_quic_stream_t *stream) +{ + if (stream != &conn->streams[conn->stream_inprocess]) { + return; + } + + for (int16_t idx = conn->stream_inprocess + 1; idx < conn->streams_count; idx++) { + stream = &conn->streams[idx]; + if (stream->inbufs != NULL) { + conn->stream_inprocess = stream - conn->streams; + return; + } + } + conn->stream_inprocess = -1; +} + +int knot_quic_stream_recv_data(knot_quic_conn_t *conn, int64_t stream_id, + const uint8_t *data, size_t len, bool fin) +{ + if (len == 0 || conn == NULL || data == NULL) { + return KNOT_EINVAL; + } + + knot_quic_stream_t *stream = knot_quic_conn_get_stream(conn, stream_id, true); + if (stream == NULL) { + return KNOT_ENOENT; + } + + struct iovec in = { (void *)data, len }; + ssize_t prev_ibufs_size = conn->ibufs_size; + int ret = knot_tcp_inbufs_upd(&stream->inbuf, in, true, + &stream->inbufs, &conn->ibufs_size); + conn->quic_table->ibufs_size += (ssize_t)conn->ibufs_size - prev_ibufs_size; + if (ret != KNOT_EOK) { + return ret; + } + + if (fin && stream->inbufs == NULL) { + return KNOT_ESEMCHECK; + } + + if (stream->inbufs != NULL) { + stream_inprocess(conn, stream); + } + return KNOT_EOK; +} + +_public_ +knot_quic_stream_t *knot_quic_stream_get_process(knot_quic_conn_t *conn, + int64_t *stream_id) +{ + if (conn == NULL || conn->stream_inprocess < 0) { + return NULL; + } + + knot_quic_stream_t *stream = &conn->streams[conn->stream_inprocess]; + *stream_id = (conn->streams_first + conn->stream_inprocess) * 4; + stream_outprocess(conn, stream); + return stream; +} + +_public_ +uint8_t *knot_quic_stream_add_data(knot_quic_conn_t *conn, int64_t stream_id, + uint8_t *data, size_t len) +{ + 
knot_quic_stream_t *s = knot_quic_conn_get_stream(conn, stream_id, true); + if (s == NULL) { + return NULL; + } + + size_t prefix = sizeof(uint16_t); + + knot_quic_obuf_t *obuf = malloc(sizeof(*obuf) + prefix + len); + if (obuf == NULL) { + return NULL; + } + + obuf->len = len + prefix; + knot_wire_write_u16(obuf->buf, len); + if (data != NULL) { + memcpy(obuf->buf + prefix, data, len); + } + + list_t *list = (list_t *)&s->outbufs; + if (EMPTY_LIST(*list)) { + s->unsent_obuf = obuf; + } + add_tail((list_t *)&s->outbufs, (node_t *)obuf); + s->obufs_size += obuf->len; + conn->obufs_size += obuf->len; + conn->quic_table->obufs_size += obuf->len; + + return obuf->buf + prefix; +} + +void knot_quic_stream_ack_data(knot_quic_conn_t *conn, int64_t stream_id, + size_t end_acked, bool keep_stream) +{ + knot_quic_stream_t *s = knot_quic_conn_get_stream(conn, stream_id, false); + if (s == NULL) { + return; + } + + list_t *obs = (list_t *)&s->outbufs; + + knot_quic_obuf_t *first; + while (!EMPTY_LIST(*obs) && end_acked >= (first = HEAD(*obs))->len + s->first_offset) { + rem_node((node_t *)first); + assert(HEAD(*obs) != first); // help CLANG analyzer understand what rem_node did and that further usage of HEAD(*obs) is safe + s->obufs_size -= first->len; + conn->obufs_size -= first->len; + conn->quic_table->obufs_size -= first->len; + s->first_offset += first->len; + free(first); + if (s->unsent_obuf == first) { + s->unsent_obuf = EMPTY_LIST(*obs) ? 
NULL : HEAD(*obs); + s->unsent_offset = 0; + } + } + + if (EMPTY_LIST(*obs) && !keep_stream) { + stream_outprocess(conn, s); + memset(s, 0, sizeof(*s)); + init_list((list_t *)&s->outbufs); + while (s = &conn->streams[0], s->inbuf.iov_len == 0 && s->inbufs == NULL && s->obufs_size == 0) { + assert(conn->streams_count > 0); + conn->streams_count--; + + if (conn->streams_count == 0) { + free(conn->streams); + conn->streams = 0; + conn->streams_first = 0; + break; + } else { + conn->streams_first++; + conn->stream_inprocess--; + memmove(s, s + 1, sizeof(*s) * conn->streams_count); + // possible realloc to shrink allocated space, but probably useless + for (knot_quic_stream_t *si = s; si < s + conn->streams_count; si++) { + if (si->obufs_size == 0) { + init_list((list_t *)&si->outbufs); + } else { + fix_list((list_t *)&si->outbufs); + } + } + } + } + } +} + +void knot_quic_stream_mark_sent(knot_quic_conn_t *conn, int64_t stream_id, + size_t amount_sent) +{ + knot_quic_stream_t *s = knot_quic_conn_get_stream(conn, stream_id, false); + if (s == NULL) { + return; + } + + s->unsent_offset += amount_sent; + assert(s->unsent_offset <= s->unsent_obuf->len); + if (s->unsent_offset == s->unsent_obuf->len) { + s->unsent_offset = 0; + s->unsent_obuf = (knot_quic_obuf_t *)s->unsent_obuf->node.next; + if (s->unsent_obuf->node.next == NULL) { // already behind the tail of list + s->unsent_obuf = NULL; + } + } +} + +_public_ +void knot_quic_cleanup(knot_quic_conn_t *conns[], size_t n_conns) +{ + for (size_t i = 0; i < n_conns; i++) { + if (conns[i] != NULL && conns[i]->conn == NULL) { + free(conns[i]); + for (size_t j = i + 1; j < n_conns; j++) { + if (conns[j] == conns[i]) { + conns[j] = NULL; + } + } + } + } +} + +bool quic_require_retry(knot_quic_table_t *table) +{ + (void)table; + return false; +} diff --git a/src/libknot/quic/quic_conn.h b/src/libknot/quic/quic_conn.h new file mode 100644 index 0000000..64ead51 --- /dev/null +++ b/src/libknot/quic/quic_conn.h @@ -0,0 +1,326 @@ 
+/* Copyright (C) 2023 CZ.NIC, z.s.p.o. <knot-dns@labs.nic.cz> + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see <https://www.gnu.org/licenses/>. + */ + +/*! + * \file + * + * \brief QUIC connection management. + * + * \addtogroup quic + * @{ + */ + +#pragma once + +#include <stdbool.h> +#include <stdint.h> +#include <sys/uio.h> + +#define MAX_STREAMS_PER_CONN 10 // this limits the number of un-finished streams per conn (i.e. if response has been recvd with FIN, it doesn't count) + +struct ngtcp2_cid; // declaration taken from wherever in ngtcp2 +struct knot_quic_creds; +struct knot_quic_reply; +struct knot_sweep_stats; + +// those are equivalent to contrib/ucw/lists.h , just must not be included. 
+typedef struct knot_quic_ucw_node { + struct knot_quic_ucw_node *next, *prev; +} knot_quic_ucw_node_t; +typedef struct knot_quic_ucw_list { + knot_quic_ucw_node_t head, tail; +} knot_quic_ucw_list_t; + +typedef struct { + void *get_conn; + void *user_data; +} nc_conn_ref_placeholder_t; + +typedef struct { + knot_quic_ucw_node_t node; + size_t len; + uint8_t buf[]; +} knot_quic_obuf_t; + +typedef struct { + struct iovec inbuf; + struct knot_tcp_inbufs_upd_res *inbufs; + size_t firstib_consumed; + knot_quic_ucw_list_t outbufs; + size_t obufs_size; + + knot_quic_obuf_t *unsent_obuf; + size_t first_offset; + size_t unsent_offset; +} knot_quic_stream_t; + +typedef enum { + KNOT_QUIC_CONN_HANDSHAKE_DONE = (1 << 0), + KNOT_QUIC_CONN_SESSION_TAKEN = (1 << 1), +} knot_quic_conn_flag_t; + +typedef struct knot_quic_conn { + int heap_node_placeholder; // MUST be first field of the struct + uint64_t next_expiry; + + nc_conn_ref_placeholder_t conn_ref; // placeholder for internal struct ngtcp2_crypto_conn_ref + + struct ngtcp2_conn *conn; + + struct gnutls_session_int *tls_session; + + knot_quic_stream_t *streams; + int16_t streams_count; // number of allocated stream structures + int16_t stream_inprocess; // index of first stream that has complete incoming data to be processed (aka inbuf_fin) + knot_quic_conn_flag_t flags; + int qlog_fd; + int64_t streams_first; // stream_id/4 of first allocated stream + size_t ibufs_size; + size_t obufs_size; + + struct knot_quic_table *quic_table; + + struct knot_quic_conn *next; +} knot_quic_conn_t; + +typedef struct knot_quic_cid { + uint8_t cid_placeholder[32]; + knot_quic_conn_t *conn; + struct knot_quic_cid *next; +} knot_quic_cid_t; + +typedef struct knot_quic_table { + uint32_t flags; // unused yet + size_t size; + size_t usage; + size_t pointers; + size_t max_conns; + size_t ibufs_max; + size_t obufs_max; + size_t ibufs_size; + size_t obufs_size; + size_t udp_payload_limit; // for simplicity not distinguishing IPv4/6 + void 
(*log_cb)(const char *); + const char *qlog_dir; + uint64_t hash_secret[4]; + struct knot_quic_creds *creds; + struct heap *expiry_heap; + knot_quic_cid_t *conns[]; +} knot_quic_table_t; + +/*! + * \brief Allocate QUIC connections hash table. + * + * \param max_conns Maximum number of connections. + * \param max_ibufs Maximum size of buffers for fragmented incoming DNS msgs. + * \param max_obufs Maximum size of buffers for un-ACKed outgoing data. + * \param udp_payload Maximum UDP payload size (both IPv4 and 6). + * \param creds QUIC crypto context. + * + * \return Allocated table, or NULL. + */ +knot_quic_table_t *knot_quic_table_new(size_t max_conns, size_t max_ibufs, size_t max_obufs, + size_t udp_payload, struct knot_quic_creds *creds); + +/*! + * \brief Free QUIC table including its contents. + * + * \param table Table to be freed. + */ +void knot_quic_table_free(knot_quic_table_t *table); + +/*! + * \brief Close timed out connections and some oldest ones if table full. + * + * \param table QUIC table to be cleaned up. + * \param sweep_reply Optional: reply structure to send sweep-initiated packets to the client. + * \param stats Out: sweep statistics. + */ +void knot_quic_table_sweep(knot_quic_table_t *table, struct knot_quic_reply *sweep_reply, + struct knot_sweep_stats *stats); + +/*! + * \brief Add new connection/CID link to table. + * + * \param conn QUIC connection linked. + * \param cid New CID to be added. + * \param table QUIC table to be modified. + * + * \return Pointer to the CID reference in table, or NULL. + */ +knot_quic_cid_t **quic_table_insert(knot_quic_conn_t *conn, + const struct ngtcp2_cid *cid, + knot_quic_table_t *table); + +/*! + * \brief Add new connection to the table, allocating conn struct. + * + * \param ngconn Ngtcp2 conn struct. + * \param cid CID to be linked (usually oscid for server). + * \param table QUIC table to be modified. + * + * \return Allocated (and linked) Knot conn struct, or NULL. 
+ */ +knot_quic_conn_t *quic_table_add(struct ngtcp2_conn *ngconn, + const struct ngtcp2_cid *cid, + knot_quic_table_t *table); + +/*! + * \brief Lookup connection/CID link in table. + * + * \param cid CID to be searched for. + * \param table QUIC table. + * + * \return Pointer to the CID reference in table, or NULL. + */ +knot_quic_cid_t **quic_table_lookup2(const struct ngtcp2_cid *cid, + knot_quic_table_t *table); + +/*! + * \brief Lookup QUIC connection in table. + * + * \param cid CID to be searched for. + * \param table QUIC table. + * + * \return Connection that the CID belongs to, or NULL. + */ +knot_quic_conn_t *quic_table_lookup(const struct ngtcp2_cid *cid, + knot_quic_table_t *table); + +/*! + * \brief Re-schedule connection expiry timer. + * + * \param conn QUIC connection whose expiry timer is re-scheduled. + * \param table QUIC table. + */ +void quic_conn_mark_used(knot_quic_conn_t *conn, knot_quic_table_t *table); + +/*! + * \brief Remove connection/CID link from table. + * + * \param pcid CID to be removed. + * \param table QUIC table. + */ +void quic_table_rem2(knot_quic_cid_t **pcid, knot_quic_table_t *table); + +/*! + * \brief Remove specified stream from QUIC connection, freeing all buffers. + * + * \param conn QUIC connection to remove from. + * \param stream_id Stream QUIC ID. + */ +void knot_quic_conn_stream_free(knot_quic_conn_t *conn, int64_t stream_id); + +/*! + * \brief Remove and deinitialize connection completely. + * + * \param conn Connection to be removed. + * \param table Table to remove from. + */ +void knot_quic_table_rem(knot_quic_conn_t *conn, knot_quic_table_t *table); + +/*! + * \brief Fetch or initialize a QUIC stream. + * + * \param conn QUIC connection. + * \param stream_id Stream QUIC ID. + * \param create Trigger stream creation if it does not exist. + * + * \return Stream or NULL. + */ +knot_quic_stream_t *knot_quic_conn_get_stream(knot_quic_conn_t *conn, + int64_t stream_id, bool create); + +/*! + * \brief Create a new, subsequent stream. + * + * \param conn QUIC connection. + */ +knot_quic_stream_t *knot_quic_conn_new_stream(knot_quic_conn_t *conn); + +/*! 
+ * \brief Process incoming stream data to stream structure. + * + * \param conn QUIC connection that has received data. + * \param stream_id Stream QUIC ID of the incoming data. + * \param data Incoming payload data. + * \param len Incoming payload data length. + * \param fin FIN flag set for incoming data. + * + * \return KNOT_E* + */ +int knot_quic_stream_recv_data(knot_quic_conn_t *conn, int64_t stream_id, + const uint8_t *data, size_t len, bool fin); + +/*! + * \brief Get next stream which has pending incoming data to be processed. + * + * \param conn QUIC connection. + * \param stream_id Out: stream QUIC ID of the returned stream. + * + * \return Stream with incoming data. + */ +knot_quic_stream_t *knot_quic_stream_get_process(knot_quic_conn_t *conn, + int64_t *stream_id); + +/*! + * \brief Add outgoing data to the stream for sending. + * + * \param conn QUIC connection that shall send data. + * \param stream_id Stream ID for outgoing data. + * \param data Data payload. + * \param len Data payload length. + * + * \return NULL if error, or pointer to the data in outgoing buffer. + */ +uint8_t *knot_quic_stream_add_data(knot_quic_conn_t *conn, int64_t stream_id, + uint8_t *data, size_t len); + +/*! + * \brief Mark outgoing data as acknowledged after ACK received. + * + * \param conn QUIC connection that received ACK. + * \param stream_id Stream ID of ACKed data. + * \param end_acked Offset of ACKed data + ACKed length. + * \param keep_stream Don't free the stream even when ACKed all outgoing data. + */ +void knot_quic_stream_ack_data(knot_quic_conn_t *conn, int64_t stream_id, + size_t end_acked, bool keep_stream); + +/*! + * \brief Mark outgoing data as sent. + * + * \param conn QUIC connection that sent data. + * \param stream_id Stream ID of sent data. + * \param amount_sent Length of sent data. + */ +void knot_quic_stream_mark_sent(knot_quic_conn_t *conn, int64_t stream_id, + size_t amount_sent); + +/*! + * \brief Free rest of resources of closed conns. 
+ * + * \param conns Array with recently used conns (possibly NULLs). + * \param n_conns Size of the array. + */ +void knot_quic_cleanup(knot_quic_conn_t *conns[], size_t n_conns); + +/*! + * \brief Toggle sending Retry packet as a reaction to Initial packet of new connection. + * + * \param table Connection table. + * + * \return True if instead of continuing handshake, Retry packet shall be sent + * to verify counterpart's address. + */ +bool quic_require_retry(knot_quic_table_t *table); + +/*! @} */ |