summaryrefslogtreecommitdiffstats
path: root/lib/isc/netmgr
diff options
context:
space:
mode:
authorDaniel Baumann <daniel.baumann@progress-linux.org>2024-04-07 15:59:48 +0000
committerDaniel Baumann <daniel.baumann@progress-linux.org>2024-04-07 15:59:48 +0000
commit3b9b6d0b8e7f798023c9d109c490449d528fde80 (patch)
tree2e1c188dd7b8d7475cd163de9ae02c428343669b /lib/isc/netmgr
parentInitial commit. (diff)
downloadbind9-3b9b6d0b8e7f798023c9d109c490449d528fde80.tar.xz
bind9-3b9b6d0b8e7f798023c9d109c490449d528fde80.zip
Adding upstream version 1:9.18.19.upstream/1%9.18.19upstream
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'lib/isc/netmgr')
-rw-r--r--lib/isc/netmgr/http.c3755
-rw-r--r--lib/isc/netmgr/netmgr-int.h2273
-rw-r--r--lib/isc/netmgr/netmgr.c3991
-rw-r--r--lib/isc/netmgr/tcp.c1456
-rw-r--r--lib/isc/netmgr/tcpdns.c1500
-rw-r--r--lib/isc/netmgr/timer.c120
-rw-r--r--lib/isc/netmgr/tlsdns.c2363
-rw-r--r--lib/isc/netmgr/tlsstream.c1348
-rw-r--r--lib/isc/netmgr/udp.c1405
-rw-r--r--lib/isc/netmgr/uv-compat.c140
-rw-r--r--lib/isc/netmgr/uv-compat.h126
-rw-r--r--lib/isc/netmgr/uverr2result.c105
12 files changed, 18582 insertions, 0 deletions
diff --git a/lib/isc/netmgr/http.c b/lib/isc/netmgr/http.c
new file mode 100644
index 0000000..f2d3e2d
--- /dev/null
+++ b/lib/isc/netmgr/http.c
@@ -0,0 +1,3755 @@
+/*
+ * Copyright (C) Internet Systems Consortium, Inc. ("ISC")
+ *
+ * SPDX-License-Identifier: MPL-2.0
+ *
+ * This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, you can obtain one at https://mozilla.org/MPL/2.0/.
+ *
+ * See the COPYRIGHT file distributed with this work for additional
+ * information regarding copyright ownership.
+ */
+
+#include <ctype.h>
+#include <inttypes.h>
+#include <limits.h>
+#include <nghttp2/nghttp2.h>
+#include <signal.h>
+#include <string.h>
+
+#include <isc/base64.h>
+#include <isc/log.h>
+#include <isc/netmgr.h>
+#include <isc/print.h>
+#include <isc/sockaddr.h>
+#include <isc/tls.h>
+#include <isc/url.h>
+#include <isc/util.h>
+
+#include "netmgr-int.h"
+
+#define AUTHEXTRA 7
+
+#define MAX_DNS_MESSAGE_SIZE (UINT16_MAX)
+
+#define DNS_MEDIA_TYPE "application/dns-message"
+
+/*
+ * See https://developer.mozilla.org/en-US/docs/Web/HTTP/Headers/Cache-Control
+ * for additional details. Basically it means "avoid caching by any
+ * means."
+ */
+#define DEFAULT_CACHE_CONTROL "no-cache, no-store, must-revalidate"
+
+/*
+ * If, while processing a request, the server exceeds any of the limits
+ * below, it will just reset the stream without returning any error
+ * codes in a response. Ideally, these parameters should be
+ * configurable both globally and per HTTP endpoint description
+ * in the configuration file, but for now this should be enough.
+ */
+
+/*
+ * 128K should be enough to encode 64K of data into base64url inside GET
+ * request and have extra space for other headers
+ */
+#define MAX_ALLOWED_DATA_IN_HEADERS (MAX_DNS_MESSAGE_SIZE * 2)
+
+#define MAX_ALLOWED_DATA_IN_POST \
+ (MAX_DNS_MESSAGE_SIZE + MAX_DNS_MESSAGE_SIZE / 2)
+
+#define HEADER_MATCH(header, name, namelen) \
+ (((namelen) == sizeof(header) - 1) && \
+ (strncasecmp((header), (const char *)(name), (namelen)) == 0))
+
+#define MIN_SUCCESSFUL_HTTP_STATUS (200)
+#define MAX_SUCCESSFUL_HTTP_STATUS (299)
+
+/* This definition sets the upper limit of the pending write buffer to
+ * an adequate value. That is done mostly to work around a limitation
+ * on the maximum TLS record size in flamethrower (2K). In a perfect
+ * world this constant would not be required; if we ever move closer to
+ * that state, the constant, and the corresponding code, should be
+ * removed. For now the limit seems adequate to fight the
+ * "tinygrams" problem. */
+#define FLUSH_HTTP_WRITE_BUFFER_AFTER (1536)
+
+/* This switch is here mostly to test the code interoperability with
+ * buggy implementations */
+#define ENABLE_HTTP_WRITE_BUFFERING 1
+
+#define SUCCESSFUL_HTTP_STATUS(code) \
+ ((code) >= MIN_SUCCESSFUL_HTTP_STATUS && \
+ (code) <= MAX_SUCCESSFUL_HTTP_STATUS)
+
+#define INITIAL_DNS_MESSAGE_BUFFER_SIZE (512)
+
+/*
+ * Response metadata gathered on the client side while response headers
+ * are processed (see client_on_header_callback()).
+ */
+typedef struct isc_nm_http_response_status {
+ size_t code; /* numeric value parsed from the ":status" header */
+ size_t content_length; /* numeric value parsed from "Content-Length" */
+ bool content_type_valid; /* set when "Content-Type" matched DNS_MEDIA_TYPE */
+} isc_nm_http_response_status_t;
+
+/*
+ * Client-side state of a single HTTP/2 request/response exchange.
+ * Objects are created by new_http_cstream(), kept on the owning
+ * session's 'cstreams' list and released by put_http_cstream().
+ */
+typedef struct http_cstream {
+ isc_nm_recv_cb_t read_cb; /* called with the response data when the stream is closed */
+ void *read_cbarg; /* opaque argument handed to 'read_cb' */
+ isc_nm_cb_t connect_cb;
+ void *connect_cbarg;
+
+ bool sending;
+ bool reading;
+
+ char *uri; /* private copy of the connect URI */
+ isc_url_parser_t up; /* result of parsing 'uri' */
+
+ char *authority; /* "host" or "host:port"; buffer is host length + AUTHEXTRA bytes */
+ size_t authoritylen; /* number of bytes of 'authority' in use */
+ char *path; /* path plus optional "?query"; 'pathlen' bytes, no terminating NUL */
+
+ isc_buffer_t *rbuf; /* accumulates the response body */
+
+ size_t pathlen;
+ int32_t stream_id; /* -1 until the request has been submitted to nghttp2 */
+
+ bool post; /* POST or GET */
+ isc_buffer_t *postdata; /* request body; only expected for POST */
+ char *GET_path; /* ":path" value used when the request is a GET */
+ size_t GET_path_len;
+
+ isc_nm_http_response_status_t response_status;
+ isc_nmsocket_t *httpsock; /* attached reference to the HTTP socket */
+ LINK(struct http_cstream) link;
+} http_cstream_t;
+
+#define HTTP2_SESSION_MAGIC ISC_MAGIC('H', '2', 'S', 'S')
+#define VALID_HTTP2_SESSION(t) ISC_MAGIC_VALID(t, HTTP2_SESSION_MAGIC)
+
+typedef ISC_LIST(isc__nm_uvreq_t) isc__nm_http_pending_callbacks_t;
+
+/*
+ * State of one HTTP/2 session layered on top of a transport handle.
+ * The object is reference counted; see isc__nm_httpsession_attach()
+ * and isc__nm_httpsession_detach().
+ */
+struct isc_nm_http_session {
+ unsigned int magic;
+ isc_refcount_t references;
+ isc_mem_t *mctx;
+
+ size_t sending; /* decremented in http_writecb(); counts writes in flight */
+ bool reading;
+ bool closed; /* set by finish_http_session() */
+ bool closing; /* set when the last client stream has been closed */
+
+ nghttp2_session *ngsession;
+ bool client; /* selects the client-side or server-side callbacks */
+
+ ISC_LIST(http_cstream_t) cstreams; /* client streams */
+ ISC_LIST(isc_nmsocket_h2_t) sstreams; /* server streams */
+ size_t nsstreams; /* decremented when a server stream is closed */
+
+ isc_nmhandle_t *handle; /* underlying transport handle */
+ isc_nmhandle_t *client_httphandle; /* handle passed to client read callbacks */
+ isc_nmsocket_t *serversocket;
+
+ isc_buffer_t *buf; /* input not yet consumed by nghttp2_session_mem_recv() */
+
+ isc_tlsctx_t *tlsctx;
+ uint32_t max_concurrent_streams;
+
+ isc__nm_http_pending_callbacks_t pending_write_callbacks; /* send callbacks awaiting a flush */
+ isc_buffer_t *pending_write_data; /* nghttp2 output awaiting a flush */
+};
+
+/*
+ * Response kinds understood by server_send_error_response(). The
+ * trailing comment on each enumerator names the HTTP status code it
+ * stands for; ISC_HTTP_ERROR_MAX is the number of enumerators before it.
+ */
+typedef enum isc_http_error_responses {
+ ISC_HTTP_ERROR_SUCCESS, /* 200 */
+ ISC_HTTP_ERROR_NOT_FOUND, /* 404 */
+ ISC_HTTP_ERROR_PAYLOAD_TOO_LARGE, /* 413 */
+ ISC_HTTP_ERROR_URI_TOO_LONG, /* 414 */
+ ISC_HTTP_ERROR_UNSUPPORTED_MEDIA_TYPE, /* 415 */
+ ISC_HTTP_ERROR_BAD_REQUEST, /* 400 */
+ ISC_HTTP_ERROR_NOT_IMPLEMENTED, /* 501 */
+ ISC_HTTP_ERROR_GENERIC, /* 500 Internal Server Error */
+ ISC_HTTP_ERROR_MAX
+} isc_http_error_responses_t;
+
+/*
+ * Book-keeping for one write handed to the transport; it is consumed
+ * and freed by http_writecb().
+ */
+typedef struct isc_http_send_req {
+ isc_nm_http_session_t *session; /* session the write belongs to */
+ isc_nmhandle_t *transphandle; /* transport handle; detached in http_writecb() */
+ isc_nmhandle_t *httphandle; /* handle passed to 'cb' */
+ isc_nm_cb_t cb; /* optional completion callback */
+ void *cbarg; /* opaque argument handed to 'cb' */
+ isc_buffer_t *pending_write_data; /* freed once the write has completed */
+ isc__nm_http_pending_callbacks_t pending_write_callbacks; /* callbacks run on completion */
+} isc_http_send_req_t;
+
+#define HTTP_ENDPOINTS_MAGIC ISC_MAGIC('H', 'T', 'E', 'P')
+#define VALID_HTTP_ENDPOINTS(t) ISC_MAGIC_VALID(t, HTTP_ENDPOINTS_MAGIC)
+
+static bool
+http_send_outgoing(isc_nm_http_session_t *session, isc_nmhandle_t *httphandle,
+ isc_nm_cb_t cb, void *cbarg);
+
+static void
+http_do_bio(isc_nm_http_session_t *session, isc_nmhandle_t *send_httphandle,
+ isc_nm_cb_t send_cb, void *send_cbarg);
+
+static void
+failed_httpstream_read_cb(isc_nmsocket_t *sock, isc_result_t result,
+ isc_nm_http_session_t *session);
+
+static void
+client_call_failed_read_cb(isc_result_t result, isc_nm_http_session_t *session);
+
+static void
+server_call_failed_read_cb(isc_result_t result, isc_nm_http_session_t *session);
+
+static void
+failed_read_cb(isc_result_t result, isc_nm_http_session_t *session);
+
+static isc_result_t
+server_send_error_response(const isc_http_error_responses_t error,
+ nghttp2_session *ngsession, isc_nmsocket_t *socket);
+
+static isc_result_t
+client_send(isc_nmhandle_t *handle, const isc_region_t *region);
+
+static void
+finish_http_session(isc_nm_http_session_t *session);
+
+static void
+http_transpost_tcp_nodelay(isc_nmhandle_t *transphandle);
+
+static void
+call_pending_callbacks(isc__nm_http_pending_callbacks_t pending_callbacks,
+ isc_result_t result);
+
+static void
+server_call_cb(isc_nmsocket_t *socket, isc_nm_http_session_t *session,
+ const isc_result_t result, isc_region_t *data);
+
+static isc_nm_httphandler_t *
+http_endpoints_find(const char *request_path,
+ const isc_nm_http_endpoints_t *restrict eps);
+
+static void
+http_init_listener_endpoints(isc_nmsocket_t *listener,
+ isc_nm_http_endpoints_t *epset);
+
+static void
+http_cleanup_listener_endpoints(isc_nmsocket_t *listener);
+
+static isc_nm_http_endpoints_t *
+http_get_listener_endpoints(isc_nmsocket_t *listener, const int tid);
+
+/*
+ * Tell whether 'session' can still be used, i.e. neither its 'closed'
+ * nor its 'closing' flag has been raised.
+ */
+static bool
+http_session_active(isc_nm_http_session_t *session) {
+	REQUIRE(VALID_HTTP2_SESSION(session));
+
+	if (session->closed || session->closing) {
+		return (false);
+	}
+
+	return (true);
+}
+
+/*
+ * malloc()-shaped allocator for nghttp2: takes 'sz' bytes from the
+ * memory context passed in as user data.
+ */
+static void *
+http_malloc(size_t sz, isc_mem_t *mctx) {
+	void *chunk = isc_mem_allocate(mctx, sz);
+
+	return (chunk);
+}
+
+/*
+ * calloc()-shaped allocator for nghttp2: returns zero-filled storage
+ * for 'n' objects of 'sz' bytes each, taken from 'mctx'.
+ *
+ * Like calloc(), NULL is returned when 'n * sz' cannot be represented
+ * in a size_t; without that check the product would silently wrap and
+ * the caller would receive a block smaller than it asked for.
+ */
+static void *
+http_calloc(size_t n, size_t sz, isc_mem_t *mctx) {
+	size_t msize = 0;
+	void *data = NULL;
+
+	if (sz != 0 && n > SIZE_MAX / sz) {
+		return (NULL);
+	}
+
+	msize = n * sz;
+	data = isc_mem_allocate(mctx, msize);
+
+	memset(data, 0, msize);
+	return (data);
+}
+
+/*
+ * realloc()-shaped allocator for nghttp2: resizes 'p' to 'newsz' bytes
+ * within the memory context passed in as user data.
+ */
+static void *
+http_realloc(void *p, size_t newsz, isc_mem_t *mctx) {
+	void *resized = isc_mem_reallocate(mctx, p, newsz);
+
+	return (resized);
+}
+
+/*
+ * free()-shaped deallocator for nghttp2. As with the standard free(),
+ * a NULL pointer is accepted and ignored.
+ */
+static void
+http_free(void *p, isc_mem_t *mctx) {
+	if (p != NULL) {
+		isc_mem_free(mctx, p);
+	}
+}
+
+/*
+ * Fill in '*mem' so that nghttp2 allocates through the http_*()
+ * wrappers above, with 'mctx' passed to them as the user data.
+ */
+static void
+init_nghttp2_mem(isc_mem_t *mctx, nghttp2_mem *mem) {
+	const nghttp2_mem allocators = {
+		.mem_user_data = mctx,
+		.malloc = (nghttp2_malloc)http_malloc,
+		.calloc = (nghttp2_calloc)http_calloc,
+		.realloc = (nghttp2_realloc)http_realloc,
+		.free = (nghttp2_free)http_free,
+	};
+
+	*mem = allocators;
+}
+
+/*
+ * Allocate a session object from 'mctx' and store it in '*sessionp'.
+ * The new session holds one reference, is attached to 'mctx', remembers
+ * 'tctx' and starts with empty stream and pending-callback lists.
+ */
+static void
+new_session(isc_mem_t *mctx, isc_tlsctx_t *tctx,
+	    isc_nm_http_session_t **sessionp) {
+	isc_nm_http_session_t *fresh = NULL;
+
+	REQUIRE(sessionp != NULL && *sessionp == NULL);
+	REQUIRE(mctx != NULL);
+
+	fresh = isc_mem_get(mctx, sizeof(*fresh));
+	*fresh = (isc_nm_http_session_t){
+		.magic = HTTP2_SESSION_MAGIC,
+		.tlsctx = tctx,
+	};
+
+	isc_refcount_init(&fresh->references, 1);
+	isc_mem_attach(mctx, &fresh->mctx);
+
+	ISC_LIST_INIT(fresh->cstreams);
+	ISC_LIST_INIT(fresh->sstreams);
+	ISC_LIST_INIT(fresh->pending_write_callbacks);
+
+	*sessionp = fresh;
+}
+
+/*
+ * Take an additional reference to 'source' and store the session in
+ * '*targetp', which must be NULL on entry.
+ */
+void
+isc__nm_httpsession_attach(isc_nm_http_session_t *source,
+ isc_nm_http_session_t **targetp) {
+ REQUIRE(VALID_HTTP2_SESSION(source));
+ REQUIRE(targetp != NULL && *targetp == NULL);
+
+ isc_refcount_increment(&source->references);
+
+ *targetp = source;
+}
+
+/*
+ * Drop one reference to the session and set '*sessionp' to NULL. When
+ * no other references remain, the session is finished, its nghttp2
+ * session and input buffer are released, and the object itself is
+ * returned to its memory context.
+ */
+void
+isc__nm_httpsession_detach(isc_nm_http_session_t **sessionp) {
+ isc_nm_http_session_t *session = NULL;
+
+ REQUIRE(sessionp != NULL);
+
+ session = *sessionp;
+ *sessionp = NULL;
+
+ REQUIRE(VALID_HTTP2_SESSION(session));
+
+ /* presumably the pre-decrement count is returned: others still hold it */
+ if (isc_refcount_decrement(&session->references) > 1) {
+ return;
+ }
+
+ finish_http_session(session);
+
+ /* all streams must be gone before the session can be freed */
+ INSIST(ISC_LIST_EMPTY(session->sstreams));
+ INSIST(ISC_LIST_EMPTY(session->cstreams));
+
+ if (session->ngsession != NULL) {
+ nghttp2_session_del(session->ngsession);
+ session->ngsession = NULL;
+ }
+
+ if (session->buf != NULL) {
+ isc_buffer_free(&session->buf);
+ }
+
+ /* We need an acquire memory barrier here */
+ (void)isc_refcount_current(&session->references);
+
+ /* invalidate the magic so stale pointers fail VALID_HTTP2_SESSION() */
+ session->magic = 0;
+ isc_mem_putanddetach(&session->mctx, session,
+ sizeof(isc_nm_http_session_t));
+}
+
+/*
+ * Look up the client stream with the given 'stream_id' on 'session'.
+ * A stream that is found is moved to the head of the list, so recently
+ * used streams are found first. Returns NULL when there is no match.
+ */
+static http_cstream_t *
+find_http_cstream(int32_t stream_id, isc_nm_http_session_t *session) {
+	http_cstream_t *found = NULL;
+
+	REQUIRE(VALID_HTTP2_SESSION(session));
+
+	found = ISC_LIST_HEAD(session->cstreams);
+	while (found != NULL && found->stream_id != stream_id) {
+		found = ISC_LIST_NEXT(found, link);
+	}
+
+	if (found == NULL) {
+		return (NULL);
+	}
+
+	/* LRU-like behaviour */
+	if (found != ISC_LIST_HEAD(session->cstreams)) {
+		ISC_LIST_UNLINK(session->cstreams, found, link);
+		ISC_LIST_PREPEND(session->cstreams, found, link);
+	}
+
+	return (found);
+}
+
+/*
+ * Create a client stream for 'sock' and prepend it to the cstreams list
+ * of the socket's session. The URI and the POST/GET mode are taken from
+ * the connect data of the session's transport socket. On success the
+ * stream is stored in '*streamp' and ISC_R_SUCCESS is returned; if the
+ * URI cannot be parsed, everything allocated so far is released and the
+ * parser's result is returned.
+ */
+static isc_result_t
+new_http_cstream(isc_nmsocket_t *sock, http_cstream_t **streamp) {
+ isc_mem_t *mctx = sock->mgr->mctx;
+ const char *uri = NULL;
+ bool post;
+ http_cstream_t *stream = NULL;
+ isc_result_t result;
+
+ uri = sock->h2.session->handle->sock->h2.connect.uri;
+ post = sock->h2.session->handle->sock->h2.connect.post;
+
+ stream = isc_mem_get(mctx, sizeof(http_cstream_t));
+ *stream = (http_cstream_t){ .stream_id = -1,
+ .post = post,
+ .uri = isc_mem_strdup(mctx, uri) };
+ ISC_LINK_INIT(stream, link);
+
+ result = isc_url_parse(stream->uri, strlen(stream->uri), 0,
+ &stream->up);
+ if (result != ISC_R_SUCCESS) {
+ isc_mem_free(mctx, stream->uri);
+ isc_mem_put(mctx, stream, sizeof(http_cstream_t));
+ return (result);
+ }
+
+ isc__nmsocket_attach(sock, &stream->httpsock);
+ /* authority = host, with AUTHEXTRA spare bytes for an optional ":port" */
+ stream->authoritylen = stream->up.field_data[ISC_UF_HOST].len;
+ stream->authority = isc_mem_get(mctx, stream->authoritylen + AUTHEXTRA);
+ memmove(stream->authority, &uri[stream->up.field_data[ISC_UF_HOST].off],
+ stream->up.field_data[ISC_UF_HOST].len);
+
+ if (stream->up.field_set & (1 << ISC_UF_PORT)) {
+ /* append ":port" right after the host and grow the length */
+ stream->authoritylen += (size_t)snprintf(
+ stream->authority +
+ stream->up.field_data[ISC_UF_HOST].len,
+ AUTHEXTRA, ":%u", stream->up.port);
+ }
+
+ /* If we don't have path in URI, we use "/" as path. */
+ stream->pathlen = 1;
+ if (stream->up.field_set & (1 << ISC_UF_PATH)) {
+ stream->pathlen = stream->up.field_data[ISC_UF_PATH].len;
+ }
+ if (stream->up.field_set & (1 << ISC_UF_QUERY)) {
+ /* +1 for '?' character */
+ stream->pathlen +=
+ (size_t)(stream->up.field_data[ISC_UF_QUERY].len + 1);
+ }
+
+ /* 'path' holds exactly 'pathlen' bytes and is not NUL-terminated */
+ stream->path = isc_mem_get(mctx, stream->pathlen);
+ if (stream->up.field_set & (1 << ISC_UF_PATH)) {
+ memmove(stream->path,
+ &uri[stream->up.field_data[ISC_UF_PATH].off],
+ stream->up.field_data[ISC_UF_PATH].len);
+ } else {
+ stream->path[0] = '/';
+ }
+
+ if (stream->up.field_set & (1 << ISC_UF_QUERY)) {
+ /* the query occupies the tail of 'path', preceded by '?' */
+ stream->path[stream->pathlen -
+ stream->up.field_data[ISC_UF_QUERY].len - 1] = '?';
+ memmove(stream->path + stream->pathlen -
+ stream->up.field_data[ISC_UF_QUERY].len,
+ &uri[stream->up.field_data[ISC_UF_QUERY].off],
+ stream->up.field_data[ISC_UF_QUERY].len);
+ }
+
+ /* buffer for the response body; it grows on demand */
+ isc_buffer_allocate(mctx, &stream->rbuf,
+ INITIAL_DNS_MESSAGE_BUFFER_SIZE);
+ isc_buffer_setautorealloc(stream->rbuf, true);
+
+ ISC_LIST_PREPEND(sock->h2.session->cstreams, stream, link);
+ *streamp = stream;
+
+ return (ISC_R_SUCCESS);
+}
+
+/*
+ * Release a client stream created by new_http_cstream(): free its
+ * strings and buffers, clear the owning socket's reference to it,
+ * unlink it from the session's list if it is still linked, detach from
+ * the HTTP socket and finally free the stream object itself.
+ */
+static void
+put_http_cstream(isc_mem_t *mctx, http_cstream_t *stream) {
+ isc_mem_put(mctx, stream->path, stream->pathlen);
+ /* same size as was allocated: host length + AUTHEXTRA */
+ isc_mem_put(mctx, stream->authority,
+ stream->up.field_data[ISC_UF_HOST].len + AUTHEXTRA);
+ isc_mem_free(mctx, stream->uri);
+ if (stream->GET_path != NULL) {
+ isc_mem_free(mctx, stream->GET_path);
+ stream->GET_path = NULL;
+ stream->GET_path_len = 0;
+ }
+
+ if (stream->postdata != NULL) {
+ INSIST(stream->post);
+ isc_buffer_free(&stream->postdata);
+ }
+
+ /* make sure the socket does not keep a pointer to the freed stream */
+ if (stream == stream->httpsock->h2.connect.cstream) {
+ stream->httpsock->h2.connect.cstream = NULL;
+ }
+ if (ISC_LINK_LINKED(stream, link)) {
+ ISC_LIST_UNLINK(stream->httpsock->h2.session->cstreams, stream,
+ link);
+ }
+ isc__nmsocket_detach(&stream->httpsock);
+
+ isc_buffer_free(&stream->rbuf);
+ isc_mem_put(mctx, stream, sizeof(http_cstream_t));
+}
+
+/*
+ * Shut the session down. Calling it again on a session that is already
+ * closed is a no-op. Pending reads are cancelled, outstanding read and
+ * write callbacks are called with ISC_R_UNEXPECTED, buffered output is
+ * dropped, and the transport handle, the client HTTP handle and the
+ * server socket are detached.
+ */
+static void
+finish_http_session(isc_nm_http_session_t *session) {
+ if (session->closed) {
+ return;
+ }
+
+ if (session->handle != NULL) {
+ /* mark the session closed before any callback gets to run */
+ if (!session->closed) {
+ session->closed = true;
+ isc_nm_cancelread(session->handle);
+ }
+
+ if (session->client) {
+ client_call_failed_read_cb(ISC_R_UNEXPECTED, session);
+ } else {
+ server_call_failed_read_cb(ISC_R_UNEXPECTED, session);
+ }
+
+ /* the list is passed by value, so reset our copy afterwards */
+ call_pending_callbacks(session->pending_write_callbacks,
+ ISC_R_UNEXPECTED);
+ ISC_LIST_INIT(session->pending_write_callbacks);
+
+ if (session->pending_write_data != NULL) {
+ isc_buffer_free(&session->pending_write_data);
+ }
+
+ isc_nmhandle_detach(&session->handle);
+ }
+
+ if (session->client_httphandle != NULL) {
+ isc_nmhandle_detach(&session->client_httphandle);
+ }
+
+ INSIST(ISC_LIST_EMPTY(session->cstreams));
+
+ /* detach from server socket */
+ if (session->serversocket != NULL) {
+ isc__nmsocket_detach(&session->serversocket);
+ }
+ session->closed = true;
+}
+
+/*
+ * Client side of the DATA chunk handler: append 'len' bytes of 'data'
+ * to the read buffer of the stream identified by 'stream_id'.
+ *
+ * Returns 0 on success, NGHTTP2_ERR_CALLBACK_FAILURE when the stream is
+ * unknown, and NGHTTP2_ERR_TEMPORAL_CALLBACK_FAILURE when the chunk
+ * would push the body past MAX_DNS_MESSAGE_SIZE or past the announced
+ * content length.
+ */
+static int
+on_client_data_chunk_recv_callback(int32_t stream_id, const uint8_t *data,
+				   size_t len, isc_nm_http_session_t *session) {
+	http_cstream_t *cstream = find_http_cstream(stream_id, session);
+	size_t total = len;
+
+	if (cstream == NULL) {
+		return (NGHTTP2_ERR_CALLBACK_FAILURE);
+	}
+
+	INSIST(cstream->rbuf != NULL);
+
+	total += isc_buffer_usedlength(cstream->rbuf);
+	if (total > MAX_DNS_MESSAGE_SIZE ||
+	    total > cstream->response_status.content_length)
+	{
+		return (NGHTTP2_ERR_TEMPORAL_CALLBACK_FAILURE);
+	}
+
+	isc_buffer_putmem(cstream->rbuf, data, len);
+
+	return (0);
+}
+
+/*
+ * Server side of the DATA chunk handler: find the server stream with
+ * the given 'stream_id' and append the chunk to its read buffer, which
+ * is set up on first use.
+ *
+ * Returns 0 on success, NGHTTP2_ERR_CALLBACK_FAILURE when no stream
+ * matches, and NGHTTP2_ERR_TEMPORAL_CALLBACK_FAILURE when the chunk
+ * would push the body past MAX_DNS_MESSAGE_SIZE or past the stream's
+ * content length.
+ */
+static int
+on_server_data_chunk_recv_callback(int32_t stream_id, const uint8_t *data,
+ size_t len, isc_nm_http_session_t *session) {
+ isc_nmsocket_h2_t *h2 = ISC_LIST_HEAD(session->sstreams);
+ while (h2 != NULL) {
+ if (stream_id == h2->stream_id) {
+ if (isc_buffer_base(&h2->rbuf) == NULL) {
+ /*
+ * NOTE(review): the storage is 'content_length' bytes
+ * but the buffer is initialised with length
+ * MAX_DNS_MESSAGE_SIZE; writes stay in bounds only
+ * because of the 'content_length' check below.
+ */
+ isc_buffer_init(
+ &h2->rbuf,
+ isc_mem_allocate(session->mctx,
+ h2->content_length),
+ MAX_DNS_MESSAGE_SIZE);
+ }
+ size_t new_bufsize = isc_buffer_usedlength(&h2->rbuf) +
+ len;
+ if (new_bufsize <= MAX_DNS_MESSAGE_SIZE &&
+ new_bufsize <= h2->content_length)
+ {
+ isc_buffer_putmem(&h2->rbuf, data, len);
+ break;
+ }
+
+ return (NGHTTP2_ERR_TEMPORAL_CALLBACK_FAILURE);
+ }
+ h2 = ISC_LIST_NEXT(h2, link);
+ }
+ /* the loop ran off the end of the list: unknown stream */
+ if (h2 == NULL) {
+ return (NGHTTP2_ERR_CALLBACK_FAILURE);
+ }
+
+ return (0);
+}
+
+/*
+ * nghttp2 "data chunk received" callback. 'user_data' is the session;
+ * the chunk is handed to the client or the server handler depending on
+ * the session's role and that handler's result is returned.
+ */
+static int
+on_data_chunk_recv_callback(nghttp2_session *ngsession, uint8_t flags,
+			    int32_t stream_id, const uint8_t *data, size_t len,
+			    void *user_data) {
+	isc_nm_http_session_t *session = (isc_nm_http_session_t *)user_data;
+
+	UNUSED(ngsession);
+	UNUSED(flags);
+
+	if (!session->client) {
+		return (on_server_data_chunk_recv_callback(stream_id, data,
+							   len, session));
+	}
+
+	return (on_client_data_chunk_recv_callback(stream_id, data, len,
+						   session));
+}
+
+/*
+ * Remove 'cstream' from the session's stream list, pass whatever has
+ * been collected in its read buffer to the stream's read callback
+ * together with 'result', and then destroy the stream.
+ */
+static void
+call_unlink_cstream_readcb(http_cstream_t *cstream,
+			   isc_nm_http_session_t *session,
+			   isc_result_t result) {
+	isc_region_t payload;
+
+	REQUIRE(VALID_HTTP2_SESSION(session));
+	REQUIRE(cstream != NULL);
+
+	ISC_LIST_UNLINK(session->cstreams, cstream, link);
+
+	INSIST(VALID_NMHANDLE(session->client_httphandle));
+
+	isc_buffer_usedregion(cstream->rbuf, &payload);
+	cstream->read_cb(session->client_httphandle, result, &payload,
+			 cstream->read_cbarg);
+
+	/* The stream must not be touched after this point. */
+	put_http_cstream(session->mctx, cstream);
+}
+
+/*
+ * Client side of the stream-close handler. The read callback of the
+ * stream is called with ISC_R_SUCCESS when the recorded HTTP status is
+ * a successful one and with ISC_R_FAILURE otherwise, and the stream is
+ * destroyed. When that was the last stream, termination of the nghttp2
+ * session is requested and the session is marked as closing.
+ *
+ * Returns 0 on success, NGHTTP2_ERR_CALLBACK_FAILURE for an unknown
+ * stream, or the error returned by nghttp2_session_terminate_session().
+ */
+static int
+on_client_stream_close_callback(int32_t stream_id,
+				isc_nm_http_session_t *session) {
+	http_cstream_t *cstream = find_http_cstream(stream_id, session);
+	isc_result_t result = ISC_R_FAILURE;
+	int rv = 0;
+
+	if (cstream == NULL) {
+		return (NGHTTP2_ERR_CALLBACK_FAILURE);
+	}
+
+	if (SUCCESSFUL_HTTP_STATUS(cstream->response_status.code)) {
+		result = ISC_R_SUCCESS;
+	}
+	call_unlink_cstream_readcb(cstream, session, result);
+
+	if (!ISC_LIST_EMPTY(session->cstreams)) {
+		return (0);
+	}
+
+	rv = nghttp2_session_terminate_session(session->ngsession,
+					       NGHTTP2_NO_ERROR);
+	if (rv != 0) {
+		return (rv);
+	}
+
+	/* Mark the session as closing one to finish it on a
+	 * subsequent call to http_do_bio() */
+	session->closing = true;
+
+	return (0);
+}
+
+/*
+ * Server side of the stream-close handler: take the socket stored as
+ * the stream's user data off the session's server stream list, update
+ * the stream counter, prepare the socket for destruction and drop the
+ * reference held through the stream user data. Always returns 0.
+ */
+static int
+on_server_stream_close_callback(int32_t stream_id,
+ isc_nm_http_session_t *session) {
+ /* NOTE(review): assumes user data is always set for the stream - confirm */
+ isc_nmsocket_t *sock = nghttp2_session_get_stream_user_data(
+ session->ngsession, stream_id);
+ int rv = 0;
+
+ ISC_LIST_UNLINK(session->sstreams, &sock->h2, link);
+ session->nsstreams--;
+
+ /*
+ * By making a call to isc__nmsocket_prep_destroy(), we ensure that
+ * the socket gets marked as inactive, allowing the HTTP/2 data
+ * associated with it to be properly disposed of eventually.
+ *
+ * An HTTP/2 stream socket will normally be marked as inactive in
+ * the normal course of operation. However, when browsers terminate
+ * HTTP/2 streams prematurely (e.g. by sending RST_STREAM),
+ * corresponding sockets can remain marked as active, retaining
+ * references to the HTTP/2 data (most notably the session objects),
+ * preventing them from being correctly freed and leading to BIND
+ * hanging on shutdown. Calling isc__nmsocket_prep_destroy()
+ * ensures that this will not happen.
+ */
+ isc__nmsocket_prep_destroy(sock);
+ isc__nmsocket_detach(&sock);
+ return (rv);
+}
+
+/*
+ * nghttp2 "stream closed" callback. 'user_data' is the session, which
+ * must own 'ngsession'; the event is passed to the client or the server
+ * handler depending on the session's role.
+ */
+static int
+on_stream_close_callback(nghttp2_session *ngsession, int32_t stream_id,
+			 uint32_t error_code, void *user_data) {
+	isc_nm_http_session_t *session = (isc_nm_http_session_t *)user_data;
+
+	REQUIRE(VALID_HTTP2_SESSION(session));
+	REQUIRE(session->ngsession == ngsession);
+
+	UNUSED(error_code);
+
+	if (!session->client) {
+		return (on_server_stream_close_callback(stream_id, session));
+	}
+
+	return (on_client_stream_close_callback(stream_id, session));
+}
+
+/*
+ * Record the numeric value of a ":status" header in the stream's
+ * response status. At most 31 bytes of 'value' are looked at.
+ * Returns true when the status is a successful one (2xx).
+ */
+static bool
+client_handle_status_header(http_cstream_t *cstream, const uint8_t *value,
+			    const size_t valuelen) {
+	char digits[32] = { 0 };
+	const size_t maxcopy = sizeof(digits) - 1;
+	const size_t ncopy = ISC_MIN(maxcopy, valuelen);
+
+	/* 'digits' is zero-filled and never fully overwritten, so it
+	 * always stays NUL-terminated. */
+	strncpy(digits, (const char *)value, ncopy);
+	cstream->response_status.code = strtoul(digits, NULL, 10);
+
+	return (SUCCESSFUL_HTTP_STATUS(cstream->response_status.code));
+}
+
+/*
+ * Record the numeric value of a "Content-Length" header in the stream's
+ * response status. At most 31 bytes of 'value' are looked at.
+ * Returns false when the length is zero or larger than
+ * MAX_DNS_MESSAGE_SIZE, true otherwise.
+ */
+static bool
+client_handle_content_length_header(http_cstream_t *cstream,
+				    const uint8_t *value,
+				    const size_t valuelen) {
+	char digits[32] = { 0 };
+	const size_t maxcopy = sizeof(digits) - 1;
+	const size_t ncopy = ISC_MIN(maxcopy, valuelen);
+	size_t announced = 0;
+
+	strncpy(digits, (const char *)value, ncopy);
+	announced = strtoul(digits, NULL, 10);
+	cstream->response_status.content_length = announced;
+
+	return (announced != 0 && announced <= MAX_DNS_MESSAGE_SIZE);
+}
+
+/*
+ * Check a "Content-Type" header: when 'value' starts with
+ * DNS_MEDIA_TYPE (compared case-insensitively) the stream's content
+ * type is marked as valid and true is returned, otherwise false.
+ * 'valuelen' is not consulted.
+ */
+static bool
+client_handle_content_type_header(http_cstream_t *cstream, const uint8_t *value,
+				  const size_t valuelen) {
+	const char expected[] = DNS_MEDIA_TYPE;
+
+	UNUSED(valuelen);
+
+	if (strncasecmp((const char *)value, expected,
+			sizeof(expected) - 1) != 0)
+	{
+		return (false);
+	}
+
+	cstream->response_status.content_type_valid = true;
+
+	return (true);
+}
+
+/*
+ * nghttp2 header callback for client sessions. For response HEADERS
+ * frames the ":status", "Content-Length" and "Content-Type" headers are
+ * passed to their handlers; all other headers and frames are accepted
+ * as they are.
+ *
+ * Returns 0 on success, NGHTTP2_ERR_CALLBACK_FAILURE when the frame
+ * belongs to no known stream, and
+ * NGHTTP2_ERR_TEMPORAL_CALLBACK_FAILURE when a handler rejects the
+ * header.
+ */
+static int
+client_on_header_callback(nghttp2_session *ngsession,
+			  const nghttp2_frame *frame, const uint8_t *name,
+			  size_t namelen, const uint8_t *value, size_t valuelen,
+			  uint8_t flags, void *user_data) {
+	isc_nm_http_session_t *session = (isc_nm_http_session_t *)user_data;
+	http_cstream_t *cstream = NULL;
+	const char status[] = ":status";
+	const char content_length[] = "Content-Length";
+	const char content_type[] = "Content-Type";
+	bool accepted = true;
+
+	REQUIRE(VALID_HTTP2_SESSION(session));
+	REQUIRE(session->client);
+
+	UNUSED(flags);
+	UNUSED(ngsession);
+
+	cstream = find_http_cstream(frame->hd.stream_id, session);
+	if (cstream == NULL) {
+		/*
+		 * Either the server sent us bad data, or we closed the
+		 * session before all responses had arrived (e.g. because
+		 * of a belated or partial response).
+		 */
+		return (NGHTTP2_ERR_CALLBACK_FAILURE);
+	}
+
+	INSIST(!ISC_LIST_EMPTY(session->cstreams));
+
+	/* Only response headers are of interest here. */
+	if (frame->hd.type != NGHTTP2_HEADERS ||
+	    frame->headers.cat != NGHTTP2_HCAT_RESPONSE)
+	{
+		return (0);
+	}
+
+	if (HEADER_MATCH(status, name, namelen)) {
+		accepted = client_handle_status_header(cstream, value,
+						       valuelen);
+	} else if (HEADER_MATCH(content_length, name, namelen)) {
+		accepted = client_handle_content_length_header(cstream, value,
+							       valuelen);
+	} else if (HEADER_MATCH(content_type, name, namelen)) {
+		accepted = client_handle_content_type_header(cstream, value,
+							     valuelen);
+	}
+
+	return (accepted ? 0 : NGHTTP2_ERR_TEMPORAL_CALLBACK_FAILURE);
+}
+
+/*
+ * Create the nghttp2 client session for 'session'. The nghttp2 session
+ * allocates through the session's memory context, gets 'session' as its
+ * user data and has the data-chunk, stream-close and header callbacks
+ * installed. Failure of any nghttp2 constructor is treated as fatal
+ * (RUNTIME_CHECK).
+ */
+static void
+initialize_nghttp2_client_session(isc_nm_http_session_t *session) {
+ nghttp2_session_callbacks *callbacks = NULL;
+ nghttp2_option *option = NULL;
+ nghttp2_mem mem;
+
+ init_nghttp2_mem(session->mctx, &mem);
+ RUNTIME_CHECK(nghttp2_session_callbacks_new(&callbacks) == 0);
+ RUNTIME_CHECK(nghttp2_option_new(&option) == 0);
+
+ /* only set when the nghttp2 headers are version 0x010c00 or newer */
+#if NGHTTP2_VERSION_NUM >= (0x010c00)
+ nghttp2_option_set_max_send_header_block_length(
+ option, MAX_ALLOWED_DATA_IN_HEADERS);
+#endif
+
+ nghttp2_session_callbacks_set_on_data_chunk_recv_callback(
+ callbacks, on_data_chunk_recv_callback);
+
+ nghttp2_session_callbacks_set_on_stream_close_callback(
+ callbacks, on_stream_close_callback);
+
+ nghttp2_session_callbacks_set_on_header_callback(
+ callbacks, client_on_header_callback);
+
+ RUNTIME_CHECK(nghttp2_session_client_new3(&session->ngsession,
+ callbacks, session, option,
+ &mem) == 0);
+
+ /* the temporary option and callback objects are released here */
+ nghttp2_option_del(option);
+ nghttp2_session_callbacks_del(callbacks);
+}
+
+/*
+ * Submit the client's initial SETTINGS frame, which sets
+ * NGHTTP2_SETTINGS_ENABLE_PUSH to 0. Returns true when nghttp2
+ * accepted the frame and false otherwise.
+ */
+static bool
+send_client_connection_header(isc_nm_http_session_t *session) {
+	nghttp2_settings_entry settings[] = {
+		{ NGHTTP2_SETTINGS_ENABLE_PUSH, 0 },
+	};
+	const size_t nsettings = sizeof(settings) / sizeof(settings[0]);
+	int rv = nghttp2_submit_settings(session->ngsession,
+					 NGHTTP2_FLAG_NONE, settings,
+					 nsettings);
+
+	return (rv == 0);
+}
+
+/*
+ * Initializer for a header name/value pair. NAME must be a string
+ * literal or a char array, because its length is taken with sizeof;
+ * the length of VALUE is given explicitly as VALUELEN.
+ */
+#define MAKE_NV(NAME, VALUE, VALUELEN) \
+ { \
+ (uint8_t *)(uintptr_t)(NAME), (uint8_t *)(uintptr_t)(VALUE), \
+ sizeof(NAME) - 1, VALUELEN, NGHTTP2_NV_FLAG_NONE \
+ }
+
+/*
+ * Same as MAKE_NV, but VALUE must be a string literal or a char array
+ * as well: both lengths are taken with sizeof.
+ */
+#define MAKE_NV2(NAME, VALUE) \
+ { \
+ (uint8_t *)(uintptr_t)(NAME), (uint8_t *)(uintptr_t)(VALUE), \
+ sizeof(NAME) - 1, sizeof(VALUE) - 1, \
+ NGHTTP2_NV_FLAG_NONE \
+ }
+
+/*
+ * nghttp2 data source callback for client requests. For a POST stream
+ * up to 'length' bytes of the remaining POST data are copied into 'buf'
+ * and the end-of-data flag is raised once nothing is left. For a GET
+ * stream there is no body, so the end-of-data flag is raised right
+ * away.
+ *
+ * Returns the number of bytes stored in 'buf', or
+ * NGHTTP2_ERR_CALLBACK_FAILURE when the stream cannot be found.
+ */
+static ssize_t
+client_read_callback(nghttp2_session *ngsession, int32_t stream_id,
+		     uint8_t *buf, size_t length, uint32_t *data_flags,
+		     nghttp2_data_source *source, void *user_data) {
+	isc_nm_http_session_t *session = (isc_nm_http_session_t *)user_data;
+	http_cstream_t *cstream = NULL;
+	size_t ncopy = 0;
+
+	REQUIRE(session->client);
+	REQUIRE(!ISC_LIST_EMPTY(session->cstreams));
+
+	UNUSED(ngsession);
+	UNUSED(source);
+
+	cstream = find_http_cstream(stream_id, session);
+	if (cstream == NULL || cstream->stream_id != stream_id) {
+		/* We haven't found the stream, so we are not reading */
+		return (NGHTTP2_ERR_CALLBACK_FAILURE);
+	}
+
+	if (!cstream->post) {
+		/* A GET request carries no body. */
+		*data_flags |= NGHTTP2_DATA_FLAG_EOF;
+		return (0);
+	}
+
+	ncopy = isc_buffer_remaininglength(cstream->postdata);
+	if (ncopy > length) {
+		ncopy = length;
+	}
+
+	if (ncopy > 0) {
+		memmove(buf, isc_buffer_current(cstream->postdata), ncopy);
+		isc_buffer_forward(cstream->postdata, ncopy);
+	}
+
+	if (isc_buffer_remaininglength(cstream->postdata) == 0) {
+		*data_flags |= NGHTTP2_DATA_FLAG_EOF;
+	}
+
+	return (ncopy);
+}
+
+/*
+ * Send HTTP request to the remote peer.
+ *
+ * Builds the header list for a POST or a GET request, depending on
+ * 'stream->post', and submits it to nghttp2 with 'stream' as the stream
+ * user data and client_read_callback() as the data provider. On
+ * success the stream id returned by nghttp2 is stored in the stream and
+ * ISC_R_SUCCESS is returned; a negative id from nghttp2 yields
+ * ISC_R_FAILURE.
+ */
+static isc_result_t
+client_submit_request(isc_nm_http_session_t *session, http_cstream_t *stream) {
+ int32_t stream_id;
+ char *uri = stream->uri;
+ isc_url_parser_t *up = &stream->up;
+ nghttp2_data_provider dp;
+
+ if (stream->post) {
+ /* 'p' holds the decimal "content-length" value */
+ char p[64];
+ snprintf(p, sizeof(p), "%u",
+ isc_buffer_usedlength(stream->postdata));
+ nghttp2_nv hdrs[] = {
+ MAKE_NV2(":method", "POST"),
+ MAKE_NV(":scheme",
+ &uri[up->field_data[ISC_UF_SCHEMA].off],
+ up->field_data[ISC_UF_SCHEMA].len),
+ MAKE_NV(":authority", stream->authority,
+ stream->authoritylen),
+ MAKE_NV(":path", stream->path, stream->pathlen),
+ MAKE_NV2("content-type", DNS_MEDIA_TYPE),
+ MAKE_NV2("accept", DNS_MEDIA_TYPE),
+ MAKE_NV("content-length", p, strlen(p)),
+ MAKE_NV2("cache-control", DEFAULT_CACHE_CONTROL)
+ };
+
+ dp = (nghttp2_data_provider){ .read_callback =
+ client_read_callback };
+ stream_id = nghttp2_submit_request(
+ session->ngsession, NULL, hdrs,
+ sizeof(hdrs) / sizeof(hdrs[0]), &dp, stream);
+ } else {
+ /* for GET the ":path" is taken from 'GET_path', which must be set */
+ INSIST(stream->GET_path != NULL);
+ INSIST(stream->GET_path_len != 0);
+ nghttp2_nv hdrs[] = {
+ MAKE_NV2(":method", "GET"),
+ MAKE_NV(":scheme",
+ &uri[up->field_data[ISC_UF_SCHEMA].off],
+ up->field_data[ISC_UF_SCHEMA].len),
+ MAKE_NV(":authority", stream->authority,
+ stream->authoritylen),
+ MAKE_NV(":path", stream->GET_path,
+ stream->GET_path_len),
+ MAKE_NV2("accept", DNS_MEDIA_TYPE),
+ MAKE_NV2("cache-control", DEFAULT_CACHE_CONTROL)
+ };
+
+ dp = (nghttp2_data_provider){ .read_callback =
+ client_read_callback };
+ stream_id = nghttp2_submit_request(
+ session->ngsession, NULL, hdrs,
+ sizeof(hdrs) / sizeof(hdrs[0]), &dp, stream);
+ }
+ if (stream_id < 0) {
+ return (ISC_R_FAILURE);
+ }
+
+ stream->stream_id = stream_id;
+
+ return (ISC_R_SUCCESS);
+}
+
+/*
+ * Read callback from TLS socket.
+ *
+ * 'data' is the HTTP/2 session. Received bytes are passed to nghttp2;
+ * any part nghttp2 did not consume is saved in 'session->buf' and
+ * reading on the transport is paused. A failed read or an nghttp2
+ * error is reported through failed_read_cb().
+ */
+static void
+http_readcb(isc_nmhandle_t *handle, isc_result_t result, isc_region_t *region,
+ void *data) {
+ isc_nm_http_session_t *session = (isc_nm_http_session_t *)data;
+ ssize_t readlen;
+
+ REQUIRE(VALID_HTTP2_SESSION(session));
+
+ UNUSED(handle);
+
+ if (result != ISC_R_SUCCESS) {
+ /* a timeout leaves the 'reading' flag untouched */
+ if (result != ISC_R_TIMEDOUT) {
+ session->reading = false;
+ }
+ failed_read_cb(result, session);
+ return;
+ }
+
+ readlen = nghttp2_session_mem_recv(session->ngsession, region->base,
+ region->length);
+ if (readlen < 0) {
+ failed_read_cb(ISC_R_UNEXPECTED, session);
+ return;
+ }
+
+ if ((size_t)readlen < region->length) {
+ /* keep the unprocessed tail for later and stop reading for now */
+ size_t unread_size = region->length - readlen;
+ if (session->buf == NULL) {
+ isc_buffer_allocate(session->mctx, &session->buf,
+ unread_size);
+ isc_buffer_setautorealloc(session->buf, true);
+ }
+ isc_buffer_putmem(session->buf, region->base + readlen,
+ unread_size);
+ isc_nm_pauseread(session->handle);
+ }
+
+ /* We might have something to receive or send, do IO */
+ http_do_bio(session, NULL, NULL, NULL);
+}
+
+/*
+ * Walk 'pending_callbacks', unlinking every queued request and passing
+ * it to isc__nm_sendcb() together with 'result'.
+ *
+ * The list head is received by value, so the caller's own copy of the
+ * head is not updated; the caller has to reset or discard it.
+ */
+static void
+call_pending_callbacks(isc__nm_http_pending_callbacks_t pending_callbacks,
+		       isc_result_t result) {
+	isc__nm_uvreq_t *cbreq = NULL;
+	isc__nm_uvreq_t *following = NULL;
+
+	for (cbreq = ISC_LIST_HEAD(pending_callbacks); cbreq != NULL;
+	     cbreq = following)
+	{
+		/* Remember the successor before the request is unlinked. */
+		following = ISC_LIST_NEXT(cbreq, link);
+		ISC_LIST_UNLINK(pending_callbacks, cbreq, link);
+		isc__nm_sendcb(cbreq->handle->sock, cbreq, result, false);
+	}
+}
+
+/*
+ * Completion callback for a write issued on the transport handle.
+ * 'arg' is the isc_http_send_req_t describing the write. The queued
+ * callbacks and the request's own callback are called with 'result',
+ * the request is freed, further I/O is attempted, and the session is
+ * finished when the write failed and no other write is in flight.
+ */
+static void
+http_writecb(isc_nmhandle_t *handle, isc_result_t result, void *arg) {
+ isc_http_send_req_t *req = (isc_http_send_req_t *)arg;
+ /* copied out of 'req' because 'req' is freed before they are last used */
+ isc_nm_http_session_t *session = req->session;
+ isc_nmhandle_t *transphandle = req->transphandle;
+
+ REQUIRE(VALID_HTTP2_SESSION(session));
+ REQUIRE(VALID_NMHANDLE(handle));
+
+ if (http_session_active(session)) {
+ INSIST(session->handle == handle);
+ }
+
+ call_pending_callbacks(req->pending_write_callbacks, result);
+
+ if (req->cb != NULL) {
+ req->cb(req->httphandle, result, req->cbarg);
+ isc_nmhandle_detach(&req->httphandle);
+ }
+
+ isc_buffer_free(&req->pending_write_data);
+ isc_mem_put(session->mctx, req, sizeof(*req));
+
+ /* this write is no longer in flight */
+ session->sending--;
+ http_do_bio(session, NULL, NULL, NULL);
+ isc_nmhandle_detach(&transphandle);
+ if (result != ISC_R_SUCCESS && session->sending == 0) {
+ finish_http_session(session);
+ }
+ /* presumably drops the reference taken when the write was issued - confirm */
+ isc__nm_httpsession_detach(&session);
+}
+
+/*
+ * Hand the session's list of pending write callbacks over to 'send':
+ * the list head is copied into the send request and the session's own
+ * list is reset to empty.
+ */
+static void
+move_pending_send_callbacks(isc_nm_http_session_t *session,
+			    isc_http_send_req_t *send) {
+	STATIC_ASSERT(
+		sizeof(session->pending_write_callbacks) ==
+			sizeof(send->pending_write_callbacks),
+		"size of pending writes requests callbacks lists differs");
+
+	/* Both members have the same list type, so a plain assignment
+	 * copies the head just as a byte-wise copy would. */
+	send->pending_write_callbacks = session->pending_write_callbacks;
+	ISC_LIST_INIT(session->pending_write_callbacks);
+}
+
+/*
+ * Collect the data nghttp2 wants to write into
+ * session->pending_write_data and, when appropriate, pass the
+ * accumulated buffer to the underlying transport with isc_nm_send().
+ *
+ * 'cb'/'cbarg' describe an optional write callback; when 'cb' is not
+ * NULL, 'httphandle' must be a valid handle.
+ *
+ * Returns 'true' when a transport write has been started and 'false'
+ * when the session is not active, there is nothing to send, or the
+ * write has been deferred (write buffering).
+ */
+static bool
+http_send_outgoing(isc_nm_http_session_t *session, isc_nmhandle_t *httphandle,
+		   isc_nm_cb_t cb, void *cbarg) {
+	isc_http_send_req_t *send = NULL;
+	size_t total = 0;
+	isc_region_t send_data = { 0 };
+	isc_nmhandle_t *transphandle = NULL;
+#ifdef ENABLE_HTTP_WRITE_BUFFERING
+	size_t max_total_write_size = 0;
+#endif /* ENABLE_HTTP_WRITE_BUFFERING */
+
+	if (!http_session_active(session) ||
+	    (!nghttp2_session_want_write(session->ngsession) &&
+	     session->pending_write_data == NULL))
+	{
+		return (false);
+	}
+
+	/* We need to attach to the session->handle earlier because as an
+	 * indirect result of the nghttp2_session_mem_send() the session
+	 * might get closed and the handle detached. However, there is
+	 * still some outgoing data to handle and we need to call it
+	 * anyway if only to get the write callback passed here to get
+	 * called properly. */
+	isc_nmhandle_attach(session->handle, &transphandle);
+
+	while (nghttp2_session_want_write(session->ngsession)) {
+		const uint8_t *data = NULL;
+		/*
+		 * nghttp2_session_mem_send() returns a signed value: the
+		 * number of bytes available in 'data', or a negative
+		 * error code. Keep it signed so that an error is never
+		 * mistaken for a huge length.
+		 */
+		const ssize_t pending =
+			nghttp2_session_mem_send(session->ngsession, &data);
+		size_t chunk = 0;
+
+		/* Stop on error. Also, sometimes
+		 * nghttp2_session_mem_send() does not return any data to
+		 * send even though nghttp2_session_want_write() returns
+		 * success. */
+		if (pending <= 0 || data == NULL) {
+			break;
+		}
+		chunk = (size_t)pending;
+
+		/* allocate the buffer if required */
+		if (session->pending_write_data == NULL) {
+			isc_buffer_allocate(session->mctx,
+					    &session->pending_write_data,
+					    INITIAL_DNS_MESSAGE_BUFFER_SIZE);
+			isc_buffer_setautorealloc(session->pending_write_data,
+						  true);
+		}
+		isc_buffer_putmem(session->pending_write_data, data, chunk);
+		total += chunk;
+	}
+
+#ifdef ENABLE_HTTP_WRITE_BUFFERING
+	if (session->pending_write_data != NULL) {
+		max_total_write_size =
+			isc_buffer_usedlength(session->pending_write_data);
+	}
+
+	/* Here we are trying to flush the pending writes buffer earlier
+	 * to avoid hitting unnecessary limitations on a TLS record size
+	 * within some tools (e.g. flamethrower). */
+	if (max_total_write_size >= FLUSH_HTTP_WRITE_BUFFER_AFTER) {
+		/* Case 1: We have equal or more than
+		 * FLUSH_HTTP_WRITE_BUFFER_AFTER bytes to send. Let's flush it.
+		 */
+		total = max_total_write_size;
+	} else if (session->sending > 0 && total > 0) {
+		/* Case 2: There is one or more write requests in flight and
+		 * we have some new data from nghttp2 to send. Let's put the
+		 * write callback (if any) into the pending write callbacks
+		 * list. Then let's return from the function: as soon as the
+		 * "in-flight" write callback gets called or we have reached
+		 * FLUSH_HTTP_WRITE_BUFFER_AFTER bytes in the write buffer, we
+		 * will flush the buffer. */
+		if (cb != NULL) {
+			isc__nm_uvreq_t *newcb = NULL;
+
+			/* Validate the handle before dereferencing it. */
+			INSIST(VALID_NMHANDLE(httphandle));
+			newcb = isc__nm_uvreq_get(httphandle->sock->mgr,
+						  httphandle->sock);
+			newcb->cb.send = cb;
+			newcb->cbarg = cbarg;
+			isc_nmhandle_attach(httphandle, &newcb->handle);
+			ISC_LIST_APPEND(session->pending_write_callbacks, newcb,
+					link);
+		}
+		goto nothing_to_send;
+	} else if (session->sending == 0 && total == 0 &&
+		   session->pending_write_data != NULL)
+	{
+		/* Case 3: There is no write in flight and we haven't got
+		 * anything new from nghttp2, but there is some data pending
+		 * in the write buffer. Let's flush the buffer. */
+		isc_region_t region = { 0 };
+		total = isc_buffer_usedlength(session->pending_write_data);
+		INSIST(total > 0);
+		isc_buffer_usedregion(session->pending_write_data, &region);
+		INSIST(total == region.length);
+	} else {
+		/* The other cases are uninteresting, fall-through ones. */
+		/* In the following cases (4-6) we will just bail out. */
+		/* Case 4: There is nothing new to send, nor anything in the
+		 * write buffer. */
+		/* Case 5: There is nothing new to send and there is write
+		 * request(s) in flight. */
+		/* Case 6: There is nothing new to send nor there are any
+		 * write requests in flight. */
+
+		/* Case 7: There is some new data to send and there are no
+		 * write requests in flight: Let's send the data. */
+		INSIST((total == 0 && session->pending_write_data == NULL) ||
+		       (total == 0 && session->sending > 0) ||
+		       (total == 0 && session->sending == 0) ||
+		       (total > 0 && session->sending == 0));
+	}
+#else
+	INSIST(ISC_LIST_EMPTY(session->pending_write_callbacks));
+#endif /* ENABLE_HTTP_WRITE_BUFFERING */
+
+	if (total == 0) {
+		/* No data returned */
+		goto nothing_to_send;
+	}
+
+	/* If we have reached the point it means that we need to send some
+	 * data and flush the outgoing buffer. The code below does that. */
+	send = isc_mem_get(session->mctx, sizeof(*send));
+
+	*send = (isc_http_send_req_t){ .pending_write_data =
+					       session->pending_write_data,
+				       .cb = cb,
+				       .cbarg = cbarg };
+	session->pending_write_data = NULL;
+	move_pending_send_callbacks(session, send);
+
+	/* The reference taken above is handed over to the send request. */
+	send->transphandle = transphandle;
+	isc__nm_httpsession_attach(session, &send->session);
+
+	if (cb != NULL) {
+		INSIST(VALID_NMHANDLE(httphandle));
+		isc_nmhandle_attach(httphandle, &send->httphandle);
+	}
+
+	session->sending++;
+	isc_buffer_usedregion(send->pending_write_data, &send_data);
+	isc_nm_send(transphandle, &send_data, http_writecb, send);
+	return (true);
+nothing_to_send:
+	isc_nmhandle_detach(&transphandle);
+	return (false);
+}
+
+/*
+ * Drive the I/O of an HTTP/2 session: depending on what nghttp2 wants,
+ * start, resume or pause reading on the transport handle, feed any
+ * leftover buffered input to nghttp2, and finally try to send outgoing
+ * data via http_send_outgoing().
+ *
+ * 'send_httphandle', 'send_cb' and 'send_cbarg' describe an optional
+ * write callback which is passed through to http_send_outgoing(); when
+ * 'send_cb' is NULL the other two must be NULL as well.
+ */
+static void
+http_do_bio(isc_nm_http_session_t *session, isc_nmhandle_t *send_httphandle,
+	    isc_nm_cb_t send_cb, void *send_cbarg) {
+	REQUIRE(VALID_HTTP2_SESSION(session));
+
+	if (session->closed) {
+		return;
+	} else if (session->closing) {
+		/*
+		 * There might be leftover callbacks waiting to be received
+		 */
+		if (session->sending == 0) {
+			finish_http_session(session);
+		}
+		return;
+	} else if (nghttp2_session_want_read(session->ngsession) == 0 &&
+		   nghttp2_session_want_write(session->ngsession) == 0 &&
+		   session->pending_write_data == NULL)
+	{
+		session->closing = true;
+		return;
+	}
+
+	if (nghttp2_session_want_read(session->ngsession) != 0) {
+		if (!session->reading) {
+			/* We have not yet started reading from this handle */
+			isc_nm_read(session->handle, http_readcb, session);
+			session->reading = true;
+		} else if (session->buf != NULL) {
+			size_t remaining =
+				isc_buffer_remaininglength(session->buf);
+			/* Leftover data in the buffer, use it */
+			ssize_t readlen = nghttp2_session_mem_recv(
+				session->ngsession,
+				isc_buffer_current(session->buf), remaining);
+
+			if (readlen < 0) {
+				/*
+				 * nghttp2_session_mem_recv() reported an
+				 * error (negative return value). The
+				 * buffered input cannot be processed any
+				 * further: drop it and close the session
+				 * the same way the 'closing' branch above
+				 * does, instead of using the error code as
+				 * a buffer offset.
+				 */
+				isc_buffer_free(&session->buf);
+				session->closing = true;
+				if (session->sending == 0) {
+					finish_http_session(session);
+				}
+				return;
+			}
+
+			if ((size_t)readlen == remaining) {
+				isc_buffer_free(&session->buf);
+			} else {
+				isc_buffer_forward(session->buf,
+						   (size_t)readlen);
+			}
+
+			http_do_bio(session, send_httphandle, send_cb,
+				    send_cbarg);
+			return;
+		} else {
+			/* Resume reading, it's idempotent, wait for more */
+			isc_nm_resumeread(session->handle);
+		}
+	} else {
+		/* We don't want more data, stop reading for now */
+		isc_nm_pauseread(session->handle);
+	}
+
+	if (send_cb != NULL) {
+		INSIST(VALID_NMHANDLE(send_httphandle));
+		(void)http_send_outgoing(session, send_httphandle, send_cb,
+					 send_cbarg);
+	} else {
+		INSIST(send_httphandle == NULL);
+		INSIST(send_cb == NULL);
+		INSIST(send_cbarg == NULL);
+		(void)http_send_outgoing(session, NULL, NULL, NULL);
+	}
+
+	return;
+}
+
+/*
+ * Take the client stream currently attached to 'sock' (detaching it
+ * from the socket) or, if there is none, create a new one with
+ * new_http_cstream(). The stream is returned via 'streamp', which must
+ * point to a NULL pointer.
+ *
+ * Returns ISC_R_SUCCESS, or the failure code of new_http_cstream().
+ */
+static isc_result_t
+get_http_cstream(isc_nmsocket_t *sock, http_cstream_t **streamp) {
+	http_cstream_t *stream = sock->h2.connect.cstream;
+
+	REQUIRE(streamp != NULL && *streamp == NULL);
+
+	/* Whatever happens next, the socket no longer owns the stream. */
+	sock->h2.connect.cstream = NULL;
+
+	if (stream != NULL) {
+		*streamp = stream;
+		return (ISC_R_SUCCESS);
+	}
+
+	isc_result_t result = new_http_cstream(sock, &stream);
+	if (result != ISC_R_SUCCESS) {
+		INSIST(stream == NULL);
+		return (result);
+	}
+
+	*streamp = stream;
+	return (ISC_R_SUCCESS);
+}
+
+/*
+ * Report the outcome of a connection attempt to the connect callback
+ * stored in 'sock', passing it a freshly created HTTP handle.
+ *
+ * On failure the callback is called directly and the handle is
+ * detached afterwards. On success the callback is dispatched via
+ * isc__nm_connectcb(); when 'session' is not NULL the handle is also
+ * stored in session->client_httphandle.
+ */
+static void
+http_call_connect_cb(isc_nmsocket_t *sock, isc_nm_http_session_t *session,
+		     isc_result_t result) {
+	isc_nmhandle_t *httphandle = isc__nmhandle_get(sock, &sock->peer,
+						       &sock->iface);
+
+	REQUIRE(sock->connect_cb != NULL);
+
+	if (result != ISC_R_SUCCESS) {
+		/* Save the callback before it is cleared on the socket. */
+		isc_nm_cb_t connect_cb = sock->connect_cb;
+		void *cbarg = sock->connect_cbarg;
+
+		isc__nmsocket_clearcb(sock);
+		connect_cb(httphandle, result, cbarg);
+		isc_nmhandle_detach(&httphandle);
+		return;
+	}
+
+	isc__nm_uvreq_t *req = isc__nm_uvreq_get(sock->mgr, sock);
+
+	req->cb.connect = sock->connect_cb;
+	req->cbarg = sock->connect_cbarg;
+	if (session == NULL) {
+		/* The request takes over the only reference. */
+		req->handle = httphandle;
+	} else {
+		/* The session keeps the handle; the request gets its own
+		 * reference. */
+		session->client_httphandle = httphandle;
+		req->handle = NULL;
+		isc_nmhandle_attach(httphandle, &req->handle);
+	}
+
+	isc__nmsocket_clearcb(sock);
+	isc__nm_connectcb(sock, req, result, true);
+}
+
+/*
+ * Connect callback for the underlying (TCP or TLS) transport of an
+ * HTTP client socket; 'cbarg' is the HTTP socket created by
+ * isc_nm_httpconnect().
+ *
+ * On success a new client HTTP/2 session is created and attached to
+ * both sockets, ALPN is verified when TLS is in use, the nghttp2 client
+ * session is initialised, and the user's connect callback is called.
+ * On any failure the connect callback is called with the error code
+ * and the HTTP socket is destroyed.
+ */
+static void
+transport_connect_cb(isc_nmhandle_t *handle, isc_result_t result, void *cbarg) {
+	isc_nmsocket_t *http_sock = (isc_nmsocket_t *)cbarg;
+	isc_nmsocket_t *transp_sock = NULL;
+	isc_nm_http_session_t *session = NULL;
+	http_cstream_t *cstream = NULL;
+	isc_mem_t *mctx = NULL;
+
+	REQUIRE(VALID_NMSOCK(http_sock));
+	REQUIRE(VALID_NMHANDLE(handle));
+
+	transp_sock = handle->sock;
+
+	REQUIRE(VALID_NMSOCK(transp_sock));
+
+	mctx = transp_sock->mgr->mctx;
+
+	INSIST(http_sock->h2.connect.uri != NULL);
+
+	http_sock->tid = transp_sock->tid;
+	http_sock->h2.connect.tls_peer_verify_string =
+		isc_nm_verify_tls_peer_result_string(handle);
+	if (result != ISC_R_SUCCESS) {
+		goto error;
+	}
+
+	new_session(mctx, http_sock->h2.connect.tlsctx, &session);
+	session->client = true;
+	transp_sock->h2.session = session;
+	http_sock->h2.connect.tlsctx = NULL;
+	/* otherwise we will get some garbage output in DIG */
+	http_sock->iface = handle->sock->iface;
+	http_sock->peer = handle->sock->peer;
+
+	/* The URI string is handed over to the transport socket. */
+	transp_sock->h2.connect.post = http_sock->h2.connect.post;
+	transp_sock->h2.connect.uri = http_sock->h2.connect.uri;
+	http_sock->h2.connect.uri = NULL;
+	isc__nm_httpsession_attach(session, &http_sock->h2.session);
+
+	if (session->tlsctx != NULL) {
+		const unsigned char *alpn = NULL;
+		unsigned int alpnlen = 0;
+
+		INSIST(transp_sock->type == isc_nm_tlssocket);
+
+		isc_tls_get_selected_alpn(transp_sock->tlsstream.tls, &alpn,
+					  &alpnlen);
+		if (alpn == NULL || alpnlen != NGHTTP2_PROTO_VERSION_ID_LEN ||
+		    memcmp(NGHTTP2_PROTO_VERSION_ID, alpn,
+			   NGHTTP2_PROTO_VERSION_ID_LEN) != 0)
+		{
+			/*
+			 * HTTP/2 negotiation error. Any sensible DoH
+			 * client will fail if HTTP/2 cannot be
+			 * negotiated via ALPN.
+			 */
+			result = ISC_R_HTTP2ALPNERROR;
+			goto error;
+		}
+	}
+
+	isc_nmhandle_attach(handle, &session->handle);
+
+	initialize_nghttp2_client_session(session);
+	if (!send_client_connection_header(session)) {
+		/*
+		 * 'result' is still ISC_R_SUCCESS at this point; set a
+		 * failure code so that the connect callback is not told
+		 * that the connection succeeded.
+		 */
+		result = ISC_R_FAILURE;
+		goto error;
+	}
+
+	result = get_http_cstream(http_sock, &cstream);
+	http_sock->h2.connect.cstream = cstream;
+	if (result != ISC_R_SUCCESS) {
+		goto error;
+	}
+
+	http_transpost_tcp_nodelay(handle);
+
+	http_call_connect_cb(http_sock, session, result);
+
+	http_do_bio(session, NULL, NULL, NULL);
+	isc__nmsocket_detach(&http_sock);
+	return;
+
+error:
+	http_call_connect_cb(http_sock, session, result);
+
+	/* Only set when the URI has not been handed over yet. */
+	if (http_sock->h2.connect.uri != NULL) {
+		isc_mem_free(mctx, http_sock->h2.connect.uri);
+	}
+
+	isc__nmsocket_prep_destroy(http_sock);
+	isc__nmsocket_detach(&http_sock);
+}
+
+/*
+ * Start an asynchronous HTTP/2 client connection to 'peer'.
+ *
+ * A new socket of type isc_nm_httpsocket is created to carry the connect
+ * callback 'cb'/'cbarg', a private copy of 'uri', the 'post' flag and
+ * 'tlsctx'. The underlying transport connection is then initiated with
+ * isc_nm_tlsconnect() when 'tlsctx' is not NULL, or with
+ * isc_nm_tcpconnect() otherwise; in both cases transport_connect_cb() is
+ * registered as the callback with the new socket as its argument.
+ *
+ * When 'local' is NULL, an address obtained from isc_sockaddr_anyofpf()
+ * for the peer's address family is used instead.
+ *
+ * If isc__nm_closing() is true for the new socket, nothing is connected:
+ * the connect callback is invoked through isc__nm_connectcb() with
+ * ISC_R_SHUTTINGDOWN and the socket is destroyed.
+ *
+ * Requires a valid 'mgr', non-NULL 'cb' and 'peer', and a non-empty 'uri'.
+ */
+void
+isc_nm_httpconnect(isc_nm_t *mgr, isc_sockaddr_t *local, isc_sockaddr_t *peer,
+ const char *uri, bool post, isc_nm_cb_t cb, void *cbarg,
+ isc_tlsctx_t *tlsctx,
+ isc_tlsctx_client_session_cache_t *client_sess_cache,
+ unsigned int timeout, size_t extrahandlesize) {
+ isc_sockaddr_t local_interface;
+ isc_nmsocket_t *sock = NULL;
+
+ REQUIRE(VALID_NM(mgr));
+ REQUIRE(cb != NULL);
+ REQUIRE(peer != NULL);
+ REQUIRE(uri != NULL);
+ REQUIRE(*uri != '\0');
+
+ /* No local address given: fall back to one matching the peer's family. */
+ if (local == NULL) {
+ isc_sockaddr_anyofpf(&local_interface, peer->type.sa.sa_family);
+ local = &local_interface;
+ }
+
+ sock = isc_mem_get(mgr->mctx, sizeof(*sock));
+ isc__nmsocket_init(sock, mgr, isc_nm_httpsocket, local);
+
+ sock->extrahandlesize = extrahandlesize;
+ sock->connect_timeout = timeout;
+ sock->result = ISC_R_UNSET;
+ sock->connect_cb = cb;
+ sock->connect_cbarg = cbarg;
+ atomic_init(&sock->client, true);
+
+ /* Shutting down: report the failure via the connect callback and bail. */
+ if (isc__nm_closing(sock)) {
+ isc__nm_uvreq_t *req = isc__nm_uvreq_get(mgr, sock);
+
+ req->cb.connect = cb;
+ req->cbarg = cbarg;
+ req->peer = *peer;
+ req->local = *local;
+ req->handle = isc__nmhandle_get(sock, &req->peer, &sock->iface);
+
+ if (isc__nm_in_netthread()) {
+ sock->tid = isc_nm_tid();
+ }
+
+ isc__nmsocket_clearcb(sock);
+ isc__nm_connectcb(sock, req, ISC_R_SHUTTINGDOWN, true);
+ isc__nmsocket_prep_destroy(sock);
+ isc__nmsocket_detach(&sock);
+ return;
+ }
+
+ /* Remember the connection parameters; 'uri' is duplicated. */
+ sock->h2 = (isc_nmsocket_h2_t){ .connect.uri = isc_mem_strdup(mgr->mctx,
+ uri),
+ .connect.post = post,
+ .connect.tlsctx = tlsctx };
+ ISC_LINK_INIT(&sock->h2, link);
+
+ /*
+ * We need to prevent the interface object data from going out of
+ * scope too early.
+ */
+ if (local == &local_interface) {
+ sock->h2.connect.local_interface = local_interface;
+ sock->iface = sock->h2.connect.local_interface;
+ }
+
+ /* TLS transport when a TLS context was supplied, plain TCP otherwise. */
+ if (tlsctx != NULL) {
+ isc_nm_tlsconnect(mgr, local, peer, transport_connect_cb, sock,
+ tlsctx, client_sess_cache, timeout, 0);
+ } else {
+ isc_nm_tcpconnect(mgr, local, peer, transport_connect_cb, sock,
+ timeout, 0);
+ }
+}
+
+/*
+ * Prepare the client stream attached to the socket of 'handle' for
+ * sending the DNS message in 'region' and submit the request to
+ * nghttp2 with client_submit_request().
+ *
+ * For POST streams the message is copied into cstream->postdata. For
+ * GET streams it is base64url-encoded and appended to the stream's
+ * path as the "?dns=" query parameter (stored in cstream->GET_path).
+ *
+ * The stream is detached from the socket before it is submitted; if
+ * the submission fails it is released with put_http_cstream().
+ *
+ * Returns ISC_R_CANCELED when the session is closed, ISC_R_SUCCESS
+ * when the request has been submitted, or an error code otherwise.
+ */
+static isc_result_t
+client_send(isc_nmhandle_t *handle, const isc_region_t *region) {
+	isc_result_t result = ISC_R_SUCCESS;
+	isc_nmsocket_t *sock = handle->sock;
+	isc_mem_t *mctx = sock->mgr->mctx;
+	isc_nm_http_session_t *session = sock->h2.session;
+	http_cstream_t *cstream = sock->h2.connect.cstream;
+
+	REQUIRE(VALID_HTTP2_SESSION(handle->sock->h2.session));
+	REQUIRE(session->client);
+	REQUIRE(region != NULL);
+	REQUIRE(region->base != NULL);
+	REQUIRE(region->length <= MAX_DNS_MESSAGE_SIZE);
+
+	if (session->closed) {
+		return (ISC_R_CANCELED);
+	}
+
+	INSIST(cstream != NULL);
+
+	if (cstream->post) {
+		/* POST */
+		isc_buffer_allocate(mctx, &cstream->postdata, region->length);
+		isc_buffer_putmem(cstream->postdata, region->base,
+				  region->length);
+	} else {
+		/* GET */
+		size_t path_size = 0;
+		char *base64url_data = NULL;
+		size_t base64url_data_len = 0;
+		isc_buffer_t *buf = NULL;
+		isc_region_t data = *region;
+		isc_region_t base64_region;
+		/* Base64 output size: 4 characters per 3 input bytes,
+		 * rounded up to a multiple of 4 (padding). */
+		size_t base64_len = ((4 * data.length / 3) + 3) & ~3;
+
+		isc_buffer_allocate(mctx, &buf, base64_len);
+
+		result = isc_base64_totext(&data, -1, "", buf);
+		if (result != ISC_R_SUCCESS) {
+			isc_buffer_free(&buf);
+			goto error;
+		}
+
+		isc_buffer_usedregion(buf, &base64_region);
+		INSIST(base64_region.length == base64_len);
+
+		base64url_data = isc__nm_base64_to_base64url(
+			mctx, (const char *)base64_region.base,
+			base64_region.length, &base64url_data_len);
+		isc_buffer_free(&buf);
+		if (base64url_data == NULL) {
+			/*
+			 * 'result' still holds ISC_R_SUCCESS from the
+			 * base64 encoding step; report the conversion
+			 * failure instead of silently sending nothing.
+			 */
+			result = ISC_R_FAILURE;
+			goto error;
+		}
+
+		/* len("?dns=") + len(path) + len(base64url) + len("\0") */
+		path_size = cstream->pathlen + base64url_data_len + 5 + 1;
+		cstream->GET_path = isc_mem_allocate(mctx, path_size);
+		cstream->GET_path_len = (size_t)snprintf(
+			cstream->GET_path, path_size, "%.*s?dns=%s",
+			(int)cstream->pathlen, cstream->path, base64url_data);
+
+		INSIST(cstream->GET_path_len == (path_size - 1));
+		isc_mem_free(mctx, base64url_data);
+	}
+
+	cstream->sending = true;
+
+	/* From here on the stream is no longer attached to the socket. */
+	sock->h2.connect.cstream = NULL;
+	result = client_submit_request(session, cstream);
+	if (result != ISC_R_SUCCESS) {
+		put_http_cstream(session->mctx, cstream);
+		goto error;
+	}
+
+error:
+	return (result);
+}
+
+/*
+ * Send a DNS request over an HTTP client handle: register 'cb'/'cbarg'
+ * as the read callback via isc__nm_http_read() and then submit the
+ * message in 'region' with client_send().
+ *
+ * Must be called on a client socket from the socket's own thread;
+ * 'cb' must not be NULL.
+ *
+ * Returns ISC_R_SUCCESS when the request has been submitted,
+ * ISC_R_CANCELED when the session is not active, or the error code
+ * returned by client_send().
+ */
+isc_result_t
+isc__nm_http_request(isc_nmhandle_t *handle, isc_region_t *region,
+		     isc_nm_recv_cb_t cb, void *cbarg) {
+	isc_result_t result = ISC_R_SUCCESS;
+	isc_nmsocket_t *sock = NULL;
+	http_cstream_t *cstream = NULL;
+
+	REQUIRE(VALID_NMHANDLE(handle));
+	REQUIRE(VALID_NMSOCK(handle->sock));
+	REQUIRE(handle->sock->tid == isc_nm_tid());
+	REQUIRE(atomic_load(&handle->sock->client));
+
+	REQUIRE(cb != NULL);
+
+	sock = handle->sock;
+
+	isc__nm_http_read(handle, cb, cbarg);
+	if (!http_session_active(handle->sock->h2.session)) {
+		/* the callback was called by isc__nm_http_read() */
+		return (ISC_R_CANCELED);
+	}
+	result = client_send(handle, region);
+	if (result != ISC_R_SUCCESS) {
+		goto error;
+	}
+
+	return (ISC_R_SUCCESS);
+
+error:
+	/*
+	 * client_send() detaches the stream from the socket before
+	 * submitting the request, so after a failed submission there may
+	 * be no stream left here. Only call the read callback when the
+	 * stream is still attached; the failure is reported to the caller
+	 * through the return value in either case.
+	 */
+	cstream = sock->h2.connect.cstream;
+	if (cstream != NULL && cstream->read_cb != NULL) {
+		cstream->read_cb(handle, result, NULL, cstream->read_cbarg);
+	}
+	return (result);
+}
+
+/*
+ * nghttp2 callback invoked when the reception of a header block begins;
+ * 'user_data' is the HTTP session.
+ *
+ * Frames other than request HEADERS are ignored (0 is returned). For a
+ * new request a socket of type isc_nm_httpsocket is created for the
+ * stream, attached to the session, appended to session->sstreams and
+ * registered as the nghttp2 stream user data.
+ *
+ * Returns NGHTTP2_ERR_TEMPORAL_CALLBACK_FAILURE when the frame is larger
+ * than MAX_ALLOWED_DATA_IN_HEADERS, NGHTTP2_ERR_CALLBACK_FAILURE when the
+ * session already has 'max_concurrent_streams' streams, and 0 otherwise.
+ */
+static int
+server_on_begin_headers_callback(nghttp2_session *ngsession,
+ const nghttp2_frame *frame, void *user_data) {
+ isc_nm_http_session_t *session = (isc_nm_http_session_t *)user_data;
+ isc_nmsocket_t *socket = NULL;
+
+ if (frame->hd.type != NGHTTP2_HEADERS ||
+ frame->headers.cat != NGHTTP2_HCAT_REQUEST)
+ {
+ return (0);
+ } else if (frame->hd.length > MAX_ALLOWED_DATA_IN_HEADERS) {
+ return (NGHTTP2_ERR_TEMPORAL_CALLBACK_FAILURE);
+ }
+
+ /* Refuse to open more streams than the session allows. */
+ if (session->nsstreams >= session->max_concurrent_streams) {
+ return (NGHTTP2_ERR_CALLBACK_FAILURE);
+ }
+
+ /* Create the per-stream socket using the transport's addresses. */
+ socket = isc_mem_get(session->mctx, sizeof(isc_nmsocket_t));
+ isc__nmsocket_init(socket, session->serversocket->mgr,
+ isc_nm_httpsocket,
+ (isc_sockaddr_t *)&session->handle->sock->iface);
+ socket->peer = session->handle->sock->peer;
+ socket->h2 = (isc_nmsocket_h2_t){
+ .psock = socket,
+ .stream_id = frame->hd.stream_id,
+ .headers_error_code = ISC_HTTP_ERROR_SUCCESS,
+ .request_type = ISC_HTTP_REQ_UNSUPPORTED,
+ .request_scheme = ISC_HTTP_SCHEME_UNSUPPORTED
+ };
+ isc_buffer_initnull(&socket->h2.rbuf);
+ isc_buffer_initnull(&socket->h2.wbuf);
+ session->nsstreams++;
+ isc__nm_httpsession_attach(session, &socket->h2.session);
+ socket->tid = session->handle->sock->tid;
+ ISC_LINK_INIT(&socket->h2, link);
+ ISC_LIST_APPEND(session->sstreams, &socket->h2, link);
+
+ /* Let the other nghttp2 callbacks find the socket by stream id. */
+ nghttp2_session_set_stream_user_data(ngsession, frame->hd.stream_id,
+ socket);
+ return (0);
+}
+
+/*
+ * Look up the endpoint handler registered for 'request_path' on the
+ * listener 'serversocket' for thread 'tid'.
+ *
+ * Returns NULL when the listener is not listening; otherwise returns
+ * whatever http_endpoints_find() returns.
+ */
+static isc_nm_httphandler_t *
+find_server_request_handler(const char *request_path,
+			    isc_nmsocket_t *serversocket, const int tid) {
+	REQUIRE(VALID_NMSOCK(serversocket));
+
+	if (!atomic_load(&serversocket->listening)) {
+		return (NULL);
+	}
+
+	return (http_endpoints_find(
+		request_path, http_get_listener_endpoints(serversocket, tid)));
+}
+
+/*
+ * Process the value of the ":path" header of a request.
+ *
+ * The part of 'value' before the first '?' (or the whole value when
+ * there is none) is stored in socket->h2.request_path, validated with
+ * isc_nm_http_path_isvalid() and used to look up an endpoint handler,
+ * whose callback data is copied into 'socket'. If a query string is
+ * present, it is parsed with isc__nm_parse_httpquery() and the extracted
+ * value is converted with isc__nm_base64url_to_base64() and stored in
+ * socket->h2.query_data.
+ *
+ * Returns:
+ * - ISC_HTTP_ERROR_BAD_REQUEST for an invalid path or a query string
+ *   that cannot be parsed;
+ * - ISC_HTTP_ERROR_NOT_FOUND when no handler is found for the path;
+ * - ISC_HTTP_ERROR_PAYLOAD_TOO_LARGE when the estimated decoded size of
+ *   the query value exceeds MAX_DNS_MESSAGE_SIZE;
+ * - ISC_HTTP_ERROR_SUCCESS otherwise.
+ */
+static isc_http_error_responses_t
+server_handle_path_header(isc_nmsocket_t *socket, const uint8_t *value,
+ const size_t valuelen) {
+ isc_nm_httphandler_t *handler = NULL;
+ const uint8_t *qstr = NULL;
+ size_t vlen = valuelen;
+
+ /* Split the value at the start of the query string, if any. */
+ qstr = memchr(value, '?', valuelen);
+ if (qstr != NULL) {
+ vlen = qstr - value;
+ }
+
+ /* Replace any previously stored path. */
+ if (socket->h2.request_path != NULL) {
+ isc_mem_free(socket->mgr->mctx, socket->h2.request_path);
+ }
+ socket->h2.request_path = isc_mem_strndup(
+ socket->mgr->mctx, (const char *)value, vlen + 1);
+
+ if (!isc_nm_http_path_isvalid(socket->h2.request_path)) {
+ isc_mem_free(socket->mgr->mctx, socket->h2.request_path);
+ socket->h2.request_path = NULL;
+ return (ISC_HTTP_ERROR_BAD_REQUEST);
+ }
+
+ handler = find_server_request_handler(socket->h2.request_path,
+ socket->h2.session->serversocket,
+ socket->tid);
+ if (handler != NULL) {
+ socket->h2.cb = handler->cb;
+ socket->h2.cbarg = handler->cbarg;
+ socket->extrahandlesize = handler->extrahandlesize;
+ } else {
+ isc_mem_free(socket->mgr->mctx, socket->h2.request_path);
+ socket->h2.request_path = NULL;
+ return (ISC_HTTP_ERROR_NOT_FOUND);
+ }
+
+ if (qstr != NULL) {
+ const char *dns_value = NULL;
+ size_t dns_value_len = 0;
+
+ if (isc__nm_parse_httpquery((const char *)qstr, &dns_value,
+ &dns_value_len))
+ {
+ /* Estimate of the decoded size: 3 bytes per 4 characters. */
+ const size_t decoded_size = dns_value_len / 4 * 3;
+ if (decoded_size <= MAX_DNS_MESSAGE_SIZE) {
+ if (socket->h2.query_data != NULL) {
+ isc_mem_free(socket->mgr->mctx,
+ socket->h2.query_data);
+ }
+ socket->h2.query_data =
+ isc__nm_base64url_to_base64(
+ socket->mgr->mctx, dns_value,
+ dns_value_len,
+ &socket->h2.query_data_len);
+ } else {
+ socket->h2.query_too_large = true;
+ return (ISC_HTTP_ERROR_PAYLOAD_TOO_LARGE);
+ }
+ } else {
+ return (ISC_HTTP_ERROR_BAD_REQUEST);
+ }
+ }
+ return (ISC_HTTP_ERROR_SUCCESS);
+}
+
+/*
+ * Process the value of the ":method" header: record GET or POST in
+ * socket->h2.request_type.
+ *
+ * Returns ISC_HTTP_ERROR_NOT_IMPLEMENTED for any other method.
+ */
+static isc_http_error_responses_t
+server_handle_method_header(isc_nmsocket_t *socket, const uint8_t *value,
+			    const size_t valuelen) {
+	const char method_get[] = "GET";
+	const char method_post[] = "POST";
+
+	if (HEADER_MATCH(method_get, value, valuelen)) {
+		socket->h2.request_type = ISC_HTTP_REQ_GET;
+		return (ISC_HTTP_ERROR_SUCCESS);
+	}
+
+	if (HEADER_MATCH(method_post, value, valuelen)) {
+		socket->h2.request_type = ISC_HTTP_REQ_POST;
+		return (ISC_HTTP_ERROR_SUCCESS);
+	}
+
+	return (ISC_HTTP_ERROR_NOT_IMPLEMENTED);
+}
+
+/*
+ * Process the value of the ":scheme" header: record "https" or "http"
+ * in socket->h2.request_scheme.
+ *
+ * Returns ISC_HTTP_ERROR_BAD_REQUEST for any other scheme.
+ */
+static isc_http_error_responses_t
+server_handle_scheme_header(isc_nmsocket_t *socket, const uint8_t *value,
+			    const size_t valuelen) {
+	const char scheme_https[] = "https";
+	const char scheme_http[] = "http";
+
+	if (HEADER_MATCH(scheme_https, value, valuelen)) {
+		socket->h2.request_scheme = ISC_HTTP_SCHEME_HTTP_SECURE;
+		return (ISC_HTTP_ERROR_SUCCESS);
+	}
+
+	if (HEADER_MATCH(scheme_http, value, valuelen)) {
+		socket->h2.request_scheme = ISC_HTTP_SCHEME_HTTP;
+		return (ISC_HTTP_ERROR_SUCCESS);
+	}
+
+	return (ISC_HTTP_ERROR_BAD_REQUEST);
+}
+
+/*
+ * Process the value of the "Content-Length" header and store the
+ * parsed number in socket->h2.content_length.
+ *
+ * The value must consist of decimal digits only; anything else
+ * (signs, whitespace, trailing garbage, embedded NUL) is rejected.
+ *
+ * Returns:
+ * - ISC_HTTP_ERROR_BAD_REQUEST for a malformed or zero value;
+ * - ISC_HTTP_ERROR_PAYLOAD_TOO_LARGE when the value exceeds
+ *   MAX_DNS_MESSAGE_SIZE;
+ * - ISC_HTTP_ERROR_SUCCESS otherwise.
+ */
+static isc_http_error_responses_t
+server_handle_content_length_header(isc_nmsocket_t *socket,
+				    const uint8_t *value,
+				    const size_t valuelen) {
+	char tmp[32] = { 0 };
+	const size_t tmplen = sizeof(tmp) - 1;
+
+	/*
+	 * strtoul() on its own silently accepts leading whitespace, a
+	 * sign and trailing garbage (e.g. "10abc"), so validate the
+	 * whole value first.
+	 */
+	for (size_t i = 0; i < valuelen; i++) {
+		if (!isdigit((unsigned char)value[i])) {
+			socket->h2.content_length = 0;
+			return (ISC_HTTP_ERROR_BAD_REQUEST);
+		}
+	}
+
+	/* 'tmp' is zero-initialised, so the copy stays NUL-terminated. */
+	if (valuelen > 0) {
+		memmove(tmp, value, valuelen > tmplen ? tmplen : valuelen);
+	}
+	socket->h2.content_length = strtoul(tmp, NULL, 10);
+	if (socket->h2.content_length > MAX_DNS_MESSAGE_SIZE) {
+		return (ISC_HTTP_ERROR_PAYLOAD_TOO_LARGE);
+	} else if (socket->h2.content_length == 0) {
+		return (ISC_HTTP_ERROR_BAD_REQUEST);
+	}
+	return (ISC_HTTP_ERROR_SUCCESS);
+}
+
+/*
+ * Process the value of the "Content-Type" header: only DNS_MEDIA_TYPE
+ * is accepted.
+ *
+ * Returns ISC_HTTP_ERROR_UNSUPPORTED_MEDIA_TYPE for any other value.
+ */
+static isc_http_error_responses_t
+server_handle_content_type_header(isc_nmsocket_t *socket, const uint8_t *value,
+				  const size_t valuelen) {
+	const char expected_type[] = DNS_MEDIA_TYPE;
+
+	UNUSED(socket);
+
+	if (HEADER_MATCH(expected_type, value, valuelen)) {
+		return (ISC_HTTP_ERROR_SUCCESS);
+	}
+
+	return (ISC_HTTP_ERROR_UNSUPPORTED_MEDIA_TYPE);
+}
+
+/*
+ * Dispatch a single request header to its dedicated handler.
+ *
+ * "Content-Length" is always processed; ":path", ":method", ":scheme"
+ * and "Content-Type" are processed only while no header error has been
+ * recorded for the stream. Any other header is ignored.
+ *
+ * Returns the code produced by the handler, or ISC_HTTP_ERROR_SUCCESS
+ * when the header was not processed.
+ */
+static isc_http_error_responses_t
+server_handle_header(isc_nmsocket_t *socket, const uint8_t *name,
+		     size_t namelen, const uint8_t *value,
+		     const size_t valuelen) {
+	const char hdr_path[] = ":path";
+	const char hdr_method[] = ":method";
+	const char hdr_scheme[] = ":scheme";
+	const char hdr_content_length[] = "Content-Length";
+	const char hdr_content_type[] = "Content-Type";
+
+	/*
+	 * process Content-Length even when there was an error,
+	 * to drop the connection earlier if required.
+	 */
+	if (HEADER_MATCH(hdr_content_length, name, namelen)) {
+		return (server_handle_content_length_header(socket, value,
+							    valuelen));
+	}
+
+	/* An earlier header already failed: skip everything else. */
+	if (socket->h2.headers_error_code != ISC_HTTP_ERROR_SUCCESS) {
+		return (ISC_HTTP_ERROR_SUCCESS);
+	}
+
+	if (HEADER_MATCH(hdr_path, name, namelen)) {
+		return (server_handle_path_header(socket, value, valuelen));
+	}
+	if (HEADER_MATCH(hdr_method, name, namelen)) {
+		return (server_handle_method_header(socket, value, valuelen));
+	}
+	if (HEADER_MATCH(hdr_scheme, name, namelen)) {
+		return (server_handle_scheme_header(socket, value, valuelen));
+	}
+	if (HEADER_MATCH(hdr_content_type, name, namelen)) {
+		return (server_handle_content_type_header(socket, value,
+							  valuelen));
+	}
+
+	return (ISC_HTTP_ERROR_SUCCESS);
+}
+
+/*
+ * nghttp2 callback invoked for every received header name/value pair.
+ *
+ * Accounts the size of the header against the stream, processes request
+ * headers with server_handle_header() and records the first non-success
+ * code it returns in socket->h2.headers_error_code.
+ *
+ * Returns NGHTTP2_ERR_TEMPORAL_CALLBACK_FAILURE when there is no socket
+ * associated with the stream, when more than
+ * MAX_ALLOWED_DATA_IN_HEADERS bytes of headers have been processed, or
+ * when the announced content length exceeds MAX_ALLOWED_DATA_IN_POST;
+ * returns 0 otherwise.
+ */
+static int
+server_on_header_callback(nghttp2_session *session, const nghttp2_frame *frame,
+			  const uint8_t *name, size_t namelen,
+			  const uint8_t *value, size_t valuelen, uint8_t flags,
+			  void *user_data) {
+	isc_http_error_responses_t status = ISC_HTTP_ERROR_SUCCESS;
+	isc_nmsocket_t *socket = NULL;
+
+	UNUSED(flags);
+	UNUSED(user_data);
+
+	socket = nghttp2_session_get_stream_user_data(session,
+						      frame->hd.stream_id);
+	if (socket == NULL) {
+		return (NGHTTP2_ERR_TEMPORAL_CALLBACK_FAILURE);
+	}
+
+	socket->h2.headers_data_processed += (namelen + valuelen);
+
+	/* Only headers of request HEADERS frames are of interest. */
+	if (frame->hd.type == NGHTTP2_HEADERS &&
+	    frame->headers.cat == NGHTTP2_HCAT_REQUEST)
+	{
+		status = server_handle_header(socket, name, namelen, value,
+					      valuelen);
+	}
+
+	INSIST(socket != NULL);
+
+	if (socket->h2.headers_data_processed > MAX_ALLOWED_DATA_IN_HEADERS ||
+	    socket->h2.content_length > MAX_ALLOWED_DATA_IN_POST)
+	{
+		return (NGHTTP2_ERR_TEMPORAL_CALLBACK_FAILURE);
+	}
+
+	if (status != ISC_HTTP_ERROR_SUCCESS) {
+		socket->h2.headers_error_code = status;
+	}
+
+	return (0);
+}
+
+/*
+ * nghttp2 data provider callback: copy up to 'length' bytes of the
+ * response from the stream socket's write buffer ('source->ptr' is the
+ * socket) into 'buf' and set NGHTTP2_DATA_FLAG_EOF once the buffer has
+ * been drained.
+ *
+ * Returns the number of bytes copied.
+ */
+static ssize_t
+server_read_callback(nghttp2_session *ngsession, int32_t stream_id,
+		     uint8_t *buf, size_t length, uint32_t *data_flags,
+		     nghttp2_data_source *source, void *user_data) {
+	isc_nm_http_session_t *session = (isc_nm_http_session_t *)user_data;
+	isc_nmsocket_t *socket = (isc_nmsocket_t *)source->ptr;
+	size_t chunk;
+
+	REQUIRE(socket->h2.stream_id == stream_id);
+
+	UNUSED(ngsession);
+	UNUSED(session);
+
+	/* Never copy more than the caller's buffer can hold. */
+	chunk = isc_buffer_remaininglength(&socket->h2.wbuf);
+	chunk = (chunk > length) ? length : chunk;
+
+	if (chunk > 0) {
+		(void)memmove(buf, isc_buffer_current(&socket->h2.wbuf),
+			      chunk);
+		isc_buffer_forward(&socket->h2.wbuf, chunk);
+	}
+
+	if (isc_buffer_remaininglength(&socket->h2.wbuf) == 0) {
+		*data_flags |= NGHTTP2_DATA_FLAG_EOF;
+	}
+
+	return (chunk);
+}
+
+/*
+ * Submit a response with the headers 'nva' ('nvlen' entries) for
+ * stream 'stream_id'; the body is supplied by server_read_callback()
+ * from the write buffer of 'socket'.
+ *
+ * Returns ISC_R_FAILURE when a response has already been submitted for
+ * this socket or when nghttp2_submit_response() fails, ISC_R_SUCCESS
+ * otherwise.
+ */
+static isc_result_t
+server_send_response(nghttp2_session *ngsession, int32_t stream_id,
+		     const nghttp2_nv *nva, size_t nvlen,
+		     isc_nmsocket_t *socket) {
+	nghttp2_data_provider provider;
+
+	/* NGHTTP2 will gladly accept new response (write request)
+	 * from us even though we cannot send more than one over the
+	 * same HTTP/2 stream. Thus, we need to handle this case
+	 * manually. We will return failure code so that it will be
+	 * passed to the write callback. */
+	if (socket->h2.response_submitted) {
+		return (ISC_R_FAILURE);
+	}
+
+	provider.source.ptr = socket;
+	provider.read_callback = server_read_callback;
+
+	if (nghttp2_submit_response(ngsession, stream_id, nva, nvlen,
+				    &provider) != 0)
+	{
+		return (ISC_R_FAILURE);
+	}
+
+	socket->h2.response_submitted = true;
+	return (ISC_R_SUCCESS);
+}
+
+/*
+ * Build one 'struct http_error_responses' initializer: the error tag, a
+ * ":status" header carrying the stringified numeric 'code', and a
+ * human-readable description.
+ */
+#define MAKE_ERROR_REPLY(tag, code, desc) \
+ { \
+ tag, MAKE_NV2(":status", #code), desc \
+ }
+
+/*
+ * Here we use roughly the same error codes that Unbound uses.
+ * (https://blog.nlnetlabs.nl/dns-over-https-in-unbound/)
+ */
+
+/*
+ * Table mapping error identifiers to the HTTP error responses sent to
+ * clients; it is searched linearly by server_send_error_response().
+ */
+static struct http_error_responses {
+ /* Error identifier used as the lookup key. */
+ const isc_http_error_responses_t type;
+ /* The ":status" header sent as the only header of the response. */
+ const nghttp2_nv header;
+ /* Description used when the error response is logged. */
+ const char *desc;
+} error_responses[] = {
+ MAKE_ERROR_REPLY(ISC_HTTP_ERROR_BAD_REQUEST, 400, "Bad Request"),
+ MAKE_ERROR_REPLY(ISC_HTTP_ERROR_NOT_FOUND, 404, "Not Found"),
+ MAKE_ERROR_REPLY(ISC_HTTP_ERROR_PAYLOAD_TOO_LARGE, 413,
+ "Payload Too Large"),
+ MAKE_ERROR_REPLY(ISC_HTTP_ERROR_URI_TOO_LONG, 414, "URI Too Long"),
+ MAKE_ERROR_REPLY(ISC_HTTP_ERROR_UNSUPPORTED_MEDIA_TYPE, 415,
+ "Unsupported Media Type"),
+ MAKE_ERROR_REPLY(ISC_HTTP_ERROR_GENERIC, 500, "Internal Server Error"),
+ MAKE_ERROR_REPLY(ISC_HTTP_ERROR_NOT_IMPLEMENTED, 501, "Not Implemented")
+};
+
+/*
+ * Log (at debug level 1) that the request handled by 'socket' failed
+ * with the error response 'response', including the peer and local
+ * addresses of the session's transport handle. Does nothing when the
+ * message would not be logged.
+ */
+static void
+log_server_error_response(const isc_nmsocket_t *socket,
+			  const struct http_error_responses *response) {
+	const int log_level = ISC_LOG_DEBUG(1);
+	char peer_text[ISC_SOCKADDR_FORMATSIZE];
+	char local_text[ISC_SOCKADDR_FORMATSIZE];
+	isc_sockaddr_t peer_addr;
+	isc_sockaddr_t local_addr;
+
+	/* Avoid formatting the addresses when nothing would be logged. */
+	if (!isc_log_wouldlog(isc_lctx, log_level)) {
+		return;
+	}
+
+	peer_addr = isc_nmhandle_peeraddr(socket->h2.session->handle);
+	isc_sockaddr_format(&peer_addr, peer_text, sizeof(peer_text));
+
+	local_addr = isc_nmhandle_localaddr(socket->h2.session->handle);
+	isc_sockaddr_format(&local_addr, local_text, sizeof(local_text));
+
+	isc_log_write(isc_lctx, ISC_LOGCATEGORY_GENERAL, ISC_LOGMODULE_NETMGR,
+		      log_level, "HTTP/2 request from %s (on %s) failed: %s %s",
+		      peer_text, local_text, response->header.value,
+		      response->desc);
+}
+
+/*
+ * Send the HTTP error response corresponding to 'error' on the stream
+ * of 'socket'. Any data in the socket's read buffer is released and the
+ * minimal TTL is reset so that the error is not cached.
+ *
+ * When 'error' has no entry in 'error_responses', the generic error
+ * response is sent instead.
+ *
+ * Returns the result of server_send_response().
+ */
+static isc_result_t
+server_send_error_response(const isc_http_error_responses_t error,
+			   nghttp2_session *ngsession, isc_nmsocket_t *socket) {
+	const size_t nresponses = sizeof(error_responses) /
+				  sizeof(error_responses[0]);
+	const struct http_error_responses *reply = NULL;
+	void *rbuf_base = NULL;
+
+	REQUIRE(error != ISC_HTTP_ERROR_SUCCESS);
+
+	/* The request body, if any, is of no use anymore. */
+	rbuf_base = isc_buffer_base(&socket->h2.rbuf);
+	if (rbuf_base != NULL) {
+		isc_mem_free(socket->h2.session->mctx, rbuf_base);
+		isc_buffer_initnull(&socket->h2.rbuf);
+	}
+
+	/* We do not want the error response to be cached anywhere. */
+	socket->h2.min_ttl = 0;
+
+	for (size_t i = 0; i < nresponses && reply == NULL; i++) {
+		if (error_responses[i].type == error) {
+			reply = &error_responses[i];
+		}
+	}
+
+	if (reply == NULL) {
+		/* Unknown error code: fall back to the generic response. */
+		return (server_send_error_response(ISC_HTTP_ERROR_GENERIC,
+						   ngsession, socket));
+	}
+
+	log_server_error_response(socket, reply);
+	return (server_send_response(ngsession, socket->h2.stream_id,
+				     &reply->header, 1, socket));
+}
+
+/*
+ * Call the request callback registered on 'socket' with 'result' and
+ * 'data', passing it a temporary handle created for the peer address
+ * of the session's transport handle.
+ */
+static void
+server_call_cb(isc_nmsocket_t *socket, isc_nm_http_session_t *session,
+	       const isc_result_t result, isc_region_t *data) {
+	isc_nmhandle_t *reqhandle = NULL;
+	isc_sockaddr_t peer;
+
+	REQUIRE(VALID_NMSOCK(socket));
+	REQUIRE(VALID_HTTP2_SESSION(session));
+	REQUIRE(socket->h2.cb != NULL);
+
+	peer = isc_nmhandle_peeraddr(session->handle);
+	reqhandle = isc__nmhandle_get(socket, &peer, NULL);
+
+	socket->h2.cb(reqhandle, result, data, socket->h2.cbarg);
+
+	/* The handle was only needed for the duration of the callback. */
+	isc_nmhandle_detach(&reqhandle);
+}
+
+/*
+ * Reply with the "Bad Request" error response to the request behind
+ * the server-side HTTP 'handle'. The result of sending is ignored.
+ */
+void
+isc__nm_http_bad_request(isc_nmhandle_t *handle) {
+	isc_nmsocket_t *httpsock = NULL;
+
+	REQUIRE(VALID_NMHANDLE(handle));
+	REQUIRE(VALID_NMSOCK(handle->sock));
+
+	httpsock = handle->sock;
+
+	REQUIRE(httpsock->type == isc_nm_httpsocket);
+	REQUIRE(!atomic_load(&httpsock->client));
+	REQUIRE(VALID_HTTP2_SESSION(httpsock->h2.session));
+
+	(void)server_send_error_response(ISC_HTTP_ERROR_BAD_REQUEST,
+					 httpsock->h2.session->ngsession,
+					 httpsock);
+}
+
+/*
+ * Validate a completely received request on the stream socket 'socket'
+ * and, when it is well-formed, pass the DNS message to the endpoint
+ * callback via server_call_cb().
+ *
+ * For GET requests the message is base64-decoded from
+ * socket->h2.query_data into a temporary on-stack buffer; for POST
+ * requests the contents of socket->h2.rbuf are used.
+ *
+ * When validation fails, the matching HTTP error response is sent with
+ * server_send_error_response().
+ *
+ * Returns 0 on success or when the error response has been submitted,
+ * NGHTTP2_ERR_TEMPORAL_CALLBACK_FAILURE when the error response could not
+ * be sent.
+ */
+static int
+server_on_request_recv(nghttp2_session *ngsession,
+ isc_nm_http_session_t *session, isc_nmsocket_t *socket) {
+ isc_result_t result;
+ isc_http_error_responses_t code = ISC_HTTP_ERROR_SUCCESS;
+ isc_region_t data;
+ uint8_t tmp_buf[MAX_DNS_MESSAGE_SIZE];
+
+ /* An error recorded while processing the headers takes precedence. */
+ code = socket->h2.headers_error_code;
+ if (code != ISC_HTTP_ERROR_SUCCESS) {
+ goto error;
+ }
+
+ if (socket->h2.request_path == NULL || socket->h2.cb == NULL) {
+ code = ISC_HTTP_ERROR_NOT_FOUND;
+ } else if (socket->h2.request_type == ISC_HTTP_REQ_POST &&
+ socket->h2.content_length == 0)
+ {
+ code = ISC_HTTP_ERROR_BAD_REQUEST;
+ } else if (socket->h2.request_type == ISC_HTTP_REQ_POST &&
+ isc_buffer_usedlength(&socket->h2.rbuf) >
+ socket->h2.content_length)
+ {
+ code = ISC_HTTP_ERROR_PAYLOAD_TOO_LARGE;
+ } else if (socket->h2.request_type == ISC_HTTP_REQ_POST &&
+ isc_buffer_usedlength(&socket->h2.rbuf) !=
+ socket->h2.content_length)
+ {
+ code = ISC_HTTP_ERROR_BAD_REQUEST;
+ } else if (socket->h2.request_type == ISC_HTTP_REQ_POST &&
+ socket->h2.query_data != NULL)
+ {
+ /* The spec does not mention which value the query string for
+ * POST should have. For GET we use its value to decode a DNS
+ * message from it, for POST the message is transferred in the
+ * body of the request. Taking it into account, it is much safer
+ * to treat POST
+ * requests with query strings as malformed ones. */
+ code = ISC_HTTP_ERROR_BAD_REQUEST;
+ } else if (socket->h2.request_type == ISC_HTTP_REQ_GET &&
+ socket->h2.content_length > 0)
+ {
+ code = ISC_HTTP_ERROR_BAD_REQUEST;
+ } else if (socket->h2.request_type == ISC_HTTP_REQ_GET &&
+ socket->h2.query_data == NULL)
+ {
+ /* A GET request without any query data - there is nothing to
+ * decode. */
+ INSIST(socket->h2.query_data_len == 0);
+ code = ISC_HTTP_ERROR_BAD_REQUEST;
+ }
+
+ if (code != ISC_HTTP_ERROR_SUCCESS) {
+ goto error;
+ }
+
+ if (socket->h2.request_type == ISC_HTTP_REQ_GET) {
+ /* Decode the query data into the temporary buffer. */
+ isc_buffer_t decoded_buf;
+ isc_buffer_init(&decoded_buf, tmp_buf, sizeof(tmp_buf));
+ if (isc_base64_decodestring(socket->h2.query_data,
+ &decoded_buf) != ISC_R_SUCCESS)
+ {
+ code = ISC_HTTP_ERROR_BAD_REQUEST;
+ goto error;
+ }
+ isc_buffer_usedregion(&decoded_buf, &data);
+ } else if (socket->h2.request_type == ISC_HTTP_REQ_POST) {
+ /* The message is the request body collected in the read buffer. */
+ INSIST(socket->h2.content_length > 0);
+ isc_buffer_usedregion(&socket->h2.rbuf, &data);
+ } else {
+ UNREACHABLE();
+ }
+
+ server_call_cb(socket, session, ISC_R_SUCCESS, &data);
+
+ return (0);
+
+error:
+ result = server_send_error_response(code, ngsession, socket);
+ if (result != ISC_R_SUCCESS) {
+ return (NGHTTP2_ERR_TEMPORAL_CALLBACK_FAILURE);
+ }
+ return (0);
+}
+
+/*
+ * Queue the data in 'region' for sending over the HTTP 'handle'.
+ *
+ * The request (handle reference, callback 'cb'/'cbarg' and a reference
+ * to the caller's data) is wrapped into an "httpsend" event which is
+ * enqueued for the worker the socket belongs to. The data itself is
+ * not copied here.
+ */
+void
+isc__nm_http_send(isc_nmhandle_t *handle, const isc_region_t *region,
+		  isc_nm_cb_t cb, void *cbarg) {
+	isc__netievent_httpsend_t *event = NULL;
+	isc__nm_uvreq_t *sendreq = NULL;
+	isc_nmsocket_t *sock = NULL;
+
+	REQUIRE(VALID_NMHANDLE(handle));
+
+	sock = handle->sock;
+
+	REQUIRE(VALID_NMSOCK(sock));
+
+	/* Describe the send request. */
+	sendreq = isc__nm_uvreq_get(sock->mgr, sock);
+	isc_nmhandle_attach(handle, &sendreq->handle);
+	sendreq->cb.send = cb;
+	sendreq->cbarg = cbarg;
+	sendreq->uvbuf.base = (char *)region->base;
+	sendreq->uvbuf.len = region->length;
+
+	/* Hand it over to the socket's worker. */
+	event = isc__nm_get_netievent_httpsend(sock->mgr, sock, sendreq);
+	isc__nm_enqueue_ievent(&sock->mgr->workers[sock->tid],
+			       (isc__netievent_t *)event);
+}
+
+/*
+ * Dispose of the send request 'req' after a failed send: when it has
+ * a send callback, report 'eresult' through isc__nm_sendcb();
+ * otherwise just return the request with isc__nm_uvreq_put().
+ */
+static void
+failed_send_cb(isc_nmsocket_t *sock, isc__nm_uvreq_t *req,
+	       isc_result_t eresult) {
+	REQUIRE(VALID_NMSOCK(sock));
+	REQUIRE(VALID_UVREQ(req));
+
+	if (req->cb.send == NULL) {
+		/* Nobody to notify. */
+		isc__nm_uvreq_put(&req, sock);
+		return;
+	}
+
+	isc__nm_sendcb(sock, req, eresult, true);
+}
+
+/*
+ * Client-side half of the httpsend event: pass the request payload to
+ * client_send() and, on success, run session I/O with the caller's
+ * callback before returning the uvreq. On failure the request goes through
+ * failed_send_cb().
+ */
+static void
+client_httpsend(isc_nmhandle_t *handle, isc_nmsocket_t *sock,
+		isc__nm_uvreq_t *req) {
+	/* Captured up front, before the request is used or released. */
+	isc_nm_cb_t send_cb = req->cb.send;
+	void *send_cbarg = req->cbarg;
+	isc_region_t payload = { (uint8_t *)req->uvbuf.base, req->uvbuf.len };
+	isc_result_t result;
+
+	result = client_send(handle, &payload);
+	if (result == ISC_R_SUCCESS) {
+		http_do_bio(sock->h2.session, handle, send_cb, send_cbarg);
+		isc__nm_uvreq_put(&req, sock);
+		return;
+	}
+
+	failed_send_cb(sock, req, result);
+}
+
+/*
+ * Server-side half of the httpsend event: send the payload in 'req' as a
+ * "200" response on the stream owned by 'sock'.
+ *
+ * If the socket is closing or the handle's HTTP session is no longer
+ * active, the request fails with ISC_R_CANCELED. Otherwise the payload is
+ * wrapped in sock->h2.wbuf, the Content-Length and Cache-Control header
+ * values are formatted into per-socket buffers, and the response is
+ * submitted with server_send_response(). Cache-Control is
+ * DEFAULT_CACHE_CONTROL when sock->h2.min_ttl is 0, and
+ * "max-age=<min_ttl>" otherwise.
+ *
+ * On success the callback is handed to http_do_bio(); on failure it is
+ * invoked directly with the error, but only if one was supplied
+ * (failed_send_cb() also treats a NULL send callback as legal).
+ */
+static void
+server_httpsend(isc_nmhandle_t *handle, isc_nmsocket_t *sock,
+		isc__nm_uvreq_t *req) {
+	size_t content_len_buf_len, cache_control_buf_len;
+	isc_result_t result = ISC_R_SUCCESS;
+	isc_nm_cb_t cb = req->cb.send;
+	void *cbarg = req->cbarg;
+
+	if (isc__nmsocket_closing(sock) ||
+	    !http_session_active(handle->httpsession))
+	{
+		failed_send_cb(sock, req, ISC_R_CANCELED);
+		return;
+	}
+
+	INSIST(handle->httpsession->handle->sock->tid == isc_nm_tid());
+	INSIST(VALID_NMHANDLE(handle->httpsession->handle));
+	INSIST(VALID_NMSOCK(handle->httpsession->handle->sock));
+
+	/* Expose the whole payload as the used region of the write buffer. */
+	isc_buffer_init(&sock->h2.wbuf, req->uvbuf.base, req->uvbuf.len);
+	isc_buffer_add(&sock->h2.wbuf, req->uvbuf.len);
+
+	content_len_buf_len = snprintf(sock->h2.clenbuf,
+				       sizeof(sock->h2.clenbuf), "%lu",
+				       (unsigned long)req->uvbuf.len);
+	if (sock->h2.min_ttl == 0) {
+		cache_control_buf_len =
+			snprintf(sock->h2.cache_control_buf,
+				 sizeof(sock->h2.cache_control_buf), "%s",
+				 DEFAULT_CACHE_CONTROL);
+	} else {
+		cache_control_buf_len =
+			snprintf(sock->h2.cache_control_buf,
+				 sizeof(sock->h2.cache_control_buf),
+				 "max-age=%" PRIu32, sock->h2.min_ttl);
+	}
+	const nghttp2_nv hdrs[] = { MAKE_NV2(":status", "200"),
+				    MAKE_NV2("Content-Type", DNS_MEDIA_TYPE),
+				    MAKE_NV("Content-Length", sock->h2.clenbuf,
+					    content_len_buf_len),
+				    MAKE_NV("Cache-Control",
+					    sock->h2.cache_control_buf,
+					    cache_control_buf_len) };
+
+	result = server_send_response(handle->httpsession->ngsession,
+				      sock->h2.stream_id, hdrs,
+				      sizeof(hdrs) / sizeof(nghttp2_nv), sock);
+
+	if (result == ISC_R_SUCCESS) {
+		http_do_bio(handle->httpsession, handle, cb, cbarg);
+	} else if (cb != NULL) {
+		/* Do not call through a NULL callback on the error path. */
+		cb(handle, result, cbarg);
+	}
+	isc__nm_uvreq_put(&req, sock);
+}
+
+/*
+ * Worker-side handler for the httpsend event: take ownership of the uvreq
+ * carried by the event and dispatch it to the client or server send path,
+ * depending on the 'client' flag of the socket's HTTP session.
+ */
+void
+isc__nm_async_httpsend(isc__networker_t *worker, isc__netievent_t *ev0) {
+	isc__netievent_httpsend_t *sendev = (isc__netievent_httpsend_t *)ev0;
+	isc_nmsocket_t *sock = sendev->sock;
+	isc__nm_uvreq_t *req = sendev->req;
+	isc_nmhandle_t *handle = NULL;
+	isc_nm_http_session_t *session = NULL;
+
+	UNUSED(worker);
+
+	REQUIRE(VALID_NMSOCK(sock));
+	REQUIRE(VALID_UVREQ(req));
+	REQUIRE(VALID_HTTP2_SESSION(sock->h2.session));
+
+	/* The event no longer refers to the request from here on. */
+	sendev->req = NULL;
+	handle = req->handle;
+
+	REQUIRE(VALID_NMHANDLE(handle));
+
+	session = sock->h2.session;
+	if (session == NULL || !session->client) {
+		server_httpsend(handle, sock, req);
+		return;
+	}
+
+	client_httpsend(handle, sock, req);
+}
+
+/*
+ * Arm reading on an HTTP handle.
+ *
+ * The socket's http_cstream_t is obtained via get_http_cstream(), 'cb' and
+ * 'cbarg' are recorded as its read callback, and the stream is marked as
+ * reading. If the stream is already flagged as sending, the request is
+ * submitted with client_submit_request() and session I/O is started.
+ *
+ * If the session is not active, 'cb' is invoked immediately with
+ * ISC_R_CANCELED and no data.
+ */
+void
+isc__nm_http_read(isc_nmhandle_t *handle, isc_nm_recv_cb_t cb, void *cbarg) {
+ isc_result_t result;
+ http_cstream_t *cstream = NULL;
+ isc_nm_http_session_t *session = NULL;
+
+ REQUIRE(VALID_NMHANDLE(handle));
+
+ session = handle->sock->h2.session;
+ if (!http_session_active(session)) {
+ cb(handle, ISC_R_CANCELED, NULL, cbarg);
+ return;
+ }
+
+ result = get_http_cstream(handle->sock, &cstream);
+ if (result != ISC_R_SUCCESS) {
+ /* NOTE(review): 'cb' is not invoked on this path - confirm that
+ * is intended. */
+ return;
+ }
+
+ handle->sock->h2.connect.cstream = cstream;
+ cstream->read_cb = cb;
+ cstream->read_cbarg = cbarg;
+ cstream->reading = true;
+
+ if (cstream->sending) {
+ result = client_submit_request(session, cstream);
+ if (result != ISC_R_SUCCESS) {
+ /* NOTE(review): the stream is released here without
+ * notifying 'cb'; h2.connect.cstream was set above and
+ * is not reset in this function - verify against
+ * put_http_cstream(). */
+ put_http_cstream(session->mctx, cstream);
+ return;
+ }
+
+ http_do_bio(session, NULL, NULL, NULL);
+ }
+}
+
+/*
+ * nghttp2 "frame received" callback for server sessions.
+ *
+ * Only DATA and HEADERS frames carrying END_STREAM are of interest: they
+ * mean the client has finished its request, which is then processed by
+ * server_on_request_recv(). Everything else is ignored and 0 is returned.
+ */
+static int
+server_on_frame_recv_callback(nghttp2_session *ngsession,
+			      const nghttp2_frame *frame, void *user_data) {
+	isc_nm_http_session_t *session = (isc_nm_http_session_t *)user_data;
+	isc_nmsocket_t *stream_sock = NULL;
+
+	if (frame->hd.type != NGHTTP2_DATA &&
+	    frame->hd.type != NGHTTP2_HEADERS)
+	{
+		return (0);
+	}
+
+	/* The client request has not finished yet. */
+	if (!(frame->hd.flags & NGHTTP2_FLAG_END_STREAM)) {
+		return (0);
+	}
+
+	stream_sock = nghttp2_session_get_stream_user_data(
+		ngsession, frame->hd.stream_id);
+
+	/*
+	 * For DATA and HEADERS frames this callback may run after
+	 * on_stream_close_callback, so make sure the stream is still
+	 * alive.
+	 */
+	if (stream_sock == NULL) {
+		return (0);
+	}
+
+	return (server_on_request_recv(ngsession, session, stream_sock));
+}
+
+/*
+ * Create the nghttp2 server session object for 'session'.
+ *
+ * A temporary callbacks object is filled with the server-side handlers and
+ * the session is created with 'session' as its user data and an allocator
+ * set up by init_nghttp2_mem() from session->mctx. Failure to allocate the
+ * callbacks object or the session is fatal (RUNTIME_CHECK).
+ */
+static void
+initialize_nghttp2_server_session(isc_nm_http_session_t *session) {
+	nghttp2_session_callbacks *cbs = NULL;
+	nghttp2_mem allocator;
+
+	init_nghttp2_mem(session->mctx, &allocator);
+
+	RUNTIME_CHECK(nghttp2_session_callbacks_new(&cbs) == 0);
+
+	nghttp2_session_callbacks_set_on_data_chunk_recv_callback(
+		cbs, on_data_chunk_recv_callback);
+	nghttp2_session_callbacks_set_on_stream_close_callback(
+		cbs, on_stream_close_callback);
+	nghttp2_session_callbacks_set_on_header_callback(
+		cbs, server_on_header_callback);
+	nghttp2_session_callbacks_set_on_begin_headers_callback(
+		cbs, server_on_begin_headers_callback);
+	nghttp2_session_callbacks_set_on_frame_recv_callback(
+		cbs, server_on_frame_recv_callback);
+
+	RUNTIME_CHECK(nghttp2_session_server_new3(&session->ngsession, cbs,
+						  session, NULL,
+						  &allocator) == 0);
+
+	/* The callbacks object is only needed while creating the session. */
+	nghttp2_session_callbacks_del(cbs);
+}
+
+/*
+ * Submit the server's SETTINGS frame, advertising
+ * session->max_concurrent_streams as MAX_CONCURRENT_STREAMS.
+ *
+ * Returns 0 on success and -1 if nghttp2 rejects the submission.
+ */
+static int
+server_send_connection_header(isc_nm_http_session_t *session) {
+	nghttp2_settings_entry settings[1] = {
+		{ NGHTTP2_SETTINGS_MAX_CONCURRENT_STREAMS,
+		  session->max_concurrent_streams }
+	};
+
+	if (nghttp2_submit_settings(session->ngsession, NGHTTP2_FLAG_NONE,
+				    settings, 1) != 0)
+	{
+		return (-1);
+	}
+
+	return (0);
+}
+
+/*
+ * It is advisable to disable Nagle's algorithm for HTTP/2
+ * connections because multiple HTTP/2 streams could be multiplexed
+ * over one transport connection. Thus, delays when delivering small
+ * packets could bring down performance for the whole session.
+ * HTTP/2 is meant to be used this way.
+ */
+static void
+http_transpost_tcp_nodelay(isc_nmhandle_t *transphandle) {
+	uv_os_fd_t fd = (uv_os_fd_t)-1;
+	/* For TLS the TCP socket is the one behind the outer handle. */
+	isc_nmsocket_t *tcpsock =
+		(transphandle->sock->type == isc_nm_tlssocket)
+			? transphandle->sock->outerhandle->sock
+			: transphandle->sock;
+
+	(void)uv_fileno((uv_handle_t *)&tcpsock->uv_handle.tcp, &fd);
+	RUNTIME_CHECK(fd != (uv_os_fd_t)-1);
+
+	/* Best effort: a failure to set the option is ignored. */
+	(void)isc__nm_socket_tcp_nodelay((uv_os_sock_t)fd);
+}
+
+/*
+ * Accept callback installed on the underlying TCP/TLS listener by
+ * isc_nm_listenhttp(); 'cbarg' is the HTTP listener socket.
+ *
+ * For an accepted connection it sets up a new server-side HTTP session
+ * (nghttp2 session, transport handle, reference to the HTTP listener),
+ * queues the server's SETTINGS and starts session I/O.
+ *
+ * Returns ISC_R_CANCELED when the connection or the HTTP listener is
+ * closing or gone, 'result' when the accept itself failed, and
+ * ISC_R_SUCCESS otherwise.
+ */
+static isc_result_t
+httplisten_acceptcb(isc_nmhandle_t *handle, isc_result_t result, void *cbarg) {
+ isc_nmsocket_t *httplistensock = (isc_nmsocket_t *)cbarg;
+ isc_nm_http_session_t *session = NULL;
+ isc_nmsocket_t *listener = NULL, *httpserver = NULL;
+
+ REQUIRE(VALID_NMHANDLE(handle));
+ REQUIRE(VALID_NMSOCK(handle->sock));
+
+ /*
+ * Locate the HTTP listener recorded on the transport listener: for TLS
+ * it hangs off sock->listener, for plain TCP off the parent of
+ * sock->server.
+ */
+ if (handle->sock->type == isc_nm_tlssocket) {
+ REQUIRE(VALID_NMSOCK(handle->sock->listener));
+ listener = handle->sock->listener;
+ httpserver = listener->h2.httpserver;
+ } else {
+ REQUIRE(VALID_NMSOCK(handle->sock->server));
+ listener = handle->sock->server;
+ REQUIRE(VALID_NMSOCK(listener->parent));
+ httpserver = listener->parent->h2.httpserver;
+ }
+
+ /*
+ * NOTE: HTTP listener socket might be destroyed by the time this
+ * function gets invoked, so we need to do extra sanity checks to
+ * detect this case.
+ */
+ if (isc__nmsocket_closing(handle->sock) || httpserver == NULL) {
+ return (ISC_R_CANCELED);
+ }
+
+ if (result != ISC_R_SUCCESS) {
+ /* XXXWPK do nothing? */
+ return (result);
+ }
+
+ REQUIRE(VALID_NMSOCK(httplistensock));
+ INSIST(httplistensock == httpserver);
+
+ if (isc__nmsocket_closing(httplistensock) ||
+ !atomic_load(&httplistensock->listening))
+ {
+ return (ISC_R_CANCELED);
+ }
+
+ http_transpost_tcp_nodelay(handle);
+
+ /* The per-session stream limit is a snapshot of the listener's. */
+ new_session(httplistensock->mgr->mctx, NULL, &session);
+ session->max_concurrent_streams =
+ atomic_load(&httplistensock->h2.max_concurrent_streams);
+ initialize_nghttp2_server_session(session);
+ handle->sock->h2.session = session;
+
+ isc_nmhandle_attach(handle, &session->handle);
+ isc__nmsocket_attach(httplistensock, &session->serversocket);
+ /* NOTE(review): the return value (-1 on failure) is ignored here. */
+ server_send_connection_header(session);
+
+ /* TODO H2 */
+ http_do_bio(session, NULL, NULL, NULL);
+ return (ISC_R_SUCCESS);
+}
+
+/*
+ * Start listening for HTTP connections on 'iface'.
+ *
+ * A new isc_nm_httplistener socket is created, its stream limit and
+ * per-worker endpoint sets are initialised, and an underlying listener is
+ * started with httplisten_acceptcb() as the accept callback: a TLS
+ * listener when 'ctx' is not NULL, a plain TCP listener otherwise.
+ *
+ * 'eps' must have at least one handler and must not be in use yet; it is
+ * marked as in use by this call.
+ *
+ * On success the new socket is stored in '*sockp' and ISC_R_SUCCESS is
+ * returned. If the underlying listener cannot be started, the socket is
+ * marked closed and detached, and the error is returned.
+ */
+isc_result_t
+isc_nm_listenhttp(isc_nm_t *mgr, isc_sockaddr_t *iface, int backlog,
+ isc_quota_t *quota, isc_tlsctx_t *ctx,
+ isc_nm_http_endpoints_t *eps, uint32_t max_concurrent_streams,
+ isc_nmsocket_t **sockp) {
+ isc_nmsocket_t *sock = NULL;
+ isc_result_t result;
+
+ REQUIRE(!ISC_LIST_EMPTY(eps->handlers));
+ REQUIRE(!ISC_LIST_EMPTY(eps->handler_cbargs));
+ REQUIRE(atomic_load(&eps->in_use) == false);
+
+ sock = isc_mem_get(mgr->mctx, sizeof(*sock));
+ isc__nmsocket_init(sock, mgr, isc_nm_httplistener, iface);
+ atomic_init(&sock->h2.max_concurrent_streams,
+ NGHTTP2_INITIAL_MAX_CONCURRENT_STREAMS);
+
+ isc_nmsocket_set_max_streams(sock, max_concurrent_streams);
+
+ /* NOTE(review): in_use is not reset if listening fails below. */
+ atomic_store(&eps->in_use, true);
+ http_init_listener_endpoints(sock, eps);
+
+ if (ctx != NULL) {
+ result = isc_nm_listentls(mgr, iface, httplisten_acceptcb, sock,
+ sizeof(isc_nm_http_session_t),
+ backlog, quota, ctx, &sock->outer);
+ } else {
+ result = isc_nm_listentcp(mgr, iface, httplisten_acceptcb, sock,
+ sizeof(isc_nm_http_session_t),
+ backlog, quota, &sock->outer);
+ }
+
+ if (result != ISC_R_SUCCESS) {
+ atomic_store(&sock->closed, true);
+ isc__nmsocket_detach(&sock);
+ return (result);
+ }
+
+ /* Let the transport listener find its HTTP listener (see
+ * httplisten_acceptcb()). */
+ isc__nmsocket_attach(sock, &sock->outer->h2.httpserver);
+
+ sock->nchildren = sock->outer->nchildren;
+ sock->result = ISC_R_UNSET;
+ sock->tid = 0;
+ sock->fd = (uv_os_sock_t)-1;
+
+ isc__nmsocket_barrier_init(sock);
+ atomic_init(&sock->rchildren, sock->nchildren);
+
+ atomic_store(&sock->listening, true);
+ *sockp = sock;
+ return (ISC_R_SUCCESS);
+}
+
+/*
+ * Allocate an empty endpoints set from 'mctx'.
+ *
+ * The new object holds its own reference to 'mctx', starts with one
+ * reference, empty handler lists and 'in_use' cleared.
+ */
+isc_nm_http_endpoints_t *
+isc_nm_http_endpoints_new(isc_mem_t *mctx) {
+	isc_nm_http_endpoints_t *restrict set;
+
+	REQUIRE(mctx != NULL);
+
+	set = isc_mem_get(mctx, sizeof(*set));
+	*set = (isc_nm_http_endpoints_t){ .magic = HTTP_ENDPOINTS_MAGIC };
+
+	isc_mem_attach(mctx, &set->mctx);
+	ISC_LIST_INIT(set->handler_cbargs);
+	ISC_LIST_INIT(set->handlers);
+	isc_refcount_init(&set->references, 1);
+	atomic_init(&set->in_use, false);
+
+	return (set);
+}
+
+/*
+ * Drop the reference held in '*epsp' and clear the pointer.
+ *
+ * When the last reference goes away, all handlers (with their path
+ * strings) and all callback-argument records are freed, and the object is
+ * released together with its memory context reference.
+ */
+void
+isc_nm_http_endpoints_detach(isc_nm_http_endpoints_t **restrict epsp) {
+	isc_nm_http_endpoints_t *restrict eps;
+	isc_mem_t *mctx;
+	isc_nm_httphandler_t *handler = NULL, *next_handler = NULL;
+	isc_nm_httpcbarg_t *cbrec = NULL, *next_cbrec = NULL;
+
+	REQUIRE(epsp != NULL);
+	eps = *epsp;
+	REQUIRE(VALID_HTTP_ENDPOINTS(eps));
+
+	if (isc_refcount_decrement(&eps->references) > 1) {
+		/* Other references remain. */
+		*epsp = NULL;
+		return;
+	}
+
+	mctx = eps->mctx;
+
+	/* Delete all handlers */
+	for (handler = ISC_LIST_HEAD(eps->handlers); handler != NULL;
+	     handler = next_handler)
+	{
+		next_handler = ISC_LIST_NEXT(handler, link);
+		ISC_LIST_DEQUEUE(eps->handlers, handler, link);
+		isc_mem_free(mctx, handler->path);
+		isc_mem_put(mctx, handler, sizeof(*handler));
+	}
+
+	/* Delete all callback-argument records */
+	for (cbrec = ISC_LIST_HEAD(eps->handler_cbargs); cbrec != NULL;
+	     cbrec = next_cbrec)
+	{
+		next_cbrec = ISC_LIST_NEXT(cbrec, link);
+		ISC_LIST_DEQUEUE(eps->handler_cbargs, cbrec, link);
+		isc_mem_put(mctx, cbrec, sizeof(isc_nm_httpcbarg_t));
+	}
+
+	eps->magic = 0;
+
+	isc_mem_putanddetach(&mctx, eps, sizeof(*eps));
+	*epsp = NULL;
+}
+
+/*
+ * Take an additional reference to 'source' and store it in '*targetp',
+ * which must be a valid location currently holding NULL.
+ */
+void
+isc_nm_http_endpoints_attach(isc_nm_http_endpoints_t *source,
+ isc_nm_http_endpoints_t **targetp) {
+ REQUIRE(VALID_HTTP_ENDPOINTS(source));
+ REQUIRE(targetp != NULL && *targetp == NULL);
+
+ isc_refcount_increment(&source->references);
+
+ *targetp = source;
+}
+
+/*
+ * Look up the handler registered for exactly 'request_path' in 'eps'.
+ *
+ * Returns the first handler whose path compares equal, or NULL when the
+ * path is NULL or empty, or no handler matches.
+ */
+static isc_nm_httphandler_t *
+http_endpoints_find(const char *request_path,
+		    const isc_nm_http_endpoints_t *restrict eps) {
+	isc_nm_httphandler_t *candidate = NULL;
+
+	REQUIRE(VALID_HTTP_ENDPOINTS(eps));
+
+	if (request_path == NULL || request_path[0] == '\0') {
+		return (NULL);
+	}
+
+	candidate = ISC_LIST_HEAD(eps->handlers);
+	while (candidate != NULL) {
+		if (strcmp(request_path, candidate->path) == 0) {
+			return (candidate);
+		}
+		candidate = ISC_LIST_NEXT(candidate, link);
+	}
+
+	return (NULL);
+}
+
+/*
+ * In DoH we just need to intercept the request - the response can be sent
+ * to the client code via the nmhandle directly as it's always just the
+ * http content.
+ */
+static void
+http_callback(isc_nmhandle_t *handle, isc_result_t result, isc_region_t *data,
+	      void *arg) {
+	isc_nm_httpcbarg_t *httpcbarg = arg;
+	isc_region_t *payload = NULL;
+
+	REQUIRE(VALID_NMHANDLE(handle));
+
+	/*
+	 * On failure the user callback is told about the error and gets no
+	 * data (shut down the client, then ourselves). XXXWPK FREE
+	 */
+	if (result == ISC_R_SUCCESS) {
+		payload = data;
+	}
+
+	httpcbarg->cb(handle, result, payload, httpcbarg->cbarg);
+}
+
+/*
+ * Register 'cb'/'cbarg' as the receive callback for requests to 'uri'.
+ *
+ * A callback-argument record is always created and tracked in
+ * eps->handler_cbargs. A handler for 'uri' is only created when none
+ * exists yet; it routes requests through http_callback() to that record.
+ *
+ * The set must not be in use by a listener. Always returns ISC_R_SUCCESS.
+ */
+isc_result_t
+isc_nm_http_endpoints_add(isc_nm_http_endpoints_t *restrict eps,
+			  const char *uri, const isc_nm_recv_cb_t cb,
+			  void *cbarg, const size_t extrahandlesize) {
+	isc_mem_t *mctx;
+	isc_nm_httpcbarg_t *restrict cbrec = NULL;
+
+	REQUIRE(VALID_HTTP_ENDPOINTS(eps));
+	REQUIRE(isc_nm_http_path_isvalid(uri));
+	REQUIRE(atomic_load(&eps->in_use) == false);
+
+	mctx = eps->mctx;
+
+	cbrec = isc_mem_get(mctx, sizeof(isc_nm_httpcbarg_t));
+	*cbrec = (isc_nm_httpcbarg_t){ .cb = cb, .cbarg = cbarg };
+	ISC_LINK_INIT(cbrec, link);
+
+	if (http_endpoints_find(uri, eps) == NULL) {
+		/* First registration for this path: create its handler. */
+		isc_nm_httphandler_t *restrict entry =
+			isc_mem_get(mctx, sizeof(*entry));
+
+		*entry = (isc_nm_httphandler_t){
+			.cb = http_callback,
+			.cbarg = cbrec,
+			.extrahandlesize = extrahandlesize,
+			.path = isc_mem_strdup(mctx, uri)
+		};
+		ISC_LINK_INIT(entry, link);
+		ISC_LIST_APPEND(eps->handlers, entry, link);
+	}
+
+	ISC_LIST_APPEND(eps->handler_cbargs, cbrec, link);
+	return (ISC_R_SUCCESS);
+}
+
+/*
+ * Stop an HTTP listener; 'sock' must be of type isc_nm_httplistener. The
+ * work is delegated to isc__nmsocket_stop().
+ */
+void
+isc__nm_http_stoplistening(isc_nmsocket_t *sock) {
+ REQUIRE(VALID_NMSOCK(sock));
+ REQUIRE(sock->type == isc_nm_httplistener);
+
+ isc__nmsocket_stop(sock);
+}
+
+/*
+ * Mark 'sock' as closed and inactive, then deal with its session (if any):
+ * a session that is neither sending nor reading is finished right away; one
+ * that still has a transport handle gets another round of I/O processing.
+ */
+static void
+http_close_direct(isc_nmsocket_t *sock) {
+	isc_nm_http_session_t *session = NULL;
+
+	REQUIRE(VALID_NMSOCK(sock));
+
+	atomic_store(&sock->closed, true);
+	atomic_store(&sock->active, false);
+
+	session = sock->h2.session;
+	if (session == NULL) {
+		return;
+	}
+
+	if (session->sending == 0 && !session->reading) {
+		/*
+		 * The socket is being closed too early, without having
+		 * been used even once (this might happen in the case of a
+		 * low-level error).
+		 */
+		finish_http_session(session);
+	} else if (session->handle != NULL) {
+		http_do_bio(session, NULL, NULL, NULL);
+	}
+}
+
+/*
+ * Close an HTTP stream socket that is no longer active.
+ *
+ * Only the caller that flips 'closing' from false to true proceeds. When
+ * running on the socket's own thread and the socket either has no session
+ * or its session is already marked closed, the socket is closed directly
+ * and prepared for destruction. In every other case an httpclose event is
+ * enqueued for the worker at sock->tid.
+ */
+void
+isc__nm_http_close(isc_nmsocket_t *sock) {
+ bool destroy = false;
+ REQUIRE(VALID_NMSOCK(sock));
+ REQUIRE(sock->type == isc_nm_httpsocket);
+ REQUIRE(!isc__nmsocket_active(sock));
+
+ /* Somebody else is already closing this socket. */
+ if (!atomic_compare_exchange_strong(&sock->closing, &(bool){ false },
+ true))
+ {
+ return;
+ }
+
+ if (sock->h2.session != NULL && sock->h2.session->closed &&
+ sock->tid == isc_nm_tid())
+ {
+ /* The session is closed already: just drop our reference. */
+ isc__nm_httpsession_detach(&sock->h2.session);
+ destroy = true;
+ } else if (sock->h2.session == NULL && sock->tid == isc_nm_tid()) {
+ destroy = true;
+ }
+
+ if (destroy) {
+ http_close_direct(sock);
+ isc__nmsocket_prep_destroy(sock);
+ return;
+ }
+
+ isc__netievent_httpclose_t *ievent =
+ isc__nm_get_netievent_httpclose(sock->mgr, sock);
+
+ isc__nm_enqueue_ievent(&sock->mgr->workers[sock->tid],
+ (isc__netievent_t *)ievent);
+}
+
+/*
+ * Worker-side handler for the httpclose event enqueued by
+ * isc__nm_http_close(): closes the socket via http_close_direct(). Must
+ * run on the socket's own thread.
+ */
+void
+isc__nm_async_httpclose(isc__networker_t *worker, isc__netievent_t *ev0) {
+ isc__netievent_httpclose_t *ievent = (isc__netievent_httpclose_t *)ev0;
+ isc_nmsocket_t *sock = ievent->sock;
+
+ REQUIRE(VALID_NMSOCK(sock));
+ REQUIRE(sock->tid == isc_nm_tid());
+
+ UNUSED(worker);
+
+ http_close_direct(sock);
+}
+
+/*
+ * Fail a single server-side stream: submit RST_STREAM (REFUSED_STREAM) for
+ * it and pass 'result', together with whatever is in the stream's read
+ * buffer, to server_call_cb().
+ *
+ * Streams without a request path are left alone.
+ */
+static void
+failed_httpstream_read_cb(isc_nmsocket_t *sock, isc_result_t result,
+			  isc_nm_http_session_t *session) {
+	REQUIRE(VALID_NMSOCK(sock));
+	INSIST(sock->type == isc_nm_httpsocket);
+
+	if (sock->h2.request_path != NULL) {
+		isc_region_t received;
+
+		INSIST(sock->h2.cbarg != NULL);
+
+		/* Best effort: the submission result is ignored. */
+		(void)nghttp2_submit_rst_stream(
+			session->ngsession, NGHTTP2_FLAG_END_STREAM,
+			sock->h2.stream_id, NGHTTP2_REFUSED_STREAM);
+
+		isc_buffer_usedregion(&sock->h2.rbuf, &received);
+		server_call_cb(sock, session, result, &received);
+	}
+}
+
+/*
+ * Report the failure 'result' to every client stream tracked in
+ * session->cstreams.
+ *
+ * Each stream with a read callback has it invoked with 'result' and the
+ * current contents of the stream's read buffer. The stream is then removed
+ * and released, except when the failure is ISC_R_TIMEDOUT, the stream has
+ * a read callback, and the timer on the session's transport socket is
+ * still running.
+ */
+static void
+client_call_failed_read_cb(isc_result_t result,
+ isc_nm_http_session_t *session) {
+ http_cstream_t *cstream = NULL;
+
+ REQUIRE(VALID_HTTP2_SESSION(session));
+ REQUIRE(result != ISC_R_SUCCESS);
+
+ cstream = ISC_LIST_HEAD(session->cstreams);
+ while (cstream != NULL) {
+ /* Fetched first: 'cstream' may be released below. */
+ http_cstream_t *next = ISC_LIST_NEXT(cstream, link);
+
+ /*
+ * read_cb could be NULL if cstream was allocated and added
+ * to the tracking list, but was not properly initialized due
+ * to a low-level error. It is safe to get rid of the object
+ * in such a case.
+ */
+ if (cstream->read_cb != NULL) {
+ isc_region_t read_data;
+ isc_buffer_usedregion(cstream->rbuf, &read_data);
+ cstream->read_cb(session->client_httphandle, result,
+ &read_data, cstream->read_cbarg);
+ }
+
+ if (result != ISC_R_TIMEDOUT || cstream->read_cb == NULL ||
+ !isc__nmsocket_timer_running(session->handle->sock))
+ {
+ ISC_LIST_DEQUEUE(session->cstreams, cstream, link);
+ put_http_cstream(session->mctx, cstream);
+ }
+
+ cstream = next;
+ }
+}
+
+/*
+ * Report the failure 'result' to every server-side stream of 'session' and
+ * then dispose of the streams.
+ *
+ * This is done in two passes over session->sstreams: the first calls
+ * failed_httpstream_read_cb() for each stream socket, the second unlinks
+ * each stream, marks its socket inactive and closed, and detaches it.
+ */
+static void
+server_call_failed_read_cb(isc_result_t result,
+ isc_nm_http_session_t *session) {
+ isc_nmsocket_h2_t *h2data = NULL; /* stream socket */
+
+ REQUIRE(VALID_HTTP2_SESSION(session));
+ REQUIRE(result != ISC_R_SUCCESS);
+
+ /* Pass 1: notify. */
+ for (h2data = ISC_LIST_HEAD(session->sstreams); h2data != NULL;
+ h2data = ISC_LIST_NEXT(h2data, link))
+ {
+ failed_httpstream_read_cb(h2data->psock, result, session);
+ }
+
+ /* Pass 2: tear down. */
+ h2data = ISC_LIST_HEAD(session->sstreams);
+ while (h2data != NULL) {
+ isc_nmsocket_h2_t *next = ISC_LIST_NEXT(h2data, link);
+ ISC_LIST_DEQUEUE(session->sstreams, h2data, link);
+ /* Cleanup socket in place */
+ atomic_store(&h2data->psock->active, false);
+ atomic_store(&h2data->psock->closed, true);
+ isc__nmsocket_detach(&h2data->psock);
+
+ h2data = next;
+ }
+}
+
+/*
+ * Propagate a read failure to all streams of 'session' and finish the
+ * session when no streams are left.
+ */
+static void
+failed_read_cb(isc_result_t result, isc_nm_http_session_t *session) {
+	if (!session->client) {
+		server_call_failed_read_cb(result, session);
+		/*
+		 * All streams are now destroyed; close the session.
+		 */
+		finish_http_session(session);
+		return;
+	}
+
+	client_call_failed_read_cb(result, session);
+	/*
+	 * If result was ISC_R_TIMEDOUT and the timer was reset, then we
+	 * still have active streams and should not close the session.
+	 */
+	if (ISC_LIST_EMPTY(session->cstreams)) {
+		finish_http_session(session);
+	}
+}
+
+/*
+ * Record 'ttl' as the stream's min_ttl. server_httpsend() uses this value
+ * for the "max-age" of the Cache-Control response header (0 selects the
+ * default header value). Only valid on server-side sessions.
+ */
+void
+isc__nm_http_set_maxage(isc_nmhandle_t *handle, const uint32_t ttl) {
+ isc_nm_http_session_t *session;
+ isc_nmsocket_t *sock;
+
+ REQUIRE(VALID_NMHANDLE(handle));
+ REQUIRE(VALID_NMSOCK(handle->sock));
+
+ sock = handle->sock;
+ session = sock->h2.session;
+
+ INSIST(VALID_HTTP2_SESSION(session));
+ INSIST(!session->client);
+
+ sock->h2.min_ttl = ttl;
+}
+
+/*
+ * Return true when the session's transport handle is a TLS socket, i.e.
+ * the HTTP session runs over TLS.
+ */
+bool
+isc__nm_http_has_encryption(const isc_nmhandle_t *handle) {
+ isc_nm_http_session_t *session;
+ isc_nmsocket_t *sock;
+
+ REQUIRE(VALID_NMHANDLE(handle));
+ REQUIRE(VALID_NMSOCK(handle->sock));
+
+ sock = handle->sock;
+ session = sock->h2.session;
+
+ INSIST(VALID_HTTP2_SESSION(session));
+
+ return (isc_nm_socket_type(session->handle) == isc_nm_tlssocket);
+}
+
+/*
+ * Return the TLS peer verification result string for an HTTP handle.
+ *
+ * With a session, the answer comes from the session's transport handle.
+ * Without one (in the case of a low-level error neither the session object
+ * nor session->handle exists), the string saved on the socket is returned,
+ * which may be NULL.
+ */
+const char *
+isc__nm_http_verify_tls_peer_result_string(const isc_nmhandle_t *handle) {
+	isc_nmsocket_t *httpsock = NULL;
+	isc_nm_http_session_t *sess;
+
+	REQUIRE(VALID_NMHANDLE(handle));
+	REQUIRE(VALID_NMSOCK(handle->sock));
+	REQUIRE(handle->sock->type == isc_nm_httpsocket);
+
+	httpsock = handle->sock;
+	sess = httpsock->h2.session;
+
+	if (sess == NULL) {
+		return (httpsock->h2.connect.tls_peer_verify_string);
+	}
+
+	INSIST(VALID_HTTP2_SESSION(sess));
+
+	return (isc_nm_verify_tls_peer_result_string(sess->handle));
+}
+
+/*
+ * Set the TLS context of an HTTP listener by forwarding 'tlsctx' to the
+ * underlying (outer) listener socket.
+ */
+void
+isc__nm_http_set_tlsctx(isc_nmsocket_t *listener, isc_tlsctx_t *tlsctx) {
+ REQUIRE(VALID_NMSOCK(listener));
+ REQUIRE(listener->type == isc_nm_httplistener);
+
+ isc_nmsocket_set_tlsctx(listener->outer, tlsctx);
+}
+
+/*
+ * Set the concurrent-streams limit of an HTTP listener.
+ *
+ * Values of 0, or at or above NGHTTP2_INITIAL_MAX_CONCURRENT_STREAMS,
+ * select NGHTTP2_INITIAL_MAX_CONCURRENT_STREAMS; anything in between is
+ * used as given.
+ */
+void
+isc__nm_http_set_max_streams(isc_nmsocket_t *listener,
+			     const uint32_t max_concurrent_streams) {
+	uint32_t limit;
+
+	REQUIRE(VALID_NMSOCK(listener));
+	REQUIRE(listener->type == isc_nm_httplistener);
+
+	if (max_concurrent_streams == 0 ||
+	    max_concurrent_streams >= NGHTTP2_INITIAL_MAX_CONCURRENT_STREAMS)
+	{
+		limit = NGHTTP2_INITIAL_MAX_CONCURRENT_STREAMS;
+	} else {
+		limit = max_concurrent_streams;
+	}
+
+	atomic_store(&listener->h2.max_concurrent_streams, limit);
+}
+
+/*
+ * Replace the endpoints set used by an HTTP listener.
+ *
+ * 'eps' is marked as in use and one httpendpoints event per worker is
+ * enqueued; each worker then swaps its own slot (see
+ * isc__nm_async_httpendpoints()).
+ */
+void
+isc_nm_http_set_endpoints(isc_nmsocket_t *listener,
+			  isc_nm_http_endpoints_t *eps) {
+	size_t worker_count;
+
+	REQUIRE(VALID_NMSOCK(listener));
+	REQUIRE(listener->type == isc_nm_httplistener);
+	REQUIRE(VALID_HTTP_ENDPOINTS(eps));
+
+	atomic_store(&eps->in_use, true);
+
+	worker_count = (size_t)listener->mgr->nworkers;
+	for (size_t w = 0; w < worker_count; w++) {
+		isc__netievent__http_eps_t *event =
+			isc__nm_get_netievent_httpendpoints(listener->mgr,
+							    listener, eps);
+
+		isc__nm_enqueue_ievent(&listener->mgr->workers[w],
+				       (isc__netievent_t *)event);
+	}
+}
+
+/*
+ * Worker-side handler for the httpendpoints event: swap the calling
+ * thread's slot in the listener's endpoints array over to the set carried
+ * by the event.
+ */
+void
+isc__nm_async_httpendpoints(isc__networker_t *worker, isc__netievent_t *ev0) {
+	isc__netievent__http_eps_t *event = (isc__netievent__http_eps_t *)ev0;
+	const int tid = isc_nm_tid();
+	isc_nmsocket_t *listener = event->sock;
+	isc_nm_http_endpoints_t *new_eps = event->endpoints;
+	isc_nm_http_endpoints_t **slot = NULL;
+
+	UNUSED(worker);
+
+	slot = &listener->h2.listener_endpoints[tid];
+
+	/* Release the old set before taking a reference to the new one. */
+	isc_nm_http_endpoints_detach(slot);
+	isc_nm_http_endpoints_attach(new_eps, slot);
+}
+
+/*
+ * Allocate the listener's endpoints array with one slot per worker, and
+ * make every slot hold its own reference to 'epset'.
+ */
+static void
+http_init_listener_endpoints(isc_nmsocket_t *listener,
+			     isc_nm_http_endpoints_t *epset) {
+	size_t slots;
+
+	REQUIRE(VALID_NMSOCK(listener));
+	REQUIRE(VALID_NM(listener->mgr));
+	REQUIRE(VALID_HTTP_ENDPOINTS(epset));
+
+	slots = (size_t)listener->mgr->nworkers;
+	INSIST(slots > 0);
+
+	listener->h2.listener_endpoints =
+		isc_mem_get(listener->mgr->mctx,
+			    sizeof(isc_nm_http_endpoints_t *) * slots);
+	listener->h2.n_listener_endpoints = slots;
+
+	for (size_t slot = 0; slot < slots; slot++) {
+		isc_nm_http_endpoints_t **entry =
+			&listener->h2.listener_endpoints[slot];
+
+		/* attach() requires the target to be NULL. */
+		*entry = NULL;
+		isc_nm_http_endpoints_attach(epset, entry);
+	}
+}
+
+/*
+ * Release every per-worker endpoints reference held by 'listener' and free
+ * the array itself. Does nothing when the array was never allocated.
+ */
+static void
+http_cleanup_listener_endpoints(isc_nmsocket_t *listener) {
+	REQUIRE(VALID_NM(listener->mgr));
+
+	if (listener->h2.listener_endpoints != NULL) {
+		const size_t slots = listener->h2.n_listener_endpoints;
+
+		for (size_t slot = 0; slot < slots; slot++) {
+			isc_nm_http_endpoints_detach(
+				&listener->h2.listener_endpoints[slot]);
+		}
+
+		isc_mem_put(listener->mgr->mctx,
+			    listener->h2.listener_endpoints,
+			    sizeof(isc_nm_http_endpoints_t *) * slots);
+		listener->h2.n_listener_endpoints = 0;
+	}
+}
+
+/*
+ * Return the endpoints set stored in the listener's slot for thread 'tid'.
+ * 'tid' must be a valid index into the array and the slot must be set.
+ */
+static isc_nm_http_endpoints_t *
+http_get_listener_endpoints(isc_nmsocket_t *listener, const int tid) {
+	isc_nm_http_endpoints_t *slot_eps;
+
+	REQUIRE(VALID_NMSOCK(listener));
+	REQUIRE(tid >= 0);
+	REQUIRE((size_t)tid < listener->h2.n_listener_endpoints);
+
+	slot_eps = listener->h2.listener_endpoints[tid];
+	INSIST(slot_eps != NULL);
+
+	return (slot_eps);
+}
+
+/*
+ * Lookup table indexed by byte value (0-255): an entry is true when the
+ * byte belongs to the base64url alphabet, i.e. '-' (45), '0'-'9' (48-57),
+ * 'A'-'Z' (65-90), '_' (95) and 'a'-'z' (97-122). Everything else,
+ * including '+', '/' and '=', is false.
+ */
+static const bool base64url_validation_table[256] = {
+ false, false, false, false, false, false, false, false, false, false,
+ false, false, false, false, false, false, false, false, false, false,
+ false, false, false, false, false, false, false, false, false, false,
+ false, false, false, false, false, false, false, false, false, false,
+ false, false, false, false, false, true, false, false, true, true,
+ true, true, true, true, true, true, true, true, false, false,
+ false, false, false, false, false, true, true, true, true, true,
+ true, true, true, true, true, true, true, true, true, true,
+ true, true, true, true, true, true, true, true, true, true,
+ true, false, false, false, false, true, false, true, true, true,
+ true, true, true, true, true, true, true, true, true, true,
+ true, true, true, true, true, true, true, true, true, true,
+ true, true, true, false, false, false, false, false, false, false,
+ false, false, false, false, false, false, false, false, false, false,
+ false, false, false, false, false, false, false, false, false, false,
+ false, false, false, false, false, false, false, false, false, false,
+ false, false, false, false, false, false, false, false, false, false,
+ false, false, false, false, false, false, false, false, false, false,
+ false, false, false, false, false, false, false, false, false, false,
+ false, false, false, false, false, false, false, false, false, false,
+ false, false, false, false, false, false, false, false, false, false,
+ false, false, false, false, false, false, false, false, false, false,
+ false, false, false, false, false, false, false, false, false, false,
+ false, false, false, false, false, false, false, false, false, false,
+ false, false, false, false, false, false, false, false, false, false,
+ false, false, false, false, false, false
+};
+
+/*
+ * Convert a base64url string of 'base64url_len' characters to regular,
+ * '='-padded base64.
+ *
+ * '-' becomes '+', '_' becomes '/', all other characters of the base64url
+ * alphabet are copied, and the result is padded with '=' to a multiple of
+ * four characters.
+ *
+ * Returns a NUL-terminated string allocated from 'mem' (to be released
+ * with isc_mem_free()), with its length stored in '*res_len' when
+ * 'res_len' is not NULL. Returns NULL when 'mem' or 'base64url' is NULL,
+ * the length is 0, or the input contains a character outside the
+ * alphabet.
+ */
+char *
+isc__nm_base64url_to_base64(isc_mem_t *mem, const char *base64url,
+			    const size_t base64url_len, size_t *res_len) {
+	char *res = NULL;
+	size_t i, k, len;
+
+	if (mem == NULL || base64url == NULL || base64url_len == 0) {
+		return (NULL);
+	}
+
+	/* Round the length up to the next multiple of four. */
+	len = base64url_len % 4 ? base64url_len + (4 - base64url_len % 4)
+				: base64url_len;
+	res = isc_mem_allocate(mem, len + 1); /* '\0' */
+
+	for (i = 0; i < base64url_len; i++) {
+		switch (base64url[i]) {
+		case '-':
+			res[i] = '+';
+			break;
+		case '_':
+			res[i] = '/';
+			break;
+		default:
+			/*
+			 * Index via unsigned char: plain char may be
+			 * signed, and a byte >= 0x80 must not turn into an
+			 * out-of-range index.
+			 */
+			if (base64url_validation_table[(unsigned char)
+							       base64url[i]])
+			{
+				res[i] = base64url[i];
+			} else {
+				isc_mem_free(mem, res);
+				return (NULL);
+			}
+			break;
+		}
+	}
+
+	if (base64url_len % 4 != 0) {
+		for (k = 0; k < (4 - base64url_len % 4); k++, i++) {
+			res[i] = '=';
+		}
+	}
+
+	INSIST(i == len);
+
+	if (res_len != NULL) {
+		*res_len = len;
+	}
+
+	res[len] = '\0';
+
+	return (res);
+}
+
+/*
+ * Convert a base64 string of 'base64_len' characters to unpadded
+ * base64url.
+ *
+ * '+' becomes '-', '/' becomes '_', and conversion stops at the first '='
+ * (padding). All other characters must belong to the base64 alphabet.
+ *
+ * Returns a NUL-terminated string allocated from 'mem' (to be released
+ * with isc_mem_free()), with its length stored in '*res_len' when
+ * 'res_len' is not NULL. Returns NULL when 'mem' or 'base64' is NULL, the
+ * length is 0, or the input contains an invalid character.
+ */
+char *
+isc__nm_base64_to_base64url(isc_mem_t *mem, const char *base64,
+			    const size_t base64_len, size_t *res_len) {
+	char *res = NULL;
+	size_t i;
+
+	if (mem == NULL || base64 == NULL || base64_len == 0) {
+		return (NULL);
+	}
+
+	res = isc_mem_allocate(mem, base64_len + 1); /* '\0' */
+
+	for (i = 0; i < base64_len; i++) {
+		switch (base64[i]) {
+		case '+':
+			res[i] = '-';
+			break;
+		case '/':
+			res[i] = '_';
+			break;
+		case '=':
+			goto end;
+			break;
+		default:
+			/*
+			 * All other characters from the alphabet are the
+			 * same for both base64 and base64url, so we can
+			 * reuse the validation table for the rest of the
+			 * characters. The table is indexed via unsigned
+			 * char: plain char may be signed, and a byte >=
+			 * 0x80 must not turn into an out-of-range index.
+			 */
+			if (base64[i] != '-' && base64[i] != '_' &&
+			    base64url_validation_table[(unsigned char)
+							       base64[i]])
+			{
+				res[i] = base64[i];
+			} else {
+				isc_mem_free(mem, res);
+				return (NULL);
+			}
+			break;
+		}
+	}
+end:
+	if (res_len) {
+		*res_len = i;
+	}
+
+	res[i] = '\0';
+
+	return (res);
+}
+
+/*
+ * Reset the HTTP/2-specific part of 'sock': every field is zeroed, and the
+ * request type and scheme are set to their "unsupported" values.
+ */
+void
+isc__nm_http_initsocket(isc_nmsocket_t *sock) {
+ REQUIRE(sock != NULL);
+
+ sock->h2 = (isc_nmsocket_h2_t){
+ .request_type = ISC_HTTP_REQ_UNSUPPORTED,
+ .request_scheme = ISC_HTTP_SCHEME_UNSUPPORTED,
+ };
+}
+
+/*
+ * Release the HTTP-related data attached to 'sock'. What is released
+ * depends on the socket type:
+ *
+ * - TCP/TLS listeners drop their reference to the HTTP listener;
+ * - HTTP listeners drop their per-worker endpoints sets;
+ * - HTTP listeners and HTTP sockets free the request path, the query data
+ *   and the read buffer;
+ * - HTTP listeners, HTTP sockets and TCP/TLS sockets that have a session
+ *   free the connect URI and detach from the session.
+ */
+void
+isc__nm_http_cleanup_data(isc_nmsocket_t *sock) {
+ if ((sock->type == isc_nm_tcplistener ||
+ sock->type == isc_nm_tlslistener) &&
+ sock->h2.httpserver != NULL)
+ {
+ isc__nmsocket_detach(&sock->h2.httpserver);
+ }
+
+ if (sock->type == isc_nm_httplistener ||
+ sock->type == isc_nm_httpsocket)
+ {
+ if (sock->type == isc_nm_httplistener &&
+ sock->h2.listener_endpoints != NULL)
+ {
+ /* Delete all handlers */
+ http_cleanup_listener_endpoints(sock);
+ }
+
+ if (sock->h2.request_path != NULL) {
+ isc_mem_free(sock->mgr->mctx, sock->h2.request_path);
+ sock->h2.request_path = NULL;
+ }
+
+ if (sock->h2.query_data != NULL) {
+ isc_mem_free(sock->mgr->mctx, sock->h2.query_data);
+ sock->h2.query_data = NULL;
+ }
+
+ /* The client stream must have been released by now. */
+ INSIST(sock->h2.connect.cstream == NULL);
+
+ if (isc_buffer_base(&sock->h2.rbuf) != NULL) {
+ void *base = isc_buffer_base(&sock->h2.rbuf);
+ isc_mem_free(sock->mgr->mctx, base);
+ isc_buffer_initnull(&sock->h2.rbuf);
+ }
+ }
+
+ if ((sock->type == isc_nm_httplistener ||
+ sock->type == isc_nm_httpsocket ||
+ sock->type == isc_nm_tcpsocket ||
+ sock->type == isc_nm_tlssocket) &&
+ sock->h2.session != NULL)
+ {
+ /* NOTE(review): the URI is only freed when a session exists -
+ * confirm it cannot be set on a socket without one. */
+ if (sock->h2.connect.uri != NULL) {
+ isc_mem_free(sock->mgr->mctx, sock->h2.connect.uri);
+ sock->h2.connect.uri = NULL;
+ }
+ isc__nm_httpsession_detach(&sock->h2.session);
+ }
+}
+
+/*
+ * Clear the timeout on the transport handle underneath an HTTP handle.
+ * Nothing happens when the socket has no session or the session has no
+ * transport handle.
+ */
+void
+isc__nm_http_cleartimeout(isc_nmhandle_t *handle) {
+	isc_nmsocket_t *httpsock = NULL;
+
+	REQUIRE(VALID_NMHANDLE(handle));
+	REQUIRE(VALID_NMSOCK(handle->sock));
+	REQUIRE(handle->sock->type == isc_nm_httpsocket);
+
+	httpsock = handle->sock;
+	if (httpsock->h2.session == NULL ||
+	    httpsock->h2.session->handle == NULL)
+	{
+		return;
+	}
+
+	INSIST(VALID_HTTP2_SESSION(httpsock->h2.session));
+	INSIST(VALID_NMHANDLE(httpsock->h2.session->handle));
+	isc_nmhandle_cleartimeout(httpsock->h2.session->handle);
+}
+
+/*
+ * Set 'timeout' on the transport handle underneath an HTTP handle.
+ * Nothing happens when the socket has no session or the session has no
+ * transport handle.
+ */
+void
+isc__nm_http_settimeout(isc_nmhandle_t *handle, uint32_t timeout) {
+	isc_nmsocket_t *httpsock = NULL;
+
+	REQUIRE(VALID_NMHANDLE(handle));
+	REQUIRE(VALID_NMSOCK(handle->sock));
+	REQUIRE(handle->sock->type == isc_nm_httpsocket);
+
+	httpsock = handle->sock;
+	if (httpsock->h2.session == NULL ||
+	    httpsock->h2.session->handle == NULL)
+	{
+		return;
+	}
+
+	INSIST(VALID_HTTP2_SESSION(httpsock->h2.session));
+	INSIST(VALID_NMHANDLE(httpsock->h2.session->handle));
+	isc_nmhandle_settimeout(httpsock->h2.session->handle, timeout);
+}
+
+/*
+ * Forward the keepalive setting 'value' to the transport handle underneath
+ * an HTTP handle. Nothing happens when the socket has no session or the
+ * session has no transport handle.
+ */
+void
+isc__nmhandle_http_keepalive(isc_nmhandle_t *handle, bool value) {
+	isc_nmsocket_t *httpsock = NULL;
+
+	REQUIRE(VALID_NMHANDLE(handle));
+	REQUIRE(VALID_NMSOCK(handle->sock));
+	REQUIRE(handle->sock->type == isc_nm_httpsocket);
+
+	httpsock = handle->sock;
+	if (httpsock->h2.session == NULL ||
+	    httpsock->h2.session->handle == NULL)
+	{
+		return;
+	}
+
+	INSIST(VALID_HTTP2_SESSION(httpsock->h2.session));
+	INSIST(VALID_NMHANDLE(httpsock->h2.session->handle));
+
+	isc_nmhandle_keepalive(httpsock->h2.session->handle, value);
+}
+
+/*
+ * Build "<scheme>://<host>:<port><abs_path>" into 'outbuf'.
+ *
+ * - The scheme is "https" when 'https' is true and "http" otherwise.
+ * - The host is 'hostname' when it is non-empty; otherwise it is the
+ *   textual form of the address in 'sa', which must then not be NULL.
+ *   IPv6 addresses are wrapped in '[' and ']'.
+ * - The port is 'http_port' when 'hostname' is used and the port of 'sa'
+ *   otherwise; a port of 0 selects the scheme default (443 or 80).
+ *
+ * 'abs_path' must be a valid HTTP path. The output is truncated if it does
+ * not fit into 'outbuf_len' bytes.
+ */
+void
+isc_nm_http_makeuri(const bool https, const isc_sockaddr_t *sa,
+		    const char *hostname, const uint16_t http_port,
+		    const char *abs_path, char *outbuf,
+		    const size_t outbuf_len) {
+	char saddr[INET6_ADDRSTRLEN] = { 0 };
+	int family;
+	bool ipv6_addr = false;
+	struct in6_addr addr6; /* scratch space for inet_pton() */
+	uint16_t host_port = http_port;
+	const char *host = NULL;
+
+	REQUIRE(outbuf != NULL);
+	REQUIRE(outbuf_len != 0);
+	REQUIRE(isc_nm_http_path_isvalid(abs_path));
+
+	/* If hostname is specified, use that. */
+	if (hostname != NULL && hostname[0] != '\0') {
+		/*
+		 * The host name could be an IPv6 address. If so,
+		 * wrap it between [ and ].
+		 */
+		if (inet_pton(AF_INET6, hostname, &addr6) == 1 &&
+		    hostname[0] != '[')
+		{
+			ipv6_addr = true;
+		}
+		host = hostname;
+	} else {
+		const void *addr = NULL;
+
+		/*
+		 * A hostname was not specified; build one from
+		 * the given IP address.
+		 */
+		INSIST(sa != NULL);
+		family = ((const struct sockaddr *)&sa->type.sa)->sa_family;
+		if (family == AF_INET) {
+			host_port = ntohs(sa->type.sin.sin_port);
+			addr = &sa->type.sin.sin_addr;
+		} else {
+			host_port = ntohs(sa->type.sin6.sin6_port);
+			addr = &sa->type.sin6.sin6_addr;
+		}
+		ipv6_addr = family == AF_INET6;
+		/* On failure 'saddr' stays empty (it is zero-initialised). */
+		(void)inet_ntop(family, addr, saddr, sizeof(saddr));
+		host = saddr;
+	}
+
+	/*
+	 * If the port number was not specified, the default
+	 * depends on whether we're using encryption or not.
+	 */
+	if (host_port == 0) {
+		host_port = https ? 443 : 80;
+	}
+
+	(void)snprintf(outbuf, outbuf_len, "%s://%s%s%s:%u%s",
+		       https ? "https" : "http", ipv6_addr ? "[" : "", host,
+		       ipv6_addr ? "]" : "", (unsigned int)host_port,
+		       abs_path);
+}
+
+/*
+ * DoH GET Query String Scanner-less Recursive Descent Parser/Verifier
+ *
+ * It is based on the following grammar (using WSN/EBNF):
+ *
+ * S = query-string.
+ * query-string = ['?'] { key-value-pair } EOF.
+ * key-value-pair = key '=' value [ '&' ].
+ * key = ('_' | alpha) { '_' | alnum}.
+ * value = value-char {value-char}.
+ * value-char = unreserved-char | percent-charcode.
+ * unreserved-char = alnum |'_' | '.' | '-' | '~'. (* RFC3986, Section 2.3 *)
+ * percent-charcode = '%' hexdigit hexdigit.
+ * ...
+ *
+ * Should be good enough.
+ */
+typedef struct isc_httpparser_state { /* DoH GET query-string parser state */
+ const char *str; /* current read position; moved forward by ADVANCE() */
+
+ const char *last_key; /* start of the most recently parsed key */
+ size_t last_key_len; /* length of that key */
+
+ const char *last_value; /* start of the most recently parsed value */
+ size_t last_value_len; /* length of that value */
+
+ bool query_found; /* set once a pair whose key is "dns" was parsed */
+ const char *query; /* value of the latest "dns" pair seen */
+ size_t query_len; /* length of that value */
+} isc_httpparser_state_t;
+
+#define MATCH(ch) (st->str[0] == (ch)) /* all of these need a parser state pointer named 'st' in scope */
+#define MATCH_ALPHA() isalpha((unsigned char)(st->str[0])) /* cast keeps the <ctype.h> argument in unsigned char range */
+#define MATCH_DIGIT() isdigit((unsigned char)(st->str[0]))
+#define MATCH_ALNUM() isalnum((unsigned char)(st->str[0]))
+#define MATCH_XDIGIT() isxdigit((unsigned char)(st->str[0]))
+#define ADVANCE() st->str++ /* consume the current character */
+#define GETP() (st->str) /* current read position */
+
+static bool
+rule_query_string(isc_httpparser_state_t *st); /* grammar start rule: query-string */
+
+/*
+ * Check 'query_string' against the DoH GET query-string grammar and
+ * locate the value of its "dns" key (the latest one, if repeated).
+ *
+ * On success returns true and stores the start and length of that value
+ * (a span inside 'query_string', not NUL-terminated) in '*start' and
+ * '*len'. Returns false, leaving the outputs untouched, when the string
+ * is NULL or empty, does not parse, or has no "dns" key.
+ */
+bool
+isc__nm_parse_httpquery(const char *query_string, const char **start,
+			size_t *len) {
+	isc_httpparser_state_t parser = { 0 };
+
+	REQUIRE(start != NULL);
+	REQUIRE(len != NULL);
+
+	if (query_string == NULL || *query_string == '\0') {
+		return (false);
+	}
+
+	parser.str = query_string;
+	if (!rule_query_string(&parser) || !parser.query_found) {
+		return (false);
+	}
+
+	*start = parser.query;
+	*len = parser.query_len;
+	return (true);
+}
+
+static bool
+rule_key_value_pair(isc_httpparser_state_t *st); /* key '=' value [ '&' ] */
+
+static bool
+rule_key(isc_httpparser_state_t *st); /* ('_' | alpha) { '_' | alnum } */
+
+static bool
+rule_value(isc_httpparser_state_t *st); /* value-char { value-char } */
+
+static bool
+rule_value_char(isc_httpparser_state_t *st); /* unreserved-char | percent-charcode */
+
+static bool
+rule_percent_charcode(isc_httpparser_state_t *st); /* '%' hexdigit hexdigit */
+
+static bool
+rule_unreserved_char(isc_httpparser_state_t *st); /* alnum | '_' | '.' | '-' | '~' */
+
+/*
+ * query-string = ['?'] { key-value-pair } EOF.
+ *
+ * Succeeds only when the whole input, up to and including the
+ * terminating NUL, has been consumed.
+ */
+static bool
+rule_query_string(isc_httpparser_state_t *st) {
+	/* Optional leading '?'. */
+	if (MATCH('?')) {
+		ADVANCE();
+	}
+
+	/* Zero or more key/value pairs; stop at the first that fails. */
+	for (;;) {
+		if (!rule_key_value_pair(st)) {
+			break;
+		}
+	}
+
+	/* Nothing but the end of the string may be left. */
+	if (MATCH('\0')) {
+		ADVANCE();
+		return (true);
+	}
+
+	return (false);
+}
+
+/*
+ * key-value-pair = key '=' value [ '&' ].
+ *
+ * When the key is exactly "dns", the value just parsed is recorded as
+ * the query (overwriting any earlier one).
+ */
+static bool
+rule_key_value_pair(isc_httpparser_state_t *st) {
+	static const char dns_key[] = "dns";
+	const size_t dns_key_len = sizeof(dns_key) - 1;
+
+	if (!rule_key(st)) {
+		return (false);
+	}
+
+	if (!MATCH('=')) {
+		return (false);
+	}
+	ADVANCE();
+
+	if (!rule_value(st)) {
+		return (false);
+	}
+
+	if (st->last_key_len == dns_key_len &&
+	    memcmp(st->last_key, dns_key, dns_key_len) == 0)
+	{
+		st->query_found = true;
+		st->query = st->last_value;
+		st->query_len = st->last_value_len;
+	}
+
+	/* The pair separator is optional. */
+	if (MATCH('&')) {
+		ADVANCE();
+	}
+
+	return (true);
+}
+
+/*
+ * key = ('_' | alpha) { '_' | alnum }.
+ *
+ * On success the key's start and length are stored in 'st'.
+ */
+static bool
+rule_key(isc_httpparser_state_t *st) {
+	/* The first character must be '_' or a letter. */
+	if (!MATCH('_') && !MATCH_ALPHA()) {
+		return (false);
+	}
+
+	st->last_key = GETP();
+	ADVANCE();
+
+	/* The remaining characters may also be digits. */
+	while (MATCH('_') || MATCH_ALNUM()) {
+		ADVANCE();
+	}
+
+	st->last_key_len = GETP() - st->last_key;
+	return (true);
+}
+
+/*
+ * value = value-char { value-char }.
+ *
+ * On success the value's start and length are stored in 'st'.
+ */
+static bool
+rule_value(isc_httpparser_state_t *st) {
+	const char *begin = GETP();
+
+	/* At least one value character is required. */
+	if (!rule_value_char(st)) {
+		return (false);
+	}
+
+	while (rule_value_char(st)) {
+		continue;
+	}
+
+	st->last_value = begin;
+	st->last_value_len = GETP() - begin;
+	return (true);
+}
+
+/*
+ * value-char = unreserved-char | percent-charcode.
+ *
+ * The alternatives are tried in that order.
+ */
+static bool
+rule_value_char(isc_httpparser_state_t *st) {
+	return (rule_unreserved_char(st) || rule_percent_charcode(st));
+}
+
+/*
+ * unreserved-char = alnum | '_' | '.' | '-' | '~'. (RFC 3986, Section 2.3)
+ *
+ * Consumes one character on a match.
+ */
+static bool
+rule_unreserved_char(isc_httpparser_state_t *st) {
+	const bool unreserved = MATCH_ALNUM() || MATCH('_') || MATCH('.') ||
+				MATCH('-') || MATCH('~');
+
+	if (!unreserved) {
+		return (false);
+	}
+
+	ADVANCE();
+	return (true);
+}
+
+/*
+ * percent-charcode = '%' hexdigit hexdigit.
+ *
+ * There is no backtracking: characters consumed before a mismatch stay
+ * consumed when false is returned.
+ */
+static bool
+rule_percent_charcode(isc_httpparser_state_t *st) {
+	if (!MATCH('%')) {
+		return (false);
+	}
+	ADVANCE();
+
+	/* Exactly two hexadecimal digits must follow. */
+	for (size_t ndigits = 0; ndigits < 2; ndigits++) {
+		if (!MATCH_XDIGIT()) {
+			return (false);
+		}
+		ADVANCE();
+	}
+
+	return (true);
+}
+
+/*
+ * DoH URL Location Verifier. Based on the following grammar (EBNF/WSN
+ * notation):
+ *
+ * S = path_absolute.
+ * path_absolute = '/' [ segments ] '\0'.
+ * segments = segment_nz { slash_segment }.
+ * slash_segment = '/' segment.
+ * segment = { pchar }.
+ * segment_nz = pchar { pchar }.
+ * pchar = unreserved | pct_encoded | sub_delims | ':' | '@'.
+ * unreserved = ALPHA | DIGIT | '-' | '.' | '_' | '~'.
+ * pct_encoded = '%' XDIGIT XDIGIT.
+ * sub_delims = '!' | '$' | '&' | '\'' | '(' | ')' | '*' | '+' |
+ * ',' | ';' | '='.
+ *
+ * The grammar is extracted from RFC 3986. It is slightly modified to
+ * aid in parser creation, but the end result is the same
+ * (path_absolute is defined slightly differently - split into
+ * multiple productions).
+ *
+ * https://datatracker.ietf.org/doc/html/rfc3986#appendix-A
+ */
+
+typedef struct isc_http_location_parser_state { /* DoH URL path verifier state */
+ const char *str; /* current read position; moved forward by ADVANCE() */
+} isc_http_location_parser_state_t;
+
+static bool
+rule_loc_path_absolute(isc_http_location_parser_state_t *); /* '/' [ segments ] '\0' */
+
+static bool
+rule_loc_segments(isc_http_location_parser_state_t *); /* segment_nz { slash_segment } */
+
+static bool
+rule_loc_slash_segment(isc_http_location_parser_state_t *); /* '/' segment */
+
+static bool
+rule_loc_segment(isc_http_location_parser_state_t *); /* { pchar } */
+
+static bool
+rule_loc_segment_nz(isc_http_location_parser_state_t *); /* pchar { pchar } */
+
+static bool
+rule_loc_pchar(isc_http_location_parser_state_t *); /* unreserved | pct_encoded | sub_delims | ':' | '@' */
+
+static bool
+rule_loc_unreserved(isc_http_location_parser_state_t *); /* ALPHA | DIGIT | '-' | '.' | '_' | '~' */
+
+static bool
+rule_loc_pct_encoded(isc_http_location_parser_state_t *); /* '%' XDIGIT XDIGIT */
+
+static bool
+rule_loc_sub_delims(isc_http_location_parser_state_t *); /* one of ! $ & ' ( ) * + , ; = */
+
+/*
+ * path_absolute = '/' [ segments ] '\0'.
+ *
+ * Succeeds only when the entire string, including its terminating NUL,
+ * has been consumed.
+ */
+static bool
+rule_loc_path_absolute(isc_http_location_parser_state_t *st) {
+	/* An absolute path always starts with a slash. */
+	if (!MATCH('/')) {
+		return (false);
+	}
+	ADVANCE();
+
+	/* The segments are optional, so their result is not checked. */
+	(void)rule_loc_segments(st);
+
+	if (!MATCH('\0')) {
+		return (false);
+	}
+	ADVANCE();
+
+	return (true);
+}
+
+/*
+ * segments = segment_nz { slash_segment }.
+ */
+static bool
+rule_loc_segments(isc_http_location_parser_state_t *st) {
+	if (rule_loc_segment_nz(st)) {
+		/* Any number of further "/segment" parts may follow. */
+		while (rule_loc_slash_segment(st)) {
+			continue;
+		}
+		return (true);
+	}
+
+	return (false);
+}
+
+/*
+ * slash_segment = '/' segment.
+ */
+static bool
+rule_loc_slash_segment(isc_http_location_parser_state_t *st) {
+	if (!MATCH('/')) {
+		return (false);
+	}
+	ADVANCE();
+
+	return (rule_loc_segment(st));
+}
+
+/*
+ * segment = { pchar }.
+ *
+ * A segment may be empty, so this rule always succeeds.
+ */
+static bool
+rule_loc_segment(isc_http_location_parser_state_t *st) {
+	bool consumed;
+
+	do {
+		consumed = rule_loc_pchar(st);
+	} while (consumed);
+
+	return (true);
+}
+
+/*
+ * segment_nz = pchar { pchar }.
+ *
+ * Succeeds when at least one pchar was consumed.
+ */
+static bool
+rule_loc_segment_nz(isc_http_location_parser_state_t *st) {
+	size_t npchars = 0;
+
+	while (rule_loc_pchar(st)) {
+		npchars++;
+	}
+
+	return (npchars > 0);
+}
+
+/*
+ * pchar = unreserved | pct_encoded | sub_delims | ':' | '@'.
+ *
+ * The alternatives are tried strictly in this order.
+ */
+static bool
+rule_loc_pchar(isc_http_location_parser_state_t *st) {
+	if (rule_loc_unreserved(st) || rule_loc_pct_encoded(st) ||
+	    rule_loc_sub_delims(st))
+	{
+		return (true);
+	}
+
+	if (!MATCH(':') && !MATCH('@')) {
+		return (false);
+	}
+
+	ADVANCE();
+	return (true);
+}
+
+/*
+ * unreserved = ALPHA | DIGIT | '-' | '.' | '_' | '~'.
+ *
+ * Consumes one character and returns true on a match; otherwise
+ * returns false without consuming anything.
+ *
+ * The alternatives are combined with the logical '||' operator (not
+ * bitwise '|'): the operands are truth values, and this is the form
+ * the query-string parser's rule_unreserved_char() uses as well.
+ */
+static bool
+rule_loc_unreserved(isc_http_location_parser_state_t *st) {
+	if (MATCH_ALPHA() || MATCH_DIGIT() || MATCH('-') || MATCH('.') ||
+	    MATCH('_') || MATCH('~'))
+	{
+		ADVANCE();
+		return (true);
+	}
+	return (false);
+}
+
+/*
+ * pct_encoded = '%' XDIGIT XDIGIT.
+ *
+ * There is no backtracking: characters consumed before a mismatch stay
+ * consumed when false is returned.
+ */
+static bool
+rule_loc_pct_encoded(isc_http_location_parser_state_t *st) {
+	if (MATCH('%')) {
+		ADVANCE();
+		if (MATCH_XDIGIT()) {
+			ADVANCE();
+			if (MATCH_XDIGIT()) {
+				ADVANCE();
+				return (true);
+			}
+		}
+	}
+
+	return (false);
+}
+
+/*
+ * sub_delims = '!' | '$' | '&' | '\'' | '(' | ')' | '*' | '+' |
+ *              ',' | ';' | '='.
+ *
+ * Consumes one character and returns true on a match; otherwise
+ * returns false without consuming anything.
+ *
+ * The comparisons are combined with the logical '||' operator (not
+ * bitwise '|'), which is the idiomatic form for truth values and
+ * stops at the first match.
+ */
+static bool
+rule_loc_sub_delims(isc_http_location_parser_state_t *st) {
+	if (MATCH('!') || MATCH('$') || MATCH('&') || MATCH('\'') ||
+	    MATCH('(') || MATCH(')') || MATCH('*') || MATCH('+') ||
+	    MATCH(',') || MATCH(';') || MATCH('='))
+	{
+		ADVANCE();
+		return (true);
+	}
+
+	return (false);
+}
+
+/*
+ * Return true when 'path' (which must not be NULL) is a complete
+ * absolute path according to the path_absolute grammar above.
+ */
+bool
+isc_nm_http_path_isvalid(const char *path) {
+	isc_http_location_parser_state_t parser;
+
+	REQUIRE(path != NULL);
+
+	parser = (isc_http_location_parser_state_t){ .str = path };
+
+	return (rule_loc_path_absolute(&parser));
+}
diff --git a/lib/isc/netmgr/netmgr-int.h b/lib/isc/netmgr/netmgr-int.h
new file mode 100644
index 0000000..364a933
--- /dev/null
+++ b/lib/isc/netmgr/netmgr-int.h
@@ -0,0 +1,2273 @@
+/*
+ * Copyright (C) Internet Systems Consortium, Inc. ("ISC")
+ *
+ * SPDX-License-Identifier: MPL-2.0
+ *
+ * This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, you can obtain one at https://mozilla.org/MPL/2.0/.
+ *
+ * See the COPYRIGHT file distributed with this work for additional
+ * information regarding copyright ownership.
+ */
+
+#pragma once
+
+#include <unistd.h>
+#include <uv.h>
+
+#include <openssl/err.h>
+#include <openssl/ssl.h>
+
+#include <isc/astack.h>
+#include <isc/atomic.h>
+#include <isc/barrier.h>
+#include <isc/buffer.h>
+#include <isc/condition.h>
+#include <isc/magic.h>
+#include <isc/mem.h>
+#include <isc/netmgr.h>
+#include <isc/quota.h>
+#include <isc/random.h>
+#include <isc/refcount.h>
+#include <isc/region.h>
+#include <isc/result.h>
+#include <isc/sockaddr.h>
+#include <isc/stats.h>
+#include <isc/thread.h>
+#include <isc/tls.h>
+#include <isc/util.h>
+
+#include "uv-compat.h"
+
+#define ISC_NETMGR_TID_UNKNOWN -1 /* NOTE(review): by its name, a "thread id not known" sentinel; confirm at the users */
+
+/* Must be different from ISC_NETMGR_TID_UNKNOWN */
+#define ISC_NETMGR_NON_INTERLOCKED -2 /* NOTE(review): presumably the idle value of isc_nm_t.interlocked; confirm */
+
+/*
+ * Receive buffers
+ */
+#if HAVE_DECL_UV_UDP_MMSG_CHUNK
+/*
+ * The value 20 here is UV__MMSG_MAXWIDTH taken from the current libuv source,
+ * libuv will not receive more than 20 datagrams in a single recvmmsg call.
+ */
+#define ISC_NETMGR_UDP_RECVBUF_SIZE (20 * UINT16_MAX)
+#else
+/*
+ * A single DNS message size
+ */
+#define ISC_NETMGR_UDP_RECVBUF_SIZE UINT16_MAX
+#endif
+
+/*
+ * The TCP receive buffer can fit one maximum sized DNS message plus its
+ * two-byte (uint16_t) size field; the receive buffer here affects TCP,
+ * DoT and DoH.
+ */
+#define ISC_NETMGR_TCP_RECVBUF_SIZE (sizeof(uint16_t) + UINT16_MAX)
+
+/* Pick the larger buffer */
+#define ISC_NETMGR_RECVBUF_SIZE \
+ (ISC_NETMGR_UDP_RECVBUF_SIZE >= ISC_NETMGR_TCP_RECVBUF_SIZE \
+ ? ISC_NETMGR_UDP_RECVBUF_SIZE \
+ : ISC_NETMGR_TCP_RECVBUF_SIZE)
+
+/*
+ * Send buffer: one maximum sized DNS message plus its two-byte size field.
+ */
+#define ISC_NETMGR_SENDBUF_SIZE (sizeof(uint16_t) + UINT16_MAX)
+
+/*
+ * Make sure our RECVBUF size is large enough for both the UDP and the
+ * TCP receive buffer (checked at compile time).
+ */
+
+STATIC_ASSERT(ISC_NETMGR_UDP_RECVBUF_SIZE <= ISC_NETMGR_RECVBUF_SIZE,
+ "UDP receive buffer size must be smaller or equal than worker "
+ "receive buffer size");
+
+STATIC_ASSERT(ISC_NETMGR_TCP_RECVBUF_SIZE <= ISC_NETMGR_RECVBUF_SIZE,
+ "TCP receive buffer size must be smaller or equal than worker "
+ "receive buffer size");
+
+/*%
+ * Regular TCP buffer size.
+ */
+#define NM_REG_BUF 4096
+
+/*%
+ * Larger buffer for when the regular one isn't enough; this will
+ * hold two full DNS packets with lengths. netmgr receives 64k at
+ * most in TCPDNS or TLSDNS connections, so there's no risk of overrun
+ * when using a buffer this size.
+ *
+ * The expansion is parenthesized so that the macro acts as a single
+ * value inside larger expressions (e.g. 'n / NM_BIG_BUF').
+ */
+#define NM_BIG_BUF (ISC_NETMGR_TCP_RECVBUF_SIZE * 2)
+
+/*%
+ * Maximum segment size (MSS) of TCP socket on which the server responds to
+ * queries. A value lower than the common MSS on Ethernet will address path
+ * MTU problems; the value used is 1220, that is 1280 (IPv6 minimum link
+ * MTU) - 40 (IPv6 fixed header) - 20 (TCP fixed header).
+ */
+#define NM_MAXSEG (1280 - 20 - 40)
+
+/*
+ * Define NETMGR_TRACE to activate tracing of handles and sockets.
+ * This will impair performance but enables us to quickly determine,
+ * if netmgr resources haven't been cleaned up on shutdown, which ones
+ * are still in use.
+ *
+ * With NETMGR_TRACE defined, each isc__nm* wrapper macro below appends
+ * __FILE__, __LINE__ and __func__ to the call of its isc___nm*
+ * counterpart, and FLARG / FLARG_PASS supply the matching parameter
+ * declarations and arguments. Without it, the wrappers forward their
+ * arguments unchanged and the FLARG* macros expand to nothing.
+ */
+#ifdef NETMGR_TRACE
+#define TRACE_SIZE 8 /* number of entries in isc_nmhandle.backtrace[] */
+
+void
+isc__nm_dump_active(isc_nm_t *nm);
+
+#if defined(__linux__)
+#include <syscall.h>
+#define gettid() (uint32_t) syscall(SYS_gettid)
+#else
+#define gettid() (uint32_t) pthread_self()
+#endif
+
+#ifdef NETMGR_TRACE_VERBOSE
+#define NETMGR_TRACE_LOG(format, ...) \
+ fprintf(stderr, "%" PRIu32 ":%d:%s:%u:%s:" format, gettid(), \
+ isc_nm_tid(), file, line, func, __VA_ARGS__)
+#else /* quiet variant: only marks file/line/func as used; expands to three statements */
+#define NETMGR_TRACE_LOG(format, ...) \
+ (void)file; \
+ (void)line; \
+ (void)func;
+#endif
+
+#define FLARG_PASS , file, line, func
+#define FLARG \
+ , const char *file __attribute__((unused)), \
+ unsigned int line __attribute__((unused)), \
+ const char *func __attribute__((unused))
+#define FLARG_IEVENT(ievent) \
+ const char *file = ievent->file; \
+ unsigned int line = ievent->line; \
+ const char *func = ievent->func;
+#define FLARG_IEVENT_PASS(ievent) \
+ ievent->file = file; \
+ ievent->line = line; \
+ ievent->func = func;
+#define isc__nm_uvreq_get(req, sock) \
+ isc___nm_uvreq_get(req, sock, __FILE__, __LINE__, __func__)
+#define isc__nm_uvreq_put(req, sock) \
+ isc___nm_uvreq_put(req, sock, __FILE__, __LINE__, __func__)
+#define isc__nmsocket_init(sock, mgr, type, iface) \
+ isc___nmsocket_init(sock, mgr, type, iface, __FILE__, __LINE__, \
+ __func__)
+#define isc__nmsocket_put(sockp) \
+ isc___nmsocket_put(sockp, __FILE__, __LINE__, __func__)
+#define isc__nmsocket_attach(sock, target) \
+ isc___nmsocket_attach(sock, target, __FILE__, __LINE__, __func__)
+#define isc__nmsocket_detach(socketp) \
+ isc___nmsocket_detach(socketp, __FILE__, __LINE__, __func__)
+#define isc__nmsocket_close(socketp) \
+ isc___nmsocket_close(socketp, __FILE__, __LINE__, __func__)
+#define isc__nmhandle_get(sock, peer, local) \
+ isc___nmhandle_get(sock, peer, local, __FILE__, __LINE__, __func__)
+#define isc__nmsocket_prep_destroy(sock) \
+ isc___nmsocket_prep_destroy(sock, __FILE__, __LINE__, __func__)
+#else
+#define NETMGR_TRACE_LOG(format, ...)
+
+#define FLARG_PASS
+#define FLARG
+#define FLARG_IEVENT(ievent)
+#define FLARG_IEVENT_PASS(ievent)
+#define isc__nm_uvreq_get(req, sock) isc___nm_uvreq_get(req, sock)
+#define isc__nm_uvreq_put(req, sock) isc___nm_uvreq_put(req, sock)
+#define isc__nmsocket_init(sock, mgr, type, iface) \
+ isc___nmsocket_init(sock, mgr, type, iface)
+#define isc__nmsocket_put(sockp) isc___nmsocket_put(sockp)
+#define isc__nmsocket_attach(sock, target) isc___nmsocket_attach(sock, target)
+#define isc__nmsocket_detach(socketp) isc___nmsocket_detach(socketp)
+#define isc__nmsocket_close(socketp) isc___nmsocket_close(socketp)
+#define isc__nmhandle_get(sock, peer, local) \
+ isc___nmhandle_get(sock, peer, local)
+#define isc__nmsocket_prep_destroy(sock) isc___nmsocket_prep_destroy(sock)
+#endif
+
+/*
+ * Queue types in the order of processing priority.
+ *
+ * NETIEVENT_MAX is not a queue type itself but the number of queue
+ * types; it sizes the ievents[] array of each isc__networker_t.
+ */
+typedef enum {
+ NETIEVENT_PRIORITY = 0,
+ NETIEVENT_PRIVILEGED = 1,
+ NETIEVENT_TASK = 2,
+ NETIEVENT_NORMAL = 3,
+ NETIEVENT_MAX = 4,
+} netievent_type_t;
+
+typedef struct isc__nm_uvreq isc__nm_uvreq_t;
+typedef struct isc__netievent isc__netievent_t;
+
+typedef ISC_LIST(isc__netievent_t) isc__netievent_list_t;
+
+typedef struct ievent { /* one event queue; a worker holds NETIEVENT_MAX of these */
+ isc_mutex_t lock; /* NOTE(review): presumably guards 'list'; confirm in netmgr.c */
+ isc_condition_t cond;
+ isc__netievent_list_t list; /* the queued isc__netievent_t items */
+} ievent_t;
+
+/*
+ * Single network event loop worker.
+ *
+ * Each worker embeds its own libuv loop and one event queue per
+ * netievent_type_t queue type.
+ */
+typedef struct isc__networker {
+ isc_nm_t *mgr;
+ int id; /* thread id */
+ uv_loop_t loop; /* libuv loop structure */
+ uv_async_t async; /* async channel to send
+ * data to this networker */
+ bool paused;
+ bool finished;
+ isc_thread_t thread;
+ ievent_t ievents[NETIEVENT_MAX]; /* indexed by netievent_type_t */
+
+ isc_refcount_t references;
+ atomic_int_fast64_t pktcount;
+ char *recvbuf;
+ char *sendbuf;
+ bool recvbuf_inuse; /* NOTE(review): presumably marks 'recvbuf' as handed out; confirm */
+} isc__networker_t;
+
+/*
+ * A general handle for a connection bound to a networker. For UDP
+ * connections we have peer address here, so both TCP and UDP can be
+ * handled with a simple send-like function
+ *
+ * VALID_NMHANDLE() checks the magic number and additionally requires
+ * a reference count greater than zero; note that it evaluates its
+ * argument twice.
+ */
+#define NMHANDLE_MAGIC ISC_MAGIC('N', 'M', 'H', 'D')
+#define VALID_NMHANDLE(t) \
+ (ISC_MAGIC_VALID(t, NMHANDLE_MAGIC) && \
+ atomic_load(&(t)->references) > 0)
+
+typedef void (*isc__nm_closecb)(isc_nmhandle_t *);
+typedef struct isc_nm_http_session isc_nm_http_session_t;
+
+struct isc_nmhandle {
+ int magic; /* compared against NMHANDLE_MAGIC by VALID_NMHANDLE() */
+ isc_refcount_t references; /* VALID_NMHANDLE() requires this to be > 0 */
+
+ /*
+ * The socket is not 'attached' in the traditional
+ * reference-counting sense. Instead, we keep all handles in an
+ * array in the socket object. This way, we don't have circular
+ * dependencies and we can close all handles when we're destroying
+ * the socket.
+ */
+ isc_nmsocket_t *sock;
+
+ isc_nm_http_session_t *httpsession;
+
+ isc_sockaddr_t peer;
+ isc_sockaddr_t local;
+ isc_nm_opaquecb_t doreset; /* reset extra callback, external */
+ isc_nm_opaquecb_t dofree; /* free extra callback, external */
+#ifdef NETMGR_TRACE
+ void *backtrace[TRACE_SIZE];
+ int backtrace_size;
+ LINK(isc_nmhandle_t) active_link;
+#endif
+ void *opaque;
+ char extra[]; /* flexible array member; its size is chosen at allocation, outside this header */
+};
+
+typedef enum isc__netievent_type { /* event ids; the NETIEVENT_*_DEF macros refer to them as netievent_<type> */
+ netievent_udpconnect,
+ netievent_udpclose,
+ netievent_udpsend,
+ netievent_udpread,
+ netievent_udpcancel,
+
+ netievent_routeconnect,
+
+ netievent_tcpconnect,
+ netievent_tcpclose,
+ netievent_tcpsend,
+ netievent_tcpstartread,
+ netievent_tcppauseread,
+ netievent_tcpaccept,
+ netievent_tcpcancel,
+
+ netievent_tcpdnsaccept,
+ netievent_tcpdnsconnect,
+ netievent_tcpdnsclose,
+ netievent_tcpdnssend,
+ netievent_tcpdnsread,
+ netievent_tcpdnscancel,
+
+ netievent_tlsclose,
+ netievent_tlssend,
+ netievent_tlsstartread,
+ netievent_tlsconnect,
+ netievent_tlsdobio,
+ netievent_tlscancel,
+
+ netievent_tlsdnsaccept,
+ netievent_tlsdnsconnect,
+ netievent_tlsdnsclose,
+ netievent_tlsdnssend,
+ netievent_tlsdnsread,
+ netievent_tlsdnscancel,
+ netievent_tlsdnscycle,
+ netievent_tlsdnsshutdown,
+
+ netievent_httpclose,
+ netievent_httpsend,
+ netievent_httpendpoints,
+
+ netievent_shutdown,
+ netievent_stop,
+ netievent_pause,
+
+ netievent_connectcb,
+ netievent_readcb,
+ netievent_sendcb,
+
+ netievent_detach,
+ netievent_close,
+
+ netievent_task,
+ netievent_privilegedtask,
+
+ netievent_settlsctx,
+
+ /*
+ * event type values higher than this will be treated
+ * as high-priority events, which can be processed
+ * while the netmgr is pausing or paused.
+ */
+ netievent_prio = 0xff,
+
+ netievent_udplisten, /* first high-priority value (0x100); the rest count up from here */
+ netievent_udpstop,
+ netievent_tcplisten,
+ netievent_tcpstop,
+ netievent_tcpdnslisten,
+ netievent_tcpdnsstop,
+ netievent_tlsdnslisten,
+ netievent_tlsdnsstop,
+ netievent_sockstop, /* for multilayer sockets */
+
+ netievent_resume,
+} isc__netievent_type;
+
+typedef union { /* one callback slot; stored as the 'cb' member of isc__nm_uvreq */
+ isc_nm_recv_cb_t recv;
+ isc_nm_cb_t send;
+ isc_nm_cb_t connect;
+ isc_nm_accept_cb_t accept;
+} isc__nm_cb_t;
+
+/*
+ * Wrapper around uv_req_t with 'our' fields in it. req->data should
+ * always point to its parent. Note that we always allocate more than
+ * sizeof(struct) because we make room for different req types.
+ *
+ * The libuv request/handle types that may be stored are the members of
+ * the 'uv_req' union below.
+ */
+#define UVREQ_MAGIC ISC_MAGIC('N', 'M', 'U', 'R')
+#define VALID_UVREQ(t) ISC_MAGIC_VALID(t, UVREQ_MAGIC)
+
+typedef struct isc__nm_uvreq isc__nm_uvreq_t; /* repeats the typedef made earlier in this header */
+struct isc__nm_uvreq {
+ int magic;
+ isc_nmsocket_t *sock;
+ isc_nmhandle_t *handle;
+ char tcplen[2]; /* The TCP DNS message length */
+ uv_buf_t uvbuf; /* translated isc_region_t, to be
+ * sent or received */
+ isc_sockaddr_t local; /* local address */
+ isc_sockaddr_t peer; /* peer address */
+ isc__nm_cb_t cb; /* callback */
+ void *cbarg; /* callback argument */
+ isc_nm_timer_t *timer; /* TCP write timer */
+ int connect_tries; /* connect retries */
+
+ union {
+ uv_handle_t handle;
+ uv_req_t req;
+ uv_getaddrinfo_t getaddrinfo;
+ uv_getnameinfo_t getnameinfo;
+ uv_shutdown_t shutdown;
+ uv_write_t write;
+ uv_connect_t connect;
+ uv_udp_send_t udp_send;
+ uv_fs_t fs;
+ uv_work_t work;
+ } uv_req;
+ ISC_LINK(isc__nm_uvreq_t) link;
+};
+
+void *
+isc__nm_get_netievent(isc_nm_t *mgr, isc__netievent_type type);
+/*%<
+ * Allocate an ievent and set the type.
+ */
+void
+isc__nm_put_netievent(isc_nm_t *mgr, void *ievent); /* counterpart of isc__nm_get_netievent(); called last by every generated put function */
+
+/*
+ * The macros here are used to simulate the "inheritance" in C, there's the base
+ * netievent structure that contains just its own type and socket, and there are
+ * extended netievent types that also have handles or requests or other data.
+ *
+ * The macros here ensure that:
+ *
+ * 1. every netievent type has matching definition, declaration and
+ * implementation
+ *
+ * 2. we handle all the netievent types of same subclass the same, e.g. if the
+ * extended netievent contains handle, we always attach to the handle in
+ * the ctor and detach from the handle in dtor.
+ *
+ * There are three macros here for each netievent subclass:
+ *
+ * 1. NETIEVENT_*_TYPE(type) creates the typedef for each type; used below in
+ * this header
+ *
+ * 2. NETIEVENT_*_DECL(type) generates the declaration of the get and put
+ * functions (isc__nm_get_netievent_* and isc__nm_put_netievent_*); used
+ * below in this header
+ *
+ * 3. NETIEVENT_*_DEF(type) generates the definition of the functions; used
+ * either in netmgr.c or matching protocol file (e.g. udp.c, tcp.c, etc.)
+ */
+
+#define NETIEVENT__SOCKET \
+ isc__netievent_type type; \
+ ISC_LINK(isc__netievent_t) link; \
+ isc_nmsocket_t *sock; \
+ const char *file; \
+ unsigned int line; \
+ const char *func;
+
+typedef struct isc__netievent__socket { /* event that carries only a socket; get attaches to it, put detaches */
+ NETIEVENT__SOCKET; /* common leading fields: type, link, sock, file, line, func */
+} isc__netievent__socket_t;
+
+#define NETIEVENT_SOCKET_TYPE(type) \
+ typedef isc__netievent__socket_t isc__netievent_##type##_t;
+
+#define NETIEVENT_SOCKET_DECL(type) \
+ isc__netievent_##type##_t *isc__nm_get_netievent_##type( \
+ isc_nm_t *nm, isc_nmsocket_t *sock); \
+ void isc__nm_put_netievent_##type(isc_nm_t *nm, \
+ isc__netievent_##type##_t *ievent);
+
+#define NETIEVENT_SOCKET_DEF(type) \
+ isc__netievent_##type##_t *isc__nm_get_netievent_##type( \
+ isc_nm_t *nm, isc_nmsocket_t *sock) { \
+ isc__netievent_##type##_t *ievent = \
+ isc__nm_get_netievent(nm, netievent_##type); \
+ isc__nmsocket_attach(sock, &ievent->sock); \
+ \
+ return (ievent); \
+ } \
+ \
+ void isc__nm_put_netievent_##type(isc_nm_t *nm, \
+ isc__netievent_##type##_t *ievent) { \
+ isc__nmsocket_detach(&ievent->sock); \
+ isc__nm_put_netievent(nm, ievent); \
+ }
+
+typedef struct isc__netievent__socket_req { /* socket event that also carries a uvreq */
+ NETIEVENT__SOCKET;
+ isc__nm_uvreq_t *req; /* stored as given; the get/put macros below do nothing else with it */
+} isc__netievent__socket_req_t;
+
+#define NETIEVENT_SOCKET_REQ_TYPE(type) \
+ typedef isc__netievent__socket_req_t isc__netievent_##type##_t;
+
+#define NETIEVENT_SOCKET_REQ_DECL(type) \
+ isc__netievent_##type##_t *isc__nm_get_netievent_##type( \
+ isc_nm_t *nm, isc_nmsocket_t *sock, isc__nm_uvreq_t *req); \
+ void isc__nm_put_netievent_##type(isc_nm_t *nm, \
+ isc__netievent_##type##_t *ievent);
+
+#define NETIEVENT_SOCKET_REQ_DEF(type) \
+ isc__netievent_##type##_t *isc__nm_get_netievent_##type( \
+ isc_nm_t *nm, isc_nmsocket_t *sock, isc__nm_uvreq_t *req) { \
+ isc__netievent_##type##_t *ievent = \
+ isc__nm_get_netievent(nm, netievent_##type); \
+ isc__nmsocket_attach(sock, &ievent->sock); \
+ ievent->req = req; \
+ \
+ return (ievent); \
+ } \
+ \
+ void isc__nm_put_netievent_##type(isc_nm_t *nm, \
+ isc__netievent_##type##_t *ievent) { \
+ isc__nmsocket_detach(&ievent->sock); \
+ isc__nm_put_netievent(nm, ievent); \
+ }
+
+typedef struct isc__netievent__socket_req_result { /* socket event carrying a uvreq and a result code */
+ NETIEVENT__SOCKET;
+ isc__nm_uvreq_t *req; /* stored as given by the generated get function */
+ isc_result_t result; /* copied in by the generated get function */
+} isc__netievent__socket_req_result_t;
+
+#define NETIEVENT_SOCKET_REQ_RESULT_TYPE(type) \
+ typedef isc__netievent__socket_req_result_t isc__netievent_##type##_t;
+
+#define NETIEVENT_SOCKET_REQ_RESULT_DECL(type) \
+ isc__netievent_##type##_t *isc__nm_get_netievent_##type( \
+ isc_nm_t *nm, isc_nmsocket_t *sock, isc__nm_uvreq_t *req, \
+ isc_result_t result); \
+ void isc__nm_put_netievent_##type(isc_nm_t *nm, \
+ isc__netievent_##type##_t *ievent);
+
+#define NETIEVENT_SOCKET_REQ_RESULT_DEF(type) \
+ isc__netievent_##type##_t *isc__nm_get_netievent_##type( \
+ isc_nm_t *nm, isc_nmsocket_t *sock, isc__nm_uvreq_t *req, \
+ isc_result_t result) { \
+ isc__netievent_##type##_t *ievent = \
+ isc__nm_get_netievent(nm, netievent_##type); \
+ isc__nmsocket_attach(sock, &ievent->sock); \
+ ievent->req = req; \
+ ievent->result = result; \
+ \
+ return (ievent); \
+ } \
+ \
+ void isc__nm_put_netievent_##type(isc_nm_t *nm, \
+ isc__netievent_##type##_t *ievent) { \
+ isc__nmsocket_detach(&ievent->sock); \
+ isc__nm_put_netievent(nm, ievent); \
+ }
+
+typedef struct isc__netievent__socket_handle { /* socket event that also holds a handle */
+ NETIEVENT__SOCKET;
+ isc_nmhandle_t *handle; /* attached by the generated get function, detached by put */
+} isc__netievent__socket_handle_t;
+
+#define NETIEVENT_SOCKET_HANDLE_TYPE(type) \
+ typedef isc__netievent__socket_handle_t isc__netievent_##type##_t;
+
+#define NETIEVENT_SOCKET_HANDLE_DECL(type) \
+ isc__netievent_##type##_t *isc__nm_get_netievent_##type( \
+ isc_nm_t *nm, isc_nmsocket_t *sock, isc_nmhandle_t *handle); \
+ void isc__nm_put_netievent_##type(isc_nm_t *nm, \
+ isc__netievent_##type##_t *ievent);
+
+#define NETIEVENT_SOCKET_HANDLE_DEF(type) \
+ isc__netievent_##type##_t *isc__nm_get_netievent_##type( \
+ isc_nm_t *nm, isc_nmsocket_t *sock, isc_nmhandle_t *handle) { \
+ isc__netievent_##type##_t *ievent = \
+ isc__nm_get_netievent(nm, netievent_##type); \
+ isc__nmsocket_attach(sock, &ievent->sock); \
+ isc_nmhandle_attach(handle, &ievent->handle); \
+ \
+ return (ievent); \
+ } \
+ \
+ void isc__nm_put_netievent_##type(isc_nm_t *nm, \
+ isc__netievent_##type##_t *ievent) { \
+ isc__nmsocket_detach(&ievent->sock); \
+ isc_nmhandle_detach(&ievent->handle); \
+ isc__nm_put_netievent(nm, ievent); \
+ }
+
+typedef struct isc__netievent__socket_quota { /* socket event that also carries a quota pointer */
+ NETIEVENT__SOCKET;
+ isc_quota_t *quota; /* stored as given; the get/put macros below do nothing else with it */
+} isc__netievent__socket_quota_t;
+
+#define NETIEVENT_SOCKET_QUOTA_TYPE(type) \
+ typedef isc__netievent__socket_quota_t isc__netievent_##type##_t;
+
+#define NETIEVENT_SOCKET_QUOTA_DECL(type) \
+ isc__netievent_##type##_t *isc__nm_get_netievent_##type( \
+ isc_nm_t *nm, isc_nmsocket_t *sock, isc_quota_t *quota); \
+ void isc__nm_put_netievent_##type(isc_nm_t *nm, \
+ isc__netievent_##type##_t *ievent);
+
+#define NETIEVENT_SOCKET_QUOTA_DEF(type) \
+ isc__netievent_##type##_t *isc__nm_get_netievent_##type( \
+ isc_nm_t *nm, isc_nmsocket_t *sock, isc_quota_t *quota) { \
+ isc__netievent_##type##_t *ievent = \
+ isc__nm_get_netievent(nm, netievent_##type); \
+ isc__nmsocket_attach(sock, &ievent->sock); \
+ ievent->quota = quota; \
+ \
+ return (ievent); \
+ } \
+ \
+ void isc__nm_put_netievent_##type(isc_nm_t *nm, \
+ isc__netievent_##type##_t *ievent) { \
+ isc__nmsocket_detach(&ievent->sock); \
+ isc__nm_put_netievent(nm, ievent); \
+ }
+
+typedef struct isc__netievent__task { /* event that carries a task instead of a socket */
+ isc__netievent_type type;
+ ISC_LINK(isc__netievent_t) link;
+ isc_task_t *task; /* stored as given by get; reset to NULL by put */
+} isc__netievent__task_t;
+
+#define NETIEVENT_TASK_TYPE(type) \
+ typedef isc__netievent__task_t isc__netievent_##type##_t;
+
+#define NETIEVENT_TASK_DECL(type) \
+ isc__netievent_##type##_t *isc__nm_get_netievent_##type( \
+ isc_nm_t *nm, isc_task_t *task); \
+ void isc__nm_put_netievent_##type(isc_nm_t *nm, \
+ isc__netievent_##type##_t *ievent);
+
+#define NETIEVENT_TASK_DEF(type) \
+ isc__netievent_##type##_t *isc__nm_get_netievent_##type( \
+ isc_nm_t *nm, isc_task_t *task) { \
+ isc__netievent_##type##_t *ievent = \
+ isc__nm_get_netievent(nm, netievent_##type); \
+ ievent->task = task; \
+ \
+ return (ievent); \
+ } \
+ \
+ void isc__nm_put_netievent_##type(isc_nm_t *nm, \
+ isc__netievent_##type##_t *ievent) { \
+ ievent->task = NULL; \
+ isc__nm_put_netievent(nm, ievent); \
+ }
+
+typedef struct isc__netievent_udpsend { /* hand-written event type: socket, peer address and uvreq */
+ NETIEVENT__SOCKET;
+ isc_sockaddr_t peer;
+ isc__nm_uvreq_t *req;
+} isc__netievent_udpsend_t;
+
+typedef struct isc__netievent_tlsconnect { /* hand-written event type: socket, SSL context and both addresses */
+ NETIEVENT__SOCKET;
+ SSL_CTX *ctx;
+ isc_sockaddr_t local; /* local address */
+ isc_sockaddr_t peer; /* peer address */
+} isc__netievent_tlsconnect_t;
+
+typedef struct isc__netievent { /* base event: only the two fields that every event struct starts with */
+ isc__netievent_type type;
+ ISC_LINK(isc__netievent_t) link;
+} isc__netievent_t;
+
+#define NETIEVENT_TYPE(type) typedef isc__netievent_t isc__netievent_##type##_t;
+
+#define NETIEVENT_DECL(type) \
+ isc__netievent_##type##_t *isc__nm_get_netievent_##type(isc_nm_t *nm); \
+ void isc__nm_put_netievent_##type(isc_nm_t *nm, \
+ isc__netievent_##type##_t *ievent);
+
+#define NETIEVENT_DEF(type) \
+ isc__netievent_##type##_t *isc__nm_get_netievent_##type( \
+ isc_nm_t *nm) { \
+ isc__netievent_##type##_t *ievent = \
+ isc__nm_get_netievent(nm, netievent_##type); \
+ \
+ return (ievent); \
+ } \
+ \
+ void isc__nm_put_netievent_##type(isc_nm_t *nm, \
+ isc__netievent_##type##_t *ievent) { \
+ isc__nm_put_netievent(nm, ievent); \
+ }
+
+typedef struct isc__netievent__tlsctx { /* socket event that also holds a TLS context */
+ NETIEVENT__SOCKET;
+ isc_tlsctx_t *tlsctx; /* taken with isc_tlsctx_attach() in get, released with isc_tlsctx_free() in put */
+} isc__netievent__tlsctx_t;
+
+#define NETIEVENT_SOCKET_TLSCTX_TYPE(type) \
+ typedef isc__netievent__tlsctx_t isc__netievent_##type##_t;
+
+#define NETIEVENT_SOCKET_TLSCTX_DECL(type) \
+ isc__netievent_##type##_t *isc__nm_get_netievent_##type( \
+ isc_nm_t *nm, isc_nmsocket_t *sock, isc_tlsctx_t *tlsctx); \
+ void isc__nm_put_netievent_##type(isc_nm_t *nm, \
+ isc__netievent_##type##_t *ievent);
+
+#define NETIEVENT_SOCKET_TLSCTX_DEF(type) \
+ isc__netievent_##type##_t *isc__nm_get_netievent_##type( \
+ isc_nm_t *nm, isc_nmsocket_t *sock, isc_tlsctx_t *tlsctx) { \
+ isc__netievent_##type##_t *ievent = \
+ isc__nm_get_netievent(nm, netievent_##type); \
+ isc__nmsocket_attach(sock, &ievent->sock); \
+ isc_tlsctx_attach(tlsctx, &ievent->tlsctx); \
+ \
+ return (ievent); \
+ } \
+ \
+ void isc__nm_put_netievent_##type(isc_nm_t *nm, \
+ isc__netievent_##type##_t *ievent) { \
+ isc_tlsctx_free(&ievent->tlsctx); \
+ isc__nmsocket_detach(&ievent->sock); \
+ isc__nm_put_netievent(nm, ievent); \
+ }
+
+#ifdef HAVE_LIBNGHTTP2
+typedef struct isc__netievent__http_eps { /* socket event that also holds an HTTP endpoints set */
+ NETIEVENT__SOCKET;
+ isc_nm_http_endpoints_t *endpoints; /* attached by the generated get function, detached by put */
+} isc__netievent__http_eps_t;
+
+#define NETIEVENT_SOCKET_HTTP_EPS_TYPE(type) \
+ typedef isc__netievent__http_eps_t isc__netievent_##type##_t;
+
+#define NETIEVENT_SOCKET_HTTP_EPS_DECL(type) \
+ isc__netievent_##type##_t *isc__nm_get_netievent_##type( \
+ isc_nm_t *nm, isc_nmsocket_t *sock, \
+ isc_nm_http_endpoints_t *endpoints); \
+ void isc__nm_put_netievent_##type(isc_nm_t *nm, \
+ isc__netievent_##type##_t *ievent);
+
+#define NETIEVENT_SOCKET_HTTP_EPS_DEF(type) \
+ isc__netievent_##type##_t *isc__nm_get_netievent_##type( \
+ isc_nm_t *nm, isc_nmsocket_t *sock, \
+ isc_nm_http_endpoints_t *endpoints) { \
+ isc__netievent_##type##_t *ievent = \
+ isc__nm_get_netievent(nm, netievent_##type); \
+ isc__nmsocket_attach(sock, &ievent->sock); \
+ isc_nm_http_endpoints_attach(endpoints, &ievent->endpoints); \
+ \
+ return (ievent); \
+ } \
+ \
+ void isc__nm_put_netievent_##type(isc_nm_t *nm, \
+ isc__netievent_##type##_t *ievent) { \
+ isc_nm_http_endpoints_detach(&ievent->endpoints); \
+ isc__nmsocket_detach(&ievent->sock); \
+ isc__nm_put_netievent(nm, ievent); \
+ }
+#endif /* HAVE_LIBNGHTTP2 */
+
+typedef union { /* NOTE(review): presumably used to size event allocations; the req_result, handle and task event structs are not members - confirm the union is still large enough for them */
+ isc__netievent_t ni;
+ isc__netievent__socket_t nis;
+ isc__netievent__socket_req_t nisr;
+ isc__netievent_udpsend_t nius;
+ isc__netievent__socket_quota_t nisq;
+ isc__netievent_tlsconnect_t nitc;
+ isc__netievent__tlsctx_t nitls;
+#ifdef HAVE_LIBNGHTTP2
+ isc__netievent__http_eps_t nihttpeps;
+#endif /* HAVE_LIBNGHTTP2 */
+} isc__netievent_storage_t;
+
+/*
+ * Work item for a uv_work threadpool.
+ *
+ * Bundles the libuv work request with the two user callbacks and the
+ * user data pointer declared below.
+ */
+typedef struct isc__nm_work {
+ isc_nm_t *netmgr;
+ uv_work_t req;
+ isc_nm_workcb_t cb; /* NOTE(review): presumably run on the threadpool; confirm in netmgr.c */
+ isc_nm_after_workcb_t after_cb; /* NOTE(review): presumably run once 'cb' has finished; confirm */
+ void *data;
+} isc__nm_work_t;
+
+/*
+ * Network manager
+ */
+#define NM_MAGIC ISC_MAGIC('N', 'E', 'T', 'M')
+#define VALID_NM(t) ISC_MAGIC_VALID(t, NM_MAGIC)
+
+struct isc_nm {
+ int magic; /* compared against NM_MAGIC by VALID_NM() */
+ isc_refcount_t references;
+ isc_mem_t *mctx;
+ int nworkers;
+ isc_mutex_t lock;
+ isc_condition_t wkstatecond;
+ isc_condition_t wkpausecond;
+ isc__networker_t *workers;
+
+ isc_stats_t *stats;
+
+ uint_fast32_t workers_running;
+ atomic_uint_fast32_t workers_paused;
+ atomic_uint_fast32_t maxudp;
+
+ bool load_balance_sockets;
+
+ atomic_bool paused;
+
+ /*
+ * Active connections are being closed and new connections are
+ * no longer allowed.
+ */
+ atomic_bool closing;
+
+ /*
+ * A worker is actively waiting for other workers, for example to
+ * stop listening; that means no other thread can do the same thing
+ * or pause, or we'll deadlock. We have to either re-enqueue our
+ * event or wait for the other one to finish if we want to pause.
+ */
+ atomic_int interlocked;
+
+ /*
+ * Timeout values for TCP connections, corresponding to
+ * tcp-initial-timeout, tcp-idle-timeout, tcp-keepalive-timeout,
+ * and tcp-advertised-timeout. Note that these are stored in
+ * milliseconds so they can be used directly with the libuv timer,
+ * but they are configured in tenths of seconds.
+ */
+ atomic_uint_fast32_t init;
+ atomic_uint_fast32_t idle;
+ atomic_uint_fast32_t keepalive;
+ atomic_uint_fast32_t advertised;
+
+ isc_barrier_t pausing;
+ isc_barrier_t resuming;
+
+ /*
+ * Socket SO_RCVBUF and SO_SNDBUF values
+ */
+ atomic_int_fast32_t recv_udp_buffer_size;
+ atomic_int_fast32_t send_udp_buffer_size;
+ atomic_int_fast32_t recv_tcp_buffer_size;
+ atomic_int_fast32_t send_tcp_buffer_size;
+
+#ifdef NETMGR_TRACE
+ ISC_LIST(isc_nmsocket_t) active_sockets;
+#endif
+};
+
+/*%
+ * A universal structure for either a single socket or a group of
+ * dup'd/SO_REUSEPORT-using sockets listening on the same interface.
+ */
+#define NMSOCK_MAGIC ISC_MAGIC('N', 'M', 'S', 'K')
+#define VALID_NMSOCK(t) ISC_MAGIC_VALID(t, NMSOCK_MAGIC)
+
+/*%
+ * Index into socket stat counter arrays.
+ *
+ * The identifiers are numbered consecutively from 0; STATID_MAX equals
+ * the number of identifiers that precede it. Values of this type are
+ * what isc__nm_incstats() and isc__nm_decstats() take as 'id'.
+ */
+typedef enum {
+ STATID_OPEN = 0,
+ STATID_OPENFAIL = 1,
+ STATID_CLOSE = 2,
+ STATID_BINDFAIL = 3,
+ STATID_CONNECTFAIL = 4,
+ STATID_CONNECT = 5,
+ STATID_ACCEPTFAIL = 6,
+ STATID_ACCEPT = 7,
+ STATID_SENDFAIL = 8,
+ STATID_RECVFAIL = 9,
+ STATID_ACTIVE = 10,
+ STATID_MAX = 11, /* count of the identifiers above, not a counter of its own */
+} isc__nm_statid_t;
+
+#if HAVE_LIBNGHTTP2
+typedef struct isc_nmsocket_tls_send_req { /* NOTE(review): by its name, state kept for one TLS send - confirm */
+ isc_nmsocket_t *tlssock;
+ isc_region_t data;
+ isc_nm_cb_t cb; /* callback ... */
+ void *cbarg; /* ... and the argument kept alongside it */
+ isc_nmhandle_t *handle;
+ bool finish;
+ uint8_t smallbuf[512]; /* inline 512-byte buffer; presumably for small payloads - confirm */
+} isc_nmsocket_tls_send_req_t;
+
+typedef enum isc_http_request_type { /* stored in isc_nmsocket_h2 as 'request_type' */
+ ISC_HTTP_REQ_GET,
+ ISC_HTTP_REQ_POST,
+ ISC_HTTP_REQ_UNSUPPORTED /* presumably any method other than GET/POST - confirm */
+} isc_http_request_type_t;
+
+typedef enum isc_http_scheme_type { /* stored in isc_nmsocket_h2 as 'request_scheme' */
+ ISC_HTTP_SCHEME_HTTP,
+ ISC_HTTP_SCHEME_HTTP_SECURE,
+ ISC_HTTP_SCHEME_UNSUPPORTED /* presumably any other scheme - confirm */
+} isc_http_scheme_type_t;
+
+typedef struct isc_nm_httpcbarg { /* a recv callback kept together with its argument */
+ isc_nm_recv_cb_t cb;
+ void *cbarg;
+ LINK(struct isc_nm_httpcbarg) link; /* list linkage; cf. 'handler_cbargs' in isc_nm_http_endpoints */
+} isc_nm_httpcbarg_t;
+
+typedef struct isc_nm_httphandler { /* groups a path with a recv callback and its argument */
+ char *path;
+ isc_nm_recv_cb_t cb;
+ void *cbarg;
+ size_t extrahandlesize;
+ LINK(struct isc_nm_httphandler) link; /* list linkage; cf. 'handlers' in isc_nm_http_endpoints */
+} isc_nm_httphandler_t;
+
+struct isc_nm_http_endpoints { /* holds the lists of HTTP handlers and callback arguments */
+ uint32_t magic;
+ isc_mem_t *mctx;
+
+ ISC_LIST(isc_nm_httphandler_t) handlers;
+ ISC_LIST(isc_nm_httpcbarg_t) handler_cbargs;
+
+ isc_refcount_t references; /* cf. isc_nm_http_endpoints_attach()/_detach() */
+ atomic_bool in_use;
+};
+
+typedef struct isc_nmsocket_h2 { /* embedded in struct isc_nmsocket as member 'h2' */
+ isc_nmsocket_t *psock; /* owner of the structure */
+ char *request_path;
+ char *query_data;
+ size_t query_data_len;
+ bool query_too_large;
+ isc_nm_httphandler_t *handler;
+
+ isc_buffer_t rbuf;
+ isc_buffer_t wbuf;
+
+ int32_t stream_id;
+ isc_nm_http_session_t *session;
+
+ isc_nmsocket_t *httpserver;
+
+ /* maximum concurrent streams (server-side) */
+ atomic_uint_fast32_t max_concurrent_streams;
+
+ uint32_t min_ttl; /* used to set "max-age" in responses */
+
+ isc_http_request_type_t request_type;
+ isc_http_scheme_type_t request_scheme;
+
+ size_t content_length;
+ char clenbuf[128];
+
+ char cache_control_buf[128];
+
+ int headers_error_code;
+ size_t headers_data_processed;
+
+ isc_nm_recv_cb_t cb;
+ void *cbarg;
+ LINK(struct isc_nmsocket_h2) link;
+
+ isc_nm_http_endpoints_t **listener_endpoints;
+ size_t n_listener_endpoints; /* presumably the element count of 'listener_endpoints' - confirm */
+
+ bool response_submitted;
+ struct { /* NOTE(review): by its name, parameters of an outgoing connection - confirm */
+ char *uri;
+ bool post;
+ isc_tlsctx_t *tlsctx;
+ isc_sockaddr_t local_interface;
+ void *cstream;
+ const char *tls_peer_verify_string;
+ } connect;
+} isc_nmsocket_h2_t;
+#endif /* HAVE_LIBNGHTTP2 */
+
+typedef void (*isc_nm_closehandlecb_t)(void *arg);
+/*%<
+ * Opaque callback function, used for isc_nmhandle 'reset' and 'free'
+ * callbacks.
+ *
+ * It is also the type of the 'closehandle_cb' member of struct
+ * isc_nmsocket.
+ */
+
+struct isc_nmsocket { /* described by the comment above NMSOCK_MAGIC */
+ /*% Unlocked, RO */
+ int magic; /* expected to hold NMSOCK_MAGIC (see VALID_NMSOCK) */
+ int tid;
+ isc_nmsocket_type type;
+ isc_nm_t *mgr;
+
+ /*% Parent socket for multithreaded listeners */
+ isc_nmsocket_t *parent;
+ /*% Listener socket this connection was accepted on */
+ isc_nmsocket_t *listener;
+ /*% Self socket */
+ isc_nmsocket_t *self;
+
+ isc_barrier_t startlistening;
+ isc_barrier_t stoplistening;
+
+ /*% TLS stuff */
+ struct tls {
+ isc_tls_t *tls;
+ isc_tlsctx_t *ctx;
+ isc_tlsctx_client_session_cache_t *client_sess_cache;
+ bool client_session_saved;
+ BIO *app_rbio;
+ BIO *app_wbio;
+ BIO *ssl_rbio;
+ BIO *ssl_wbio;
+ enum {
+ TLS_STATE_NONE,
+ TLS_STATE_HANDSHAKE,
+ TLS_STATE_IO,
+ TLS_STATE_ERROR,
+ TLS_STATE_CLOSING
+ } state;
+ isc_region_t senddata;
+ ISC_LIST(isc__nm_uvreq_t) sendreqs;
+ bool cycle;
+ isc_result_t pending_error;
+ /*
+ * List of active send requests.
+ * NOTE(review): 'pending_req' is a single pointer while 'sendreqs'
+ * above is the list - confirm which member this comment describes.
+ */
+ isc__nm_uvreq_t *pending_req;
+ bool alpn_negotiated;
+ const char *tls_verify_errmsg;
+ } tls;
+
+#if HAVE_LIBNGHTTP2
+ /*% TLS stuff (the 'tlsstream' member; HAVE_LIBNGHTTP2 builds only) */
+ struct tlsstream {
+ bool server;
+ BIO *bio_in;
+ BIO *bio_out;
+ isc_tls_t *tls;
+ isc_tlsctx_t *ctx;
+ isc_tlsctx_t **listener_tls_ctx; /*%< A context reference per
+ worker */
+ size_t n_listener_tls_ctx;
+ isc_tlsctx_client_session_cache_t *client_sess_cache;
+ bool client_session_saved;
+ isc_nmsocket_t *tlslistener;
+ isc_nmsocket_t *tlssocket;
+ atomic_bool result_updated;
+ enum {
+ TLS_INIT,
+ TLS_HANDSHAKE,
+ TLS_IO,
+ TLS_CLOSED
+ } state; /*%< The order of these is significant */
+ size_t nsending;
+ bool reading;
+ } tlsstream;
+
+ isc_nmsocket_h2_t h2; /* HAVE_LIBNGHTTP2 builds only */
+#endif /* HAVE_LIBNGHTTP2 */
+ /*%
+ * quota is the TCP client quota, attached when a TCP connection
+ * is established. pquota is a non-attached pointer to the
+ * TCP client quota, stored in listening sockets but only
+ * attached in connected sockets.
+ */
+ isc_quota_t *quota;
+ isc_quota_t *pquota;
+ isc_quota_cb_t quotacb;
+
+ /*%
+ * Socket statistics
+ */
+ const isc_statscounter_t *statsindex;
+
+ /*%
+ * TCP read/connect timeout timers.
+ */
+ uv_timer_t read_timer;
+ uint64_t read_timeout;
+ uint64_t connect_timeout;
+
+ /*%
+ * TCP write timeout timer.
+ */
+ uint64_t write_timeout;
+
+ /*% outer socket is for 'wrapped' sockets - e.g. tcpdns in tcp */
+ isc_nmsocket_t *outer;
+
+ /*% server socket for connections */
+ isc_nmsocket_t *server;
+
+ /*% Child sockets for multi-socket setups */
+ isc_nmsocket_t *children;
+ uint_fast32_t nchildren;
+ isc_sockaddr_t iface;
+ isc_nmhandle_t *statichandle;
+ isc_nmhandle_t *outerhandle;
+
+ /*% Extra data allocated at the end of each isc_nmhandle_t */
+ size_t extrahandlesize;
+
+ /*% TCP backlog */
+ int backlog;
+
+ /*% libuv data */
+ uv_os_sock_t fd;
+ union uv_any_handle uv_handle;
+
+ /*% Peer address */
+ isc_sockaddr_t peer;
+
+ /* Atomic */
+ /*% Number of running (e.g. listening) child sockets */
+ atomic_uint_fast32_t rchildren;
+
+ /*%
+ * Socket is active if it's listening, working, etc. If it's
+ * closing, then it doesn't make sense, for example, to
+ * push handles or reqs for reuse.
+ */
+ atomic_bool active;
+ atomic_bool destroying;
+
+ bool route_sock;
+
+ /*%
+ * Socket is closed if it's not active and all the possible
+ * callbacks were fired, there are no active handles, etc.
+ * If active==false but closed==false, that means the socket
+ * is closing.
+ */
+ atomic_bool closing;
+ atomic_bool closed;
+ atomic_bool listening;
+ atomic_bool connecting;
+ atomic_bool connected;
+ atomic_bool accepting;
+ atomic_bool reading;
+ atomic_bool timedout;
+ isc_refcount_t references;
+
+ /*%
+ * Established an outgoing connection, as client not server.
+ */
+ atomic_bool client;
+
+ /*%
+ * TCPDNS socket has been set not to pipeline.
+ */
+ atomic_bool sequential;
+
+ /*%
+ * The socket is processing read callback, this is a guard to avoid
+ * reading data before the readcb is back.
+ */
+ bool processing;
+
+ /*%
+ * A TCP socket has had isc_nm_pauseread() called.
+ */
+ atomic_bool readpaused;
+
+ /*%
+ * A TCP or TCPDNS socket has been set to use the keepalive
+ * timeout instead of the default idle timeout.
+ */
+ atomic_bool keepalive;
+
+ /*%
+ * 'spare' handles that can be reused to avoid allocations,
+ * for UDP.
+ */
+ isc_astack_t *inactivehandles;
+ isc_astack_t *inactivereqs;
+
+ /*%
+ * Used to wait for TCP listening events to complete, and
+ * for the number of running children to reach zero during
+ * shutdown.
+ *
+ * We use two condition variables to prevent the race where the netmgr
+ * threads would be able to finish and destroy the socket before it's
+ * unlocked by the isc_nm_listen<proto>() function. So, the flow is as
+ * follows:
+ *
+ * 1. parent thread creates all children sockets and passes them to
+ * netthreads, looks at the signaling variable and WAIT(cond) until
+ * the children are done initializing
+ *
+ * 2. the events get picked by netthreads, calls the libuv API (and
+ * either succeeds or fails) and WAIT(scond) until all other
+ * children sockets in netthreads are initialized and the listening
+ * socket lock is unlocked
+ *
+ * 3. the control is given back to the parent thread which now either
+ * returns success or shuts down the listener if an error has
+ * occurred in the children netthread
+ *
+ * NOTE: The other approach would be doing an extra attach to the parent
+ * listening socket, and then detach it in the parent thread, but that
+ * breaks the promise that once the libuv socket is initialized on the
+ * nmsocket, the nmsocket needs to be handled only by matching
+ * netthread, so in fact that would add a complexity in a way that
+ * isc__nmsocket_detach would have to be converted to use an
+ * asynchronous netievent.
+ */
+ isc_mutex_t lock;
+ isc_condition_t cond;
+ isc_condition_t scond;
+
+ /*%
+ * Used to pass a result back from listen or connect events.
+ */
+ isc_result_t result;
+
+ /*%
+ * Current number of active handles.
+ */
+ atomic_int_fast32_t ah;
+
+ /*% Buffer for TCPDNS processing */
+ size_t buf_size;
+ size_t buf_len;
+ unsigned char *buf;
+
+ /*%
+ * This function will be called with handle->sock
+ * as the argument whenever a handle's references drop
+ * to zero, after its reset callback has been called.
+ */
+ isc_nm_closehandlecb_t closehandle_cb;
+
+ isc_nmhandle_t *recv_handle;
+ isc_nm_recv_cb_t recv_cb;
+ void *recv_cbarg;
+ bool recv_read;
+
+ isc_nm_cb_t connect_cb;
+ void *connect_cbarg;
+
+ isc_nm_accept_cb_t accept_cb;
+ void *accept_cbarg;
+
+ atomic_int_fast32_t active_child_connections;
+
+ isc_barrier_t barrier;
+ bool barrier_initialised;
+#ifdef NETMGR_TRACE
+ void *backtrace[TRACE_SIZE]; /* this and the members below: NETMGR_TRACE builds only */
+ int backtrace_size;
+ LINK(isc_nmsocket_t) active_link;
+ ISC_LIST(isc_nmhandle_t) active_handles;
+#endif
+};
+
+bool
+isc__nm_in_netthread(void);
+/*%<
+ * Returns 'true' if we're in the network thread.
+ */
+
+void
+isc__nm_maybe_enqueue_ievent(isc__networker_t *worker, isc__netievent_t *event);
+/*%<
+ * If the caller is already in the matching nmthread, process the netievent
+ * directly, if not enqueue using isc__nm_enqueue_ievent().
+ */
+
+void
+isc__nm_enqueue_ievent(isc__networker_t *worker, isc__netievent_t *event);
+/*%<
+ * Enqueue an ievent onto a specific worker queue. (This is the only safe
+ * way to use an isc__networker_t from another thread.)
+ */
+
+void
+isc__nm_free_uvbuf(isc_nmsocket_t *sock, const uv_buf_t *buf);
+/*%<
+ * Free a buffer allocated for a receive operation.
+ *
+ * Note that as currently implemented, this doesn't actually
+ * free anything; it marks the isc__networker's UDP receive buffer
+ * as "not in use".
+ */
+
+isc_nmhandle_t *
+isc___nmhandle_get(isc_nmsocket_t *sock, isc_sockaddr_t *peer,
+ isc_sockaddr_t *local FLARG);
+/*%<
+ * Get a handle for the socket 'sock', allocating a new one
+ * if there isn't one available in 'sock->inactivehandles'.
+ *
+ * If 'peer' is not NULL, set the handle's peer address to 'peer',
+ * otherwise set it to 'sock->peer'.
+ *
+ * If 'local' is not NULL, set the handle's local address to 'local',
+ * otherwise set it to 'sock->iface'.
+ *
+ * 'sock' will be attached to 'handle->sock'. The caller may need
+ * to detach the socket afterward.
+ */
+
+isc__nm_uvreq_t *
+isc___nm_uvreq_get(isc_nm_t *mgr, isc_nmsocket_t *sock FLARG);
+/*%<
+ * Get a UV request structure for the socket 'sock', allocating a
+ * new one if there isn't one available in 'sock->inactivereqs'.
+ */
+
+void
+isc___nm_uvreq_put(isc__nm_uvreq_t **req, isc_nmsocket_t *sock FLARG);
+/*%<
+ * Completes the use of a UV request structure, setting '*req' to NULL.
+ *
+ * The UV request is pushed onto the 'sock->inactivereqs' stack or,
+ * if that doesn't work, freed.
+ */
+
+void
+isc___nmsocket_init(isc_nmsocket_t *sock, isc_nm_t *mgr, isc_nmsocket_type type,
+ isc_sockaddr_t *iface FLARG);
+/*%<
+ * Initialize socket 'sock', attach it to 'mgr', and set it to type 'type'
+ * and its interface to 'iface'.
+ */
+
+void
+isc___nmsocket_attach(isc_nmsocket_t *sock, isc_nmsocket_t **target FLARG);
+/*%<
+ * Attach to a socket, increasing refcount
+ */
+
+void
+isc___nmsocket_detach(isc_nmsocket_t **socketp FLARG);
+/*%<
+ * Detach from socket, decreasing refcount and possibly destroying the
+ * socket if it's no longer referenced.
+ */
+
+void
+isc___nmsocket_prep_destroy(isc_nmsocket_t *sock FLARG);
+/*%<
+ * Mark 'sock' as inactive, close it if necessary, and destroy it
+ * if there are no remaining references or active handles.
+ */
+
+void
+isc__nmsocket_shutdown(isc_nmsocket_t *sock);
+/*%<
+ * Initiate the socket shutdown which actively calls the active
+ * callbacks.
+ */
+
+void
+isc__nmsocket_reset(isc_nmsocket_t *sock);
+/*%<
+ * Reset and close the socket.
+ */
+
+bool
+isc__nmsocket_active(isc_nmsocket_t *sock);
+/*%<
+ * Determine whether 'sock' is active by checking 'sock->active'
+ * or, for child sockets, 'sock->parent->active'.
+ */
+
+bool
+isc__nmsocket_deactivate(isc_nmsocket_t *sock);
+/*%<
+ * @brief Deactivate active socket
+ *
+ * Atomically deactivate the socket by setting @p sock->active or, for child
+ * sockets, @p sock->parent->active to @c false
+ *
+ * @param[in] sock - valid nmsocket
+ * @return @c false if the socket was already inactive, @c true otherwise
+ */
+
+void
+isc__nmsocket_clearcb(isc_nmsocket_t *sock);
+/*%<
+ * Clear the recv and accept callbacks in 'sock'.
+ */
+
+void
+isc__nmsocket_timer_stop(isc_nmsocket_t *sock);
+void
+isc__nmsocket_timer_start(isc_nmsocket_t *sock);
+void
+isc__nmsocket_timer_restart(isc_nmsocket_t *sock);
+bool
+isc__nmsocket_timer_running(isc_nmsocket_t *sock);
+/*%<
+ * Start/stop/restart/check the timeout on the socket
+ */
+
+void
+isc__nm_connectcb(isc_nmsocket_t *sock, isc__nm_uvreq_t *uvreq,
+ isc_result_t eresult, bool async);
+
+void
+isc__nm_async_connectcb(isc__networker_t *worker, isc__netievent_t *ev0);
+/*%<
+ * Issue a connect callback on the socket, used to call the callback
+ */
+
+void
+isc__nm_readcb(isc_nmsocket_t *sock, isc__nm_uvreq_t *uvreq,
+ isc_result_t eresult);
+void
+isc__nm_async_readcb(isc__networker_t *worker, isc__netievent_t *ev0);
+
+/*%<
+ * Issue a read callback on the socket, used to call the callback
+ * on failed conditions when the event can't be scheduled on the uv loop.
+ *
+ */
+
+void
+isc__nm_sendcb(isc_nmsocket_t *sock, isc__nm_uvreq_t *uvreq,
+ isc_result_t eresult, bool async);
+void
+isc__nm_async_sendcb(isc__networker_t *worker, isc__netievent_t *ev0);
+/*%<
+ * Issue a write callback on the socket, used to call the callback
+ * on failed conditions when the event can't be scheduled on the uv loop.
+ */
+
+void
+isc__nm_async_shutdown(isc__networker_t *worker, isc__netievent_t *ev0);
+/*%<
+ * Walk through all uv handles, get the underlying sockets and issue
+ * close on them.
+ */
+
+void
+isc__nm_udp_send(isc_nmhandle_t *handle, const isc_region_t *region,
+ isc_nm_cb_t cb, void *cbarg);
+/*%<
+ * Back-end implementation of isc_nm_send() for UDP handles.
+ */
+
+void
+isc__nm_udp_read(isc_nmhandle_t *handle, isc_nm_recv_cb_t cb, void *cbarg);
+/*%<
+ * Back-end implementation of isc_nm_read() for UDP handles.
+ */
+
+void
+isc__nm_udp_close(isc_nmsocket_t *sock);
+/*%<
+ * Close a UDP socket.
+ */
+
+void
+isc__nm_udp_cancelread(isc_nmhandle_t *handle);
+/*%<
+ * Stop reading on a connected UDP handle.
+ */
+
+void
+isc__nm_udp_shutdown(isc_nmsocket_t *sock);
+/*%<
+ * Called during the shutdown process to close and clean up connected
+ * sockets.
+ */
+
+void
+isc__nm_udp_stoplistening(isc_nmsocket_t *sock);
+/*%<
+ * Stop listening on 'sock'.
+ */
+
+void
+isc__nm_udp_settimeout(isc_nmhandle_t *handle, uint32_t timeout);
+/*%<
+ * Set or clear the recv timeout for the UDP socket associated with 'handle'.
+ */
+
+void
+isc__nm_async_udplisten(isc__networker_t *worker, isc__netievent_t *ev0);
+void
+isc__nm_async_udpconnect(isc__networker_t *worker, isc__netievent_t *ev0);
+void
+isc__nm_async_udpstop(isc__networker_t *worker, isc__netievent_t *ev0);
+void
+isc__nm_async_udpsend(isc__networker_t *worker, isc__netievent_t *ev0);
+void
+isc__nm_async_udpread(isc__networker_t *worker, isc__netievent_t *ev0);
+void
+isc__nm_async_udpcancel(isc__networker_t *worker, isc__netievent_t *ev0);
+void
+isc__nm_async_udpclose(isc__networker_t *worker, isc__netievent_t *ev0);
+/*%<
+ * Callback handlers for asynchronous UDP events (listen, connect,
+ * stoplisten, send, read, cancel, close).
+ */
+
+void
+isc__nm_async_routeconnect(isc__networker_t *worker, isc__netievent_t *ev0);
+/*%<
+ * Callback handler for route socket events.
+ */
+
+void
+isc__nm_tcp_send(isc_nmhandle_t *handle, const isc_region_t *region,
+ isc_nm_cb_t cb, void *cbarg);
+/*%<
+ * Back-end implementation of isc_nm_send() for TCP handles.
+ */
+
+void
+isc__nm_tcp_read(isc_nmhandle_t *handle, isc_nm_recv_cb_t cb, void *cbarg);
+/*%<
+ * Back-end implementation of isc_nm_read() for TCP handles.
+ */
+
+void
+isc__nm_tcp_close(isc_nmsocket_t *sock);
+/*%<
+ * Close a TCP socket.
+ */
+void
+isc__nm_tcp_pauseread(isc_nmhandle_t *handle);
+/*%<
+ * Pause reading on this handle, while still remembering the callback.
+ */
+
+void
+isc__nm_tcp_resumeread(isc_nmhandle_t *handle);
+/*%<
+ * Resume reading from socket.
+ *
+ */
+
+void
+isc__nm_tcp_shutdown(isc_nmsocket_t *sock);
+/*%<
+ * Called during the shutdown process to close and clean up connected
+ * sockets.
+ */
+
+void
+isc__nm_tcp_cancelread(isc_nmhandle_t *handle);
+/*%<
+ * Stop reading on a connected TCP handle.
+ */
+
+void
+isc__nm_tcp_stoplistening(isc_nmsocket_t *sock);
+/*%<
+ * Stop listening on 'sock'.
+ */
+
+int_fast32_t
+isc__nm_tcp_listener_nactive(isc_nmsocket_t *sock);
+/*%<
+ * Returns the number of active connections for the TCP listener socket.
+ */
+
+void
+isc__nm_tcp_settimeout(isc_nmhandle_t *handle, uint32_t timeout);
+/*%<
+ * Set the read timeout for the TCP socket associated with 'handle'.
+ */
+
+void
+isc__nm_async_tcpconnect(isc__networker_t *worker, isc__netievent_t *ev0);
+void
+isc__nm_async_tcplisten(isc__networker_t *worker, isc__netievent_t *ev0);
+void
+isc__nm_async_tcpaccept(isc__networker_t *worker, isc__netievent_t *ev0);
+void
+isc__nm_async_tcpstop(isc__networker_t *worker, isc__netievent_t *ev0);
+void
+isc__nm_async_tcpsend(isc__networker_t *worker, isc__netievent_t *ev0);
+void
+isc__nm_async_startread(isc__networker_t *worker, isc__netievent_t *ev0);
+void
+isc__nm_async_pauseread(isc__networker_t *worker, isc__netievent_t *ev0);
+void
+isc__nm_async_tcpstartread(isc__networker_t *worker, isc__netievent_t *ev0);
+void
+isc__nm_async_tcppauseread(isc__networker_t *worker, isc__netievent_t *ev0);
+void
+isc__nm_async_tcpcancel(isc__networker_t *worker, isc__netievent_t *ev0);
+void
+isc__nm_async_tcpclose(isc__networker_t *worker, isc__netievent_t *ev0);
+/*%<
+ * Callback handlers for asynchronous TCP events (connect, listen, accept,
+ * stoplisten, send, read, pause, cancel, close).
+ */
+
+void
+isc__nm_async_tlsclose(isc__networker_t *worker, isc__netievent_t *ev0);
+
+void
+isc__nm_async_tlssend(isc__networker_t *worker, isc__netievent_t *ev0);
+
+void
+isc__nm_async_tlsstartread(isc__networker_t *worker, isc__netievent_t *ev0);
+
+void
+isc__nm_async_tlsdobio(isc__networker_t *worker, isc__netievent_t *ev0);
+
+void
+isc__nm_async_tlscancel(isc__networker_t *worker, isc__netievent_t *ev0);
+/*%<
+ * Callback handlers for asynchronous TLS events.
+ */
+
+void
+isc__nm_tcpdns_send(isc_nmhandle_t *handle, isc_region_t *region,
+ isc_nm_cb_t cb, void *cbarg);
+/*%<
+ * Back-end implementation of isc_nm_send() for TCPDNS handles.
+ */
+
+void
+isc__nm_tcpdns_shutdown(isc_nmsocket_t *sock);
+
+void
+isc__nm_tcpdns_close(isc_nmsocket_t *sock);
+/*%<
+ * Close a TCPDNS socket.
+ */
+
+void
+isc__nm_tcpdns_stoplistening(isc_nmsocket_t *sock);
+/*%<
+ * Stop listening on 'sock'.
+ */
+
+void
+isc__nm_tcpdns_settimeout(isc_nmhandle_t *handle, uint32_t timeout);
+/*%<
+ * Set the read timeout and reset the timer for the TCPDNS socket
+ * associated with 'handle', and the TCP socket it wraps around.
+ */
+
+void
+isc__nm_async_tcpdnsaccept(isc__networker_t *worker, isc__netievent_t *ev0);
+void
+isc__nm_async_tcpdnsconnect(isc__networker_t *worker, isc__netievent_t *ev0);
+void
+isc__nm_async_tcpdnslisten(isc__networker_t *worker, isc__netievent_t *ev0);
+void
+isc__nm_async_tcpdnscancel(isc__networker_t *worker, isc__netievent_t *ev0);
+void
+isc__nm_async_tcpdnsclose(isc__networker_t *worker, isc__netievent_t *ev0);
+void
+isc__nm_async_tcpdnssend(isc__networker_t *worker, isc__netievent_t *ev0);
+void
+isc__nm_async_tcpdnsstop(isc__networker_t *worker, isc__netievent_t *ev0);
+void
+isc__nm_async_tcpdnsread(isc__networker_t *worker, isc__netievent_t *ev0);
+/*%<
+ * Callback handlers for asynchronous TCPDNS events.
+ */
+
+void
+isc__nm_tcpdns_read(isc_nmhandle_t *handle, isc_nm_recv_cb_t cb, void *cbarg);
+/*%<
+ * Back-end implementation of isc_nm_read() for TCPDNS handles.
+ */
+
+void
+isc__nm_tcpdns_cancelread(isc_nmhandle_t *handle);
+/*%<
+ * Stop reading on a connected TCPDNS handle.
+ */
+
+void
+isc__nm_tlsdns_send(isc_nmhandle_t *handle, isc_region_t *region,
+ isc_nm_cb_t cb, void *cbarg);
+
+void
+isc__nm_tlsdns_shutdown(isc_nmsocket_t *sock);
+
+void
+isc__nm_tlsdns_close(isc_nmsocket_t *sock);
+/*%<
+ * Close a TLSDNS socket.
+ */
+
+void
+isc__nm_tlsdns_stoplistening(isc_nmsocket_t *sock);
+/*%<
+ * Stop listening on 'sock'.
+ */
+
+void
+isc__nm_tlsdns_settimeout(isc_nmhandle_t *handle, uint32_t timeout);
+/*%<
+ * Set the read timeout and reset the timer for the TLSDNS socket
+ * associated with 'handle', and the TCP socket it wraps around.
+ */
+
+void
+isc__nm_tlsdns_read(isc_nmhandle_t *handle, isc_nm_recv_cb_t cb, void *cbarg);
+/*%<
+ * Back-end implementation of isc_nm_read() for TLSDNS handles.
+ */
+
+void
+isc__nm_tlsdns_cancelread(isc_nmhandle_t *handle);
+/*%<
+ * Stop reading on a connected TLSDNS handle.
+ */
+
+const char *
+isc__nm_tlsdns_verify_tls_peer_result_string(const isc_nmhandle_t *handle);
+
+void
+isc__nm_async_tlsdnscycle(isc__networker_t *worker, isc__netievent_t *ev0);
+void
+isc__nm_async_tlsdnsaccept(isc__networker_t *worker, isc__netievent_t *ev0);
+void
+isc__nm_async_tlsdnsconnect(isc__networker_t *worker, isc__netievent_t *ev0);
+void
+isc__nm_async_tlsdnslisten(isc__networker_t *worker, isc__netievent_t *ev0);
+void
+isc__nm_async_tlsdnscancel(isc__networker_t *worker, isc__netievent_t *ev0);
+void
+isc__nm_async_tlsdnsclose(isc__networker_t *worker, isc__netievent_t *ev0);
+void
+isc__nm_async_tlsdnssend(isc__networker_t *worker, isc__netievent_t *ev0);
+void
+isc__nm_async_tlsdnsstop(isc__networker_t *worker, isc__netievent_t *ev0);
+void
+isc__nm_async_tlsdnsshutdown(isc__networker_t *worker, isc__netievent_t *ev0);
+void
+isc__nm_async_tlsdnsread(isc__networker_t *worker, isc__netievent_t *ev0);
+void
+isc__nm_async_tlsdns_set_tlsctx(isc_nmsocket_t *listener, isc_tlsctx_t *tlsctx,
+ const int tid);
+/*%<
+ * Callback handlers for asynchronous TLSDNS events.
+ */
+
+isc_result_t
+isc__nm_tlsdns_xfr_checkperm(isc_nmsocket_t *sock);
+/*%<
+ * Check if it is permitted to do a zone transfer over the given TLSDNS
+ * socket.
+ *
+ * Returns:
+ * \li #ISC_R_SUCCESS Success, permission check passed successfully
+ * \li #ISC_R_DOTALPNERROR No permission because of ALPN tag mismatch
+ * \li any other result indicates failure (i.e. no permission)
+ *
+ * Requires:
+ * \li 'sock' is a valid TLSDNS socket.
+ */
+
+void
+isc__nm_tlsdns_cleanup_data(isc_nmsocket_t *sock);
+
+#if HAVE_LIBNGHTTP2
+void
+isc__nm_tls_send(isc_nmhandle_t *handle, const isc_region_t *region,
+ isc_nm_cb_t cb, void *cbarg);
+
+void
+isc__nm_tls_cancelread(isc_nmhandle_t *handle);
+
+/*%<
+ * Back-end implementation of isc_nm_send() for TLS handles.
+ */
+
+void
+isc__nm_tls_read(isc_nmhandle_t *handle, isc_nm_recv_cb_t cb, void *cbarg);
+
+void
+isc__nm_tls_close(isc_nmsocket_t *sock);
+/*%<
+ * Close a TLS socket.
+ */
+
+void
+isc__nm_tls_pauseread(isc_nmhandle_t *handle);
+/*%<
+ * Pause reading on this handle, while still remembering the callback.
+ */
+
+void
+isc__nm_tls_resumeread(isc_nmhandle_t *handle);
+/*%<
+ * Resume reading from the handle.
+ *
+ */
+
+void
+isc__nm_tls_cleanup_data(isc_nmsocket_t *sock);
+
+void
+isc__nm_tls_stoplistening(isc_nmsocket_t *sock);
+
+void
+isc__nm_tls_settimeout(isc_nmhandle_t *handle, uint32_t timeout);
+void
+isc__nm_tls_cleartimeout(isc_nmhandle_t *handle);
+/*%<
+ * Set the read timeout and reset the timer for the socket
+ * associated with 'handle', and the TCP socket it wraps
+ * around.
+ */
+
+const char *
+isc__nm_tls_verify_tls_peer_result_string(const isc_nmhandle_t *handle);
+
+void
+isc__nmhandle_tls_keepalive(isc_nmhandle_t *handle, bool value);
+/*%<
+ * Set the keepalive value on the underlying TCP handle.
+ */
+
+void
+isc__nm_async_tls_set_tlsctx(isc_nmsocket_t *listener, isc_tlsctx_t *tlsctx,
+ const int tid);
+
+void
+isc__nmhandle_tls_setwritetimeout(isc_nmhandle_t *handle,
+ uint64_t write_timeout);
+
+void
+isc__nm_http_stoplistening(isc_nmsocket_t *sock);
+
+void
+isc__nm_http_settimeout(isc_nmhandle_t *handle, uint32_t timeout);
+void
+isc__nm_http_cleartimeout(isc_nmhandle_t *handle);
+/*%<
+ * Set the read timeout and reset the timer for the socket
+ * associated with 'handle', and the TLS/TCP socket it wraps
+ * around.
+ */
+
+void
+isc__nmhandle_http_keepalive(isc_nmhandle_t *handle, bool value);
+/*%<
+ * Set the keepalive value on the underlying session handle
+ */
+
+void
+isc__nm_http_initsocket(isc_nmsocket_t *sock);
+
+void
+isc__nm_http_cleanup_data(isc_nmsocket_t *sock);
+
+isc_result_t
+isc__nm_http_request(isc_nmhandle_t *handle, isc_region_t *region,
+ isc_nm_recv_cb_t reply_cb, void *cbarg);
+
+void
+isc__nm_http_send(isc_nmhandle_t *handle, const isc_region_t *region,
+ isc_nm_cb_t cb, void *cbarg);
+
+void
+isc__nm_http_read(isc_nmhandle_t *handle, isc_nm_recv_cb_t cb, void *cbarg);
+
+void
+isc__nm_http_close(isc_nmsocket_t *sock);
+
+void
+isc__nm_http_bad_request(isc_nmhandle_t *handle);
+/*%<
+ * Respond to the request with 400 "Bad Request" status.
+ *
+ * Requires:
+ * \li 'handle' is a valid HTTP netmgr handle object, referencing a server-side
+ * socket
+ */
+
+bool
+isc__nm_http_has_encryption(const isc_nmhandle_t *handle);
+
+void
+isc__nm_http_set_maxage(isc_nmhandle_t *handle, const uint32_t ttl);
+
+const char *
+isc__nm_http_verify_tls_peer_result_string(const isc_nmhandle_t *handle);
+
+void
+isc__nm_async_httpsend(isc__networker_t *worker, isc__netievent_t *ev0);
+
+void
+isc__nm_async_httpclose(isc__networker_t *worker, isc__netievent_t *ev0);
+
+void
+isc__nm_async_httpendpoints(isc__networker_t *worker, isc__netievent_t *ev0);
+
+bool
+isc__nm_parse_httpquery(const char *query_string, const char **start,
+ size_t *len);
+
+char *
+isc__nm_base64url_to_base64(isc_mem_t *mem, const char *base64url,
+ const size_t base64url_len, size_t *res_len);
+
+char *
+isc__nm_base64_to_base64url(isc_mem_t *mem, const char *base64,
+ const size_t base64_len, size_t *res_len);
+
+void
+isc__nm_httpsession_attach(isc_nm_http_session_t *source,
+ isc_nm_http_session_t **targetp);
+void
+isc__nm_httpsession_detach(isc_nm_http_session_t **sessionp);
+
+void
+isc__nm_http_set_tlsctx(isc_nmsocket_t *sock, isc_tlsctx_t *tlsctx);
+
+void
+isc__nm_http_set_max_streams(isc_nmsocket_t *listener,
+ const uint32_t max_concurrent_streams);
+
+#endif
+
+void
+isc__nm_async_settlsctx(isc__networker_t *worker, isc__netievent_t *ev0);
+
+#define isc__nm_uverr2result(x) \
+ isc___nm_uverr2result(x, true, __FILE__, __LINE__, __func__)
+isc_result_t
+isc___nm_uverr2result(int uverr, bool dolog, const char *file,
+ unsigned int line, const char *func);
+/*%<
+ * Convert a libuv error value into an isc_result_t. The
+ * list of supported error values is not complete; new users
+ * of this function should add any expected errors that are
+ * not already there.
+ */
+
+bool
+isc__nm_acquire_interlocked(isc_nm_t *mgr);
+/*%<
+ * Try to acquire interlocked state; return true if successful.
+ */
+
+void
+isc__nm_drop_interlocked(isc_nm_t *mgr);
+/*%<
+ * Drop interlocked state; signal waiters.
+ */
+
+void
+isc__nm_acquire_interlocked_force(isc_nm_t *mgr);
+/*%<
+ * Actively wait for interlocked state.
+ */
+
+void
+isc__nm_async_sockstop(isc__networker_t *worker, isc__netievent_t *ev0);
+
+void
+isc__nm_incstats(isc_nmsocket_t *sock, isc__nm_statid_t id);
+/*%<
+ * Increment socket-related statistics counters.
+ */
+
+void
+isc__nm_decstats(isc_nmsocket_t *sock, isc__nm_statid_t id);
+/*%<
+ * Decrement socket-related statistics counters.
+ */
+
+isc_result_t
+isc__nm_socket(int domain, int type, int protocol, uv_os_sock_t *sockp);
+/*%<
+ * Platform independent socket() version
+ */
+
+void
+isc__nm_closesocket(uv_os_sock_t sock);
+/*%<
+ * Platform independent closesocket() version
+ */
+
+isc_result_t
+isc__nm_socket_freebind(uv_os_sock_t fd, sa_family_t sa_family);
+/*%<
+ * Set the IP_FREEBIND (or equivalent) socket option on the fd
+ */
+
+isc_result_t
+isc__nm_socket_reuse(uv_os_sock_t fd);
+/*%<
+ * Set the SO_REUSEADDR or SO_REUSEPORT (or equivalent) socket option on the fd
+ */
+
+isc_result_t
+isc__nm_socket_reuse_lb(uv_os_sock_t fd);
+/*%<
+ * Set the SO_REUSEPORT_LB (or equivalent) socket option on the fd
+ */
+
+isc_result_t
+isc__nm_socket_incoming_cpu(uv_os_sock_t fd);
+/*%<
+ * Set the SO_INCOMING_CPU socket option on the fd if available
+ */
+
+isc_result_t
+isc__nm_socket_disable_pmtud(uv_os_sock_t fd, sa_family_t sa_family);
+/*%<
+ * Disable the Path MTU Discovery, either by disabling IP(V6)_DONTFRAG socket
+ * option, or setting the IP(V6)_MTU_DISCOVER socket option to IP_PMTUDISC_OMIT
+ */
+
+isc_result_t
+isc__nm_socket_v6only(uv_os_sock_t fd, sa_family_t sa_family);
+/*%<
+ * Restrict the socket to sending and receiving IPv6 packets only
+ */
+
+isc_result_t
+isc__nm_socket_connectiontimeout(uv_os_sock_t fd, int timeout_ms);
+/*%<
+ * Set the connection timeout in milliseconds; on non-Linux platforms
+ * the value must be at least 1000 (1 second).
+ */
+
+isc_result_t
+isc__nm_socket_tcp_nodelay(uv_os_sock_t fd);
+/*%<
+ * Disables Nagle's algorithm on a TCP socket (sets TCP_NODELAY).
+ */
+
+isc_result_t
+isc__nm_socket_tcp_maxseg(uv_os_sock_t fd, int size);
+/*%<
+ * Set the TCP maximum segment size
+ */
+
+isc_result_t
+isc__nm_socket_min_mtu(uv_os_sock_t fd, sa_family_t sa_family);
+/*%<
+ * Use minimum MTU on IPv6 sockets
+ */
+
+void
+isc__nm_set_network_buffers(isc_nm_t *nm, uv_handle_t *handle);
+/*%<
+ * Sets the pre-configured network buffers size on the handle.
+ */
+
+void
+isc__nmsocket_barrier_init(isc_nmsocket_t *listener);
+/*%<
+ * Initialise the socket synchronisation barrier according to the
+ * number of children.
+ */
+
+void
+isc__nmsocket_stop(isc_nmsocket_t *listener);
+/*%<
+ * Broadcast "stop" event for a listener socket across all workers and
+ * wait for its processing to complete - then, stop and close the
+ * underlying transport listener socket.
+ *
+ * The primitive is used in multi-layer transport listener sockets to
+ * implement shutdown properly: after the broadcast events have been
+ * processed it is safe to destroy the shared data within the listener
+ * socket (including shutting down the underlying transport listener
+ * socket).
+ */
+
+/*
+ * typedef all the netievent types
+ */
+
+NETIEVENT_SOCKET_TYPE(close);
+NETIEVENT_SOCKET_TYPE(tcpclose);
+NETIEVENT_SOCKET_TYPE(tcplisten);
+NETIEVENT_SOCKET_TYPE(tcppauseread);
+NETIEVENT_SOCKET_TYPE(tcpstop);
+NETIEVENT_SOCKET_TYPE(tlsclose);
+/* NETIEVENT_SOCKET_TYPE(tlsconnect); */ /* unique type, defined independently
+ */
+NETIEVENT_SOCKET_TYPE(tlsdobio);
+NETIEVENT_SOCKET_TYPE(tlsstartread);
+NETIEVENT_SOCKET_HANDLE_TYPE(tlscancel);
+NETIEVENT_SOCKET_TYPE(udpclose);
+NETIEVENT_SOCKET_TYPE(udplisten);
+NETIEVENT_SOCKET_TYPE(udpread);
+/* NETIEVENT_SOCKET_TYPE(udpsend); */ /* unique type, defined independently */
+NETIEVENT_SOCKET_TYPE(udpstop);
+
+NETIEVENT_SOCKET_TYPE(tcpdnsclose);
+NETIEVENT_SOCKET_TYPE(tcpdnsread);
+NETIEVENT_SOCKET_TYPE(tcpdnsstop);
+NETIEVENT_SOCKET_TYPE(tcpdnslisten);
+NETIEVENT_SOCKET_REQ_TYPE(tcpdnsconnect);
+NETIEVENT_SOCKET_REQ_TYPE(tcpdnssend);
+NETIEVENT_SOCKET_HANDLE_TYPE(tcpdnscancel);
+NETIEVENT_SOCKET_QUOTA_TYPE(tcpdnsaccept);
+
+NETIEVENT_SOCKET_TYPE(tlsdnsclose);
+NETIEVENT_SOCKET_TYPE(tlsdnsread);
+NETIEVENT_SOCKET_TYPE(tlsdnsstop);
+NETIEVENT_SOCKET_TYPE(tlsdnsshutdown);
+NETIEVENT_SOCKET_TYPE(tlsdnslisten);
+NETIEVENT_SOCKET_REQ_TYPE(tlsdnsconnect);
+NETIEVENT_SOCKET_REQ_TYPE(tlsdnssend);
+NETIEVENT_SOCKET_HANDLE_TYPE(tlsdnscancel);
+NETIEVENT_SOCKET_QUOTA_TYPE(tlsdnsaccept);
+NETIEVENT_SOCKET_TYPE(tlsdnscycle);
+
+#ifdef HAVE_LIBNGHTTP2
+NETIEVENT_SOCKET_REQ_TYPE(httpsend);
+NETIEVENT_SOCKET_TYPE(httpclose);
+NETIEVENT_SOCKET_HTTP_EPS_TYPE(httpendpoints);
+#endif /* HAVE_LIBNGHTTP2 */
+
+NETIEVENT_SOCKET_REQ_TYPE(tcpconnect);
+NETIEVENT_SOCKET_REQ_TYPE(tcpsend);
+NETIEVENT_SOCKET_TYPE(tcpstartread);
+NETIEVENT_SOCKET_REQ_TYPE(tlssend);
+NETIEVENT_SOCKET_REQ_TYPE(udpconnect);
+
+NETIEVENT_SOCKET_REQ_TYPE(routeconnect);
+
+NETIEVENT_SOCKET_REQ_RESULT_TYPE(connectcb);
+NETIEVENT_SOCKET_REQ_RESULT_TYPE(readcb);
+NETIEVENT_SOCKET_REQ_RESULT_TYPE(sendcb);
+
+NETIEVENT_SOCKET_HANDLE_TYPE(detach);
+NETIEVENT_SOCKET_HANDLE_TYPE(tcpcancel);
+NETIEVENT_SOCKET_HANDLE_TYPE(udpcancel);
+
+NETIEVENT_SOCKET_QUOTA_TYPE(tcpaccept);
+
+NETIEVENT_TYPE(pause);
+NETIEVENT_TYPE(resume);
+NETIEVENT_TYPE(shutdown);
+NETIEVENT_TYPE(stop);
+
+NETIEVENT_TASK_TYPE(task);
+NETIEVENT_TASK_TYPE(privilegedtask);
+
+NETIEVENT_SOCKET_TLSCTX_TYPE(settlsctx);
+NETIEVENT_SOCKET_TYPE(sockstop);
+
+/* Now declare the helper functions */
+
+NETIEVENT_SOCKET_DECL(close);
+NETIEVENT_SOCKET_DECL(tcpclose);
+NETIEVENT_SOCKET_DECL(tcplisten);
+NETIEVENT_SOCKET_DECL(tcppauseread);
+NETIEVENT_SOCKET_DECL(tcpstartread);
+NETIEVENT_SOCKET_DECL(tcpstop);
+NETIEVENT_SOCKET_DECL(tlsclose);
+NETIEVENT_SOCKET_DECL(tlsconnect);
+NETIEVENT_SOCKET_DECL(tlsdobio);
+NETIEVENT_SOCKET_DECL(tlsstartread);
+NETIEVENT_SOCKET_HANDLE_DECL(tlscancel);
+NETIEVENT_SOCKET_DECL(udpclose);
+NETIEVENT_SOCKET_DECL(udplisten);
+NETIEVENT_SOCKET_DECL(udpread);
+NETIEVENT_SOCKET_DECL(udpsend);
+NETIEVENT_SOCKET_DECL(udpstop);
+
+NETIEVENT_SOCKET_DECL(tcpdnsclose);
+NETIEVENT_SOCKET_DECL(tcpdnsread);
+NETIEVENT_SOCKET_DECL(tcpdnsstop);
+NETIEVENT_SOCKET_DECL(tcpdnslisten);
+NETIEVENT_SOCKET_REQ_DECL(tcpdnsconnect);
+NETIEVENT_SOCKET_REQ_DECL(tcpdnssend);
+NETIEVENT_SOCKET_HANDLE_DECL(tcpdnscancel);
+NETIEVENT_SOCKET_QUOTA_DECL(tcpdnsaccept);
+
+NETIEVENT_SOCKET_DECL(tlsdnsclose);
+NETIEVENT_SOCKET_DECL(tlsdnsread);
+NETIEVENT_SOCKET_DECL(tlsdnsstop);
+NETIEVENT_SOCKET_DECL(tlsdnsshutdown);
+NETIEVENT_SOCKET_DECL(tlsdnslisten);
+NETIEVENT_SOCKET_REQ_DECL(tlsdnsconnect);
+NETIEVENT_SOCKET_REQ_DECL(tlsdnssend);
+NETIEVENT_SOCKET_HANDLE_DECL(tlsdnscancel);
+NETIEVENT_SOCKET_QUOTA_DECL(tlsdnsaccept);
+NETIEVENT_SOCKET_DECL(tlsdnscycle);
+
+#ifdef HAVE_LIBNGHTTP2
+NETIEVENT_SOCKET_REQ_DECL(httpsend);
+NETIEVENT_SOCKET_DECL(httpclose);
+NETIEVENT_SOCKET_HTTP_EPS_DECL(httpendpoints);
+#endif /* HAVE_LIBNGHTTP2 */
+
+NETIEVENT_SOCKET_REQ_DECL(tcpconnect);
+NETIEVENT_SOCKET_REQ_DECL(tcpsend);
+NETIEVENT_SOCKET_REQ_DECL(tlssend);
+NETIEVENT_SOCKET_REQ_DECL(udpconnect);
+
+NETIEVENT_SOCKET_REQ_DECL(routeconnect);
+
+NETIEVENT_SOCKET_REQ_RESULT_DECL(connectcb);
+NETIEVENT_SOCKET_REQ_RESULT_DECL(readcb);
+NETIEVENT_SOCKET_REQ_RESULT_DECL(sendcb);
+
+NETIEVENT_SOCKET_HANDLE_DECL(udpcancel);
+NETIEVENT_SOCKET_HANDLE_DECL(tcpcancel);
+NETIEVENT_SOCKET_DECL(detach);
+
+NETIEVENT_SOCKET_QUOTA_DECL(tcpaccept);
+
+NETIEVENT_DECL(pause);
+NETIEVENT_DECL(resume);
+NETIEVENT_DECL(shutdown);
+NETIEVENT_DECL(stop);
+
+NETIEVENT_TASK_DECL(task);
+NETIEVENT_TASK_DECL(privilegedtask);
+
+NETIEVENT_SOCKET_TLSCTX_DECL(settlsctx);
+NETIEVENT_SOCKET_DECL(sockstop);
+
+void
+isc__nm_udp_failed_read_cb(isc_nmsocket_t *sock, isc_result_t result);
+void
+isc__nm_tcp_failed_read_cb(isc_nmsocket_t *sock, isc_result_t result);
+void
+isc__nm_tcpdns_failed_read_cb(isc_nmsocket_t *sock, isc_result_t result);
+void
+isc__nm_tlsdns_failed_read_cb(isc_nmsocket_t *sock, isc_result_t result,
+ bool async);
+
+isc_result_t
+isc__nm_tcpdns_processbuffer(isc_nmsocket_t *sock);
+isc_result_t
+isc__nm_tlsdns_processbuffer(isc_nmsocket_t *sock);
+
+isc__nm_uvreq_t *
+isc__nm_get_read_req(isc_nmsocket_t *sock, isc_sockaddr_t *sockaddr);
+
+void
+isc__nm_alloc_cb(uv_handle_t *handle, size_t size, uv_buf_t *buf);
+
+void
+isc__nm_udp_read_cb(uv_udp_t *handle, ssize_t nrecv, const uv_buf_t *buf,
+ const struct sockaddr *addr, unsigned flags);
+void
+isc__nm_tcp_read_cb(uv_stream_t *stream, ssize_t nread, const uv_buf_t *buf);
+void
+isc__nm_tcpdns_read_cb(uv_stream_t *stream, ssize_t nread, const uv_buf_t *buf);
+void
+isc__nm_tlsdns_read_cb(uv_stream_t *stream, ssize_t nread, const uv_buf_t *buf);
+
+isc_result_t
+isc__nm_start_reading(isc_nmsocket_t *sock);
+void
+isc__nm_stop_reading(isc_nmsocket_t *sock);
+isc_result_t
+isc__nm_process_sock_buffer(isc_nmsocket_t *sock);
+void
+isc__nm_resume_processing(void *arg);
+bool
+isc__nmsocket_closing(isc_nmsocket_t *sock);
+bool
+isc__nm_closing(isc_nmsocket_t *sock);
+
+void
+isc__nm_alloc_dnsbuf(isc_nmsocket_t *sock, size_t len);
+
+void
+isc__nm_failed_send_cb(isc_nmsocket_t *sock, isc__nm_uvreq_t *req,
+ isc_result_t eresult);
+void
+isc__nm_failed_accept_cb(isc_nmsocket_t *sock, isc_result_t eresult);
+void
+isc__nm_failed_connect_cb(isc_nmsocket_t *sock, isc__nm_uvreq_t *req,
+ isc_result_t eresult, bool async);
+void
+isc__nm_failed_read_cb(isc_nmsocket_t *sock, isc_result_t result, bool async);
+
+void
+isc__nm_accept_connection_log(isc_result_t result, bool can_log_quota);
+
+/*
+ * Timeout callbacks
+ */
+void
+isc__nmsocket_connecttimeout_cb(uv_timer_t *timer);
+void
+isc__nmsocket_readtimeout_cb(uv_timer_t *timer);
+void
+isc__nmsocket_writetimeout_cb(void *data, isc_result_t eresult);
+
+#define UV_RUNTIME_CHECK(func, ret) \
+ if (ret != 0) { \
+ FATAL_ERROR("%s failed: %s\n", #func, uv_strerror(ret)); \
+ }
+
+void
+isc__nmsocket_log_tls_session_reuse(isc_nmsocket_t *sock, isc_tls_t *tls);
diff --git a/lib/isc/netmgr/netmgr.c b/lib/isc/netmgr/netmgr.c
new file mode 100644
index 0000000..b19d468
--- /dev/null
+++ b/lib/isc/netmgr/netmgr.c
@@ -0,0 +1,3991 @@
+/*
+ * Copyright (C) Internet Systems Consortium, Inc. ("ISC")
+ *
+ * SPDX-License-Identifier: MPL-2.0
+ *
+ * This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, you can obtain one at https://mozilla.org/MPL/2.0/.
+ *
+ * See the COPYRIGHT file distributed with this work for additional
+ * information regarding copyright ownership.
+ */
+
+#include <inttypes.h>
+#include <unistd.h>
+#include <uv.h>
+
+#include <isc/atomic.h>
+#include <isc/backtrace.h>
+#include <isc/barrier.h>
+#include <isc/buffer.h>
+#include <isc/condition.h>
+#include <isc/errno.h>
+#include <isc/list.h>
+#include <isc/log.h>
+#include <isc/magic.h>
+#include <isc/mem.h>
+#include <isc/netmgr.h>
+#include <isc/print.h>
+#include <isc/quota.h>
+#include <isc/random.h>
+#include <isc/refcount.h>
+#include <isc/region.h>
+#include <isc/result.h>
+#include <isc/sockaddr.h>
+#include <isc/stats.h>
+#include <isc/task.h>
+#include <isc/thread.h>
+#include <isc/tls.h>
+#include <isc/util.h>
+
+#include "netmgr-int.h"
+#include "netmgr_p.h"
+#include "openssl_shim.h"
+#include "trampoline_p.h"
+#include "uv-compat.h"
+
+/*%
+ * How many isc_nmhandles and isc_nm_uvreqs will we be
+ * caching for reuse in a socket.
+ */
+#define ISC_NM_HANDLES_STACK_SIZE 600
+#define ISC_NM_REQS_STACK_SIZE 600
+
+/*%
+ * Shortcut index arrays to get access to statistics counters.
+ */
+
+/*
+ * UDP/IPv4 socket statistics counters.  The slots follow the same order as
+ * the TCP tables in this file; the two slots that hold the acceptfail/accept
+ * counters there are set to -1 here.
+ */
+static const isc_statscounter_t udp4statsindex[] = {
+ isc_sockstatscounter_udp4open,
+ isc_sockstatscounter_udp4openfail,
+ isc_sockstatscounter_udp4close,
+ isc_sockstatscounter_udp4bindfail,
+ isc_sockstatscounter_udp4connectfail,
+ isc_sockstatscounter_udp4connect,
+ -1, /* acceptfail slot in the TCP tables */
+ -1, /* accept slot in the TCP tables */
+ isc_sockstatscounter_udp4sendfail,
+ isc_sockstatscounter_udp4recvfail,
+ isc_sockstatscounter_udp4active
+};
+
+/*
+ * UDP/IPv6 socket statistics counters.  The slots follow the same order as
+ * the TCP tables in this file; the two slots that hold the acceptfail/accept
+ * counters there are set to -1 here.
+ */
+static const isc_statscounter_t udp6statsindex[] = {
+ isc_sockstatscounter_udp6open,
+ isc_sockstatscounter_udp6openfail,
+ isc_sockstatscounter_udp6close,
+ isc_sockstatscounter_udp6bindfail,
+ isc_sockstatscounter_udp6connectfail,
+ isc_sockstatscounter_udp6connect,
+ -1, /* acceptfail slot in the TCP tables */
+ -1, /* accept slot in the TCP tables */
+ isc_sockstatscounter_udp6sendfail,
+ isc_sockstatscounter_udp6recvfail,
+ isc_sockstatscounter_udp6active
+};
+
+/* TCP/IPv4 socket statistics counters, one slot per line. */
+static const isc_statscounter_t tcp4statsindex[] = {
+	isc_sockstatscounter_tcp4open,
+	isc_sockstatscounter_tcp4openfail,
+	isc_sockstatscounter_tcp4close,
+	isc_sockstatscounter_tcp4bindfail,
+	isc_sockstatscounter_tcp4connectfail,
+	isc_sockstatscounter_tcp4connect,
+	isc_sockstatscounter_tcp4acceptfail,
+	isc_sockstatscounter_tcp4accept,
+	isc_sockstatscounter_tcp4sendfail,
+	isc_sockstatscounter_tcp4recvfail,
+	isc_sockstatscounter_tcp4active
+};
+
+/* TCP/IPv6 socket statistics counters, one slot per line. */
+static const isc_statscounter_t tcp6statsindex[] = {
+	isc_sockstatscounter_tcp6open,
+	isc_sockstatscounter_tcp6openfail,
+	isc_sockstatscounter_tcp6close,
+	isc_sockstatscounter_tcp6bindfail,
+	isc_sockstatscounter_tcp6connectfail,
+	isc_sockstatscounter_tcp6connect,
+	isc_sockstatscounter_tcp6acceptfail,
+	isc_sockstatscounter_tcp6accept,
+	isc_sockstatscounter_tcp6sendfail,
+	isc_sockstatscounter_tcp6recvfail,
+	isc_sockstatscounter_tcp6active
+};
+
+#if 0
+/* XXX: not currently used */
+static const isc_statscounter_t unixstatsindex[] = {
+ isc_sockstatscounter_unixopen,
+ isc_sockstatscounter_unixopenfail,
+ isc_sockstatscounter_unixclose,
+ isc_sockstatscounter_unixbindfail,
+ isc_sockstatscounter_unixconnectfail,
+ isc_sockstatscounter_unixconnect,
+ isc_sockstatscounter_unixacceptfail,
+ isc_sockstatscounter_unixaccept,
+ isc_sockstatscounter_unixsendfail,
+ isc_sockstatscounter_unixrecvfail,
+ isc_sockstatscounter_unixactive
+};
+#endif /* if 0 */
+
+/*
+ * libuv is not thread safe, but has mechanisms to pass messages
+ * between threads. Each socket is owned by a thread. For UDP
+ * sockets we have a set of sockets for each interface and we can
+ * choose a sibling and send the message directly. For TCP, or if
+ * we're calling from a non-networking thread, we need to pass the
+ * request using async_cb.
+ */
+
+static thread_local int isc__nm_tid_v = ISC_NETMGR_TID_UNKNOWN;
+
+static void
+nmsocket_maybe_destroy(isc_nmsocket_t *sock FLARG);
+static void
+nmhandle_free(isc_nmsocket_t *sock, isc_nmhandle_t *handle);
+static isc_threadresult_t
+nm_thread(isc_threadarg_t worker0);
+static void
+async_cb(uv_async_t *handle);
+
+static bool
+process_netievent(isc__networker_t *worker, isc__netievent_t *ievent);
+static isc_result_t
+process_queue(isc__networker_t *worker, netievent_type_t type);
+static void
+wait_for_priority_queue(isc__networker_t *worker);
+static void
+drain_queue(isc__networker_t *worker, netievent_type_t type);
+
+static void
+isc__nm_async_stop(isc__networker_t *worker, isc__netievent_t *ev0);
+static void
+isc__nm_async_pause(isc__networker_t *worker, isc__netievent_t *ev0);
+static void
+isc__nm_async_resume(isc__networker_t *worker, isc__netievent_t *ev0);
+static void
+isc__nm_async_detach(isc__networker_t *worker, isc__netievent_t *ev0);
+static void
+isc__nm_async_close(isc__networker_t *worker, isc__netievent_t *ev0);
+
+static void
+isc__nm_threadpool_initialize(uint32_t workers);
+static void
+isc__nm_work_cb(uv_work_t *req);
+static void
+isc__nm_after_work_cb(uv_work_t *req, int status);
+
+/*%
+ * Issue a 'handle closed' callback on the socket.
+ */
+
+static void
+nmhandle_detach_cb(isc_nmhandle_t **handlep FLARG);
+
+/*
+ * Return the network-thread id recorded for the calling thread in the
+ * thread-local 'isc__nm_tid_v' (ISC_NETMGR_TID_UNKNOWN until it is set).
+ */
+int
+isc_nm_tid(void) {
+	const int tid = isc__nm_tid_v;
+
+	return (tid);
+}
+
+/*
+ * Return true when the calling thread has a non-negative network-thread id.
+ */
+bool
+isc__nm_in_netthread(void) {
+	return (!(isc__nm_tid_v < 0));
+}
+
+/*
+ * Overwrite the calling thread's network-thread id with 'tid'.
+ */
+void
+isc__nm_force_tid(int tid) {
+ isc__nm_tid_v = tid;
+}
+
+/*
+ * Set the UV_THREADPOOL_SIZE environment variable to 'workers', but only
+ * when looking the variable up reports UV_ENOENT (i.e. it is not set yet).
+ */
+static void
+isc__nm_threadpool_initialize(uint32_t workers) {
+	char value[11];
+	int rv = uv_os_getenv("UV_THREADPOOL_SIZE", value,
+			      &(size_t){ sizeof(value) });
+
+	if (rv != UV_ENOENT) {
+		/* Already present, or the lookup failed otherwise. */
+		return;
+	}
+
+	snprintf(value, sizeof(value), "%" PRIu32, workers);
+	uv_os_setenv("UV_THREADPOOL_SIZE", value);
+}
+
+/*
+ * Bounds on the libuv version accepted at run time, selected from the
+ * HAVE_DECL_UV_UDP_* feature macros.  MINIMAL_UV_VERSION is always defined;
+ * MAXIMAL_UV_VERSION is defined only in the last two branches.
+ * isc__netmgr_create() compares uv_version() against these bounds.
+ */
+#if HAVE_DECL_UV_UDP_LINUX_RECVERR
+#define MINIMAL_UV_VERSION UV_VERSION(1, 42, 0)
+#elif HAVE_DECL_UV_UDP_MMSG_FREE
+#define MINIMAL_UV_VERSION UV_VERSION(1, 40, 0)
+#elif HAVE_DECL_UV_UDP_RECVMMSG
+#define MAXIMAL_UV_VERSION UV_VERSION(1, 39, 99)
+#define MINIMAL_UV_VERSION UV_VERSION(1, 37, 0)
+#else
+#define MAXIMAL_UV_VERSION UV_VERSION(1, 34, 99)
+#define MINIMAL_UV_VERSION UV_VERSION(1, 0, 0)
+#endif
+
+/*
+ * Create a network manager with 'workers' worker threads and store it in
+ * '*netmgrp'.
+ *
+ * The running libuv version is first checked against MINIMAL_UV_VERSION
+ * (and MAXIMAL_UV_VERSION when that is defined); a mismatch is reported
+ * through FATAL_ERROR().  Every worker gets its own uv loop, async handle,
+ * event queues and receive/send buffers, and its thread is created before
+ * this function returns.
+ *
+ * Requires:
+ *	'workers' is greater than zero.
+ */
+void
+isc__netmgr_create(isc_mem_t *mctx, uint32_t workers, isc_nm_t **netmgrp) {
+ isc_nm_t *mgr = NULL;
+ char name[32];
+
+ REQUIRE(workers > 0);
+
+#ifdef MAXIMAL_UV_VERSION
+ if (uv_version() > MAXIMAL_UV_VERSION) {
+ FATAL_ERROR("libuv version too new: running with libuv %s "
+ "when compiled with libuv %s will lead to "
+ "libuv failures",
+ uv_version_string(), UV_VERSION_STRING);
+ }
+#endif /* MAXIMAL_UV_VERSION */
+
+ if (uv_version() < MINIMAL_UV_VERSION) {
+ FATAL_ERROR("libuv version too old: running with libuv %s "
+ "when compiled with libuv %s will lead to "
+ "libuv failures",
+ uv_version_string(), UV_VERSION_STRING);
+ }
+
+ /* Must run before any libuv work is queued; see the helper. */
+ isc__nm_threadpool_initialize(workers);
+
+ mgr = isc_mem_get(mctx, sizeof(*mgr));
+ *mgr = (isc_nm_t){ .nworkers = workers };
+
+ isc_mem_attach(mctx, &mgr->mctx);
+ isc_mutex_init(&mgr->lock);
+ isc_condition_init(&mgr->wkstatecond);
+ isc_condition_init(&mgr->wkpausecond);
+ isc_refcount_init(&mgr->references, 1);
+ atomic_init(&mgr->maxudp, 0);
+ atomic_init(&mgr->interlocked, ISC_NETMGR_NON_INTERLOCKED);
+ atomic_init(&mgr->workers_paused, 0);
+ atomic_init(&mgr->paused, false);
+ atomic_init(&mgr->closing, false);
+ atomic_init(&mgr->recv_tcp_buffer_size, 0);
+ atomic_init(&mgr->send_tcp_buffer_size, 0);
+ atomic_init(&mgr->recv_udp_buffer_size, 0);
+ atomic_init(&mgr->send_udp_buffer_size, 0);
+ /* Load-balanced sockets default to on only where the option exists. */
+#if HAVE_SO_REUSEPORT_LB
+ mgr->load_balance_sockets = true;
+#else
+ mgr->load_balance_sockets = false;
+#endif
+
+#ifdef NETMGR_TRACE
+ ISC_LIST_INIT(mgr->active_sockets);
+#endif
+
+ /*
+ * Default TCP timeout values.
+ * May be updated by isc_nm_tcptimeouts().
+ */
+ atomic_init(&mgr->init, 30000);
+ atomic_init(&mgr->idle, 30000);
+ atomic_init(&mgr->keepalive, 30000);
+ atomic_init(&mgr->advertised, 30000);
+
+ /* Both barriers are sized for one participant per worker. */
+ isc_barrier_init(&mgr->pausing, workers);
+ isc_barrier_init(&mgr->resuming, workers);
+
+ mgr->workers = isc_mem_get(mctx, workers * sizeof(isc__networker_t));
+ for (size_t i = 0; i < workers; i++) {
+ isc__networker_t *worker = &mgr->workers[i];
+ int r;
+
+ *worker = (isc__networker_t){
+ .mgr = mgr,
+ .id = i,
+ };
+
+ r = uv_loop_init(&worker->loop);
+ UV_RUNTIME_CHECK(uv_loop_init, r);
+
+ /* async_cb() recovers the worker from loop.data. */
+ worker->loop.data = &mgr->workers[i];
+
+ r = uv_async_init(&worker->loop, &worker->async, async_cb);
+ UV_RUNTIME_CHECK(uv_async_init, r);
+
+ /* One lock/condition/list triple per event queue type. */
+ for (size_t type = 0; type < NETIEVENT_MAX; type++) {
+ isc_mutex_init(&worker->ievents[type].lock);
+ isc_condition_init(&worker->ievents[type].cond);
+ ISC_LIST_INIT(worker->ievents[type].list);
+ }
+
+ worker->recvbuf = isc_mem_get(mctx, ISC_NETMGR_RECVBUF_SIZE);
+ worker->sendbuf = isc_mem_get(mctx, ISC_NETMGR_SENDBUF_SIZE);
+
+ /*
+ * We need to do this here and not in nm_thread to avoid a
+ * race - we could exit isc_nm_start, launch nm_destroy,
+ * and nm_thread would still not be up.
+ */
+ mgr->workers_running++;
+ isc_thread_create(nm_thread, &mgr->workers[i], &worker->thread);
+
+ snprintf(name, sizeof(name), "isc-net-%04zu", i);
+ isc_thread_setname(worker->thread, name);
+ }
+
+ /* The magic is set last, once the manager is fully built. */
+ mgr->magic = NM_MAGIC;
+ *netmgrp = mgr;
+}
+
+/*
+ * Free the resources of the network manager.
+ *
+ * Clears '*mgr0', sends a "stop" event to every worker, waits until
+ * 'workers_running' drops to zero, and then releases the per-worker and
+ * manager-wide resources.  Must not be called from a network thread.
+ */
+static void
+nm_destroy(isc_nm_t **mgr0) {
+ REQUIRE(VALID_NM(*mgr0));
+ REQUIRE(!isc__nm_in_netthread());
+
+ isc_nm_t *mgr = *mgr0;
+ *mgr0 = NULL;
+
+ isc_refcount_destroy(&mgr->references);
+
+ mgr->magic = 0;
+
+ /* Ask every worker to leave its event loop. */
+ for (int i = 0; i < mgr->nworkers; i++) {
+ isc__networker_t *worker = &mgr->workers[i];
+ isc__netievent_t *event = isc__nm_get_netievent_stop(mgr);
+ isc__nm_enqueue_ievent(worker, event);
+ }
+
+ /* nm_thread() decrements workers_running and signals on exit. */
+ LOCK(&mgr->lock);
+ while (mgr->workers_running > 0) {
+ WAIT(&mgr->wkstatecond, &mgr->lock);
+ }
+ UNLOCK(&mgr->lock);
+
+ for (int i = 0; i < mgr->nworkers; i++) {
+ isc__networker_t *worker = &mgr->workers[i];
+ int r;
+
+ r = uv_loop_close(&worker->loop);
+ UV_RUNTIME_CHECK(uv_loop_close, r);
+
+ /* Every queue must already be empty at this point. */
+ for (size_t type = 0; type < NETIEVENT_MAX; type++) {
+ INSIST(ISC_LIST_EMPTY(worker->ievents[type].list));
+ isc_condition_destroy(&worker->ievents[type].cond);
+ isc_mutex_destroy(&worker->ievents[type].lock);
+ }
+
+ isc_mem_put(mgr->mctx, worker->sendbuf,
+ ISC_NETMGR_SENDBUF_SIZE);
+ isc_mem_put(mgr->mctx, worker->recvbuf,
+ ISC_NETMGR_RECVBUF_SIZE);
+ isc_thread_join(worker->thread, NULL);
+ }
+
+ if (mgr->stats != NULL) {
+ isc_stats_detach(&mgr->stats);
+ }
+
+ isc_barrier_destroy(&mgr->resuming);
+ isc_barrier_destroy(&mgr->pausing);
+
+ isc_condition_destroy(&mgr->wkstatecond);
+ isc_condition_destroy(&mgr->wkpausecond);
+ isc_mutex_destroy(&mgr->lock);
+
+ /* Free the worker array, then the manager and its memory context. */
+ isc_mem_put(mgr->mctx, mgr->workers,
+ mgr->nworkers * sizeof(isc__networker_t));
+ isc_mem_putanddetach(&mgr->mctx, mgr, sizeof(*mgr));
+}
+
+/*
+ * Allocate a "pause" netievent and queue it for 'worker'.
+ */
+static void
+enqueue_pause(isc__networker_t *worker) {
+	isc__netievent_pause_t *pause_ev = NULL;
+
+	pause_ev = isc__nm_get_netievent_pause(worker->mgr);
+	isc__nm_enqueue_ievent(worker, (isc__netievent_t *)pause_ev);
+}
+
+/*
+ * Handle a "pause" event: mark the worker as paused and stop its uv loop
+ * so that uv_run() in nm_thread() returns.  'ev0' is not used.
+ *
+ * Requires:
+ *	the worker is not already paused.
+ */
+static void
+isc__nm_async_pause(isc__networker_t *worker, isc__netievent_t *ev0) {
+ UNUSED(ev0);
+ REQUIRE(worker->paused == false);
+
+ worker->paused = true;
+ uv_stop(&worker->loop);
+}
+
+/*
+ * Pause all workers of 'mgr' and return once every running worker has
+ * reported itself paused.  The interlock acquired here is released by
+ * isc_nm_resume().
+ *
+ * Requires:
+ *	'mgr' is valid and not already paused;
+ *	when called from a network thread, that thread is worker 0.
+ */
+void
+isc_nm_pause(isc_nm_t *mgr) {
+ REQUIRE(VALID_NM(mgr));
+ REQUIRE(!atomic_load(&mgr->paused));
+
+ isc__nm_acquire_interlocked_force(mgr);
+
+ if (isc__nm_in_netthread()) {
+ REQUIRE(isc_nm_tid() == 0);
+ }
+
+ /* Pause the calling worker directly, all others via their queue. */
+ for (int i = 0; i < mgr->nworkers; i++) {
+ isc__networker_t *worker = &mgr->workers[i];
+ if (i == isc_nm_tid()) {
+ isc__nm_async_pause(worker, NULL);
+ } else {
+ enqueue_pause(worker);
+ }
+ }
+
+ /*
+ * A calling network thread counts itself as paused and joins the
+ * 'pausing' barrier here, as the other workers do in nm_thread().
+ */
+ if (isc__nm_in_netthread()) {
+ atomic_fetch_add(&mgr->workers_paused, 1);
+ isc_barrier_wait(&mgr->pausing);
+ }
+
+ LOCK(&mgr->lock);
+ while (atomic_load(&mgr->workers_paused) != mgr->workers_running) {
+ WAIT(&mgr->wkstatecond, &mgr->lock);
+ }
+ UNLOCK(&mgr->lock);
+
+ atomic_compare_exchange_enforced(&mgr->paused, &(bool){ false }, true);
+}
+
+/*
+ * Allocate a "resume" netievent and queue it for 'worker'.
+ */
+static void
+enqueue_resume(isc__networker_t *worker) {
+	isc__netievent_resume_t *resume_ev = NULL;
+
+	resume_ev = isc__nm_get_netievent_resume(worker->mgr);
+	isc__nm_enqueue_ievent(worker, (isc__netievent_t *)resume_ev);
+}
+
+/*
+ * Handle a "resume" event: clear the worker's paused flag, which ends the
+ * wait loop in nm_thread().  'ev0' is not used.
+ *
+ * Requires:
+ *	the worker is currently paused.
+ */
+static void
+isc__nm_async_resume(isc__networker_t *worker, isc__netievent_t *ev0) {
+ UNUSED(ev0);
+ REQUIRE(worker->paused == true);
+
+ worker->paused = false;
+}
+
+/*
+ * Resume all workers of 'mgr' after isc_nm_pause() and return once no
+ * worker is counted as paused any more; the interlock is dropped at the end.
+ *
+ * Requires:
+ *	'mgr' is valid and currently paused;
+ *	when called from a network thread, that thread is worker 0.
+ */
+void
+isc_nm_resume(isc_nm_t *mgr) {
+ REQUIRE(VALID_NM(mgr));
+ REQUIRE(atomic_load(&mgr->paused));
+
+ /* A calling network thread drains its own priority queue first. */
+ if (isc__nm_in_netthread()) {
+ REQUIRE(isc_nm_tid() == 0);
+ drain_queue(&mgr->workers[isc_nm_tid()], NETIEVENT_PRIORITY);
+ }
+
+ /* Resume the calling worker directly, all others via their queue. */
+ for (int i = 0; i < mgr->nworkers; i++) {
+ isc__networker_t *worker = &mgr->workers[i];
+ if (i == isc_nm_tid()) {
+ isc__nm_async_resume(worker, NULL);
+ } else {
+ enqueue_resume(worker);
+ }
+ }
+
+ /*
+ * Mirror what nm_thread() does for the other workers: drain the
+ * privileged queue, stop counting as paused, join the barrier.
+ */
+ if (isc__nm_in_netthread()) {
+ drain_queue(&mgr->workers[isc_nm_tid()], NETIEVENT_PRIVILEGED);
+
+ atomic_fetch_sub(&mgr->workers_paused, 1);
+ isc_barrier_wait(&mgr->resuming);
+ }
+
+ LOCK(&mgr->lock);
+ while (atomic_load(&mgr->workers_paused) != 0) {
+ WAIT(&mgr->wkstatecond, &mgr->lock);
+ }
+ UNLOCK(&mgr->lock);
+
+ atomic_compare_exchange_enforced(&mgr->paused, &(bool){ true }, false);
+
+ isc__nm_drop_interlocked(mgr);
+}
+
+/*
+ * Take a new reference on 'mgr' and store the manager in '*dst'.
+ *
+ * Requires:
+ *	'mgr' is valid;
+ *	'dst' is not NULL and '*dst' is NULL.
+ */
+void
+isc_nm_attach(isc_nm_t *mgr, isc_nm_t **dst) {
+ REQUIRE(VALID_NM(mgr));
+ REQUIRE(dst != NULL && *dst == NULL);
+
+ isc_refcount_increment(&mgr->references);
+
+ *dst = mgr;
+}
+
+/*
+ * Drop the reference held through '*mgr0' and set '*mgr0' to NULL.  The
+ * manager is destroyed when the reference that was dropped was the last one.
+ */
+void
+isc_nm_detach(isc_nm_t **mgr0) {
+	REQUIRE(mgr0 != NULL);
+	REQUIRE(VALID_NM(*mgr0));
+
+	isc_nm_t *netmgr = *mgr0;
+	*mgr0 = NULL;
+
+	if (isc_refcount_decrement(&netmgr->references) != 1) {
+		/* Other references remain. */
+		return;
+	}
+
+	nm_destroy(&netmgr);
+}
+
+/*
+ * Mark 'mgr' as closing and queue a "shutdown" event for every worker.
+ */
+void
+isc__netmgr_shutdown(isc_nm_t *mgr) {
+	REQUIRE(VALID_NM(mgr));
+
+	atomic_store(&mgr->closing, true);
+
+	for (int i = 0; i < mgr->nworkers; i++) {
+		isc__networker_t *worker = &mgr->workers[i];
+		isc__netievent_t *shutdown_ev =
+			isc__nm_get_netievent_shutdown(mgr);
+
+		isc__nm_enqueue_ievent(worker, shutdown_ev);
+	}
+}
+
+/*
+ * Shut down the network manager referenced by '*netmgrp', wait until the
+ * caller's reference is the only one left, and then detach it (which
+ * destroys the manager and sets '*netmgrp' to NULL).
+ *
+ * Requires:
+ *	'netmgrp' is not NULL and '*netmgrp' is a valid manager.
+ */
+void
+isc__netmgr_destroy(isc_nm_t **netmgrp) {
+	isc_nm_t *mgr = NULL;
+	int counter = 0;
+
+	/* Check the pointer itself before VALID_NM() dereferences it. */
+	REQUIRE(netmgrp != NULL);
+	REQUIRE(VALID_NM(*netmgrp));
+
+	mgr = *netmgrp;
+
+	/*
+	 * Close active connections.
+	 */
+	isc__netmgr_shutdown(mgr);
+
+	/*
+	 * Wait for the manager to be dereferenced elsewhere; give up
+	 * after 1000 rounds of 10 ms so that a trace build can report
+	 * what is still active.
+	 */
+	while (isc_refcount_current(&mgr->references) > 1 && counter++ < 1000) {
+		uv_sleep(10);
+	}
+
+#ifdef NETMGR_TRACE
+	if (isc_refcount_current(&mgr->references) > 1) {
+		isc__nm_dump_active(mgr);
+		UNREACHABLE();
+	}
+#endif
+
+	/*
+	 * Now just patiently wait
+	 */
+	while (isc_refcount_current(&mgr->references) > 1) {
+		uv_sleep(10);
+	}
+
+	/*
+	 * Detach final reference.
+	 */
+	isc_nm_detach(netmgrp);
+}
+
+/*
+ * Atomically store 'maxudp' in the manager's 'maxudp' setting.
+ */
+void
+isc_nm_maxudp(isc_nm_t *mgr, uint32_t maxudp) {
+ REQUIRE(VALID_NM(mgr));
+
+ atomic_store(&mgr->maxudp, maxudp);
+}
+
+/*
+ * Set the write timeout of the socket behind 'handle' to 'write_timeout'.
+ *
+ * Requires:
+ *	'handle' and its socket are valid;
+ *	the caller runs on the thread that owns the socket.
+ */
+void
+isc_nmhandle_setwritetimeout(isc_nmhandle_t *handle, uint64_t write_timeout) {
+ REQUIRE(VALID_NMHANDLE(handle));
+ REQUIRE(VALID_NMSOCK(handle->sock));
+ REQUIRE(handle->sock->tid == isc_nm_tid());
+
+ switch (handle->sock->type) {
+ /* These socket types store the timeout on the socket itself. */
+ case isc_nm_tcpsocket:
+ case isc_nm_udpsocket:
+ case isc_nm_tcpdnssocket:
+ case isc_nm_tlsdnssocket:
+ handle->sock->write_timeout = write_timeout;
+ break;
+#ifdef HAVE_LIBNGHTTP2
+ /* TLS stream sockets delegate to their own setter. */
+ case isc_nm_tlssocket:
+ isc__nmhandle_tls_setwritetimeout(handle, write_timeout);
+ break;
+#endif /* HAVE_LIBNGHTTP2 */
+ default:
+ UNREACHABLE();
+ break;
+ }
+}
+
+/*
+ * Atomically replace the four timeout settings of 'mgr' ('init', 'idle',
+ * 'keepalive' and 'advertised') with the given values.
+ */
+void
+isc_nm_settimeouts(isc_nm_t *mgr, uint32_t init, uint32_t idle,
+ uint32_t keepalive, uint32_t advertised) {
+ REQUIRE(VALID_NM(mgr));
+
+ atomic_store(&mgr->init, init);
+ atomic_store(&mgr->idle, idle);
+ atomic_store(&mgr->keepalive, keepalive);
+ atomic_store(&mgr->advertised, advertised);
+}
+
+/*
+ * Atomically store the configured receive/send buffer sizes for TCP and
+ * UDP in 'mgr'.
+ */
+void
+isc_nm_setnetbuffers(isc_nm_t *mgr, int32_t recv_tcp, int32_t send_tcp,
+ int32_t recv_udp, int32_t send_udp) {
+ REQUIRE(VALID_NM(mgr));
+
+ atomic_store(&mgr->recv_tcp_buffer_size, recv_tcp);
+ atomic_store(&mgr->send_tcp_buffer_size, send_tcp);
+ atomic_store(&mgr->recv_udp_buffer_size, recv_udp);
+ atomic_store(&mgr->send_udp_buffer_size, send_udp);
+}
+
+/*
+ * Return the manager's 'load_balance_sockets' flag.
+ */
+bool
+isc_nm_getloadbalancesockets(isc_nm_t *mgr) {
+ REQUIRE(VALID_NM(mgr));
+
+ return (mgr->load_balance_sockets);
+}
+
+/*
+ * Set the manager's 'load_balance_sockets' flag to 'enabled'.  Without
+ * HAVE_SO_REUSEPORT_LB the request is ignored and the flag is unchanged.
+ */
+void
+isc_nm_setloadbalancesockets(isc_nm_t *mgr, bool enabled) {
+ REQUIRE(VALID_NM(mgr));
+
+#if HAVE_SO_REUSEPORT_LB
+ mgr->load_balance_sockets = enabled;
+#else
+ UNUSED(enabled);
+#endif
+}
+
+/*
+ * Read the manager's timeout settings.  Each output pointer is optional:
+ * a NULL pointer skips the corresponding value.
+ */
+void
+isc_nm_gettimeouts(isc_nm_t *mgr, uint32_t *initial, uint32_t *idle,
+ uint32_t *keepalive, uint32_t *advertised) {
+ REQUIRE(VALID_NM(mgr));
+
+ if (initial != NULL) {
+ *initial = atomic_load(&mgr->init);
+ }
+
+ if (idle != NULL) {
+ *idle = atomic_load(&mgr->idle);
+ }
+
+ if (keepalive != NULL) {
+ *keepalive = atomic_load(&mgr->keepalive);
+ }
+
+ if (advertised != NULL) {
+ *advertised = atomic_load(&mgr->advertised);
+ }
+}
+
+/*
+ * nm_thread is a single worker thread, that runs uv_run event loop
+ * until asked to stop.
+ *
+ * There are four queues for asynchronous events:
+ *
+ * 1. priority queue - netievents on the priority queue are run even when
+ * the taskmgr enters exclusive mode and the netmgr is paused. This
+ * is needed to properly start listening on the interfaces, free
+ * resources on shutdown, or resume from a pause.
+ *
+ * 2. privileged task queue - only privileged tasks are queued here and
+ * this is the first queue that gets processed when network manager
+ * is unpaused using isc_nm_resume(). All netmgr workers need to
+ * clean the privileged task queue before they all proceed to normal
+ * operation. Both task queues are processed when the workers are
+ * shutting down.
+ *
+ * 3. task queue - only (traditional) tasks are scheduled here, and this
+ * queue and the privileged task queue are both processed when the
+ * netmgr workers are finishing. This is needed to process the task
+ * shutdown events.
+ *
+ * 4. normal queue - this is the queue with netmgr events, e.g. reading,
+ * sending, callbacks, etc.
+ */
+
+/*
+ * Worker thread entry point; 'worker0' is the thread's isc__networker_t.
+ * Records the worker id as this thread's network-thread id, runs the uv
+ * loop until the worker is finished (handling pause/resume cycles in
+ * between), drains the task queues and finally reports the exit to the
+ * manager.  Always returns 0.
+ */
+static isc_threadresult_t
+nm_thread(isc_threadarg_t worker0) {
+ isc__networker_t *worker = (isc__networker_t *)worker0;
+ isc_nm_t *mgr = worker->mgr;
+
+ isc__nm_tid_v = worker->id;
+
+ while (true) {
+ /*
+ * uv_run() runs async_cb() in a loop, which processes
+ * all four event queues until a "pause" or "stop" event
+ * is encountered. On pause, we process only priority and
+ * privileged events until resuming.
+ */
+ int r = uv_run(&worker->loop, UV_RUN_DEFAULT);
+ INSIST(r > 0 || worker->finished);
+
+ if (worker->paused) {
+ INSIST(atomic_load(&mgr->interlocked) != isc_nm_tid());
+
+ /*
+ * Count this worker as paused and meet the others at
+ * the barrier; only the thread for which
+ * isc_barrier_wait() returns non-zero wakes the
+ * waiter on wkstatecond.
+ */
+ atomic_fetch_add(&mgr->workers_paused, 1);
+ if (isc_barrier_wait(&mgr->pausing) != 0) {
+ LOCK(&mgr->lock);
+ SIGNAL(&mgr->wkstatecond);
+ UNLOCK(&mgr->lock);
+ }
+
+ /* Serve only the priority queue until a resume arrives. */
+ while (worker->paused) {
+ wait_for_priority_queue(worker);
+ }
+
+ /*
+ * All workers must drain the privileged event
+ * queue before we resume from pause.
+ */
+ drain_queue(worker, NETIEVENT_PRIVILEGED);
+
+ atomic_fetch_sub(&mgr->workers_paused, 1);
+ if (isc_barrier_wait(&mgr->resuming) != 0) {
+ LOCK(&mgr->lock);
+ SIGNAL(&mgr->wkstatecond);
+ UNLOCK(&mgr->lock);
+ }
+ }
+
+ /* uv_run() returned 0: the worker must have been stopped. */
+ if (r == 0) {
+ INSIST(worker->finished);
+ break;
+ }
+
+ INSIST(!worker->finished);
+ }
+
+ /*
+ * We are shutting down. Drain the queues.
+ */
+ drain_queue(worker, NETIEVENT_PRIVILEGED);
+ drain_queue(worker, NETIEVENT_TASK);
+
+ /* Nothing may be left in any queue once the thread exits. */
+ for (size_t type = 0; type < NETIEVENT_MAX; type++) {
+ LOCK(&worker->ievents[type].lock);
+ INSIST(ISC_LIST_EMPTY(worker->ievents[type].list));
+ UNLOCK(&worker->ievents[type].lock);
+ }
+
+ /* Tell nm_destroy() that one more worker is gone. */
+ LOCK(&mgr->lock);
+ mgr->workers_running--;
+ SIGNAL(&mgr->wkstatecond);
+ UNLOCK(&mgr->lock);
+
+ return ((isc_threadresult_t)0);
+}
+
+/*
+ * Run process_queue() on each event queue of 'worker' in turn.
+ *
+ * Returns true when async_cb() has to be scheduled again: either a queue
+ * had events to process, or processing was suspended.  Returns false when
+ * every queue was empty.
+ */
+static bool
+process_all_queues(isc__networker_t *worker) {
+	bool reschedule = false;
+
+	for (size_t type = 0; type < NETIEVENT_MAX; type++) {
+		isc_result_t result = process_queue(worker, type);
+		switch (result) {
+		case ISC_R_SUSPEND:
+			/*
+			 * The system is pausing or stopping: the remaining
+			 * queues must not be processed now, but the async
+			 * event still has to be rescheduled for the next
+			 * uv_run(), so return right away instead of moving
+			 * on to the next queue.
+			 */
+			return (true);
+		case ISC_R_EMPTY:
+			/* empty queue */
+			break;
+		case ISC_R_SUCCESS:
+			reschedule = true;
+			break;
+		default:
+			UNREACHABLE();
+		}
+	}
+
+	return (reschedule);
+}
+
+/*
+ * async_cb() is the universal callback for 'async' events sent to the event
+ * loop; it is the only way to safely pass data to the libuv event loop.  A
+ * single async handle per worker is used together with a set of
+ * lock-protected queues of 'isc__netievent_t' structures filled by other
+ * threads.
+ */
+static void
+async_cb(uv_async_t *handle) {
+	isc__networker_t *worker = (isc__networker_t *)handle->loop->data;
+	bool again = process_all_queues(worker);
+
+	if (!again) {
+		return;
+	}
+
+	/*
+	 * Not every event was processed: have async_cb() run again in
+	 * the next iteration of the uv loop.
+	 */
+	uv_async_send(handle);
+}
+
+/*
+ * Handle a "stop" event: mark the worker as finished and close its async
+ * handle (no close callback is installed).  'ev0' is not used.
+ */
+static void
+isc__nm_async_stop(isc__networker_t *worker, isc__netievent_t *ev0) {
+ UNUSED(ev0);
+ worker->finished = true;
+ /* Close the async handler */
+ uv_close((uv_handle_t *)&worker->async, NULL);
+}
+
+/*
+ * Queue 'task' on one of the workers of 'nm'.  With 'threadid' == -1 the
+ * worker is picked at random, otherwise it is 'threadid' modulo the number
+ * of workers.  Privileged tasks are wrapped in a "privilegedtask" event,
+ * all others in a "task" event.
+ */
+void
+isc_nm_task_enqueue(isc_nm_t *nm, isc_task_t *task, int threadid) {
+	isc__netievent_t *ievent = NULL;
+	int target;
+
+	if (threadid != -1) {
+		target = threadid % nm->nworkers;
+	} else {
+		target = (int)isc_random_uniform(nm->nworkers);
+	}
+
+	if (!isc_task_privileged(task)) {
+		ievent = (isc__netievent_t *)isc__nm_get_netievent_task(nm,
+									 task);
+	} else {
+		ievent = (isc__netievent_t *)
+			isc__nm_get_netievent_privilegedtask(nm, task);
+	}
+
+	isc__nm_enqueue_ievent(&nm->workers[target], ievent);
+}
+
+/* Privileged task events are handled exactly like regular task events. */
+#define isc__nm_async_privilegedtask(worker, ev0) \
+	isc__nm_async_task(worker, ev0)
+
+/*
+ * Run the task carried by the task event 'ev0' and refresh the cached
+ * time of the worker's event loop afterwards.  A task that returns
+ * ISC_R_QUOTA is marked ready again; any result other than ISC_R_QUOTA or
+ * ISC_R_SUCCESS is treated as unreachable.
+ */
+static void
+isc__nm_async_task(isc__networker_t *worker, isc__netievent_t *ev0) {
+	isc__netievent_task_t *ievent = (isc__netievent_task_t *)ev0;
+	isc_result_t result;
+
+	result = isc_task_run(ievent->task);
+
+	/*
+	 * Tasks can block for a long time, especially when used by tools in
+	 * interactive mode. Update the event loop's time to avoid unexpected
+	 * errors when processing later events during the same callback.
+	 * For example, newly started timers can fire too early, because the
+	 * current time was stale. See the note about uv_update_time() in the
+	 * https://docs.libuv.org/en/v1.x/timer.html#c.uv_timer_start page.
+	 */
+	uv_update_time(&worker->loop);
+
+	switch (result) {
+	case ISC_R_QUOTA:
+		isc_task_ready(ievent->task);
+		return;
+	case ISC_R_SUCCESS:
+		return;
+	default:
+		UNREACHABLE();
+	}
+}
+
+/*
+ * Block until the worker's priority queue is non-empty, then drain it.
+ */
+static void
+wait_for_priority_queue(isc__networker_t *worker) {
+	isc_mutex_t *qlock = &worker->ievents[NETIEVENT_PRIORITY].lock;
+	isc_condition_t *qcond = &worker->ievents[NETIEVENT_PRIORITY].cond;
+	isc__netievent_list_t *queue =
+		&(worker->ievents[NETIEVENT_PRIORITY].list);
+
+	LOCK(qlock);
+	for (;;) {
+		if (!ISC_LIST_EMPTY(*queue)) {
+			break;
+		}
+		WAIT(qcond, qlock);
+	}
+	UNLOCK(qlock);
+
+	drain_queue(worker, NETIEVENT_PRIORITY);
+}
+
+/*
+ * Keep processing the queue of the given 'type' until process_queue()
+ * reports it empty and a check under the queue lock confirms that nothing
+ * was added in the meantime.
+ */
+static void
+drain_queue(isc__networker_t *worker, netievent_type_t type) {
+	for (;;) {
+		bool drained;
+
+		if (process_queue(worker, type) != ISC_R_EMPTY) {
+			continue;
+		}
+
+		LOCK(&worker->ievents[type].lock);
+		drained = ISC_LIST_EMPTY(worker->ievents[type].list);
+		UNLOCK(&worker->ievents[type].lock);
+
+		if (drained) {
+			return;
+		}
+	}
+}
+
+/*
+ * The two macros here generate the individual cases for the process_netievent()
+ * function. The NETIEVENT_CASE(type) macro is the common case, and
+ * NETIEVENT_CASE_NOMORE(type) is a macro that causes the loop in the
+ * process_queue() to stop, e.g. it's only used for the netievent that
+ * stops/pauses processing the enqueued netievents.
+ *
+ * Both variants call isc__nm_async_<type>() and then hand the event back
+ * through isc__nm_put_netievent_<type>(); they differ only in the value
+ * returned from process_netievent().
+ */
+
+/* Common case: process_netievent() returns true. */
+#define NETIEVENT_CASE(type) \
+ case netievent_##type: { \
+ isc__nm_async_##type(worker, ievent); \
+ isc__nm_put_netievent_##type( \
+ worker->mgr, (isc__netievent_##type##_t *)ievent); \
+ return (true); \
+ }
+
+/* Stop/pause case: process_netievent() returns false. */
+#define NETIEVENT_CASE_NOMORE(type) \
+ case netievent_##type: { \
+ isc__nm_async_##type(worker, ievent); \
+ isc__nm_put_netievent_##type(worker->mgr, ievent); \
+ return (false); \
+ }
+
+/*
+ * Dispatch a single netievent to its isc__nm_async_<type>() handler and
+ * release the event afterwards (see the NETIEVENT_CASE macros).
+ *
+ * Returns false for the "stop" and "pause" events and true for every other
+ * event type; an unknown event type is treated as unreachable.
+ *
+ * Requires:
+ *	the caller runs on the thread that owns 'worker'.
+ */
+static bool
+process_netievent(isc__networker_t *worker, isc__netievent_t *ievent) {
+ REQUIRE(worker->id == isc_nm_tid());
+
+ switch (ievent->type) {
+ /* Don't process more ievents when we are stopping */
+ NETIEVENT_CASE_NOMORE(stop);
+
+ NETIEVENT_CASE(privilegedtask);
+ NETIEVENT_CASE(task);
+
+ NETIEVENT_CASE(udpconnect);
+ NETIEVENT_CASE(udplisten);
+ NETIEVENT_CASE(udpstop);
+ NETIEVENT_CASE(udpsend);
+ NETIEVENT_CASE(udpread);
+ NETIEVENT_CASE(udpcancel);
+ NETIEVENT_CASE(udpclose);
+
+ NETIEVENT_CASE(routeconnect);
+
+ NETIEVENT_CASE(tcpaccept);
+ NETIEVENT_CASE(tcpconnect);
+ NETIEVENT_CASE(tcplisten);
+ NETIEVENT_CASE(tcpstartread);
+ NETIEVENT_CASE(tcppauseread);
+ NETIEVENT_CASE(tcpsend);
+ NETIEVENT_CASE(tcpstop);
+ NETIEVENT_CASE(tcpcancel);
+ NETIEVENT_CASE(tcpclose);
+
+ NETIEVENT_CASE(tcpdnsaccept);
+ NETIEVENT_CASE(tcpdnslisten);
+ NETIEVENT_CASE(tcpdnsconnect);
+ NETIEVENT_CASE(tcpdnssend);
+ NETIEVENT_CASE(tcpdnscancel);
+ NETIEVENT_CASE(tcpdnsclose);
+ NETIEVENT_CASE(tcpdnsread);
+ NETIEVENT_CASE(tcpdnsstop);
+
+ NETIEVENT_CASE(tlsdnscycle);
+ NETIEVENT_CASE(tlsdnsaccept);
+ NETIEVENT_CASE(tlsdnslisten);
+ NETIEVENT_CASE(tlsdnsconnect);
+ NETIEVENT_CASE(tlsdnssend);
+ NETIEVENT_CASE(tlsdnscancel);
+ NETIEVENT_CASE(tlsdnsclose);
+ NETIEVENT_CASE(tlsdnsread);
+ NETIEVENT_CASE(tlsdnsstop);
+ NETIEVENT_CASE(tlsdnsshutdown);
+
+ /* TLS stream and HTTP events exist only in nghttp2-enabled builds. */
+#if HAVE_LIBNGHTTP2
+ NETIEVENT_CASE(tlsstartread);
+ NETIEVENT_CASE(tlssend);
+ NETIEVENT_CASE(tlsclose);
+ NETIEVENT_CASE(tlsdobio);
+ NETIEVENT_CASE(tlscancel);
+
+ NETIEVENT_CASE(httpsend);
+ NETIEVENT_CASE(httpclose);
+ NETIEVENT_CASE(httpendpoints);
+#endif
+ NETIEVENT_CASE(settlsctx);
+ NETIEVENT_CASE(sockstop);
+
+ NETIEVENT_CASE(connectcb);
+ NETIEVENT_CASE(readcb);
+ NETIEVENT_CASE(sendcb);
+
+ NETIEVENT_CASE(close);
+ NETIEVENT_CASE(detach);
+
+ NETIEVENT_CASE(shutdown);
+ NETIEVENT_CASE(resume);
+ /* Like "stop", a pause ends processing of the current batch. */
+ NETIEVENT_CASE_NOMORE(pause);
+ default:
+ UNREACHABLE();
+ }
+ return (true);
+}
+
+/*
+ * Process the events currently sitting on the worker's queue of the given
+ * type.  The whole queue is moved onto a local list while holding the queue
+ * lock, and the events are then processed without the lock held.
+ *
+ * Returns:
+ *	ISC_R_EMPTY	the queue held no events
+ *	ISC_R_SUSPEND	process_netievent() returned false for an event; any
+ *			events not yet processed were prepended back onto
+ *			the worker's queue
+ *	ISC_R_SUCCESS	every event taken from the queue was processed
+ */
+static isc_result_t
+process_queue(isc__networker_t *worker, netievent_type_t type) {
+ isc__netievent_t *ievent = NULL;
+ isc__netievent_list_t list;
+
+ ISC_LIST_INIT(list);
+
+ LOCK(&worker->ievents[type].lock);
+ ISC_LIST_MOVE(list, worker->ievents[type].list);
+ UNLOCK(&worker->ievents[type].lock);
+
+ ievent = ISC_LIST_HEAD(list);
+ if (ievent == NULL) {
+ /* There's nothing scheduled */
+ return (ISC_R_EMPTY);
+ }
+
+ while (ievent != NULL) {
+ /* Fetch the successor first: the event is unlinked below. */
+ isc__netievent_t *next = ISC_LIST_NEXT(ievent, link);
+ ISC_LIST_DEQUEUE(list, ievent, link);
+
+ if (!process_netievent(worker, ievent)) {
+ /* The netievent told us to stop */
+ if (!ISC_LIST_EMPTY(list)) {
+ /*
+ * Reschedule the rest of the unprocessed
+ * events.
+ */
+ LOCK(&worker->ievents[type].lock);
+ ISC_LIST_PREPENDLIST(worker->ievents[type].list,
+ list, link);
+ UNLOCK(&worker->ievents[type].lock);
+ }
+ return (ISC_R_SUSPEND);
+ }
+
+ ievent = next;
+ }
+
+ /* We processed at least one */
+ return (ISC_R_SUCCESS);
+}
+
+/*
+ * Allocate storage for a netievent from the manager's memory context,
+ * zero it apart from the event type, initialize its list link and return
+ * it.
+ */
+void *
+isc__nm_get_netievent(isc_nm_t *mgr, isc__netievent_type type) {
+	isc__netievent_storage_t *storage = NULL;
+
+	storage = isc_mem_get(mgr->mctx, sizeof(isc__netievent_storage_t));
+	*storage = (isc__netievent_storage_t){ .ni.type = type };
+	ISC_LINK_INIT(&storage->ni, link);
+
+	return (storage);
+}
+
+/*
+ * Return netievent storage to the manager's memory context.
+ */
+void
+isc__nm_put_netievent(isc_nm_t *mgr, void *ievent) {
+	isc__netievent_storage_t *storage = ievent;
+
+	isc_mem_put(mgr->mctx, storage, sizeof(*storage));
+}
+
+/*
+ * Per-event-type definitions, instantiated through the NETIEVENT_*_DEF()
+ * macros (which are defined outside this section).
+ * NOTE(review): these presumably generate the
+ * isc__nm_get_netievent_<type>()/isc__nm_put_netievent_<type>() helpers
+ * used in this file -- confirm against the macro definitions.
+ */
+NETIEVENT_SOCKET_DEF(tcpclose);
+NETIEVENT_SOCKET_DEF(tcplisten);
+NETIEVENT_SOCKET_DEF(tcppauseread);
+NETIEVENT_SOCKET_DEF(tcpstartread);
+NETIEVENT_SOCKET_DEF(tcpstop);
+NETIEVENT_SOCKET_DEF(tlsclose);
+NETIEVENT_SOCKET_DEF(tlsconnect);
+NETIEVENT_SOCKET_DEF(tlsdobio);
+NETIEVENT_SOCKET_DEF(tlsstartread);
+NETIEVENT_SOCKET_HANDLE_DEF(tlscancel);
+NETIEVENT_SOCKET_DEF(udpclose);
+NETIEVENT_SOCKET_DEF(udplisten);
+NETIEVENT_SOCKET_DEF(udpread);
+NETIEVENT_SOCKET_DEF(udpsend);
+NETIEVENT_SOCKET_DEF(udpstop);
+
+NETIEVENT_SOCKET_DEF(tcpdnsclose);
+NETIEVENT_SOCKET_DEF(tcpdnsread);
+NETIEVENT_SOCKET_DEF(tcpdnsstop);
+NETIEVENT_SOCKET_DEF(tcpdnslisten);
+NETIEVENT_SOCKET_REQ_DEF(tcpdnsconnect);
+NETIEVENT_SOCKET_REQ_DEF(tcpdnssend);
+NETIEVENT_SOCKET_HANDLE_DEF(tcpdnscancel);
+NETIEVENT_SOCKET_QUOTA_DEF(tcpdnsaccept);
+
+NETIEVENT_SOCKET_DEF(tlsdnsclose);
+NETIEVENT_SOCKET_DEF(tlsdnsread);
+NETIEVENT_SOCKET_DEF(tlsdnsstop);
+NETIEVENT_SOCKET_DEF(tlsdnslisten);
+NETIEVENT_SOCKET_REQ_DEF(tlsdnsconnect);
+NETIEVENT_SOCKET_REQ_DEF(tlsdnssend);
+NETIEVENT_SOCKET_HANDLE_DEF(tlsdnscancel);
+NETIEVENT_SOCKET_QUOTA_DEF(tlsdnsaccept);
+NETIEVENT_SOCKET_DEF(tlsdnscycle);
+NETIEVENT_SOCKET_DEF(tlsdnsshutdown);
+
+#ifdef HAVE_LIBNGHTTP2
+NETIEVENT_SOCKET_REQ_DEF(httpsend);
+NETIEVENT_SOCKET_DEF(httpclose);
+NETIEVENT_SOCKET_HTTP_EPS_DEF(httpendpoints);
+#endif /* HAVE_LIBNGHTTP2 */
+
+NETIEVENT_SOCKET_REQ_DEF(tcpconnect);
+NETIEVENT_SOCKET_REQ_DEF(tcpsend);
+NETIEVENT_SOCKET_REQ_DEF(tlssend);
+NETIEVENT_SOCKET_REQ_DEF(udpconnect);
+NETIEVENT_SOCKET_REQ_DEF(routeconnect);
+NETIEVENT_SOCKET_REQ_RESULT_DEF(connectcb);
+NETIEVENT_SOCKET_REQ_RESULT_DEF(readcb);
+NETIEVENT_SOCKET_REQ_RESULT_DEF(sendcb);
+
+NETIEVENT_SOCKET_DEF(detach);
+NETIEVENT_SOCKET_HANDLE_DEF(tcpcancel);
+NETIEVENT_SOCKET_HANDLE_DEF(udpcancel);
+
+NETIEVENT_SOCKET_QUOTA_DEF(tcpaccept);
+
+NETIEVENT_SOCKET_DEF(close);
+NETIEVENT_DEF(pause);
+NETIEVENT_DEF(resume);
+NETIEVENT_DEF(shutdown);
+NETIEVENT_DEF(stop);
+
+NETIEVENT_TASK_DEF(task);
+NETIEVENT_TASK_DEF(privilegedtask);
+
+NETIEVENT_SOCKET_TLSCTX_DEF(settlsctx);
+NETIEVENT_SOCKET_DEF(sockstop);
+
+/*
+ * Run the event immediately when called from the worker's own network
+ * thread; otherwise queue it for that worker.
+ */
+void
+isc__nm_maybe_enqueue_ievent(isc__networker_t *worker,
+			     isc__netievent_t *event) {
+	if (worker->id != isc_nm_tid()) {
+		/* Different thread: hand the event over via the queue. */
+		isc__nm_enqueue_ievent(worker, event);
+		return;
+	}
+
+	/* Already in the matching nmthread: process the ievent directly. */
+	process_netievent(worker, event);
+}
+
+/*
+ * Append the event to the worker queue selected by its type (priority,
+ * privileged task, task, or normal) and wake the worker up through its
+ * async handle.  For the priority queue the queue's condition variable is
+ * signalled as well.
+ */
+void
+isc__nm_enqueue_ievent(isc__networker_t *worker, isc__netievent_t *event) {
+	netievent_type_t queue = NETIEVENT_NORMAL;
+
+	if (event->type > netievent_prio) {
+		queue = NETIEVENT_PRIORITY;
+	} else if (event->type == netievent_prio) {
+		/* netievent_prio is only a boundary marker, never an event. */
+		UNREACHABLE();
+	} else if (event->type == netievent_privilegedtask) {
+		queue = NETIEVENT_PRIVILEGED;
+	} else if (event->type == netievent_task) {
+		queue = NETIEVENT_TASK;
+	}
+
+	/*
+	 * The event is queued (and, for priority events, the condition is
+	 * signalled) under the queue lock so that the signal is delivered
+	 * and the queue gets processed.
+	 */
+	LOCK(&worker->ievents[queue].lock);
+	ISC_LIST_ENQUEUE(worker->ievents[queue].list, event, link);
+	if (queue == NETIEVENT_PRIORITY) {
+		SIGNAL(&worker->ievents[queue].cond);
+	}
+	UNLOCK(&worker->ievents[queue].lock);
+
+	uv_async_send(&worker->async);
+}
+
+/*
+ * Report whether the socket is active.  For a child socket the parent's
+ * 'active' flag is the one consulted.
+ */
+bool
+isc__nmsocket_active(isc_nmsocket_t *sock) {
+	isc_nmsocket_t *owner = NULL;
+
+	REQUIRE(VALID_NMSOCK(sock));
+
+	owner = (sock->parent != NULL) ? sock->parent : sock;
+
+	return (atomic_load(&owner->active));
+}
+
+/*
+ * Atomically flip the 'active' flag from true to false (on the parent, for
+ * a child socket).  Returns true when this call performed the transition,
+ * false when the flag was not true.
+ */
+bool
+isc__nmsocket_deactivate(isc_nmsocket_t *sock) {
+	isc_nmsocket_t *owner = NULL;
+
+	REQUIRE(VALID_NMSOCK(sock));
+
+	owner = (sock->parent != NULL) ? sock->parent : sock;
+
+	return (atomic_compare_exchange_strong(&owner->active,
+					       &(bool){ true }, false));
+}
+
+/*
+ * Take a reference on 'sock' and store the socket in *target.  References
+ * for a child socket are counted on its parent, but *target still receives
+ * the socket that was passed in.
+ */
+void
+isc___nmsocket_attach(isc_nmsocket_t *sock, isc_nmsocket_t **target FLARG) {
+	REQUIRE(VALID_NMSOCK(sock));
+	REQUIRE(target != NULL && *target == NULL);
+
+	isc_nmsocket_t *rsock = sock;
+
+	if (sock->parent != NULL) {
+		rsock = sock->parent;
+		INSIST(rsock->parent == NULL); /* sanity check */
+	}
+
+	NETMGR_TRACE_LOG("isc__nmsocket_attach():%p->references = %" PRIuFAST32
+			 "\n",
+			 rsock, isc_refcount_current(&rsock->references) + 1);
+
+	isc_refcount_increment0(&rsock->references);
+
+	*target = sock;
+}
+
+/*
+ * Free all resources inside a socket (including its children if any).
+ *
+ * The socket must already be inactive.  When 'dofree' is true the socket
+ * structure itself is returned to the memory context as well; when false
+ * (as used for the entries of a parent's 'children' array) only the
+ * socket's contents are released.
+ */
+static void
+nmsocket_cleanup(isc_nmsocket_t *sock, bool dofree FLARG) {
+ isc_nmhandle_t *handle = NULL;
+ isc__nm_uvreq_t *uvreq = NULL;
+
+ REQUIRE(VALID_NMSOCK(sock));
+ REQUIRE(!isc__nmsocket_active(sock));
+
+ NETMGR_TRACE_LOG("nmsocket_cleanup():%p->references = %" PRIuFAST32
+ "\n",
+ sock, isc_refcount_current(&sock->references));
+
+ isc__nm_decstats(sock, STATID_ACTIVE);
+
+ atomic_store(&sock->destroying, true);
+
+ if (sock->parent == NULL && sock->children != NULL) {
+ /*
+ * We shouldn't be here unless there are no active handles,
+ * so we can clean up and free the children.
+ */
+ for (size_t i = 0; i < sock->nchildren; i++) {
+ if (!atomic_load(&sock->children[i].destroying)) {
+ nmsocket_cleanup(&sock->children[i],
+ false FLARG_PASS);
+ }
+ }
+
+ /*
+ * This was a parent socket: destroy the listening
+ * barriers that synchronized the children.
+ */
+ isc_barrier_destroy(&sock->startlistening);
+ isc_barrier_destroy(&sock->stoplistening);
+
+ /*
+ * Now free them.
+ */
+ isc_mem_put(sock->mgr->mctx, sock->children,
+ sock->nchildren * sizeof(*sock));
+ sock->children = NULL;
+ sock->nchildren = 0;
+ }
+
+ sock->statichandle = NULL;
+
+ if (sock->outerhandle != NULL) {
+ isc__nmhandle_detach(&sock->outerhandle FLARG_PASS);
+ }
+
+ if (sock->outer != NULL) {
+ isc___nmsocket_detach(&sock->outer FLARG_PASS);
+ }
+
+ /* Free the cached (inactive) handles before destroying their stack. */
+ while ((handle = isc_astack_pop(sock->inactivehandles)) != NULL) {
+ nmhandle_free(sock, handle);
+ }
+
+ if (sock->buf != NULL) {
+ isc_mem_put(sock->mgr->mctx, sock->buf, sock->buf_size);
+ }
+
+ if (sock->quota != NULL) {
+ isc_quota_detach(&sock->quota);
+ }
+
+ sock->pquota = NULL;
+
+ isc_astack_destroy(sock->inactivehandles);
+
+ /* Same for the cached uv requests. */
+ while ((uvreq = isc_astack_pop(sock->inactivereqs)) != NULL) {
+ isc_mem_put(sock->mgr->mctx, uvreq, sizeof(*uvreq));
+ }
+
+ isc_astack_destroy(sock->inactivereqs);
+ sock->magic = 0;
+
+ isc_condition_destroy(&sock->scond);
+ isc_condition_destroy(&sock->cond);
+ isc_mutex_destroy(&sock->lock);
+ isc__nm_tlsdns_cleanup_data(sock);
+#if HAVE_LIBNGHTTP2
+ isc__nm_tls_cleanup_data(sock);
+ isc__nm_http_cleanup_data(sock);
+#endif
+
+ INSIST(ISC_LIST_EMPTY(sock->tls.sendreqs));
+
+ if (sock->barrier_initialised) {
+ isc_barrier_destroy(&sock->barrier);
+ }
+
+#ifdef NETMGR_TRACE
+ LOCK(&sock->mgr->lock);
+ ISC_LIST_UNLINK(sock->mgr->active_sockets, sock, active_link);
+ UNLOCK(&sock->mgr->lock);
+#endif
+ if (dofree) {
+ /*
+ * The socket memory is released first, so the manager pointer
+ * is copied to a local before it is detached.
+ */
+ isc_nm_t *mgr = sock->mgr;
+ isc_mem_put(mgr->mctx, sock, sizeof(*sock));
+ isc_nm_detach(&mgr);
+ } else {
+ isc_nm_detach(&sock->mgr);
+ }
+}
+
+/*
+ * Destroy the socket if nothing is using it any more.
+ *
+ * A child socket defers the decision to its parent.  A parent or
+ * standalone socket is cleaned up and freed only when it is inactive,
+ * closed, not already being destroyed, has no references left, and either
+ * has no active handles (its own plus those of its children) or has a
+ * statichandle set.
+ */
+static void
+nmsocket_maybe_destroy(isc_nmsocket_t *sock FLARG) {
+ int active_handles;
+ bool destroy = false;
+
+ NETMGR_TRACE_LOG("%s():%p->references = %" PRIuFAST32 "\n", __func__,
+ sock, isc_refcount_current(&sock->references));
+
+ if (sock->parent != NULL) {
+ /*
+ * This is a child socket and cannot be destroyed except
+ * as a side effect of destroying the parent, so let's go
+ * see if the parent is ready to be destroyed.
+ */
+ nmsocket_maybe_destroy(sock->parent FLARG_PASS);
+ return;
+ }
+
+ /*
+ * This is a parent socket (or a standalone). See whether the
+ * children have active handles before deciding whether to
+ * accept destruction.
+ */
+ LOCK(&sock->lock);
+ if (atomic_load(&sock->active) || atomic_load(&sock->destroying) ||
+ !atomic_load(&sock->closed) || atomic_load(&sock->references) != 0)
+ {
+ UNLOCK(&sock->lock);
+ return;
+ }
+
+ /* Sum the active handles of this socket and of every child. */
+ active_handles = atomic_load(&sock->ah);
+ if (sock->children != NULL) {
+ for (size_t i = 0; i < sock->nchildren; i++) {
+ LOCK(&sock->children[i].lock);
+ active_handles += atomic_load(&sock->children[i].ah);
+ UNLOCK(&sock->children[i].lock);
+ }
+ }
+
+ if (active_handles == 0 || sock->statichandle != NULL) {
+ destroy = true;
+ }
+
+ NETMGR_TRACE_LOG("%s:%p->active_handles = %d, .statichandle = %p\n",
+ __func__, sock, active_handles, sock->statichandle);
+
+ if (destroy) {
+ /*
+ * 'destroying' is set while still holding the lock; the check
+ * at the top makes any later call return early.
+ */
+ atomic_store(&sock->destroying, true);
+ UNLOCK(&sock->lock);
+ nmsocket_cleanup(sock, true FLARG_PASS);
+ } else {
+ UNLOCK(&sock->lock);
+ }
+}
+
+/*
+ * Mark the socket (and any children) inactive and start tearing it down.
+ *
+ * If the socket is not closed yet, the type-specific close function is
+ * called; for the UDP, TCP, TCPDNS, TLSDNS and HTTP cases this function
+ * then returns immediately.  In every other situation (socket already
+ * closed, a type without a case in the switch, or the TLS stream case,
+ * which breaks out of the switch) nmsocket_maybe_destroy() is called.
+ *
+ * Requires a parent or standalone socket (sock->parent == NULL).
+ */
+void
+isc___nmsocket_prep_destroy(isc_nmsocket_t *sock FLARG) {
+ REQUIRE(sock->parent == NULL);
+
+ NETMGR_TRACE_LOG("isc___nmsocket_prep_destroy():%p->references = "
+ "%" PRIuFAST32 "\n",
+ sock, isc_refcount_current(&sock->references));
+
+ /*
+ * The final external reference to the socket is gone. We can try
+ * destroying the socket, but we have to wait for all the inflight
+ * handles to finish first.
+ */
+ atomic_store(&sock->active, false);
+
+ /*
+ * If the socket has children, they'll need to be marked inactive
+ * so they can be cleaned up too.
+ */
+ if (sock->children != NULL) {
+ for (size_t i = 0; i < sock->nchildren; i++) {
+ atomic_store(&sock->children[i].active, false);
+ }
+ }
+
+ /*
+ * If we're here then we already stopped listening; otherwise
+ * we'd have a hanging reference from the listening process.
+ *
+ * If it's a regular socket we may need to close it.
+ */
+ if (!atomic_load(&sock->closed)) {
+ switch (sock->type) {
+ case isc_nm_udpsocket:
+ isc__nm_udp_close(sock);
+ return;
+ case isc_nm_tcpsocket:
+ isc__nm_tcp_close(sock);
+ return;
+ case isc_nm_tcpdnssocket:
+ isc__nm_tcpdns_close(sock);
+ return;
+ case isc_nm_tlsdnssocket:
+ isc__nm_tlsdns_close(sock);
+ return;
+#if HAVE_LIBNGHTTP2
+ case isc_nm_tlssocket:
+ /*
+ * NOTE(review): unlike the other cases this one breaks
+ * instead of returning, so nmsocket_maybe_destroy() runs
+ * right after isc__nm_tls_close() -- confirm this is
+ * intentional.
+ */
+ isc__nm_tls_close(sock);
+ break;
+ case isc_nm_httpsocket:
+ isc__nm_http_close(sock);
+ return;
+#endif
+ default:
+ break;
+ }
+ }
+
+ nmsocket_maybe_destroy(sock FLARG_PASS);
+}
+
+/*
+ * Drop a reference to the socket and clear *sockp.  References for a child
+ * socket are counted on its parent.  When the last reference goes away,
+ * destruction of the reference-counted socket is started.
+ */
+void
+isc___nmsocket_detach(isc_nmsocket_t **sockp FLARG) {
+	REQUIRE(sockp != NULL && *sockp != NULL);
+	REQUIRE(VALID_NMSOCK(*sockp));
+
+	isc_nmsocket_t *sock = *sockp;
+	isc_nmsocket_t *rsock = sock;
+
+	*sockp = NULL;
+
+	/*
+	 * A socket that is part of a set (a child socket) has its
+	 * references counted for the whole set at the parent.
+	 */
+	if (sock->parent != NULL) {
+		rsock = sock->parent;
+		INSIST(rsock->parent == NULL); /* Sanity check */
+	}
+
+	NETMGR_TRACE_LOG("isc__nmsocket_detach():%p->references = %" PRIuFAST32
+			 "\n",
+			 rsock, isc_refcount_current(&rsock->references) - 1);
+
+	if (isc_refcount_decrement(&rsock->references) != 1) {
+		return;
+	}
+
+	isc___nmsocket_prep_destroy(rsock FLARG_PASS);
+}
+
+/*
+ * Close a listener socket by detaching the caller's reference to it.
+ *
+ * Requires that *sockp is a valid socket of one of the listener types
+ * (UDP, TCP, TCPDNS, TLSDNS, TLS or HTTP).
+ */
+void
+isc_nmsocket_close(isc_nmsocket_t **sockp) {
+ REQUIRE(sockp != NULL);
+ REQUIRE(VALID_NMSOCK(*sockp));
+ REQUIRE((*sockp)->type == isc_nm_udplistener ||
+ (*sockp)->type == isc_nm_tcplistener ||
+ (*sockp)->type == isc_nm_tcpdnslistener ||
+ (*sockp)->type == isc_nm_tlsdnslistener ||
+ (*sockp)->type == isc_nm_tlslistener ||
+ (*sockp)->type == isc_nm_httplistener);
+
+ isc__nmsocket_detach(sockp);
+}
+
+/*
+ * Initialize a caller-provided socket structure of the given type.
+ *
+ * The structure is overwritten completely.  'iface', when not NULL, is
+ * copied into sock->iface and its address family selects the statistics
+ * index for the UDP and TCP-based socket types.  The socket attaches to
+ * 'mgr', starts with a reference count of one and with 'active' set, and
+ * the STATID_ACTIVE counter is incremented.
+ */
+void
+isc___nmsocket_init(isc_nmsocket_t *sock, isc_nm_t *mgr, isc_nmsocket_type type,
+ isc_sockaddr_t *iface FLARG) {
+ uint16_t family;
+
+ REQUIRE(sock != NULL);
+ REQUIRE(mgr != NULL);
+
+ *sock = (isc_nmsocket_t){ .type = type,
+ .fd = -1,
+ .inactivehandles = isc_astack_new(
+ mgr->mctx, ISC_NM_HANDLES_STACK_SIZE),
+ .inactivereqs = isc_astack_new(
+ mgr->mctx, ISC_NM_REQS_STACK_SIZE) };
+
+ ISC_LIST_INIT(sock->tls.sendreqs);
+
+ if (iface != NULL) {
+ family = iface->type.sa.sa_family;
+ sock->iface = *iface;
+ } else {
+ family = AF_UNSPEC;
+ }
+
+#if NETMGR_TRACE
+ sock->backtrace_size = isc_backtrace(sock->backtrace, TRACE_SIZE);
+ ISC_LINK_INIT(sock, active_link);
+ ISC_LIST_INIT(sock->active_handles);
+ LOCK(&mgr->lock);
+ ISC_LIST_APPEND(mgr->active_sockets, sock, active_link);
+ UNLOCK(&mgr->lock);
+#endif
+
+ isc_nm_attach(mgr, &sock->mgr);
+ sock->uv_handle.handle.data = sock;
+
+ ISC_LINK_INIT(&sock->quotacb, link);
+
+ /* Pick the statistics index from the socket type and address family. */
+ switch (type) {
+ case isc_nm_udpsocket:
+ case isc_nm_udplistener:
+ switch (family) {
+ case AF_INET:
+ sock->statsindex = udp4statsindex;
+ break;
+ case AF_INET6:
+ sock->statsindex = udp6statsindex;
+ break;
+ case AF_UNSPEC:
+ /*
+ * Route sockets are AF_UNSPEC, and don't
+ * have stats counters.
+ */
+ break;
+ default:
+ UNREACHABLE();
+ }
+ break;
+ case isc_nm_tcpsocket:
+ case isc_nm_tcplistener:
+ case isc_nm_tcpdnssocket:
+ case isc_nm_tcpdnslistener:
+ case isc_nm_tlsdnssocket:
+ case isc_nm_tlsdnslistener:
+ case isc_nm_httpsocket:
+ case isc_nm_httplistener:
+ switch (family) {
+ case AF_INET:
+ sock->statsindex = tcp4statsindex;
+ break;
+ case AF_INET6:
+ sock->statsindex = tcp6statsindex;
+ break;
+ default:
+ UNREACHABLE();
+ }
+ break;
+ default:
+ break;
+ }
+
+ isc_mutex_init(&sock->lock);
+ isc_condition_init(&sock->cond);
+ isc_condition_init(&sock->scond);
+ isc_refcount_init(&sock->references, 1);
+
+#if HAVE_LIBNGHTTP2
+ memset(&sock->tlsstream, 0, sizeof(sock->tlsstream));
+#endif /* HAVE_LIBNGHTTP2 */
+
+ NETMGR_TRACE_LOG("isc__nmsocket_init():%p->references = %" PRIuFAST32
+ "\n",
+ sock, isc_refcount_current(&sock->references));
+
+ atomic_init(&sock->active, true);
+ atomic_init(&sock->sequential, false);
+ atomic_init(&sock->readpaused, false);
+ atomic_init(&sock->closing, false);
+ atomic_init(&sock->listening, 0);
+ atomic_init(&sock->closed, 0);
+ atomic_init(&sock->destroying, 0);
+ atomic_init(&sock->ah, 0);
+ atomic_init(&sock->client, 0);
+ atomic_init(&sock->connecting, false);
+ atomic_init(&sock->keepalive, false);
+ atomic_init(&sock->connected, false);
+ atomic_init(&sock->timedout, false);
+
+ atomic_init(&sock->active_child_connections, 0);
+
+#if HAVE_LIBNGHTTP2
+ isc__nm_http_initsocket(sock);
+#endif
+
+ /* The magic is set last, once the structure is fully initialized. */
+ sock->magic = NMSOCK_MAGIC;
+
+ isc__nm_incstats(sock, STATID_ACTIVE);
+}
+
+/*
+ * Forget the socket's receive, accept and connect callbacks together with
+ * their arguments.  When called from a network thread it must be the
+ * socket's own thread.
+ */
+void
+isc__nmsocket_clearcb(isc_nmsocket_t *sock) {
+	REQUIRE(VALID_NMSOCK(sock));
+	REQUIRE(!isc__nm_in_netthread() || sock->tid == isc_nm_tid());
+
+	/* Callbacks ... */
+	sock->recv_cb = NULL;
+	sock->accept_cb = NULL;
+	sock->connect_cb = NULL;
+
+	/* ... and the arguments that went with them. */
+	sock->recv_cbarg = NULL;
+	sock->accept_cbarg = NULL;
+	sock->connect_cbarg = NULL;
+}
+
+/*
+ * Release the receive buffer of the worker that owns the socket.  'buf'
+ * must be that worker's receive buffer; nothing is freed, the buffer is
+ * only marked as no longer in use.
+ */
+void
+isc__nm_free_uvbuf(isc_nmsocket_t *sock, const uv_buf_t *buf) {
+	REQUIRE(VALID_NMSOCK(sock));
+
+	isc__networker_t *worker = &sock->mgr->workers[sock->tid];
+
+	REQUIRE(buf->base == worker->recvbuf);
+
+	worker->recvbuf_inuse = false;
+}
+
+/*
+ * Allocate a new handle, sized to include the socket's extra handle data,
+ * with its magic set and a reference count of one.
+ */
+static isc_nmhandle_t *
+alloc_handle(isc_nmsocket_t *sock) {
+	size_t size = sizeof(isc_nmhandle_t) + sock->extrahandlesize;
+	isc_nmhandle_t *handle = isc_mem_get(sock->mgr->mctx, size);
+
+	*handle = (isc_nmhandle_t){ .magic = NMHANDLE_MAGIC };
+#ifdef NETMGR_TRACE
+	ISC_LINK_INIT(handle, active_link);
+#endif
+	isc_refcount_init(&handle->references, 1);
+
+	return (handle);
+}
+
+/*
+ * Get a handle for 'sock': one is taken from the socket's stack of
+ * inactive handles when available, otherwise a new one is allocated.  The
+ * handle starts with one reference, attaches to the socket, and the
+ * socket's active-handle counter is incremented.
+ *
+ * 'peer' and 'local' set the handle's addresses; when NULL, the socket's
+ * own 'peer' and 'iface' addresses are used instead.
+ *
+ * For TCP and TLS stream sockets, and for client UDP/TCPDNS/TLSDNS
+ * sockets, the handle also becomes the socket's statichandle.
+ */
+isc_nmhandle_t *
+isc___nmhandle_get(isc_nmsocket_t *sock, isc_sockaddr_t *peer,
+ isc_sockaddr_t *local FLARG) {
+ isc_nmhandle_t *handle = NULL;
+
+ REQUIRE(VALID_NMSOCK(sock));
+
+ handle = isc_astack_pop(sock->inactivehandles);
+
+ if (handle == NULL) {
+ handle = alloc_handle(sock);
+ } else {
+ /* Reused handle: only the reference count needs resetting. */
+ isc_refcount_init(&handle->references, 1);
+ INSIST(VALID_NMHANDLE(handle));
+ }
+
+ NETMGR_TRACE_LOG(
+ "isc__nmhandle_get():handle %p->references = %" PRIuFAST32 "\n",
+ handle, isc_refcount_current(&handle->references));
+
+ isc___nmsocket_attach(sock, &handle->sock FLARG_PASS);
+
+#if NETMGR_TRACE
+ handle->backtrace_size = isc_backtrace(handle->backtrace, TRACE_SIZE);
+#endif
+
+ if (peer != NULL) {
+ handle->peer = *peer;
+ } else {
+ handle->peer = sock->peer;
+ }
+
+ if (local != NULL) {
+ handle->local = *local;
+ } else {
+ handle->local = sock->iface;
+ }
+
+ (void)atomic_fetch_add(&sock->ah, 1);
+
+#ifdef NETMGR_TRACE
+ LOCK(&sock->lock);
+ ISC_LIST_APPEND(sock->active_handles, handle, active_link);
+ UNLOCK(&sock->lock);
+#endif
+
+ switch (sock->type) {
+ case isc_nm_udpsocket:
+ case isc_nm_tcpdnssocket:
+ case isc_nm_tlsdnssocket:
+ /* These types get a statichandle only on client sockets. */
+ if (!atomic_load(&sock->client)) {
+ break;
+ }
+ FALLTHROUGH;
+ case isc_nm_tcpsocket:
+ case isc_nm_tlssocket:
+ INSIST(sock->statichandle == NULL);
+
+ /*
+ * statichandle must be assigned, not attached;
+ * otherwise, if a handle was detached elsewhere
+ * it could never reach 0 references, and the
+ * handle and socket would never be freed.
+ */
+ sock->statichandle = handle;
+ break;
+ default:
+ break;
+ }
+
+#if HAVE_LIBNGHTTP2
+ if (sock->type == isc_nm_httpsocket && sock->h2.session) {
+ isc__nm_httpsession_attach(sock->h2.session,
+ &handle->httpsession);
+ }
+#endif
+
+ return (handle);
+}
+
+/*
+ * Take an additional reference on 'handle' and store the handle in
+ * *handlep, which must be NULL on entry.
+ */
+void
+isc__nmhandle_attach(isc_nmhandle_t *handle, isc_nmhandle_t **handlep FLARG) {
+ REQUIRE(VALID_NMHANDLE(handle));
+ REQUIRE(handlep != NULL && *handlep == NULL);
+
+ NETMGR_TRACE_LOG("isc__nmhandle_attach():handle %p->references = "
+ "%" PRIuFAST32 "\n",
+ handle, isc_refcount_current(&handle->references) + 1);
+
+ isc_refcount_increment(&handle->references);
+ *handlep = handle;
+}
+
+/*
+ * Return true when the handle's socket is one of the stream socket types
+ * (TCP, TCPDNS, TLS, TLSDNS or HTTP), false otherwise.
+ */
+bool
+isc_nmhandle_is_stream(isc_nmhandle_t *handle) {
+	REQUIRE(VALID_NMHANDLE(handle));
+
+	switch (handle->sock->type) {
+	case isc_nm_tcpsocket:
+	case isc_nm_tcpdnssocket:
+	case isc_nm_tlssocket:
+	case isc_nm_tlsdnssocket:
+	case isc_nm_httpsocket:
+		return (true);
+	default:
+		return (false);
+	}
+}
+
+/*
+ * Destroy a handle: run its 'dofree' callback (if any) on the opaque data,
+ * wipe the structure and return the memory, including the socket's extra
+ * handle data, to the memory context.
+ */
+static void
+nmhandle_free(isc_nmsocket_t *sock, isc_nmhandle_t *handle) {
+	size_t size = sizeof(isc_nmhandle_t) + sock->extrahandlesize;
+
+	isc_refcount_destroy(&handle->references);
+
+	if (handle->dofree != NULL) {
+		handle->dofree(handle->opaque);
+	}
+
+	*handle = (isc_nmhandle_t){ .magic = 0 };
+
+	isc_mem_put(sock->mgr->mctx, handle, size);
+}
+
+/*
+ * Retire a handle: decrement the socket's active-handle counter and then
+ * either push the handle onto the socket's inactive-handle stack for reuse
+ * (only while the socket is active, and never in builds with the address
+ * or thread sanitizer) or free it.
+ */
+static void
+nmhandle_deactivate(isc_nmsocket_t *sock, isc_nmhandle_t *handle) {
+ bool reuse = false;
+ uint_fast32_t ah;
+
+ /*
+ * We do all of this under lock to avoid races with socket
+ * destruction. We have to do this now, because at this point the
+ * socket is either unused or still attached to event->sock.
+ */
+ LOCK(&sock->lock);
+
+#ifdef NETMGR_TRACE
+ ISC_LIST_UNLINK(sock->active_handles, handle, active_link);
+#endif
+
+ /* atomic_fetch_sub() returns the value before the decrement. */
+ ah = atomic_fetch_sub(&sock->ah, 1);
+ INSIST(ah > 0);
+
+#if !__SANITIZE_ADDRESS__ && !__SANITIZE_THREAD__
+ if (atomic_load(&sock->active)) {
+ reuse = isc_astack_trypush(sock->inactivehandles, handle);
+ }
+#endif /* !__SANITIZE_ADDRESS__ && !__SANITIZE_THREAD__ */
+ if (!reuse) {
+ nmhandle_free(sock, handle);
+ }
+ UNLOCK(&sock->lock);
+}
+
+/*
+ * Drop a reference to a handle and clear *handlep.
+ *
+ * The detach is carried out immediately when called on the socket's own
+ * thread and the socket has no closehandle_cb; otherwise it is deferred by
+ * enqueueing a 'detach' event for the worker that owns the socket.
+ */
+void
+isc__nmhandle_detach(isc_nmhandle_t **handlep FLARG) {
+ isc_nmsocket_t *sock = NULL;
+ isc_nmhandle_t *handle = NULL;
+
+ REQUIRE(handlep != NULL);
+ REQUIRE(VALID_NMHANDLE(*handlep));
+
+ handle = *handlep;
+ *handlep = NULL;
+
+ /*
+ * If the closehandle_cb is set, it needs to run asynchronously to
+ * ensure correct ordering of the isc__nm_process_sock_buffer().
+ */
+ sock = handle->sock;
+ if (sock->tid == isc_nm_tid() && sock->closehandle_cb == NULL) {
+ nmhandle_detach_cb(&handle FLARG_PASS);
+ } else {
+ isc__netievent_detach_t *event =
+ isc__nm_get_netievent_detach(sock->mgr, sock);
+ /*
+ * The handle is passed to the event without an extra
+ * attach: the reference being dropped here is handed over
+ * and has to be released explicitly in the async callback.
+ */
+ event->handle = handle;
+ FLARG_IEVENT_PASS(event);
+ isc__nm_enqueue_ievent(&sock->mgr->workers[sock->tid],
+ (isc__netievent_t *)event);
+ }
+}
+
+/*
+ * Do the actual handle detach and clear *handlep.
+ *
+ * The handle's reference count is decremented; if other references remain
+ * nothing else happens.  Otherwise the handle's 'doreset' callback is run,
+ * the handle is deactivated, the socket's closehandle_cb (if any) is
+ * invoked directly or scheduled on the socket's thread, the socket's
+ * statichandle is cleared if it was this handle, and finally the handle's
+ * reference to the socket is dropped.
+ */
+static void
+nmhandle_detach_cb(isc_nmhandle_t **handlep FLARG) {
+ isc_nmsocket_t *sock = NULL;
+ isc_nmhandle_t *handle = NULL;
+
+ REQUIRE(handlep != NULL);
+ REQUIRE(VALID_NMHANDLE(*handlep));
+
+ handle = *handlep;
+ *handlep = NULL;
+
+ NETMGR_TRACE_LOG("isc__nmhandle_detach():%p->references = %" PRIuFAST32
+ "\n",
+ handle, isc_refcount_current(&handle->references) - 1);
+
+ if (isc_refcount_decrement(&handle->references) > 1) {
+ return;
+ }
+
+ /* We need an acquire memory barrier here */
+ (void)isc_refcount_current(&handle->references);
+
+ sock = handle->sock;
+ handle->sock = NULL;
+
+ if (handle->doreset != NULL) {
+ handle->doreset(handle->opaque);
+ }
+
+#if HAVE_LIBNGHTTP2
+ if (sock->type == isc_nm_httpsocket && handle->httpsession != NULL) {
+ isc__nm_httpsession_detach(&handle->httpsession);
+ }
+#endif
+
+ nmhandle_deactivate(sock, handle);
+
+ /*
+ * The handle is gone now. If the socket has a callback configured
+ * for that (e.g., to perform cleanup after request processing),
+ * call it now, or schedule it to run asynchronously.
+ */
+ if (sock->closehandle_cb != NULL) {
+ if (sock->tid == isc_nm_tid()) {
+ sock->closehandle_cb(sock);
+ } else {
+ isc__netievent_close_t *event =
+ isc__nm_get_netievent_close(sock->mgr, sock);
+ isc__nm_enqueue_ievent(&sock->mgr->workers[sock->tid],
+ (isc__netievent_t *)event);
+ }
+ }
+
+ /*
+ * 'handle' may already have been freed by nmhandle_deactivate(); it
+ * is only compared against here, never dereferenced.
+ */
+ if (handle == sock->statichandle) {
+ /* statichandle is assigned, not attached. */
+ sock->statichandle = NULL;
+ }
+
+ isc___nmsocket_detach(&sock FLARG_PASS);
+}
+
+/*
+ * Return the opaque data pointer stored in the handle.
+ */
+void *
+isc_nmhandle_getdata(isc_nmhandle_t *handle) {
+	void *data = NULL;
+
+	REQUIRE(VALID_NMHANDLE(handle));
+
+	data = handle->opaque;
+
+	return (data);
+}
+
+/*
+ * Store an opaque data pointer in the handle together with the callbacks
+ * to be run on that data: 'doreset' and 'dofree'.
+ */
+void
+isc_nmhandle_setdata(isc_nmhandle_t *handle, void *arg,
+		     isc_nm_opaquecb_t doreset, isc_nm_opaquecb_t dofree) {
+	REQUIRE(VALID_NMHANDLE(handle));
+
+	handle->doreset = doreset;
+	handle->dofree = dofree;
+	handle->opaque = arg;
+}
+
+/*
+ * Make sure the socket's DNS buffer can hold at least 'len' bytes.
+ *
+ * 'len' must not exceed NM_BIG_BUF.  With no buffer yet, NM_REG_BUF bytes
+ * are allocated when that is enough for 'len' and NM_BIG_BUF bytes
+ * otherwise.  An existing buffer is grown to NM_BIG_BUF only when it is
+ * actually smaller than 'len'; a buffer that is already large enough is
+ * left untouched.
+ */
+void
+isc__nm_alloc_dnsbuf(isc_nmsocket_t *sock, size_t len) {
+	REQUIRE(len <= NM_BIG_BUF);
+
+	if (sock->buf == NULL) {
+		/*
+		 * We don't have the buffer at all.  A request of exactly
+		 * NM_REG_BUF bytes still fits the regular buffer.
+		 */
+		size_t alloc_len = len <= NM_REG_BUF ? NM_REG_BUF : NM_BIG_BUF;
+		sock->buf = isc_mem_get(sock->mgr->mctx, alloc_len);
+		sock->buf_size = alloc_len;
+	} else if (sock->buf_size < len) {
+		/* We have the buffer but it's too small */
+		sock->buf = isc_mem_reget(sock->mgr->mctx, sock->buf,
+					  sock->buf_size, NM_BIG_BUF);
+		sock->buf_size = NM_BIG_BUF;
+	}
+}
+
+/*
+ * Report a failed send: when the request has a send callback it is
+ * invoked through isc__nm_sendcb() with 'eresult'; otherwise the request
+ * is simply returned with isc__nm_uvreq_put().
+ */
+void
+isc__nm_failed_send_cb(isc_nmsocket_t *sock, isc__nm_uvreq_t *req,
+		       isc_result_t eresult) {
+	REQUIRE(VALID_NMSOCK(sock));
+	REQUIRE(VALID_UVREQ(req));
+
+	if (req->cb.send == NULL) {
+		isc__nm_uvreq_put(&req, sock);
+		return;
+	}
+
+	isc__nm_sendcb(sock, req, eresult, true);
+}
+
+/*
+ * Clean up after a failed accept: release the socket's quota, detach it
+ * from its server socket, clear the 'accepting' flag, and log the failure
+ * unless the result is ISC_R_NOTCONNECTED.
+ */
+void
+isc__nm_failed_accept_cb(isc_nmsocket_t *sock, isc_result_t eresult) {
+	REQUIRE(atomic_load(&sock->accepting));
+	REQUIRE(sock->server);
+
+	/*
+	 * The quota is detached early to make room for other connections;
+	 * otherwise it would only be detached later, asynchronously, and
+	 * clog the quota unnecessarily.
+	 */
+	if (sock->quota != NULL) {
+		isc_quota_detach(&sock->quota);
+	}
+
+	isc__nmsocket_detach(&sock->server);
+
+	atomic_store(&sock->accepting, false);
+
+	if (eresult == ISC_R_NOTCONNECTED) {
+		/* IGNORE: The client disconnected before we could accept */
+		return;
+	}
+
+	isc_log_write(isc_lctx, ISC_LOGCATEGORY_GENERAL, ISC_LOGMODULE_NETMGR,
+		      ISC_LOG_ERROR, "Accepting TCP connection failed: %s",
+		      isc_result_totext(eresult));
+}
+
+/*
+ * Common failure path for a connect attempt.  Must run on the socket's
+ * thread and the request must have a connect callback.
+ *
+ * Increments the connect-failure statistic, stops the socket timer, flips
+ * 'connecting' from true to false, clears the socket's callbacks, reports
+ * 'eresult' through isc__nm_connectcb() (passing 'async' on), and starts
+ * destruction of the socket.
+ */
+void
+isc__nm_failed_connect_cb(isc_nmsocket_t *sock, isc__nm_uvreq_t *req,
+ isc_result_t eresult, bool async) {
+ REQUIRE(VALID_NMSOCK(sock));
+ REQUIRE(VALID_UVREQ(req));
+ REQUIRE(sock->tid == isc_nm_tid());
+ REQUIRE(req->cb.connect != NULL);
+
+ isc__nm_incstats(sock, STATID_CONNECTFAIL);
+
+ isc__nmsocket_timer_stop(sock);
+ /* Point the timer's data back at the socket. */
+ uv_handle_set_data((uv_handle_t *)&sock->read_timer, sock);
+
+ /* NOTE(review): the _enforced variant presumably asserts success. */
+ atomic_compare_exchange_enforced(&sock->connecting, &(bool){ true },
+ false);
+
+ isc__nmsocket_clearcb(sock);
+ isc__nm_connectcb(sock, req, eresult, async);
+
+ isc__nmsocket_prep_destroy(sock);
+}
+
+/*
+ * Forward a read failure to the handler for the socket's type.  Only the
+ * TLSDNS handler receives the 'async' flag.  Any socket type other than
+ * UDP, TCP, TCPDNS or TLSDNS is a programming error.
+ */
+void
+isc__nm_failed_read_cb(isc_nmsocket_t *sock, isc_result_t result, bool async) {
+	REQUIRE(VALID_NMSOCK(sock));
+
+	switch (sock->type) {
+	case isc_nm_udpsocket:
+		isc__nm_udp_failed_read_cb(sock, result);
+		break;
+	case isc_nm_tcpsocket:
+		isc__nm_tcp_failed_read_cb(sock, result);
+		break;
+	case isc_nm_tcpdnssocket:
+		isc__nm_tcpdns_failed_read_cb(sock, result);
+		break;
+	case isc_nm_tlsdnssocket:
+		isc__nm_tlsdns_failed_read_cb(sock, result, async);
+		break;
+	default:
+		UNREACHABLE();
+	}
+}
+
+/*
+ * Timer callback for a connect timeout.
+ *
+ * The timer's data is the uv_connect_t of the pending connect; the socket
+ * is taken from the data of that request's handle and the netmgr request
+ * from the data of the uv_connect_t itself.  The socket must be on this
+ * thread and still connecting.
+ *
+ * Stops the timer, flips 'timedout' from false to true, clears the
+ * socket's callbacks and shuts the socket down.
+ */
+void
+isc__nmsocket_connecttimeout_cb(uv_timer_t *timer) {
+ uv_connect_t *uvreq = uv_handle_get_data((uv_handle_t *)timer);
+ isc_nmsocket_t *sock = uv_handle_get_data((uv_handle_t *)uvreq->handle);
+ isc__nm_uvreq_t *req = uv_handle_get_data((uv_handle_t *)uvreq);
+
+ REQUIRE(VALID_NMSOCK(sock));
+ REQUIRE(sock->tid == isc_nm_tid());
+ REQUIRE(atomic_load(&sock->connecting));
+ REQUIRE(VALID_UVREQ(req));
+ REQUIRE(VALID_NMHANDLE(req->handle));
+
+ isc__nmsocket_timer_stop(sock);
+
+ /*
+ * Mark the connection as timed out and shutdown the socket.
+ */
+ atomic_compare_exchange_enforced(&sock->timedout, &(bool){ false },
+ true);
+ isc__nmsocket_clearcb(sock);
+ isc__nmsocket_shutdown(sock);
+}
+
+/*
+ * Log the outcome of accepting a TCP connection.
+ *
+ * Nothing is logged for ISC_R_SUCCESS and ISC_R_NOCONN, nor for the quota
+ * results when 'can_log_quota' is false.  Quota results and
+ * ISC_R_NOTCONNECTED are logged at info level, everything else at error
+ * level.
+ */
+void
+isc__nm_accept_connection_log(isc_result_t result, bool can_log_quota) {
+	int level = ISC_LOG_ERROR;
+
+	if (result == ISC_R_SUCCESS || result == ISC_R_NOCONN) {
+		return;
+	}
+
+	if (result == ISC_R_QUOTA || result == ISC_R_SOFTQUOTA) {
+		if (!can_log_quota) {
+			return;
+		}
+		level = ISC_LOG_INFO;
+	} else if (result == ISC_R_NOTCONNECTED) {
+		level = ISC_LOG_INFO;
+	}
+
+	isc_log_write(isc_lctx, ISC_LOGCATEGORY_GENERAL, ISC_LOGMODULE_NETMGR,
+		      level, "Accepting TCP connection failed: %s",
+		      isc_result_totext(result));
+}
+
+/*
+ * Write-timeout callback: 'data' is the uv request whose write timed out
+ * and 'eresult' must be ISC_R_TIMEDOUT.  The request's socket is reset.
+ */
+void
+isc__nmsocket_writetimeout_cb(void *data, isc_result_t eresult) {
+	isc__nm_uvreq_t *req = data;
+
+	REQUIRE(eresult == ISC_R_TIMEDOUT);
+	REQUIRE(VALID_UVREQ(req));
+	REQUIRE(VALID_NMSOCK(req->sock));
+
+	isc__nmsocket_reset(req->sock);
+}
+
+/*
+ * Timer callback for a read timeout.  The socket is taken from the timer's
+ * data, must be on this thread and must be reading.
+ *
+ * Non-client sockets: the read is failed with ISC_R_TIMEDOUT.
+ *
+ * Client sockets: the timer is stopped and, if a receive callback is set,
+ * it is invoked with ISC_R_TIMEDOUT.  If the timer is not running
+ * afterwards, the callbacks are cleared and the read is failed with
+ * ISC_R_CANCELED.
+ */
+void
+isc__nmsocket_readtimeout_cb(uv_timer_t *timer) {
+ isc_nmsocket_t *sock = uv_handle_get_data((uv_handle_t *)timer);
+
+ REQUIRE(VALID_NMSOCK(sock));
+ REQUIRE(sock->tid == isc_nm_tid());
+ REQUIRE(atomic_load(&sock->reading));
+
+ if (atomic_load(&sock->client)) {
+ uv_timer_stop(timer);
+
+ sock->recv_read = false;
+
+ if (sock->recv_cb != NULL) {
+ isc__nm_uvreq_t *req = isc__nm_get_read_req(sock, NULL);
+ isc__nm_readcb(sock, req, ISC_R_TIMEDOUT);
+ }
+
+ /*
+ * NOTE(review): presumably the receive callback may have
+ * restarted the timer, in which case the read stays alive --
+ * confirm.
+ */
+ if (!isc__nmsocket_timer_running(sock)) {
+ isc__nmsocket_clearcb(sock);
+ isc__nm_failed_read_cb(sock, ISC_R_CANCELED, false);
+ }
+ } else {
+ isc__nm_failed_read_cb(sock, ISC_R_TIMEDOUT, false);
+ }
+}
+
+/*
+ * (Re)start the socket's timer.  While the socket is connecting, the
+ * connect timeout (plus 10) and the connect-timeout callback are used;
+ * otherwise the read timeout and the read-timeout callback.  Nothing is
+ * done when the timer handle is closing or the applicable timeout is zero.
+ */
+void
+isc__nmsocket_timer_restart(isc_nmsocket_t *sock) {
+	uv_timer_cb expired = NULL;
+	uint64_t timeout = 0;
+	int r;
+
+	REQUIRE(VALID_NMSOCK(sock));
+
+	if (uv_is_closing((uv_handle_t *)&sock->read_timer)) {
+		return;
+	}
+
+	if (atomic_load(&sock->connecting)) {
+		if (sock->connect_timeout == 0) {
+			return;
+		}
+		expired = isc__nmsocket_connecttimeout_cb;
+		timeout = sock->connect_timeout + 10;
+	} else {
+		if (sock->read_timeout == 0) {
+			return;
+		}
+		expired = isc__nmsocket_readtimeout_cb;
+		timeout = sock->read_timeout;
+	}
+
+	r = uv_timer_start(&sock->read_timer, expired, timeout, 0);
+	UV_RUNTIME_CHECK(uv_timer_start, r);
+}
+
+/*
+ * Return true when the socket's timer handle is active.
+ */
+bool
+isc__nmsocket_timer_running(isc_nmsocket_t *sock) {
+	REQUIRE(VALID_NMSOCK(sock));
+
+	return (uv_is_active((uv_handle_t *)&sock->read_timer) != 0);
+}
+
+/*
+ * Start the socket's timer unless it is already running.
+ */
+void
+isc__nmsocket_timer_start(isc_nmsocket_t *sock) {
+	REQUIRE(VALID_NMSOCK(sock));
+
+	if (!isc__nmsocket_timer_running(sock)) {
+		isc__nmsocket_timer_restart(sock);
+	}
+}
+
+/*
+ * Stop the socket's timer.
+ */
+void
+isc__nmsocket_timer_stop(isc_nmsocket_t *sock) {
+	REQUIRE(VALID_NMSOCK(sock));
+
+	/* uv_timer_stop() is idempotent, no need to check if running */
+	int r = uv_timer_stop(&sock->read_timer);
+
+	UV_RUNTIME_CHECK(uv_timer_stop, r);
+}
+
+/*
+ * Build a uv request for delivering a read to the socket's receive
+ * callback.  The request's handle is the socket's statichandle for TCP and
+ * TLS stream sockets, and for client sockets that have one; otherwise a
+ * handle is obtained for 'sockaddr' as the peer address.
+ */
+isc__nm_uvreq_t *
+isc__nm_get_read_req(isc_nmsocket_t *sock, isc_sockaddr_t *sockaddr) {
+	isc__nm_uvreq_t *req = isc__nm_uvreq_get(sock->mgr, sock);
+	bool stream = (sock->type == isc_nm_tcpsocket ||
+		       sock->type == isc_nm_tlssocket);
+
+	req->cb.recv = sock->recv_cb;
+	req->cbarg = sock->recv_cbarg;
+
+	if (stream ||
+	    (atomic_load(&sock->client) && sock->statichandle != NULL))
+	{
+		isc_nmhandle_attach(sock->statichandle, &req->handle);
+	} else {
+		req->handle = isc__nmhandle_get(sock, sockaddr, NULL);
+	}
+
+	return (req);
+}
+
+/*%<
+ * Allocator callback for read operations.
+ *
+ * Note this doesn't actually allocate anything, it just assigns the
+ * worker's receive buffer to a socket, and marks it as "in use".
+ *
+ * The socket is taken from the handle's data.  The buffer length reported
+ * to libuv depends on the socket type: ISC_NETMGR_UDP_RECVBUF_SIZE for UDP
+ * sockets and ISC_NETMGR_TCP_RECVBUF_SIZE for TCP, TCPDNS and TLSDNS
+ * sockets.  The worker's buffer must not already be in use.
+ */
+void
+isc__nm_alloc_cb(uv_handle_t *handle, size_t size, uv_buf_t *buf) {
+ isc_nmsocket_t *sock = uv_handle_get_data(handle);
+ isc__networker_t *worker = NULL;
+
+ REQUIRE(VALID_NMSOCK(sock));
+ REQUIRE(isc__nm_in_netthread());
+ /*
+ * The size provided by libuv is only a suggested size, and it
+ * always defaults to 64 * 1024 in the current versions of libuv
+ * (see src/unix/udp.c and src/unix/stream.c).
+ */
+ UNUSED(size);
+
+ worker = &sock->mgr->workers[sock->tid];
+ INSIST(!worker->recvbuf_inuse);
+ INSIST(worker->recvbuf != NULL);
+
+ switch (sock->type) {
+ case isc_nm_udpsocket:
+ buf->len = ISC_NETMGR_UDP_RECVBUF_SIZE;
+ break;
+ case isc_nm_tcpsocket:
+ case isc_nm_tcpdnssocket:
+ case isc_nm_tlsdnssocket:
+ buf->len = ISC_NETMGR_TCP_RECVBUF_SIZE;
+ break;
+ default:
+ UNREACHABLE();
+ }
+
+ REQUIRE(buf->len <= ISC_NETMGR_RECVBUF_SIZE);
+ buf->base = worker->recvbuf;
+
+ worker->recvbuf_inuse = true;
+}
+
+/*
+ * Start reading on 'sock' with the libuv receive/read function and read
+ * callback that match the socket type.
+ *
+ * Does nothing and returns ISC_R_SUCCESS when the socket is already
+ * marked as reading.  On success the 'reading' flag is set; on failure
+ * the libuv error is converted to an isc_result_t and returned.
+ */
+isc_result_t
+isc__nm_start_reading(isc_nmsocket_t *sock) {
+	int rv;
+
+	if (atomic_load(&sock->reading)) {
+		return (ISC_R_SUCCESS);
+	}
+
+	switch (sock->type) {
+	case isc_nm_udpsocket:
+		rv = uv_udp_recv_start(&sock->uv_handle.udp, isc__nm_alloc_cb,
+				       isc__nm_udp_read_cb);
+		break;
+	case isc_nm_tcpsocket:
+		rv = uv_read_start(&sock->uv_handle.stream, isc__nm_alloc_cb,
+				   isc__nm_tcp_read_cb);
+		break;
+	case isc_nm_tcpdnssocket:
+		rv = uv_read_start(&sock->uv_handle.stream, isc__nm_alloc_cb,
+				   isc__nm_tcpdns_read_cb);
+		break;
+	case isc_nm_tlsdnssocket:
+		rv = uv_read_start(&sock->uv_handle.stream, isc__nm_alloc_cb,
+				   isc__nm_tlsdns_read_cb);
+		break;
+	default:
+		UNREACHABLE();
+	}
+
+	if (rv != 0) {
+		return (isc__nm_uverr2result(rv));
+	}
+
+	atomic_store(&sock->reading, true);
+	return (ISC_R_SUCCESS);
+}
+
+/*
+ * Stop reading on 'sock' and clear its 'reading' flag.  Does nothing when
+ * the socket is not marked as reading.
+ */
+void
+isc__nm_stop_reading(isc_nmsocket_t *sock) {
+	int rv;
+
+	if (!atomic_load(&sock->reading)) {
+		return;
+	}
+
+	if (sock->type == isc_nm_udpsocket) {
+		rv = uv_udp_recv_stop(&sock->uv_handle.udp);
+		UV_RUNTIME_CHECK(uv_udp_recv_stop, rv);
+	} else if (sock->type == isc_nm_tcpsocket ||
+		   sock->type == isc_nm_tcpdnssocket ||
+		   sock->type == isc_nm_tlsdnssocket)
+	{
+		rv = uv_read_stop(&sock->uv_handle.stream);
+		UV_RUNTIME_CHECK(uv_read_stop, rv);
+	} else {
+		UNREACHABLE();
+	}
+
+	atomic_store(&sock->reading, false);
+}
+
+/*
+ * Report whether the network manager that owns 'sock' is closing.
+ */
+bool
+isc__nm_closing(isc_nmsocket_t *sock) {
+	bool mgr_closing = atomic_load(&sock->mgr->closing);
+
+	return (mgr_closing);
+}
+
+/*
+ * Report whether 'sock' should be treated as closing: the socket is no
+ * longer active, it is flagged as closing, its network manager is
+ * closing, or it has a server socket that is no longer active.
+ */
+bool
+isc__nmsocket_closing(isc_nmsocket_t *sock) {
+	if (!isc__nmsocket_active(sock)) {
+		return (true);
+	}
+
+	if (atomic_load(&sock->closing)) {
+		return (true);
+	}
+
+	if (isc__nm_closing(sock)) {
+		return (true);
+	}
+
+	return (sock->server != NULL && !isc__nmsocket_active(sock->server));
+}
+
+/*
+ * Hand the socket's buffer to the processing routine that matches the
+ * socket type.  Only TCPDNS and TLSDNS sockets are expected here.
+ */
+static isc_result_t
+processbuffer(isc_nmsocket_t *sock) {
+	if (sock->type == isc_nm_tcpdnssocket) {
+		return (isc__nm_tcpdns_processbuffer(sock));
+	}
+
+	if (sock->type == isc_nm_tlsdnssocket) {
+		return (isc__nm_tlsdns_processbuffer(sock));
+	}
+
+	UNREACHABLE();
+}
+
+/*
+ * Process DNS messages buffered on the socket, one per loop iteration.
+ *
+ * If we only have an incomplete DNS message, we don't touch any
+ * timers. If we do have a full message, reset the timer.
+ *
+ * Stop reading if this is a client socket, or if the server socket
+ * has been set to sequential mode. In this case we'll be called again
+ * later by isc__nm_resume_processing().
+ *
+ * Returns ISC_R_SUCCESS, except when isc__nm_start_reading() fails
+ * after an incomplete message, in which case that error is returned.
+ */
+isc_result_t
+isc__nm_process_sock_buffer(isc_nmsocket_t *sock) {
+ for (;;) {
+ /* Sample the active handle count before processing. */
+ int_fast32_t ah = atomic_load(&sock->ah);
+ isc_result_t result = processbuffer(sock);
+ switch (result) {
+ case ISC_R_NOMORE:
+ /*
+ * Don't reset the timer until we have a
+ * full DNS message.
+ */
+ result = isc__nm_start_reading(sock);
+ if (result != ISC_R_SUCCESS) {
+ return (result);
+ }
+ /*
+ * Start the timer only if there are no externally used
+ * active handles, there's always one active handle
+ * attached internally to sock->recv_handle in
+ * accept_connection()
+ */
+ if (ah == 1) {
+ isc__nmsocket_timer_start(sock);
+ }
+ goto done;
+ case ISC_R_CANCELED:
+ /* Processing was canceled: stop the timer and stop reading. */
+ isc__nmsocket_timer_stop(sock);
+ isc__nm_stop_reading(sock);
+ goto done;
+ case ISC_R_SUCCESS:
+ /*
+ * Stop the timer on the successful message read, this
+ * also allows to restart the timer when we have no more
+ * data.
+ */
+ isc__nmsocket_timer_stop(sock);
+
+ if (atomic_load(&sock->client) ||
+ atomic_load(&sock->sequential))
+ {
+ isc__nm_stop_reading(sock);
+ goto done;
+ }
+ /* Otherwise loop again to process the next message. */
+ break;
+ default:
+ UNREACHABLE();
+ }
+ }
+done:
+ return (ISC_R_SUCCESS);
+}
+
+/*
+ * Resume processing of the data buffered on a server socket.  'arg' is
+ * the socket.  Must run on the socket's own thread; nothing is done when
+ * the socket is closing.
+ */
+void
+isc__nm_resume_processing(void *arg) {
+	isc_nmsocket_t *sock = arg;
+
+	REQUIRE(VALID_NMSOCK(sock));
+	REQUIRE(sock->tid == isc_nm_tid());
+	REQUIRE(!atomic_load(&sock->client));
+
+	if (!isc__nmsocket_closing(sock)) {
+		isc__nm_process_sock_buffer(sock);
+	}
+}
+
+/*
+ * Clear the read timeout on the socket behind 'handle'.
+ *
+ * HTTP and TLS sockets (when built with libnghttp2) delegate to their own
+ * implementations.  For every other socket type the read timeout is set
+ * to zero and the read timer is stopped if it is active.
+ */
+void
+isc_nmhandle_cleartimeout(isc_nmhandle_t *handle) {
+	isc_nmsocket_t *sock = NULL;
+
+	REQUIRE(VALID_NMHANDLE(handle));
+	REQUIRE(VALID_NMSOCK(handle->sock));
+
+	sock = handle->sock;
+
+	switch (sock->type) {
+#if HAVE_LIBNGHTTP2
+	case isc_nm_httpsocket:
+		isc__nm_http_cleartimeout(handle);
+		break;
+	case isc_nm_tlssocket:
+		isc__nm_tls_cleartimeout(handle);
+		break;
+#endif
+	default:
+		sock->read_timeout = 0;
+
+		if (uv_is_active((uv_handle_t *)&sock->read_timer)) {
+			isc__nmsocket_timer_stop(sock);
+		}
+		break;
+	}
+}
+
+/*
+ * Set the read timeout on the socket behind 'handle' to 'timeout'.
+ *
+ * HTTP and TLS sockets (when built with libnghttp2) delegate to their own
+ * implementations.  For every other socket type the value is stored in
+ * the socket and the read timer is restarted.
+ */
+void
+isc_nmhandle_settimeout(isc_nmhandle_t *handle, uint32_t timeout) {
+	isc_nmsocket_t *sock = NULL;
+
+	REQUIRE(VALID_NMHANDLE(handle));
+	REQUIRE(VALID_NMSOCK(handle->sock));
+
+	sock = handle->sock;
+
+	switch (sock->type) {
+#if HAVE_LIBNGHTTP2
+	case isc_nm_httpsocket:
+		isc__nm_http_settimeout(handle, timeout);
+		break;
+	case isc_nm_tlssocket:
+		isc__nm_tls_settimeout(handle, timeout);
+		break;
+#endif
+	default:
+		sock->read_timeout = timeout;
+		isc__nmsocket_timer_restart(sock);
+		break;
+	}
+}
+
+/*
+ * Turn keepalive on or off for the socket behind 'handle'.
+ *
+ * For TCP, TCPDNS and TLSDNS sockets the flag is recorded and the
+ * socket's read and write timeouts are loaded from the manager's
+ * 'keepalive' value (when 'value' is true) or its 'idle' value (when
+ * false).  TLS and HTTP sockets (when built with libnghttp2) delegate to
+ * their own implementations.  For any other protocol this is a no-op.
+ */
+void
+isc_nmhandle_keepalive(isc_nmhandle_t *handle, bool value) {
+	isc_nmsocket_t *sock = NULL;
+
+	REQUIRE(VALID_NMHANDLE(handle));
+	REQUIRE(VALID_NMSOCK(handle->sock));
+
+	sock = handle->sock;
+
+	switch (sock->type) {
+	case isc_nm_tcpsocket:
+	case isc_nm_tcpdnssocket:
+	case isc_nm_tlsdnssocket:
+		atomic_store(&sock->keepalive, value);
+		if (value) {
+			sock->read_timeout =
+				atomic_load(&sock->mgr->keepalive);
+			sock->write_timeout =
+				atomic_load(&sock->mgr->keepalive);
+		} else {
+			sock->read_timeout = atomic_load(&sock->mgr->idle);
+			sock->write_timeout = atomic_load(&sock->mgr->idle);
+		}
+		return;
+#if HAVE_LIBNGHTTP2
+	case isc_nm_tlssocket:
+		isc__nmhandle_tls_keepalive(handle, value);
+		return;
+	case isc_nm_httpsocket:
+		isc__nmhandle_http_keepalive(handle, value);
+		return;
+#endif /* HAVE_LIBNGHTTP2 */
+	default:
+		/* Nothing to do for the remaining protocols. */
+		return;
+	}
+}
+
+/*
+ * Report whether the timer of the socket behind 'handle' is running, as
+ * answered by isc__nmsocket_timer_running().
+ */
+bool
+isc_nmhandle_timer_running(isc_nmhandle_t *handle) {
+	isc_nmsocket_t *sock = NULL;
+
+	REQUIRE(VALID_NMHANDLE(handle));
+	REQUIRE(VALID_NMSOCK(handle->sock));
+
+	sock = handle->sock;
+
+	return (isc__nmsocket_timer_running(sock));
+}
+
+/*
+ * Return the 'extra' member of 'handle'.
+ */
+void *
+isc_nmhandle_getextra(isc_nmhandle_t *handle) {
+	void *extra = NULL;
+
+	REQUIRE(VALID_NMHANDLE(handle));
+
+	extra = handle->extra;
+
+	return (extra);
+}
+
+/*
+ * Return, by value, the peer address recorded in 'handle'.
+ */
+isc_sockaddr_t
+isc_nmhandle_peeraddr(isc_nmhandle_t *handle) {
+	isc_sockaddr_t peer;
+
+	REQUIRE(VALID_NMHANDLE(handle));
+
+	peer = handle->peer;
+
+	return (peer);
+}
+
+/*
+ * Return, by value, the local address recorded in 'handle'.
+ */
+isc_sockaddr_t
+isc_nmhandle_localaddr(isc_nmhandle_t *handle) {
+	isc_sockaddr_t local;
+
+	REQUIRE(VALID_NMHANDLE(handle));
+
+	local = handle->local;
+
+	return (local);
+}
+
+/*
+ * Return the network manager of the socket behind 'handle'.
+ */
+isc_nm_t *
+isc_nmhandle_netmgr(isc_nmhandle_t *handle) {
+	isc_nm_t *mgr = NULL;
+
+	REQUIRE(VALID_NMHANDLE(handle));
+	REQUIRE(VALID_NMSOCK(handle->sock));
+
+	mgr = handle->sock->mgr;
+
+	return (mgr);
+}
+
+/*
+ * Get a uvreq bound to 'sock'.
+ *
+ * When the socket is active, an attempt is made to reuse a request from
+ * the socket's stack of inactive requests; otherwise, or when that stack
+ * yields nothing, a new one is allocated from the manager's memory
+ * context.  The request is reset, gets 'connect_tries' set to 3, is
+ * attached to 'sock' and is stamped with UVREQ_MAGIC before being
+ * returned.
+ */
+isc__nm_uvreq_t *
+isc___nm_uvreq_get(isc_nm_t *mgr, isc_nmsocket_t *sock FLARG) {
+	isc__nm_uvreq_t *req = NULL;
+
+	REQUIRE(VALID_NM(mgr));
+	REQUIRE(VALID_NMSOCK(sock));
+
+	/*
+	 * 'sock' has been validated above (and is attached unconditionally
+	 * below), so no separate NULL test is needed here.
+	 */
+	if (isc__nmsocket_active(sock)) {
+		/* Try to reuse one */
+		req = isc_astack_pop(sock->inactivereqs);
+	}
+
+	if (req == NULL) {
+		req = isc_mem_get(mgr->mctx, sizeof(*req));
+	}
+
+	/* The magic stays zero until the request is fully set up. */
+	*req = (isc__nm_uvreq_t){
+		.magic = 0,
+		.connect_tries = 3,
+	};
+	ISC_LINK_INIT(req, link);
+	req->uv_req.req.data = req;
+	isc___nmsocket_attach(sock, &req->sock FLARG_PASS);
+	req->magic = UVREQ_MAGIC;
+
+	return (req);
+}
+
+/*
+ * Release the uvreq pointed to by 'req0', which must belong to 'sock';
+ * '*req0' is set to NULL.
+ *
+ * The request's magic is cleared.  Outside of address/thread sanitizer
+ * builds, the memory is pushed onto the socket's stack of inactive
+ * requests when the socket is active and the push succeeds; in every
+ * other case it is returned to the manager's memory context.  Afterwards
+ * the request's handle (if any) and the socket reference are detached.
+ */
+void
+isc___nm_uvreq_put(isc__nm_uvreq_t **req0, isc_nmsocket_t *sock FLARG) {
+ isc__nm_uvreq_t *req = NULL;
+ isc_nmhandle_t *handle = NULL;
+
+ REQUIRE(req0 != NULL);
+ REQUIRE(VALID_UVREQ(*req0));
+
+ req = *req0;
+ *req0 = NULL;
+
+ INSIST(sock == req->sock);
+
+ req->magic = 0;
+
+ /*
+ * We need to save this first to make sure that handle,
+ * sock, and the netmgr won't all disappear.
+ */
+ handle = req->handle;
+ req->handle = NULL;
+
+#if !__SANITIZE_ADDRESS__ && !__SANITIZE_THREAD__
+ if (!isc__nmsocket_active(sock) ||
+ !isc_astack_trypush(sock->inactivereqs, req))
+ {
+ isc_mem_put(sock->mgr->mctx, req, sizeof(*req));
+ }
+#else /* !__SANITIZE_ADDRESS__ && !__SANITIZE_THREAD__ */
+ /* Sanitizer builds never recycle requests. */
+ isc_mem_put(sock->mgr->mctx, req, sizeof(*req));
+#endif /* !__SANITIZE_ADDRESS__ && !__SANITIZE_THREAD__ */
+
+ /* Drop the references only after the request memory is dealt with. */
+ if (handle != NULL) {
+ isc__nmhandle_detach(&handle FLARG_PASS);
+ }
+
+ isc___nmsocket_detach(&sock FLARG_PASS);
+}
+
+/*
+ * Send 'region' over the socket behind 'handle', dispatching to the send
+ * routine of the socket's transport.  'cb' and 'cbarg' are passed through
+ * to that routine unchanged.
+ */
+void
+isc_nm_send(isc_nmhandle_t *handle, isc_region_t *region, isc_nm_cb_t cb,
+	    void *cbarg) {
+	isc_nmsocket_t *sock = NULL;
+
+	REQUIRE(VALID_NMHANDLE(handle));
+
+	sock = handle->sock;
+
+	switch (sock->type) {
+	case isc_nm_udpsocket:
+	case isc_nm_udplistener:
+		isc__nm_udp_send(handle, region, cb, cbarg);
+		return;
+	case isc_nm_tcpsocket:
+		isc__nm_tcp_send(handle, region, cb, cbarg);
+		return;
+	case isc_nm_tcpdnssocket:
+		isc__nm_tcpdns_send(handle, region, cb, cbarg);
+		return;
+	case isc_nm_tlsdnssocket:
+		isc__nm_tlsdns_send(handle, region, cb, cbarg);
+		return;
+#if HAVE_LIBNGHTTP2
+	case isc_nm_tlssocket:
+		isc__nm_tls_send(handle, region, cb, cbarg);
+		return;
+	case isc_nm_httpsocket:
+		isc__nm_http_send(handle, region, cb, cbarg);
+		return;
+#endif
+	default:
+		UNREACHABLE();
+	}
+}
+
+/*
+ * Start a read on the socket behind 'handle', dispatching to the read
+ * routine of the socket's transport.  'cb' and 'cbarg' are passed through
+ * to that routine unchanged.
+ */
+void
+isc_nm_read(isc_nmhandle_t *handle, isc_nm_recv_cb_t cb, void *cbarg) {
+	isc_nmsocket_t *sock = NULL;
+
+	REQUIRE(VALID_NMHANDLE(handle));
+
+	sock = handle->sock;
+
+	switch (sock->type) {
+	case isc_nm_udpsocket:
+		isc__nm_udp_read(handle, cb, cbarg);
+		return;
+	case isc_nm_tcpsocket:
+		isc__nm_tcp_read(handle, cb, cbarg);
+		return;
+	case isc_nm_tcpdnssocket:
+		isc__nm_tcpdns_read(handle, cb, cbarg);
+		return;
+	case isc_nm_tlsdnssocket:
+		isc__nm_tlsdns_read(handle, cb, cbarg);
+		return;
+#if HAVE_LIBNGHTTP2
+	case isc_nm_tlssocket:
+		isc__nm_tls_read(handle, cb, cbarg);
+		return;
+	case isc_nm_httpsocket:
+		isc__nm_http_read(handle, cb, cbarg);
+		return;
+#endif
+	default:
+		UNREACHABLE();
+	}
+}
+
+/*
+ * Cancel a read on the socket behind 'handle', dispatching to the
+ * cancel routine of the socket's transport.
+ */
+void
+isc_nm_cancelread(isc_nmhandle_t *handle) {
+	isc_nmsocket_t *sock = NULL;
+
+	REQUIRE(VALID_NMHANDLE(handle));
+
+	sock = handle->sock;
+
+	switch (sock->type) {
+	case isc_nm_udpsocket:
+		isc__nm_udp_cancelread(handle);
+		return;
+	case isc_nm_tcpsocket:
+		isc__nm_tcp_cancelread(handle);
+		return;
+	case isc_nm_tcpdnssocket:
+		isc__nm_tcpdns_cancelread(handle);
+		return;
+	case isc_nm_tlsdnssocket:
+		isc__nm_tlsdns_cancelread(handle);
+		return;
+#if HAVE_LIBNGHTTP2
+	case isc_nm_tlssocket:
+		isc__nm_tls_cancelread(handle);
+		return;
+#endif
+	default:
+		UNREACHABLE();
+	}
+}
+
+/*
+ * Pause reading on the socket behind 'handle'.  Only TCP sockets and,
+ * when built with libnghttp2, TLS sockets are supported.
+ */
+void
+isc_nm_pauseread(isc_nmhandle_t *handle) {
+	REQUIRE(VALID_NMHANDLE(handle));
+
+	switch (handle->sock->type) {
+	case isc_nm_tcpsocket:
+		isc__nm_tcp_pauseread(handle);
+		return;
+#if HAVE_LIBNGHTTP2
+	case isc_nm_tlssocket:
+		isc__nm_tls_pauseread(handle);
+		return;
+#endif
+	default:
+		UNREACHABLE();
+	}
+}
+
+/*
+ * Resume reading on the socket behind 'handle'.  Only TCP sockets and,
+ * when built with libnghttp2, TLS sockets are supported.
+ */
+void
+isc_nm_resumeread(isc_nmhandle_t *handle) {
+	REQUIRE(VALID_NMHANDLE(handle));
+
+	switch (handle->sock->type) {
+	case isc_nm_tcpsocket:
+		isc__nm_tcp_resumeread(handle);
+		return;
+#if HAVE_LIBNGHTTP2
+	case isc_nm_tlssocket:
+		isc__nm_tls_resumeread(handle);
+		return;
+#endif
+	default:
+		UNREACHABLE();
+	}
+}
+
+/*
+ * Stop listening on the listener socket 'sock', dispatching to the
+ * stop-listening routine of the listener's transport.
+ */
+void
+isc_nm_stoplistening(isc_nmsocket_t *sock) {
+	REQUIRE(VALID_NMSOCK(sock));
+
+	switch (sock->type) {
+	case isc_nm_udplistener:
+		isc__nm_udp_stoplistening(sock);
+		return;
+	case isc_nm_tcpdnslistener:
+		isc__nm_tcpdns_stoplistening(sock);
+		return;
+	case isc_nm_tcplistener:
+		isc__nm_tcp_stoplistening(sock);
+		return;
+	case isc_nm_tlsdnslistener:
+		isc__nm_tlsdns_stoplistening(sock);
+		return;
+#if HAVE_LIBNGHTTP2
+	case isc_nm_tlslistener:
+		isc__nm_tls_stoplistening(sock);
+		return;
+	case isc_nm_httplistener:
+		isc__nm_http_stoplistening(sock);
+		return;
+#endif
+	default:
+		UNREACHABLE();
+	}
+}
+
+/*
+ * Stop a listener socket.
+ *
+ * The listener is flagged as closing (it must not have been flagged
+ * before).  A sockstop event is then enqueued to the worker of each
+ * child index, skipping the current thread's own worker when called on a
+ * network thread; in that case the sockstop handler is run directly for
+ * the current worker at the end.
+ */
+void
+isc__nmsocket_stop(isc_nmsocket_t *listener) {
+ /* Stack event used only for the direct call at the end. */
+ isc__netievent_sockstop_t ievent = { .sock = listener };
+
+ REQUIRE(VALID_NMSOCK(listener));
+
+ /* Flip 'closing' from false to true; anything else is a fatal error. */
+ if (!atomic_compare_exchange_strong(&listener->closing,
+ &(bool){ false }, true))
+ {
+ UNREACHABLE();
+ }
+
+ for (size_t i = 0; i < listener->nchildren; i++) {
+ isc__networker_t *worker = &listener->mgr->workers[i];
+ isc__netievent_sockstop_t *ev;
+
+ /* The current thread's worker is handled synchronously below. */
+ if (isc__nm_in_netthread() && i == (size_t)isc_nm_tid()) {
+ continue;
+ }
+
+ ev = isc__nm_get_netievent_sockstop(listener->mgr, listener);
+ isc__nm_enqueue_ievent(worker, (isc__netievent_t *)ev);
+ }
+
+ if (isc__nm_in_netthread()) {
+ isc__nm_async_sockstop(&listener->mgr->workers[isc_nm_tid()],
+ (isc__netievent_t *)&ievent);
+ }
+}
+
+/*
+ * Initialise the listener's barrier for 'nchildren' participants and
+ * record that it has been initialised.  The listener must have at least
+ * one child.
+ */
+void
+isc__nmsocket_barrier_init(isc_nmsocket_t *listener) {
+ REQUIRE(listener->nchildren > 0);
+ isc_barrier_init(&listener->barrier, listener->nchildren);
+ listener->barrier_initialised = true;
+}
+
+/*
+ * Handler for the sockstop event of a listener socket.
+ *
+ * Every caller decrements the listener's 'rchildren' counter and then
+ * waits on the listener's barrier.  Only the call running on the
+ * listener's own thread continues: it flips 'listening' from true to
+ * false, clears the accept and receive callbacks, stops and detaches the
+ * outer socket (if any), and finally marks the listener as closed.
+ */
+void
+isc__nm_async_sockstop(isc__networker_t *worker, isc__netievent_t *ev0) {
+ isc__netievent_sockstop_t *ievent = (isc__netievent_sockstop_t *)ev0;
+ isc_nmsocket_t *listener = ievent->sock;
+ UNUSED(worker);
+
+ (void)atomic_fetch_sub(&listener->rchildren, 1);
+ isc_barrier_wait(&listener->barrier);
+
+ /* Only the listener's own thread performs the final teardown. */
+ if (listener->tid != isc_nm_tid()) {
+ return;
+ }
+
+ if (!atomic_compare_exchange_strong(&listener->listening,
+ &(bool){ true }, false))
+ {
+ UNREACHABLE();
+ }
+
+ /* After the barrier every child must have been accounted for. */
+ INSIST(atomic_load(&listener->rchildren) == 0);
+
+ listener->accept_cb = NULL;
+ listener->accept_cbarg = NULL;
+ listener->recv_cb = NULL;
+ listener->recv_cbarg = NULL;
+
+ if (listener->outer != NULL) {
+ isc_nm_stoplistening(listener->outer);
+ isc__nmsocket_detach(&listener->outer);
+ }
+
+ atomic_store(&listener->closed, true);
+}
+
+/*
+ * Deliver the connect callback of 'uvreq' with result 'eresult'.
+ *
+ * With 'async' set, a connectcb event is enqueued to the socket's
+ * worker; otherwise the callback handler is invoked directly using an
+ * event built on the stack.
+ */
+void
+isc__nm_connectcb(isc_nmsocket_t *sock, isc__nm_uvreq_t *uvreq,
+		  isc_result_t eresult, bool async) {
+	REQUIRE(VALID_NMSOCK(sock));
+	REQUIRE(VALID_UVREQ(uvreq));
+	REQUIRE(VALID_NMHANDLE(uvreq->handle));
+
+	if (async) {
+		isc__netievent_connectcb_t *queued =
+			isc__nm_get_netievent_connectcb(sock->mgr, sock, uvreq,
+							eresult);
+		isc__nm_enqueue_ievent(&sock->mgr->workers[sock->tid],
+				       (isc__netievent_t *)queued);
+		return;
+	}
+
+	isc__netievent_connectcb_t direct = { .sock = sock,
+					      .req = uvreq,
+					      .result = eresult };
+	isc__nm_async_connectcb(NULL, (isc__netievent_t *)&direct);
+}
+
+/*
+ * Handler for the connectcb event: call the request's connect callback
+ * with the request's handle, the event's result and the callback
+ * argument, then release the request.  Must run on the socket's thread.
+ */
+void
+isc__nm_async_connectcb(isc__networker_t *worker, isc__netievent_t *ev0) {
+	isc__netievent_connectcb_t *event = (isc__netievent_connectcb_t *)ev0;
+	isc_nmsocket_t *sock = event->sock;
+	isc__nm_uvreq_t *uvreq = event->req;
+	isc_result_t eresult = event->result;
+
+	UNUSED(worker);
+
+	REQUIRE(VALID_NMSOCK(sock));
+	REQUIRE(VALID_UVREQ(uvreq));
+	REQUIRE(VALID_NMHANDLE(uvreq->handle));
+	REQUIRE(sock->tid == isc_nm_tid());
+	REQUIRE(uvreq->cb.connect != NULL);
+
+	uvreq->cb.connect(uvreq->handle, eresult, uvreq->cbarg);
+
+	isc__nm_uvreq_put(&uvreq, sock);
+}
+
+/*
+ * Deliver the read callback of 'uvreq' with result 'eresult'.
+ *
+ * ISC_R_SUCCESS and ISC_R_TIMEDOUT are delivered by calling the callback
+ * handler directly with an event built on the stack; any other result is
+ * enqueued as a readcb event to the socket's worker.
+ */
+void
+isc__nm_readcb(isc_nmsocket_t *sock, isc__nm_uvreq_t *uvreq,
+	       isc_result_t eresult) {
+	REQUIRE(VALID_NMSOCK(sock));
+	REQUIRE(VALID_UVREQ(uvreq));
+	REQUIRE(VALID_NMHANDLE(uvreq->handle));
+
+	if (eresult != ISC_R_SUCCESS && eresult != ISC_R_TIMEDOUT) {
+		isc__netievent_readcb_t *queued = isc__nm_get_netievent_readcb(
+			sock->mgr, sock, uvreq, eresult);
+		isc__nm_enqueue_ievent(&sock->mgr->workers[sock->tid],
+				       (isc__netievent_t *)queued);
+		return;
+	}
+
+	isc__netievent_readcb_t direct = { .sock = sock,
+					   .req = uvreq,
+					   .result = eresult };
+
+	isc__nm_async_readcb(NULL, (isc__netievent_t *)&direct);
+}
+
+/*
+ * Handler for the readcb event: call the request's receive callback with
+ * the request's handle, the event's result, a region describing the
+ * request's buffer, and the callback argument; then release the request.
+ * Must run on the socket's thread.
+ */
+void
+isc__nm_async_readcb(isc__networker_t *worker, isc__netievent_t *ev0) {
+	isc__netievent_readcb_t *event = (isc__netievent_readcb_t *)ev0;
+	isc_nmsocket_t *sock = event->sock;
+	isc__nm_uvreq_t *uvreq = event->req;
+	isc_result_t eresult = event->result;
+
+	UNUSED(worker);
+
+	REQUIRE(VALID_NMSOCK(sock));
+	REQUIRE(VALID_UVREQ(uvreq));
+	REQUIRE(VALID_NMHANDLE(uvreq->handle));
+	REQUIRE(sock->tid == isc_nm_tid());
+
+	/* Only touch the request's buffer once the request is validated. */
+	isc_region_t region = {
+		.base = (unsigned char *)uvreq->uvbuf.base,
+		.length = uvreq->uvbuf.len,
+	};
+
+	uvreq->cb.recv(uvreq->handle, eresult, &region, uvreq->cbarg);
+
+	isc__nm_uvreq_put(&uvreq, sock);
+}
+
+/*
+ * Deliver the send callback of 'uvreq' with result 'eresult'.
+ *
+ * With 'async' set, a sendcb event is enqueued to the socket's worker;
+ * otherwise the callback handler is invoked directly using an event
+ * built on the stack.
+ */
+void
+isc__nm_sendcb(isc_nmsocket_t *sock, isc__nm_uvreq_t *uvreq,
+	       isc_result_t eresult, bool async) {
+	REQUIRE(VALID_NMSOCK(sock));
+	REQUIRE(VALID_UVREQ(uvreq));
+	REQUIRE(VALID_NMHANDLE(uvreq->handle));
+
+	if (async) {
+		isc__netievent_sendcb_t *queued = isc__nm_get_netievent_sendcb(
+			sock->mgr, sock, uvreq, eresult);
+		isc__nm_enqueue_ievent(&sock->mgr->workers[sock->tid],
+				       (isc__netievent_t *)queued);
+	} else {
+		isc__netievent_sendcb_t direct = { .sock = sock,
+						   .req = uvreq,
+						   .result = eresult };
+		isc__nm_async_sendcb(NULL, (isc__netievent_t *)&direct);
+	}
+}
+
+/*
+ * Handler for the sendcb event: call the request's send callback with
+ * the request's handle, the event's result and the callback argument,
+ * then release the request.  Must run on the socket's thread.
+ */
+void
+isc__nm_async_sendcb(isc__networker_t *worker, isc__netievent_t *ev0) {
+	isc__netievent_sendcb_t *event = (isc__netievent_sendcb_t *)ev0;
+	isc_nmsocket_t *sock = event->sock;
+	isc__nm_uvreq_t *uvreq = event->req;
+	isc_result_t eresult = event->result;
+
+	UNUSED(worker);
+
+	REQUIRE(VALID_NMSOCK(sock));
+	REQUIRE(VALID_UVREQ(uvreq));
+	REQUIRE(VALID_NMHANDLE(uvreq->handle));
+	REQUIRE(sock->tid == isc_nm_tid());
+
+	uvreq->cb.send(uvreq->handle, eresult, uvreq->cbarg);
+
+	isc__nm_uvreq_put(&uvreq, sock);
+}
+
+/*
+ * Handler for the close event: invoke the socket's closehandle_cb with
+ * the socket as its argument.  Must run on the socket's thread, and the
+ * callback must be set.
+ */
+static void
+isc__nm_async_close(isc__networker_t *worker, isc__netievent_t *ev0) {
+	isc__netievent_close_t *event = (isc__netievent_close_t *)ev0;
+	isc_nmsocket_t *sock = event->sock;
+
+	UNUSED(worker);
+
+	REQUIRE(VALID_NMSOCK(sock));
+	REQUIRE(sock->tid == isc_nm_tid());
+	REQUIRE(sock->closehandle_cb != NULL);
+
+	sock->closehandle_cb(sock);
+}
+
+/*
+ * Handler for the detach event: pass the event's handle to
+ * nmhandle_detach_cb().  Must run on the thread of the event's socket.
+ */
+void
+isc__nm_async_detach(isc__networker_t *worker, isc__netievent_t *ev0) {
+ isc__netievent_detach_t *ievent = (isc__netievent_detach_t *)ev0;
+ /* NOTE(review): presumably sets up the arguments used by FLARG_PASS. */
+ FLARG_IEVENT(ievent);
+
+ REQUIRE(VALID_NMSOCK(ievent->sock));
+ REQUIRE(VALID_NMHANDLE(ievent->handle));
+ REQUIRE(ievent->sock->tid == isc_nm_tid());
+
+ UNUSED(worker);
+
+ nmhandle_detach_cb(&ievent->handle FLARG_PASS);
+}
+
+/*
+ * Close callback passed to uv_tcp_close_reset() by isc__nmsocket_reset():
+ * shut the socket down and drop the socket reference that was taken
+ * before the reset was requested.
+ */
+static void
+reset_shutdown(uv_handle_t *handle) {
+ isc_nmsocket_t *sock = uv_handle_get_data(handle);
+
+ isc__nmsocket_shutdown(sock);
+ isc__nmsocket_detach(&sock);
+}
+
+/*
+ * Reset a TCP, TCPDNS or TLSDNS socket that has no parent.
+ *
+ * If the socket's libuv handle is active and not already closing, a
+ * socket reference is taken and uv_tcp_close_reset() is called with
+ * reset_shutdown() as the close callback.  Otherwise the socket is shut
+ * down directly.
+ */
+void
+isc__nmsocket_reset(isc_nmsocket_t *sock) {
+ REQUIRE(VALID_NMSOCK(sock));
+
+ switch (sock->type) {
+ case isc_nm_tcpsocket:
+ case isc_nm_tcpdnssocket:
+ case isc_nm_tlsdnssocket:
+ /*
+ * This can be called from the TCP write timeout, or
+ * from the TCPDNS or TLSDNS branches of isc_nm_bad_request().
+ */
+ REQUIRE(sock->parent == NULL);
+ break;
+ default:
+ UNREACHABLE();
+ break;
+ }
+
+ if (!uv_is_closing(&sock->uv_handle.handle) &&
+ uv_is_active(&sock->uv_handle.handle))
+ {
+ /*
+ * The real shutdown will be handled in the respective
+ * close functions.
+ */
+ /* This reference is dropped again in reset_shutdown(). */
+ isc__nmsocket_attach(sock, &(isc_nmsocket_t *){ NULL });
+ int r = uv_tcp_close_reset(&sock->uv_handle.tcp,
+ reset_shutdown);
+ UV_RUNTIME_CHECK(uv_tcp_close_reset, r);
+ } else {
+ isc__nmsocket_shutdown(sock);
+ }
+}
+
+/*
+ * Shut down 'sock' by dispatching to the shutdown routine of its
+ * transport.  Listener sockets are left untouched.
+ */
+void
+isc__nmsocket_shutdown(isc_nmsocket_t *sock) {
+	REQUIRE(VALID_NMSOCK(sock));
+
+	switch (sock->type) {
+	case isc_nm_udpsocket:
+		isc__nm_udp_shutdown(sock);
+		return;
+	case isc_nm_tcpsocket:
+		isc__nm_tcp_shutdown(sock);
+		return;
+	case isc_nm_tcpdnssocket:
+		isc__nm_tcpdns_shutdown(sock);
+		return;
+	case isc_nm_tlsdnssocket:
+		isc__nm_tlsdns_shutdown(sock);
+		return;
+	case isc_nm_udplistener:
+	case isc_nm_tcplistener:
+	case isc_nm_tcpdnslistener:
+	case isc_nm_tlsdnslistener:
+		/* Nothing to do for listeners. */
+		break;
+	default:
+		UNREACHABLE();
+	}
+}
+
+/*
+ * uv_walk() callback used by isc__nm_async_shutdown().
+ *
+ * Handles that are already closing are skipped.  UDP handles have their
+ * socket shut down.  For TCP handles, TCP/TCPDNS/TLSDNS sockets without a
+ * parent are reset; every other socket on a TCP handle is shut down.
+ * Handles of any other libuv type are ignored.
+ */
+static void
+shutdown_walk_cb(uv_handle_t *handle, void *arg) {
+ isc_nmsocket_t *sock = uv_handle_get_data(handle);
+ UNUSED(arg);
+
+ if (uv_is_closing(handle)) {
+ return;
+ }
+
+ switch (handle->type) {
+ case UV_UDP:
+ isc__nmsocket_shutdown(sock);
+ return;
+ case UV_TCP:
+ switch (sock->type) {
+ case isc_nm_tcpsocket:
+ case isc_nm_tcpdnssocket:
+ case isc_nm_tlsdnssocket:
+ if (sock->parent == NULL) {
+ /* Reset the TCP connections on shutdown */
+ isc__nmsocket_reset(sock);
+ return;
+ }
+ /* Sockets with a parent get the plain shutdown below. */
+ FALLTHROUGH;
+ default:
+ isc__nmsocket_shutdown(sock);
+ }
+
+ return;
+ default:
+ return;
+ }
+}
+
+/*
+ * Handler for the shutdown event: walk every handle of the worker's
+ * libuv loop and apply shutdown_walk_cb() to it.
+ */
+void
+isc__nm_async_shutdown(isc__networker_t *worker, isc__netievent_t *ev0) {
+ UNUSED(ev0);
+
+ uv_walk(&worker->loop, shutdown_walk_cb, NULL);
+}
+
+/*
+ * Try to take the manager's interlock for the calling network thread
+ * without waiting.
+ *
+ * Returns false when not called from a network thread.  Otherwise, under
+ * the manager lock, 'interlocked' is switched from
+ * ISC_NETMGR_NON_INTERLOCKED to the caller's thread id; the return value
+ * tells whether that switch happened.
+ */
+bool
+isc__nm_acquire_interlocked(isc_nm_t *mgr) {
+	int expected = ISC_NETMGR_NON_INTERLOCKED;
+	bool acquired = false;
+
+	if (!isc__nm_in_netthread()) {
+		return (false);
+	}
+
+	LOCK(&mgr->lock);
+	acquired = atomic_compare_exchange_strong(&mgr->interlocked, &expected,
+						  isc_nm_tid());
+	UNLOCK(&mgr->lock);
+
+	return (acquired);
+}
+
+/*
+ * Release the manager's interlock.  Does nothing when not called from a
+ * network thread.
+ *
+ * Under the manager lock, 'interlocked' is reset to
+ * ISC_NETMGR_NON_INTERLOCKED (it must have been held) and the
+ * 'wkstatecond' condition is broadcast.
+ */
+void
+isc__nm_drop_interlocked(isc_nm_t *mgr) {
+	int previous;
+
+	if (!isc__nm_in_netthread()) {
+		return;
+	}
+
+	LOCK(&mgr->lock);
+	previous = atomic_exchange(&mgr->interlocked,
+				   ISC_NETMGR_NON_INTERLOCKED);
+	INSIST(previous != ISC_NETMGR_NON_INTERLOCKED);
+	BROADCAST(&mgr->wkstatecond);
+	UNLOCK(&mgr->lock);
+}
+
+/*
+ * Take the manager's interlock for the calling network thread, waiting
+ * on 'wkstatecond' for as long as it is held elsewhere.  Does nothing
+ * when not called from a network thread.
+ */
+void
+isc__nm_acquire_interlocked_force(isc_nm_t *mgr) {
+	if (!isc__nm_in_netthread()) {
+		return;
+	}
+
+	LOCK(&mgr->lock);
+	for (;;) {
+		/* The expected value must be fresh for every attempt. */
+		int expected = ISC_NETMGR_NON_INTERLOCKED;
+
+		if (atomic_compare_exchange_strong(&mgr->interlocked,
+						   &expected, isc_nm_tid()))
+		{
+			break;
+		}
+
+		WAIT(&mgr->wkstatecond, &mgr->lock);
+	}
+	UNLOCK(&mgr->lock);
+}
+
+/*
+ * Attach 'stats' to the network manager.
+ *
+ * The manager must not have a stats object yet, and 'stats' must hold
+ * exactly isc_sockstatscounter_max counters.
+ */
+void
+isc_nm_setstats(isc_nm_t *mgr, isc_stats_t *stats) {
+ REQUIRE(VALID_NM(mgr));
+ REQUIRE(mgr->stats == NULL);
+ REQUIRE(isc_stats_ncounters(stats) == isc_sockstatscounter_max);
+
+ isc_stats_attach(stats, &mgr->stats);
+}
+
+/*
+ * Increment the statistics counter that 'id' maps to for 'sock'.  Nothing
+ * is counted when the socket has no statistics index table or its
+ * manager has no stats object.
+ */
+void
+isc__nm_incstats(isc_nmsocket_t *sock, isc__nm_statid_t id) {
+	REQUIRE(VALID_NMSOCK(sock));
+	REQUIRE(id < STATID_MAX);
+
+	if (sock->statsindex == NULL || sock->mgr->stats == NULL) {
+		return;
+	}
+
+	isc_stats_increment(sock->mgr->stats, sock->statsindex[id]);
+}
+
+/*
+ * Decrement the statistics counter that 'id' maps to for 'sock'.  Nothing
+ * is counted when the socket has no statistics index table or its
+ * manager has no stats object.
+ */
+void
+isc__nm_decstats(isc_nmsocket_t *sock, isc__nm_statid_t id) {
+	REQUIRE(VALID_NMSOCK(sock));
+	REQUIRE(id < STATID_MAX);
+
+	if (sock->statsindex == NULL || sock->mgr->stats == NULL) {
+		return;
+	}
+
+	isc_stats_decrement(sock->mgr->stats, sock->statsindex[id]);
+}
+
+/*
+ * Create a socket with socket(2) and store its descriptor in '*sockp'.
+ *
+ * Returns ISC_R_SUCCESS, or the isc_result_t conversion of errno when
+ * socket() fails (in which case '*sockp' is not written).
+ */
+isc_result_t
+isc__nm_socket(int domain, int type, int protocol, uv_os_sock_t *sockp) {
+	int fd = socket(domain, type, protocol);
+
+	if (fd >= 0) {
+		*sockp = (uv_os_sock_t)fd;
+		return (ISC_R_SUCCESS);
+	}
+
+	return (isc_errno_toresult(errno));
+}
+
+/*
+ * Close the socket descriptor 'sock'.  The result of close() is
+ * deliberately not examined.
+ */
+void
+isc__nm_closesocket(uv_os_sock_t sock) {
+	(void)close(sock);
+}
+
+/*
+ * Convenience wrappers around setsockopt() for integer options:
+ * setsockopt_on() passes the value 1 and setsockopt_off() the value 0.
+ * Both expand to the setsockopt() call itself, so the result can be
+ * compared directly.
+ */
+#define setsockopt_on(socket, level, name) \
+ setsockopt(socket, level, name, &(int){ 1 }, sizeof(int))
+
+#define setsockopt_off(socket, level, name) \
+ setsockopt(socket, level, name, &(int){ 0 }, sizeof(int))
+
+/*
+ * Enable the platform's "free bind" style option on 'fd', using the
+ * first of IP_FREEBIND, IP_BINDANY/IPV6_BINDANY or SO_BINDANY that is
+ * defined at build time.
+ *
+ * Returns ISC_R_SUCCESS when the option was set, ISC_R_FAILURE when
+ * setsockopt() failed, and ISC_R_NOTIMPLEMENTED when no suitable option
+ * exists (for the *_BINDANY variant: none for the given 'sa_family').
+ */
+isc_result_t
+isc__nm_socket_freebind(uv_os_sock_t fd, sa_family_t sa_family) {
+ /*
+ * Set the IP_FREEBIND (or equivalent option) on the uv_handle.
+ */
+#ifdef IP_FREEBIND
+ UNUSED(sa_family);
+ if (setsockopt_on(fd, IPPROTO_IP, IP_FREEBIND) == -1) {
+ return (ISC_R_FAILURE);
+ }
+ return (ISC_R_SUCCESS);
+#elif defined(IP_BINDANY) || defined(IPV6_BINDANY)
+ /* The option to use depends on the address family. */
+ if (sa_family == AF_INET) {
+#if defined(IP_BINDANY)
+ if (setsockopt_on(fd, IPPROTO_IP, IP_BINDANY) == -1) {
+ return (ISC_R_FAILURE);
+ }
+ return (ISC_R_SUCCESS);
+#endif
+ } else if (sa_family == AF_INET6) {
+#if defined(IPV6_BINDANY)
+ if (setsockopt_on(fd, IPPROTO_IPV6, IPV6_BINDANY) == -1) {
+ return (ISC_R_FAILURE);
+ }
+ return (ISC_R_SUCCESS);
+#endif
+ }
+ /* Reached when the family has no matching option compiled in. */
+ return (ISC_R_NOTIMPLEMENTED);
+#elif defined(SO_BINDANY)
+ UNUSED(sa_family);
+ if (setsockopt_on(fd, SOL_SOCKET, SO_BINDANY) == -1) {
+ return (ISC_R_FAILURE);
+ }
+ return (ISC_R_SUCCESS);
+#else
+ UNUSED(fd);
+ UNUSED(sa_family);
+ return (ISC_R_NOTIMPLEMENTED);
+#endif
+}
+
+/*
+ * Enable local address reuse on 'fd': SO_REUSEPORT where it is defined
+ * and the platform is not Linux, otherwise SO_REUSEADDR.
+ *
+ * Returns ISC_R_SUCCESS, ISC_R_FAILURE when setsockopt() failed, or
+ * ISC_R_NOTIMPLEMENTED when neither option is available.
+ */
+isc_result_t
+isc__nm_socket_reuse(uv_os_sock_t fd) {
+ /*
+ * Generally, the SO_REUSEADDR socket option allows reuse of
+ * local addresses.
+ *
+ * On the BSDs, SO_REUSEPORT implies SO_REUSEADDR but with some
+ * additional refinements for programs that use multicast.
+ *
+ * On Linux, SO_REUSEPORT has different semantics: it _shares_ the port
+ * rather than steal it from the current listener, so we don't use it
+ * here, but rather in isc__nm_socket_reuse_lb().
+ *
+ * On Windows, it also allows a socket to forcibly bind to a port in use
+ * by another socket.
+ */
+
+#if defined(SO_REUSEPORT) && !defined(__linux__)
+ if (setsockopt_on(fd, SOL_SOCKET, SO_REUSEPORT) == -1) {
+ return (ISC_R_FAILURE);
+ }
+ return (ISC_R_SUCCESS);
+#elif defined(SO_REUSEADDR)
+ if (setsockopt_on(fd, SOL_SOCKET, SO_REUSEADDR) == -1) {
+ return (ISC_R_FAILURE);
+ }
+ return (ISC_R_SUCCESS);
+#else
+ UNUSED(fd);
+ return (ISC_R_NOTIMPLEMENTED);
+#endif
+}
+
+/*
+ * Enable load-balanced port sharing on 'fd'.
+ *
+ * Returns ISC_R_SUCCESS, ISC_R_FAILURE when setsockopt() failed, or
+ * ISC_R_NOTIMPLEMENTED when the platform offers no suitable option.
+ */
+isc_result_t
+isc__nm_socket_reuse_lb(uv_os_sock_t fd) {
+	/*
+	 * On FreeBSD 12+, the SO_REUSEPORT_LB socket option lets sockets
+	 * bind to an identical socket address.  For UDP sockets this can
+	 * distribute incoming datagrams across multiple processes (or
+	 * threads) better than the traditional technique of having them
+	 * all compete to receive datagrams on one socket.
+	 *
+	 * On Linux, plain SO_REUSEPORT achieves the same thing.
+	 */
+#if defined(SO_REUSEPORT_LB)
+	if (setsockopt_on(fd, SOL_SOCKET, SO_REUSEPORT_LB) == -1) {
+		return (ISC_R_FAILURE);
+	}
+	return (ISC_R_SUCCESS);
+#elif defined(SO_REUSEPORT) && defined(__linux__)
+	if (setsockopt_on(fd, SOL_SOCKET, SO_REUSEPORT) == -1) {
+		return (ISC_R_FAILURE);
+	}
+	return (ISC_R_SUCCESS);
+#else
+	UNUSED(fd);
+	return (ISC_R_NOTIMPLEMENTED);
+#endif
+}
+
+/*
+ * Set the SO_INCOMING_CPU option on 'fd' (with the value 1) where the
+ * option is defined.
+ *
+ * Returns ISC_R_SUCCESS, ISC_R_FAILURE when setsockopt() failed, or
+ * ISC_R_NOTIMPLEMENTED when the option is not available.
+ */
+isc_result_t
+isc__nm_socket_incoming_cpu(uv_os_sock_t fd) {
+#ifdef SO_INCOMING_CPU
+	if (setsockopt_on(fd, SOL_SOCKET, SO_INCOMING_CPU) == -1) {
+		return (ISC_R_FAILURE);
+	}
+	return (ISC_R_SUCCESS);
+#else
+	UNUSED(fd);
+	return (ISC_R_NOTIMPLEMENTED);
+#endif
+}
+
+/*
+ * Disable Path MTU Discovery on 'fd' for the given address family.
+ *
+ * For each family the "don't fragment" option is switched off where it
+ * is defined; otherwise the MTU discovery option is set to
+ * IP_PMTUDISC_OMIT where both are defined.
+ *
+ * Returns ISC_R_SUCCESS, ISC_R_FAILURE when setsockopt() failed,
+ * ISC_R_FAMILYNOSUPPORT for a family other than AF_INET/AF_INET6, or
+ * ISC_R_NOTIMPLEMENTED when no suitable option is compiled in.
+ */
+isc_result_t
+isc__nm_socket_disable_pmtud(uv_os_sock_t fd, sa_family_t sa_family) {
+ /*
+ * Disable the Path MTU Discovery on IP packets
+ */
+ if (sa_family == AF_INET6) {
+#if defined(IPV6_DONTFRAG)
+ if (setsockopt_off(fd, IPPROTO_IPV6, IPV6_DONTFRAG) == -1) {
+ return (ISC_R_FAILURE);
+ } else {
+ return (ISC_R_SUCCESS);
+ }
+#elif defined(IPV6_MTU_DISCOVER) && defined(IP_PMTUDISC_OMIT)
+ if (setsockopt(fd, IPPROTO_IPV6, IPV6_MTU_DISCOVER,
+ &(int){ IP_PMTUDISC_OMIT }, sizeof(int)) == -1)
+ {
+ return (ISC_R_FAILURE);
+ } else {
+ return (ISC_R_SUCCESS);
+ }
+#else
+ UNUSED(fd);
+#endif
+ } else if (sa_family == AF_INET) {
+#if defined(IP_DONTFRAG)
+ if (setsockopt_off(fd, IPPROTO_IP, IP_DONTFRAG) == -1) {
+ return (ISC_R_FAILURE);
+ } else {
+ return (ISC_R_SUCCESS);
+ }
+#elif defined(IP_MTU_DISCOVER) && defined(IP_PMTUDISC_OMIT)
+ if (setsockopt(fd, IPPROTO_IP, IP_MTU_DISCOVER,
+ &(int){ IP_PMTUDISC_OMIT }, sizeof(int)) == -1)
+ {
+ return (ISC_R_FAILURE);
+ } else {
+ return (ISC_R_SUCCESS);
+ }
+#else
+ UNUSED(fd);
+#endif
+ } else {
+ return (ISC_R_FAMILYNOSUPPORT);
+ }
+
+ /* Only reached when no option was compiled in for the family. */
+ return (ISC_R_NOTIMPLEMENTED);
+}
+
+/*
+ * Enable the IPv6-only option on an IPv6 socket.
+ *
+ * Returns ISC_R_SUCCESS, ISC_R_FAILURE when setsockopt() failed, or
+ * ISC_R_NOTIMPLEMENTED when 'sa_family' is not AF_INET6 or IPV6_V6ONLY
+ * is not available.
+ */
+isc_result_t
+isc__nm_socket_v6only(uv_os_sock_t fd, sa_family_t sa_family) {
+	if (sa_family != AF_INET6) {
+		return (ISC_R_NOTIMPLEMENTED);
+	}
+
+#if defined(IPV6_V6ONLY)
+	if (setsockopt_on(fd, IPPROTO_IPV6, IPV6_V6ONLY) == -1) {
+		return (ISC_R_FAILURE);
+	}
+	return (ISC_R_SUCCESS);
+#else
+	UNUSED(fd);
+	return (ISC_R_NOTIMPLEMENTED);
+#endif
+}
+
+/*
+ * Check whether a socket of the given type can be bound to 'addr' by
+ * creating a temporary socket, binding it, and closing it again.
+ *
+ * Returns ISC_R_SUCCESS when the bind works, ISC_R_NOTIMPLEMENTED for a
+ * socket type other than TCP or UDP, or the isc_result_t conversion of
+ * errno when socket() or bind() fails.
+ */
+isc_result_t
+isc_nm_checkaddr(const isc_sockaddr_t *addr, isc_socktype_t type) {
+	int proto, pf, addrlen, fd, r;
+	isc_result_t result = ISC_R_SUCCESS;
+
+	REQUIRE(addr != NULL);
+
+	switch (type) {
+	case isc_socktype_tcp:
+		proto = SOCK_STREAM;
+		break;
+	case isc_socktype_udp:
+		proto = SOCK_DGRAM;
+		break;
+	default:
+		return (ISC_R_NOTIMPLEMENTED);
+	}
+
+	pf = isc_sockaddr_pf(addr);
+	if (pf == AF_INET) {
+		addrlen = sizeof(struct sockaddr_in);
+	} else {
+		addrlen = sizeof(struct sockaddr_in6);
+	}
+
+	fd = socket(pf, proto, 0);
+	if (fd < 0) {
+		return (isc_errno_toresult(errno));
+	}
+
+	r = bind(fd, (const struct sockaddr *)&addr->type.sa, addrlen);
+	if (r < 0) {
+		/*
+		 * Convert errno now: the close() below may overwrite it
+		 * and hide the reason why bind() failed.
+		 */
+		result = isc_errno_toresult(errno);
+	}
+
+	close(fd);
+	return (result);
+}
+
+/*
+ * Pick the TCP socket option used by isc__nm_socket_connectiontimeout():
+ * the first one defined out of TCP_CONNECTIONTIMEOUT,
+ * TCP_RXT_CONNDROPTIME, TCP_USER_TIMEOUT and TCP_KEEPINIT.
+ *
+ * TIMEOUT_TYPE is the C type of the option value and TIMEOUT_DIV the
+ * divisor applied to the millisecond timeout before it is passed on.
+ * None of the three macros is defined when no option is available.
+ */
+#if defined(TCP_CONNECTIONTIMEOUT)
+#define TIMEOUT_TYPE int
+#define TIMEOUT_DIV 1000
+#define TIMEOUT_OPTNAME TCP_CONNECTIONTIMEOUT
+#elif defined(TCP_RXT_CONNDROPTIME)
+#define TIMEOUT_TYPE int
+#define TIMEOUT_DIV 1000
+#define TIMEOUT_OPTNAME TCP_RXT_CONNDROPTIME
+#elif defined(TCP_USER_TIMEOUT)
+#define TIMEOUT_TYPE unsigned int
+#define TIMEOUT_DIV 1
+#define TIMEOUT_OPTNAME TCP_USER_TIMEOUT
+#elif defined(TCP_KEEPINIT)
+#define TIMEOUT_TYPE int
+#define TIMEOUT_DIV 1000
+#define TIMEOUT_OPTNAME TCP_KEEPINIT
+#endif
+
+/*
+ * Set the TCP connection timeout on 'fd' from 'timeout_ms' using the
+ * option selected by TIMEOUT_OPTNAME.
+ *
+ * Returns ISC_R_FAILURE when setsockopt() fails and ISC_R_SUCCESS
+ * otherwise, including on platforms where no option is available and
+ * nothing is done.
+ */
+isc_result_t
+isc__nm_socket_connectiontimeout(uv_os_sock_t fd, int timeout_ms) {
+#if defined(TIMEOUT_OPTNAME)
+ /* Scale the milliseconds to the unit implied by TIMEOUT_DIV. */
+ TIMEOUT_TYPE timeout = timeout_ms / TIMEOUT_DIV;
+
+ /* A value that scaled down to zero is raised to 1. */
+ if (timeout == 0) {
+ timeout = 1;
+ }
+
+ if (setsockopt(fd, IPPROTO_TCP, TIMEOUT_OPTNAME, &timeout,
+ sizeof(timeout)) == -1)
+ {
+ return (ISC_R_FAILURE);
+ }
+
+ return (ISC_R_SUCCESS);
+#else
+ UNUSED(fd);
+ UNUSED(timeout_ms);
+
+ return (ISC_R_SUCCESS);
+#endif
+}
+
+/*
+ * Enable TCP_NODELAY on 'fd'.
+ *
+ * Returns ISC_R_FAILURE when setsockopt() fails and ISC_R_SUCCESS
+ * otherwise, including when TCP_NODELAY is not available.
+ */
+isc_result_t
+isc__nm_socket_tcp_nodelay(uv_os_sock_t fd) {
+#ifdef TCP_NODELAY
+	if (setsockopt_on(fd, IPPROTO_TCP, TCP_NODELAY) == -1) {
+		return (ISC_R_FAILURE);
+	}
+	return (ISC_R_SUCCESS);
+#else
+	UNUSED(fd);
+	return (ISC_R_SUCCESS);
+#endif
+}
+
+/*
+ * Set the TCP_MAXSEG option on 'fd' to 'size'.
+ *
+ * Returns ISC_R_FAILURE when setsockopt() fails and ISC_R_SUCCESS
+ * otherwise, including when TCP_MAXSEG is not available.
+ */
+isc_result_t
+isc__nm_socket_tcp_maxseg(uv_os_sock_t fd, int size) {
+#ifdef TCP_MAXSEG
+	/* Test for -1 explicitly, like every other setsockopt() call here. */
+	if (setsockopt(fd, IPPROTO_TCP, TCP_MAXSEG, &size, sizeof(size)) ==
+	    -1)
+	{
+		return (ISC_R_FAILURE);
+	}
+	return (ISC_R_SUCCESS);
+#else
+	UNUSED(fd);
+	UNUSED(size);
+	return (ISC_R_SUCCESS);
+#endif
+}
+
+/*
+ * Restrict an IPv6 socket to the minimum MTU: IPV6_USE_MIN_MTU is
+ * switched on where defined, otherwise IPV6_MTU is set to 1280 where
+ * defined.
+ *
+ * Returns ISC_R_FAILURE when setsockopt() fails and ISC_R_SUCCESS
+ * otherwise, including for non-IPv6 families and when neither option is
+ * available.
+ */
+isc_result_t
+isc__nm_socket_min_mtu(uv_os_sock_t fd, sa_family_t sa_family) {
+ /* Nothing to do for anything but IPv6. */
+ if (sa_family != AF_INET6) {
+ return (ISC_R_SUCCESS);
+ }
+#ifdef IPV6_USE_MIN_MTU
+ if (setsockopt_on(fd, IPPROTO_IPV6, IPV6_USE_MIN_MTU) == -1) {
+ return (ISC_R_FAILURE);
+ }
+#elif defined(IPV6_MTU)
+ if (setsockopt(fd, IPPROTO_IPV6, IPV6_MTU, &(int){ 1280 },
+ sizeof(int)) == -1)
+ {
+ return (ISC_R_FAILURE);
+ }
+#else
+ UNUSED(fd);
+#endif
+
+ return (ISC_R_SUCCESS);
+}
+
+/*
+ * Apply the manager's configured receive and send buffer sizes to
+ * 'handle'.  The TCP or UDP pair of settings is chosen by the libuv
+ * handle type; a size is only applied when it is greater than zero.
+ */
+void
+isc__nm_set_network_buffers(isc_nm_t *nm, uv_handle_t *handle) {
+	int32_t rcvbuf = 0;
+	int32_t sndbuf = 0;
+
+	if (handle->type == UV_TCP) {
+		rcvbuf = atomic_load_relaxed(&nm->recv_tcp_buffer_size);
+		sndbuf = atomic_load_relaxed(&nm->send_tcp_buffer_size);
+	} else if (handle->type == UV_UDP) {
+		rcvbuf = atomic_load_relaxed(&nm->recv_udp_buffer_size);
+		sndbuf = atomic_load_relaxed(&nm->send_udp_buffer_size);
+	} else {
+		UNREACHABLE();
+	}
+
+	if (rcvbuf > 0) {
+		int rv = uv_recv_buffer_size(handle, &rcvbuf);
+		UV_RUNTIME_CHECK(uv_recv_buffer_size, rv);
+	}
+
+	if (sndbuf > 0) {
+		int rv = uv_send_buffer_size(handle, &sndbuf);
+		UV_RUNTIME_CHECK(uv_send_buffer_size, rv);
+	}
+}
+
+/*
+ * Thread-function shaped wrapper that runs the work callback of the
+ * isc__nm_work_t passed in 'arg' on the work's data.  Always returns 0.
+ */
+static isc_threadresult_t
+isc__nm_work_run(isc_threadarg_t arg) {
+ isc__nm_work_t *work = (isc__nm_work_t *)arg;
+
+ work->cb(work->data);
+
+ return ((isc_threadresult_t)0);
+}
+
+/*
+ * Work callback given to uv_queue_work(): run the offloaded work item
+ * attached to 'req'.
+ *
+ * When isc_tid_v still equals SIZE_MAX, the work is run through a
+ * trampoline; otherwise it is run directly.
+ */
+static void
+isc__nm_work_cb(uv_work_t *req) {
+	isc__nm_work_t *work = uv_req_get_data((uv_req_t *)req);
+
+	if (isc_tid_v != SIZE_MAX) {
+		(void)isc__nm_work_run((isc_threadarg_t)work);
+		return;
+	}
+
+	isc__trampoline_t *trampoline =
+		isc__trampoline_get(isc__nm_work_run, work);
+	(void)isc__trampoline_run(trampoline);
+}
+
+/*
+ * After-work callback given to uv_queue_work(): report completion of the
+ * work item attached to 'req' to its after-work callback, then free the
+ * work item and detach from the network manager.
+ *
+ * A non-zero libuv 'status' is converted to an isc_result_t; zero is
+ * reported as ISC_R_SUCCESS.
+ */
+static void
+isc__nm_after_work_cb(uv_work_t *req, int status) {
+	isc__nm_work_t *work = uv_req_get_data((uv_req_t *)req);
+	isc_nm_t *netmgr = work->netmgr;
+	isc_result_t result = (status == 0) ? ISC_R_SUCCESS
+					    : isc__nm_uverr2result(status);
+
+	work->after_cb(work->data, result);
+
+	isc_mem_put(netmgr->mctx, work, sizeof(*work));
+
+	isc_nm_detach(&netmgr);
+}
+
+/*
+ * Queue 'work_cb' through uv_queue_work() on the calling network
+ * thread's libuv loop, with 'after_work_cb' to be called on completion;
+ * both receive 'data'.
+ *
+ * Must be called from a network thread.  The work item is allocated from
+ * the manager's memory context and holds a reference to the manager;
+ * both are released in isc__nm_after_work_cb().
+ */
+void
+isc_nm_work_offload(isc_nm_t *netmgr, isc_nm_workcb_t work_cb,
+ isc_nm_after_workcb_t after_work_cb, void *data) {
+ isc__networker_t *worker = NULL;
+ isc__nm_work_t *work = NULL;
+ int r;
+
+ REQUIRE(isc__nm_in_netthread());
+ REQUIRE(VALID_NM(netmgr));
+
+ worker = &netmgr->workers[isc_nm_tid()];
+
+ work = isc_mem_get(netmgr->mctx, sizeof(*work));
+ *work = (isc__nm_work_t){
+ .cb = work_cb,
+ .after_cb = after_work_cb,
+ .data = data,
+ };
+
+ isc_nm_attach(netmgr, &work->netmgr);
+
+ /* Let the libuv callbacks find the work item from the request. */
+ uv_req_set_data((uv_req_t *)&work->req, work);
+
+ r = uv_queue_work(&worker->loop, &work->req, isc__nm_work_cb,
+ isc__nm_after_work_cb);
+ UV_RUNTIME_CHECK(uv_queue_work, r);
+}
+
+/*
+ * Put the connection behind 'handle' into sequential (non-pipelined)
+ * mode.
+ *
+ * Supported for TCPDNS and TLSDNS sockets; for HTTP sockets the call
+ * returns without doing anything.  The read timer is stopped, reading is
+ * stopped, and the socket's 'sequential' flag is set.
+ */
+void
+isc_nm_sequential(isc_nmhandle_t *handle) {
+ isc_nmsocket_t *sock = NULL;
+
+ REQUIRE(VALID_NMHANDLE(handle));
+ REQUIRE(VALID_NMSOCK(handle->sock));
+
+ sock = handle->sock;
+
+ switch (sock->type) {
+ case isc_nm_tcpdnssocket:
+ case isc_nm_tlsdnssocket:
+ break;
+ case isc_nm_httpsocket:
+ return;
+ default:
+ UNREACHABLE();
+ }
+
+ /*
+ * We don't want pipelining on this connection. That means
+ * that we need to pause after reading each request, and
+ * resume only after the request has been processed. This
+ * is done in isc__nm_resume_processing(), which is the
+ * socket's closehandle_cb callback, called whenever a handle
+ * is released.
+ */
+ isc__nmsocket_timer_stop(sock);
+ isc__nm_stop_reading(sock);
+ atomic_store(&sock->sequential, true);
+}
+
+/*
+ * React to a malformed request received on 'handle', according to the
+ * type of the underlying socket:
+ *  - UDP: nothing is done;
+ *  - TCPDNS / TLSDNS: the socket is reset (it must have no parent);
+ *  - HTTP (only when built with libnghttp2): delegated to
+ *    isc__nm_http_bad_request();
+ *  - any other type is a programming error.
+ */
+void
+isc_nm_bad_request(isc_nmhandle_t *handle) {
+ isc_nmsocket_t *sock = NULL;
+
+ REQUIRE(VALID_NMHANDLE(handle));
+ REQUIRE(VALID_NMSOCK(handle->sock));
+
+ sock = handle->sock;
+
+ switch (sock->type) {
+ case isc_nm_udpsocket:
+ return;
+ case isc_nm_tcpdnssocket:
+ case isc_nm_tlsdnssocket:
+ REQUIRE(sock->parent == NULL);
+ isc__nmsocket_reset(sock);
+ return;
+#if HAVE_LIBNGHTTP2
+ case isc_nm_httpsocket:
+ isc__nm_http_bad_request(handle);
+ return;
+#endif /* HAVE_LIBNGHTTP2 */
+ /* Raw TCP/TLS stream sockets must never reach this function. */
+ case isc_nm_tcpsocket:
+#if HAVE_LIBNGHTTP2
+ case isc_nm_tlssocket:
+#endif /* HAVE_LIBNGHTTP2 */
+ default:
+ UNREACHABLE();
+ break;
+ }
+}
+
+/*
+ * Tell whether a zone transfer is permitted over the transport behind
+ * 'handle'.
+ *
+ * Returns:
+ *  - ISC_R_SUCCESS for TCPDNS sockets;
+ *  - the verdict of isc__nm_tlsdns_xfr_checkperm() for TLSDNS sockets;
+ *  - ISC_R_NOPERM for every other socket type.
+ */
+isc_result_t
+isc_nm_xfr_checkperm(isc_nmhandle_t *handle) {
+	isc_nmsocket_t *sock = NULL;
+
+	REQUIRE(VALID_NMHANDLE(handle));
+	REQUIRE(VALID_NMSOCK(handle->sock));
+
+	sock = handle->sock;
+
+	if (sock->type == isc_nm_tcpdnssocket) {
+		return (ISC_R_SUCCESS);
+	}
+
+	if (sock->type == isc_nm_tlsdnssocket) {
+		return (isc__nm_tlsdns_xfr_checkperm(sock));
+	}
+
+	return (ISC_R_NOPERM);
+}
+
+/*
+ * Return true when the socket behind 'handle' is an HTTP socket.
+ */
+bool
+isc_nm_is_http_handle(isc_nmhandle_t *handle) {
+	REQUIRE(VALID_NMHANDLE(handle));
+	REQUIRE(VALID_NMSOCK(handle->sock));
+
+	const bool is_http = (handle->sock->type == isc_nm_httpsocket);
+
+	return (is_http);
+}
+
+/*
+ * Pass 'ttl' on to the HTTP layer for 'handle' via
+ * isc__nm_http_set_maxage(). This only has an effect on HTTP sockets
+ * (and only when built with libnghttp2); UDP, TCPDNS and TLSDNS
+ * sockets are silently ignored, and any other socket type is a
+ * programming error.
+ *
+ * Requires that the socket is not a client socket.
+ */
+void
+isc_nm_set_maxage(isc_nmhandle_t *handle, const uint32_t ttl) {
+ isc_nmsocket_t *sock = NULL;
+
+ REQUIRE(VALID_NMHANDLE(handle));
+ REQUIRE(VALID_NMSOCK(handle->sock));
+ REQUIRE(!atomic_load(&handle->sock->client));
+
+#if !HAVE_LIBNGHTTP2
+ /* Without HTTP support no branch below consumes 'ttl'. */
+ UNUSED(ttl);
+#endif
+
+ sock = handle->sock;
+ switch (sock->type) {
+#if HAVE_LIBNGHTTP2
+ case isc_nm_httpsocket:
+ isc__nm_http_set_maxage(handle, ttl);
+ break;
+#endif /* HAVE_LIBNGHTTP2 */
+ case isc_nm_udpsocket:
+ case isc_nm_tcpdnssocket:
+ case isc_nm_tlsdnssocket:
+ return;
+ break;
+
+ case isc_nm_tcpsocket:
+#if HAVE_LIBNGHTTP2
+ case isc_nm_tlssocket:
+#endif /* HAVE_LIBNGHTTP2 */
+ default:
+ UNREACHABLE();
+ break;
+ }
+}
+
+/*
+ * Return the type of the socket that 'handle' belongs to.
+ */
+isc_nmsocket_type
+isc_nm_socket_type(const isc_nmhandle_t *handle) {
+	REQUIRE(VALID_NMHANDLE(handle));
+	REQUIRE(VALID_NMSOCK(handle->sock));
+
+	const isc_nmsocket_t *sock = handle->sock;
+
+	return (sock->type);
+}
+
+/*
+ * Report whether the transport behind 'handle' is encrypted.
+ *
+ * Returns true for TLSDNS sockets and, when built with libnghttp2, for
+ * TLS stream sockets; for HTTP sockets the answer comes from
+ * isc__nm_http_has_encryption(). All other socket types yield false.
+ */
+bool
+isc_nm_has_encryption(const isc_nmhandle_t *handle) {
+ REQUIRE(VALID_NMHANDLE(handle));
+ REQUIRE(VALID_NMSOCK(handle->sock));
+
+ switch (handle->sock->type) {
+ case isc_nm_tlsdnssocket:
+#if HAVE_LIBNGHTTP2
+ case isc_nm_tlssocket:
+#endif /* HAVE_LIBNGHTTP2 */
+ return (true);
+#if HAVE_LIBNGHTTP2
+ case isc_nm_httpsocket:
+ return (isc__nm_http_has_encryption(handle));
+#endif /* HAVE_LIBNGHTTP2 */
+ default:
+ return (false);
+ };
+
+ /* Not reached: every switch branch above returns. */
+ return (false);
+}
+
+/*
+ * Handler for the "settlsctx" network event: installs the TLS context
+ * carried by the event on the listener socket for the thread that is
+ * running this handler (the id returned by isc_nm_tid()).
+ *
+ * Only TLSDNS listeners and, when built with libnghttp2, TLS listeners
+ * are expected; any other socket type is a programming error.
+ */
+void
+isc__nm_async_settlsctx(isc__networker_t *worker, isc__netievent_t *ev0) {
+ isc__netievent__tlsctx_t *ev_tlsctx = (isc__netievent__tlsctx_t *)ev0;
+ const int tid = isc_nm_tid();
+ isc_nmsocket_t *listener = ev_tlsctx->sock;
+ isc_tlsctx_t *tlsctx = ev_tlsctx->tlsctx;
+
+ UNUSED(worker);
+
+ switch (listener->type) {
+ case isc_nm_tlsdnslistener:
+ isc__nm_async_tlsdns_set_tlsctx(listener, tlsctx, tid);
+ break;
+#if HAVE_LIBNGHTTP2
+ case isc_nm_tlslistener:
+ isc__nm_async_tls_set_tlsctx(listener, tlsctx, tid);
+ break;
+#endif /* HAVE_LIBNGHTTP2 */
+ default:
+ UNREACHABLE();
+ break;
+ };
+}
+
+/*
+ * Ask every worker thread of the listener's network manager to update
+ * its TLS context reference, by queueing one "settlsctx" event per
+ * worker.
+ */
+static void
+set_tlsctx_workers(isc_nmsocket_t *listener, isc_tlsctx_t *tlsctx) {
+	isc_nm_t *mgr = listener->mgr;
+	const size_t nworkers = (size_t)mgr->nworkers;
+
+	for (size_t tid = 0; tid < nworkers; tid++) {
+		isc__netievent__tlsctx_t *ev =
+			isc__nm_get_netievent_settlsctx(mgr, listener, tlsctx);
+
+		isc__nm_enqueue_ievent(&mgr->workers[tid],
+				       (isc__netievent_t *)ev);
+	}
+}
+
+/*
+ * Replace the TLS context used by 'listener' with 'tlsctx'.
+ *
+ * TLSDNS listeners and (with libnghttp2) TLS listeners are updated by
+ * queueing an event to every worker through set_tlsctx_workers(); HTTP
+ * listeners are handled by isc__nm_http_set_tlsctx(). Any other socket
+ * type is a programming error.
+ *
+ * Requires a valid 'listener' and a non-NULL 'tlsctx'.
+ */
+void
+isc_nmsocket_set_tlsctx(isc_nmsocket_t *listener, isc_tlsctx_t *tlsctx) {
+ REQUIRE(VALID_NMSOCK(listener));
+ REQUIRE(tlsctx != NULL);
+
+ switch (listener->type) {
+#if HAVE_LIBNGHTTP2
+ case isc_nm_httplistener:
+ /*
+ * We handle HTTP listener sockets differently, as they rely
+ * on underlying TLS sockets for networking. The TLS context
+ * will get passed to these underlying sockets via the call to
+ * isc__nm_http_set_tlsctx().
+ */
+ isc__nm_http_set_tlsctx(listener, tlsctx);
+ break;
+ case isc_nm_tlslistener:
+ set_tlsctx_workers(listener, tlsctx);
+ break;
+#endif /* HAVE_LIBNGHTTP2 */
+ case isc_nm_tlsdnslistener:
+ set_tlsctx_workers(listener, tlsctx);
+ break;
+ default:
+ UNREACHABLE();
+ break;
+ };
+}
+
+/*
+ * Return the TLS peer verification result string for 'handle', as
+ * produced by the transport-specific helper for TLSDNS sockets and,
+ * when built with libnghttp2, TLS and HTTP sockets.
+ *
+ * Returns NULL for every other socket type.
+ */
+const char *
+isc_nm_verify_tls_peer_result_string(const isc_nmhandle_t *handle) {
+ isc_nmsocket_t *sock;
+
+ REQUIRE(VALID_NMHANDLE(handle));
+ REQUIRE(VALID_NMSOCK(handle->sock));
+
+ sock = handle->sock;
+ switch (sock->type) {
+ case isc_nm_tlsdnssocket:
+ return (isc__nm_tlsdns_verify_tls_peer_result_string(handle));
+ break;
+#if HAVE_LIBNGHTTP2
+ case isc_nm_tlssocket:
+ return (isc__nm_tls_verify_tls_peer_result_string(handle));
+ break;
+ case isc_nm_httpsocket:
+ return (isc__nm_http_verify_tls_peer_result_string(handle));
+ break;
+#endif /* HAVE_LIBNGHTTP2 */
+ default:
+ break;
+ }
+
+ /* Socket types without TLS peer verification. */
+ return (NULL);
+}
+
+/*
+ * Forward 'max_streams' to isc__nm_http_set_max_streams() for an HTTP
+ * listener. For every other listener type, and in builds without
+ * libnghttp2, the call is a no-op.
+ */
+void
+isc_nmsocket_set_max_streams(isc_nmsocket_t *listener,
+ const uint32_t max_streams) {
+ REQUIRE(VALID_NMSOCK(listener));
+ switch (listener->type) {
+#if HAVE_LIBNGHTTP2
+ case isc_nm_httplistener:
+ isc__nm_http_set_max_streams(listener, max_streams);
+ break;
+#endif /* HAVE_LIBNGHTTP2 */
+ default:
+ /* Keeps the parameter "used" when HTTP support is absent. */
+ UNUSED(max_streams);
+ break;
+ };
+ return;
+}
+
+/*
+ * Log, at debug level 1, whether the TLS session on 'tls' was resumed
+ * or newly created, together with the peer and local addresses of
+ * 'sock' and whether 'tls' is the server or the client side.
+ *
+ * Requires a valid 'sock' and a non-NULL 'tls'. Nothing is formatted
+ * when the logging context would not emit the message.
+ */
+void
+isc__nmsocket_log_tls_session_reuse(isc_nmsocket_t *sock, isc_tls_t *tls) {
+	const int log_level = ISC_LOG_DEBUG(1);
+	char client_sabuf[ISC_SOCKADDR_FORMATSIZE];
+	char local_sabuf[ISC_SOCKADDR_FORMATSIZE];
+
+	/*
+	 * 'sock' is dereferenced below, so validate it like the other
+	 * entry points in this file do.
+	 */
+	REQUIRE(VALID_NMSOCK(sock));
+	REQUIRE(tls != NULL);
+
+	/* Skip the address formatting when nothing would be logged. */
+	if (!isc_log_wouldlog(isc_lctx, log_level)) {
+		return;
+	}
+
+	isc_sockaddr_format(&sock->peer, client_sabuf, sizeof(client_sabuf));
+	isc_sockaddr_format(&sock->iface, local_sabuf, sizeof(local_sabuf));
+	isc_log_write(isc_lctx, ISC_LOGCATEGORY_GENERAL, ISC_LOGMODULE_NETMGR,
+		      log_level, "TLS %s session %s for %s on %s",
+		      SSL_is_server(tls) ? "server" : "client",
+		      SSL_session_reused(tls) ? "resumed" : "created",
+		      client_sabuf, local_sabuf);
+}
+
+#ifdef NETMGR_TRACE
+/*
+ * Dump all active sockets in netmgr. We output to stderr
+ * as the logger might be already shut down.
+ */
+
+/*
+ * Map a socket type to the name of its enumerator, for trace output.
+ * An unknown value is a programming error.
+ */
+static const char *
+nmsocket_type_totext(isc_nmsocket_type type) {
+	const char *text = NULL;
+
+	switch (type) {
+	case isc_nm_udpsocket:
+		text = "isc_nm_udpsocket";
+		break;
+	case isc_nm_udplistener:
+		text = "isc_nm_udplistener";
+		break;
+	case isc_nm_tcpsocket:
+		text = "isc_nm_tcpsocket";
+		break;
+	case isc_nm_tcplistener:
+		text = "isc_nm_tcplistener";
+		break;
+	case isc_nm_tcpdnslistener:
+		text = "isc_nm_tcpdnslistener";
+		break;
+	case isc_nm_tcpdnssocket:
+		text = "isc_nm_tcpdnssocket";
+		break;
+	case isc_nm_tlssocket:
+		text = "isc_nm_tlssocket";
+		break;
+	case isc_nm_tlslistener:
+		text = "isc_nm_tlslistener";
+		break;
+	case isc_nm_tlsdnslistener:
+		text = "isc_nm_tlsdnslistener";
+		break;
+	case isc_nm_tlsdnssocket:
+		text = "isc_nm_tlsdnssocket";
+		break;
+	case isc_nm_httplistener:
+		text = "isc_nm_httplistener";
+		break;
+	case isc_nm_httpsocket:
+		text = "isc_nm_httpsocket";
+		break;
+	default:
+		UNREACHABLE();
+	}
+
+	return (text);
+}
+
+/*
+ * Print one active handle to stderr: its address, its current
+ * reference count and the backtrace recorded in the handle.
+ */
+static void
+nmhandle_dump(isc_nmhandle_t *handle) {
+	/* "%p" is only defined for 'void *' arguments, hence the cast. */
+	fprintf(stderr, "Active handle %p, refs %" PRIuFAST32 "\n",
+		(void *)handle, isc_refcount_current(&handle->references));
+	fprintf(stderr, "Created by:\n");
+	isc_backtrace_symbols_fd(handle->backtrace, handle->backtrace_size,
+				 STDERR_FILENO);
+	fprintf(stderr, "\n\n");
+}
+
+/*
+ * Print one active socket to stderr while holding its lock: role,
+ * address, type, reference count, related sockets, state flags, the
+ * backtrace recorded in the socket, and finally every handle on its
+ * active_handles list.
+ */
+static void
+nmsocket_dump(isc_nmsocket_t *sock) {
+	isc_nmhandle_t *handle = NULL;
+
+	LOCK(&sock->lock);
+	fprintf(stderr, "\n=================\n");
+	/* "%p" is only defined for 'void *' arguments, hence the casts. */
+	fprintf(stderr, "Active %s socket %p, type %s, refs %" PRIuFAST32 "\n",
+		atomic_load(&sock->client) ? "client" : "server", (void *)sock,
+		nmsocket_type_totext(sock->type),
+		isc_refcount_current(&sock->references));
+	fprintf(stderr,
+		"Parent %p, listener %p, server %p, statichandle = "
+		"%p\n",
+		(void *)sock->parent, (void *)sock->listener,
+		(void *)sock->server, (void *)sock->statichandle);
+	fprintf(stderr, "Flags:%s%s%s%s%s\n",
+		atomic_load(&sock->active) ? " active" : "",
+		atomic_load(&sock->closing) ? " closing" : "",
+		atomic_load(&sock->destroying) ? " destroying" : "",
+		atomic_load(&sock->connecting) ? " connecting" : "",
+		atomic_load(&sock->accepting) ? " accepting" : "");
+	fprintf(stderr, "Created by:\n");
+	isc_backtrace_symbols_fd(sock->backtrace, sock->backtrace_size,
+				 STDERR_FILENO);
+	fprintf(stderr, "\n");
+
+	/*
+	 * Print the section header for every socket that has handles.
+	 * A function-static "first" flag would print it only for the
+	 * first such socket in the lifetime of the process.
+	 */
+	handle = ISC_LIST_HEAD(sock->active_handles);
+	if (handle != NULL) {
+		fprintf(stderr, "Active handles:\n");
+	}
+	for (; handle != NULL; handle = ISC_LIST_NEXT(handle, active_link)) {
+		nmhandle_dump(handle);
+	}
+
+	fprintf(stderr, "\n");
+	UNLOCK(&sock->lock);
+}
+
+/*
+ * Dump every socket on the manager's active_sockets list to stderr,
+ * holding the manager lock for the duration of the walk.
+ */
+void
+isc__nm_dump_active(isc_nm_t *nm) {
+	isc_nmsocket_t *sock = NULL;
+
+	REQUIRE(VALID_NM(nm));
+
+	LOCK(&nm->lock);
+	/*
+	 * Print the header on every dump that has something to show.
+	 * A function-static "first" flag would print it only on the
+	 * first dump in the lifetime of the process.
+	 */
+	sock = ISC_LIST_HEAD(nm->active_sockets);
+	if (sock != NULL) {
+		fprintf(stderr, "Outstanding sockets\n");
+	}
+	for (; sock != NULL; sock = ISC_LIST_NEXT(sock, active_link)) {
+		nmsocket_dump(sock);
+	}
+	UNLOCK(&nm->lock);
+}
+#endif
diff --git a/lib/isc/netmgr/tcp.c b/lib/isc/netmgr/tcp.c
new file mode 100644
index 0000000..2a644fe
--- /dev/null
+++ b/lib/isc/netmgr/tcp.c
@@ -0,0 +1,1456 @@
+/*
+ * Copyright (C) Internet Systems Consortium, Inc. ("ISC")
+ *
+ * SPDX-License-Identifier: MPL-2.0
+ *
+ * This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, you can obtain one at https://mozilla.org/MPL/2.0/.
+ *
+ * See the COPYRIGHT file distributed with this work for additional
+ * information regarding copyright ownership.
+ */
+
+#include <libgen.h>
+#include <unistd.h>
+#include <uv.h>
+
+#include <isc/atomic.h>
+#include <isc/barrier.h>
+#include <isc/buffer.h>
+#include <isc/condition.h>
+#include <isc/errno.h>
+#include <isc/log.h>
+#include <isc/magic.h>
+#include <isc/mem.h>
+#include <isc/netmgr.h>
+#include <isc/quota.h>
+#include <isc/random.h>
+#include <isc/refcount.h>
+#include <isc/region.h>
+#include <isc/result.h>
+#include <isc/sockaddr.h>
+#include <isc/stdtime.h>
+#include <isc/thread.h>
+#include <isc/util.h>
+
+#include "netmgr-int.h"
+#include "uv-compat.h"
+
+static atomic_uint_fast32_t last_tcpquota_log = 0;
+
+/*
+ * Rate limiter for TCP quota log messages: records the current time in
+ * last_tcpquota_log and returns true only when it differs from the
+ * previously recorded value.
+ */
+static bool
+can_log_tcp_quota(void) {
+	isc_stdtime_t now;
+	isc_stdtime_t previous;
+
+	isc_stdtime_get(&now);
+	previous = atomic_exchange_relaxed(&last_tcpquota_log, now);
+
+	return (previous != now);
+}
+
+static isc_result_t
+tcp_connect_direct(isc_nmsocket_t *sock, isc__nm_uvreq_t *req);
+
+static void
+tcp_close_direct(isc_nmsocket_t *sock);
+
+static isc_result_t
+tcp_send_direct(isc_nmsocket_t *sock, isc__nm_uvreq_t *req);
+static void
+tcp_connect_cb(uv_connect_t *uvreq, int status);
+
+static void
+tcp_connection_cb(uv_stream_t *server, int status);
+
+static void
+tcp_close_cb(uv_handle_t *uvhandle);
+
+static isc_result_t
+accept_connection(isc_nmsocket_t *ssock, isc_quota_t *quota);
+
+static void
+quota_accept_cb(isc_quota_t *quota, void *sock0);
+
+static void
+failed_accept_cb(isc_nmsocket_t *sock, isc_result_t eresult);
+
+static void
+stop_tcp_parent(isc_nmsocket_t *sock);
+static void
+stop_tcp_child(isc_nmsocket_t *sock);
+
+/*
+ * Clean up after a failed accept on 'sock': release its quota and its
+ * reference to the server socket, clear the 'accepting' flag, and log
+ * the failure unless the client simply went away first.
+ */
+static void
+failed_accept_cb(isc_nmsocket_t *sock, isc_result_t eresult) {
+	REQUIRE(atomic_load(&sock->accepting));
+	REQUIRE(sock->server);
+
+	/*
+	 * Give the quota back right away so other connections can use
+	 * it; leaving it to the later asynchronous detach would clog
+	 * the quota unnecessarily.
+	 */
+	if (sock->quota != NULL) {
+		isc_quota_detach(&sock->quota);
+	}
+
+	isc__nmsocket_detach(&sock->server);
+
+	atomic_store(&sock->accepting, false);
+
+	if (eresult == ISC_R_NOTCONNECTED) {
+		/* IGNORE: The client disconnected before we could accept */
+		return;
+	}
+
+	isc_log_write(isc_lctx, ISC_LOGCATEGORY_GENERAL, ISC_LOGMODULE_NETMGR,
+		      ISC_LOG_ERROR, "Accepting TCP connection failed: %s",
+		      isc_result_totext(eresult));
+}
+
+/*
+ * Start an outgoing TCP connection for 'sock' on the socket's own
+ * network thread: initialize the libuv TCP handle and the read timer,
+ * adopt sock->fd, optionally bind to req->local, and issue
+ * uv_tcp_connect() with tcp_connect_cb() as the completion callback.
+ *
+ * The outcome of the setup (not of the connection itself) is stored in
+ * sock->result under sock->lock and signalled on sock->cond; if the
+ * socket is not yet marked active, this function then waits on
+ * sock->scond until it is.
+ *
+ * Returns the isc_result_t translation of the last libuv status.
+ */
+static isc_result_t
+tcp_connect_direct(isc_nmsocket_t *sock, isc__nm_uvreq_t *req) {
+ isc__networker_t *worker = NULL;
+ isc_result_t result = ISC_R_UNSET;
+ int r;
+
+ REQUIRE(VALID_NMSOCK(sock));
+ REQUIRE(VALID_UVREQ(req));
+
+ REQUIRE(isc__nm_in_netthread());
+ REQUIRE(sock->tid == isc_nm_tid());
+
+ worker = &sock->mgr->workers[sock->tid];
+
+ atomic_store(&sock->connecting, true);
+
+ /* 2 minute timeout */
+ result = isc__nm_socket_connectiontimeout(sock->fd, 120 * 1000);
+ RUNTIME_CHECK(result == ISC_R_SUCCESS);
+
+ r = uv_tcp_init(&worker->loop, &sock->uv_handle.tcp);
+ UV_RUNTIME_CHECK(uv_tcp_init, r);
+ uv_handle_set_data(&sock->uv_handle.handle, sock);
+
+ r = uv_timer_init(&worker->loop, &sock->read_timer);
+ UV_RUNTIME_CHECK(uv_timer_init, r);
+ uv_handle_set_data((uv_handle_t *)&sock->read_timer, sock);
+
+ r = uv_tcp_open(&sock->uv_handle.tcp, sock->fd);
+ if (r != 0) {
+ isc__nm_closesocket(sock->fd);
+ isc__nm_incstats(sock, STATID_OPENFAIL);
+ goto done;
+ }
+ isc__nm_incstats(sock, STATID_OPEN);
+
+ /* Bind only when the caller supplied a local address. */
+ if (req->local.length != 0) {
+ r = uv_tcp_bind(&sock->uv_handle.tcp, &req->local.type.sa, 0);
+ if (r != 0) {
+ isc__nm_incstats(sock, STATID_BINDFAIL);
+ goto done;
+ }
+ }
+
+ isc__nm_set_network_buffers(sock->mgr, &sock->uv_handle.handle);
+
+ uv_handle_set_data(&req->uv_req.handle, req);
+ r = uv_tcp_connect(&req->uv_req.connect, &sock->uv_handle.tcp,
+ &req->peer.type.sa, tcp_connect_cb);
+ if (r != 0) {
+ isc__nm_incstats(sock, STATID_CONNECTFAIL);
+ goto done;
+ }
+
+ /*
+ * While connecting, the timer's data points at the connect
+ * request; tcp_connect_cb() points it back at the socket.
+ */
+ uv_handle_set_data((uv_handle_t *)&sock->read_timer,
+ &req->uv_req.connect);
+ isc__nmsocket_timer_start(sock);
+
+ atomic_store(&sock->connected, true);
+
+done:
+ result = isc__nm_uverr2result(r);
+ /*
+ * Publish the result to isc_nm_tcpconnect(), which waits on
+ * sock->cond, and wait on sock->scond until it has marked the
+ * socket active.
+ */
+ LOCK(&sock->lock);
+ sock->result = result;
+ SIGNAL(&sock->cond);
+ if (!atomic_load(&sock->active)) {
+ WAIT(&sock->scond, &sock->lock);
+ }
+ INSIST(atomic_load(&sock->active));
+ UNLOCK(&sock->lock);
+
+ return (result);
+}
+
+/*
+ * Handler for the "tcpconnect" network event: runs
+ * tcp_connect_direct() for the socket and request carried by the
+ * event. If the setup fails, the socket is deactivated, closed when it
+ * still has a file descriptor, and the connect callback is invoked
+ * with the error. In every case the reference on the socket held by
+ * this handler is dropped at the end.
+ */
+void
+isc__nm_async_tcpconnect(isc__networker_t *worker, isc__netievent_t *ev0) {
+ isc__netievent_tcpconnect_t *ievent =
+ (isc__netievent_tcpconnect_t *)ev0;
+ isc_nmsocket_t *sock = ievent->sock;
+ isc__nm_uvreq_t *req = ievent->req;
+ isc_result_t result = ISC_R_SUCCESS;
+
+ UNUSED(worker);
+
+ REQUIRE(VALID_NMSOCK(sock));
+ REQUIRE(sock->type == isc_nm_tcpsocket);
+ REQUIRE(sock->parent == NULL);
+ REQUIRE(sock->tid == isc_nm_tid());
+
+ result = tcp_connect_direct(sock, req);
+ if (result != ISC_R_SUCCESS) {
+ atomic_store(&sock->active, false);
+ if (sock->fd != (uv_os_sock_t)(-1)) {
+ isc__nm_tcp_close(sock);
+ }
+ isc__nm_connectcb(sock, req, result, true);
+ }
+
+ /*
+ * The sock is now attached to the handle.
+ */
+ isc__nmsocket_detach(&sock);
+}
+
+/*
+ * libuv completion callback for uv_tcp_connect().
+ *
+ * Checks, in order: a recorded timeout, a connect that is no longer in
+ * progress (only the request is released), network manager shutdown,
+ * socket closing, and the libuv 'status'. UV_EADDRINUSE is retried
+ * while req->connect_tries allows. On success the connect timer is
+ * stopped, the peer address is read back into sock->peer, and the
+ * connect callback is invoked with ISC_R_SUCCESS; on any failure
+ * isc__nm_failed_connect_cb() is called with the mapped result.
+ */
+static void
+tcp_connect_cb(uv_connect_t *uvreq, int status) {
+ isc_result_t result = ISC_R_UNSET;
+ isc__nm_uvreq_t *req = NULL;
+ isc_nmsocket_t *sock = uv_handle_get_data((uv_handle_t *)uvreq->handle);
+ struct sockaddr_storage ss;
+ int r;
+
+ REQUIRE(VALID_NMSOCK(sock));
+ REQUIRE(sock->tid == isc_nm_tid());
+
+ req = uv_handle_get_data((uv_handle_t *)uvreq);
+
+ REQUIRE(VALID_UVREQ(req));
+ REQUIRE(VALID_NMHANDLE(req->handle));
+
+ if (atomic_load(&sock->timedout)) {
+ result = ISC_R_TIMEDOUT;
+ goto error;
+ } else if (!atomic_load(&sock->connecting)) {
+ /*
+ * The connect was cancelled from timeout; just clean up
+ * the req.
+ */
+ isc__nm_uvreq_put(&req, sock);
+ return;
+ } else if (isc__nm_closing(sock)) {
+ /* Network manager shutting down */
+ result = ISC_R_SHUTTINGDOWN;
+ goto error;
+ } else if (isc__nmsocket_closing(sock)) {
+ /* Connection canceled */
+ result = ISC_R_CANCELED;
+ goto error;
+ } else if (status == UV_ETIMEDOUT) {
+ /* Timeout status code here indicates hard error */
+ result = ISC_R_TIMEDOUT;
+ goto error;
+ } else if (status == UV_EADDRINUSE) {
+ /*
+ * On FreeBSD the TCP connect() call sometimes results in a
+ * spurious transient EADDRINUSE. Try a few more times before
+ * giving up.
+ */
+ if (--req->connect_tries > 0) {
+ r = uv_tcp_connect(&req->uv_req.connect,
+ &sock->uv_handle.tcp,
+ &req->peer.type.sa, tcp_connect_cb);
+ if (r != 0) {
+ result = isc__nm_uverr2result(r);
+ goto error;
+ }
+ return;
+ }
+ result = isc__nm_uverr2result(status);
+ goto error;
+ } else if (status != 0) {
+ result = isc__nm_uverr2result(status);
+ goto error;
+ }
+
+ /* Connected: the timer's data goes back to pointing at the socket. */
+ isc__nmsocket_timer_stop(sock);
+ uv_handle_set_data((uv_handle_t *)&sock->read_timer, sock);
+
+ isc__nm_incstats(sock, STATID_CONNECT);
+ r = uv_tcp_getpeername(&sock->uv_handle.tcp, (struct sockaddr *)&ss,
+ &(int){ sizeof(ss) });
+ if (r != 0) {
+ result = isc__nm_uverr2result(r);
+ goto error;
+ }
+
+ atomic_store(&sock->connecting, false);
+
+ result = isc_sockaddr_fromsockaddr(&sock->peer, (struct sockaddr *)&ss);
+ RUNTIME_CHECK(result == ISC_R_SUCCESS);
+
+ isc__nm_connectcb(sock, req, ISC_R_SUCCESS, false);
+
+ return;
+error:
+ isc__nm_failed_connect_cb(sock, req, result, false);
+}
+
+/*
+ * Open a TCP connection from 'local' to 'peer'. 'cb(..., cbarg)' is
+ * stored as the connect callback and is invoked with the outcome.
+ *
+ * A new isc_nm_tcpsocket is created together with a connect request
+ * and a handle. If the OS socket cannot be created, the callback is
+ * invoked with the error and the socket is released. Otherwise the
+ * connection is started either directly (when already running in a
+ * network thread) or by queueing a "tcpconnect" event to a randomly
+ * chosen worker. The function then waits on sock->cond until
+ * sock->result has been set, marks the socket active and broadcasts
+ * sock->scond.
+ *
+ * Requires a valid 'mgr' and non-NULL 'local' and 'peer'.
+ */
+void
+isc_nm_tcpconnect(isc_nm_t *mgr, isc_sockaddr_t *local, isc_sockaddr_t *peer,
+ isc_nm_cb_t cb, void *cbarg, unsigned int timeout,
+ size_t extrahandlesize) {
+ isc_result_t result = ISC_R_SUCCESS;
+ isc_nmsocket_t *sock = NULL;
+ isc__netievent_tcpconnect_t *ievent = NULL;
+ isc__nm_uvreq_t *req = NULL;
+ sa_family_t sa_family;
+
+ REQUIRE(VALID_NM(mgr));
+ REQUIRE(local != NULL);
+ REQUIRE(peer != NULL);
+
+ sa_family = peer->type.sa.sa_family;
+
+ sock = isc_mem_get(mgr->mctx, sizeof(*sock));
+ isc__nmsocket_init(sock, mgr, isc_nm_tcpsocket, local);
+
+ sock->extrahandlesize = extrahandlesize;
+ sock->connect_timeout = timeout;
+ sock->result = ISC_R_UNSET;
+ sock->fd = (uv_os_sock_t)-1;
+ atomic_init(&sock->client, true);
+
+ req = isc__nm_uvreq_get(mgr, sock);
+ req->cb.connect = cb;
+ req->cbarg = cbarg;
+ req->peer = *peer;
+ req->local = *local;
+ req->handle = isc__nmhandle_get(sock, &req->peer, &sock->iface);
+
+ result = isc__nm_socket(sa_family, SOCK_STREAM, 0, &sock->fd);
+ if (result != ISC_R_SUCCESS) {
+ /* No OS socket: report the error through the callback. */
+ if (isc__nm_in_netthread()) {
+ sock->tid = isc_nm_tid();
+ isc__nmsocket_clearcb(sock);
+ isc__nm_connectcb(sock, req, result, false);
+ } else {
+ isc__nmsocket_clearcb(sock);
+ sock->tid = isc_random_uniform(mgr->nworkers);
+ isc__nm_connectcb(sock, req, result, true);
+ }
+ atomic_store(&sock->closed, true);
+ isc__nmsocket_detach(&sock);
+ return;
+ }
+
+ /* Best-effort socket tuning; failures are deliberately ignored. */
+ (void)isc__nm_socket_min_mtu(sock->fd, sa_family);
+ (void)isc__nm_socket_tcp_maxseg(sock->fd, NM_MAXSEG);
+
+ ievent = isc__nm_get_netievent_tcpconnect(mgr, sock, req);
+
+ if (isc__nm_in_netthread()) {
+ /* Run the connect event inline on the current worker. */
+ atomic_store(&sock->active, true);
+ sock->tid = isc_nm_tid();
+ isc__nm_async_tcpconnect(&mgr->workers[sock->tid],
+ (isc__netievent_t *)ievent);
+ isc__nm_put_netievent_tcpconnect(mgr, ievent);
+ } else {
+ /* Hand the connect event to a randomly chosen worker. */
+ atomic_init(&sock->active, false);
+ sock->tid = isc_random_uniform(mgr->nworkers);
+ isc__nm_enqueue_ievent(&mgr->workers[sock->tid],
+ (isc__netievent_t *)ievent);
+ }
+ /*
+ * Wait until tcp_connect_direct() has stored its result, then
+ * mark the socket active and wake up anything waiting on scond.
+ */
+ LOCK(&sock->lock);
+ while (sock->result == ISC_R_UNSET) {
+ WAIT(&sock->cond, &sock->lock);
+ }
+ atomic_store(&sock->active, true);
+ BROADCAST(&sock->scond);
+ UNLOCK(&sock->lock);
+}
+
+/*
+ * Create a TCP socket of the given address family for listening.
+ * Address reuse is always enabled and, when the manager is configured
+ * for load-balanced sockets, load-balancing reuse as well; failure of
+ * either, or of the socket creation itself, is fatal. The
+ * incoming-CPU and v6only settings are best effort.
+ */
+static uv_os_sock_t
+isc__nm_tcp_lb_socket(isc_nm_t *mgr, sa_family_t sa_family) {
+	uv_os_sock_t fd;
+	isc_result_t result = isc__nm_socket(sa_family, SOCK_STREAM, 0, &fd);
+
+	RUNTIME_CHECK(result == ISC_R_SUCCESS);
+
+	(void)isc__nm_socket_incoming_cpu(fd);
+	(void)isc__nm_socket_v6only(fd, sa_family);
+
+	/* FIXME: set mss */
+
+	result = isc__nm_socket_reuse(fd);
+	RUNTIME_CHECK(result == ISC_R_SUCCESS);
+
+	if (!mgr->load_balance_sockets) {
+		return (fd);
+	}
+
+	result = isc__nm_socket_reuse_lb(fd);
+	RUNTIME_CHECK(result == ISC_R_SUCCESS);
+
+	return (fd);
+}
+
+/*
+ * Initialize the child listener socket for worker 'tid' inside the
+ * parent listener 'sock' and hand a "tcplisten" event for it to that
+ * worker.
+ *
+ * The child copies the parent's accept callback, handle size, backlog
+ * and quota pointer. Its file descriptor is a freshly created socket
+ * when load-balanced sockets are enabled, otherwise a dup() of 'fd'.
+ */
+static void
+start_tcp_child(isc_nm_t *mgr, isc_sockaddr_t *iface, isc_nmsocket_t *sock,
+ uv_os_sock_t fd, int tid) {
+ isc__netievent_tcplisten_t *ievent = NULL;
+ isc_nmsocket_t *csock = &sock->children[tid];
+
+ isc__nmsocket_init(csock, mgr, isc_nm_tcpsocket, iface);
+ csock->parent = sock;
+ csock->accept_cb = sock->accept_cb;
+ csock->accept_cbarg = sock->accept_cbarg;
+ csock->extrahandlesize = sock->extrahandlesize;
+ csock->backlog = sock->backlog;
+ csock->tid = tid;
+ /*
+ * We don't attach to quota, just assign - to avoid
+ * increasing quota unnecessarily.
+ */
+ csock->pquota = sock->pquota;
+ isc_quota_cb_init(&csock->quotacb, quota_accept_cb, csock);
+
+ if (mgr->load_balance_sockets) {
+ /* Each child gets its own socket; the shared 'fd' is unused. */
+ UNUSED(fd);
+ csock->fd = isc__nm_tcp_lb_socket(mgr,
+ iface->type.sa.sa_family);
+ } else {
+ csock->fd = dup(fd);
+ }
+ REQUIRE(csock->fd >= 0);
+
+ ievent = isc__nm_get_netievent_tcplisten(mgr, csock);
+ isc__nm_maybe_enqueue_ievent(&mgr->workers[tid],
+ (isc__netievent_t *)ievent);
+}
+
+/*
+ * Queue a "tcpstop" event for 'sock' on the worker that owns it.
+ */
+static void
+enqueue_stoplistening(isc_nmsocket_t *sock) {
+	isc_nm_t *mgr = sock->mgr;
+	isc__networker_t *owner = &mgr->workers[sock->tid];
+	isc__netievent_tcpstop_t *stop_ev =
+		isc__nm_get_netievent_tcpstop(mgr, sock);
+
+	isc__nm_enqueue_ievent(owner, (isc__netievent_t *)stop_ev);
+}
+
+/*
+ * Start listening for TCP connections on 'iface'.
+ *
+ * A parent isc_nm_tcplistener socket is created with one child socket
+ * per worker; every child is started through start_tcp_child(). The
+ * function waits until all children have reported in, then takes the
+ * result recorded in the parent socket.
+ *
+ * 'accept_cb(..., accept_cbarg)' is copied into each child as its
+ * accept callback; 'quota', when non-NULL, is stored as the quota
+ * pointer consulted for incoming connections.
+ *
+ * Returns:
+ *  - ISC_R_SUCCESS, with the new listener stored in '*sockp';
+ *  - otherwise the recorded error; the listener is then stopped and
+ *    closed, and '*sockp' is left untouched.
+ */
+isc_result_t
+isc_nm_listentcp(isc_nm_t *mgr, isc_sockaddr_t *iface,
+ isc_nm_accept_cb_t accept_cb, void *accept_cbarg,
+ size_t extrahandlesize, int backlog, isc_quota_t *quota,
+ isc_nmsocket_t **sockp) {
+ isc_result_t result = ISC_R_SUCCESS;
+ isc_nmsocket_t *sock = NULL;
+ size_t children_size = 0;
+ uv_os_sock_t fd = -1;
+
+ REQUIRE(VALID_NM(mgr));
+
+ sock = isc_mem_get(mgr->mctx, sizeof(*sock));
+ isc__nmsocket_init(sock, mgr, isc_nm_tcplistener, iface);
+
+ atomic_init(&sock->rchildren, 0);
+ sock->nchildren = mgr->nworkers;
+ children_size = sock->nchildren * sizeof(sock->children[0]);
+ sock->children = isc_mem_get(mgr->mctx, children_size);
+ memset(sock->children, 0, children_size);
+
+ sock->result = ISC_R_UNSET;
+
+ sock->accept_cb = accept_cb;
+ sock->accept_cbarg = accept_cbarg;
+ sock->extrahandlesize = extrahandlesize;
+ sock->backlog = backlog;
+ sock->pquota = quota;
+
+ sock->tid = 0;
+ sock->fd = -1;
+
+ /*
+ * Without load-balanced sockets a single socket is created here
+ * and dup()ed into every child by start_tcp_child().
+ */
+ if (!mgr->load_balance_sockets) {
+ fd = isc__nm_tcp_lb_socket(mgr, iface->type.sa.sa_family);
+ }
+
+ isc_barrier_init(&sock->startlistening, sock->nchildren);
+
+ /* Start the children of all other workers first... */
+ for (size_t i = 0; i < sock->nchildren; i++) {
+ if ((int)i == isc_nm_tid()) {
+ continue;
+ }
+ start_tcp_child(mgr, iface, sock, fd, i);
+ }
+
+ /* ...and the current worker's child last, if we are a worker. */
+ if (isc__nm_in_netthread()) {
+ start_tcp_child(mgr, iface, sock, fd, isc_nm_tid());
+ }
+
+ /* The children hold their own duplicates of the shared socket. */
+ if (!mgr->load_balance_sockets) {
+ isc__nm_closesocket(fd);
+ }
+
+ /* Wait until every child has reported in. */
+ LOCK(&sock->lock);
+ while (atomic_load(&sock->rchildren) != sock->nchildren) {
+ WAIT(&sock->cond, &sock->lock);
+ }
+ result = sock->result;
+ atomic_store(&sock->active, true);
+ UNLOCK(&sock->lock);
+
+ INSIST(result != ISC_R_UNSET);
+
+ if (result == ISC_R_SUCCESS) {
+ REQUIRE(atomic_load(&sock->rchildren) == sock->nchildren);
+ *sockp = sock;
+ } else {
+ atomic_store(&sock->active, false);
+ enqueue_stoplistening(sock);
+ isc_nmsocket_close(&sock);
+ }
+
+ return (result);
+}
+
+/*
+ * Handler for the "tcplisten" network event, run on the worker that
+ * owns the child listener socket carried by the event.
+ *
+ * Initializes the libuv TCP handle and read timer for the child,
+ * adopts its file descriptor, binds it and starts listening with
+ * tcp_connection_cb() as the connection callback. The open, bind and
+ * listen steps run with the parent's lock held. The outcome is
+ * recorded in the parent (only the first result reported is kept),
+ * the parent's count of reported children is incremented and its
+ * condition variable signalled. Finally the handler waits on the
+ * parent's 'startlistening' barrier.
+ */
+void
+isc__nm_async_tcplisten(isc__networker_t *worker, isc__netievent_t *ev0) {
+ isc__netievent_tcplisten_t *ievent = (isc__netievent_tcplisten_t *)ev0;
+ sa_family_t sa_family;
+ int r;
+ int flags = 0;
+ isc_nmsocket_t *sock = NULL;
+ isc_result_t result;
+ isc_nm_t *mgr;
+
+ REQUIRE(VALID_NMSOCK(ievent->sock));
+ REQUIRE(ievent->sock->tid == isc_nm_tid());
+ REQUIRE(VALID_NMSOCK(ievent->sock->parent));
+
+ sock = ievent->sock;
+ sa_family = sock->iface.type.sa.sa_family;
+ mgr = sock->mgr;
+
+ REQUIRE(sock->type == isc_nm_tcpsocket);
+ REQUIRE(sock->parent != NULL);
+ REQUIRE(sock->tid == isc_nm_tid());
+
+ /* Best-effort socket tuning; failures are deliberately ignored. */
+ (void)isc__nm_socket_min_mtu(sock->fd, sa_family);
+ (void)isc__nm_socket_tcp_maxseg(sock->fd, NM_MAXSEG);
+
+ r = uv_tcp_init(&worker->loop, &sock->uv_handle.tcp);
+ UV_RUNTIME_CHECK(uv_tcp_init, r);
+
+ uv_handle_set_data(&sock->uv_handle.handle, sock);
+ /* This keeps the socket alive after everything else is gone */
+ isc__nmsocket_attach(sock, &(isc_nmsocket_t *){ NULL });
+
+ r = uv_timer_init(&worker->loop, &sock->read_timer);
+ UV_RUNTIME_CHECK(uv_timer_init, r);
+ uv_handle_set_data((uv_handle_t *)&sock->read_timer, sock);
+
+ LOCK(&sock->parent->lock);
+
+ r = uv_tcp_open(&sock->uv_handle.tcp, sock->fd);
+ if (r < 0) {
+ isc__nm_closesocket(sock->fd);
+ isc__nm_incstats(sock, STATID_OPENFAIL);
+ goto done;
+ }
+ isc__nm_incstats(sock, STATID_OPEN);
+
+ if (sa_family == AF_INET6) {
+ flags = UV_TCP_IPV6ONLY;
+ }
+
+ if (mgr->load_balance_sockets) {
+ /* Every child owns a separate socket and binds it itself. */
+ r = isc_uv_tcp_freebind(&sock->uv_handle.tcp,
+ &sock->iface.type.sa, flags);
+ if (r < 0) {
+ isc__nm_incstats(sock, STATID_BINDFAIL);
+ goto done;
+ }
+ } else {
+ /*
+ * The children share one underlying socket: the first child
+ * to get here binds it and records its handle flags and fd
+ * in the parent for the others to reuse.
+ */
+ if (sock->parent->fd == -1) {
+ r = isc_uv_tcp_freebind(&sock->uv_handle.tcp,
+ &sock->iface.type.sa, flags);
+ if (r < 0) {
+ isc__nm_incstats(sock, STATID_BINDFAIL);
+ goto done;
+ }
+ sock->parent->uv_handle.tcp.flags =
+ sock->uv_handle.tcp.flags;
+ sock->parent->fd = sock->fd;
+ } else {
+ /* The socket is already bound, just copy the flags */
+ sock->uv_handle.tcp.flags =
+ sock->parent->uv_handle.tcp.flags;
+ }
+ }
+
+ isc__nm_set_network_buffers(sock->mgr, &sock->uv_handle.handle);
+
+ /*
+ * The callback will run in the same thread uv_listen() was called
+ * from, so a race with tcp_connection_cb() isn't possible.
+ */
+ r = uv_listen((uv_stream_t *)&sock->uv_handle.tcp, sock->backlog,
+ tcp_connection_cb);
+ if (r != 0) {
+ isc_log_write(isc_lctx, ISC_LOGCATEGORY_GENERAL,
+ ISC_LOGMODULE_NETMGR, ISC_LOG_ERROR,
+ "uv_listen failed: %s",
+ isc_result_totext(isc__nm_uverr2result(r)));
+ isc__nm_incstats(sock, STATID_BINDFAIL);
+ goto done;
+ }
+
+ atomic_store(&sock->listening, true);
+
+done:
+ result = isc__nm_uverr2result(r);
+ if (result != ISC_R_SUCCESS) {
+ sock->pquota = NULL;
+ }
+
+ /* Report in to the parent; the first recorded result is kept. */
+ atomic_fetch_add(&sock->parent->rchildren, 1);
+ if (sock->parent->result == ISC_R_UNSET) {
+ sock->parent->result = result;
+ }
+ SIGNAL(&sock->parent->cond);
+ UNLOCK(&sock->parent->lock);
+
+ isc_barrier_wait(&sock->parent->startlistening);
+}
+
+/*
+ * libuv connection callback registered by uv_listen() for a child
+ * listener socket.
+ *
+ * A non-zero 'status' is mapped to an isc_result_t and only logged.
+ * Otherwise, unless the listener is closing, a quota slot is requested
+ * when the listener has a quota (ISC_R_QUOTA counts as an accept
+ * failure), and the connection is accepted via accept_connection().
+ * The final result is always passed to
+ * isc__nm_accept_connection_log().
+ */
+static void
+tcp_connection_cb(uv_stream_t *server, int status) {
+ isc_nmsocket_t *ssock = uv_handle_get_data((uv_handle_t *)server);
+ isc_result_t result;
+ isc_quota_t *quota = NULL;
+
+ if (status != 0) {
+ result = isc__nm_uverr2result(status);
+ goto done;
+ }
+
+ REQUIRE(VALID_NMSOCK(ssock));
+ REQUIRE(ssock->tid == isc_nm_tid());
+
+ if (isc__nmsocket_closing(ssock)) {
+ result = ISC_R_CANCELED;
+ goto done;
+ }
+
+ if (ssock->pquota != NULL) {
+ result = isc_quota_attach_cb(ssock->pquota, &quota,
+ &ssock->quotacb);
+ if (result == ISC_R_QUOTA) {
+ isc__nm_incstats(ssock, STATID_ACCEPTFAIL);
+ goto done;
+ }
+ }
+
+ result = accept_connection(ssock, quota);
+done:
+ isc__nm_accept_connection_log(result, can_log_tcp_quota());
+}
+
+/*
+ * Stop a TCP listener. The 'closing' flag must go from false to true
+ * exactly once; calling this on a listener that is already closing is
+ * a programming error. The actual teardown happens right away when
+ * called from a network thread, and through a queued "tcpstop" event
+ * otherwise.
+ */
+void
+isc__nm_tcp_stoplistening(isc_nmsocket_t *sock) {
+	bool expected = false;
+
+	REQUIRE(VALID_NMSOCK(sock));
+	REQUIRE(sock->type == isc_nm_tcplistener);
+
+	if (!atomic_compare_exchange_strong(&sock->closing, &expected, true)) {
+		UNREACHABLE();
+	}
+
+	if (isc__nm_in_netthread()) {
+		stop_tcp_parent(sock);
+	} else {
+		enqueue_stoplistening(sock);
+	}
+}
+
+/*
+ * Handler for the "tcpstop" network event: stops the socket carried
+ * by the event as a child listener when it has a parent, and as a
+ * parent listener otherwise.
+ */
+void
+isc__nm_async_tcpstop(isc__networker_t *worker, isc__netievent_t *ev0) {
+	isc__netievent_tcpstop_t *stop_ev = (isc__netievent_tcpstop_t *)ev0;
+	isc_nmsocket_t *sock = stop_ev->sock;
+
+	UNUSED(worker);
+
+	REQUIRE(VALID_NMSOCK(sock));
+	REQUIRE(sock->tid == isc_nm_tid());
+
+	if (sock->parent == NULL) {
+		stop_tcp_parent(sock);
+	} else {
+		stop_tcp_child(sock);
+	}
+}
+
+/*
+ * Handle a failed read on 'sock': stop the timer and reading, deliver
+ * 'result' to the pending read callback (if a read was outstanding and
+ * a callback is set), prepare the socket for destruction and finally
+ * give back its quota.
+ *
+ * 'result' must not be ISC_R_SUCCESS.
+ */
+void
+isc__nm_tcp_failed_read_cb(isc_nmsocket_t *sock, isc_result_t result) {
+	REQUIRE(VALID_NMSOCK(sock));
+	REQUIRE(result != ISC_R_SUCCESS);
+
+	isc__nmsocket_timer_stop(sock);
+	isc__nm_stop_reading(sock);
+
+	if (sock->recv_read) {
+		sock->recv_read = false;
+
+		if (sock->recv_cb != NULL) {
+			isc__nm_uvreq_t *readreq = isc__nm_get_read_req(sock,
+									NULL);
+			isc__nmsocket_clearcb(sock);
+			isc__nm_readcb(sock, readreq, result);
+		}
+	}
+
+	isc__nmsocket_prep_destroy(sock);
+
+	/*
+	 * The quota is released only now, after the read callback has
+	 * had its chance to run.
+	 */
+	if (sock->quota != NULL) {
+		isc_quota_detach(&sock->quota);
+	}
+}
+
+/*
+ * Start reading on the TCP socket behind 'handle'; incoming data will
+ * be delivered to 'cb(..., cbarg)'.
+ *
+ * The callback is recorded on the socket and, if no read timeout is
+ * set yet, the timeout is taken from the manager's keepalive or idle
+ * value depending on the socket's keepalive flag. The read itself is
+ * always started asynchronously through a "tcpstartread" event.
+ *
+ * Requires that 'handle' is the static handle of a TCP socket.
+ */
+void
+isc__nm_tcp_read(isc_nmhandle_t *handle, isc_nm_recv_cb_t cb, void *cbarg) {
+ REQUIRE(VALID_NMHANDLE(handle));
+ REQUIRE(VALID_NMSOCK(handle->sock));
+
+ isc_nmsocket_t *sock = handle->sock;
+ isc__netievent_tcpstartread_t *ievent = NULL;
+
+ REQUIRE(sock->type == isc_nm_tcpsocket);
+ REQUIRE(sock->statichandle == handle);
+
+ sock->recv_cb = cb;
+ sock->recv_cbarg = cbarg;
+ sock->recv_read = true;
+ if (sock->read_timeout == 0) {
+ sock->read_timeout =
+ (atomic_load(&sock->keepalive)
+ ? atomic_load(&sock->mgr->keepalive)
+ : atomic_load(&sock->mgr->idle));
+ }
+
+ ievent = isc__nm_get_netievent_tcpstartread(sock->mgr, sock);
+
+ /*
+ * This MUST be done asynchronously, no matter which thread we're
+ * in. The callback function for isc_nm_read() often calls
+ * isc_nm_read() again; if we tried to do that synchronously
+ * we'd clash in processbuffer() and grow the stack indefinitely.
+ */
+ isc__nm_enqueue_ievent(&sock->mgr->workers[sock->tid],
+ (isc__netievent_t *)ievent);
+
+ return;
+}
+
+/*
+ * Handler for the "tcpstartread" network event: begins reading on the
+ * socket unless it is closing, and starts the socket timer on success.
+ * On failure the 'reading' flag is set and the failure is routed
+ * through isc__nm_tcp_failed_read_cb().
+ */
+void
+isc__nm_async_tcpstartread(isc__networker_t *worker, isc__netievent_t *ev0) {
+	isc__netievent_tcpstartread_t *read_ev =
+		(isc__netievent_tcpstartread_t *)ev0;
+	isc_nmsocket_t *sock = read_ev->sock;
+	isc_result_t result;
+
+	REQUIRE(VALID_NMSOCK(sock));
+	REQUIRE(sock->tid == isc_nm_tid());
+	UNUSED(worker);
+
+	result = isc__nmsocket_closing(sock) ? ISC_R_CANCELED
+					     : isc__nm_start_reading(sock);
+
+	if (result == ISC_R_SUCCESS) {
+		isc__nmsocket_timer_start(sock);
+		return;
+	}
+
+	atomic_store(&sock->reading, true);
+	isc__nm_tcp_failed_read_cb(sock, result);
+}
+
+/*
+ * Pause reading on the TCP socket behind 'handle'.
+ *
+ * The 'readpaused' flag is flipped from false to true atomically; if
+ * it was already set, nothing else happens. Otherwise a "tcppauseread"
+ * event is handed to the worker that owns the socket.
+ */
+void
+isc__nm_tcp_pauseread(isc_nmhandle_t *handle) {
+ isc__netievent_tcppauseread_t *ievent = NULL;
+ isc_nmsocket_t *sock = NULL;
+
+ REQUIRE(VALID_NMHANDLE(handle));
+
+ sock = handle->sock;
+
+ REQUIRE(VALID_NMSOCK(sock));
+
+ /* Already paused: nothing to do. */
+ if (!atomic_compare_exchange_strong(&sock->readpaused, &(bool){ false },
+ true))
+ {
+ return;
+ }
+
+ ievent = isc__nm_get_netievent_tcppauseread(sock->mgr, sock);
+
+ isc__nm_maybe_enqueue_ievent(&sock->mgr->workers[sock->tid],
+ (isc__netievent_t *)ievent);
+
+ return;
+}
+
+/*
+ * Handler for the "tcppauseread" network event: stops the socket timer
+ * and stops reading on the socket carried by the event. Runs on the
+ * worker that owns the socket.
+ */
+void
+isc__nm_async_tcppauseread(isc__networker_t *worker, isc__netievent_t *ev0) {
+ isc__netievent_tcppauseread_t *ievent =
+ (isc__netievent_tcppauseread_t *)ev0;
+ isc_nmsocket_t *sock = ievent->sock;
+
+ REQUIRE(VALID_NMSOCK(sock));
+ REQUIRE(sock->tid == isc_nm_tid());
+ UNUSED(worker);
+
+ isc__nmsocket_timer_stop(sock);
+ isc__nm_stop_reading(sock);
+}
+
+/*
+ * Resume reading on the TCP socket behind 'handle' after a pause.
+ * Must be called on the thread that owns the socket.
+ *
+ * Nothing happens when no read callback is set. When the socket is no
+ * longer active, the read is failed with ISC_R_CANCELED. Otherwise the
+ * 'readpaused' flag is flipped from true to false atomically and, if
+ * it was indeed set, a "tcpstartread" event is handed to the owning
+ * worker.
+ */
+void
+isc__nm_tcp_resumeread(isc_nmhandle_t *handle) {
+ REQUIRE(VALID_NMHANDLE(handle));
+ REQUIRE(VALID_NMSOCK(handle->sock));
+
+ isc__netievent_tcpstartread_t *ievent = NULL;
+ isc_nmsocket_t *sock = handle->sock;
+
+ REQUIRE(sock->tid == isc_nm_tid());
+
+ if (sock->recv_cb == NULL) {
+ /* We are no longer reading */
+ return;
+ }
+
+ if (!isc__nmsocket_active(sock)) {
+ atomic_store(&sock->reading, true);
+ isc__nm_tcp_failed_read_cb(sock, ISC_R_CANCELED);
+ return;
+ }
+
+ /* Not paused: nothing to resume. */
+ if (!atomic_compare_exchange_strong(&sock->readpaused, &(bool){ true },
+ false))
+ {
+ return;
+ }
+
+ ievent = isc__nm_get_netievent_tcpstartread(sock->mgr, sock);
+
+ isc__nm_maybe_enqueue_ievent(&sock->mgr->workers[sock->tid],
+ (isc__netievent_t *)ievent);
+}
+
+/*
+ * libuv read callback for TCP sockets.
+ *
+ * A closing socket or a negative 'nread' fails the read through
+ * isc__nm_tcp_failed_read_cb() (anything but UV_EOF is also counted as
+ * a receive failure). Otherwise the received bytes are handed to the
+ * read callback with ISC_R_SUCCESS; for non-client sockets the read
+ * timeout is first refreshed from the manager's keepalive or idle
+ * value. If the socket is still reading afterwards, its timer is
+ * restarted. The libuv buffer is released at the end, except for the
+ * null buffer that may accompany an error.
+ */
+void
+isc__nm_tcp_read_cb(uv_stream_t *stream, ssize_t nread, const uv_buf_t *buf) {
+ isc_nmsocket_t *sock = uv_handle_get_data((uv_handle_t *)stream);
+ isc__nm_uvreq_t *req = NULL;
+
+ REQUIRE(VALID_NMSOCK(sock));
+ REQUIRE(sock->tid == isc_nm_tid());
+ REQUIRE(atomic_load(&sock->reading));
+ REQUIRE(buf != NULL);
+
+ if (isc__nmsocket_closing(sock)) {
+ isc__nm_tcp_failed_read_cb(sock, ISC_R_CANCELED);
+ goto free;
+ }
+
+ if (nread < 0) {
+ /* End of stream is not counted as a receive failure. */
+ if (nread != UV_EOF) {
+ isc__nm_incstats(sock, STATID_RECVFAIL);
+ }
+
+ isc__nm_tcp_failed_read_cb(sock, isc__nm_uverr2result(nread));
+
+ goto free;
+ }
+
+ req = isc__nm_get_read_req(sock, NULL);
+
+ /*
+ * The callback will be called synchronously because the
+ * result is ISC_R_SUCCESS, so we don't need to retain
+ * the buffer
+ */
+ req->uvbuf.base = buf->base;
+ req->uvbuf.len = nread;
+
+ if (!atomic_load(&sock->client)) {
+ sock->read_timeout =
+ (atomic_load(&sock->keepalive)
+ ? atomic_load(&sock->mgr->keepalive)
+ : atomic_load(&sock->mgr->idle));
+ }
+
+ isc__nm_readcb(sock, req, ISC_R_SUCCESS);
+
+ /* The readcb could have paused the reading */
+ if (atomic_load(&sock->reading)) {
+ /* The timer will be updated */
+ isc__nmsocket_timer_restart(sock);
+ }
+
+free:
+ if (nread < 0) {
+ /*
+ * The buffer may be a null buffer on error.
+ */
+ if (buf->base == NULL && buf->len == 0) {
+ return;
+ }
+ }
+
+ isc__nm_free_uvbuf(sock, buf);
+}
+
+/*
+ * Quota callback.  'sock0' is the socket passed as the opaque callback
+ * argument and 'quota' the quota handed to the callback.  Both are put
+ * into a 'tcpaccept' event that is passed to the socket's worker using
+ * the async channel.
+ */
+static void
+quota_accept_cb(isc_quota_t *quota, void *sock0) {
+	isc_nmsocket_t *sock = (isc_nmsocket_t *)sock0;
+
+	REQUIRE(VALID_NMSOCK(sock));
+
+	isc__netievent_tcpaccept_t *event =
+		isc__nm_get_netievent_tcpaccept(sock->mgr, sock, quota);
+
+	isc__nm_maybe_enqueue_ievent(&sock->mgr->workers[sock->tid],
+				     (isc__netievent_t *)event);
+}
+
+/*
+ * Handler for the 'tcpaccept' event queued by quota_accept_cb().
+ * Accepts a connection on the event's socket using the quota carried
+ * by the event, then hands the outcome to the accept logger.
+ * 'worker' is not used.
+ */
+void
+isc__nm_async_tcpaccept(isc__networker_t *worker, isc__netievent_t *ev0) {
+	isc__netievent_tcpaccept_t *event = (isc__netievent_tcpaccept_t *)ev0;
+	isc_nmsocket_t *sock = event->sock;
+
+	UNUSED(worker);
+
+	REQUIRE(VALID_NMSOCK(sock));
+	REQUIRE(sock->tid == isc_nm_tid());
+
+	/* Accept first, then evaluate the log rate limiter. */
+	isc_result_t result = accept_connection(sock, event->quota);
+	isc__nm_accept_connection_log(result, can_log_tcp_quota());
+}
+/*
+ * Accept one pending connection on the listening socket 'ssock' into a
+ * newly allocated socket.  'quota' may be NULL; it is stored in the new
+ * socket, or detached immediately when 'ssock' is already closing.
+ * Returns ISC_R_SUCCESS once the accept callback has accepted the
+ * connection, ISC_R_CANCELED when 'ssock' is closing, or the failure
+ * produced by libuv, the address conversion or the accept callback.
+ */
+static isc_result_t
+accept_connection(isc_nmsocket_t *ssock, isc_quota_t *quota) {
+ isc_nmsocket_t *csock = NULL;
+ isc__networker_t *worker = NULL;
+ int r;
+ isc_result_t result;
+ struct sockaddr_storage ss;
+ isc_sockaddr_t local;
+ isc_nmhandle_t *handle = NULL;
+
+ REQUIRE(VALID_NMSOCK(ssock));
+ REQUIRE(ssock->tid == isc_nm_tid());
+
+ if (isc__nmsocket_closing(ssock)) {
+ if (quota != NULL) {
+ isc_quota_detach(&quota);
+ }
+ return (ISC_R_CANCELED);
+ }
+ /* Set up the socket for the new connection from the listener's settings. */
+ csock = isc_mem_get(ssock->mgr->mctx, sizeof(isc_nmsocket_t));
+ isc__nmsocket_init(csock, ssock->mgr, isc_nm_tcpsocket, &ssock->iface);
+ csock->tid = ssock->tid;
+ csock->extrahandlesize = ssock->extrahandlesize;
+ isc__nmsocket_attach(ssock, &csock->server);
+ csock->recv_cb = ssock->recv_cb;
+ csock->recv_cbarg = ssock->recv_cbarg;
+ csock->quota = quota;
+ atomic_init(&csock->accepting, true);
+
+ worker = &csock->mgr->workers[isc_nm_tid()];
+
+ r = uv_tcp_init(&worker->loop, &csock->uv_handle.tcp);
+ UV_RUNTIME_CHECK(uv_tcp_init, r);
+ uv_handle_set_data(&csock->uv_handle.handle, csock);
+
+ r = uv_timer_init(&worker->loop, &csock->read_timer);
+ UV_RUNTIME_CHECK(uv_timer_init, r);
+ uv_handle_set_data((uv_handle_t *)&csock->read_timer, csock);
+
+ r = uv_accept(&ssock->uv_handle.stream, &csock->uv_handle.stream);
+ if (r != 0) {
+ result = isc__nm_uverr2result(r);
+ goto failure;
+ }
+ /* The peer address is stored in the new socket. */
+ r = uv_tcp_getpeername(&csock->uv_handle.tcp, (struct sockaddr *)&ss,
+ &(int){ sizeof(ss) });
+ if (r != 0) {
+ result = isc__nm_uverr2result(r);
+ goto failure;
+ }
+
+ result = isc_sockaddr_fromsockaddr(&csock->peer,
+ (struct sockaddr *)&ss);
+ if (result != ISC_R_SUCCESS) {
+ goto failure;
+ }
+ /* The local address is only passed to the handle created below. */
+ r = uv_tcp_getsockname(&csock->uv_handle.tcp, (struct sockaddr *)&ss,
+ &(int){ sizeof(ss) });
+ if (r != 0) {
+ result = isc__nm_uverr2result(r);
+ goto failure;
+ }
+
+ result = isc_sockaddr_fromsockaddr(&local, (struct sockaddr *)&ss);
+ if (result != ISC_R_SUCCESS) {
+ goto failure;
+ }
+
+ handle = isc__nmhandle_get(csock, NULL, &local);
+
+ result = ssock->accept_cb(handle, ISC_R_SUCCESS, ssock->accept_cbarg);
+ if (result != ISC_R_SUCCESS) {
+ isc_nmhandle_detach(&handle);
+ goto failure;
+ }
+
+ atomic_store(&csock->accepting, false);
+
+ isc__nm_incstats(csock, STATID_ACCEPT);
+
+ csock->read_timeout = atomic_load(&csock->mgr->init);
+
+ atomic_fetch_add(&ssock->parent->active_child_connections, 1);
+
+ /*
+ * The acceptcb needs to attach to the handle if it wants to keep the
+ * connection alive
+ */
+ isc_nmhandle_detach(&handle);
+
+ /*
+ * sock is now attached to the handle.
+ */
+ isc__nmsocket_detach(&csock);
+
+ return (ISC_R_SUCCESS);
+ /* Failure: deactivate the new socket, report the result and drop it. */
+failure:
+ atomic_store(&csock->active, false);
+
+ failed_accept_cb(csock, result);
+
+ isc__nmsocket_prep_destroy(csock);
+
+ isc__nmsocket_detach(&csock);
+
+ return (result);
+}
+
+/*
+ * Queue 'region' for sending on the TCP socket behind 'handle'.
+ *
+ * A uv request is filled in with the region's base and length (the
+ * data is referenced, not copied), a reference to 'handle' and the
+ * send callback 'cb' with its argument 'cbarg'.  The request is then
+ * handed to the socket's worker in a 'tcpsend' event.
+ */
+void
+isc__nm_tcp_send(isc_nmhandle_t *handle, const isc_region_t *region,
+		 isc_nm_cb_t cb, void *cbarg) {
+	REQUIRE(VALID_NMHANDLE(handle));
+	REQUIRE(VALID_NMSOCK(handle->sock));
+
+	isc_nmsocket_t *sock = handle->sock;
+
+	REQUIRE(sock->type == isc_nm_tcpsocket);
+
+	isc__nm_uvreq_t *sendreq = isc__nm_uvreq_get(sock->mgr, sock);
+
+	sendreq->uvbuf.base = (char *)region->base;
+	sendreq->uvbuf.len = region->length;
+
+	isc_nmhandle_attach(handle, &sendreq->handle);
+
+	sendreq->cb.send = cb;
+	sendreq->cbarg = cbarg;
+
+	isc__netievent_tcpsend_t *event =
+		isc__nm_get_netievent_tcpsend(sock->mgr, sock, sendreq);
+
+	isc__nm_maybe_enqueue_ievent(&sock->mgr->workers[sock->tid],
+				     (isc__netievent_t *)event);
+}
+
+/*
+ * Write-completion callback passed to uv_write() by tcp_send_direct().
+ * The request's write timer is stopped and detached.  A negative
+ * 'status' bumps the SENDFAIL statistic and goes to the failed-send
+ * callback with the translated result; otherwise the send callback is
+ * run with ISC_R_SUCCESS.
+ */
+static void
+tcp_send_cb(uv_write_t *req, int status) {
+	isc__nm_uvreq_t *uvreq = (isc__nm_uvreq_t *)req->data;
+
+	REQUIRE(VALID_UVREQ(uvreq));
+	REQUIRE(VALID_NMSOCK(uvreq->sock));
+
+	isc_nmsocket_t *sock = uvreq->sock;
+
+	isc_nm_timer_stop(uvreq->timer);
+	isc_nm_timer_detach(&uvreq->timer);
+
+	if (status >= 0) {
+		isc__nm_sendcb(sock, uvreq, ISC_R_SUCCESS, false);
+		return;
+	}
+
+	isc__nm_incstats(sock, STATID_SENDFAIL);
+	isc__nm_failed_send_cb(sock, uvreq, isc__nm_uverr2result(status));
+}
+
+/*
+ * Handler for the 'tcpsend' event: send the request carried by the
+ * event on its socket.  A write timeout of zero is first replaced by
+ * the manager's keepalive or idle value, depending on the socket's
+ * 'keepalive' flag.  If the send cannot be started, the SENDFAIL
+ * statistic is bumped and the failed-send callback is invoked.
+ */
+void
+isc__nm_async_tcpsend(isc__networker_t *worker, isc__netievent_t *ev0) {
+	isc__netievent_tcpsend_t *event = (isc__netievent_tcpsend_t *)ev0;
+	isc_nmsocket_t *sock = event->sock;
+	isc__nm_uvreq_t *uvreq = event->req;
+
+	REQUIRE(sock->type == isc_nm_tcpsocket);
+	REQUIRE(sock->tid == isc_nm_tid());
+	UNUSED(worker);
+
+	if (sock->write_timeout == 0) {
+		if (atomic_load(&sock->keepalive)) {
+			sock->write_timeout =
+				atomic_load(&sock->mgr->keepalive);
+		} else {
+			sock->write_timeout = atomic_load(&sock->mgr->idle);
+		}
+	}
+
+	isc_result_t result = tcp_send_direct(sock, uvreq);
+	if (result == ISC_R_SUCCESS) {
+		return;
+	}
+
+	isc__nm_incstats(sock, STATID_SENDFAIL);
+	isc__nm_failed_send_cb(sock, uvreq, result);
+}
+
+/*
+ * Start writing 'req' on 'sock'; must run on the socket's thread.
+ *
+ * Returns ISC_R_CANCELED when the socket is closing, the translated
+ * libuv error when uv_write() fails, and ISC_R_SUCCESS otherwise.  On
+ * success a write timer is created for the request and started only
+ * when the socket's write timeout is non-zero.
+ */
+static isc_result_t
+tcp_send_direct(isc_nmsocket_t *sock, isc__nm_uvreq_t *req) {
+	REQUIRE(VALID_NMSOCK(sock));
+	REQUIRE(VALID_UVREQ(req));
+	REQUIRE(sock->tid == isc_nm_tid());
+	REQUIRE(sock->type == isc_nm_tcpsocket);
+
+	if (isc__nmsocket_closing(sock)) {
+		return (ISC_R_CANCELED);
+	}
+
+	int uvstatus = uv_write(&req->uv_req.write, &sock->uv_handle.stream,
+				&req->uvbuf, 1, tcp_send_cb);
+	if (uvstatus < 0) {
+		return (isc__nm_uverr2result(uvstatus));
+	}
+
+	isc_nm_timer_create(req->handle, isc__nmsocket_writetimeout_cb, req,
+			    &req->timer);
+	if (sock->write_timeout > 0) {
+		isc_nm_timer_start(req->timer, sock->write_timeout);
+	}
+
+	return (ISC_R_SUCCESS);
+}
+
+/*
+ * uv_close() callback used by read_timer_close_cb() for sockets that
+ * have a parent.  The socket must already be marked closing.  It is
+ * marked closed (it must not have been closed before), the CLOSE
+ * statistic is bumped, 'listening' is cleared and one socket
+ * reference is dropped.
+ */
+static void
+tcp_stop_cb(uv_handle_t *handle) {
+	isc_nmsocket_t *sock = uv_handle_get_data(handle);
+	bool already_closed = false;
+
+	uv_handle_set_data(handle, NULL);
+
+	REQUIRE(VALID_NMSOCK(sock));
+	REQUIRE(sock->tid == isc_nm_tid());
+	REQUIRE(atomic_load(&sock->closing));
+
+	if (!atomic_compare_exchange_strong(&sock->closed, &already_closed,
+					    true))
+	{
+		UNREACHABLE();
+	}
+
+	isc__nm_incstats(sock, STATID_CLOSE);
+	atomic_store(&sock->listening, false);
+	isc__nmsocket_detach(&sock);
+}
+
+/*
+ * Final close step for a TCP socket that is already marked closing.
+ * The socket is marked closed (it must not have been closed before),
+ * the CLOSE statistic is bumped, the reference to the 'server' socket
+ * is released if there is one, 'connected' is cleared and the socket
+ * is prepared for destruction.
+ */
+static void
+tcp_close_sock(isc_nmsocket_t *sock) {
+	bool already_closed = false;
+
+	REQUIRE(VALID_NMSOCK(sock));
+	REQUIRE(sock->tid == isc_nm_tid());
+	REQUIRE(atomic_load(&sock->closing));
+
+	if (!atomic_compare_exchange_strong(&sock->closed, &already_closed,
+					    true))
+	{
+		UNREACHABLE();
+	}
+
+	isc__nm_incstats(sock, STATID_CLOSE);
+
+	if (sock->server != NULL) {
+		isc__nmsocket_detach(&sock->server);
+	}
+
+	atomic_store(&sock->connected, false);
+	isc__nmsocket_prep_destroy(sock);
+}
+
+/*
+ * uv_close() callback for the TCP handle: clears the handle's data
+ * pointer and finishes closing the socket it referred to.
+ */
+static void
+tcp_close_cb(uv_handle_t *handle) {
+	isc_nmsocket_t *owner = uv_handle_get_data(handle);
+
+	uv_handle_set_data(handle, NULL);
+	tcp_close_sock(owner);
+}
+
+/*
+ * uv_close() callback for the read timer; continues the teardown with
+ * the socket's TCP handle:
+ *  - a socket with a parent closes the handle with tcp_stop_cb();
+ *  - if the handle is already closing, the socket is closed directly;
+ *  - otherwise the handle is closed with tcp_close_cb().
+ */
+static void
+read_timer_close_cb(uv_handle_t *handle) {
+	isc_nmsocket_t *sock = uv_handle_get_data(handle);
+
+	uv_handle_set_data(handle, NULL);
+
+	if (sock->parent) {
+		uv_close(&sock->uv_handle.handle, tcp_stop_cb);
+		return;
+	}
+
+	if (uv_is_closing(&sock->uv_handle.handle)) {
+		tcp_close_sock(sock);
+		return;
+	}
+
+	uv_close(&sock->uv_handle.handle, tcp_close_cb);
+}
+
+/*
+ * Stop one child socket of a listener on the child's own thread.
+ * Only the caller that flips 'closing' from false to true proceeds:
+ * it closes the socket, decrements the parent's 'rchildren' counter
+ * and waits on the parent's 'stoplistening' barrier.
+ */
+static void
+stop_tcp_child(isc_nmsocket_t *sock) {
+	bool closing = false;
+
+	REQUIRE(sock->type == isc_nm_tcpsocket);
+	REQUIRE(sock->tid == isc_nm_tid());
+
+	if (atomic_compare_exchange_strong(&sock->closing, &closing, true)) {
+		tcp_close_direct(sock);
+
+		atomic_fetch_sub(&sock->parent->rchildren, 1);
+
+		isc_barrier_wait(&sock->parent->stoplistening);
+	}
+}
+/*
+ * Stop a TCP listener socket.  Every child other than the one indexed
+ * by this thread's id is deactivated and handed to
+ * enqueue_stoplistening(); this thread's child is then deactivated and
+ * stopped directly.  Finally the listener is marked closed and
+ * prepared for destruction.
+ */
+static void
+stop_tcp_parent(isc_nmsocket_t *sock) {
+ isc_nmsocket_t *csock = NULL;
+
+ REQUIRE(VALID_NMSOCK(sock));
+ REQUIRE(sock->tid == isc_nm_tid());
+ REQUIRE(sock->type == isc_nm_tcplistener);
+ /* stop_tcp_child() waits on this barrier; it is sized to the child count. */
+ isc_barrier_init(&sock->stoplistening, sock->nchildren);
+
+ for (size_t i = 0; i < sock->nchildren; i++) {
+ csock = &sock->children[i];
+ REQUIRE(VALID_NMSOCK(csock));
+
+ if ((int)i == isc_nm_tid()) {
+ /*
+ * We need to schedule closing the other sockets first
+ */
+ continue;
+ }
+
+ atomic_store(&csock->active, false);
+ enqueue_stoplistening(csock);
+ }
+ /* Now stop the child that belongs to the current thread. */
+ csock = &sock->children[isc_nm_tid()];
+ atomic_store(&csock->active, false);
+ stop_tcp_child(csock);
+
+ atomic_store(&sock->closed, true);
+ isc__nmsocket_prep_destroy(sock);
+}
+/*
+ * Begin closing a TCP socket on its own thread; the socket must
+ * already be marked closing.  A socket with a 'server' (set in
+ * accept_connection()) decrements the active-connection counter of the
+ * server's parent.  The quota is released, the timer and reading are
+ * stopped, and the read timer is closed; read_timer_close_cb()
+ * continues the teardown.
+ */
+static void
+tcp_close_direct(isc_nmsocket_t *sock) {
+ REQUIRE(VALID_NMSOCK(sock));
+ REQUIRE(sock->tid == isc_nm_tid());
+ REQUIRE(atomic_load(&sock->closing));
+
+ if (sock->server != NULL) {
+ REQUIRE(VALID_NMSOCK(sock->server));
+ REQUIRE(VALID_NMSOCK(sock->server->parent));
+ if (sock->server->parent != NULL) {
+ atomic_fetch_sub(
+ &sock->server->parent->active_child_connections,
+ 1);
+ }
+ }
+
+ if (sock->quota != NULL) {
+ isc_quota_detach(&sock->quota);
+ }
+
+ isc__nmsocket_timer_stop(sock);
+ isc__nm_stop_reading(sock);
+ /* The timer's data must point at the socket for read_timer_close_cb(). */
+ uv_handle_set_data((uv_handle_t *)&sock->read_timer, sock);
+ uv_close((uv_handle_t *)&sock->read_timer, read_timer_close_cb);
+}
+
+/*
+ * Close a TCP socket that has already been deactivated.  Only the
+ * caller that flips 'closing' from false to true continues.  On the
+ * socket's own thread the close is performed directly; from any other
+ * thread a 'tcpclose' event is passed to the socket's worker using
+ * the async channel.
+ */
+void
+isc__nm_tcp_close(isc_nmsocket_t *sock) {
+	bool closing = false;
+
+	REQUIRE(VALID_NMSOCK(sock));
+	REQUIRE(sock->type == isc_nm_tcpsocket);
+	REQUIRE(!isc__nmsocket_active(sock));
+
+	if (!atomic_compare_exchange_strong(&sock->closing, &closing, true)) {
+		return;
+	}
+
+	if (sock->tid != isc_nm_tid()) {
+		isc__netievent_tcpclose_t *event =
+			isc__nm_get_netievent_tcpclose(sock->mgr, sock);
+
+		isc__nm_enqueue_ievent(&sock->mgr->workers[sock->tid],
+				       (isc__netievent_t *)event);
+		return;
+	}
+
+	tcp_close_direct(sock);
+}
+
+/*
+ * Handler for the 'tcpclose' event queued by isc__nm_tcp_close():
+ * performs the close on the socket's own thread.  'worker' is not
+ * used.
+ */
+void
+isc__nm_async_tcpclose(isc__networker_t *worker, isc__netievent_t *ev0) {
+	isc_nmsocket_t *sock = ((isc__netievent_tcpclose_t *)ev0)->sock;
+
+	REQUIRE(VALID_NMSOCK(sock));
+	REQUIRE(sock->tid == isc_nm_tid());
+
+	UNUSED(worker);
+
+	tcp_close_direct(sock);
+}
+/*
+ * uv_close() callback used by isc__nm_tcp_shutdown() for a socket that
+ * was still connecting.  It prepares the socket for destruction and
+ * drops one socket reference; isc__nm_tcp_shutdown() takes an extra
+ * reference just before calling uv_close().
+ */
+static void
+tcp_close_connect_cb(uv_handle_t *handle) {
+ isc_nmsocket_t *sock = uv_handle_get_data(handle);
+
+ REQUIRE(VALID_NMSOCK(sock));
+
+ REQUIRE(isc__nm_in_netthread());
+ REQUIRE(sock->tid == isc_nm_tid());
+
+ isc__nmsocket_prep_destroy(sock);
+ isc__nmsocket_detach(&sock);
+}
+
+/*
+ * Shut down a TCP socket on its own thread.
+ *
+ * The socket is deactivated first; if it was not active, nothing else
+ * happens.  After that:
+ *  - a socket that is still accepting is left alone;
+ *  - a socket that is still connecting has its handle closed with
+ *    tcp_close_connect_cb(), holding an extra reference meanwhile;
+ *  - a socket with a static handle gets a failed read, with
+ *    ISC_R_SHUTTINGDOWN when isc__nm_closing() is true and
+ *    ISC_R_CANCELED otherwise;
+ *  - any other socket without a parent is prepared for destruction.
+ */
+void
+isc__nm_tcp_shutdown(isc_nmsocket_t *sock) {
+	REQUIRE(VALID_NMSOCK(sock));
+	REQUIRE(sock->tid == isc_nm_tid());
+	REQUIRE(sock->type == isc_nm_tcpsocket);
+
+	if (!isc__nmsocket_deactivate(sock)) {
+		return;
+	}
+
+	if (atomic_load(&sock->accepting)) {
+		return;
+	}
+
+	if (atomic_load(&sock->connecting)) {
+		isc_nmsocket_t *extraref = NULL;
+
+		isc__nmsocket_attach(sock, &extraref);
+		uv_close(&sock->uv_handle.handle, tcp_close_connect_cb);
+		return;
+	}
+
+	if (sock->statichandle != NULL) {
+		isc_result_t reason = isc__nm_closing(sock)
+					      ? ISC_R_SHUTTINGDOWN
+					      : ISC_R_CANCELED;
+
+		isc__nm_failed_read_cb(sock, reason, false);
+		return;
+	}
+
+	/* No handle is attached: a parentless socket is simply dropped. */
+	if (sock->parent == NULL) {
+		isc__nmsocket_prep_destroy(sock);
+	}
+}
+
+/*
+ * Cancel the read on the TCP socket behind 'handle' by queueing a
+ * 'tcpcancel' event, carrying the socket and the handle, for the
+ * socket's worker.
+ */
+void
+isc__nm_tcp_cancelread(isc_nmhandle_t *handle) {
+	REQUIRE(VALID_NMHANDLE(handle));
+
+	isc_nmsocket_t *sock = handle->sock;
+
+	REQUIRE(VALID_NMSOCK(sock));
+	REQUIRE(sock->type == isc_nm_tcpsocket);
+
+	isc__netievent_tcpcancel_t *event =
+		isc__nm_get_netievent_tcpcancel(sock->mgr, sock, handle);
+
+	isc__nm_enqueue_ievent(&sock->mgr->workers[sock->tid],
+			       (isc__netievent_t *)event);
+}
+
+/*
+ * Handler for the 'tcpcancel' event: stops the socket's read timer and
+ * fails the read with ISC_R_EOF.  'worker' is not used.
+ */
+void
+isc__nm_async_tcpcancel(isc__networker_t *worker, isc__netievent_t *ev0) {
+	isc_nmsocket_t *sock = ((isc__netievent_tcpcancel_t *)ev0)->sock;
+
+	REQUIRE(VALID_NMSOCK(sock));
+	REQUIRE(sock->tid == isc_nm_tid());
+	UNUSED(worker);
+
+	uv_timer_stop(&sock->read_timer);
+
+	isc__nm_tcp_failed_read_cb(sock, ISC_R_EOF);
+}
+
+/*
+ * Return the current value of the listener's
+ * 'active_child_connections' counter.  The value is asserted to be
+ * non-negative.
+ */
+int_fast32_t
+isc__nm_tcp_listener_nactive(isc_nmsocket_t *listener) {
+	REQUIRE(VALID_NMSOCK(listener));
+
+	int_fast32_t count = atomic_load(&listener->active_child_connections);
+
+	INSIST(count >= 0);
+	return (count);
+}
diff --git a/lib/isc/netmgr/tcpdns.c b/lib/isc/netmgr/tcpdns.c
new file mode 100644
index 0000000..eda6aa6
--- /dev/null
+++ b/lib/isc/netmgr/tcpdns.c
@@ -0,0 +1,1500 @@
+/*
+ * Copyright (C) Internet Systems Consortium, Inc. ("ISC")
+ *
+ * SPDX-License-Identifier: MPL-2.0
+ *
+ * This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, you can obtain one at https://mozilla.org/MPL/2.0/.
+ *
+ * See the COPYRIGHT file distributed with this work for additional
+ * information regarding copyright ownership.
+ */
+
+#include <libgen.h>
+#include <unistd.h>
+#include <uv.h>
+
+#include <isc/atomic.h>
+#include <isc/barrier.h>
+#include <isc/buffer.h>
+#include <isc/condition.h>
+#include <isc/errno.h>
+#include <isc/log.h>
+#include <isc/magic.h>
+#include <isc/mem.h>
+#include <isc/netmgr.h>
+#include <isc/quota.h>
+#include <isc/random.h>
+#include <isc/refcount.h>
+#include <isc/region.h>
+#include <isc/result.h>
+#include <isc/sockaddr.h>
+#include <isc/stdtime.h>
+#include <isc/thread.h>
+#include <isc/util.h>
+
+#include "netmgr-int.h"
+#include "uv-compat.h"
+
+/* Timestamp stored by the most recent can_log_tcpdns_quota() call. */
+static atomic_uint_fast32_t last_tcpdnsquota_log = 0;
+
+/*
+ * Rate limiter for quota log messages.  The current time replaces the
+ * stored timestamp; the result is true only when the two differ.
+ */
+static bool
+can_log_tcpdns_quota(void) {
+	isc_stdtime_t now;
+
+	isc_stdtime_get(&now);
+
+	isc_stdtime_t previous =
+		atomic_exchange_relaxed(&last_tcpdnsquota_log, now);
+
+	return (now != previous);
+}
+
+static isc_result_t
+tcpdns_connect_direct(isc_nmsocket_t *sock, isc__nm_uvreq_t *req);
+
+static void
+tcpdns_close_direct(isc_nmsocket_t *sock);
+
+static void
+tcpdns_connect_cb(uv_connect_t *uvreq, int status);
+
+static void
+tcpdns_connection_cb(uv_stream_t *server, int status);
+
+static void
+tcpdns_close_cb(uv_handle_t *uvhandle);
+
+static isc_result_t
+accept_connection(isc_nmsocket_t *ssock, isc_quota_t *quota);
+
+static void
+quota_accept_cb(isc_quota_t *quota, void *sock0);
+
+static void
+stop_tcpdns_parent(isc_nmsocket_t *sock);
+static void
+stop_tcpdns_child(isc_nmsocket_t *sock);
+/*
+ * Start a TCP-DNS connection attempt for 'sock' on its own network
+ * thread: initialise the libuv TCP handle and read timer, open the
+ * socket's descriptor, bind to req->local when one is given, and call
+ * uv_tcp_connect() with tcpdns_connect_cb().  The outcome is stored in
+ * sock->result under sock->lock and signalled on sock->cond, which
+ * isc_nm_tcpdnsconnect() waits on.  If the socket is not yet active,
+ * this function then waits on sock->scond.  Returns the same result.
+ */
+static isc_result_t
+tcpdns_connect_direct(isc_nmsocket_t *sock, isc__nm_uvreq_t *req) {
+ isc__networker_t *worker = NULL;
+ isc_result_t result = ISC_R_UNSET;
+ int r;
+
+ REQUIRE(VALID_NMSOCK(sock));
+ REQUIRE(VALID_UVREQ(req));
+
+ REQUIRE(isc__nm_in_netthread());
+ REQUIRE(sock->tid == isc_nm_tid());
+
+ worker = &sock->mgr->workers[sock->tid];
+
+ atomic_store(&sock->connecting, true);
+
+ r = uv_tcp_init(&worker->loop, &sock->uv_handle.tcp);
+ UV_RUNTIME_CHECK(uv_tcp_init, r);
+ uv_handle_set_data(&sock->uv_handle.handle, sock);
+
+ r = uv_timer_init(&worker->loop, &sock->read_timer);
+ UV_RUNTIME_CHECK(uv_timer_init, r);
+ uv_handle_set_data((uv_handle_t *)&sock->read_timer, sock);
+
+ if (isc__nm_closing(sock)) {
+ result = ISC_R_SHUTTINGDOWN;
+ goto error;
+ }
+
+ r = uv_tcp_open(&sock->uv_handle.tcp, sock->fd);
+ if (r != 0) {
+ isc__nm_closesocket(sock->fd);
+ isc__nm_incstats(sock, STATID_OPENFAIL);
+ goto done;
+ }
+ isc__nm_incstats(sock, STATID_OPEN);
+
+ if (req->local.length != 0) {
+ r = uv_tcp_bind(&sock->uv_handle.tcp, &req->local.type.sa, 0);
+ /*
+ * In case of shared socket UV_EINVAL will be returned and needs
+ * to be ignored
+ */
+ if (r != 0 && r != UV_EINVAL) {
+ isc__nm_incstats(sock, STATID_BINDFAIL);
+ goto done;
+ }
+ }
+
+ isc__nm_set_network_buffers(sock->mgr, &sock->uv_handle.handle);
+
+ uv_handle_set_data(&req->uv_req.handle, req);
+ r = uv_tcp_connect(&req->uv_req.connect, &sock->uv_handle.tcp,
+ &req->peer.type.sa, tcpdns_connect_cb);
+ if (r != 0) {
+ isc__nm_incstats(sock, STATID_CONNECTFAIL);
+ goto done;
+ }
+ /*
+ * While connecting, the timer's data points at the connect request;
+ * tcpdns_connect_cb() points it back at the socket on success.
+ */
+ uv_handle_set_data((uv_handle_t *)&sock->read_timer,
+ &req->uv_req.connect);
+ isc__nmsocket_timer_start(sock);
+
+ atomic_store(&sock->connected, true);
+ /* 'done' translates the last libuv status; 'error' keeps 'result' as set. */
+done:
+ result = isc__nm_uverr2result(r);
+error:
+ LOCK(&sock->lock);
+ sock->result = result;
+ SIGNAL(&sock->cond);
+ if (!atomic_load(&sock->active)) {
+ WAIT(&sock->scond, &sock->lock);
+ }
+ INSIST(atomic_load(&sock->active));
+ UNLOCK(&sock->lock);
+
+ return (result);
+}
+
+/*
+ * Handler for the 'tcpdnsconnect' event: start the connection attempt
+ * for the event's socket and request on the socket's own thread.
+ *
+ * When the attempt cannot be started, the socket's callbacks are
+ * cleared, the connect callback is run with the failure, and the
+ * socket is deactivated and closed.  In every case one socket
+ * reference is dropped before returning.  'worker' is not used.
+ */
+void
+isc__nm_async_tcpdnsconnect(isc__networker_t *worker, isc__netievent_t *ev0) {
+	isc__netievent_tcpdnsconnect_t *event =
+		(isc__netievent_tcpdnsconnect_t *)ev0;
+	isc_nmsocket_t *sock = event->sock;
+	isc__nm_uvreq_t *req = event->req;
+
+	UNUSED(worker);
+
+	REQUIRE(VALID_NMSOCK(sock));
+	REQUIRE(sock->type == isc_nm_tcpdnssocket);
+	REQUIRE(sock->parent == NULL);
+	REQUIRE(sock->tid == isc_nm_tid());
+
+	isc_result_t result = tcpdns_connect_direct(sock, req);
+	if (result != ISC_R_SUCCESS) {
+		isc__nmsocket_clearcb(sock);
+		isc__nm_connectcb(sock, req, result, true);
+		atomic_store(&sock->active, false);
+		isc__nm_tcpdns_close(sock);
+	}
+
+	/* The handle holds the socket from here on. */
+	isc__nmsocket_detach(&sock);
+}
+/*
+ * Connect callback passed to uv_tcp_connect() for TCP-DNS sockets.
+ * Timeouts, shutdown, cancellation and libuv errors all end in the
+ * failed-connect callback.  UV_EADDRINUSE is retried while the
+ * request's 'connect_tries' counter allows.  On success the timer is
+ * stopped, the peer address is recorded, 'connecting' is cleared and
+ * the connect callback is run with ISC_R_SUCCESS.
+ */
+static void
+tcpdns_connect_cb(uv_connect_t *uvreq, int status) {
+ isc_result_t result = ISC_R_UNSET;
+ isc__nm_uvreq_t *req = NULL;
+ isc_nmsocket_t *sock = uv_handle_get_data((uv_handle_t *)uvreq->handle);
+ struct sockaddr_storage ss;
+ int r;
+
+ REQUIRE(VALID_NMSOCK(sock));
+ REQUIRE(sock->tid == isc_nm_tid());
+
+ req = uv_handle_get_data((uv_handle_t *)uvreq);
+
+ REQUIRE(VALID_UVREQ(req));
+ REQUIRE(VALID_NMHANDLE(req->handle));
+ /* The socket's own state is checked before the libuv status. */
+ if (atomic_load(&sock->timedout)) {
+ result = ISC_R_TIMEDOUT;
+ goto error;
+ } else if (isc__nm_closing(sock)) {
+ /* Network manager shutting down */
+ result = ISC_R_SHUTTINGDOWN;
+ goto error;
+ } else if (isc__nmsocket_closing(sock)) {
+ /* Connection canceled */
+ result = ISC_R_CANCELED;
+ goto error;
+ } else if (status == UV_ETIMEDOUT) {
+ /* Timeout status code here indicates hard error */
+ result = ISC_R_TIMEDOUT;
+ goto error;
+ } else if (status == UV_EADDRINUSE) {
+ /*
+ * On FreeBSD the TCP connect() call sometimes results in a
+ * spurious transient EADDRINUSE. Try a few more times before
+ * giving up.
+ */
+ if (--req->connect_tries > 0) {
+ r = uv_tcp_connect(
+ &req->uv_req.connect, &sock->uv_handle.tcp,
+ &req->peer.type.sa, tcpdns_connect_cb);
+ if (r != 0) {
+ result = isc__nm_uverr2result(r);
+ goto error;
+ }
+ return;
+ }
+ result = isc__nm_uverr2result(status);
+ goto error;
+ } else if (status != 0) {
+ result = isc__nm_uverr2result(status);
+ goto error;
+ }
+ /* Connected: stop the timer and point its data back at the socket. */
+ isc__nmsocket_timer_stop(sock);
+ uv_handle_set_data((uv_handle_t *)&sock->read_timer, sock);
+
+ isc__nm_incstats(sock, STATID_CONNECT);
+ r = uv_tcp_getpeername(&sock->uv_handle.tcp, (struct sockaddr *)&ss,
+ &(int){ sizeof(ss) });
+ if (r != 0) {
+ result = isc__nm_uverr2result(r);
+ goto error;
+ }
+
+ atomic_store(&sock->connecting, false);
+
+ result = isc_sockaddr_fromsockaddr(&sock->peer, (struct sockaddr *)&ss);
+ RUNTIME_CHECK(result == ISC_R_SUCCESS);
+
+ isc__nm_connectcb(sock, req, ISC_R_SUCCESS, false);
+
+ return;
+error:
+ isc__nm_failed_connect_cb(sock, req, result, false);
+}
+/*
+ * Open a TCP-DNS client connection from 'local' to 'peer'.
+ *
+ * 'cb' and 'cbarg' are stored in the connect request, 'timeout' as the
+ * socket's connect timeout and 'extrahandlesize' in the socket.  If
+ * the OS socket cannot be created, the connect callback is run with
+ * that error and the function returns.  On a network thread the
+ * connect event is processed immediately on this thread; otherwise it
+ * is queued for a randomly chosen worker.  In both cases the function
+ * waits until the socket's result has been set before returning.
+ */
+void
+isc_nm_tcpdnsconnect(isc_nm_t *mgr, isc_sockaddr_t *local, isc_sockaddr_t *peer,
+ isc_nm_cb_t cb, void *cbarg, unsigned int timeout,
+ size_t extrahandlesize) {
+ isc_result_t result = ISC_R_SUCCESS;
+ isc_nmsocket_t *sock = NULL;
+ isc__netievent_tcpdnsconnect_t *ievent = NULL;
+ isc__nm_uvreq_t *req = NULL;
+ sa_family_t sa_family;
+
+ REQUIRE(VALID_NM(mgr));
+ REQUIRE(local != NULL);
+ REQUIRE(peer != NULL);
+
+ sa_family = peer->type.sa.sa_family;
+
+ sock = isc_mem_get(mgr->mctx, sizeof(*sock));
+ isc__nmsocket_init(sock, mgr, isc_nm_tcpdnssocket, local);
+
+ sock->extrahandlesize = extrahandlesize;
+ sock->connect_timeout = timeout;
+ sock->result = ISC_R_UNSET;
+ atomic_init(&sock->client, true);
+
+ req = isc__nm_uvreq_get(mgr, sock);
+ req->cb.connect = cb;
+ req->cbarg = cbarg;
+ req->peer = *peer;
+ req->local = *local;
+ req->handle = isc__nmhandle_get(sock, &req->peer, &sock->iface);
+
+ result = isc__nm_socket(sa_family, SOCK_STREAM, 0, &sock->fd);
+ if (result != ISC_R_SUCCESS) {
+ if (isc__nm_in_netthread()) {
+ sock->tid = isc_nm_tid();
+ }
+ isc__nmsocket_clearcb(sock);
+ isc__nm_connectcb(sock, req, result, true);
+ atomic_store(&sock->closed, true);
+ isc__nmsocket_detach(&sock);
+ return;
+ }
+ /* Best-effort socket options: their results are ignored. */
+ (void)isc__nm_socket_min_mtu(sock->fd, sa_family);
+ (void)isc__nm_socket_tcp_maxseg(sock->fd, NM_MAXSEG);
+
+ /* 2 minute timeout */
+ result = isc__nm_socket_connectiontimeout(sock->fd, 120 * 1000);
+ RUNTIME_CHECK(result == ISC_R_SUCCESS);
+
+ ievent = isc__nm_get_netievent_tcpdnsconnect(mgr, sock, req);
+
+ if (isc__nm_in_netthread()) {
+ atomic_store(&sock->active, true);
+ sock->tid = isc_nm_tid();
+ isc__nm_async_tcpdnsconnect(&mgr->workers[sock->tid],
+ (isc__netievent_t *)ievent);
+ isc__nm_put_netievent_tcpdnsconnect(mgr, ievent);
+ } else {
+ atomic_init(&sock->active, false);
+ sock->tid = isc_random_uniform(mgr->nworkers);
+ isc__nm_enqueue_ievent(&mgr->workers[sock->tid],
+ (isc__netievent_t *)ievent);
+ }
+ /*
+ * Wait for tcpdns_connect_direct() to store a result, then mark the
+ * socket active and wake any waiter on 'scond'.
+ */
+ LOCK(&sock->lock);
+ while (sock->result == ISC_R_UNSET) {
+ WAIT(&sock->cond, &sock->lock);
+ }
+ atomic_store(&sock->active, true);
+ BROADCAST(&sock->scond);
+ UNLOCK(&sock->lock);
+}
+
+/*
+ * Create a stream socket of family 'sa_family' for a TCP-DNS listener
+ * and return its descriptor.
+ *
+ * The incoming-cpu and v6only settings are best effort (their results
+ * are ignored).  Address reuse must succeed, and so must the
+ * load-balancing reuse option when 'mgr' has load-balanced sockets
+ * enabled; any failure there, or in creating the socket, is fatal.
+ */
+static uv_os_sock_t
+isc__nm_tcpdns_lb_socket(isc_nm_t *mgr, sa_family_t sa_family) {
+	uv_os_sock_t fd;
+	isc_result_t result = isc__nm_socket(sa_family, SOCK_STREAM, 0, &fd);
+
+	RUNTIME_CHECK(result == ISC_R_SUCCESS);
+
+	(void)isc__nm_socket_incoming_cpu(fd);
+	(void)isc__nm_socket_v6only(fd, sa_family);
+
+	/* FIXME: set mss */
+
+	result = isc__nm_socket_reuse(fd);
+	RUNTIME_CHECK(result == ISC_R_SUCCESS);
+
+	if (mgr->load_balance_sockets) {
+		result = isc__nm_socket_reuse_lb(fd);
+		RUNTIME_CHECK(result == ISC_R_SUCCESS);
+	}
+
+	return (fd);
+}
+
+/*
+ * Queue a 'tcpdnsstop' event for 'sock' on the worker indexed by the
+ * socket's 'tid'.
+ */
+static void
+enqueue_stoplistening(isc_nmsocket_t *sock) {
+	isc__networker_t *owner = &sock->mgr->workers[sock->tid];
+	isc__netievent_tcpdnsstop_t *event =
+		isc__nm_get_netievent_tcpdnsstop(sock->mgr, sock);
+
+	isc__nm_enqueue_ievent(owner, (isc__netievent_t *)event);
+}
+/*
+ * Initialise child number 'tid' of the listener 'sock' and hand a
+ * 'tcpdnslisten' event for it to worker 'tid'.  The child copies the
+ * listener's callbacks, handle size, backlog and quota pointer.  Its
+ * descriptor is a fresh socket when load-balanced sockets are enabled
+ * and a dup() of 'fd' otherwise.
+ */
+static void
+start_tcpdns_child(isc_nm_t *mgr, isc_sockaddr_t *iface, isc_nmsocket_t *sock,
+ uv_os_sock_t fd, int tid) {
+ isc__netievent_tcpdnslisten_t *ievent = NULL;
+ isc_nmsocket_t *csock = &sock->children[tid];
+
+ isc__nmsocket_init(csock, mgr, isc_nm_tcpdnssocket, iface);
+ csock->parent = sock;
+ csock->accept_cb = sock->accept_cb;
+ csock->accept_cbarg = sock->accept_cbarg;
+ csock->recv_cb = sock->recv_cb;
+ csock->recv_cbarg = sock->recv_cbarg;
+ csock->extrahandlesize = sock->extrahandlesize;
+ csock->backlog = sock->backlog;
+ csock->tid = tid;
+ /*
+ * We don't attach to quota, just assign - to avoid
+ * increasing quota unnecessarily.
+ */
+ csock->pquota = sock->pquota;
+ isc_quota_cb_init(&csock->quotacb, quota_accept_cb, csock);
+ /* 'fd' is only used when load-balanced sockets are disabled. */
+ if (mgr->load_balance_sockets) {
+ UNUSED(fd);
+ csock->fd = isc__nm_tcpdns_lb_socket(mgr,
+ iface->type.sa.sa_family);
+ } else {
+ csock->fd = dup(fd);
+ }
+ REQUIRE(csock->fd >= 0);
+
+ ievent = isc__nm_get_netievent_tcpdnslisten(mgr, csock);
+ isc__nm_maybe_enqueue_ievent(&mgr->workers[tid],
+ (isc__netievent_t *)ievent);
+}
+isc_result_t
+isc_nm_listentcpdns(isc_nm_t *mgr, isc_sockaddr_t *iface,
+ isc_nm_recv_cb_t recv_cb, void *recv_cbarg,
+ isc_nm_accept_cb_t accept_cb, void *accept_cbarg,
+ size_t extrahandlesize, int backlog, isc_quota_t *quota,
+ isc_nmsocket_t **sockp) {
+ isc_result_t result = ISC_R_SUCCESS;
+ isc_nmsocket_t *sock = NULL;
+ size_t children_size = 0;
+ uv_os_sock_t fd = -1;
+ /*
+ * Start listening for TCP-DNS connections on 'iface'.  A listener
+ * socket with one child per worker is created, and the call waits
+ * until every child has reported its result.  On success the
+ * listener is stored in '*sockp'; on failure the listener is stopped
+ * and closed.  The result recorded by the children is returned.
+ */
+ REQUIRE(VALID_NM(mgr));
+
+ sock = isc_mem_get(mgr->mctx, sizeof(*sock));
+ isc__nmsocket_init(sock, mgr, isc_nm_tcpdnslistener, iface);
+
+ atomic_init(&sock->rchildren, 0);
+ sock->nchildren = mgr->nworkers;
+ children_size = sock->nchildren * sizeof(sock->children[0]);
+ sock->children = isc_mem_get(mgr->mctx, children_size);
+ memset(sock->children, 0, children_size);
+
+ sock->result = ISC_R_UNSET;
+ sock->accept_cb = accept_cb;
+ sock->accept_cbarg = accept_cbarg;
+ sock->recv_cb = recv_cb;
+ sock->recv_cbarg = recv_cbarg;
+ sock->extrahandlesize = extrahandlesize;
+ sock->backlog = backlog;
+ sock->pquota = quota;
+
+ sock->tid = 0;
+ sock->fd = -1;
+ /* Without load balancing, one socket is created here for all children. */
+ if (!mgr->load_balance_sockets) {
+ fd = isc__nm_tcpdns_lb_socket(mgr, iface->type.sa.sa_family);
+ }
+
+ isc_barrier_init(&sock->startlistening, sock->nchildren);
+ /*
+ * The child whose index equals isc_nm_tid() is skipped in the loop
+ * and started afterwards, when running on a network thread.
+ */
+ for (size_t i = 0; i < sock->nchildren; i++) {
+ if ((int)i == isc_nm_tid()) {
+ continue;
+ }
+ start_tcpdns_child(mgr, iface, sock, fd, i);
+ }
+
+ if (isc__nm_in_netthread()) {
+ start_tcpdns_child(mgr, iface, sock, fd, isc_nm_tid());
+ }
+
+ if (!mgr->load_balance_sockets) {
+ isc__nm_closesocket(fd);
+ }
+ /* Wait until all children have reported in through 'rchildren'. */
+ LOCK(&sock->lock);
+ while (atomic_load(&sock->rchildren) != sock->nchildren) {
+ WAIT(&sock->cond, &sock->lock);
+ }
+ result = sock->result;
+ atomic_store(&sock->active, true);
+ UNLOCK(&sock->lock);
+
+ INSIST(result != ISC_R_UNSET);
+
+ if (result == ISC_R_SUCCESS) {
+ REQUIRE(atomic_load(&sock->rchildren) == sock->nchildren);
+ *sockp = sock;
+ } else {
+ atomic_store(&sock->active, false);
+ enqueue_stoplistening(sock);
+ isc_nmsocket_close(&sock);
+ }
+
+ return (result);
+}
+/*
+ * Handler for the 'tcpdnslisten' event: set up one listener child on
+ * its own thread.  The libuv TCP handle and read timer are initialised,
+ * the child's descriptor is opened, bound (or given the parent's
+ * flags when the shared socket is already bound) and put into
+ * listening state.  The outcome is reported to the parent under the
+ * parent's lock, after which the child waits on the parent's
+ * 'startlistening' barrier.
+ */
+void
+isc__nm_async_tcpdnslisten(isc__networker_t *worker, isc__netievent_t *ev0) {
+ isc__netievent_tcpdnslisten_t *ievent =
+ (isc__netievent_tcpdnslisten_t *)ev0;
+ sa_family_t sa_family;
+ int r;
+ int flags = 0;
+ isc_nmsocket_t *sock = NULL;
+ isc_result_t result = ISC_R_UNSET;
+ isc_nm_t *mgr = NULL;
+
+ REQUIRE(VALID_NMSOCK(ievent->sock));
+ REQUIRE(ievent->sock->tid == isc_nm_tid());
+ REQUIRE(VALID_NMSOCK(ievent->sock->parent));
+
+ sock = ievent->sock;
+ sa_family = sock->iface.type.sa.sa_family;
+ mgr = sock->mgr;
+
+ REQUIRE(sock->type == isc_nm_tcpdnssocket);
+ REQUIRE(sock->parent != NULL);
+ REQUIRE(sock->tid == isc_nm_tid());
+
+ (void)isc__nm_socket_min_mtu(sock->fd, sa_family);
+ (void)isc__nm_socket_tcp_maxseg(sock->fd, NM_MAXSEG);
+
+ r = uv_tcp_init(&worker->loop, &sock->uv_handle.tcp);
+ UV_RUNTIME_CHECK(uv_tcp_init, r);
+ uv_handle_set_data(&sock->uv_handle.handle, sock);
+ /* This keeps the socket alive after everything else is gone */
+ isc__nmsocket_attach(sock, &(isc_nmsocket_t *){ NULL });
+
+ r = uv_timer_init(&worker->loop, &sock->read_timer);
+ UV_RUNTIME_CHECK(uv_timer_init, r);
+ uv_handle_set_data((uv_handle_t *)&sock->read_timer, sock);
+ /* The parent's lock is held from here until the UNLOCK below. */
+ LOCK(&sock->parent->lock);
+
+ r = uv_tcp_open(&sock->uv_handle.tcp, sock->fd);
+ if (r < 0) {
+ isc__nm_closesocket(sock->fd);
+ isc__nm_incstats(sock, STATID_OPENFAIL);
+ goto done;
+ }
+ isc__nm_incstats(sock, STATID_OPEN);
+
+ if (sa_family == AF_INET6) {
+ flags = UV_TCP_IPV6ONLY;
+ }
+ /*
+ * With load-balanced sockets every child binds its own socket.
+ * Otherwise only the first child binds, while the parent's fd is
+ * still -1, and the parent records that child's flags and fd.
+ */
+ if (mgr->load_balance_sockets) {
+ r = isc_uv_tcp_freebind(&sock->uv_handle.tcp,
+ &sock->iface.type.sa, flags);
+ if (r < 0) {
+ isc__nm_incstats(sock, STATID_BINDFAIL);
+ goto done;
+ }
+ } else {
+ if (sock->parent->fd == -1) {
+ r = isc_uv_tcp_freebind(&sock->uv_handle.tcp,
+ &sock->iface.type.sa, flags);
+ if (r < 0) {
+ isc__nm_incstats(sock, STATID_BINDFAIL);
+ goto done;
+ }
+ sock->parent->uv_handle.tcp.flags =
+ sock->uv_handle.tcp.flags;
+ sock->parent->fd = sock->fd;
+ } else {
+ /* The socket is already bound, just copy the flags */
+ sock->uv_handle.tcp.flags =
+ sock->parent->uv_handle.tcp.flags;
+ }
+ }
+
+ isc__nm_set_network_buffers(sock->mgr, &sock->uv_handle.handle);
+
+ /*
+ * The callback will run in the same thread uv_listen() was called
+ * from, so a race with tcpdns_connection_cb() isn't possible.
+ */
+ r = uv_listen((uv_stream_t *)&sock->uv_handle.tcp, sock->backlog,
+ tcpdns_connection_cb);
+ if (r != 0) {
+ isc_log_write(isc_lctx, ISC_LOGCATEGORY_GENERAL,
+ ISC_LOGMODULE_NETMGR, ISC_LOG_ERROR,
+ "uv_listen failed: %s",
+ isc_result_totext(isc__nm_uverr2result(r)));
+ isc__nm_incstats(sock, STATID_BINDFAIL);
+ goto done;
+ }
+
+ atomic_store(&sock->listening, true);
+ /* Every path arrives here with the last libuv status in 'r'. */
+done:
+ result = isc__nm_uverr2result(r);
+ if (result != ISC_R_SUCCESS) {
+ sock->pquota = NULL;
+ }
+ /* The parent's result is only set while it is still ISC_R_UNSET. */
+ atomic_fetch_add(&sock->parent->rchildren, 1);
+ if (sock->parent->result == ISC_R_UNSET) {
+ sock->parent->result = result;
+ }
+ SIGNAL(&sock->parent->cond);
+ UNLOCK(&sock->parent->lock);
+
+ isc_barrier_wait(&sock->parent->startlistening);
+}
+
+/*
+ * Decide what to do with a connection event on 'server' and return
+ * the outcome:
+ *  - a non-zero 'status' is translated and returned as is;
+ *  - a closing listener yields ISC_R_CANCELED;
+ *  - when the listener has a quota and attaching to it returns
+ *    ISC_R_QUOTA, the ACCEPTFAIL statistic is bumped and ISC_R_QUOTA
+ *    is returned;
+ *  - otherwise the connection is accepted and that result returned.
+ */
+static isc_result_t
+tcpdns_accept_or_fail(uv_stream_t *server, int status) {
+	isc_nmsocket_t *ssock = uv_handle_get_data((uv_handle_t *)server);
+	isc_quota_t *quota = NULL;
+
+	if (status != 0) {
+		return (isc__nm_uverr2result(status));
+	}
+
+	REQUIRE(VALID_NMSOCK(ssock));
+	REQUIRE(ssock->tid == isc_nm_tid());
+
+	if (isc__nmsocket_closing(ssock)) {
+		return (ISC_R_CANCELED);
+	}
+
+	if (ssock->pquota != NULL) {
+		isc_result_t qresult = isc_quota_attach_cb(
+			ssock->pquota, &quota, &ssock->quotacb);
+
+		if (qresult == ISC_R_QUOTA) {
+			isc__nm_incstats(ssock, STATID_ACCEPTFAIL);
+			return (qresult);
+		}
+	}
+
+	return (accept_connection(ssock, quota));
+}
+
+/*
+ * Connection callback passed to uv_listen() for TCP-DNS listeners:
+ * handles the event and passes the outcome to the accept logger.
+ */
+static void
+tcpdns_connection_cb(uv_stream_t *server, int status) {
+	isc_result_t result = tcpdns_accept_or_fail(server, status);
+
+	isc__nm_accept_connection_log(result, can_log_tcpdns_quota());
+}
+
+/*
+ * Stop a TCP-DNS listener.  The listener must not already be closing:
+ * 'closing' is flipped from false to true and a failed flip is
+ * treated as unreachable.  On a network thread the listener is
+ * stopped directly; otherwise a stop event is queued for it.
+ */
+void
+isc__nm_tcpdns_stoplistening(isc_nmsocket_t *sock) {
+	bool closing = false;
+
+	REQUIRE(VALID_NMSOCK(sock));
+	REQUIRE(sock->type == isc_nm_tcpdnslistener);
+
+	if (!atomic_compare_exchange_strong(&sock->closing, &closing, true)) {
+		UNREACHABLE();
+	}
+
+	if (isc__nm_in_netthread()) {
+		stop_tcpdns_parent(sock);
+	} else {
+		enqueue_stoplistening(sock);
+	}
+}
+
+/*
+ * Handler for the 'tcpdnsstop' event.  A socket without a parent is
+ * stopped as a listener; a socket with a parent is stopped as one of
+ * that listener's children.  'worker' is not used.
+ */
+void
+isc__nm_async_tcpdnsstop(isc__networker_t *worker, isc__netievent_t *ev0) {
+	isc_nmsocket_t *sock = ((isc__netievent_tcpdnsstop_t *)ev0)->sock;
+
+	UNUSED(worker);
+
+	REQUIRE(VALID_NMSOCK(sock));
+	REQUIRE(sock->tid == isc_nm_tid());
+
+	if (sock->parent == NULL) {
+		stop_tcpdns_parent(sock);
+	} else {
+		stop_tcpdns_child(sock);
+	}
+}
+
+/*
+ * Fail the read on a TCP-DNS socket with 'result', which must not be
+ * ISC_R_SUCCESS.
+ *
+ * The timer and reading are stopped.  If a read was requested
+ * ('recv_read' set), the flag is cleared and, when a receive callback
+ * is installed, the socket's callbacks are cleared and the read
+ * callback is run with 'result'.  The socket is then prepared for
+ * destruction and any quota it holds is released.
+ */
+void
+isc__nm_tcpdns_failed_read_cb(isc_nmsocket_t *sock, isc_result_t result) {
+	REQUIRE(VALID_NMSOCK(sock));
+	REQUIRE(result != ISC_R_SUCCESS);
+
+	isc__nmsocket_timer_stop(sock);
+	isc__nm_stop_reading(sock);
+
+	if (sock->recv_read) {
+		sock->recv_read = false;
+
+		if (sock->recv_cb != NULL) {
+			isc__nm_uvreq_t *readreq =
+				isc__nm_get_read_req(sock, NULL);
+
+			isc__nmsocket_clearcb(sock);
+			isc__nm_readcb(sock, readreq, result);
+		}
+	}
+
+	isc__nmsocket_prep_destroy(sock);
+
+	/*
+	 * The quota is released only now, so that the read callback has
+	 * had a chance to run first.
+	 */
+	if (sock->quota != NULL) {
+		isc_quota_detach(&sock->quota);
+	}
+}
+
+void
+isc__nm_tcpdns_read(isc_nmhandle_t *handle, isc_nm_recv_cb_t cb, void *cbarg) {
+ REQUIRE(VALID_NMHANDLE(handle));
+ REQUIRE(VALID_NMSOCK(handle->sock));
+ /*
+ * Request a read on 'handle', which must be the static handle of a
+ * TCPDNS socket: record 'cb'/'cbarg', mark a read as pending, choose a
+ * read timeout if none is set, and enqueue a 'tcpdnsread' event for the
+ * socket's worker.
+ */
+ isc_nmsocket_t *sock = handle->sock;
+ isc__netievent_tcpdnsread_t *ievent = NULL;
+
+ REQUIRE(sock->type == isc_nm_tcpdnssocket);
+ REQUIRE(sock->statichandle == handle);
+
+ sock->recv_cb = cb;
+ sock->recv_cbarg = cbarg;
+ sock->recv_read = true;
+ if (sock->read_timeout == 0) { /* unset: use the manager's keepalive or idle value */
+ sock->read_timeout =
+ (atomic_load(&sock->keepalive)
+ ? atomic_load(&sock->mgr->keepalive)
+ : atomic_load(&sock->mgr->idle));
+ }
+
+ ievent = isc__nm_get_netievent_tcpdnsread(sock->mgr, sock);
+
+ /*
+ * This MUST be done asynchronously, no matter which thread we're
+ * in. The callback function for isc_nm_read() often calls
+ * isc_nm_read() again; if we tried to do that synchronously
+ * we'd clash in processbuffer() and grow the stack indefinitely.
+ */
+ isc__nm_enqueue_ievent(&sock->mgr->workers[sock->tid],
+ (isc__netievent_t *)ievent);
+
+ return;
+}
+
+void
+isc__nm_async_tcpdnsread(isc__networker_t *worker, isc__netievent_t *ev0) {
+ isc__netievent_tcpdnsread_t *ievent =
+ (isc__netievent_tcpdnsread_t *)ev0;
+ isc_nmsocket_t *sock = ievent->sock;
+ isc_result_t result;
+ /*
+ * Handler for the 'tcpdnsread' event; 'worker' is unused. Processes the
+ * socket buffer unless the socket is closing, and fails the read on any
+ * result other than ISC_R_SUCCESS.
+ */
+ UNUSED(worker);
+
+ REQUIRE(VALID_NMSOCK(sock));
+ REQUIRE(sock->tid == isc_nm_tid());
+
+ if (isc__nmsocket_closing(sock)) {
+ result = ISC_R_CANCELED;
+ } else {
+ result = isc__nm_process_sock_buffer(sock);
+ }
+
+ if (result != ISC_R_SUCCESS) {
+ atomic_store(&sock->reading, true); /* NOTE(review): flag forced on before failing - presumably so the failure path stops reading; confirm */
+ isc__nm_failed_read_cb(sock, result, false);
+ }
+}
+
+/*
+ * Process a single DNS message from the socket's incoming buffer.
+ *
+ * Each message in sock->buf is preceded by a two-byte length in network
+ * byte order. When a complete message is present, the read callback is
+ * invoked with a request whose buffer points at the message inside
+ * sock->buf, and the message (plus its length prefix) is then removed
+ * from the front of the buffer.
+ *
+ * Returns:
+ * ISC_R_SUCCESS - one message was passed to the read callback;
+ * ISC_R_NOMORE - the buffer does not yet hold a full message;
+ * ISC_R_CANCELED - the socket is closing, recv_cb has been cleared, or a
+ * message arrived on a connected socket that has no
+ * static handle.
+ *
+ * The handle reference taken here is released before returning; the
+ * caller receives no handle.
+ */
+isc_result_t
+isc__nm_tcpdns_processbuffer(isc_nmsocket_t *sock) {
+ size_t len;
+ isc__nm_uvreq_t *req = NULL;
+ isc_nmhandle_t *handle = NULL;
+
+ REQUIRE(VALID_NMSOCK(sock));
+ REQUIRE(sock->tid == isc_nm_tid());
+
+ if (isc__nmsocket_closing(sock)) {
+ return (ISC_R_CANCELED);
+ }
+
+ /*
+ * If we don't even have the length yet, we can't do
+ * anything.
+ */
+ if (sock->buf_len < 2) {
+ return (ISC_R_NOMORE);
+ }
+
+ /*
+ * Process the first packet from the buffer, leaving
+ * the rest (if any) for later.
+ */
+ len = ntohs(*(uint16_t *)sock->buf); /* length prefix; buf_len >= 2 was checked above */
+ if (len > sock->buf_len - 2) {
+ return (ISC_R_NOMORE);
+ }
+
+ if (sock->recv_cb == NULL) {
+ /*
+ * recv_cb has been cleared - there is
+ * nothing to do
+ */
+ return (ISC_R_CANCELED);
+ } else if (sock->statichandle == NULL &&
+ atomic_load(&sock->connected) &&
+ !atomic_load(&sock->connecting))
+ {
+ /*
+ * It seems that some unexpected data (a DNS message) has
+ * arrived while we are wrapping up.
+ */
+ return (ISC_R_CANCELED);
+ }
+
+ req = isc__nm_get_read_req(sock, NULL);
+ REQUIRE(VALID_UVREQ(req));
+
+ /*
+ * We need to launch isc__nm_resume_processing() after the buffer
+ * has been consumed, thus we must delay detaching the handle.
+ */
+ isc_nmhandle_attach(req->handle, &handle);
+
+ /*
+ * The callback will be called synchronously because the
+ * result is ISC_R_SUCCESS, so we don't need to have
+ * the buffer on the heap
+ */
+ req->uvbuf.base = (char *)sock->buf + 2; /* skip the length prefix */
+ req->uvbuf.len = len;
+
+ /*
+ * If isc__nm_tcpdns_read() was called, it will be satisfied by a
+ * single DNS message in the next call.
+ */
+ sock->recv_read = false;
+
+ /*
+ * An assertion failure here means that there's an erroneous
+ * extra nmhandle detach happening in the callback and
+ * isc__nm_resume_processing() is called while we're
+ * processing the buffer.
+ */
+ REQUIRE(sock->processing == false);
+ sock->processing = true;
+ isc__nm_readcb(sock, req, ISC_R_SUCCESS);
+ sock->processing = false;
+
+ len += 2; /* also drop the length prefix */
+ sock->buf_len -= len;
+ if (sock->buf_len > 0) {
+ memmove(sock->buf, sock->buf + len, sock->buf_len); /* shift remaining data to the front */
+ }
+
+ isc_nmhandle_detach(&handle);
+
+ return (ISC_R_SUCCESS);
+}
+
+void
+isc__nm_tcpdns_read_cb(uv_stream_t *stream, ssize_t nread,
+ const uv_buf_t *buf) {
+ isc_nmsocket_t *sock = uv_handle_get_data((uv_handle_t *)stream);
+ uint8_t *base = NULL;
+ size_t len;
+ isc_result_t result;
+ /*
+ * Read callback for 'stream': on a closing socket or a negative 'nread'
+ * the read is failed; otherwise the 'nread' bytes in 'buf' are appended
+ * to sock->buf and the buffer is processed. 'buf' is released at 'free'
+ * unless it is a null buffer delivered with an error.
+ */
+ REQUIRE(VALID_NMSOCK(sock));
+ REQUIRE(sock->tid == isc_nm_tid());
+ REQUIRE(atomic_load(&sock->reading));
+ REQUIRE(buf != NULL);
+
+ if (isc__nmsocket_closing(sock)) {
+ isc__nm_failed_read_cb(sock, ISC_R_CANCELED, true);
+ goto free;
+ }
+
+ if (nread < 0) {
+ if (nread != UV_EOF) { /* end-of-stream is not counted as a receive failure */
+ isc__nm_incstats(sock, STATID_RECVFAIL);
+ }
+
+ isc__nm_failed_read_cb(sock, isc__nm_uverr2result(nread), true);
+ goto free;
+ }
+
+ base = (uint8_t *)buf->base;
+ len = nread;
+
+ /*
+ * FIXME: We can avoid the memmove here if we know we have received a
+ * full packet; i.e. we should be smarter, as there are just a few
+ * situations where the copy is needed.
+ *
+ * The tcp_alloc_buf should be smarter and point the uv_read_start to
+ * the position where previous read has ended in the sock->buf, that way
+ * the data could be read directly into sock->buf.
+ */
+
+ if (sock->buf_len + len > sock->buf_size) { /* grow the buffer to fit the new data */
+ isc__nm_alloc_dnsbuf(sock, sock->buf_len + len);
+ }
+ memmove(sock->buf + sock->buf_len, base, len);
+ sock->buf_len += len;
+
+ if (!atomic_load(&sock->client)) { /* non-client sockets switch to the idle timeout once data arrived */
+ sock->read_timeout = atomic_load(&sock->mgr->idle);
+ }
+
+ result = isc__nm_process_sock_buffer(sock);
+ if (result != ISC_R_SUCCESS) {
+ isc__nm_failed_read_cb(sock, result, true);
+ }
+free:
+ if (nread < 0) {
+ /*
+ * The buffer may be a null buffer on error.
+ */
+ if (buf->base == NULL && buf->len == 0) {
+ return;
+ }
+ }
+
+ isc__nm_free_uvbuf(sock, buf);
+}
+
+static void
+quota_accept_cb(isc_quota_t *quota, void *sock0) {
+ isc_nmsocket_t *sock = (isc_nmsocket_t *)sock0;
+ /* Quota callback: 'sock0' is the socket on which the accept is to be retried with 'quota'. */
+ REQUIRE(VALID_NMSOCK(sock));
+
+ /*
+ * Create a tcpdnsaccept event and pass it using the async channel.
+ */
+
+ isc__netievent_tcpdnsaccept_t *ievent =
+ isc__nm_get_netievent_tcpdnsaccept(sock->mgr, sock, quota);
+ isc__nm_maybe_enqueue_ievent(&sock->mgr->workers[sock->tid],
+ (isc__netievent_t *)ievent);
+}
+
+/*
+ * Handler for the 'tcpdnsaccept' event created by quota_accept_cb():
+ * accept a connection on the event's socket using the event's quota and
+ * pass the result to the accept logger. 'worker' is unused.
+ */
+void
+isc__nm_async_tcpdnsaccept(isc__networker_t *worker, isc__netievent_t *ev0) {
+ isc__netievent_tcpdnsaccept_t *ievent =
+ (isc__netievent_tcpdnsaccept_t *)ev0;
+ isc_result_t result;
+
+ UNUSED(worker);
+
+ REQUIRE(VALID_NMSOCK(ievent->sock));
+ REQUIRE(ievent->sock->tid == isc_nm_tid());
+
+ result = accept_connection(ievent->sock, ievent->quota);
+ isc__nm_accept_connection_log(result, can_log_tcpdns_quota());
+}
+
+static isc_result_t
+accept_connection(isc_nmsocket_t *ssock, isc_quota_t *quota) {
+ isc_nmsocket_t *csock = NULL;
+ isc__networker_t *worker = NULL;
+ int r;
+ isc_result_t result;
+ struct sockaddr_storage peer_ss;
+ struct sockaddr_storage local_ss;
+ isc_sockaddr_t local;
+ isc_nmhandle_t *handle = NULL;
+ /*
+ * Accept one connection on listener 'ssock'. A new TCPDNS socket is
+ * created on the same thread, inherits the listener's receive callback
+ * and takes over 'quota' (which may be NULL). After uv_accept() the peer
+ * and local addresses are resolved, the listener's accept callback is
+ * run, and processing of the socket buffer is started.
+ *
+ * Returns ISC_R_SUCCESS, ISC_R_CANCELED when the listener is closing
+ * (the quota is released here in that case), or the failing step's
+ * result after the new socket has been torn down at 'failure'.
+ */
+ REQUIRE(VALID_NMSOCK(ssock));
+ REQUIRE(ssock->tid == isc_nm_tid());
+
+ if (isc__nmsocket_closing(ssock)) {
+ if (quota != NULL) {
+ isc_quota_detach(&quota);
+ }
+ return (ISC_R_CANCELED);
+ }
+
+ REQUIRE(ssock->accept_cb != NULL);
+
+ csock = isc_mem_get(ssock->mgr->mctx, sizeof(isc_nmsocket_t));
+ isc__nmsocket_init(csock, ssock->mgr, isc_nm_tcpdnssocket,
+ &ssock->iface);
+ csock->tid = ssock->tid;
+ csock->extrahandlesize = ssock->extrahandlesize;
+ isc__nmsocket_attach(ssock, &csock->server);
+ csock->recv_cb = ssock->recv_cb;
+ csock->recv_cbarg = ssock->recv_cbarg;
+ csock->quota = quota; /* from here on the quota belongs to csock */
+ atomic_init(&csock->accepting, true);
+
+ worker = &csock->mgr->workers[csock->tid];
+
+ r = uv_tcp_init(&worker->loop, &csock->uv_handle.tcp);
+ UV_RUNTIME_CHECK(uv_tcp_init, r);
+ uv_handle_set_data(&csock->uv_handle.handle, csock);
+
+ r = uv_timer_init(&worker->loop, &csock->read_timer);
+ UV_RUNTIME_CHECK(uv_timer_init, r);
+ uv_handle_set_data((uv_handle_t *)&csock->read_timer, csock);
+
+ r = uv_accept(&ssock->uv_handle.stream, &csock->uv_handle.stream);
+ if (r != 0) {
+ result = isc__nm_uverr2result(r);
+ goto failure;
+ }
+
+ r = uv_tcp_getpeername(&csock->uv_handle.tcp,
+ (struct sockaddr *)&peer_ss,
+ &(int){ sizeof(peer_ss) });
+ if (r != 0) {
+ result = isc__nm_uverr2result(r);
+ goto failure;
+ }
+
+ result = isc_sockaddr_fromsockaddr(&csock->peer,
+ (struct sockaddr *)&peer_ss);
+ if (result != ISC_R_SUCCESS) {
+ goto failure;
+ }
+
+ r = uv_tcp_getsockname(&csock->uv_handle.tcp,
+ (struct sockaddr *)&local_ss,
+ &(int){ sizeof(local_ss) });
+ if (r != 0) {
+ result = isc__nm_uverr2result(r);
+ goto failure;
+ }
+
+ result = isc_sockaddr_fromsockaddr(&local,
+ (struct sockaddr *)&local_ss);
+ if (result != ISC_R_SUCCESS) {
+ goto failure;
+ }
+
+ /*
+ * The handle will be either detached on acceptcb failure or in the
+ * readcb.
+ */
+ handle = isc__nmhandle_get(csock, NULL, &local);
+
+ result = ssock->accept_cb(handle, ISC_R_SUCCESS, ssock->accept_cbarg);
+ if (result != ISC_R_SUCCESS) {
+ isc_nmhandle_detach(&handle);
+ goto failure;
+ }
+
+ atomic_store(&csock->accepting, false);
+
+ isc__nm_incstats(csock, STATID_ACCEPT);
+
+ csock->read_timeout = atomic_load(&csock->mgr->init); /* initial timeout for the first read */
+
+ csock->closehandle_cb = isc__nm_resume_processing;
+
+ /*
+ * We need to keep the handle alive until we fail to read or connection
+ * is closed by the other side, it will be detached via
+ * prep_destroy()->tcpdns_close_direct().
+ */
+ isc_nmhandle_attach(handle, &csock->recv_handle);
+ result = isc__nm_process_sock_buffer(csock);
+ if (result != ISC_R_SUCCESS) {
+ isc_nmhandle_detach(&csock->recv_handle);
+ isc_nmhandle_detach(&handle);
+ goto failure;
+ }
+
+ /*
+ * The initial timer has been set, update the read timeout for the next
+ * reads.
+ */
+ csock->read_timeout = (atomic_load(&csock->keepalive)
+ ? atomic_load(&csock->mgr->keepalive)
+ : atomic_load(&csock->mgr->idle));
+
+ isc_nmhandle_detach(&handle);
+
+ /*
+ * sock is now attached to the handle.
+ */
+ isc__nmsocket_detach(&csock);
+
+ return (ISC_R_SUCCESS);
+
+failure:
+ /* Common teardown: deactivate, report the failed accept, destroy and drop our reference. */
+ atomic_store(&csock->active, false);
+
+ isc__nm_failed_accept_cb(csock, result);
+
+ isc__nmsocket_prep_destroy(csock);
+
+ isc__nmsocket_detach(&csock);
+
+ return (result);
+}
+
+void
+isc__nm_tcpdns_send(isc_nmhandle_t *handle, isc_region_t *region,
+ isc_nm_cb_t cb, void *cbarg) {
+ isc__netievent_tcpdnssend_t *ievent = NULL;
+ isc__nm_uvreq_t *uvreq = NULL;
+ isc_nmsocket_t *sock = NULL;
+ /*
+ * Queue 'region' for sending on the TCPDNS socket behind 'handle'. The
+ * request stores the two-byte length prefix and points at the caller's
+ * data (region->base is referenced, not copied); 'cb'/'cbarg' are kept
+ * in the request for the send completion. The actual write happens in
+ * the 'tcpdnssend' event handler.
+ */
+ REQUIRE(VALID_NMHANDLE(handle));
+
+ sock = handle->sock;
+
+ REQUIRE(VALID_NMSOCK(sock));
+ REQUIRE(sock->type == isc_nm_tcpdnssocket);
+
+ uvreq = isc__nm_uvreq_get(sock->mgr, sock);
+ *(uint16_t *)uvreq->tcplen = htons(region->length); /* NOTE(review): a length above 65535 would be truncated here - confirm callers bound it */
+ uvreq->uvbuf.base = (char *)region->base;
+ uvreq->uvbuf.len = region->length;
+
+ isc_nmhandle_attach(handle, &uvreq->handle);
+
+ uvreq->cb.send = cb;
+ uvreq->cbarg = cbarg;
+
+ ievent = isc__nm_get_netievent_tcpdnssend(sock->mgr, sock, uvreq);
+ isc__nm_maybe_enqueue_ievent(&sock->mgr->workers[sock->tid],
+ (isc__netievent_t *)ievent);
+
+ return;
+}
+
+static void
+tcpdns_send_cb(uv_write_t *req, int status) {
+ isc__nm_uvreq_t *uvreq = (isc__nm_uvreq_t *)req->data;
+ isc_nmsocket_t *sock = NULL;
+ /*
+ * Completion callback passed to uv_write() by isc__nm_async_tcpdnssend():
+ * stops and releases the request's write timer, then reports failure
+ * (negative 'status') or success to the send callback machinery.
+ */
+ REQUIRE(VALID_UVREQ(uvreq));
+ REQUIRE(VALID_NMSOCK(uvreq->sock));
+
+ sock = uvreq->sock;
+
+ isc_nm_timer_stop(uvreq->timer);
+ isc_nm_timer_detach(&uvreq->timer);
+
+ if (status < 0) {
+ isc__nm_incstats(sock, STATID_SENDFAIL);
+ isc__nm_failed_send_cb(sock, uvreq,
+ isc__nm_uverr2result(status));
+ return;
+ }
+
+ isc__nm_sendcb(sock, uvreq, ISC_R_SUCCESS, false);
+}
+
+/*
+ * Handle 'tcpdnssend' async event - send a packet on the socket.
+ *
+ * The two-byte length prefix and the message are first offered to
+ * uv_try_write(). If everything was written, the send callback is run
+ * right away; otherwise whatever remains is handed to uv_write() and a
+ * write timer is created for the request. Errors are counted and
+ * reported through isc__nm_failed_send_cb().
+ */
+void
+isc__nm_async_tcpdnssend(isc__networker_t *worker, isc__netievent_t *ev0) {
+ isc_result_t result;
+ isc__netievent_tcpdnssend_t *ievent =
+ (isc__netievent_tcpdnssend_t *)ev0;
+ isc_nmsocket_t *sock = NULL;
+ isc__nm_uvreq_t *uvreq = NULL;
+ int r, nbufs = 2;
+
+ UNUSED(worker);
+
+ REQUIRE(VALID_UVREQ(ievent->req));
+ REQUIRE(VALID_NMSOCK(ievent->sock));
+ REQUIRE(ievent->sock->type == isc_nm_tcpdnssocket);
+ REQUIRE(ievent->sock->tid == isc_nm_tid());
+
+ sock = ievent->sock;
+ uvreq = ievent->req;
+
+ if (sock->write_timeout == 0) { /* unset: use the manager's keepalive or idle value */
+ sock->write_timeout =
+ (atomic_load(&sock->keepalive)
+ ? atomic_load(&sock->mgr->keepalive)
+ : atomic_load(&sock->mgr->idle));
+ }
+
+ uv_buf_t bufs[2] = { { .base = uvreq->tcplen, .len = 2 },
+ { .base = uvreq->uvbuf.base,
+ .len = uvreq->uvbuf.len } };
+
+ if (isc__nmsocket_closing(sock)) {
+ result = ISC_R_CANCELED;
+ goto fail;
+ }
+
+ r = uv_try_write(&sock->uv_handle.stream, bufs, nbufs);
+
+ if (r == (int)(bufs[0].len + bufs[1].len)) {
+ /* Wrote everything */
+ isc__nm_sendcb(sock, uvreq, ISC_R_SUCCESS, true);
+ return;
+ }
+
+ if (r == 1) {
+ /* Partial write of DNSMSG length: one prefix byte and the whole message remain */
+ bufs[0].base = uvreq->tcplen + 1;
+ bufs[0].len = 1;
+ } else if (r > 0) {
+ /* Partial write of DNSMSG: prefix is out, r - 2 message bytes were written */
+ nbufs = 1;
+ bufs[0].base = uvreq->uvbuf.base + (r - 2);
+ bufs[0].len = uvreq->uvbuf.len - (r - 2);
+ } else if (r == UV_ENOSYS || r == UV_EAGAIN) {
+ /* uv_try_write unsupported or nothing could be written now, send everything asynchronously */
+ } else {
+ /* error sending data */
+ result = isc__nm_uverr2result(r);
+ goto fail;
+ }
+
+ r = uv_write(&uvreq->uv_req.write, &sock->uv_handle.stream, bufs, nbufs,
+ tcpdns_send_cb);
+ if (r < 0) {
+ result = isc__nm_uverr2result(r);
+ goto fail;
+ }
+
+ isc_nm_timer_create(uvreq->handle, isc__nmsocket_writetimeout_cb, uvreq,
+ &uvreq->timer); /* always created: tcpdns_send_cb() stops and detaches it */
+ if (sock->write_timeout > 0) {
+ isc_nm_timer_start(uvreq->timer, sock->write_timeout);
+ }
+
+ return;
+fail:
+ isc__nm_incstats(sock, STATID_SENDFAIL);
+ isc__nm_failed_send_cb(sock, uvreq, result);
+}
+
+static void
+tcpdns_stop_cb(uv_handle_t *handle) {
+ isc_nmsocket_t *sock = uv_handle_get_data(handle);
+ /*
+ * Close callback used by read_timer_close_cb() for sockets that have a
+ * parent: marks the socket closed (exactly once), counts the close,
+ * clears 'listening' and drops a socket reference.
+ */
+ REQUIRE(VALID_NMSOCK(sock));
+ REQUIRE(sock->tid == isc_nm_tid());
+ REQUIRE(atomic_load(&sock->closing));
+
+ uv_handle_set_data(handle, NULL);
+
+ if (!atomic_compare_exchange_strong(&sock->closed, &(bool){ false },
+ true))
+ {
+ UNREACHABLE();
+ }
+
+ isc__nm_incstats(sock, STATID_CLOSE);
+
+ atomic_store(&sock->listening, false);
+
+ isc__nmsocket_detach(&sock);
+}
+
+static void
+tcpdns_close_sock(isc_nmsocket_t *sock) {
+ REQUIRE(VALID_NMSOCK(sock));
+ REQUIRE(sock->tid == isc_nm_tid());
+ REQUIRE(atomic_load(&sock->closing));
+ /*
+ * Final step of closing a connection socket: mark it closed (exactly
+ * once), count the close, drop the reference to the listener it was
+ * accepted on (if any), clear 'connected' and prepare destruction.
+ */
+ if (!atomic_compare_exchange_strong(&sock->closed, &(bool){ false },
+ true))
+ {
+ UNREACHABLE();
+ }
+
+ isc__nm_incstats(sock, STATID_CLOSE);
+
+ if (sock->server != NULL) {
+ isc__nmsocket_detach(&sock->server);
+ }
+
+ atomic_store(&sock->connected, false);
+
+ isc__nmsocket_prep_destroy(sock);
+}
+
+static void
+tcpdns_close_cb(uv_handle_t *handle) {
+ isc_nmsocket_t *sock = uv_handle_get_data(handle);
+ /* Close callback for the socket's uv handle: unlink the handle data, then finish via tcpdns_close_sock(). */
+ uv_handle_set_data(handle, NULL);
+
+ tcpdns_close_sock(sock);
+}
+
+static void
+read_timer_close_cb(uv_handle_t *timer) {
+ isc_nmsocket_t *sock = uv_handle_get_data(timer);
+ uv_handle_set_data(timer, NULL);
+ /*
+ * Close callback of the read timer (see tcpdns_close_direct()); moves on
+ * to closing the socket's own uv handle.
+ */
+ REQUIRE(VALID_NMSOCK(sock));
+
+ if (sock->parent) { /* socket with a parent: finish in tcpdns_stop_cb() */
+ uv_close(&sock->uv_handle.handle, tcpdns_stop_cb);
+ } else if (uv_is_closing(&sock->uv_handle.handle)) { /* handle already closing: finish right away */
+ tcpdns_close_sock(sock);
+ } else {
+ uv_close(&sock->uv_handle.handle, tcpdns_close_cb);
+ }
+}
+
+static void
+stop_tcpdns_child(isc_nmsocket_t *sock) {
+ REQUIRE(sock->type == isc_nm_tcpdnssocket);
+ REQUIRE(sock->tid == isc_nm_tid());
+ /*
+ * Stop one child of a listener on its own thread. A child that is
+ * already closing is left alone (and does not reach the barrier).
+ */
+ if (!atomic_compare_exchange_strong(&sock->closing, &(bool){ false },
+ true))
+ {
+ return;
+ }
+
+ tcpdns_close_direct(sock);
+
+ atomic_fetch_sub(&sock->parent->rchildren, 1);
+
+ isc_barrier_wait(&sock->parent->stoplistening); /* barrier set up in stop_tcpdns_parent() for nchildren waiters */
+}
+
+static void
+stop_tcpdns_parent(isc_nmsocket_t *sock) {
+ isc_nmsocket_t *csock = NULL;
+ /*
+ * Stop all children of listener 'sock': children on other threads are
+ * deactivated and handed to enqueue_stoplistening(), the child for the
+ * current thread is stopped directly, then the listener itself is marked
+ * closed and prepared for destruction.
+ */
+ REQUIRE(VALID_NMSOCK(sock));
+ REQUIRE(sock->tid == isc_nm_tid());
+ REQUIRE(sock->type == isc_nm_tcpdnslistener);
+
+ isc_barrier_init(&sock->stoplistening, sock->nchildren);
+
+ for (size_t i = 0; i < sock->nchildren; i++) {
+ csock = &sock->children[i];
+ REQUIRE(VALID_NMSOCK(csock));
+
+ if ((int)i == isc_nm_tid()) {
+ /*
+ * We need to schedule closing the other sockets first
+ */
+ continue;
+ }
+
+ atomic_store(&csock->active, false);
+ enqueue_stoplistening(csock);
+ }
+
+ csock = &sock->children[isc_nm_tid()]; /* the child skipped in the loop above */
+ atomic_store(&csock->active, false);
+ stop_tcpdns_child(csock);
+
+ atomic_store(&sock->closed, true);
+ isc__nmsocket_prep_destroy(sock);
+}
+
+static void
+tcpdns_close_direct(isc_nmsocket_t *sock) {
+ REQUIRE(VALID_NMSOCK(sock));
+ REQUIRE(sock->tid == isc_nm_tid());
+ REQUIRE(atomic_load(&sock->closing));
+ /*
+ * Start closing on the socket's own thread: release the quota and the
+ * receive handle if held, stop the timer and reading, then close the
+ * read timer; read_timer_close_cb() continues the teardown.
+ */
+ if (sock->quota != NULL) {
+ isc_quota_detach(&sock->quota);
+ }
+
+ if (sock->recv_handle != NULL) {
+ isc_nmhandle_detach(&sock->recv_handle);
+ }
+
+ isc__nmsocket_timer_stop(sock);
+ isc__nm_stop_reading(sock);
+
+ uv_handle_set_data((uv_handle_t *)&sock->read_timer, sock); /* read_timer_close_cb() reads the socket back from here */
+ uv_close((uv_handle_t *)&sock->read_timer, read_timer_close_cb);
+}
+
+void
+isc__nm_tcpdns_close(isc_nmsocket_t *sock) {
+ REQUIRE(VALID_NMSOCK(sock));
+ REQUIRE(sock->type == isc_nm_tcpdnssocket);
+ REQUIRE(!isc__nmsocket_active(sock));
+ /*
+ * Close an inactive TCPDNS socket. Only the first caller proceeds (the
+ * 'closing' flag flips once); the close runs directly on the socket's
+ * thread or is sent there as a 'tcpdnsclose' event.
+ */
+ if (!atomic_compare_exchange_strong(&sock->closing, &(bool){ false },
+ true))
+ {
+ return;
+ }
+
+ if (sock->tid == isc_nm_tid()) {
+ tcpdns_close_direct(sock);
+ } else {
+ /*
+ * We need to create an event and pass it using async channel
+ */
+ isc__netievent_tcpdnsclose_t *ievent =
+ isc__nm_get_netievent_tcpdnsclose(sock->mgr, sock);
+
+ isc__nm_enqueue_ievent(&sock->mgr->workers[sock->tid],
+ (isc__netievent_t *)ievent);
+ }
+}
+
+void
+isc__nm_async_tcpdnsclose(isc__networker_t *worker, isc__netievent_t *ev0) {
+ isc__netievent_tcpdnsclose_t *ievent =
+ (isc__netievent_tcpdnsclose_t *)ev0;
+ isc_nmsocket_t *sock = ievent->sock;
+ /* Handler for the 'tcpdnsclose' event sent by isc__nm_tcpdns_close(); 'worker' is unused. */
+ UNUSED(worker);
+
+ REQUIRE(VALID_NMSOCK(sock));
+ REQUIRE(sock->tid == isc_nm_tid());
+
+ tcpdns_close_direct(sock);
+}
+
+static void
+tcpdns_close_connect_cb(uv_handle_t *handle) {
+ isc_nmsocket_t *sock = uv_handle_get_data(handle);
+ /* Close callback used by isc__nm_tcpdns_shutdown() for a socket that was still connecting. */
+ REQUIRE(VALID_NMSOCK(sock));
+
+ REQUIRE(isc__nm_in_netthread());
+ REQUIRE(sock->tid == isc_nm_tid());
+
+ isc__nmsocket_prep_destroy(sock);
+ isc__nmsocket_detach(&sock); /* drops the reference taken before uv_close() in the shutdown path */
+}
+
+void
+isc__nm_tcpdns_shutdown(isc_nmsocket_t *sock) {
+ REQUIRE(VALID_NMSOCK(sock));
+ REQUIRE(sock->tid == isc_nm_tid());
+ REQUIRE(sock->type == isc_nm_tcpdnssocket);
+
+ /*
+ * If the socket is active, mark it inactive and
+ * continue. If it isn't active, stop now.
+ */
+ if (!isc__nmsocket_deactivate(sock)) {
+ return;
+ }
+
+ if (atomic_load(&sock->accepting)) { /* still inside accept_connection(): nothing more is done here */
+ return;
+ }
+
+ if (atomic_load(&sock->connecting)) {
+ isc_nmsocket_t *tsock = NULL;
+ isc__nmsocket_attach(sock, &tsock); /* extra reference, released in tcpdns_close_connect_cb() */
+ uv_close(&sock->uv_handle.handle, tcpdns_close_connect_cb);
+ return;
+ }
+
+ if (sock->statichandle != NULL) { /* a handle is attached: fail the read so the reader is notified */
+ if (isc__nm_closing(sock)) {
+ isc__nm_failed_read_cb(sock, ISC_R_SHUTTINGDOWN, false);
+ } else {
+ isc__nm_failed_read_cb(sock, ISC_R_CANCELED, false);
+ }
+ return;
+ }
+
+ /*
+ * Otherwise, we just send the socket to abyss...
+ */
+ if (sock->parent == NULL) {
+ isc__nmsocket_prep_destroy(sock);
+ }
+}
+
+void
+isc__nm_tcpdns_cancelread(isc_nmhandle_t *handle) {
+ isc_nmsocket_t *sock = NULL;
+ isc__netievent_tcpdnscancel_t *ievent = NULL;
+ /* Cancel a read on 'handle' by enqueueing a 'tcpdnscancel' event for the socket's worker. */
+ REQUIRE(VALID_NMHANDLE(handle));
+
+ sock = handle->sock;
+
+ REQUIRE(VALID_NMSOCK(sock));
+ REQUIRE(sock->type == isc_nm_tcpdnssocket);
+
+ ievent = isc__nm_get_netievent_tcpdnscancel(sock->mgr, sock, handle);
+ isc__nm_enqueue_ievent(&sock->mgr->workers[sock->tid],
+ (isc__netievent_t *)ievent);
+}
+
+void
+isc__nm_async_tcpdnscancel(isc__networker_t *worker, isc__netievent_t *ev0) {
+ isc__netievent_tcpdnscancel_t *ievent =
+ (isc__netievent_tcpdnscancel_t *)ev0;
+ isc_nmsocket_t *sock = ievent->sock;
+ /* Handler for the 'tcpdnscancel' event; 'worker' is unused. The read is failed with ISC_R_EOF. */
+ UNUSED(worker);
+
+ REQUIRE(VALID_NMSOCK(sock));
+ REQUIRE(sock->tid == isc_nm_tid());
+
+ isc__nm_failed_read_cb(sock, ISC_R_EOF, false);
+}
diff --git a/lib/isc/netmgr/timer.c b/lib/isc/netmgr/timer.c
new file mode 100644
index 0000000..8328775
--- /dev/null
+++ b/lib/isc/netmgr/timer.c
@@ -0,0 +1,120 @@
+/*
+ * Copyright (C) Internet Systems Consortium, Inc. ("ISC")
+ *
+ * SPDX-License-Identifier: MPL-2.0
+ *
+ * This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, you can obtain one at https://mozilla.org/MPL/2.0/.
+ *
+ * See the COPYRIGHT file distributed with this work for additional
+ * information regarding copyright ownership.
+ */
+
+#include <uv.h>
+
+#include <isc/netmgr.h>
+#include <isc/util.h>
+
+#include "netmgr-int.h"
+
+struct isc_nm_timer { /* reference-counted timer tied to a netmgr handle */
+ isc_refcount_t references; /* starts at 1 in isc_nm_timer_create() */
+ uv_timer_t timer; /* libuv timer; its handle data points back at this struct */
+ isc_nmhandle_t *handle; /* attached in create, detached in timer_destroy() */
+ isc_nm_timer_cb cb; /* invoked from timer_cb() with (cbarg, ISC_R_TIMEDOUT) */
+ void *cbarg; /* opaque argument passed to 'cb' */
+};
+
+void
+isc_nm_timer_create(isc_nmhandle_t *handle, isc_nm_timer_cb cb, void *cbarg,
+ isc_nm_timer_t **timerp) {
+ isc__networker_t *worker = NULL;
+ isc_nmsocket_t *sock = NULL;
+ isc_nm_timer_t *timer = NULL;
+ int r;
+ /*
+ * Create a timer bound to 'handle' and store it in '*timerp' with one
+ * reference. Must be called on a network thread; the libuv timer is
+ * initialised on the calling thread's loop but not started. 'cb' will
+ * be called with 'cbarg' when the timer fires.
+ */
+ REQUIRE(isc__nm_in_netthread());
+ REQUIRE(VALID_NMHANDLE(handle));
+ REQUIRE(VALID_NMSOCK(handle->sock));
+
+ sock = handle->sock;
+ worker = &sock->mgr->workers[isc_nm_tid()];
+
+ /* TODO: per-loop object cache */
+ timer = isc_mem_get(sock->mgr->mctx, sizeof(*timer));
+ *timer = (isc_nm_timer_t){ .cb = cb, .cbarg = cbarg };
+ isc_refcount_init(&timer->references, 1);
+ isc_nmhandle_attach(handle, &timer->handle); /* keeps the handle referenced while the timer exists */
+
+ r = uv_timer_init(&worker->loop, &timer->timer);
+ UV_RUNTIME_CHECK(uv_timer_init, r);
+
+ uv_handle_set_data((uv_handle_t *)&timer->timer, timer);
+
+ *timerp = timer;
+}
+
+void
+isc_nm_timer_attach(isc_nm_timer_t *timer, isc_nm_timer_t **timerp) {
+ REQUIRE(timer != NULL);
+ REQUIRE(timerp != NULL && *timerp == NULL);
+ /* Take an additional reference to 'timer' and store it in '*timerp', which must be NULL on entry. */
+ isc_refcount_increment(&timer->references);
+ *timerp = timer;
+}
+
+static void
+timer_destroy(uv_handle_t *uvhandle) {
+ isc_nm_timer_t *timer = uv_handle_get_data(uvhandle);
+ isc_nmhandle_t *handle = timer->handle;
+ isc_mem_t *mctx = timer->handle->sock->mgr->mctx;
+ /* Close callback from isc_nm_timer_detach(): 'handle' and 'mctx' are copied out before the timer is freed. */
+ isc_mem_put(mctx, timer, sizeof(*timer));
+
+ isc_nmhandle_detach(&handle); /* done last, via the local copy, since 'timer' is gone */
+}
+
+void
+isc_nm_timer_detach(isc_nm_timer_t **timerp) {
+ isc_nm_timer_t *timer = NULL;
+ isc_nmhandle_t *handle = NULL;
+ /*
+ * Drop one reference to '*timerp' and set it to NULL. Must be called on
+ * a network thread. When the last reference goes away the libuv timer
+ * is stopped and closed; the memory is freed in timer_destroy().
+ */
+ REQUIRE(timerp != NULL && *timerp != NULL);
+
+ timer = *timerp;
+ *timerp = NULL;
+
+ handle = timer->handle;
+
+ REQUIRE(isc__nm_in_netthread());
+ REQUIRE(VALID_NMHANDLE(handle));
+ REQUIRE(VALID_NMSOCK(handle->sock));
+
+ if (isc_refcount_decrement(&timer->references) == 1) { /* previous count was 1: last reference */
+ int r = uv_timer_stop(&timer->timer);
+ UV_RUNTIME_CHECK(uv_timer_stop, r);
+ uv_close((uv_handle_t *)&timer->timer, timer_destroy);
+ }
+}
+
+static void
+timer_cb(uv_timer_t *uvtimer) {
+ isc_nm_timer_t *timer = uv_handle_get_data((uv_handle_t *)uvtimer);
+ /* Timer callback installed by isc_nm_timer_start(): forwards to the user callback with ISC_R_TIMEDOUT. */
+ REQUIRE(timer->cb != NULL);
+
+ timer->cb(timer->cbarg, ISC_R_TIMEDOUT);
+}
+
+void
+isc_nm_timer_start(isc_nm_timer_t *timer, uint64_t timeout) { /* arm 'timer' so that timer_cb() runs after 'timeout' */
+ int r = uv_timer_start(&timer->timer, timer_cb, timeout, 0); /* last argument 0: no repeat */
+ UV_RUNTIME_CHECK(uv_timer_start, r);
+}
+
+void
+isc_nm_timer_stop(isc_nm_timer_t *timer) { /* stop the underlying libuv timer; the timer object stays allocated */
+ int r = uv_timer_stop(&timer->timer);
+ UV_RUNTIME_CHECK(uv_timer_stop, r);
+}
diff --git a/lib/isc/netmgr/tlsdns.c b/lib/isc/netmgr/tlsdns.c
new file mode 100644
index 0000000..d30e33f
--- /dev/null
+++ b/lib/isc/netmgr/tlsdns.c
@@ -0,0 +1,2363 @@
+/*
+ * Copyright (C) Internet Systems Consortium, Inc. ("ISC")
+ *
+ * SPDX-License-Identifier: MPL-2.0
+ *
+ * This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, you can obtain one at https://mozilla.org/MPL/2.0/.
+ *
+ * See the COPYRIGHT file distributed with this work for additional
+ * information regarding copyright ownership.
+ */
+
+#include <libgen.h>
+#include <unistd.h>
+#include <uv.h>
+
+#include <isc/atomic.h>
+#include <isc/barrier.h>
+#include <isc/buffer.h>
+#include <isc/condition.h>
+#include <isc/errno.h>
+#include <isc/log.h>
+#include <isc/magic.h>
+#include <isc/mem.h>
+#include <isc/netmgr.h>
+#include <isc/quota.h>
+#include <isc/random.h>
+#include <isc/refcount.h>
+#include <isc/region.h>
+#include <isc/result.h>
+#include <isc/sockaddr.h>
+#include <isc/stdtime.h>
+#include <isc/thread.h>
+#include <isc/util.h>
+
+#include "netmgr-int.h"
+#include "openssl_shim.h"
+#include "uv-compat.h"
+
+static atomic_uint_fast32_t last_tlsdnsquota_log = 0;
+
+static void
+tls_error(isc_nmsocket_t *sock, isc_result_t result);
+
+static isc_result_t
+tlsdns_connect_direct(isc_nmsocket_t *sock, isc__nm_uvreq_t *req);
+
+static void
+tlsdns_close_direct(isc_nmsocket_t *sock);
+
+static isc_result_t
+tlsdns_send_direct(isc_nmsocket_t *sock, isc__nm_uvreq_t *req);
+static void
+tlsdns_connect_cb(uv_connect_t *uvreq, int status);
+
+static void
+tlsdns_connection_cb(uv_stream_t *server, int status);
+
+static void
+tlsdns_close_cb(uv_handle_t *uvhandle);
+
+static isc_result_t
+accept_connection(isc_nmsocket_t *ssock, isc_quota_t *quota);
+
+static void
+quota_accept_cb(isc_quota_t *quota, void *sock0);
+
+static void
+stop_tlsdns_parent(isc_nmsocket_t *sock);
+static void
+stop_tlsdns_child(isc_nmsocket_t *sock);
+
+static void
+async_tlsdns_cycle(isc_nmsocket_t *sock) __attribute__((unused));
+
+static isc_result_t
+tls_cycle(isc_nmsocket_t *sock);
+
+static void
+call_pending_send_callbacks(isc_nmsocket_t *sock, const isc_result_t result);
+
+static void
+tlsdns_keep_client_tls_session(isc_nmsocket_t *sock);
+
+static void
+tlsdns_set_tls_shutdown(isc_tls_t *tls) { /* flag 'tls' as having sent its shutdown; the return value is deliberately ignored */
+ (void)SSL_set_shutdown(tls, SSL_SENT_SHUTDOWN);
+}
+
+static bool
+peer_verification_has_failed(isc_nmsocket_t *sock) { /* true only while handshaking, with a TLS object whose verify result is not X509_V_OK */
+ if (sock->tls.tls != NULL && sock->tls.state == TLS_STATE_HANDSHAKE &&
+ SSL_get_verify_result(sock->tls.tls) != X509_V_OK)
+ {
+ return (true);
+ }
+
+ return (false);
+}
+
+static bool
+can_log_tlsdns_quota(void) {
+ isc_stdtime_t now, last;
+ /* Rate limiter for quota log messages: true when the stored timestamp differs from the current time. */
+ isc_stdtime_get(&now);
+ last = atomic_exchange_relaxed(&last_tlsdnsquota_log, now); /* store 'now', fetch the previous value */
+ if (now != last) {
+ return (true);
+ }
+
+ return (false);
+}
+
+static isc_result_t
+tlsdns_connect_direct(isc_nmsocket_t *sock, isc__nm_uvreq_t *req) {
+ isc__networker_t *worker = NULL;
+ isc_result_t result = ISC_R_UNSET;
+ int r;
+ /*
+ * Start an outgoing TCP connection for 'sock' on its own network
+ * thread: initialise the uv TCP handle and read timer, open sock->fd,
+ * optionally bind to req->local, and call uv_tcp_connect() with
+ * tlsdns_connect_cb() as the completion callback. The outcome is stored
+ * in sock->result and signalled on sock->cond under sock->lock, and is
+ * also returned.
+ */
+ REQUIRE(VALID_NMSOCK(sock));
+ REQUIRE(VALID_UVREQ(req));
+
+ REQUIRE(isc__nm_in_netthread());
+ REQUIRE(sock->tid == isc_nm_tid());
+
+ worker = &sock->mgr->workers[sock->tid];
+
+ atomic_store(&sock->connecting, true);
+
+ /* 2 minute timeout */
+ result = isc__nm_socket_connectiontimeout(sock->fd, 120 * 1000);
+ RUNTIME_CHECK(result == ISC_R_SUCCESS);
+
+ r = uv_tcp_init(&worker->loop, &sock->uv_handle.tcp);
+ UV_RUNTIME_CHECK(uv_tcp_init, r);
+ uv_handle_set_data(&sock->uv_handle.handle, sock);
+
+ r = uv_timer_init(&worker->loop, &sock->read_timer);
+ UV_RUNTIME_CHECK(uv_timer_init, r);
+ uv_handle_set_data((uv_handle_t *)&sock->read_timer, sock);
+
+ if (isc__nm_closing(sock)) {
+ result = ISC_R_SHUTTINGDOWN;
+ goto error; /* skips 'done' so that 'result' is not overwritten from 'r' */
+ }
+
+ r = uv_tcp_open(&sock->uv_handle.tcp, sock->fd);
+ if (r != 0) {
+ isc__nm_closesocket(sock->fd);
+ isc__nm_incstats(sock, STATID_OPENFAIL);
+ goto done;
+ }
+ isc__nm_incstats(sock, STATID_OPEN);
+
+ if (req->local.length != 0) {
+ r = uv_tcp_bind(&sock->uv_handle.tcp, &req->local.type.sa, 0);
+ /*
+ * In case of shared socket UV_EINVAL will be returned and needs
+ * to be ignored
+ */
+ if (r != 0 && r != UV_EINVAL) {
+ isc__nm_incstats(sock, STATID_BINDFAIL);
+ goto done;
+ }
+ }
+
+ isc__nm_set_network_buffers(sock->mgr, &sock->uv_handle.handle);
+
+ uv_handle_set_data(&req->uv_req.handle, req);
+ r = uv_tcp_connect(&req->uv_req.connect, &sock->uv_handle.tcp,
+ &req->peer.type.sa, tlsdns_connect_cb);
+ if (r != 0) {
+ isc__nm_incstats(sock, STATID_CONNECTFAIL);
+ goto done;
+ }
+
+ uv_handle_set_data((uv_handle_t *)&sock->read_timer,
+ &req->uv_req.connect); /* while connecting, the timer's data is the connect request */
+ isc__nmsocket_timer_start(sock);
+
+ atomic_store(&sock->connected, true);
+
+done:
+ result = isc__nm_uverr2result(r); /* every path reaching 'done' reports the last libuv return code */
+error:
+ LOCK(&sock->lock);
+ sock->result = result;
+ SIGNAL(&sock->cond);
+ if (!atomic_load(&sock->active)) { /* NOTE(review): presumably the connecting caller marks the socket active and signals scond - confirm */
+ WAIT(&sock->scond, &sock->lock);
+ }
+ INSIST(atomic_load(&sock->active));
+ UNLOCK(&sock->lock);
+
+ return (result);
+}
+
+void
+isc__nm_async_tlsdnsconnect(isc__networker_t *worker, isc__netievent_t *ev0) {
+ isc__netievent_tlsdnsconnect_t *ievent =
+ (isc__netievent_tlsdnsconnect_t *)ev0;
+ isc_nmsocket_t *sock = ievent->sock;
+ isc__nm_uvreq_t *req = ievent->req;
+ isc_result_t result = ISC_R_SUCCESS;
+ /*
+ * Handler for the 'tlsdnsconnect' event; 'worker' is unused. Starts the
+ * connection and, if that fails right away, reports the failure to the
+ * connect callback and closes the socket.
+ */
+ UNUSED(worker);
+
+ REQUIRE(VALID_NMSOCK(sock));
+ REQUIRE(sock->type == isc_nm_tlsdnssocket);
+ REQUIRE(sock->parent == NULL);
+ REQUIRE(sock->tid == isc_nm_tid());
+
+ result = tlsdns_connect_direct(sock, req);
+ if (result != ISC_R_SUCCESS) {
+ atomic_compare_exchange_enforced(&sock->connecting,
+ &(bool){ true }, false);
+ isc__nmsocket_clearcb(sock);
+ isc__nm_connectcb(sock, req, result, true);
+ atomic_store(&sock->active, false); /* isc__nm_tlsdns_close() is called on the now-inactive socket */
+ isc__nm_tlsdns_close(sock);
+ }
+
+ /*
+ * The sock is now attached to the handle.
+ */
+ isc__nmsocket_detach(&sock);
+}
+
+static void
+tlsdns_connect_cb(uv_connect_t *uvreq, int status) {
+ isc_result_t result = ISC_R_UNSET;
+ isc__nm_uvreq_t *req = NULL;
+ isc_nmsocket_t *sock = uv_handle_get_data((uv_handle_t *)uvreq->handle);
+ struct sockaddr_storage ss;
+ int r;
+ /*
+ * Completion callback for uv_tcp_connect(). Failure conditions (timeout,
+ * shutdown, cancellation, libuv error) are reported through
+ * isc__nm_failed_connect_cb(). On success the TLS object and its BIO
+ * pairs are created, the socket is put into TLS client mode, 'req' is
+ * remembered as the pending request, and socket buffer processing plus
+ * the first tls_cycle() are started.
+ */
+ REQUIRE(VALID_NMSOCK(sock));
+ REQUIRE(sock->tid == isc_nm_tid());
+
+ req = uv_handle_get_data((uv_handle_t *)uvreq);
+
+ REQUIRE(VALID_UVREQ(req));
+ REQUIRE(VALID_NMHANDLE(req->handle));
+
+ if (atomic_load(&sock->timedout)) {
+ result = ISC_R_TIMEDOUT;
+ goto error;
+ } else if (isc__nm_closing(sock)) {
+ /* Network manager shutting down */
+ result = ISC_R_SHUTTINGDOWN;
+ goto error;
+ } else if (isc__nmsocket_closing(sock)) {
+ /* Connection canceled */
+ result = ISC_R_CANCELED;
+ goto error;
+ } else if (status == UV_ETIMEDOUT) {
+ /* Timeout status code here indicates hard error */
+ result = ISC_R_TIMEDOUT;
+ goto error;
+ } else if (status == UV_EADDRINUSE) {
+ /*
+ * On FreeBSD the TCP connect() call sometimes results in a
+ * spurious transient EADDRINUSE. Try a few more times before
+ * giving up.
+ */
+ if (--req->connect_tries > 0) {
+ r = uv_tcp_connect(
+ &req->uv_req.connect, &sock->uv_handle.tcp,
+ &req->peer.type.sa, tlsdns_connect_cb);
+ if (r != 0) {
+ result = isc__nm_uverr2result(r);
+ goto error;
+ }
+ return; /* retry is in flight; this callback will run again */
+ }
+ result = isc__nm_uverr2result(status);
+ goto error;
+ } else if (status != 0) {
+ result = isc__nm_uverr2result(status);
+ goto error;
+ }
+
+ isc__nm_incstats(sock, STATID_CONNECT);
+ r = uv_tcp_getpeername(&sock->uv_handle.tcp, (struct sockaddr *)&ss,
+ &(int){ sizeof(ss) });
+ if (r != 0) {
+ result = isc__nm_uverr2result(r);
+ goto error;
+ }
+
+ sock->tls.state = TLS_STATE_NONE;
+ sock->tls.tls = isc_tls_create(sock->tls.ctx);
+ RUNTIME_CHECK(sock->tls.tls != NULL);
+
+ /*
+ * Create two BIO pairs, ssl_wbio/app_rbio and ssl_rbio/app_wbio. The
+ * ssl_* ends are handed to the TLS object below.
+ */
+ r = BIO_new_bio_pair(&sock->tls.ssl_wbio, ISC_NETMGR_TCP_RECVBUF_SIZE,
+ &sock->tls.app_rbio, ISC_NETMGR_TCP_RECVBUF_SIZE);
+ RUNTIME_CHECK(r == 1);
+
+ r = BIO_new_bio_pair(&sock->tls.ssl_rbio, ISC_NETMGR_TCP_RECVBUF_SIZE,
+ &sock->tls.app_wbio, ISC_NETMGR_TCP_RECVBUF_SIZE);
+ RUNTIME_CHECK(r == 1);
+
+#if HAVE_SSL_SET0_RBIO && HAVE_SSL_SET0_WBIO
+ /*
+ * Note that if the rbio and wbio are the same then
+ * SSL_set0_rbio() and SSL_set0_wbio() each take ownership of
+ * one reference. Therefore it may be necessary to increment the
+ * number of references available using BIO_up_ref(3) before
+ * calling the set0 functions.
+ */
+ SSL_set0_rbio(sock->tls.tls, sock->tls.ssl_rbio);
+ SSL_set0_wbio(sock->tls.tls, sock->tls.ssl_wbio);
+#else
+ SSL_set_bio(sock->tls.tls, sock->tls.ssl_rbio, sock->tls.ssl_wbio);
+#endif
+
+ result = isc_sockaddr_fromsockaddr(&sock->peer, (struct sockaddr *)&ss);
+ RUNTIME_CHECK(result == ISC_R_SUCCESS);
+
+ if (sock->tls.client_sess_cache != NULL) { /* offer a cached session for this peer, if a cache is configured */
+ isc_tlsctx_client_session_cache_reuse_sockaddr(
+ sock->tls.client_sess_cache, &sock->peer,
+ sock->tls.tls);
+ }
+
+ SSL_set_connect_state(sock->tls.tls);
+
+ /* Setting pending req */
+ sock->tls.pending_req = req;
+
+ result = isc__nm_process_sock_buffer(sock);
+ if (result != ISC_R_SUCCESS) {
+ sock->tls.pending_req = NULL;
+ goto error;
+ }
+
+ result = tls_cycle(sock);
+ if (result != ISC_R_SUCCESS) {
+ sock->tls.pending_req = NULL;
+ goto error;
+ }
+
+ return;
+
+error:
+ isc__nm_failed_connect_cb(sock, req, result, false);
+}
+
+/*
+ * Initiate an outgoing DNS-over-TLS connection from 'local' to 'peer'.
+ *
+ * A new isc_nm_tlsdnssocket is allocated and attached to 'sslctx' (and to
+ * 'client_sess_cache' when one is given; the cache must belong to
+ * 'sslctx'). A connect request carrying 'cb'/'cbarg' is created for it.
+ * The connect itself is performed by isc__nm_async_tlsdnsconnect(), either
+ * directly when already running on a network thread, or through an event
+ * queued to a randomly chosen worker. In both cases this function then
+ * blocks until sock->result is no longer ISC_R_UNSET.
+ *
+ * If the TCP socket cannot be created, or the network manager is closing,
+ * the request is completed through isc__nm_connectcb() with the failure
+ * result, and the socket is marked closed and detached.
+ *
+ * 'timeout' is stored as the socket's connect timeout and
+ * 'extrahandlesize' as its extra handle size.
+ */
+void
+isc_nm_tlsdnsconnect(isc_nm_t *mgr, isc_sockaddr_t *local, isc_sockaddr_t *peer,
+ isc_nm_cb_t cb, void *cbarg, unsigned int timeout,
+ size_t extrahandlesize, isc_tlsctx_t *sslctx,
+ isc_tlsctx_client_session_cache_t *client_sess_cache) {
+ isc_result_t result = ISC_R_SUCCESS;
+ isc_nmsocket_t *sock = NULL;
+ isc__netievent_tlsdnsconnect_t *ievent = NULL;
+ isc__nm_uvreq_t *req = NULL;
+ sa_family_t sa_family;
+
+ REQUIRE(VALID_NM(mgr));
+ REQUIRE(local != NULL);
+ REQUIRE(peer != NULL);
+ REQUIRE(sslctx != NULL);
+
+ sa_family = peer->type.sa.sa_family;
+
+ sock = isc_mem_get(mgr->mctx, sizeof(*sock));
+ isc__nmsocket_init(sock, mgr, isc_nm_tlsdnssocket, local);
+
+ sock->extrahandlesize = extrahandlesize;
+ sock->connect_timeout = timeout;
+ /* ISC_R_UNSET means "no outcome yet"; it is waited on below */
+ sock->result = ISC_R_UNSET;
+ isc_tlsctx_attach(sslctx, &sock->tls.ctx);
+ atomic_init(&sock->client, true);
+ atomic_init(&sock->connecting, true);
+
+ /* Build the connect request that carries the caller's callback */
+ req = isc__nm_uvreq_get(mgr, sock);
+ req->cb.connect = cb;
+ req->cbarg = cbarg;
+ req->peer = *peer;
+ req->local = *local;
+ req->handle = isc__nmhandle_get(sock, &req->peer, &sock->iface);
+
+ if (client_sess_cache != NULL) {
+ INSIST(isc_tlsctx_client_session_cache_getctx(
+ client_sess_cache) == sslctx);
+ isc_tlsctx_client_session_cache_attach(
+ client_sess_cache, &sock->tls.client_sess_cache);
+ }
+
+ result = isc__nm_socket(sa_family, SOCK_STREAM, 0, &sock->fd);
+ if (result != ISC_R_SUCCESS) {
+ goto failure;
+ }
+
+ if (isc__nm_closing(sock)) {
+ result = ISC_R_SHUTTINGDOWN;
+ goto failure;
+ }
+
+ /* Best-effort socket tuning; the results are deliberately ignored */
+ (void)isc__nm_socket_min_mtu(sock->fd, sa_family);
+ (void)isc__nm_socket_tcp_maxseg(sock->fd, NM_MAXSEG);
+
+ /* 2 minute timeout */
+ result = isc__nm_socket_connectiontimeout(sock->fd, 120 * 1000);
+ RUNTIME_CHECK(result == ISC_R_SUCCESS);
+
+ ievent = isc__nm_get_netievent_tlsdnsconnect(mgr, sock, req);
+
+ if (isc__nm_in_netthread()) {
+ /* Already on a network thread: connect right here */
+ atomic_store(&sock->active, true);
+ sock->tid = isc_nm_tid();
+ isc__nm_async_tlsdnsconnect(&mgr->workers[sock->tid],
+ (isc__netievent_t *)ievent);
+ isc__nm_put_netievent_tlsdnsconnect(mgr, ievent);
+ } else {
+ /* Hand the connect over to a randomly chosen worker */
+ atomic_init(&sock->active, false);
+ sock->tid = isc_random_uniform(mgr->nworkers);
+ isc__nm_enqueue_ievent(&mgr->workers[sock->tid],
+ (isc__netievent_t *)ievent);
+ }
+ /*
+ * Block until an outcome has been recorded in sock->result, then
+ * mark the socket active and wake up waiters on 'scond'.
+ * NOTE(review): presumably the connect event handler sets
+ * sock->result and signals 'cond' - confirm.
+ */
+ LOCK(&sock->lock);
+ while (sock->result == ISC_R_UNSET) {
+ WAIT(&sock->cond, &sock->lock);
+ }
+ atomic_store(&sock->active, true);
+ BROADCAST(&sock->scond);
+ UNLOCK(&sock->lock);
+ return;
+
+failure:
+ /* Early failure: report it through the connect callback and clean up */
+ if (isc__nm_in_netthread()) {
+ sock->tid = isc_nm_tid();
+ }
+
+ atomic_compare_exchange_enforced(&sock->connecting, &(bool){ true },
+ false);
+ isc__nmsocket_clearcb(sock);
+ isc__nm_connectcb(sock, req, result, true);
+ atomic_store(&sock->closed, true);
+ isc__nmsocket_detach(&sock);
+}
+
+/*
+ * Create a TCP socket of family 'sa_family' prepared for listening:
+ * address reuse is always enabled and, when the manager is configured
+ * for load-balanced sockets, load-balancing reuse as well. Any failure
+ * of the mandatory steps aborts through RUNTIME_CHECK().
+ */
+static uv_os_sock_t
+isc__nm_tlsdns_lb_socket(isc_nm_t *mgr, sa_family_t sa_family) {
+	uv_os_sock_t fd;
+	isc_result_t res = isc__nm_socket(sa_family, SOCK_STREAM, 0, &fd);
+
+	RUNTIME_CHECK(res == ISC_R_SUCCESS);
+
+	/* Optional tuning; the results are intentionally not checked */
+	(void)isc__nm_socket_incoming_cpu(fd);
+	(void)isc__nm_socket_v6only(fd, sa_family);
+
+	/* FIXME: set mss */
+
+	res = isc__nm_socket_reuse(fd);
+	RUNTIME_CHECK(res == ISC_R_SUCCESS);
+
+	if (!mgr->load_balance_sockets) {
+		return (fd);
+	}
+
+	res = isc__nm_socket_reuse_lb(fd);
+	RUNTIME_CHECK(res == ISC_R_SUCCESS);
+
+	return (fd);
+}
+
+/*
+ * Initialise the child listener socket for worker 'tid' from the parent
+ * listener 'sock' and send it a tlsdnslisten event. The child gets
+ * its own descriptor: a freshly created socket when load balancing is
+ * enabled, or a dup() of 'fd' otherwise.
+ */
+static void
+start_tlsdns_child(isc_nm_t *mgr, isc_sockaddr_t *iface, isc_nmsocket_t *sock,
+		   uv_os_sock_t fd, int tid) {
+	isc_nmsocket_t *child = &sock->children[tid];
+	isc__netievent_tlsdnslisten_t *listen_ev = NULL;
+
+	isc__nmsocket_init(child, mgr, isc_nm_tlsdnssocket, iface);
+
+	/* Inherit the listener configuration from the parent */
+	child->parent = sock;
+	child->tid = tid;
+	child->backlog = sock->backlog;
+	child->extrahandlesize = sock->extrahandlesize;
+	child->accept_cb = sock->accept_cb;
+	child->accept_cbarg = sock->accept_cbarg;
+	child->recv_cb = sock->recv_cb;
+	child->recv_cbarg = sock->recv_cbarg;
+	isc_tlsctx_attach(sock->tls.ctx, &child->tls.ctx);
+
+	/*
+	 * The quota pointer is merely copied, not attached, so the
+	 * quota is not increased unnecessarily.
+	 */
+	child->pquota = sock->pquota;
+	isc_quota_cb_init(&child->quotacb, quota_accept_cb, child);
+
+	if (!mgr->load_balance_sockets) {
+		child->fd = dup(fd);
+	} else {
+		UNUSED(fd);
+		child->fd = isc__nm_tlsdns_lb_socket(mgr,
+						     iface->type.sa.sa_family);
+	}
+	REQUIRE(child->fd >= 0);
+
+	listen_ev = isc__nm_get_netievent_tlsdnslisten(mgr, child);
+	isc__nm_maybe_enqueue_ievent(&mgr->workers[tid],
+				     (isc__netievent_t *)listen_ev);
+}
+
+/*
+ * Queue a tlsdnsstop event for 'sock' on the worker that owns it.
+ */
+static void
+enqueue_stoplistening(isc_nmsocket_t *sock) {
+	isc__netievent_tlsdnsstop_t *stop_ev = NULL;
+
+	stop_ev = isc__nm_get_netievent_tlsdnsstop(sock->mgr, sock);
+
+	isc__nm_enqueue_ievent(&sock->mgr->workers[sock->tid],
+			       (isc__netievent_t *)stop_ev);
+}
+
+/*
+ * Start listening for DNS-over-TLS connections on 'iface'.
+ *
+ * A parent isc_nm_tlsdnslistener socket is created together with one
+ * child socket per network manager worker (see start_tlsdns_child()).
+ * When mgr->load_balance_sockets is false, a single socket is created
+ * here and every child receives a dup() of it; otherwise each child
+ * creates its own socket. The function blocks until every child has
+ * reported in (sock->rchildren == sock->nchildren).
+ *
+ * 'recv_cb'/'recv_cbarg', 'accept_cb'/'accept_cbarg', 'extrahandlesize',
+ * 'backlog' and 'quota' are stored on the listener and copied to the
+ * children; 'sslctx' is attached as the listener's TLS context.
+ *
+ * Returns the value found in sock->result once all children have
+ * reported. On ISC_R_SUCCESS the listener is stored in '*sockp';
+ * otherwise the listener is asked to stop and is closed.
+ */
+isc_result_t
+isc_nm_listentlsdns(isc_nm_t *mgr, isc_sockaddr_t *iface,
+ isc_nm_recv_cb_t recv_cb, void *recv_cbarg,
+ isc_nm_accept_cb_t accept_cb, void *accept_cbarg,
+ size_t extrahandlesize, int backlog, isc_quota_t *quota,
+ isc_tlsctx_t *sslctx, isc_nmsocket_t **sockp) {
+ isc_result_t result = ISC_R_SUCCESS;
+ isc_nmsocket_t *sock = NULL;
+ size_t children_size = 0;
+ uv_os_sock_t fd = -1;
+
+ REQUIRE(VALID_NM(mgr));
+
+ sock = isc_mem_get(mgr->mctx, sizeof(*sock));
+ isc__nmsocket_init(sock, mgr, isc_nm_tlsdnslistener, iface);
+
+ /* One zero-initialised child socket slot per worker */
+ atomic_init(&sock->rchildren, 0);
+ sock->nchildren = mgr->nworkers;
+ children_size = sock->nchildren * sizeof(sock->children[0]);
+ sock->children = isc_mem_get(mgr->mctx, children_size);
+ memset(sock->children, 0, children_size);
+
+ sock->result = ISC_R_UNSET;
+ sock->accept_cb = accept_cb;
+ sock->accept_cbarg = accept_cbarg;
+ sock->recv_cb = recv_cb;
+ sock->recv_cbarg = recv_cbarg;
+ sock->extrahandlesize = extrahandlesize;
+ sock->backlog = backlog;
+ sock->pquota = quota;
+
+ isc_tlsctx_attach(sslctx, &sock->tls.ctx);
+
+ sock->tid = 0;
+ sock->fd = -1;
+
+ /* Without load balancing all children share one socket via dup() */
+ if (!mgr->load_balance_sockets) {
+ fd = isc__nm_tlsdns_lb_socket(mgr, iface->type.sa.sa_family);
+ }
+
+ isc_barrier_init(&sock->startlistening, sock->nchildren);
+
+ /*
+ * Start the children of all other threads first; the child that
+ * belongs to the current thread (if this is a network thread) is
+ * started last. NOTE(review): presumably because its listen event
+ * may then run synchronously and ends by waiting on the
+ * 'startlistening' barrier - confirm.
+ */
+ for (size_t i = 0; i < sock->nchildren; i++) {
+ if ((int)i == isc_nm_tid()) {
+ continue;
+ }
+ start_tlsdns_child(mgr, iface, sock, fd, i);
+ }
+
+ if (isc__nm_in_netthread()) {
+ start_tlsdns_child(mgr, iface, sock, fd, isc_nm_tid());
+ }
+
+ /* Every child has dup()ed the shared socket by now */
+ if (!mgr->load_balance_sockets) {
+ isc__nm_closesocket(fd);
+ }
+
+ /* Wait until every child has reported its listen result */
+ LOCK(&sock->lock);
+ while (atomic_load(&sock->rchildren) != sock->nchildren) {
+ WAIT(&sock->cond, &sock->lock);
+ }
+ result = sock->result;
+ atomic_store(&sock->active, true);
+ UNLOCK(&sock->lock);
+
+ INSIST(result != ISC_R_UNSET);
+
+ if (result == ISC_R_SUCCESS) {
+ REQUIRE(atomic_load(&sock->rchildren) == sock->nchildren);
+ *sockp = sock;
+ } else {
+ /* Listening failed: tear the listener down again */
+ atomic_store(&sock->active, false);
+ enqueue_stoplistening(sock);
+ isc_nmsocket_close(&sock);
+ }
+
+ return (result);
+}
+
+/*
+ * Event handler that brings up one child listener socket on its own
+ * worker thread: the child's descriptor is opened as a libuv TCP handle,
+ * bound (unless it shares an already bound parent socket) and put into
+ * the listening state with tlsdns_connection_cb() as the connection
+ * callback.
+ *
+ * The outcome is reported to the parent under the parent's lock: the
+ * parent's 'rchildren' counter is incremented, the result is stored in
+ * parent->result if that is still ISC_R_UNSET, and the parent's
+ * condition variable is signalled. Finally the handler waits on the
+ * parent's 'startlistening' barrier.
+ */
+void
+isc__nm_async_tlsdnslisten(isc__networker_t *worker, isc__netievent_t *ev0) {
+ isc__netievent_tlsdnslisten_t *ievent =
+ (isc__netievent_tlsdnslisten_t *)ev0;
+ sa_family_t sa_family;
+ int r;
+ int flags = 0;
+ isc_nmsocket_t *sock = NULL;
+ isc_result_t result = ISC_R_UNSET;
+ isc_nm_t *mgr;
+
+ REQUIRE(VALID_NMSOCK(ievent->sock));
+ REQUIRE(ievent->sock->tid == isc_nm_tid());
+ REQUIRE(VALID_NMSOCK(ievent->sock->parent));
+
+ sock = ievent->sock;
+ sa_family = sock->iface.type.sa.sa_family;
+ mgr = sock->mgr;
+
+ REQUIRE(sock->type == isc_nm_tlsdnssocket);
+ REQUIRE(sock->parent != NULL);
+ REQUIRE(sock->tid == isc_nm_tid());
+
+ /* Best-effort socket tuning; the results are deliberately ignored */
+ (void)isc__nm_socket_min_mtu(sock->fd, sa_family);
+ (void)isc__nm_socket_tcp_maxseg(sock->fd, NM_MAXSEG);
+
+ r = uv_tcp_init(&worker->loop, &sock->uv_handle.tcp);
+ UV_RUNTIME_CHECK(uv_tcp_init, r);
+ uv_handle_set_data(&sock->uv_handle.handle, sock);
+ /* This keeps the socket alive after everything else is gone */
+ isc__nmsocket_attach(sock, &(isc_nmsocket_t *){ NULL });
+
+ r = uv_timer_init(&worker->loop, &sock->read_timer);
+ UV_RUNTIME_CHECK(uv_timer_init, r);
+ uv_handle_set_data((uv_handle_t *)&sock->read_timer, sock);
+
+ /* Held until the result has been reported at 'done' */
+ LOCK(&sock->parent->lock);
+
+ r = uv_tcp_open(&sock->uv_handle.tcp, sock->fd);
+ if (r < 0) {
+ isc__nm_closesocket(sock->fd);
+ isc__nm_incstats(sock, STATID_OPENFAIL);
+ goto done;
+ }
+ isc__nm_incstats(sock, STATID_OPEN);
+
+ if (sa_family == AF_INET6) {
+ flags = UV_TCP_IPV6ONLY;
+ }
+
+ if (mgr->load_balance_sockets) {
+ /* Each child owns a separate socket and binds it itself */
+ r = isc_uv_tcp_freebind(&sock->uv_handle.tcp,
+ &sock->iface.type.sa, flags);
+ if (r < 0) {
+ isc__nm_incstats(sock, STATID_BINDFAIL);
+ goto done;
+ }
+ } else {
+ /*
+ * Shared socket: only the first child to get here (parent fd
+ * still -1) binds; it publishes its handle flags and fd on
+ * the parent for the others.
+ */
+ if (sock->parent->fd == -1) {
+ r = isc_uv_tcp_freebind(&sock->uv_handle.tcp,
+ &sock->iface.type.sa, flags);
+ if (r < 0) {
+ isc__nm_incstats(sock, STATID_BINDFAIL);
+ goto done;
+ }
+ sock->parent->uv_handle.tcp.flags =
+ sock->uv_handle.tcp.flags;
+ sock->parent->fd = sock->fd;
+ } else {
+ /* The socket is already bound, just copy the flags */
+ sock->uv_handle.tcp.flags =
+ sock->parent->uv_handle.tcp.flags;
+ }
+ }
+
+ isc__nm_set_network_buffers(sock->mgr, &sock->uv_handle.handle);
+
+ /*
+ * The callback will run in the same thread uv_listen() was
+ * called from, so a race with tlsdns_connection_cb() isn't
+ * possible.
+ */
+ r = uv_listen((uv_stream_t *)&sock->uv_handle.tcp, sock->backlog,
+ tlsdns_connection_cb);
+ if (r != 0) {
+ isc_log_write(isc_lctx, ISC_LOGCATEGORY_GENERAL,
+ ISC_LOGMODULE_NETMGR, ISC_LOG_ERROR,
+ "uv_listen failed: %s",
+ isc_result_totext(isc__nm_uverr2result(r)));
+ isc__nm_incstats(sock, STATID_BINDFAIL);
+ goto done;
+ }
+
+ atomic_store(&sock->listening, true);
+
+done:
+ /* 'r' holds the status of the last libuv call made above */
+ result = isc__nm_uverr2result(r);
+ if (result != ISC_R_SUCCESS) {
+ sock->pquota = NULL;
+ }
+
+ /* Report to the parent; only the first reported result is kept */
+ atomic_fetch_add(&sock->parent->rchildren, 1);
+ if (sock->parent->result == ISC_R_UNSET) {
+ sock->parent->result = result;
+ }
+ SIGNAL(&sock->parent->cond);
+ UNLOCK(&sock->parent->lock);
+
+ isc_barrier_wait(&sock->parent->startlistening);
+}
+
+/*
+ * libuv connection callback of a listening child socket. Unless libuv
+ * reported an error or the listener is closing, a quota slot is
+ * requested (when a quota is configured) and the connection is
+ * accepted. The outcome is always passed to
+ * isc__nm_accept_connection_log().
+ */
+static void
+tlsdns_connection_cb(uv_stream_t *server, int status) {
+	isc_nmsocket_t *ssock = uv_handle_get_data((uv_handle_t *)server);
+	isc_quota_t *quota = NULL;
+	isc_result_t result;
+
+	if (status != 0) {
+		result = isc__nm_uverr2result(status);
+	} else {
+		REQUIRE(VALID_NMSOCK(ssock));
+		REQUIRE(ssock->tid == isc_nm_tid());
+
+		if (isc__nmsocket_closing(ssock)) {
+			result = ISC_R_CANCELED;
+		} else {
+			bool quota_hit = false;
+
+			if (ssock->pquota != NULL) {
+				result = isc_quota_attach_cb(ssock->pquota,
+							     &quota,
+							     &ssock->quotacb);
+				quota_hit = (result == ISC_R_QUOTA);
+			}
+
+			if (quota_hit) {
+				/* Over quota: do not accept right now */
+				isc__nm_incstats(ssock, STATID_ACCEPTFAIL);
+			} else {
+				result = accept_connection(ssock, quota);
+			}
+		}
+	}
+
+	isc__nm_accept_connection_log(result, can_log_tlsdns_quota());
+}
+
+/*
+ * Stop a tlsdns listener. The socket's 'closing' flag must flip from
+ * false to true exactly once. The parent is then stopped directly when
+ * called from a network thread, or through a queued stop event
+ * otherwise.
+ */
+void
+isc__nm_tlsdns_stoplistening(isc_nmsocket_t *sock) {
+	bool expected = false;
+
+	REQUIRE(VALID_NMSOCK(sock));
+	REQUIRE(sock->type == isc_nm_tlsdnslistener);
+
+	if (!atomic_compare_exchange_strong(&sock->closing, &expected, true)) {
+		UNREACHABLE();
+	}
+
+	if (isc__nm_in_netthread()) {
+		stop_tlsdns_parent(sock);
+		return;
+	}
+
+	enqueue_stoplistening(sock);
+}
+
+/*
+ * Schedule a tlsdnsshutdown event for 'sock' on the worker that owns it.
+ */
+static void
+tls_shutdown(isc_nmsocket_t *sock) {
+	isc__netievent_tlsdnsshutdown_t *shutdown_ev = NULL;
+
+	REQUIRE(VALID_NMSOCK(sock));
+
+	shutdown_ev = isc__nm_get_netievent_tlsdnsshutdown(sock->mgr, sock);
+	isc__nm_maybe_enqueue_ievent(&sock->mgr->workers[sock->tid],
+				     (isc__netievent_t *)shutdown_ev);
+}
+
+/*
+ * Event handler performing (one step of) the TLS shutdown for a socket
+ * in TLS_STATE_IO. While SSL_shutdown() has not completed yet, the TLS
+ * I/O cycle is run and another shutdown event is scheduled. Errors are
+ * passed to tls_error().
+ */
+void
+isc__nm_async_tlsdnsshutdown(isc__networker_t *worker, isc__netievent_t *ev0) {
+	isc__netievent_tlsdnsshutdown_t *ievent =
+		(isc__netievent_tlsdnsshutdown_t *)ev0;
+	isc_nmsocket_t *sock = ievent->sock;
+	isc_result_t result;
+	int rv;
+
+	UNUSED(worker);
+
+	REQUIRE(VALID_NMSOCK(ievent->sock));
+
+	/* Outside of the I/O state there is nothing to shut down */
+	if (sock->tls.state != TLS_STATE_IO) {
+		return;
+	}
+
+	rv = SSL_shutdown(sock->tls.tls);
+	if (rv == 1) {
+		sock->tls.state = TLS_STATE_NONE;
+		/* FIXME: continue closing the socket */
+		return;
+	}
+
+	if (rv != 0) {
+		switch (SSL_get_error(sock->tls.tls, rv)) {
+		case SSL_ERROR_WANT_READ:
+		case SSL_ERROR_WANT_WRITE:
+		case SSL_ERROR_WANT_X509_LOOKUP:
+			/* More I/O is needed; handled below */
+			break;
+		case 0:
+			UNREACHABLE();
+		case SSL_ERROR_ZERO_RETURN:
+			tls_error(sock, ISC_R_EOF);
+			return;
+		default:
+			tls_error(sock, ISC_R_TLSERROR);
+			return;
+		}
+	}
+
+	/*
+	 * The shutdown is still in progress: push the pending TLS data
+	 * through and try again later.
+	 */
+	result = tls_cycle(sock);
+	if (result != ISC_R_SUCCESS) {
+		tls_error(sock, result);
+		return;
+	}
+
+	/* Reschedule closing the socket */
+	tls_shutdown(sock);
+}
+
+/*
+ * Event handler for stopping a listener: a socket that has a parent is
+ * stopped as a child, any other socket as the parent listener.
+ */
+void
+isc__nm_async_tlsdnsstop(isc__networker_t *worker, isc__netievent_t *ev0) {
+	isc_nmsocket_t *sock = ((isc__netievent_tlsdnsstop_t *)ev0)->sock;
+
+	UNUSED(worker);
+
+	REQUIRE(VALID_NMSOCK(sock));
+	REQUIRE(sock->tid == isc_nm_tid());
+
+	if (sock->parent == NULL) {
+		stop_tlsdns_parent(sock);
+	} else {
+		stop_tlsdns_child(sock);
+	}
+}
+
+/*
+ * Handle a failed read on 'sock' ('result' must not be ISC_R_SUCCESS).
+ *
+ * Reading and the read timer are stopped. A connect request still
+ * pending on the socket is failed, with ISC_R_TLSBADPEERCERT if peer
+ * verification failed and ISC_R_CANCELED otherwise. An outstanding
+ * read is completed with 'result'. Finally all pending send callbacks
+ * are called with 'result', the socket is prepared for destruction and
+ * its quota, if any, is released.
+ */
+void
+isc__nm_tlsdns_failed_read_cb(isc_nmsocket_t *sock, isc_result_t result,
+			      bool async) {
+	isc__nm_uvreq_t *connreq = NULL;
+
+	REQUIRE(VALID_NMSOCK(sock));
+	REQUIRE(result != ISC_R_SUCCESS);
+
+	isc__nmsocket_timer_stop(sock);
+	isc__nm_stop_reading(sock);
+
+	connreq = sock->tls.pending_req;
+	if (connreq != NULL) {
+		isc_result_t connresult = ISC_R_CANCELED;
+
+		sock->tls.pending_req = NULL;
+
+		if (peer_verification_has_failed(sock)) {
+			/*
+			 * Keep the verification error text, because
+			 * 'sock->tls' is going to be detached.
+			 */
+			sock->tls.tls_verify_errmsg =
+				isc_tls_verify_peer_result_string(
+					sock->tls.tls);
+			connresult = ISC_R_TLSBADPEERCERT;
+		}
+		isc__nm_failed_connect_cb(sock, connreq, connresult, async);
+	}
+
+	if (sock->recv_read) {
+		sock->recv_read = false;
+
+		if (sock->recv_cb != NULL) {
+			isc__nm_uvreq_t *readreq =
+				isc__nm_get_read_req(sock, NULL);
+			isc__nmsocket_clearcb(sock);
+			isc__nm_readcb(sock, readreq, result);
+		}
+	}
+
+	call_pending_send_callbacks(sock, result);
+	isc__nmsocket_prep_destroy(sock);
+
+	/*
+	 * The quota may only be released once the read callback had
+	 * its chance to run.
+	 */
+	if (sock->quota != NULL) {
+		isc_quota_detach(&sock->quota);
+	}
+}
+
+/*
+ * Request one read on the tlsdns socket behind 'handle': 'cb'/'cbarg'
+ * are installed as the receive callback, a default read timeout is
+ * chosen if none is set, and a tlsdnsread event is queued to the
+ * socket's worker.
+ */
+void
+isc__nm_tlsdns_read(isc_nmhandle_t *handle, isc_nm_recv_cb_t cb, void *cbarg) {
+	isc_nmsocket_t *sock = NULL;
+	isc__netievent_tlsdnsread_t *read_ev = NULL;
+
+	REQUIRE(VALID_NMHANDLE(handle));
+	REQUIRE(VALID_NMSOCK(handle->sock));
+
+	sock = handle->sock;
+
+	REQUIRE(sock->type == isc_nm_tlsdnssocket);
+	REQUIRE(sock->statichandle == handle);
+
+	sock->recv_cb = cb;
+	sock->recv_cbarg = cbarg;
+	sock->recv_read = true;
+
+	/* No timeout set yet: fall back to the manager's defaults */
+	if (sock->read_timeout == 0) {
+		if (atomic_load(&sock->keepalive)) {
+			sock->read_timeout = atomic_load(&sock->mgr->keepalive);
+		} else {
+			sock->read_timeout = atomic_load(&sock->mgr->idle);
+		}
+	}
+
+	read_ev = isc__nm_get_netievent_tlsdnsread(sock->mgr, sock);
+
+	/*
+	 * The event is always queued, never run directly, whatever
+	 * thread we are on. A read callback frequently calls
+	 * isc_nm_read() again; doing that synchronously would clash in
+	 * processbuffer() and grow the stack without bound.
+	 */
+	isc__nm_enqueue_ievent(&sock->mgr->workers[sock->tid],
+			       (isc__netievent_t *)read_ev);
+}
+
+/*
+ * Event handler for a read request: run the TLS I/O cycle, or fail the
+ * read with ISC_R_CANCELED when the socket is already closing.
+ */
+void
+isc__nm_async_tlsdnsread(isc__networker_t *worker, isc__netievent_t *ev0) {
+	isc_nmsocket_t *sock = ((isc__netievent_tlsdnsread_t *)ev0)->sock;
+	isc_result_t result = ISC_R_SUCCESS;
+
+	UNUSED(worker);
+
+	REQUIRE(VALID_NMSOCK(sock));
+	REQUIRE(sock->tid == isc_nm_tid());
+
+	if (!isc__nmsocket_closing(sock)) {
+		result = tls_cycle(sock);
+		if (result != ISC_R_SUCCESS) {
+			isc__nm_failed_read_cb(sock, result, false);
+		}
+		return;
+	}
+
+	atomic_store(&sock->reading, true);
+	isc__nm_failed_read_cb(sock, ISC_R_CANCELED, false);
+}
+
+/*
+ * Process a single DNS message from the socket's input buffer.
+ *
+ * A message in the buffer is preceded by a two-octet length in network
+ * byte order. If a complete message is available, the receive callback
+ * is invoked synchronously on it and the message is then removed from
+ * the buffer.
+ *
+ * Returns:
+ *	ISC_R_SUCCESS	one message was passed to the receive callback
+ *	ISC_R_NOMORE	the buffer does not hold a complete message yet
+ *	ISC_R_CANCELED	the socket is closing, no receive callback is
+ *			set, or data arrived while the connection is
+ *			being wrapped up
+ */
+isc_result_t
+isc__nm_tlsdns_processbuffer(isc_nmsocket_t *sock) {
+	size_t len;
+	uint16_t wirelen;
+	isc__nm_uvreq_t *req = NULL;
+	isc_nmhandle_t *handle = NULL;
+
+	REQUIRE(VALID_NMSOCK(sock));
+	REQUIRE(sock->tid == isc_nm_tid());
+
+	if (isc__nmsocket_closing(sock)) {
+		return (ISC_R_CANCELED);
+	}
+
+	/*
+	 * If we don't even have the length yet, we can't do
+	 * anything.
+	 */
+	if (sock->buf_len < 2) {
+		return (ISC_R_NOMORE);
+	}
+
+	/*
+	 * Process the first packet from the buffer, leaving
+	 * the rest (if any) for later.
+	 *
+	 * The length prefix is copied out with memcpy() rather than
+	 * read through a cast to (uint16_t *): the buffer is a byte
+	 * array, and accessing it through an incompatible pointer type
+	 * violates the aliasing rules and assumes suitable alignment.
+	 */
+	memcpy(&wirelen, sock->buf, sizeof(wirelen));
+	len = ntohs(wirelen);
+	if (len > sock->buf_len - 2) {
+		return (ISC_R_NOMORE);
+	}
+
+	if (sock->recv_cb == NULL) {
+		/*
+		 * recv_cb has been cleared - there is
+		 * nothing to do
+		 */
+		return (ISC_R_CANCELED);
+	} else if (sock->statichandle == NULL &&
+		   sock->tls.state == TLS_STATE_IO &&
+		   atomic_load(&sock->connected) &&
+		   !atomic_load(&sock->connecting))
+	{
+		/*
+		 * It seems that some unexpected data (a DNS message) has
+		 * arrived while we are wrapping up.
+		 */
+		return (ISC_R_CANCELED);
+	}
+
+	req = isc__nm_get_read_req(sock, NULL);
+	REQUIRE(VALID_UVREQ(req));
+
+	/*
+	 * We need to launch isc__nm_resume_processing() after the buffer
+	 * has been consumed, thus we must delay detaching the handle.
+	 */
+	isc_nmhandle_attach(req->handle, &handle);
+
+	/*
+	 * The callback will be called synchronously because the
+	 * result is ISC_R_SUCCESS, so we don't need to have
+	 * the buffer on the heap
+	 */
+	req->uvbuf.base = (char *)sock->buf + 2;
+	req->uvbuf.len = len;
+
+	/*
+	 * If isc__nm_tlsdns_read() was called, it will be satisfied by
+	 * single DNS message in the next call.
+	 */
+	sock->recv_read = false;
+
+	/*
+	 * An assertion failure here means that there's an erroneous
+	 * extra nmhandle detach happening in the callback and
+	 * isc__nm_resume_processing() is called while we're
+	 * processing the buffer.
+	 */
+	REQUIRE(sock->processing == false);
+	sock->processing = true;
+	isc__nm_readcb(sock, req, ISC_R_SUCCESS);
+	sock->processing = false;
+
+	/* Drop the consumed message (prefix included) from the buffer */
+	len += 2;
+	sock->buf_len -= len;
+	if (sock->buf_len > 0) {
+		memmove(sock->buf, sock->buf + len, sock->buf_len);
+	}
+
+	isc_nmhandle_detach(&handle);
+
+	if (isc__nmsocket_closing(sock)) {
+		tlsdns_set_tls_shutdown(sock->tls.tls);
+		tlsdns_keep_client_tls_session(sock);
+	}
+
+	return (ISC_R_SUCCESS);
+}
+
+/*
+ * Drive the input side of the TLS state machine for 'sock'.
+ *
+ * In TLS_STATE_IO decrypted application data is read into sock->buf
+ * and handed to isc__nm_process_sock_buffer(). Before that, while the
+ * handshake has not finished, the handshake is advanced with
+ * SSL_accept() (server) or SSL_connect() (client).
+ *
+ * When the handshake turns out to be finished, the negotiated ALPN is
+ * checked, the state is switched to TLS_STATE_IO, and the accept
+ * callback (server) or the pending connect callback (client) is
+ * invoked.
+ *
+ * Returns ISC_R_SUCCESS, ISC_R_EOF on SSL_ERROR_ZERO_RETURN,
+ * ISC_R_TLSERROR on any other unexpected SSL error, or a failure
+ * result from isc__nm_process_sock_buffer() or the accept callback.
+ */
+static isc_result_t
+tls_cycle_input(isc_nmsocket_t *sock) {
+ isc_result_t result = ISC_R_SUCCESS;
+ int err = 0;
+ int rv = 1;
+
+ if (sock->tls.state == TLS_STATE_IO) {
+ size_t len;
+
+ for (;;) {
+ /*
+ * Zero-length peek before asking SSL_pending().
+ * NOTE(review): presumably this makes OpenSSL process
+ * buffered records so that SSL_pending() reports
+ * them - confirm.
+ */
+ (void)SSL_peek(sock->tls.tls, &(char){ '\0' }, 0);
+
+ int pending = SSL_pending(sock->tls.tls);
+ if (pending > (int)ISC_NETMGR_TCP_RECVBUF_SIZE) {
+ pending = (int)ISC_NETMGR_TCP_RECVBUF_SIZE;
+ }
+
+ if (pending != 0) {
+ /* Make room for 'pending' more bytes if needed */
+ if ((sock->buf_len + pending) > sock->buf_size)
+ {
+ isc__nm_alloc_dnsbuf(
+ sock, sock->buf_len + pending);
+ }
+
+ len = 0;
+ rv = SSL_read_ex(sock->tls.tls,
+ sock->buf + sock->buf_len,
+ sock->buf_size - sock->buf_len,
+ &len);
+ if (rv != 1) {
+ /*
+ * Process what's in the buffer so far
+ */
+ result = isc__nm_process_sock_buffer(
+ sock);
+ if (result != ISC_R_SUCCESS) {
+ goto failure;
+ }
+ /*
+ * FIXME: Should we call
+ * isc__nm_failed_read_cb()?
+ */
+ break;
+ }
+
+ INSIST((size_t)pending == len);
+
+ sock->buf_len += len;
+ }
+ result = isc__nm_process_sock_buffer(sock);
+ if (result != ISC_R_SUCCESS) {
+ goto failure;
+ }
+
+ if (pending == 0) {
+ break;
+ }
+ }
+ } else if (!SSL_is_init_finished(sock->tls.tls)) {
+ /* Handshake still in progress: advance it */
+ if (SSL_is_server(sock->tls.tls)) {
+ rv = SSL_accept(sock->tls.tls);
+ } else {
+ rv = SSL_connect(sock->tls.tls);
+ }
+
+ } else {
+ rv = 1;
+ }
+
+ /* 'err' stays 0 unless the last SSL call reported a failure */
+ if (rv <= 0) {
+ err = SSL_get_error(sock->tls.tls, rv);
+ }
+
+ switch (err) {
+ case SSL_ERROR_WANT_READ:
+ if (sock->tls.state == TLS_STATE_NONE &&
+ !SSL_is_init_finished(sock->tls.tls))
+ {
+ /* First handshake step: enter the handshake state */
+ sock->tls.state = TLS_STATE_HANDSHAKE;
+ result = isc__nm_process_sock_buffer(sock);
+ if (result != ISC_R_SUCCESS) {
+ goto failure;
+ }
+ }
+ /* else continue reading */
+ break;
+ case SSL_ERROR_WANT_WRITE:
+ async_tlsdns_cycle(sock);
+ break;
+ case SSL_ERROR_WANT_X509_LOOKUP:
+ /* Continue reading/writing */
+ break;
+ case 0:
+ /* Everything is ok, continue */
+ break;
+ case SSL_ERROR_ZERO_RETURN:
+ return (ISC_R_EOF);
+ default:
+ return (ISC_R_TLSERROR);
+ }
+
+ /* Stop state after handshake */
+ if (sock->tls.state == TLS_STATE_HANDSHAKE &&
+ SSL_is_init_finished(sock->tls.tls))
+ {
+ const unsigned char *alpn = NULL;
+ unsigned int alpnlen = 0;
+
+ isc__nmsocket_log_tls_session_reuse(sock, sock->tls.tls);
+
+ /* Remember whether the DoT ALPN token was negotiated */
+ isc_tls_get_selected_alpn(sock->tls.tls, &alpn, &alpnlen);
+ if (alpn != NULL && alpnlen == ISC_TLS_DOT_PROTO_ALPN_ID_LEN &&
+ memcmp(ISC_TLS_DOT_PROTO_ALPN_ID, alpn,
+ ISC_TLS_DOT_PROTO_ALPN_ID_LEN) == 0)
+ {
+ sock->tls.alpn_negotiated = true;
+ }
+
+ sock->tls.state = TLS_STATE_IO;
+
+ if (SSL_is_server(sock->tls.tls)) {
+ /* Server side: announce the accepted connection */
+ REQUIRE(sock->recv_handle != NULL);
+ result = sock->accept_cb(sock->recv_handle,
+ ISC_R_SUCCESS,
+ sock->accept_cbarg);
+
+ if (result != ISC_R_SUCCESS) {
+ isc_nmhandle_detach(&sock->recv_handle);
+ goto failure;
+ }
+ } else {
+ /* Client side: complete the pending connect request */
+ isc__nm_uvreq_t *req = sock->tls.pending_req;
+ sock->tls.pending_req = NULL;
+
+ isc__nmsocket_timer_stop(sock);
+ uv_handle_set_data((uv_handle_t *)&sock->read_timer,
+ sock);
+
+ atomic_compare_exchange_enforced(
+ &sock->connecting, &(bool){ true }, false);
+ isc__nm_connectcb(sock, req, ISC_R_SUCCESS, true);
+ }
+ async_tlsdns_cycle(sock);
+ }
+failure:
+ return (result);
+}
+
+/*
+ * Put 'sock' into TLS_STATE_ERROR because of 'result' and shut it down.
+ *
+ * If the error happens during the handshake or regular I/O, it is first
+ * reported: through the pending connect request while the socket is
+ * still connecting, as a failed read otherwise. A socket that is
+ * already in the error state is left untouched.
+ */
+static void
+tls_error(isc_nmsocket_t *sock, isc_result_t result) {
+	if (sock->tls.state == TLS_STATE_ERROR) {
+		return;
+	}
+
+	if (sock->tls.state == TLS_STATE_HANDSHAKE ||
+	    sock->tls.state == TLS_STATE_IO)
+	{
+		if (!atomic_load(&sock->connecting)) {
+			isc__nm_tlsdns_failed_read_cb(sock, result, false);
+		} else {
+			isc__nm_uvreq_t *connreq = sock->tls.pending_req;
+
+			sock->tls.pending_req = NULL;
+			isc__nm_failed_connect_cb(sock, connreq, result,
+						  false);
+		}
+	}
+
+	sock->tls.pending_error = result;
+	sock->tls.state = TLS_STATE_ERROR;
+
+	isc__nmsocket_shutdown(sock);
+}
+
+/*
+ * Unlink every request queued on sock->tls.sendreqs and complete each
+ * one through isc__nm_sendcb() with 'result'.
+ */
+static void
+call_pending_send_callbacks(isc_nmsocket_t *sock, const isc_result_t result) {
+	isc__nm_uvreq_t *cur = NULL;
+	isc__nm_uvreq_t *following = NULL;
+
+	for (cur = ISC_LIST_HEAD(sock->tls.sendreqs); cur != NULL;
+	     cur = following)
+	{
+		/* Fetch the successor before 'cur' leaves the list */
+		following = ISC_LIST_NEXT(cur, link);
+		ISC_LIST_UNLINK(sock->tls.sendreqs, cur, link);
+		INSIST(sock == cur->handle->sock);
+		isc__nm_sendcb(sock, cur, result, false);
+	}
+}
+
+/*
+ * Release the socket's outgoing TLS data buffer (which must exist and
+ * be non-empty) and then complete all pending send requests with
+ * 'result'.
+ */
+static void
+free_senddata(isc_nmsocket_t *sock, const isc_result_t result) {
+	REQUIRE(VALID_NMSOCK(sock));
+	REQUIRE(sock->tls.senddata.base != NULL);
+	REQUIRE(sock->tls.senddata.length > 0);
+
+	isc_mem_put(sock->mgr->mctx, sock->tls.senddata.base,
+		    sock->tls.senddata.length);
+
+	/* Mark the buffer as gone before any callback runs */
+	sock->tls.senddata.length = 0;
+	sock->tls.senddata.base = NULL;
+
+	call_pending_send_callbacks(sock, result);
+}
+
+/*
+ * libuv completion callback for an asynchronous write of TLS data. The
+ * write timer and the send buffer are released, and the pending send
+ * callbacks are called with the write result. After a successful write
+ * the TLS I/O cycle is run again; any failure ends up in tls_error().
+ */
+static void
+tls_write_cb(uv_write_t *req, int status) {
+	isc__nm_uvreq_t *uvreq = (isc__nm_uvreq_t *)req->data;
+	isc_nmsocket_t *sock = uvreq->sock;
+	isc_result_t result = ISC_R_SUCCESS;
+
+	if (status != 0) {
+		result = isc__nm_uverr2result(status);
+	}
+
+	isc_nm_timer_stop(uvreq->timer);
+	isc_nm_timer_detach(&uvreq->timer);
+
+	free_senddata(sock, result);
+
+	isc__nm_uvreq_put(&uvreq, sock);
+
+	if (status == 0) {
+		/* The write went through: keep the TLS machinery going */
+		result = tls_cycle(sock);
+		if (result == ISC_R_SUCCESS) {
+			return;
+		}
+	}
+
+	tls_error(sock, result);
+}
+
+/*
+ * Drive the output side of the TLS state machine for 'sock': move the
+ * data waiting in sock->tls.app_rbio into a freshly allocated send
+ * buffer and write it to the socket's stream.
+ *
+ * A synchronous uv_try_write() is attempted first. If it wrote
+ * everything, the loop continues with the next chunk. Otherwise the
+ * (remaining) data is submitted with uv_write(), completed later in
+ * tls_write_cb(), and the loop ends. Only one send buffer can be
+ * outstanding: the loop also ends when sock->tls.senddata is in use.
+ *
+ * Returns ISC_R_SUCCESS, or the converted libuv error of a failed
+ * write.
+ */
+static isc_result_t
+tls_cycle_output(isc_nmsocket_t *sock) {
+ isc_result_t result = ISC_R_SUCCESS;
+ int pending;
+
+ while ((pending = BIO_pending(sock->tls.app_rbio)) > 0) {
+ isc__nm_uvreq_t *req = NULL;
+ size_t bytes;
+ int rv;
+ int r;
+
+ /* A previous send buffer is still in flight */
+ if (sock->tls.senddata.base != NULL ||
+ sock->tls.senddata.length > 0)
+ {
+ break;
+ }
+
+ if (pending > (int)ISC_NETMGR_TCP_RECVBUF_SIZE) {
+ pending = (int)ISC_NETMGR_TCP_RECVBUF_SIZE;
+ }
+
+ sock->tls.senddata.base = isc_mem_get(sock->mgr->mctx, pending);
+ sock->tls.senddata.length = pending;
+
+ /* The name is a bit of a misnomer here, but it does the right thing */
+ req = isc__nm_get_read_req(sock, NULL);
+ req->uvbuf.base = (char *)sock->tls.senddata.base;
+ req->uvbuf.len = sock->tls.senddata.length;
+
+ rv = BIO_read_ex(sock->tls.app_rbio, req->uvbuf.base,
+ req->uvbuf.len, &bytes);
+
+ RUNTIME_CHECK(rv == 1);
+ INSIST((size_t)pending == bytes);
+
+ r = uv_try_write(&sock->uv_handle.stream, &req->uvbuf, 1);
+
+ if (r == pending) {
+ /* Wrote everything, restart */
+ isc__nm_uvreq_put(&req, sock);
+ free_senddata(sock, ISC_R_SUCCESS);
+ continue;
+ }
+
+ if (r > 0) {
+ /* Partial write, send rest asynchronously */
+ memmove(req->uvbuf.base, req->uvbuf.base + r,
+ req->uvbuf.len - r);
+ req->uvbuf.len = req->uvbuf.len - r;
+ } else if (r == UV_ENOSYS || r == UV_EAGAIN) {
+ /* uv_try_write is not supported, send
+ * asynchronously */
+ } else {
+ /* Hard error from uv_try_write(): give up */
+ result = isc__nm_uverr2result(r);
+ isc__nm_uvreq_put(&req, sock);
+ free_senddata(sock, result);
+ break;
+ }
+
+ r = uv_write(&req->uv_req.write, &sock->uv_handle.stream,
+ &req->uvbuf, 1, tls_write_cb);
+ if (r < 0) {
+ result = isc__nm_uverr2result(r);
+ isc__nm_uvreq_put(&req, sock);
+ free_senddata(sock, result);
+ break;
+ }
+
+ /* The timer is started only if a write timeout is set */
+ isc_nm_timer_create(req->handle, isc__nmsocket_writetimeout_cb,
+ req, &req->timer);
+ if (sock->write_timeout > 0) {
+ isc_nm_timer_start(req->timer, sock->write_timeout);
+ }
+
+ break;
+ }
+
+ return (result);
+}
+
+/*
+ * Fetch the error pending on 'sock', if any.
+ *
+ * Returns ISC_R_SUCCESS when the socket is not in TLS_STATE_ERROR.
+ * Otherwise the stored pending error is returned and reset to
+ * ISC_R_SUCCESS; once it has been consumed, ISC_R_TLSERROR is
+ * returned instead.
+ */
+static isc_result_t
+tls_pop_error(isc_nmsocket_t *sock) {
+	isc_result_t pending = ISC_R_SUCCESS;
+
+	if (sock->tls.state == TLS_STATE_ERROR) {
+		pending = sock->tls.pending_error;
+		if (pending == ISC_R_SUCCESS) {
+			/* Already popped: report a generic TLS error */
+			pending = ISC_R_TLSERROR;
+		} else {
+			sock->tls.pending_error = ISC_R_SUCCESS;
+		}
+	}
+
+	return (pending);
+}
+
+/*
+ * Run one TLS I/O cycle on 'sock': first the input side, then the
+ * output side.
+ *
+ * Returns ISC_R_CANCELED if the socket is closing, a pending TLS error
+ * if there is one, ISC_R_SUCCESS without doing anything if a cycle is
+ * already running, or else the result of the input/output processing.
+ */
+static isc_result_t
+tls_cycle(isc_nmsocket_t *sock) {
+	isc_result_t result;
+
+	/*
+	 * Start with an empty OpenSSL error queue. According to
+	 * https://www.openssl.org/docs/man3.0/man3/SSL_get_error.html
+	 *
+	 *	The current thread's error queue must be empty before the
+	 *	TLS/SSL I/O operation is attempted, or SSL_get_error() will
+	 *	not work reliably.
+	 *
+	 * The I/O decisions made during the cycle rely on
+	 * SSL_get_error(), so leftovers of earlier failures must not
+	 * influence it. See also https://stackoverflow.com/a/37980911
+	 */
+	ERR_clear_error();
+
+	if (isc__nmsocket_closing(sock)) {
+		return (ISC_R_CANCELED);
+	}
+
+	result = tls_pop_error(sock);
+	if (result == ISC_R_SUCCESS) {
+		/* Do not re-enter a cycle that is already running */
+		if (sock->tls.cycle) {
+			return (ISC_R_SUCCESS);
+		}
+
+		sock->tls.cycle = true;
+
+		result = tls_cycle_input(sock);
+		if (result == ISC_R_SUCCESS) {
+			result = tls_cycle_output(sock);
+		}
+	}
+
+	sock->tls.cycle = false;
+
+	return (result);
+}
+
+/*
+ * Queue a tlsdnscycle event for 'sock' on the worker that owns it,
+ * unless the socket is already closing.
+ */
+static void
+async_tlsdns_cycle(isc_nmsocket_t *sock) {
+	REQUIRE(VALID_NMSOCK(sock));
+
+	if (!isc__nmsocket_closing(sock)) {
+		isc__netievent_tlsdnscycle_t *cycle_ev =
+			isc__nm_get_netievent_tlsdnscycle(sock->mgr, sock);
+
+		isc__nm_enqueue_ievent(&sock->mgr->workers[sock->tid],
+				       (isc__netievent_t *)cycle_ev);
+	}
+	/* else: socket was closed midflight by isc__nm_tlsdns_shutdown() */
+}
+
+/*
+ * Event handler running one TLS I/O cycle on the event's socket; a
+ * failed cycle is passed to tls_error().
+ */
+void
+isc__nm_async_tlsdnscycle(isc__networker_t *worker, isc__netievent_t *ev0) {
+	isc__netievent_tlsdnscycle_t *ievent =
+		(isc__netievent_tlsdnscycle_t *)ev0;
+	isc_nmsocket_t *sock = NULL;
+	isc_result_t cycle_result;
+
+	UNUSED(worker);
+
+	REQUIRE(VALID_NMSOCK(ievent->sock));
+	REQUIRE(ievent->sock->tid == isc_nm_tid());
+
+	sock = ievent->sock;
+
+	cycle_result = tls_cycle(sock);
+	if (cycle_result == ISC_R_SUCCESS) {
+		return;
+	}
+
+	tls_error(sock, cycle_result);
+}
+
+/*
+ * libuv read callback: feed the 'nread' bytes received in 'buf' into
+ * the TLS engine and run the TLS I/O cycle. A closing socket, a read
+ * error, a short write into the BIO, or a failed cycle is reported
+ * through isc__nm_failed_read_cb(). In every case another cycle is
+ * scheduled and the receive buffer is released afterwards.
+ */
+void
+isc__nm_tlsdns_read_cb(uv_stream_t *stream, ssize_t nread,
+		       const uv_buf_t *buf) {
+	isc_nmsocket_t *sock = uv_handle_get_data((uv_handle_t *)stream);
+	isc_result_t result = ISC_R_UNSET;
+	bool failed = true;
+	size_t written;
+	int rv;
+
+	REQUIRE(VALID_NMSOCK(sock));
+	REQUIRE(sock->tid == isc_nm_tid());
+	REQUIRE(atomic_load(&sock->reading));
+	REQUIRE(buf != NULL);
+
+	if (isc__nmsocket_closing(sock)) {
+		result = ISC_R_CANCELED;
+	} else if (nread < 0) {
+		/* A plain EOF is not counted as a receive failure */
+		if (nread != UV_EOF) {
+			isc__nm_incstats(sock, STATID_RECVFAIL);
+		}
+		result = isc__nm_uverr2result(nread);
+	} else {
+		if (!atomic_load(&sock->client)) {
+			sock->read_timeout = atomic_load(&sock->mgr->idle);
+		}
+
+		/* Hand the received (encrypted) data to the BIO */
+		rv = BIO_write_ex(sock->tls.app_wbio, buf->base,
+				  (size_t)nread, &written);
+		if (rv <= 0 || (size_t)nread != written) {
+			result = ISC_R_TLSERROR;
+		} else {
+			result = tls_cycle(sock);
+			failed = (result != ISC_R_SUCCESS);
+		}
+	}
+
+	if (failed) {
+		isc__nm_failed_read_cb(sock, result, true);
+	}
+
+	async_tlsdns_cycle(sock);
+
+	/* On error libuv may pass a null buffer; nothing to free then */
+	if (nread < 0 && buf->base == NULL && buf->len == 0) {
+		return;
+	}
+
+	isc__nm_free_uvbuf(sock, buf);
+}
+
+/*
+ * Quota callback: wrap 'quota' into a tlsdnsaccept event for the
+ * listener socket 'sock0' and pass it to the worker owning that socket.
+ */
+static void
+quota_accept_cb(isc_quota_t *quota, void *sock0) {
+	isc_nmsocket_t *sock = (isc_nmsocket_t *)sock0;
+	isc__netievent_tlsdnsaccept_t *accept_ev = NULL;
+
+	REQUIRE(VALID_NMSOCK(sock));
+
+	/* The accept itself happens later, in the event handler */
+	accept_ev = isc__nm_get_netievent_tlsdnsaccept(sock->mgr, sock, quota);
+	isc__nm_maybe_enqueue_ievent(&sock->mgr->workers[sock->tid],
+				     (isc__netievent_t *)accept_ev);
+}
+
+/*
+ * Event handler for the event created by quota_accept_cb(): accept a
+ * connection on the event's socket using the event's quota, and log
+ * the outcome.
+ */
+void
+isc__nm_async_tlsdnsaccept(isc__networker_t *worker, isc__netievent_t *ev0) {
+	isc__netievent_tlsdnsaccept_t *accept_ev =
+		(isc__netievent_tlsdnsaccept_t *)ev0;
+	isc_result_t accept_result;
+
+	UNUSED(worker);
+
+	REQUIRE(VALID_NMSOCK(accept_ev->sock));
+	REQUIRE(accept_ev->sock->tid == isc_nm_tid());
+
+	accept_result = accept_connection(accept_ev->sock, accept_ev->quota);
+
+	isc__nm_accept_connection_log(accept_result, can_log_tlsdns_quota());
+}
+
+/*
+ * Accept one pending connection on the listening socket 'ssock' and set
+ * up a new child socket for it with a TLS object in accept (server)
+ * state.
+ *
+ * 'quota' may be NULL; when the listener is already closing it is
+ * detached here, otherwise it is stored in the new socket.
+ *
+ * Returns ISC_R_SUCCESS when the connection has been set up,
+ * ISC_R_CANCELED when the listener is closing, or the error produced by
+ * the failing step.  On failure after the child socket was created, the
+ * failed-accept callback is invoked and the child socket is destroyed.
+ */
+static isc_result_t
+accept_connection(isc_nmsocket_t *ssock, isc_quota_t *quota) {
+ isc_nmsocket_t *csock = NULL;
+ isc__networker_t *worker = NULL;
+ int r;
+ isc_result_t result;
+ struct sockaddr_storage peer_ss;
+ struct sockaddr_storage local_ss;
+ isc_sockaddr_t local;
+
+ REQUIRE(VALID_NMSOCK(ssock));
+ REQUIRE(ssock->tid == isc_nm_tid());
+
+ /* Nothing has been allocated yet, so only the quota needs releasing. */
+ if (isc__nmsocket_closing(ssock)) {
+ if (quota != NULL) {
+ isc_quota_detach(&quota);
+ }
+ return (ISC_R_CANCELED);
+ }
+
+ REQUIRE(ssock->accept_cb != NULL);
+
+ /*
+ * Create the child socket on the listener's thread, inheriting the
+ * listener's callbacks and extra handle size.
+ */
+ csock = isc_mem_get(ssock->mgr->mctx, sizeof(isc_nmsocket_t));
+ isc__nmsocket_init(csock, ssock->mgr, isc_nm_tlsdnssocket,
+ &ssock->iface);
+ csock->tid = ssock->tid;
+ csock->extrahandlesize = ssock->extrahandlesize;
+ isc__nmsocket_attach(ssock, &csock->server);
+ csock->accept_cb = ssock->accept_cb;
+ csock->accept_cbarg = ssock->accept_cbarg;
+ csock->recv_cb = ssock->recv_cb;
+ csock->recv_cbarg = ssock->recv_cbarg;
+ csock->quota = quota;
+ atomic_init(&csock->accepting, true);
+
+ worker = &csock->mgr->workers[csock->tid];
+
+ /* Both libuv handles carry a back pointer to the child socket. */
+ r = uv_tcp_init(&worker->loop, &csock->uv_handle.tcp);
+ UV_RUNTIME_CHECK(uv_tcp_init, r);
+ uv_handle_set_data(&csock->uv_handle.handle, csock);
+
+ r = uv_timer_init(&worker->loop, &csock->read_timer);
+ UV_RUNTIME_CHECK(uv_timer_init, r);
+ uv_handle_set_data((uv_handle_t *)&csock->read_timer, csock);
+
+ r = uv_accept(&ssock->uv_handle.stream, &csock->uv_handle.stream);
+ if (r != 0) {
+ result = isc__nm_uverr2result(r);
+ goto failure;
+ }
+
+ /* Record the peer address in the socket. */
+ r = uv_tcp_getpeername(&csock->uv_handle.tcp,
+ (struct sockaddr *)&peer_ss,
+ &(int){ sizeof(peer_ss) });
+ if (r != 0) {
+ result = isc__nm_uverr2result(r);
+ goto failure;
+ }
+
+ result = isc_sockaddr_fromsockaddr(&csock->peer,
+ (struct sockaddr *)&peer_ss);
+ if (result != ISC_R_SUCCESS) {
+ goto failure;
+ }
+
+ /* The local address is only needed for the handle created below. */
+ r = uv_tcp_getsockname(&csock->uv_handle.tcp,
+ (struct sockaddr *)&local_ss,
+ &(int){ sizeof(local_ss) });
+ if (r != 0) {
+ result = isc__nm_uverr2result(r);
+ goto failure;
+ }
+
+ result = isc_sockaddr_fromsockaddr(&local,
+ (struct sockaddr *)&local_ss);
+ if (result != ISC_R_SUCCESS) {
+ goto failure;
+ }
+
+ csock->tls.state = TLS_STATE_NONE;
+
+ /* The TLS object is created from the listener's TLS context. */
+ csock->tls.tls = isc_tls_create(ssock->tls.ctx);
+ RUNTIME_CHECK(csock->tls.tls != NULL);
+
+ /*
+ * Two BIO pairs connect the TLS object with the application side:
+ * ssl_wbio <-> app_rbio and ssl_rbio <-> app_wbio.
+ */
+ r = BIO_new_bio_pair(&csock->tls.ssl_wbio, ISC_NETMGR_TCP_RECVBUF_SIZE,
+ &csock->tls.app_rbio, ISC_NETMGR_TCP_RECVBUF_SIZE);
+ RUNTIME_CHECK(r == 1);
+
+ r = BIO_new_bio_pair(&csock->tls.ssl_rbio, ISC_NETMGR_TCP_RECVBUF_SIZE,
+ &csock->tls.app_wbio, ISC_NETMGR_TCP_RECVBUF_SIZE);
+ RUNTIME_CHECK(r == 1);
+
+#if HAVE_SSL_SET0_RBIO && HAVE_SSL_SET0_WBIO
+ /*
+ * Note that if the rbio and wbio are the same then
+ * SSL_set0_rbio() and SSL_set0_wbio() each take ownership of
+ * one reference. Therefore it may be necessary to increment the
+ * number of references available using BIO_up_ref(3) before
+ * calling the set0 functions.
+ */
+ SSL_set0_rbio(csock->tls.tls, csock->tls.ssl_rbio);
+ SSL_set0_wbio(csock->tls.tls, csock->tls.ssl_wbio);
+#else
+ SSL_set_bio(csock->tls.tls, csock->tls.ssl_rbio, csock->tls.ssl_wbio);
+#endif
+
+ /* This is the server side of the handshake. */
+ SSL_set_accept_state(csock->tls.tls);
+
+ /* FIXME: Set SSL_MODE_RELEASE_BUFFERS */
+
+ atomic_store(&csock->accepting, false);
+
+ isc__nm_incstats(csock, STATID_ACCEPT);
+
+ /* Use the manager's 'init' timeout until the handle is created. */
+ csock->read_timeout = atomic_load(&csock->mgr->init);
+
+ csock->closehandle_cb = isc__nm_resume_processing;
+
+ /*
+ * We need to keep the handle alive until we fail to read or
+ * connection is closed by the other side, it will be detached
+ * via prep_destroy()->tlsdns_close_direct().
+ *
+ * The handle will be either detached on acceptcb failure or in
+ * the readcb.
+ */
+ csock->recv_handle = isc__nmhandle_get(csock, NULL, &local);
+
+ /*
+ * The initial timer has been set, update the read timeout for
+ * the next reads.
+ */
+ csock->read_timeout = (atomic_load(&csock->keepalive)
+ ? atomic_load(&csock->mgr->keepalive)
+ : atomic_load(&csock->mgr->idle));
+
+ result = isc__nm_process_sock_buffer(csock);
+ if (result != ISC_R_SUCCESS) {
+ goto failure;
+ }
+
+ /*
+ * sock is now attached to the handle.
+ */
+ isc__nmsocket_detach(&csock);
+
+ return (ISC_R_SUCCESS);
+
+failure:
+ /*
+ * Common error path: mark the child inactive, report the failed
+ * accept, then schedule destruction and drop our reference.
+ */
+ atomic_store(&csock->active, false);
+
+ isc__nm_failed_accept_cb(csock, result);
+
+ isc__nmsocket_prep_destroy(csock);
+
+ isc__nmsocket_detach(&csock);
+
+ return (result);
+}
+
+/*
+ * Queue the DNS message in 'region' for sending on the TLS DNS socket
+ * behind 'handle'.
+ *
+ * A send request is built that holds the two-byte network-order length
+ * prefix, a pointer to the caller's data (not a copy) and a reference
+ * to 'handle'; it is then passed to the socket's worker as a
+ * 'tlsdnssend' event.  'cb' is stored together with 'cbarg' as the
+ * request's send callback.
+ *
+ * Requires:
+ *	'handle' is a valid handle on a socket of type
+ *	isc_nm_tlsdnssocket;
+ *	'region' is non-NULL and no longer than UINT16_MAX bytes, as the
+ *	length prefix cannot describe a longer message.
+ */
+void
+isc__nm_tlsdns_send(isc_nmhandle_t *handle, isc_region_t *region,
+		    isc_nm_cb_t cb, void *cbarg) {
+	isc__netievent_tlsdnssend_t *ievent = NULL;
+	isc__nm_uvreq_t *uvreq = NULL;
+	isc_nmsocket_t *sock = NULL;
+	uint16_t netlen;
+
+	REQUIRE(VALID_NMHANDLE(handle));
+
+	sock = handle->sock;
+
+	REQUIRE(VALID_NMSOCK(sock));
+	REQUIRE(sock->type == isc_nm_tlsdnssocket);
+	/*
+	 * Without this check htons() would silently truncate the length
+	 * to 16 bits while the whole payload is still queued, producing
+	 * a length prefix that does not match the data.
+	 */
+	REQUIRE(region != NULL && region->length <= UINT16_MAX);
+
+	uvreq = isc__nm_uvreq_get(sock->mgr, sock);
+
+	/*
+	 * Store the prefix bytewise rather than through a uint16_t
+	 * pointer cast, so no alignment of 'tcplen' is assumed.
+	 */
+	netlen = htons((uint16_t)region->length);
+	memmove(uvreq->tcplen, &netlen, sizeof(netlen));
+
+	uvreq->uvbuf.base = (char *)region->base;
+	uvreq->uvbuf.len = region->length;
+
+	isc_nmhandle_attach(handle, &uvreq->handle);
+
+	uvreq->cb.send = cb;
+	uvreq->cbarg = cbarg;
+
+	ievent = isc__nm_get_netievent_tlsdnssend(sock->mgr, sock, uvreq);
+	isc__nm_enqueue_ievent(&sock->mgr->workers[sock->tid],
+			       (isc__netievent_t *)ievent);
+}
+
+/*
+ * Handle 'tlsdnssend' async event - send a packet on the socket
+ */
+void
+isc__nm_async_tlsdnssend(isc__networker_t *worker, isc__netievent_t *ev0) {
+	isc__netievent_tlsdnssend_t *ievent = NULL;
+	isc_nmsocket_t *sock = NULL;
+	isc__nm_uvreq_t *uvreq = NULL;
+	isc_result_t result;
+
+	UNUSED(worker);
+
+	ievent = (isc__netievent_tlsdnssend_t *)ev0;
+	sock = ievent->sock;
+	uvreq = ievent->req;
+
+	REQUIRE(sock->type == isc_nm_tlsdnssocket);
+	REQUIRE(sock->tid == isc_nm_tid());
+
+	/*
+	 * A zero write timeout means "not set yet": take the manager's
+	 * keepalive or idle value, depending on the socket's keepalive
+	 * flag.
+	 */
+	if (sock->write_timeout == 0) {
+		if (atomic_load(&sock->keepalive)) {
+			sock->write_timeout =
+				atomic_load(&sock->mgr->keepalive);
+		} else {
+			sock->write_timeout = atomic_load(&sock->mgr->idle);
+		}
+	}
+
+	result = tlsdns_send_direct(sock, uvreq);
+	if (result == ISC_R_SUCCESS) {
+		return;
+	}
+
+	/* The send failed: count it and notify the request's owner. */
+	isc__nm_incstats(sock, STATID_SENDFAIL);
+	isc__nm_failed_send_cb(sock, uvreq, result);
+}
+
+/*
+ * Put the send request 'req' (back) on the queue of the worker selected
+ * by the socket's thread id, as a 'tlsdnssend' event.
+ */
+static void
+tlsdns_send_enqueue(isc_nmsocket_t *sock, isc__nm_uvreq_t *req) {
+	isc__netievent_tlsdnssend_t *send_ev = NULL;
+	isc__networker_t *target = &sock->mgr->workers[sock->tid];
+
+	send_ev = isc__nm_get_netievent_tlsdnssend(sock->mgr, sock, req);
+	isc__nm_enqueue_ievent(target, (isc__netievent_t *)send_ev);
+}
+
+/*
+ * Try to pass one DNS message (two-byte length prefix followed by the
+ * payload of 'req') to SSL_write_ex() on 'sock'.
+ *
+ * Returns:
+ *	- the result of tls_pop_error(), if it is not ISC_R_SUCCESS;
+ *	- ISC_R_CANCELED, if the socket is closing;
+ *	- ISC_R_SUCCESS, if the message was written (the request is then
+ *	  appended to sock->tls.sendreqs and a TLS cycle is scheduled) or
+ *	  if the request was put back on the queue to be retried later;
+ *	- ISC_R_TLSERROR, if SSL_write_ex() failed with an error other
+ *	  than SSL_ERROR_WANT_READ/SSL_ERROR_WANT_WRITE;
+ *	- the result of tls_cycle(), if it fails.
+ */
+static isc_result_t
+tlsdns_send_direct(isc_nmsocket_t *sock, isc__nm_uvreq_t *req) {
+ isc_result_t result;
+ int err = 0;
+ int rv;
+ size_t bytes = 0;
+ size_t sendlen;
+ isc__networker_t *worker = NULL;
+
+ REQUIRE(VALID_NMSOCK(sock));
+ REQUIRE(VALID_UVREQ(req));
+ REQUIRE(sock->tid == isc_nm_tid());
+ REQUIRE(sock->type == isc_nm_tlsdnssocket);
+
+ result = tls_pop_error(sock);
+ if (result != ISC_R_SUCCESS) {
+ return (result);
+ }
+
+ if (isc__nmsocket_closing(sock)) {
+ return (ISC_R_CANCELED);
+ }
+
+ /* Writes won't succeed until handshake end */
+ if (!SSL_is_init_finished(sock->tls.tls)) {
+ goto requeue;
+ }
+
+ /*
+ * Try to send any pending data before trying to call SSL_write_ex().
+ * Otherwise, it could fail with SSL_ERROR_WANT_WRITE error.
+ *
+ * It is important to stress that we want to avoid this happening
+ * due to how SSL_write_ex() works - mainly to avoid partial
+ * writes.
+ *
+ * Although the documentation for these functions is vague, it is
+ * not stated that partial writes are not possible. On the
+ * contrary, one can deduce that it is possible and recovering
+ * from this situation is complicated and unreasonably hard to
+ * implement to the point when it is better to avoid this
+ * situation altogether.
+ *
+ * In particular, the following can be found in the documentation:
+ *
+ * "The write functions will only return with success when the
+ * complete contents of buf of length num has been written. This
+ * default behaviour can be changed with the
+ * SSL_MODE_ENABLE_PARTIAL_WRITE option of
+ * SSL_CTX_set_mode(3). When this flag is set, the write functions
+ * will also return with success when a partial write has been
+ * successfully completed. In this case, the write function
+ * operation is considered completed. The bytes are sent, and a
+ * new write call with a new buffer (with the already sent bytes
+ * removed) must be started. A partial write is performed with the
+ * size of a message block, which is 16kB."
+ *
+ * That is, it is said that success is returned only when the
+ * complete chunk of data is written (encrypted), but it does not
+ * mention that partial writes are not possible (the behaviour can
+ * be changed using SSL_MODE_ENABLE_PARTIAL_WRITE). Another
+ * important aspect of this passage is that a partial write of up
+ * to 16 kilobytes can happen, and the call still can
+ * fail. Needless to say, this amount of data may include more
+ * than one DNS message.
+ *
+ * One could expect that SSL_write_ex() should return the number
+ * of bytes written, but no, that is not guaranteed (emphasis is
+ * mine): "*On success* SSL_write_ex() will store the number of
+ * bytes written in *written."
+ *
+ * Moreover, we can find the following guidance on how to handle
+ * the SSL_ERROR_WANT_WRITE error in the "Warnings" section of the
+ * documentation:
+ *
+ * "When a write function call has to be repeated because
+ * SSL_get_error(3) returned SSL_ERROR_WANT_READ or
+ * SSL_ERROR_WANT_WRITE, it must be repeated with the same
+ * arguments. The data that was passed might have been partially
+ * processed. When SSL_MODE_ACCEPT_MOVING_WRITE_BUFFER was set
+ * using SSL_CTX_set_mode(3) the pointer can be different, but the
+ * data and length should still be the same."
+ *
+ * That is, when a call to SSL_write_ex() fails with
+ * SSL_ERROR_WANT_WRITE, we must attempt to make the next call to
+ * the function exactly with the same arguments. Of course, the
+ * code is structured in such a way that we cannot guarantee that
+ * (and keeping track of that would be unreasonably complicated to
+ * implement). The best we can do to avoid this error is to get
+ * (and send) the outgoing data from the SSL buffer ASAP before
+ * processing the subsequent write request. We can achieve that by
+ * calling tls_cycle() and rescheduling the write request for
+ * being processed later.
+ */
+ if (BIO_pending(sock->tls.app_rbio) > 0) {
+ /* Handle any pending data and requeue the write request. */
+ goto cycle;
+ }
+
+ /*
+ * There's no SSL_writev(), so we need to use a local buffer to
+ * assemble the whole message
+ */
+ worker = &sock->mgr->workers[sock->tid];
+ sendlen = req->uvbuf.len + sizeof(uint16_t);
+ memmove(worker->sendbuf, req->tcplen, sizeof(uint16_t));
+ memmove(worker->sendbuf + sizeof(uint16_t), req->uvbuf.base,
+ req->uvbuf.len);
+
+ rv = SSL_write_ex(sock->tls.tls, worker->sendbuf, sendlen, &bytes);
+ if (rv > 0) {
+ /* Success means the whole message was consumed. */
+ INSIST(sendlen == bytes);
+
+ /*
+ * Keep the request on the socket's list of send requests
+ * and schedule a TLS cycle.
+ */
+ ISC_LIST_APPEND(sock->tls.sendreqs, req, link);
+ async_tlsdns_cycle(sock);
+ return (ISC_R_SUCCESS);
+ }
+
+ /* Nothing was written, maybe enqueue? */
+ err = SSL_get_error(sock->tls.tls, rv);
+
+ switch (err) {
+ case SSL_ERROR_WANT_WRITE:
+ case SSL_ERROR_WANT_READ:
+ /* Retryable: run a TLS cycle and requeue the request. */
+ break;
+ case 0:
+ /* rv <= 0 here, so "no error" is not expected. */
+ UNREACHABLE();
+ default:
+ return (ISC_R_TLSERROR);
+ }
+
+cycle:
+ result = tls_cycle(sock);
+ if (result != ISC_R_SUCCESS) {
+ return (result);
+ }
+
+requeue:
+ /* 'result' is ISC_R_SUCCESS on every path that reaches this point. */
+ tlsdns_send_enqueue(sock, req);
+
+ return (result);
+}
+
+/*
+ * libuv close callback used when the closed handle belongs to a socket
+ * that has a parent (see read_timer_close_cb()): mark the socket closed,
+ * release its BIOs and TLS context and drop a socket reference.
+ */
+static void
+tlsdns_stop_cb(uv_handle_t *handle) {
+	isc_nmsocket_t *sock = uv_handle_get_data(handle);
+	bool was_closed = false;
+
+	REQUIRE(VALID_NMSOCK(sock));
+	REQUIRE(sock->tid == isc_nm_tid());
+	REQUIRE(atomic_load(&sock->closing));
+
+	uv_handle_set_data(handle, NULL);
+
+	/* The false -> true transition of 'closed' must happen here. */
+	if (!atomic_compare_exchange_strong(&sock->closed, &was_closed, true))
+	{
+		UNREACHABLE();
+	}
+
+	isc__nm_incstats(sock, STATID_CLOSE);
+
+	atomic_store(&sock->listening, false);
+
+	BIO_free_all(sock->tls.app_rbio);
+	BIO_free_all(sock->tls.app_wbio);
+
+	if (sock->tls.ctx != NULL) {
+		isc_tlsctx_free(&sock->tls.ctx);
+	}
+
+	isc__nmsocket_detach(&sock);
+}
+
+/*
+ * Final stage of closing a TLS DNS socket: mark it closed, detach from
+ * the server socket, tear down the TLS object, BIOs and TLS context, and
+ * schedule the socket for destruction.
+ */
+static void
+tlsdns_close_sock(isc_nmsocket_t *sock) {
+	bool was_closed = false;
+
+	REQUIRE(VALID_NMSOCK(sock));
+	REQUIRE(sock->tid == isc_nm_tid());
+	REQUIRE(atomic_load(&sock->closing));
+
+	/* The false -> true transition of 'closed' must happen here. */
+	if (!atomic_compare_exchange_strong(&sock->closed, &was_closed, true))
+	{
+		UNREACHABLE();
+	}
+
+	isc__nm_incstats(sock, STATID_CLOSE);
+
+	if (sock->server != NULL) {
+		isc__nmsocket_detach(&sock->server);
+	}
+
+	atomic_store(&sock->connected, false);
+
+	if (sock->tls.tls != NULL) {
+		/*
+		 * Shut the TLS session down properly, so that it stays
+		 * resumable if required, and keep the client session
+		 * before the TLS object is freed.
+		 */
+		tlsdns_set_tls_shutdown(sock->tls.tls);
+		tlsdns_keep_client_tls_session(sock);
+		isc_tls_free(&sock->tls.tls);
+	}
+
+	BIO_free_all(sock->tls.app_rbio);
+	BIO_free_all(sock->tls.app_wbio);
+
+	if (sock->tls.ctx != NULL) {
+		isc_tlsctx_free(&sock->tls.ctx);
+	}
+
+	isc__nmsocket_prep_destroy(sock);
+}
+
+/*
+ * libuv close callback: clear the handle's back pointer and finish
+ * closing the socket it referred to.
+ */
+static void
+tlsdns_close_cb(uv_handle_t *handle) {
+	isc_nmsocket_t *closing_sock = uv_handle_get_data(handle);
+
+	uv_handle_set_data(handle, NULL);
+	tlsdns_close_sock(closing_sock);
+}
+
+/*
+ * libuv close callback for the read timer.  Once the timer handle is
+ * closed, continue with the socket's main handle:
+ *  - a socket with a parent is closed with tlsdns_stop_cb();
+ *  - if the main handle is already closing, finish the close directly;
+ *  - otherwise the main handle is closed with tlsdns_close_cb().
+ */
+static void
+read_timer_close_cb(uv_handle_t *handle) {
+	isc_nmsocket_t *sock = uv_handle_get_data(handle);
+	uv_handle_set_data(handle, NULL);
+
+	REQUIRE(VALID_NMSOCK(sock));
+
+	if (sock->parent != NULL) {
+		uv_close(&sock->uv_handle.handle, tlsdns_stop_cb);
+		return;
+	}
+
+	if (uv_is_closing(&sock->uv_handle.handle)) {
+		tlsdns_close_sock(sock);
+		return;
+	}
+
+	uv_close(&sock->uv_handle.handle, tlsdns_close_cb);
+}
+
+/*
+ * Stop one child of a listening socket: start closing it (only once),
+ * decrement the parent's 'rchildren' counter and wait on the parent's
+ * 'stoplistening' barrier.
+ */
+static void
+stop_tlsdns_child(isc_nmsocket_t *sock) {
+	bool was_closing = false;
+
+	REQUIRE(sock->type == isc_nm_tlsdnssocket);
+	REQUIRE(sock->tid == isc_nm_tid());
+
+	/* Proceed only if this call is the one that sets 'closing'. */
+	if (atomic_compare_exchange_strong(&sock->closing, &was_closing, true))
+	{
+		tlsdns_close_direct(sock);
+
+		atomic_fetch_sub(&sock->parent->rchildren, 1);
+
+		isc_barrier_wait(&sock->parent->stoplistening);
+	}
+}
+
+/*
+ * Stop a listening socket: deactivate every child, asking the children
+ * at other indexes to stop asynchronously and stopping the child at the
+ * current thread's index directly, then mark the listener closed and
+ * schedule it for destruction.
+ */
+static void
+stop_tlsdns_parent(isc_nmsocket_t *sock) {
+	isc_nmsocket_t *csock = NULL;
+	size_t i = 0;
+
+	REQUIRE(VALID_NMSOCK(sock));
+	REQUIRE(sock->tid == isc_nm_tid());
+	REQUIRE(sock->type == isc_nm_tlsdnslistener);
+
+	isc_barrier_init(&sock->stoplistening, sock->nchildren);
+
+	while (i < sock->nchildren) {
+		csock = &sock->children[i];
+
+		REQUIRE(VALID_NMSOCK(csock));
+
+		/*
+		 * The child at our own index is handled last, after the
+		 * other children have been scheduled for closing.
+		 */
+		if ((int)i != isc_nm_tid()) {
+			atomic_store(&csock->active, false);
+			enqueue_stoplistening(csock);
+		}
+
+		i++;
+	}
+
+	csock = &sock->children[isc_nm_tid()];
+	atomic_store(&csock->active, false);
+	stop_tlsdns_child(csock);
+
+	atomic_store(&sock->closed, true);
+	isc__nmsocket_prep_destroy(sock);
+}
+
+/*
+ * Begin closing a socket on its own thread: release the quota and the
+ * receive handle, stop the timer and reading, then close the read timer
+ * handle.  The remaining work is done in read_timer_close_cb().
+ */
+static void
+tlsdns_close_direct(isc_nmsocket_t *sock) {
+	uv_handle_t *timer_handle = NULL;
+
+	REQUIRE(VALID_NMSOCK(sock));
+	REQUIRE(sock->tid == isc_nm_tid());
+	REQUIRE(atomic_load(&sock->closing));
+
+	REQUIRE(sock->tls.pending_req == NULL);
+
+	if (sock->quota != NULL) {
+		isc_quota_detach(&sock->quota);
+	}
+
+	if (sock->recv_handle != NULL) {
+		isc_nmhandle_detach(&sock->recv_handle);
+	}
+
+	isc__nmsocket_timer_stop(sock);
+	isc__nm_stop_reading(sock);
+
+	/* The close callback finds the socket through the handle data. */
+	timer_handle = (uv_handle_t *)&sock->read_timer;
+	uv_handle_set_data(timer_handle, sock);
+	uv_close(timer_handle, read_timer_close_cb);
+}
+
+/*
+ * Close an inactive TLS DNS socket.  Only the first call has any
+ * effect.  The close is performed directly when called on the socket's
+ * own thread; otherwise it is passed to that thread as a 'tlsdnsclose'
+ * event.
+ */
+void
+isc__nm_tlsdns_close(isc_nmsocket_t *sock) {
+	bool was_closing = false;
+	isc__netievent_tlsdnsclose_t *ievent = NULL;
+
+	REQUIRE(VALID_NMSOCK(sock));
+	REQUIRE(sock->type == isc_nm_tlsdnssocket);
+	REQUIRE(!isc__nmsocket_active(sock));
+
+	if (!atomic_compare_exchange_strong(&sock->closing, &was_closing,
+					    true))
+	{
+		return;
+	}
+
+	if (sock->tid != isc_nm_tid()) {
+		/* Wrong thread: hand the close over the async channel. */
+		ievent = isc__nm_get_netievent_tlsdnsclose(sock->mgr, sock);
+		isc__nm_enqueue_ievent(&sock->mgr->workers[sock->tid],
+				       (isc__netievent_t *)ievent);
+		return;
+	}
+
+	tlsdns_close_direct(sock);
+}
+
+/*
+ * Handle a 'tlsdnsclose' async event: close the socket carried by the
+ * event on the thread the socket belongs to.
+ */
+void
+isc__nm_async_tlsdnsclose(isc__networker_t *worker, isc__netievent_t *ev0) {
+	isc__netievent_tlsdnsclose_t *close_ev = NULL;
+	isc_nmsocket_t *sock = NULL;
+
+	UNUSED(worker);
+
+	close_ev = (isc__netievent_tlsdnsclose_t *)ev0;
+	sock = close_ev->sock;
+
+	REQUIRE(VALID_NMSOCK(sock));
+	REQUIRE(sock->tid == isc_nm_tid());
+
+	tlsdns_close_direct(sock);
+}
+
+/*
+ * libuv close callback used by isc__nm_tlsdns_shutdown(): schedule the
+ * socket for destruction and drop a socket reference.
+ */
+static void
+tlsdns_close_connect_cb(uv_handle_t *handle) {
+	isc_nmsocket_t *sock = NULL;
+
+	sock = uv_handle_get_data(handle);
+
+	REQUIRE(VALID_NMSOCK(sock));
+
+	REQUIRE(isc__nm_in_netthread());
+	REQUIRE(sock->tid == isc_nm_tid());
+
+	isc__nmsocket_prep_destroy(sock);
+	isc__nmsocket_detach(&sock);
+}
+
+/*
+ * Shut down a TLS DNS socket on its own thread.
+ *
+ * The socket is deactivated first; if it was not active, nothing else
+ * happens.  What follows depends on the state of the socket:
+ *  - accepting: nothing more is done here;
+ *  - connecting with a pending request: the connect is failed with
+ *    ISC_R_CANCELED, or ISC_R_TLSBADPEERCERT if peer verification has
+ *    failed;
+ *  - connecting without a pending request: the uv handle is closed;
+ *  - a static handle exists: the read is failed with
+ *    ISC_R_SHUTTINGDOWN or ISC_R_CANCELED;
+ *  - otherwise: a socket without a parent is scheduled for destruction.
+ */
+void
+isc__nm_tlsdns_shutdown(isc_nmsocket_t *sock) {
+ REQUIRE(VALID_NMSOCK(sock));
+ REQUIRE(sock->tid == isc_nm_tid());
+ REQUIRE(sock->type == isc_nm_tlsdnssocket);
+
+ /*
+ * If the socket is active, mark it inactive and
+ * continue. If it isn't active, stop now.
+ */
+ if (!isc__nmsocket_deactivate(sock)) {
+ return;
+ }
+
+ if (sock->tls.tls) {
+ /* Shutdown any active TLS connections */
+ tlsdns_set_tls_shutdown(sock->tls.tls);
+ }
+
+ /* No further action is taken here while an accept is in progress. */
+ if (atomic_load(&sock->accepting)) {
+ return;
+ }
+
+ /* TLS handshake hasn't been completed yet */
+ if (atomic_load(&sock->connecting)) {
+ isc_nmsocket_t *tsock = NULL;
+
+ /*
+ * TCP connection has been established, now waiting on
+ * TLS handshake to complete
+ */
+ if (sock->tls.pending_req != NULL) {
+ isc_result_t result = ISC_R_CANCELED;
+ isc__nm_uvreq_t *req = sock->tls.pending_req;
+ /* Take the request over from the socket. */
+ sock->tls.pending_req = NULL;
+
+ if (peer_verification_has_failed(sock)) {
+ /*
+ * Save error message as 'sock->tls' will get
+ * detached.
+ */
+ sock->tls.tls_verify_errmsg =
+ isc_tls_verify_peer_result_string(
+ sock->tls.tls);
+ result = ISC_R_TLSBADPEERCERT;
+ }
+ isc__nm_failed_connect_cb(sock, req, result, false);
+ return;
+ }
+
+ /* The TCP connection hasn't been established yet */
+ /*
+ * The extra reference taken here is dropped by
+ * tlsdns_close_connect_cb().
+ */
+ isc__nmsocket_attach(sock, &tsock);
+ uv_close(&sock->uv_handle.handle, tlsdns_close_connect_cb);
+ return;
+ }
+
+ /* There is a static handle: report the failure to the reader. */
+ if (sock->statichandle != NULL) {
+ if (isc__nm_closing(sock)) {
+ isc__nm_failed_read_cb(sock, ISC_R_SHUTTINGDOWN, false);
+ } else {
+ isc__nm_failed_read_cb(sock, ISC_R_CANCELED, false);
+ }
+ return;
+ }
+
+ /*
+ * Otherwise, we just send the socket to abyss...
+ */
+ if (sock->parent == NULL) {
+ isc__nmsocket_prep_destroy(sock);
+ }
+}
+
+/*
+ * Request cancellation of the read on the socket behind 'handle' by
+ * queuing a 'tlsdnscancel' event on the socket's worker.
+ */
+void
+isc__nm_tlsdns_cancelread(isc_nmhandle_t *handle) {
+	isc__netievent_tlsdnscancel_t *ievent = NULL;
+	isc_nmsocket_t *sock = NULL;
+
+	REQUIRE(VALID_NMHANDLE(handle));
+
+	sock = handle->sock;
+
+	REQUIRE(VALID_NMSOCK(sock));
+	REQUIRE(sock->type == isc_nm_tlsdnssocket);
+
+	ievent = isc__nm_get_netievent_tlsdnscancel(sock->mgr, sock, handle);
+	isc__nm_enqueue_ievent(&sock->mgr->workers[sock->tid],
+			       (isc__netievent_t *)ievent);
+}
+
+/*
+ * Handle a 'tlsdnscancel' async event: fail the read on the socket
+ * carried by the event with ISC_R_EOF.
+ */
+void
+isc__nm_async_tlsdnscancel(isc__networker_t *worker, isc__netievent_t *ev0) {
+	isc__netievent_tlsdnscancel_t *cancel_ev = NULL;
+	isc_nmsocket_t *sock = NULL;
+
+	UNUSED(worker);
+
+	cancel_ev = (isc__netievent_tlsdnscancel_t *)ev0;
+	sock = cancel_ev->sock;
+
+	REQUIRE(VALID_NMSOCK(sock));
+	REQUIRE(sock->tid == isc_nm_tid());
+
+	isc__nm_failed_read_cb(sock, ISC_R_EOF, false);
+}
+
+/* Zone transfers/updates over TLS are allowed only when "dot" ALPN
+ * was negotiated.
+ *
+ * Per the XoT spec, we must also check that the TLS version is >=
+ * 1.3. The check could be added here. However, we still need to
+ * support platforms where no cryptographic library with TLSv1.3
+ * support is available. As a result of this we cannot be too strict
+ * regarding the minimal TLS protocol version in order to make it
+ * possible to do encrypted zone transfers over TLSv1.2, as it would
+ * not be right to leave users on these platforms without means for
+ * encrypted zone transfers using BIND only.
+ *
+ * The ones requiring strict compatibility with the specification
+ * could disable TLSv1.2 in the configuration file.
+ */
+isc_result_t
+isc__nm_tlsdns_xfr_checkperm(isc_nmsocket_t *sock) {
+	REQUIRE(VALID_NMSOCK(sock));
+	REQUIRE(sock->type == isc_nm_tlsdnssocket);
+
+	/* Permission is granted only if ALPN negotiation has succeeded. */
+	if (sock->tls.alpn_negotiated) {
+		return (ISC_R_SUCCESS);
+	}
+
+	return (ISC_R_DOTALPNERROR);
+}
+
+/*
+ * Return the peer verification result string for the socket behind
+ * 'handle'.  While the TLS object exists the string is obtained from
+ * it; otherwise the message saved in the socket is returned.
+ */
+const char *
+isc__nm_tlsdns_verify_tls_peer_result_string(const isc_nmhandle_t *handle) {
+	isc_nmsocket_t *sock = NULL;
+
+	REQUIRE(VALID_NMHANDLE(handle));
+	REQUIRE(VALID_NMSOCK(handle->sock));
+	REQUIRE(handle->sock->type == isc_nm_tlsdnssocket);
+
+	sock = handle->sock;
+
+	if (sock->tls.tls != NULL) {
+		return (isc_tls_verify_peer_result_string(sock->tls.tls));
+	}
+
+	return (sock->tls.tls_verify_errmsg);
+}
+
+/*
+ * Replace the TLS context of the listener's child socket at index 'tid'
+ * with 'tlsctx': the old context is freed and the new one attached.
+ */
+void
+isc__nm_async_tlsdns_set_tlsctx(isc_nmsocket_t *listener, isc_tlsctx_t *tlsctx,
+				const int tid) {
+	isc_tlsctx_t **child_ctxp = NULL;
+
+	REQUIRE(tid >= 0);
+
+	child_ctxp = &listener->children[tid].tls.ctx;
+
+	isc_tlsctx_free(child_ctxp);
+	isc_tlsctx_attach(tlsctx, child_ctxp);
+}
+
+/*
+ * Release the TLS related data (client session cache, TLS context) held
+ * by a TLS DNS socket or listener.  Sockets of other types are left
+ * untouched.
+ */
+void
+isc__nm_tlsdns_cleanup_data(isc_nmsocket_t *sock) {
+	if (sock->type != isc_nm_tlsdnslistener &&
+	    sock->type != isc_nm_tlsdnssocket)
+	{
+		return;
+	}
+
+	/* A session cache is expected on client sockets only. */
+	if (sock->tls.client_sess_cache != NULL) {
+		INSIST(atomic_load(&sock->client));
+		INSIST(sock->type == isc_nm_tlsdnssocket);
+		isc_tlsctx_client_session_cache_detach(
+			&sock->tls.client_sess_cache);
+	}
+
+	if (sock->tls.ctx != NULL) {
+		INSIST(ISC_LIST_EMPTY(sock->tls.sendreqs));
+		isc_tlsctx_free(&sock->tls.ctx);
+	}
+}
+
+/*
+ * Store the socket's TLS session in the client session cache, keyed by
+ * the peer address.  This is done at most once per socket and only when
+ * a client session cache is set.
+ */
+static void
+tlsdns_keep_client_tls_session(isc_nmsocket_t *sock) {
+	/*
+	 * The isc_tls_t must only be accessed from the worker thread the
+	 * socket is bound to.
+	 */
+	REQUIRE(sock->tid == isc_nm_tid());
+
+	if (sock->tls.client_sess_cache == NULL ||
+	    sock->tls.client_session_saved)
+	{
+		return;
+	}
+
+	INSIST(atomic_load(&sock->client));
+	isc_tlsctx_client_session_cache_keep_sockaddr(
+		sock->tls.client_sess_cache, &sock->peer, sock->tls.tls);
+	sock->tls.client_session_saved = true;
+}
diff --git a/lib/isc/netmgr/tlsstream.c b/lib/isc/netmgr/tlsstream.c
new file mode 100644
index 0000000..7b49071
--- /dev/null
+++ b/lib/isc/netmgr/tlsstream.c
@@ -0,0 +1,1348 @@
+/*
+ * Copyright (C) Internet Systems Consortium, Inc. ("ISC")
+ *
+ * SPDX-License-Identifier: MPL-2.0
+ *
+ * This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, you can obtain one at https://mozilla.org/MPL/2.0/.
+ *
+ * See the COPYRIGHT file distributed with this work for additional
+ * information regarding copyright ownership.
+ */
+
+#include <errno.h>
+#include <libgen.h>
+#include <unistd.h>
+#include <uv.h>
+
+#include <openssl/err.h>
+#include <openssl/ssl.h>
+
+#include <isc/atomic.h>
+#include <isc/buffer.h>
+#include <isc/condition.h>
+#include <isc/log.h>
+#include <isc/magic.h>
+#include <isc/mem.h>
+#include <isc/netmgr.h>
+#include <isc/once.h>
+#include <isc/quota.h>
+#include <isc/random.h>
+#include <isc/refcount.h>
+#include <isc/region.h>
+#include <isc/result.h>
+#include <isc/sockaddr.h>
+#include <isc/stdtime.h>
+#include <isc/thread.h>
+#include <isc/util.h>
+
+#include "../openssl_shim.h"
+#include "netmgr-int.h"
+#include "uv-compat.h"
+
+#define TLS_BUF_SIZE (UINT16_MAX)
+
+/*
+ * Map an SSL_get_error()-style code to an isc_result_t:
+ *  - SSL_ERROR_ZERO_RETURN      -> ISC_R_EOF
+ *  - SSL_ERROR_SSL              -> ISC_R_TLSBADPEERCERT when 'tls' is
+ *                                  set, 'tls_state' is before TLS_IO and
+ *                                  the verify result is not X509_V_OK;
+ *                                  ISC_R_TLSERROR otherwise
+ *  - anything else              -> ISC_R_UNEXPECTED
+ */
+static isc_result_t
+tls_error_to_result(const int tls_err, const int tls_state, isc_tls_t *tls) {
+	if (tls_err == SSL_ERROR_ZERO_RETURN) {
+		return (ISC_R_EOF);
+	}
+
+	if (tls_err != SSL_ERROR_SSL) {
+		return (ISC_R_UNEXPECTED);
+	}
+
+	if (tls != NULL && tls_state < TLS_IO &&
+	    SSL_get_verify_result(tls) != X509_V_OK)
+	{
+		return (ISC_R_TLSBADPEERCERT);
+	}
+
+	return (ISC_R_TLSERROR);
+}
+
+/*
+ * Forward declarations of file-local (static) helpers.
+ */
+static void
+tls_failed_read_cb(isc_nmsocket_t *sock, const isc_result_t result);
+
+static void
+tls_do_bio(isc_nmsocket_t *sock, isc_region_t *received_data,
+ isc__nm_uvreq_t *send_data, bool finish);
+
+static void
+tls_readcb(isc_nmhandle_t *handle, isc_result_t result, isc_region_t *region,
+ void *cbarg);
+
+static void
+tls_close_direct(isc_nmsocket_t *sock);
+
+static void
+async_tls_do_bio(isc_nmsocket_t *sock);
+
+static void
+tls_init_listener_tlsctx(isc_nmsocket_t *listener, isc_tlsctx_t *ctx);
+
+static void
+tls_cleanup_listener_tlsctx(isc_nmsocket_t *listener);
+
+static isc_tlsctx_t *
+tls_get_listener_tlsctx(isc_nmsocket_t *listener, const int tid);
+
+static void
+tls_keep_client_tls_session(isc_nmsocket_t *sock);
+
+static void
+tls_try_shutdown(isc_tls_t *tls, const bool quite);
+
+/*
+ * The socket is closing, outerhandle has been detached, listener is
+ * inactive, or the netmgr is closing: any operation on it should abort
+ * with ISC_R_CANCELED.
+ */
+static bool
+inactive(isc_nmsocket_t *sock) {
+	/* The socket itself is inactive or closing. */
+	if (!isc__nmsocket_active(sock) || atomic_load(&sock->closing)) {
+		return (true);
+	}
+
+	/* The outer handle is gone ... */
+	if (sock->outerhandle == NULL) {
+		return (true);
+	}
+
+	/* ... or its socket is inactive or closing. */
+	if (!isc__nmsocket_active(sock->outerhandle->sock) ||
+	    atomic_load(&sock->outerhandle->sock->closing))
+	{
+		return (true);
+	}
+
+	/* There is a listener and it is inactive. */
+	if (sock->listener != NULL && !isc__nmsocket_active(sock->listener)) {
+		return (true);
+	}
+
+	/* Finally, the network manager itself may be closing. */
+	return (isc__nm_closing(sock));
+}
+
+/*
+ * Invoke the socket's connect callback, if one is set, with 'handle'
+ * and 'result'.  After an unsuccessful result the callbacks of the
+ * handle's socket are cleared.
+ */
+static void
+tls_call_connect_cb(isc_nmsocket_t *sock, isc_nmhandle_t *handle,
+		    const isc_result_t result) {
+	if (sock->connect_cb != NULL) {
+		sock->connect_cb(handle, result, sock->connect_cbarg);
+
+		if (result != ISC_R_SUCCESS) {
+			isc__nmsocket_clearcb(handle->sock);
+		}
+	}
+}
+
+/*
+ * Send completion callback for the requests created by
+ * tls_send_outgoing().  'handle' is the handle the data was sent on,
+ * 'eresult' is the result of that send and 'cbarg' is the
+ * isc_nmsocket_tls_send_req_t describing the request.
+ *
+ * The function calls the user's send callback (if any), releases the
+ * request and its buffer, and then decides how to continue: cancel the
+ * read when the connection is being finished, run another TLS I/O
+ * round on success, or report the failure if a client is still waiting
+ * for the handshake.  The reference to the TLS socket held by the
+ * request is dropped at the end.
+ */
+static void
+tls_senddone(isc_nmhandle_t *handle, isc_result_t eresult, void *cbarg) {
+ isc_nmsocket_tls_send_req_t *send_req =
+ (isc_nmsocket_tls_send_req_t *)cbarg;
+ isc_nmsocket_t *tlssock = NULL;
+ bool finish = send_req->finish;
+
+ REQUIRE(VALID_NMHANDLE(handle));
+ REQUIRE(VALID_NMSOCK(handle->sock));
+ REQUIRE(VALID_NMSOCK(send_req->tlssock));
+
+ /* Take the socket reference over from the request. */
+ tlssock = send_req->tlssock;
+ send_req->tlssock = NULL;
+
+ if (finish) {
+ tls_try_shutdown(tlssock->tlsstream.tls, true);
+ }
+
+ if (send_req->cb != NULL) {
+ INSIST(VALID_NMHANDLE(tlssock->statichandle));
+ send_req->cb(send_req->handle, eresult, send_req->cbarg);
+ isc_nmhandle_detach(&send_req->handle);
+ /* The last handle has been just detached: close the underlying
+ * socket. */
+ if (tlssock->statichandle == NULL) {
+ finish = true;
+ }
+ }
+
+ /* We are trying to avoid a memory allocation for small write
+ * requests. See the mirroring code in the tls_send_outgoing()
+ * function. */
+ if (send_req->data.length > sizeof(send_req->smallbuf)) {
+ isc_mem_put(handle->sock->mgr->mctx, send_req->data.base,
+ send_req->data.length);
+ } else {
+ INSIST(&send_req->smallbuf[0] == send_req->data.base);
+ }
+ isc_mem_put(handle->sock->mgr->mctx, send_req, sizeof(*send_req));
+ /* One fewer send in flight on the TLS socket. */
+ tlssock->tlsstream.nsending--;
+
+ if (finish && eresult == ISC_R_SUCCESS) {
+ /* The connection is being finished: stop reading. */
+ tlssock->tlsstream.reading = false;
+ isc_nm_cancelread(handle);
+ } else if (eresult == ISC_R_SUCCESS) {
+ /* Continue with the next round of TLS I/O. */
+ tls_do_bio(tlssock, NULL, NULL, false);
+ } else if (eresult != ISC_R_SUCCESS &&
+ tlssock->tlsstream.state <= TLS_HANDSHAKE &&
+ !tlssock->tlsstream.server)
+ {
+ /*
+ * We are still waiting for the handshake to complete, but
+ * it isn't going to happen. Call the connect callback,
+ * passing the error code there.
+ *
+ * (Note: tls_failed_read_cb() calls the connect
+ * rather than the read callback in this case.
+ * XXX: clarify?)
+ */
+ tls_failed_read_cb(tlssock, eresult);
+ }
+
+ isc__nmsocket_detach(&tlssock);
+}
+
+/*
+ * Report the failure 'result' (which must not be ISC_R_SUCCESS) on a
+ * TLS socket.
+ *
+ * For a client socket that is still in the TLS_INIT or TLS_HANDSHAKE
+ * state and has a connect callback, the connect callback is called.
+ * Otherwise, if there is a read callback and a static handle, and
+ * either a read is outstanding or the failure is a timeout, the read
+ * callback is called.
+ *
+ * The socket is scheduled for destruction afterwards, except after a
+ * timeout was reported to the read callback while there is no outer
+ * handle or the outer socket's timer is running.
+ */
+static void
+tls_failed_read_cb(isc_nmsocket_t *sock, const isc_result_t result) {
+ bool destroy = true;
+
+ REQUIRE(VALID_NMSOCK(sock));
+ REQUIRE(result != ISC_R_SUCCESS);
+
+ if (!sock->tlsstream.server &&
+ (sock->tlsstream.state == TLS_INIT ||
+ sock->tlsstream.state == TLS_HANDSHAKE) &&
+ sock->connect_cb != NULL)
+ {
+ /*
+ * A temporary handle is created only to deliver the
+ * result to the connect callback.
+ */
+ isc_nmhandle_t *handle = NULL;
+ INSIST(sock->statichandle == NULL);
+ handle = isc__nmhandle_get(sock, &sock->peer, &sock->iface);
+ tls_call_connect_cb(sock, handle, result);
+ isc__nmsocket_clearcb(sock);
+ isc_nmhandle_detach(&handle);
+ } else if (sock->recv_cb != NULL && sock->statichandle != NULL &&
+ (sock->recv_read || result == ISC_R_TIMEDOUT))
+ {
+ isc__nm_uvreq_t *req = NULL;
+ INSIST(VALID_NMHANDLE(sock->statichandle));
+ sock->recv_read = false;
+ req = isc__nm_uvreq_get(sock->mgr, sock);
+ req->cb.recv = sock->recv_cb;
+ req->cbarg = sock->recv_cbarg;
+ isc_nmhandle_attach(sock->statichandle, &req->handle);
+ /* The callbacks are kept after a timeout. */
+ if (result != ISC_R_TIMEDOUT) {
+ isc__nmsocket_clearcb(sock);
+ }
+ isc__nm_readcb(sock, req, result);
+ /*
+ * Re-check the timer after the callback has run: the
+ * socket is kept if there is no outer handle or the outer
+ * socket's timer is running.
+ */
+ if (result == ISC_R_TIMEDOUT &&
+ (sock->outerhandle == NULL ||
+ isc__nmsocket_timer_running(sock->outerhandle->sock)))
+ {
+ destroy = false;
+ }
+ }
+
+ if (destroy) {
+ isc__nmsocket_prep_destroy(sock);
+ }
+}
+
+/*
+ * Queue a 'tlsdobio' event for 'sock' on the worker selected by the
+ * socket's thread id.
+ */
+static void
+async_tls_do_bio(isc_nmsocket_t *sock) {
+	isc__netievent_tlsdobio_t *dobio_ev = NULL;
+	isc__networker_t *target = &sock->mgr->workers[sock->tid];
+
+	dobio_ev = isc__nm_get_netievent_tlsdobio(sock->mgr, sock);
+	isc__nm_enqueue_ievent(target, (isc__netievent_t *)dobio_ev);
+}
+
+/*
+ * Take the data pending in the socket's outgoing BIO (at most
+ * TLS_BUF_SIZE bytes per call) and send it on the outer handle.
+ *
+ * 'finish' requests a TLS shutdown attempt before the data is
+ * collected.  'cb' and 'cbarg', when 'cb' is not NULL, are stored in
+ * the send request together with a reference to 'tlshandle';
+ * tls_senddone() is the completion callback of the send.
+ *
+ * Returns the number of bytes passed to isc_nm_send(), the
+ * non-positive value of BIO_pending() if there was nothing to send, or
+ * 0 if the socket is inactive (in that case 'cb', if set, is called
+ * with ISC_R_CANCELED).
+ */
+static int
+tls_send_outgoing(isc_nmsocket_t *sock, bool finish, isc_nmhandle_t *tlshandle,
+ isc_nm_cb_t cb, void *cbarg) {
+ isc_nmsocket_tls_send_req_t *send_req = NULL;
+ int pending;
+ int rv;
+ size_t len = 0;
+
+ if (inactive(sock)) {
+ if (cb != NULL) {
+ INSIST(VALID_NMHANDLE(tlshandle));
+ cb(tlshandle, ISC_R_CANCELED, cbarg);
+ }
+ return (0);
+ }
+
+ if (finish) {
+ tls_try_shutdown(sock->tlsstream.tls, false);
+ tls_keep_client_tls_session(sock);
+ }
+
+ pending = BIO_pending(sock->tlsstream.bio_out);
+ if (pending <= 0) {
+ return (pending);
+ }
+
+ /* TODO Should we keep track of these requests in a list? */
+ if ((unsigned int)pending > TLS_BUF_SIZE) {
+ pending = TLS_BUF_SIZE;
+ }
+
+ send_req = isc_mem_get(sock->mgr->mctx, sizeof(*send_req));
+ *send_req = (isc_nmsocket_tls_send_req_t){ .finish = finish,
+ .data.length = pending };
+
+ /* Let's try to avoid a memory allocation for small write requests */
+ if ((size_t)pending > sizeof(send_req->smallbuf)) {
+ send_req->data.base = isc_mem_get(sock->mgr->mctx, pending);
+ } else {
+ send_req->data.base = &send_req->smallbuf[0];
+ }
+
+ /* The request holds a socket reference until tls_senddone() runs. */
+ isc__nmsocket_attach(sock, &send_req->tlssock);
+ if (cb != NULL) {
+ send_req->cb = cb;
+ send_req->cbarg = cbarg;
+ isc_nmhandle_attach(tlshandle, &send_req->handle);
+ }
+
+ rv = BIO_read_ex(sock->tlsstream.bio_out, send_req->data.base, pending,
+ &len);
+ /* There's something pending, read must succeed */
+ RUNTIME_CHECK(rv == 1);
+
+ INSIST(VALID_NMHANDLE(sock->outerhandle));
+
+ /* Counted down again in tls_senddone(). */
+ sock->tlsstream.nsending++;
+ isc_nm_send(sock->outerhandle, &send_req->data, tls_senddone, send_req);
+
+ return (pending);
+}
+
+/*
+ * Push data from the TLS layer to the network via tls_send_outgoing()
+ * and return its result.  If the peer's shutdown was received but ours
+ * has not been sent yet, 'finish' is forced to true.  When 'send_data'
+ * is given, its handle and send callback are passed along.
+ */
+static int
+tls_process_outgoing(isc_nmsocket_t *sock, bool finish,
+		     isc__nm_uvreq_t *send_data) {
+	const int shutdown_state = SSL_get_shutdown(sock->tlsstream.tls);
+	isc_nmhandle_t *tlshandle = NULL;
+	isc_nm_cb_t send_cb = NULL;
+	void *send_cbarg = NULL;
+
+	if ((shutdown_state & SSL_RECEIVED_SHUTDOWN) != 0 &&
+	    (shutdown_state & SSL_SENT_SHUTDOWN) == 0)
+	{
+		finish = true;
+	}
+
+	/* Data from TLS to network */
+	if (send_data != NULL) {
+		tlshandle = send_data->handle;
+		send_cb = send_data->cb.send;
+		send_cbarg = send_data->cbarg;
+	}
+
+	return (tls_send_outgoing(sock, finish, tlshandle, send_cb,
+				  send_cbarg));
+}
+
+/*
+ * Make one attempt to advance the TLS handshake; the socket must be in
+ * the TLS_HANDSHAKE state.
+ *
+ * Returns 0 if the handshake had already finished, otherwise the return
+ * value of SSL_do_handshake().  When the handshake completes in this
+ * call (return value 1), a handle is created, the accept callback
+ * (server) or connect callback (client) is called, the state becomes
+ * TLS_IO and, if 'presult' is not NULL, the result is stored in
+ * '*presult'.  '*presult' is left untouched in all other cases.
+ */
+static int
+tls_try_handshake(isc_nmsocket_t *sock, isc_result_t *presult) {
+ int rv = 0;
+ isc_nmhandle_t *tlshandle = NULL;
+
+ REQUIRE(sock->tlsstream.state == TLS_HANDSHAKE);
+
+ if (SSL_is_init_finished(sock->tlsstream.tls) == 1) {
+ return (0);
+ }
+
+ rv = SSL_do_handshake(sock->tlsstream.tls);
+ if (rv == 1) {
+ isc_result_t result = ISC_R_SUCCESS;
+ INSIST(SSL_is_init_finished(sock->tlsstream.tls) == 1);
+ INSIST(sock->statichandle == NULL);
+ isc__nmsocket_log_tls_session_reuse(sock, sock->tlsstream.tls);
+ tlshandle = isc__nmhandle_get(sock, &sock->peer, &sock->iface);
+
+ if (isc__nm_closing(sock)) {
+ result = ISC_R_SHUTTINGDOWN;
+ }
+
+ if (sock->tlsstream.server) {
+ /*
+ * The accept callback is only called when nothing
+ * is shutting down; a closing listener overrides
+ * the result with ISC_R_CANCELED.
+ */
+ if (isc__nmsocket_closing(sock->listener)) {
+ result = ISC_R_CANCELED;
+ } else if (result == ISC_R_SUCCESS) {
+ result = sock->listener->accept_cb(
+ tlshandle, result,
+ sock->listener->accept_cbarg);
+ }
+ } else {
+ /* The connect callback gets the result as is. */
+ tls_call_connect_cb(sock, tlshandle, result);
+ }
+ isc_nmhandle_detach(&tlshandle);
+ sock->tlsstream.state = TLS_IO;
+
+ if (presult != NULL) {
+ *presult = result;
+ }
+ }
+
+ return (rv);
+}
+
+/*
+ * Schedule the socket for destruction if it is past the handshake, has
+ * no static handle and has no sends in flight.  Returns true if the
+ * socket was scheduled for destruction, false otherwise.
+ */
+static bool
+tls_try_to_close_unused_socket(isc_nmsocket_t *sock) {
+	const bool unused = (sock->tlsstream.state > TLS_HANDSHAKE &&
+			     sock->statichandle == NULL &&
+			     sock->tlsstream.nsending == 0);
+
+	if (!unused) {
+		return (false);
+	}
+
+	/*
+	 * It seems that no action on the socket has been scheduled at
+	 * some point after the handshake, so close the connection.
+	 */
+	isc__nmsocket_prep_destroy(sock);
+	return (true);
+}
+
+/*
+ * Drive the TLS state machine of 'sock' by one step.
+ *
+ * At most one of 'received_data' and 'send_data' may be non-NULL:
+ *
+ * - 'received_data': bytes read from the outer handle; they are written
+ *   into the inbound memory BIO (bio_in) and, if the handshake is still
+ *   in progress, the handshake is retried;
+ * - 'send_data': a send request whose buffer is passed to SSL_write_ex();
+ *   on failure its send callback is invoked here and the function returns;
+ * - 'finish': when true, no decrypted data is delivered to the read
+ *   callback, and the flag is forwarded to tls_process_outgoing().
+ *
+ * After the I/O step the function collects the SSL status with
+ * SSL_get_error(), flushes outgoing data via tls_process_outgoing(), and
+ * then either waits for more I/O, closes an unused socket, or reports the
+ * failure through tls_failed_read_cb().
+ *
+ * Must be called on the thread the socket is bound to (sock->tid).
+ */
+static void
+tls_do_bio(isc_nmsocket_t *sock, isc_region_t *received_data,
+ isc__nm_uvreq_t *send_data, bool finish) {
+ isc_result_t result = ISC_R_SUCCESS;
+ int pending, tls_status = SSL_ERROR_NONE;
+ int rv = 0;
+ size_t len = 0;
+ int saved_errno = 0;
+
+ REQUIRE(VALID_NMSOCK(sock));
+ REQUIRE(sock->tid == isc_nm_tid());
+
+ /* We will resume read if TLS layer wants us to */
+ if (sock->tlsstream.reading && sock->outerhandle) {
+ REQUIRE(VALID_NMHANDLE(sock->outerhandle));
+ isc_nm_pauseread(sock->outerhandle);
+ }
+
+ /*
+ * Clear the TLS error queue so that SSL_get_error() and SSL I/O
+ * routine calls will not get affected by prior error statuses.
+ *
+ * See here:
+ * https://www.openssl.org/docs/man3.0/man3/SSL_get_error.html
+ *
+ * In particular, it mentions the following:
+ *
+ * The current thread's error queue must be empty before the
+ * TLS/SSL I/O operation is attempted, or SSL_get_error() will not
+ * work reliably.
+ *
+ * As we use the result of SSL_get_error() to decide on I/O
+ * operations, we need to ensure that it works reliably by
+ * cleaning the error queue.
+ *
+ * The sum of details: https://stackoverflow.com/a/37980911
+ */
+ ERR_clear_error();
+
+ if (sock->tlsstream.state == TLS_INIT) {
+ /* First call: pick the TLS role and start the handshake. */
+ INSIST(received_data == NULL && send_data == NULL);
+ if (sock->tlsstream.server) {
+ SSL_set_accept_state(sock->tlsstream.tls);
+ } else {
+ SSL_set_connect_state(sock->tlsstream.tls);
+ }
+ sock->tlsstream.state = TLS_HANDSHAKE;
+ rv = tls_try_handshake(sock, NULL);
+ INSIST(SSL_is_init_finished(sock->tlsstream.tls) == 0);
+ } else if (sock->tlsstream.state == TLS_CLOSED) {
+ return;
+ } else { /* initialised and doing I/O */
+ if (received_data != NULL) {
+ INSIST(send_data == NULL);
+ /* Feed the encrypted bytes into the inbound memory BIO. */
+ rv = BIO_write_ex(sock->tlsstream.bio_in,
+ received_data->base,
+ received_data->length, &len);
+ if (rv <= 0 || len != received_data->length) {
+ result = ISC_R_TLSERROR;
+#if defined(NETMGR_TRACE) && defined(NETMGR_TRACE_VERBOSE)
+ saved_errno = errno;
+#endif
+ goto error;
+ }
+
+ /*
+ * Only after doing the IO we can check whether SSL
+ * handshake is done.
+ */
+ if (sock->tlsstream.state == TLS_HANDSHAKE) {
+ isc_result_t hs_result = ISC_R_UNSET;
+ rv = tls_try_handshake(sock, &hs_result);
+ if (sock->tlsstream.state == TLS_IO &&
+ hs_result != ISC_R_SUCCESS)
+ {
+ /*
+ * The accept callback has been called
+ * unsuccessfully. Let's try to shut
+ * down the TLS connection gracefully.
+ */
+ INSIST(SSL_is_init_finished(
+ sock->tlsstream.tls) ==
+ 1);
+ finish = true;
+ }
+ }
+ } else if (send_data != NULL) {
+ INSIST(received_data == NULL);
+ INSIST(sock->tlsstream.state > TLS_HANDSHAKE);
+ /*
+ * Sample the shutdown flags before writing: they select
+ * the result code reported if the write fails.
+ */
+ bool received_shutdown =
+ ((SSL_get_shutdown(sock->tlsstream.tls) &
+ SSL_RECEIVED_SHUTDOWN) != 0);
+ bool sent_shutdown =
+ ((SSL_get_shutdown(sock->tlsstream.tls) &
+ SSL_SENT_SHUTDOWN) != 0);
+ rv = SSL_write_ex(sock->tlsstream.tls,
+ send_data->uvbuf.base,
+ send_data->uvbuf.len, &len);
+ if (rv != 1 || len != send_data->uvbuf.len) {
+ /*
+ * A failed write on a session that is already
+ * shutting down is reported as cancellation
+ * rather than as a TLS error.
+ */
+ result = received_shutdown || sent_shutdown
+ ? ISC_R_CANCELED
+ : ISC_R_TLSERROR;
+ send_data->cb.send(send_data->handle, result,
+ send_data->cbarg);
+ send_data = NULL;
+ return;
+ }
+ }
+
+ /* Decrypt and pass data from network to client */
+ if (sock->tlsstream.state >= TLS_IO && sock->recv_cb != NULL &&
+ !atomic_load(&sock->readpaused) &&
+ sock->statichandle != NULL && !finish)
+ {
+ uint8_t recv_buf[TLS_BUF_SIZE];
+ INSIST(sock->tlsstream.state > TLS_HANDSHAKE);
+ while ((rv = SSL_read_ex(sock->tlsstream.tls, recv_buf,
+ TLS_BUF_SIZE, &len)) == 1)
+ {
+ isc_region_t region;
+ region = (isc_region_t){ .base = &recv_buf[0],
+ .length = len };
+
+ INSIST(VALID_NMHANDLE(sock->statichandle));
+ sock->recv_cb(sock->statichandle, ISC_R_SUCCESS,
+ &region, sock->recv_cbarg);
+ /* The handle could have been detached in
+ * sock->recv_cb, making the sock->statichandle
+ * nullified (it happens in netmgr.c). If it is
+ * the case, then it means that we are not
+ * interested in keeping the connection alive
+ * anymore. Let's shut down the SSL session,
+ * send what we have in the SSL buffers,
+ * and close the connection.
+ */
+ if (sock->statichandle == NULL) {
+ finish = true;
+ break;
+ } else if (sock->recv_cb == NULL) {
+ /*
+ * The 'sock->recv_cb' might have been
+ * nullified during the call to
+ * 'sock->recv_cb'. That could happen,
+ * indirectly when wrapping up.
+ *
+ * In this case, let's close the TLS
+ * connection.
+ */
+ finish = true;
+ break;
+ } else if (atomic_load(&sock->readpaused)) {
+ /*
+ * Reading has been paused from within
+ * the context of read callback - stop
+ * processing incoming data.
+ */
+ break;
+ }
+ }
+ }
+ }
+ /*
+ * Reset errno before SSL_get_error() and capture it right after, so
+ * that the SSL_ERROR_SYSCALL check below sees a fresh value.
+ */
+ errno = 0;
+ tls_status = SSL_get_error(sock->tlsstream.tls, rv);
+ saved_errno = errno;
+
+ /* See "BUGS" section at:
+ * https://www.openssl.org/docs/man1.1.1/man3/SSL_get_error.html
+ *
+ * It is mentioned there that when TLS status equals
+ * SSL_ERROR_SYSCALL AND errno == 0 it means that underlying
+ * transport layer returned EOF prematurely. However, we are
+ * managing the transport ourselves, so we should just resume
+ * reading from the TCP socket.
+ *
+ * It seems that this case has been handled properly on modern
+ * versions of OpenSSL. That being said, the situation goes in
+ * line with the manual: it is briefly mentioned there that
+ * SSL_ERROR_SYSCALL might be returned not only in a case of
+ * low-level errors (like system call failures).
+ */
+ if (tls_status == SSL_ERROR_SYSCALL && saved_errno == 0 &&
+ received_data == NULL && send_data == NULL && finish == false)
+ {
+ tls_status = SSL_ERROR_WANT_READ;
+ }
+
+ /* Flush whatever the TLS layer produced towards the network. */
+ pending = tls_process_outgoing(sock, finish, send_data);
+ if (pending > 0 && tls_status != SSL_ERROR_SSL) {
+ /* We'll continue in tls_senddone */
+ return;
+ }
+
+ switch (tls_status) {
+ case SSL_ERROR_NONE:
+ case SSL_ERROR_ZERO_RETURN:
+ (void)tls_try_to_close_unused_socket(sock);
+ return;
+ case SSL_ERROR_WANT_WRITE:
+ if (sock->tlsstream.nsending == 0) {
+ /*
+ * Launch tls_do_bio asynchronously. If we're sending
+ * already the send callback will call it.
+ */
+ async_tls_do_bio(sock);
+ }
+ return;
+ case SSL_ERROR_WANT_READ:
+ if (tls_try_to_close_unused_socket(sock) ||
+ sock->outerhandle == NULL || atomic_load(&sock->readpaused))
+ {
+ return;
+ }
+
+ INSIST(VALID_NMHANDLE(sock->outerhandle));
+
+ /*
+ * Either resume the read paused at the top of this function,
+ * or start the very first read during the handshake.
+ */
+ if (sock->tlsstream.reading) {
+ isc_nm_resumeread(sock->outerhandle);
+ } else if (sock->tlsstream.state == TLS_HANDSHAKE) {
+ sock->tlsstream.reading = true;
+ isc_nm_read(sock->outerhandle, tls_readcb, sock);
+ }
+ return;
+ default:
+ result = tls_error_to_result(tls_status, sock->tlsstream.state,
+ sock->tlsstream.tls);
+ break;
+ }
+
+error:
+#if defined(NETMGR_TRACE) && defined(NETMGR_TRACE_VERBOSE)
+ isc_log_write(isc_lctx, ISC_LOGCATEGORY_GENERAL, ISC_LOGMODULE_NETMGR,
+ ISC_LOG_NOTICE,
+ "SSL error in BIO: %d %s (errno: %d). Arguments: "
+ "received_data: %p, "
+ "send_data: %p, finish: %s",
+ tls_status, isc_result_totext(result), saved_errno,
+ received_data, send_data, finish ? "true" : "false");
+#endif
+ tls_failed_read_cb(sock, result);
+}
+
+/*
+ * Read callback installed on the outer handle: 'cbarg' is the TLS socket
+ * wrapping it. Successful reads are fed into the TLS engine; failures
+ * are reported through tls_failed_read_cb().
+ */
+static void
+tls_readcb(isc_nmhandle_t *handle, isc_result_t result, isc_region_t *region,
+	   void *cbarg) {
+	isc_nmsocket_t *tlssock = cbarg;
+
+	REQUIRE(VALID_NMSOCK(tlssock));
+	REQUIRE(VALID_NMHANDLE(handle));
+	REQUIRE(tlssock->tid == isc_nm_tid());
+
+	if (result == ISC_R_SUCCESS) {
+		tls_do_bio(tlssock, region, NULL, false);
+	} else {
+		tls_failed_read_cb(tlssock, result);
+	}
+}
+
+/*
+ * Create the inbound/outbound memory BIOs for 'sock', attach them to the
+ * already created SSL object (sock->tlsstream.tls) and put the TLS stream
+ * into the TLS_INIT state.
+ *
+ * 'server' records whether the socket acts as the TLS server side.
+ *
+ * Returns ISC_R_SUCCESS, or ISC_R_TLSERROR on failure. On failure the SSL
+ * object is freed and every BIO created here is released; the BIO
+ * pointers in 'sock' are reset to NULL.
+ */
+static isc_result_t
+initialize_tls(isc_nmsocket_t *sock, bool server) {
+	BIO *bio_in = NULL;
+	BIO *bio_out = NULL;
+
+	REQUIRE(sock->tid == isc_nm_tid());
+
+	bio_in = BIO_new(BIO_s_mem());
+	if (bio_in == NULL) {
+		goto error;
+	}
+
+	bio_out = BIO_new(BIO_s_mem());
+	if (bio_out == NULL) {
+		goto error;
+	}
+
+	if (BIO_set_mem_eof_return(bio_in, EOF) != 1 ||
+	    BIO_set_mem_eof_return(bio_out, EOF) != 1)
+	{
+		goto error;
+	}
+
+	/* From here on the SSL object owns both BIOs. */
+	sock->tlsstream.bio_in = bio_in;
+	sock->tlsstream.bio_out = bio_out;
+	SSL_set_bio(sock->tlsstream.tls, sock->tlsstream.bio_in,
+		    sock->tlsstream.bio_out);
+	sock->tlsstream.server = server;
+	sock->tlsstream.nsending = 0;
+	sock->tlsstream.state = TLS_INIT;
+	return (ISC_R_SUCCESS);
+
+error:
+	/*
+	 * SSL_set_bio() has not been called yet, so the BIOs are still
+	 * ours and must be freed explicitly; freeing the SSL object alone
+	 * would leak them. BIO_free_all(NULL) is a no-op.
+	 */
+	BIO_free_all(bio_out);
+	BIO_free_all(bio_in);
+	sock->tlsstream.bio_out = sock->tlsstream.bio_in = NULL;
+	isc_tls_free(&sock->tlsstream.tls);
+	return (ISC_R_TLSERROR);
+}
+
+/*
+ * Accept callback installed on the outer listener by isc_nm_listentls();
+ * 'cbarg' is the TLS listener socket.
+ *
+ * For every accepted outer connection a 'wrapper' TLS socket is created,
+ * bound to the current thread, given an SSL object created from the
+ * listener's TLS context for this thread, and the handshake is started
+ * with tls_do_bio().
+ *
+ * Returns 'result' unchanged if the accept itself failed, ISC_R_CANCELED
+ * if either socket is closing or the listener is not listening,
+ * ISC_R_TLSERROR if the SSL object could not be created, and
+ * ISC_R_SUCCESS otherwise.
+ */
+static isc_result_t
+tlslisten_acceptcb(isc_nmhandle_t *handle, isc_result_t result, void *cbarg) {
+ isc_nmsocket_t *tlslistensock = (isc_nmsocket_t *)cbarg;
+ isc_nmsocket_t *tlssock = NULL;
+ isc_tlsctx_t *tlsctx = NULL;
+ int tid;
+
+ /* If accept() was unsuccessful we can't do anything */
+ if (result != ISC_R_SUCCESS) {
+ return (result);
+ }
+
+ REQUIRE(VALID_NMHANDLE(handle));
+ REQUIRE(VALID_NMSOCK(handle->sock));
+ REQUIRE(VALID_NMSOCK(tlslistensock));
+ REQUIRE(tlslistensock->type == isc_nm_tlslistener);
+
+ if (isc__nmsocket_closing(handle->sock) ||
+ isc__nmsocket_closing(tlslistensock) ||
+ !atomic_load(&tlslistensock->listening))
+ {
+ return (ISC_R_CANCELED);
+ }
+
+ /*
+ * We need to create a 'wrapper' tlssocket for this connection.
+ */
+ tlssock = isc_mem_get(handle->sock->mgr->mctx, sizeof(*tlssock));
+ isc__nmsocket_init(tlssock, handle->sock->mgr, isc_nm_tlssocket,
+ &handle->sock->iface);
+
+ tid = isc_nm_tid();
+ /* We need to initialize SSL now to reference SSL_CTX properly */
+ tlsctx = tls_get_listener_tlsctx(tlslistensock, tid);
+ RUNTIME_CHECK(tlsctx != NULL);
+ isc_tlsctx_attach(tlsctx, &tlssock->tlsstream.ctx);
+ tlssock->tlsstream.tls = isc_tls_create(tlssock->tlsstream.ctx);
+ if (tlssock->tlsstream.tls == NULL) {
+ /* Undo the partial setup: mark closed, drop the context, detach. */
+ atomic_store(&tlssock->closed, true);
+ isc_tlsctx_free(&tlssock->tlsstream.ctx);
+ isc__nmsocket_detach(&tlssock);
+ return (ISC_R_TLSERROR);
+ }
+
+ tlssock->extrahandlesize = tlslistensock->extrahandlesize;
+ isc__nmsocket_attach(tlslistensock, &tlssock->listener);
+ isc_nmhandle_attach(handle, &tlssock->outerhandle);
+ tlssock->peer = handle->sock->peer;
+ /* The read timeout is taken from the manager's 'init' value. */
+ tlssock->read_timeout = atomic_load(&handle->sock->mgr->init);
+ tlssock->tid = tid;
+
+ /*
+ * Hold a reference to tlssock in the TCP socket: it will be
+ * detached in isc__nm_tls_cleanup_data().
+ */
+ handle->sock->tlsstream.tlssocket = tlssock;
+
+ result = initialize_tls(tlssock, true);
+ RUNTIME_CHECK(result == ISC_R_SUCCESS);
+ /* TODO: catch failure code, detach tlssock, and log the error */
+
+ /* Kick off the TLS handshake (the socket is in the TLS_INIT state). */
+ tls_do_bio(tlssock, NULL, NULL, false);
+ return (result);
+}
+
+/*
+ * Start listening for TLS connections on 'iface'.
+ *
+ * A TLS listener socket is created and layered on top of a TCP listener
+ * started with isc_nm_listentcp(); accepted connections are wrapped by
+ * tlslisten_acceptcb(). 'accept_cb'/'accept_cbarg' are stored on the TLS
+ * listener, and 'sslctx' is attached once per worker.
+ *
+ * Returns ISC_R_SHUTTINGDOWN if the manager is closing, the failure code
+ * of isc_nm_listentcp() if the TCP listener cannot be started, and
+ * ISC_R_SUCCESS otherwise, in which case '*sockp' is set to the new
+ * listener.
+ */
+isc_result_t
+isc_nm_listentls(isc_nm_t *mgr, isc_sockaddr_t *iface,
+		 isc_nm_accept_cb_t accept_cb, void *accept_cbarg,
+		 size_t extrahandlesize, int backlog, isc_quota_t *quota,
+		 SSL_CTX *sslctx, isc_nmsocket_t **sockp) {
+	isc_result_t result;
+	isc_nmsocket_t *tlssock = NULL;
+	isc_nmsocket_t *tsock = NULL;
+
+	REQUIRE(VALID_NM(mgr));
+	if (atomic_load(&mgr->closing)) {
+		return (ISC_R_SHUTTINGDOWN);
+	}
+
+	/* Set up the TLS listener socket itself. */
+	tlssock = isc_mem_get(mgr->mctx, sizeof(*tlssock));
+	isc__nmsocket_init(tlssock, mgr, isc_nm_tlslistener, iface);
+	tlssock->result = ISC_R_UNSET;
+	tlssock->accept_cb = accept_cb;
+	tlssock->accept_cbarg = accept_cbarg;
+	tlssock->extrahandlesize = extrahandlesize;
+	tls_init_listener_tlsctx(tlssock, sslctx);
+	tlssock->tlsstream.tls = NULL;
+
+	/*
+	 * The TLS listener wraps an unencrypted stream: 'tlssock->outer'
+	 * becomes the socket listening for plain TCP connections.
+	 */
+	result = isc_nm_listentcp(mgr, iface, tlslisten_acceptcb, tlssock,
+				  extrahandlesize, backlog, quota,
+				  &tlssock->outer);
+	if (result != ISC_R_SUCCESS) {
+		atomic_store(&tlssock->closed, true);
+		isc__nmsocket_detach(&tlssock);
+		return (result);
+	}
+
+	/* Cross-link the two listeners while holding the outer socket. */
+	isc__nmsocket_attach(tlssock->outer, &tsock);
+	tlssock->result = result;
+	atomic_store(&tlssock->active, true);
+	INSIST(tlssock->outer->tlsstream.tlslistener == NULL);
+	isc__nmsocket_attach(tlssock, &tlssock->outer->tlsstream.tlslistener);
+	isc__nmsocket_detach(&tsock);
+	INSIST(result != ISC_R_UNSET);
+	tlssock->nchildren = tlssock->outer->nchildren;
+
+	isc__nmsocket_barrier_init(tlssock);
+	atomic_init(&tlssock->rchildren, tlssock->nchildren);
+
+	/* 'result' can only be ISC_R_SUCCESS at this point. */
+	atomic_store(&tlssock->listening, true);
+	*sockp = tlssock;
+
+	return (result);
+}
+
+/*
+ * Worker-thread handler for a queued TLS send event.
+ *
+ * Takes the request out of the event; if the socket is inactive the send
+ * callback is invoked with ISC_R_CANCELED, otherwise the request is
+ * handed to tls_do_bio(). The request is returned with
+ * isc__nm_uvreq_put() in both cases.
+ */
+void
+isc__nm_async_tlssend(isc__networker_t *worker, isc__netievent_t *ev0) {
+	isc__netievent_tlssend_t *ievent = (isc__netievent_tlssend_t *)ev0;
+	isc_nmsocket_t *sock = ievent->sock;
+	isc__nm_uvreq_t *req = ievent->req;
+
+	REQUIRE(VALID_UVREQ(req));
+	REQUIRE(sock->tid == isc_nm_tid());
+
+	UNUSED(worker);
+
+	/* The event no longer refers to the request. */
+	ievent->req = NULL;
+
+	if (inactive(sock)) {
+		req->cb.send(req->handle, ISC_R_CANCELED, req->cbarg);
+	} else {
+		tls_do_bio(sock, NULL, req, false);
+	}
+
+	isc__nm_uvreq_put(&req, sock);
+}
+
+/*
+ * Queue 'region' for sending over the TLS socket behind 'handle'.
+ *
+ * A send request referencing the caller's buffer (it is not copied here)
+ * is built and passed to the socket's worker as a tlssend event; 'cb' is
+ * called with 'cbarg' when the request is processed.
+ */
+void
+isc__nm_tls_send(isc_nmhandle_t *handle, const isc_region_t *region,
+		 isc_nm_cb_t cb, void *cbarg) {
+	isc_nmsocket_t *sock = NULL;
+	isc__nm_uvreq_t *req = NULL;
+	isc__netievent_tlssend_t *event = NULL;
+
+	REQUIRE(VALID_NMHANDLE(handle));
+	REQUIRE(VALID_NMSOCK(handle->sock));
+
+	sock = handle->sock;
+
+	REQUIRE(sock->type == isc_nm_tlssocket);
+
+	/* Describe the send: handle, completion callback and buffer. */
+	req = isc__nm_uvreq_get(sock->mgr, sock);
+	isc_nmhandle_attach(handle, &req->handle);
+	req->cb.send = cb;
+	req->cbarg = cbarg;
+	req->uvbuf.base = (char *)region->base;
+	req->uvbuf.len = region->length;
+
+	/* Hand it over to the socket's worker via the async channel. */
+	event = isc__nm_get_netievent_tlssend(sock->mgr, sock, req);
+	isc__nm_enqueue_ievent(&sock->mgr->workers[sock->tid],
+			       (isc__netievent_t *)event);
+}
+
+/*
+ * Worker-thread handler for a queued "start reading" event: runs one
+ * tls_do_bio() step on the event's socket with no input and no send.
+ */
+void
+isc__nm_async_tlsstartread(isc__networker_t *worker, isc__netievent_t *ev0) {
+	isc_nmsocket_t *sock = ((isc__netievent_tlsstartread_t *)ev0)->sock;
+
+	REQUIRE(sock->tid == isc_nm_tid());
+
+	UNUSED(worker);
+
+	tls_do_bio(sock, NULL, NULL, false);
+}
+
+/*
+ * Start reading on the TLS socket behind 'handle'.
+ *
+ * 'handle' must be the socket's static handle, the call must be made on
+ * the socket's thread, and no read callback may be installed yet. If the
+ * socket is inactive, 'cb' is called immediately with ISC_R_CANCELED;
+ * otherwise 'cb'/'cbarg' are recorded and a tlsstartread event is queued.
+ */
+void
+isc__nm_tls_read(isc_nmhandle_t *handle, isc_nm_recv_cb_t cb, void *cbarg) {
+	isc_nmsocket_t *sock = NULL;
+
+	REQUIRE(VALID_NMHANDLE(handle));
+
+	sock = handle->sock;
+	REQUIRE(VALID_NMSOCK(sock));
+	REQUIRE(sock->statichandle == handle);
+	REQUIRE(sock->tid == isc_nm_tid());
+	REQUIRE(sock->recv_cb == NULL);
+
+	if (inactive(sock)) {
+		cb(handle, ISC_R_CANCELED, NULL, cbarg);
+		return;
+	}
+
+	/* Remember who to deliver the decrypted data to. */
+	sock->recv_cb = cb;
+	sock->recv_cbarg = cbarg;
+	sock->recv_read = true;
+
+	isc__nm_enqueue_ievent(
+		&sock->mgr->workers[sock->tid],
+		(isc__netievent_t *)isc__nm_get_netievent_tlsstartread(
+			sock->mgr, sock));
+}
+
+/*
+ * Pause reading on the TLS socket behind 'handle'.
+ *
+ * The 'readpaused' flag is flipped from false to true; only the caller
+ * that performs the flip also pauses reading on the outer handle, if
+ * there is one.
+ */
+void
+isc__nm_tls_pauseread(isc_nmhandle_t *handle) {
+	bool was_paused = false;
+
+	REQUIRE(VALID_NMHANDLE(handle));
+	REQUIRE(VALID_NMSOCK(handle->sock));
+
+	if (!atomic_compare_exchange_strong(&handle->sock->readpaused,
+					    &was_paused, true))
+	{
+		/* Already paused. */
+		return;
+	}
+
+	if (handle->sock->outerhandle != NULL) {
+		isc_nm_pauseread(handle->sock->outerhandle);
+	}
+}
+
+/*
+ * Resume reading on the TLS socket behind 'handle'.
+ *
+ * The 'readpaused' flag is flipped from true to false. Only when that
+ * flip actually happens (i.e. the socket really was paused) and the
+ * socket is still active, a tls_do_bio() run is scheduled so that data
+ * already buffered in the TLS layer gets delivered and reading on the
+ * outer handle is resumed. Resuming a socket that is not paused is a
+ * no-op.
+ *
+ * The previous code negated the compare-and-exchange result, so the I/O
+ * was scheduled only when the socket had NOT been paused, and a genuine
+ * resume merely cleared the flag without restarting any processing.
+ */
+void
+isc__nm_tls_resumeread(isc_nmhandle_t *handle) {
+	REQUIRE(VALID_NMHANDLE(handle));
+	REQUIRE(VALID_NMSOCK(handle->sock));
+
+	if (!atomic_compare_exchange_strong(&handle->sock->readpaused,
+					    &(bool){ true }, false))
+	{
+		/* Reading was not paused: nothing to resume. */
+		return;
+	}
+
+	if (inactive(handle->sock)) {
+		return;
+	}
+
+	async_tls_do_bio(handle->sock);
+}
+
+/*
+ * Close the TLS socket on its own thread: stop and release the outer
+ * handle, drop the listener reference and mark the socket closed.
+ *
+ * The remaining cleanup is performed in isc__nm_tls_cleanup_data().
+ */
+static void
+tls_close_direct(isc_nmsocket_t *sock) {
+	REQUIRE(VALID_NMSOCK(sock));
+	REQUIRE(sock->tid == isc_nm_tid());
+
+	/*
+	 * There are no external references left at this point, so
+	 * everything can be closed.
+	 */
+	if (sock->outerhandle != NULL) {
+		isc_nmhandle_t *outer = sock->outerhandle;
+
+		isc_nm_pauseread(outer);
+		isc__nmsocket_clearcb(outer->sock);
+		isc_nmhandle_detach(&sock->outerhandle);
+	}
+
+	if (sock->listener != NULL) {
+		isc__nmsocket_detach(&sock->listener);
+	}
+
+	atomic_store(&sock->closed, true);
+	atomic_store(&sock->active, false);
+	sock->tlsstream.state = TLS_CLOSED;
+}
+
+/*
+ * Request closing of a TLS socket.
+ *
+ * Only the first caller to flip 'closing' from false to true queues the
+ * tlsclose event for the socket's worker; later calls return at once.
+ */
+void
+isc__nm_tls_close(isc_nmsocket_t *sock) {
+	bool already_closing = false;
+
+	REQUIRE(VALID_NMSOCK(sock));
+	REQUIRE(sock->type == isc_nm_tlssocket);
+
+	if (atomic_compare_exchange_strong(&sock->closing, &already_closing,
+					   true))
+	{
+		isc__netievent_tlsclose_t *event =
+			isc__nm_get_netievent_tlsclose(sock->mgr, sock);
+
+		isc__nm_maybe_enqueue_ievent(&sock->mgr->workers[sock->tid],
+					     (isc__netievent_t *)event);
+	}
+}
+
+/*
+ * Worker-thread handler for a queued tlsclose event: closes the event's
+ * socket with tls_close_direct().
+ */
+void
+isc__nm_async_tlsclose(isc__networker_t *worker, isc__netievent_t *ev0) {
+	isc__netievent_tlsclose_t *ievent = (isc__netievent_tlsclose_t *)ev0;
+
+	REQUIRE(ievent->sock->tid == isc_nm_tid());
+
+	UNUSED(worker);
+
+	tls_close_direct(ievent->sock);
+}
+
+/*
+ * Stop a TLS listener: 'sock' must be a valid socket of type
+ * isc_nm_tlslistener; the work is delegated to isc__nmsocket_stop().
+ */
+void
+isc__nm_tls_stoplistening(isc_nmsocket_t *sock) {
+ REQUIRE(VALID_NMSOCK(sock));
+ REQUIRE(sock->type == isc_nm_tlslistener);
+
+ isc__nmsocket_stop(sock);
+}
+
+static void
+tcp_connected(isc_nmhandle_t *handle, isc_result_t result, void *cbarg);
+
+/*
+ * Start an outgoing TLS connection from 'local' to 'peer'.
+ *
+ * A client TLS socket is prepared (connect callback, timeout, TLS
+ * context and, optionally, the client session cache, which must belong
+ * to 'ctx') and a TCP connection is started; the TLS setup continues in
+ * tcp_connected(). If the manager is closing, 'cb' is called right away
+ * with a NULL handle and ISC_R_SHUTTINGDOWN.
+ */
+void
+isc_nm_tlsconnect(isc_nm_t *mgr, isc_sockaddr_t *local, isc_sockaddr_t *peer,
+		  isc_nm_cb_t cb, void *cbarg, isc_tlsctx_t *ctx,
+		  isc_tlsctx_client_session_cache_t *client_sess_cache,
+		  unsigned int timeout, size_t extrahandlesize) {
+	isc_nmsocket_t *tlssock = NULL;
+#if defined(NETMGR_TRACE) && defined(NETMGR_TRACE_VERBOSE)
+	fprintf(stderr, "TLS: isc_nm_tlsconnect(): in net thread: %s\n",
+		isc__nm_in_netthread() ? "yes" : "no");
+#endif /* NETMGR_TRACE */
+
+	REQUIRE(VALID_NM(mgr));
+
+	if (atomic_load(&mgr->closing)) {
+		cb(NULL, ISC_R_SHUTTINGDOWN, cbarg);
+		return;
+	}
+
+	/* Build the client-side TLS socket. */
+	tlssock = isc_mem_get(mgr->mctx, sizeof(*tlssock));
+	isc__nmsocket_init(tlssock, mgr, isc_nm_tlssocket, local);
+	tlssock->extrahandlesize = extrahandlesize;
+	tlssock->result = ISC_R_UNSET;
+	tlssock->connect_cb = cb;
+	tlssock->connect_cbarg = cbarg;
+	tlssock->connect_timeout = timeout;
+	isc_tlsctx_attach(ctx, &tlssock->tlsstream.ctx);
+	atomic_init(&tlssock->client, true);
+
+	if (client_sess_cache != NULL) {
+		INSIST(isc_tlsctx_client_session_cache_getctx(
+			       client_sess_cache) == ctx);
+		isc_tlsctx_client_session_cache_attach(
+			client_sess_cache,
+			&tlssock->tlsstream.client_sess_cache);
+	}
+
+	/* The TLS part is set up once the TCP connection is established. */
+	isc_nm_tcpconnect(mgr, local, peer, tcp_connected, tlssock,
+			  tlssock->connect_timeout, 0);
+}
+
+/*
+ * Connect callback for the TCP connection started by isc_nm_tlsconnect();
+ * 'cbarg' is the client TLS socket prepared there.
+ *
+ * On success the TLS socket is bound to the current thread, given an SSL
+ * object and BIOs, attached to the TCP handle, and the handshake is
+ * started with tls_do_bio(). On any failure the user's connect callback
+ * is invoked with the failure code through tls_call_connect_cb() and the
+ * TLS socket is detached.
+ */
+static void
+tcp_connected(isc_nmhandle_t *handle, isc_result_t result, void *cbarg) {
+ isc_nmsocket_t *tlssock = (isc_nmsocket_t *)cbarg;
+ isc_nmhandle_t *tlshandle = NULL;
+
+ REQUIRE(VALID_NMSOCK(tlssock));
+
+ /* The TLS socket lives on the thread this callback runs on. */
+ tlssock->tid = isc_nm_tid();
+ if (result != ISC_R_SUCCESS) {
+ goto error;
+ }
+
+ INSIST(VALID_NMHANDLE(handle));
+
+ tlssock->iface = handle->sock->iface;
+ tlssock->peer = handle->sock->peer;
+ if (isc__nm_closing(tlssock)) {
+ result = ISC_R_SHUTTINGDOWN;
+ goto error;
+ }
+
+ /*
+ * We need to initialize SSL now to reference SSL_CTX properly.
+ */
+ tlssock->tlsstream.tls = isc_tls_create(tlssock->tlsstream.ctx);
+ if (tlssock->tlsstream.tls == NULL) {
+ result = ISC_R_TLSERROR;
+ goto error;
+ }
+
+ result = initialize_tls(tlssock, false);
+ if (result != ISC_R_SUCCESS) {
+ goto error;
+ }
+ /* NOTE(review): 'peer' was already set above from handle->sock->peer. */
+ tlssock->peer = isc_nmhandle_peeraddr(handle);
+ isc_nmhandle_attach(handle, &tlssock->outerhandle);
+ atomic_store(&tlssock->active, true);
+
+ /* Offer a cached session for this peer, if a cache is in use. */
+ if (tlssock->tlsstream.client_sess_cache != NULL) {
+ isc_tlsctx_client_session_cache_reuse_sockaddr(
+ tlssock->tlsstream.client_sess_cache, &tlssock->peer,
+ tlssock->tlsstream.tls);
+ }
+
+ /*
+ * Hold a reference to tlssock in the TCP socket: it will be
+ * detached in isc__nm_tls_cleanup_data().
+ */
+ handle->sock->tlsstream.tlssocket = tlssock;
+
+ /* Kick off the TLS handshake (the socket is in the TLS_INIT state). */
+ tls_do_bio(tlssock, NULL, NULL, false);
+ return;
+error:
+ /* Report the failure through a temporary handle, then let go. */
+ tlshandle = isc__nmhandle_get(tlssock, NULL, NULL);
+ atomic_store(&tlssock->closed, true);
+ tls_call_connect_cb(tlssock, tlshandle, result);
+ isc_nmhandle_detach(&tlshandle);
+ isc__nmsocket_detach(&tlssock);
+}
+
+/*
+ * Cancel reading on a TLS socket.
+ *
+ * An active socket in the TLS_IO state gets a finishing tls_do_bio()
+ * run; otherwise the read on the outer handle, if any, is cancelled.
+ */
+static void
+tls_cancelread(isc_nmsocket_t *sock) {
+	if (!inactive(sock) && sock->tlsstream.state == TLS_IO) {
+		tls_do_bio(sock, NULL, NULL, true);
+		return;
+	}
+
+	if (sock->outerhandle == NULL) {
+		return;
+	}
+
+	sock->tlsstream.reading = false;
+	isc_nm_cancelread(sock->outerhandle);
+}
+
+/*
+ * Cancel reading on the TLS socket behind 'handle'.
+ *
+ * When called on the socket's own thread the cancellation happens
+ * immediately; otherwise a tlscancel event is queued for that thread.
+ */
+void
+isc__nm_tls_cancelread(isc_nmhandle_t *handle) {
+	isc_nmsocket_t *sock = NULL;
+	isc__netievent_tlscancel_t *event = NULL;
+
+	REQUIRE(VALID_NMHANDLE(handle));
+
+	sock = handle->sock;
+
+	REQUIRE(sock->type == isc_nm_tlssocket);
+
+	if (sock->tid == isc_nm_tid()) {
+		tls_cancelread(sock);
+		return;
+	}
+
+	event = isc__nm_get_netievent_tlscancel(sock->mgr, sock, handle);
+	isc__nm_enqueue_ievent(&sock->mgr->workers[sock->tid],
+			       (isc__netievent_t *)event);
+}
+
+/*
+ * Worker-thread handler for a queued tlscancel event: cancels reading on
+ * the event's socket, which must belong to this worker.
+ */
+void
+isc__nm_async_tlscancel(isc__networker_t *worker, isc__netievent_t *ev0) {
+	isc_nmsocket_t *sock = ((isc__netievent_tlscancel_t *)ev0)->sock;
+
+	REQUIRE(VALID_NMSOCK(sock));
+	REQUIRE(worker->id == sock->tid);
+	REQUIRE(sock->tid == isc_nm_tid());
+
+	UNUSED(worker);
+	tls_cancelread(sock);
+}
+
+/*
+ * Worker-thread handler for a queued tlsdobio event: runs one
+ * tls_do_bio() step on the event's socket with no input and no send.
+ */
+void
+isc__nm_async_tlsdobio(isc__networker_t *worker, isc__netievent_t *ev0) {
+	isc_nmsocket_t *sock = ((isc__netievent_tlsdobio_t *)ev0)->sock;
+
+	UNUSED(worker);
+
+	tls_do_bio(sock, NULL, NULL, false);
+}
+
+/*
+ * Release the TLS-related data attached to 'sock'; what is released
+ * depends on the socket type:
+ *
+ * - TCP listener: the reference to the TLS listener layered on it;
+ * - TLS listener: the per-worker TLS contexts;
+ * - TLS socket:   the SSL object (with its BIOs), the TLS context and
+ *                 the client session cache reference;
+ * - TCP socket:   the reference to the TLS socket layered on it.
+ *
+ * Other socket types are left untouched.
+ */
+void
+isc__nm_tls_cleanup_data(isc_nmsocket_t *sock) {
+	switch (sock->type) {
+	case isc_nm_tcplistener:
+		if (sock->tlsstream.tlslistener != NULL) {
+			isc__nmsocket_detach(&sock->tlsstream.tlslistener);
+		}
+		break;
+
+	case isc_nm_tlslistener:
+		tls_cleanup_listener_tlsctx(sock);
+		break;
+
+	case isc_nm_tlssocket:
+		if (sock->tlsstream.tls != NULL) {
+			/*
+			 * Shut the TLS session down properly so that it
+			 * stays resumable, if required.
+			 */
+			tls_try_shutdown(sock->tlsstream.tls, true);
+			tls_keep_client_tls_session(sock);
+			isc_tls_free(&sock->tlsstream.tls);
+			/* The BIOs go away together with the SSL object. */
+			sock->tlsstream.bio_out = NULL;
+			sock->tlsstream.bio_in = NULL;
+		}
+		if (sock->tlsstream.ctx != NULL) {
+			isc_tlsctx_free(&sock->tlsstream.ctx);
+		}
+		if (sock->tlsstream.client_sess_cache != NULL) {
+			INSIST(atomic_load(&sock->client));
+			isc_tlsctx_client_session_cache_detach(
+				&sock->tlsstream.client_sess_cache);
+		}
+		break;
+
+	case isc_nm_tcpsocket:
+		/*
+		 * The TLS socket can't be destroyed until its underlying
+		 * TCP socket is, to avoid possible use-after-free errors.
+		 */
+		if (sock->tlsstream.tlssocket != NULL) {
+			isc__nmsocket_detach(&sock->tlsstream.tlssocket);
+		}
+		break;
+
+	default:
+		break;
+	}
+}
+
+/*
+ * Clear the timeout of the TLS socket behind 'handle' by forwarding the
+ * request to its outer handle; does nothing if there is no outer handle.
+ */
+void
+isc__nm_tls_cleartimeout(isc_nmhandle_t *handle) {
+	isc_nmsocket_t *sock = NULL;
+
+	REQUIRE(VALID_NMHANDLE(handle));
+	REQUIRE(VALID_NMSOCK(handle->sock));
+	REQUIRE(handle->sock->type == isc_nm_tlssocket);
+
+	sock = handle->sock;
+	if (sock->outerhandle == NULL) {
+		return;
+	}
+
+	INSIST(VALID_NMHANDLE(sock->outerhandle));
+	isc_nmhandle_cleartimeout(sock->outerhandle);
+}
+
+/*
+ * Set the timeout of the TLS socket behind 'handle' to 'timeout' by
+ * forwarding the request to its outer handle; does nothing if there is
+ * no outer handle.
+ */
+void
+isc__nm_tls_settimeout(isc_nmhandle_t *handle, uint32_t timeout) {
+	isc_nmsocket_t *sock = NULL;
+
+	REQUIRE(VALID_NMHANDLE(handle));
+	REQUIRE(VALID_NMSOCK(handle->sock));
+	REQUIRE(handle->sock->type == isc_nm_tlssocket);
+
+	sock = handle->sock;
+	if (sock->outerhandle == NULL) {
+		return;
+	}
+
+	INSIST(VALID_NMHANDLE(sock->outerhandle));
+	isc_nmhandle_settimeout(sock->outerhandle, timeout);
+}
+
+/*
+ * Forward the keepalive setting 'value' of the TLS socket behind
+ * 'handle' to its outer handle; does nothing if there is no outer
+ * handle.
+ */
+void
+isc__nmhandle_tls_keepalive(isc_nmhandle_t *handle, bool value) {
+	isc_nmsocket_t *sock = NULL;
+
+	REQUIRE(VALID_NMHANDLE(handle));
+	REQUIRE(VALID_NMSOCK(handle->sock));
+	REQUIRE(handle->sock->type == isc_nm_tlssocket);
+
+	sock = handle->sock;
+	if (sock->outerhandle == NULL) {
+		return;
+	}
+
+	INSIST(VALID_NMHANDLE(sock->outerhandle));
+	isc_nmhandle_keepalive(sock->outerhandle, value);
+}
+
+/*
+ * Forward the write timeout 'write_timeout' of the TLS socket behind
+ * 'handle' to its outer handle; does nothing if there is no outer
+ * handle.
+ */
+void
+isc__nmhandle_tls_setwritetimeout(isc_nmhandle_t *handle,
+				  uint64_t write_timeout) {
+	isc_nmsocket_t *sock = NULL;
+
+	REQUIRE(VALID_NMHANDLE(handle));
+	REQUIRE(VALID_NMSOCK(handle->sock));
+	REQUIRE(handle->sock->type == isc_nm_tlssocket);
+
+	sock = handle->sock;
+	if (sock->outerhandle == NULL) {
+		return;
+	}
+
+	INSIST(VALID_NMHANDLE(sock->outerhandle));
+	isc_nmhandle_setwritetimeout(sock->outerhandle, write_timeout);
+}
+
+/*
+ * Return the peer verification result string for the TLS socket behind
+ * 'handle', as produced by isc_tls_verify_peer_result_string(), or NULL
+ * if the socket has no SSL object.
+ */
+const char *
+isc__nm_tls_verify_tls_peer_result_string(const isc_nmhandle_t *handle) {
+	isc_nmsocket_t *sock = NULL;
+
+	REQUIRE(VALID_NMHANDLE(handle));
+	REQUIRE(VALID_NMSOCK(handle->sock));
+	REQUIRE(handle->sock->type == isc_nm_tlssocket);
+
+	sock = handle->sock;
+	if (sock->tlsstream.tls != NULL) {
+		return (isc_tls_verify_peer_result_string(
+			sock->tlsstream.tls));
+	}
+
+	return (NULL);
+}
+
+/*
+ * Allocate the listener's per-worker TLS context array (one slot per
+ * network manager worker) and attach 'ctx' to every slot.
+ */
+static void
+tls_init_listener_tlsctx(isc_nmsocket_t *listener, isc_tlsctx_t *ctx) {
+	size_t nworkers;
+	isc_tlsctx_t **slots = NULL;
+
+	REQUIRE(VALID_NM(listener->mgr));
+	REQUIRE(ctx != NULL);
+
+	nworkers = (size_t)listener->mgr->nworkers;
+	INSIST(nworkers > 0);
+
+	slots = isc_mem_get(listener->mgr->mctx,
+			    sizeof(isc_tlsctx_t *) * nworkers);
+	listener->tlsstream.listener_tls_ctx = slots;
+	listener->tlsstream.n_listener_tls_ctx = nworkers;
+
+	for (size_t i = 0; i < nworkers; i++) {
+		/* The slot is cleared before attaching to it. */
+		slots[i] = NULL;
+		isc_tlsctx_attach(ctx, &slots[i]);
+	}
+}
+
+/*
+ * Free every per-worker TLS context of 'listener' and release the array
+ * holding them. Does nothing if the array was never allocated.
+ */
+static void
+tls_cleanup_listener_tlsctx(isc_nmsocket_t *listener) {
+	size_t nctx;
+
+	REQUIRE(VALID_NM(listener->mgr));
+
+	if (listener->tlsstream.listener_tls_ctx == NULL) {
+		return;
+	}
+
+	nctx = listener->tlsstream.n_listener_tls_ctx;
+	for (size_t i = 0; i < nctx; i++) {
+		isc_tlsctx_free(&listener->tlsstream.listener_tls_ctx[i]);
+	}
+
+	isc_mem_put(listener->mgr->mctx, listener->tlsstream.listener_tls_ctx,
+		    sizeof(isc_tlsctx_t *) * nctx);
+	listener->tlsstream.n_listener_tls_ctx = 0;
+}
+
+/*
+ * Return the TLS context 'listener' holds for worker 'tid', or NULL if
+ * the listener has no per-worker context array.
+ *
+ * 'tid' must be a valid index into that array: non-negative and smaller
+ * than the number of per-worker contexts. The upper bound is asserted
+ * so that a bad thread id cannot cause an out-of-bounds read.
+ */
+static isc_tlsctx_t *
+tls_get_listener_tlsctx(isc_nmsocket_t *listener, const int tid) {
+	REQUIRE(VALID_NM(listener->mgr));
+	REQUIRE(tid >= 0);
+
+	if (listener->tlsstream.listener_tls_ctx == NULL) {
+		return (NULL);
+	}
+
+	INSIST((size_t)tid < listener->tlsstream.n_listener_tls_ctx);
+
+	return (listener->tlsstream.listener_tls_ctx[tid]);
+}
+
+/*
+ * Replace the TLS context 'listener' holds for worker 'tid' with
+ * 'tlsctx': the old context is freed and the new one attached in its
+ * place.
+ *
+ * The per-worker context array must exist and 'tid' must be a valid
+ * index into it; both are asserted so that a bad call cannot write
+ * outside the array or through a NULL pointer.
+ */
+void
+isc__nm_async_tls_set_tlsctx(isc_nmsocket_t *listener, isc_tlsctx_t *tlsctx,
+			     const int tid) {
+	REQUIRE(tid >= 0);
+	REQUIRE(listener->tlsstream.listener_tls_ctx != NULL);
+	REQUIRE((size_t)tid < listener->tlsstream.n_listener_tls_ctx);
+
+	isc_tlsctx_free(&listener->tlsstream.listener_tls_ctx[tid]);
+	isc_tlsctx_attach(tlsctx, &listener->tlsstream.listener_tls_ctx[tid]);
+}
+
+/*
+ * Store the TLS session of a client socket in its client session cache,
+ * keyed by the peer address. This happens at most once per socket and
+ * only when a session cache is attached.
+ */
+static void
+tls_keep_client_tls_session(isc_nmsocket_t *sock) {
+	/*
+	 * The isc_tls_t object must only be touched from the worker
+	 * thread the socket is bound to.
+	 */
+	REQUIRE(sock->tid == isc_nm_tid());
+
+	if (sock->tlsstream.client_sess_cache == NULL ||
+	    sock->tlsstream.client_session_saved != false)
+	{
+		/* No cache to store into, or already stored. */
+		return;
+	}
+
+	INSIST(atomic_load(&sock->client));
+	isc_tlsctx_client_session_cache_keep_sockaddr(
+		sock->tlsstream.client_sess_cache, &sock->peer,
+		sock->tlsstream.tls);
+	sock->tlsstream.client_session_saved = true;
+}
+
+/*
+ * Shut down the TLS session 'tls'.
+ *
+ * With 'force' set, the session is only flagged as having sent its
+ * shutdown (SSL_set_shutdown()). Otherwise SSL_shutdown() is called,
+ * unless the shutdown has already been sent.
+ */
+static void
+tls_try_shutdown(isc_tls_t *tls, const bool force) {
+	if (force) {
+		(void)SSL_set_shutdown(tls, SSL_SENT_SHUTDOWN);
+		return;
+	}
+
+	if ((SSL_get_shutdown(tls) & SSL_SENT_SHUTDOWN) != 0) {
+		/* The shutdown has been sent already. */
+		return;
+	}
+
+	(void)SSL_shutdown(tls);
+}
diff --git a/lib/isc/netmgr/udp.c b/lib/isc/netmgr/udp.c
new file mode 100644
index 0000000..1a0ee16
--- /dev/null
+++ b/lib/isc/netmgr/udp.c
@@ -0,0 +1,1405 @@
+/*
+ * Copyright (C) Internet Systems Consortium, Inc. ("ISC")
+ *
+ * SPDX-License-Identifier: MPL-2.0
+ *
+ * This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, you can obtain one at https://mozilla.org/MPL/2.0/.
+ *
+ * See the COPYRIGHT file distributed with this work for additional
+ * information regarding copyright ownership.
+ */
+
+#include <errno.h>
+#include <unistd.h>
+#include <uv.h>
+
+#include <isc/atomic.h>
+#include <isc/barrier.h>
+#include <isc/buffer.h>
+#include <isc/condition.h>
+#include <isc/errno.h>
+#include <isc/magic.h>
+#include <isc/mem.h>
+#include <isc/netmgr.h>
+#include <isc/random.h>
+#include <isc/refcount.h>
+#include <isc/region.h>
+#include <isc/result.h>
+#include <isc/sockaddr.h>
+#include <isc/thread.h>
+#include <isc/util.h>
+
+#include "netmgr-int.h"
+#include "uv-compat.h"
+
+/*
+ * Pick the socket family/protocol used for the "route" socket.
+ *
+ * With <net/route.h> providing RTM_VERSION, RTM_NEWADDR and RTM_DELADDR
+ * a PF_ROUTE socket is used; with the Linux netlink headers providing
+ * RTM_NEWADDR and RTM_DELADDR a PF_NETLINK/NETLINK_ROUTE socket is used
+ * (and USE_NETLINK is defined as well).  USE_ROUTE_SOCKET is defined
+ * only when one of these variants is available.
+ */
+#ifdef HAVE_NET_ROUTE_H
+#include <net/route.h>
+#if defined(RTM_VERSION) && defined(RTM_NEWADDR) && defined(RTM_DELADDR)
+#define USE_ROUTE_SOCKET 1
+#define ROUTE_SOCKET_PF PF_ROUTE
+#define ROUTE_SOCKET_PROTOCOL 0
+#define MSGHDR rt_msghdr
+#define MSGTYPE rtm_type
+#endif /* if defined(RTM_VERSION) && defined(RTM_NEWADDR) && \
+ * defined(RTM_DELADDR) */
+#endif /* ifdef HAVE_NET_ROUTE_H */
+
+#if defined(HAVE_LINUX_NETLINK_H) && defined(HAVE_LINUX_RTNETLINK_H)
+#include <linux/netlink.h>
+#include <linux/rtnetlink.h>
+#if defined(RTM_NEWADDR) && defined(RTM_DELADDR)
+#define USE_ROUTE_SOCKET 1
+#define USE_NETLINK 1
+#define ROUTE_SOCKET_PF PF_NETLINK
+#define ROUTE_SOCKET_PROTOCOL NETLINK_ROUTE
+#define MSGHDR nlmsghdr
+#define MSGTYPE nlmsg_type
+#endif /* if defined(RTM_NEWADDR) && defined(RTM_DELADDR) */
+#endif /* if defined(HAVE_LINUX_NETLINK_H) && defined(HAVE_LINUX_RTNETLINK_H) \
+ */
+
+/*
+ * Forward declarations of the file-local helpers and libuv callbacks
+ * that are defined further down in this file.
+ */
+
+/* Hand a prepared send request to libuv on the socket's own thread. */
+static isc_result_t
+udp_send_direct(isc_nmsocket_t *sock, isc__nm_uvreq_t *req,
+ isc_sockaddr_t *peer);
+
+/* libuv receive callback for UDP handles. */
+static void
+udp_recv_cb(uv_udp_t *handle, ssize_t nrecv, const uv_buf_t *buf,
+ const struct sockaddr *addr, unsigned flags);
+
+/* libuv completion callback for uv_udp_send(). */
+static void
+udp_send_cb(uv_udp_send_t *req, int status);
+
+/* libuv close callback for the UDP handle of a socket without a parent. */
+static void
+udp_close_cb(uv_handle_t *handle);
+
+/* libuv close callback for the read timer; closes the UDP handle next. */
+static void
+read_timer_close_cb(uv_handle_t *handle);
+
+/* Start closing a socket's handles on the socket's own thread. */
+static void
+udp_close_direct(isc_nmsocket_t *sock);
+
+/* Stop a listener (parent) socket and one of its per-worker children. */
+static void
+stop_udp_parent(isc_nmsocket_t *sock);
+static void
+stop_udp_child(isc_nmsocket_t *sock);
+
+/*
+ * Create a datagram socket of the given family for a UDP listener and
+ * apply the listener socket options.  Creation and address reuse must
+ * succeed (as must load-balanced reuse when the manager asks for it);
+ * the remaining options are applied on a best-effort basis.
+ */
+static uv_os_sock_t
+isc__nm_udp_lb_socket(isc_nm_t *mgr, sa_family_t sa_family) {
+	uv_os_sock_t fd;
+	isc_result_t res = isc__nm_socket(sa_family, SOCK_DGRAM, 0, &fd);
+
+	RUNTIME_CHECK(res == ISC_R_SUCCESS);
+
+	/* Best effort: failures of these three are ignored. */
+	(void)isc__nm_socket_incoming_cpu(fd);
+	(void)isc__nm_socket_disable_pmtud(fd, sa_family);
+	(void)isc__nm_socket_v6only(fd, sa_family);
+
+	res = isc__nm_socket_reuse(fd);
+	RUNTIME_CHECK(res == ISC_R_SUCCESS);
+
+	if (mgr->load_balance_sockets) {
+		res = isc__nm_socket_reuse_lb(fd);
+		RUNTIME_CHECK(res == ISC_R_SUCCESS);
+	}
+
+	return (fd);
+}
+
+/*
+ * Initialise the child socket of listener 'sock' that belongs to
+ * worker 'tid', give it a file descriptor and queue a 'udplisten'
+ * event on that worker.
+ *
+ * With load-balanced sockets every child creates its own descriptor;
+ * otherwise the child works on a duplicate of 'fd'.
+ */
+static void
+start_udp_child(isc_nm_t *mgr, isc_sockaddr_t *iface, isc_nmsocket_t *sock,
+		uv_os_sock_t fd, int tid) {
+	isc_nmsocket_t *child = &sock->children[tid];
+	isc__netievent_udplisten_t *listen_ev = NULL;
+
+	isc__nmsocket_init(child, mgr, isc_nm_udpsocket, iface);
+	child->parent = sock;
+	child->iface = sock->iface;
+	atomic_init(&child->reading, true);
+	child->recv_cb = sock->recv_cb;
+	child->recv_cbarg = sock->recv_cbarg;
+	child->extrahandlesize = sock->extrahandlesize;
+	child->tid = tid;
+
+	if (!mgr->load_balance_sockets) {
+		child->fd = dup(fd);
+	} else {
+		UNUSED(fd);
+		child->fd = isc__nm_udp_lb_socket(mgr,
+						  iface->type.sa.sa_family);
+	}
+	REQUIRE(child->fd >= 0);
+
+	listen_ev = isc__nm_get_netievent_udplisten(mgr, child);
+	isc__nm_maybe_enqueue_ievent(&mgr->workers[tid],
+				     (isc__netievent_t *)listen_ev);
+}
+
+/*
+ * Queue a 'udpstop' event for 'sock' on the worker the socket is
+ * bound to.
+ */
+static void
+enqueue_stoplistening(isc_nmsocket_t *sock) {
+	isc_nm_t *mgr = sock->mgr;
+	isc__netievent_udpstop_t *stop_ev =
+		isc__nm_get_netievent_udpstop(mgr, sock);
+
+	isc__nm_enqueue_ievent(&mgr->workers[sock->tid],
+			       (isc__netievent_t *)stop_ev);
+}
+
+/*
+ * Start listening for UDP datagrams on 'iface'.
+ *
+ * A listener socket is created together with one child socket per
+ * worker; every child is started through start_udp_child() and this
+ * function then blocks until all children have reported in.  'cb' and
+ * 'cbarg' become the receive callback of every child.
+ *
+ * On success the listener is stored in '*sockp' and ISC_R_SUCCESS is
+ * returned.  Otherwise the listener is deactivated, a stop event is
+ * queued for it, it is closed, and the result recorded on the
+ * listener is returned.
+ */
+isc_result_t
+isc_nm_listenudp(isc_nm_t *mgr, isc_sockaddr_t *iface, isc_nm_recv_cb_t cb,
+ void *cbarg, size_t extrahandlesize, isc_nmsocket_t **sockp) {
+ isc_result_t result = ISC_R_SUCCESS;
+ isc_nmsocket_t *sock = NULL;
+ size_t children_size = 0;
+ REQUIRE(VALID_NM(mgr));
+ uv_os_sock_t fd = -1;
+
+ /*
+ * We are creating mgr->nworkers duplicated sockets, one
+ * socket for each worker thread.
+ */
+ sock = isc_mem_get(mgr->mctx, sizeof(isc_nmsocket_t));
+ isc__nmsocket_init(sock, mgr, isc_nm_udplistener, iface);
+
+ atomic_init(&sock->rchildren, 0);
+ sock->nchildren = mgr->nworkers;
+ children_size = sock->nchildren * sizeof(sock->children[0]);
+ sock->children = isc_mem_get(mgr->mctx, children_size);
+ memset(sock->children, 0, children_size);
+
+ sock->recv_cb = cb;
+ sock->recv_cbarg = cbarg;
+ sock->extrahandlesize = extrahandlesize;
+ sock->result = ISC_R_UNSET;
+
+ sock->tid = 0;
+ sock->fd = -1;
+
+ /*
+ * Without load-balanced sockets a single descriptor is created
+ * here; each child dup()s it in start_udp_child().
+ */
+ if (!mgr->load_balance_sockets) {
+ fd = isc__nm_udp_lb_socket(mgr, iface->type.sa.sa_family);
+ }
+
+ isc_barrier_init(&sock->startlistening, sock->nchildren);
+
+ /*
+ * Start the children of all the other workers first; the child
+ * of the current worker (if we are on one) is started last.
+ */
+ for (size_t i = 0; i < sock->nchildren; i++) {
+ if ((int)i == isc_nm_tid()) {
+ continue;
+ }
+ start_udp_child(mgr, iface, sock, fd, i);
+ }
+
+ if (isc__nm_in_netthread()) {
+ start_udp_child(mgr, iface, sock, fd, isc_nm_tid());
+ }
+
+ /* The children hold their own duplicates; drop the original. */
+ if (!mgr->load_balance_sockets) {
+ isc__nm_closesocket(fd);
+ }
+
+ /* Wait until every child has been counted in 'rchildren'. */
+ LOCK(&sock->lock);
+ while (atomic_load(&sock->rchildren) != sock->nchildren) {
+ WAIT(&sock->cond, &sock->lock);
+ }
+ result = sock->result;
+ atomic_store(&sock->active, true);
+ UNLOCK(&sock->lock);
+
+ INSIST(result != ISC_R_UNSET);
+
+ if (result == ISC_R_SUCCESS) {
+ REQUIRE(atomic_load(&sock->rchildren) == sock->nchildren);
+ *sockp = sock;
+ } else {
+ /* Listening failed: tear the listener down again. */
+ atomic_store(&sock->active, false);
+ enqueue_stoplistening(sock);
+ isc_nmsocket_close(&sock);
+ }
+
+ return (result);
+}
+
+#ifdef USE_ROUTE_SOCKET
+/*
+ * Create the raw socket used to receive routing messages and store it
+ * in '*fdp'.
+ *
+ * With netlink the socket is additionally bound to the link and
+ * IPv4/IPv6 address notification groups.
+ *
+ * Returns ISC_R_SUCCESS on success; otherwise the result of the failed
+ * socket creation or the translated errno of the failed bind(), in
+ * which case no descriptor is left open and '*fdp' is not written.
+ */
+static isc_result_t
+route_socket(uv_os_sock_t *fdp) {
+	isc_result_t result;
+	uv_os_sock_t fd;
+#ifdef USE_NETLINK
+	/*
+	 * The designated initializer zeroes the members not named here
+	 * (nl_pad, nl_pid), so bind() never sees indeterminate values.
+	 */
+	struct sockaddr_nl sa = {
+		.nl_family = PF_NETLINK,
+		.nl_groups = RTMGRP_LINK | RTMGRP_IPV4_IFADDR |
+			     RTMGRP_IPV6_IFADDR,
+	};
+#endif
+
+	result = isc__nm_socket(ROUTE_SOCKET_PF, SOCK_RAW,
+				ROUTE_SOCKET_PROTOCOL, &fd);
+	if (result != ISC_R_SUCCESS) {
+		return (result);
+	}
+
+#ifdef USE_NETLINK
+	if (bind(fd, (struct sockaddr *)&sa, sizeof(sa)) < 0) {
+		/*
+		 * bind() reports the reason in errno, not in its return
+		 * value; save it before closing the socket can clobber
+		 * it.
+		 */
+		int err = errno;
+
+		isc__nm_closesocket(fd);
+		return (isc_errno_toresult(err));
+	}
+#endif
+
+	*fdp = fd;
+	return (ISC_R_SUCCESS);
+}
+
+/*
+ * Set up the libuv state of a route socket on the current worker
+ * thread: initialise the UDP handle and the read timer and adopt the
+ * already created descriptor 'sock->fd'.
+ *
+ * The outcome is stored in 'sock->result' and signalled on
+ * 'sock->cond'; the function then waits until the socket has been
+ * marked active before returning.
+ *
+ * Returns ISC_R_SUCCESS, ISC_R_SHUTTINGDOWN when the network manager
+ * is closing, or the translated libuv error of uv_udp_open().
+ */
+static isc_result_t
+route_connect_direct(isc_nmsocket_t *sock) {
+	isc__networker_t *worker = NULL;
+	isc_result_t result = ISC_R_UNSET;
+	int r;
+
+	REQUIRE(isc__nm_in_netthread());
+	REQUIRE(sock->tid == isc_nm_tid());
+
+	worker = &sock->mgr->workers[isc_nm_tid()];
+
+	atomic_store(&sock->connecting, true);
+
+	r = uv_udp_init(&worker->loop, &sock->uv_handle.udp);
+	UV_RUNTIME_CHECK(uv_udp_init, r);
+	uv_handle_set_data(&sock->uv_handle.handle, sock);
+
+	r = uv_timer_init(&worker->loop, &sock->read_timer);
+	UV_RUNTIME_CHECK(uv_timer_init, r);
+	uv_handle_set_data((uv_handle_t *)&sock->read_timer, sock);
+
+	if (isc__nm_closing(sock)) {
+		result = ISC_R_SHUTTINGDOWN;
+		goto error;
+	}
+
+	r = uv_udp_open(&sock->uv_handle.udp, sock->fd);
+	if (r != 0) {
+		goto done;
+	}
+
+	isc__nm_set_network_buffers(sock->mgr, &sock->uv_handle.handle);
+
+	atomic_store(&sock->connecting, false);
+	atomic_store(&sock->connected, true);
+
+done:
+	result = isc__nm_uverr2result(r);
+error:
+
+	LOCK(&sock->lock);
+	sock->result = result;
+	SIGNAL(&sock->cond);
+	/*
+	 * Condition waits may wake up spuriously, so re-check the
+	 * predicate in a loop rather than waiting exactly once.
+	 */
+	while (!atomic_load(&sock->active)) {
+		WAIT(&sock->scond, &sock->lock);
+	}
+	INSIST(atomic_load(&sock->active));
+	UNLOCK(&sock->lock);
+
+	return (result);
+}
+
+/*
+ * Asynchronous 'routeconnect' event handler: finish setting up the
+ * route socket on its worker thread and report the outcome through
+ * the connect callback.
+ */
+void
+isc__nm_async_routeconnect(isc__networker_t *worker, isc__netievent_t *ev0) {
+	isc__netievent_routeconnect_t *connect_ev =
+		(isc__netievent_routeconnect_t *)ev0;
+	isc_nmsocket_t *sock = connect_ev->sock;
+	isc__nm_uvreq_t *req = connect_ev->req;
+	isc_result_t result;
+
+	UNUSED(worker);
+
+	REQUIRE(VALID_NMSOCK(sock));
+	REQUIRE(sock->type == isc_nm_udpsocket);
+	REQUIRE(sock->parent == NULL);
+	REQUIRE(sock->tid == isc_nm_tid());
+
+	result = route_connect_direct(sock);
+	if (result != ISC_R_SUCCESS) {
+		/* Setup failed: deactivate and close before reporting. */
+		atomic_store(&sock->active, false);
+		isc__nm_udp_close(sock);
+	}
+
+	/*
+	 * In the successful case the callback is only called now that
+	 * the socket has been initialized.
+	 */
+	isc__nm_connectcb(sock, req, result, true);
+
+	/*
+	 * The sock is now attached to the handle.
+	 */
+	isc__nmsocket_detach(&sock);
+}
+#endif /* USE_ROUTE_SOCKET */
+
+/*
+ * Open a route socket and report the outcome through 'cb' (called
+ * with 'cbarg').
+ *
+ * The socket is set up on the current worker when called from a
+ * network thread, otherwise on worker 0 through a queued event.  The
+ * function blocks until the setup result has been recorded on the
+ * socket and returns that result.
+ *
+ * When route sockets are not supported on this platform,
+ * ISC_R_NOTIMPLEMENTED is returned and 'cb' is never called.
+ */
+isc_result_t
+isc_nm_routeconnect(isc_nm_t *mgr, isc_nm_cb_t cb, void *cbarg,
+		    size_t extrahandlesize) {
+#ifdef USE_ROUTE_SOCKET
+	isc_result_t result = ISC_R_SUCCESS;
+	isc_nmsocket_t *sock = NULL;
+	isc__netievent_routeconnect_t *event = NULL;
+	isc__nm_uvreq_t *req = NULL;
+
+	REQUIRE(VALID_NM(mgr));
+
+	sock = isc_mem_get(mgr->mctx, sizeof(*sock));
+	isc__nmsocket_init(sock, mgr, isc_nm_udpsocket, NULL);
+
+	sock->connect_cb = cb;
+	sock->connect_cbarg = cbarg;
+	sock->extrahandlesize = extrahandlesize;
+	sock->result = ISC_R_UNSET;
+	atomic_init(&sock->client, true);
+	sock->route_sock = true;
+
+	req = isc__nm_uvreq_get(mgr, sock);
+	req->cb.connect = cb;
+	req->cbarg = cbarg;
+	req->handle = isc__nmhandle_get(sock, NULL, NULL);
+
+	result = route_socket(&sock->fd);
+	if (result != ISC_R_SUCCESS) {
+		/* No descriptor: report the failure and drop the socket. */
+		if (isc__nm_in_netthread()) {
+			sock->tid = isc_nm_tid();
+		}
+		isc__nmsocket_clearcb(sock);
+		isc__nm_connectcb(sock, req, result, true);
+		atomic_store(&sock->closed, true);
+		isc__nmsocket_detach(&sock);
+		return (result);
+	}
+
+	event = isc__nm_get_netievent_routeconnect(mgr, sock, req);
+
+	if (isc__nm_in_netthread()) {
+		atomic_store(&sock->active, true);
+		sock->tid = isc_nm_tid();
+		isc__nm_async_routeconnect(&mgr->workers[sock->tid],
+					   (isc__netievent_t *)event);
+		isc__nm_put_netievent_routeconnect(mgr, event);
+	} else {
+		atomic_init(&sock->active, false);
+		sock->tid = 0;
+		isc__nm_enqueue_ievent(&mgr->workers[sock->tid],
+				       (isc__netievent_t *)event);
+	}
+
+	LOCK(&sock->lock);
+	while (sock->result == ISC_R_UNSET) {
+		WAIT(&sock->cond, &sock->lock);
+	}
+	/*
+	 * Take the result while the lock is still held: once the
+	 * worker has been released below it may go on to close the
+	 * socket, so 'sock' must not be read after UNLOCK().
+	 */
+	result = sock->result;
+	atomic_store(&sock->active, true);
+	BROADCAST(&sock->scond);
+	UNLOCK(&sock->lock);
+
+	return (result);
+#else  /* USE_ROUTE_SOCKET */
+	UNUSED(mgr);
+	UNUSED(cb);
+	UNUSED(cbarg);
+	UNUSED(extrahandlesize);
+	return (ISC_R_NOTIMPLEMENTED);
+#endif /* USE_ROUTE_SOCKET */
+}
+
+/*
+ * Asynchronous 'udplisten' call handler: start listening on a UDP socket.
+ *
+ * Runs on the worker that owns the child socket.  The child's UDP
+ * handle and read timer are initialised, the descriptor is opened and
+ * bound (see below for which child does the bind), and receiving is
+ * started.  The outcome is reported to the parent listener, and the
+ * handler finally waits on the parent's 'startlistening' barrier.
+ */
+void
+isc__nm_async_udplisten(isc__networker_t *worker, isc__netievent_t *ev0) {
+ isc__netievent_udplisten_t *ievent = (isc__netievent_udplisten_t *)ev0;
+ isc_nmsocket_t *sock = NULL;
+ int r, uv_bind_flags = 0;
+ int uv_init_flags = 0;
+ sa_family_t sa_family;
+ isc_result_t result = ISC_R_UNSET;
+ isc_nm_t *mgr = NULL;
+
+ REQUIRE(VALID_NMSOCK(ievent->sock));
+ REQUIRE(ievent->sock->tid == isc_nm_tid());
+ REQUIRE(VALID_NMSOCK(ievent->sock->parent));
+
+ sock = ievent->sock;
+ sa_family = sock->iface.type.sa.sa_family;
+ mgr = sock->mgr;
+
+ REQUIRE(sock->type == isc_nm_udpsocket);
+ REQUIRE(sock->parent != NULL);
+ REQUIRE(sock->tid == isc_nm_tid());
+
+ (void)isc__nm_socket_min_mtu(sock->fd, sa_family);
+
+#if HAVE_DECL_UV_UDP_RECVMMSG
+ uv_init_flags |= UV_UDP_RECVMMSG;
+#endif
+ r = uv_udp_init_ex(&worker->loop, &sock->uv_handle.udp, uv_init_flags);
+ UV_RUNTIME_CHECK(uv_udp_init_ex, r);
+ uv_handle_set_data(&sock->uv_handle.handle, sock);
+ /* This keeps the socket alive after everything else is gone */
+ isc__nmsocket_attach(sock, &(isc_nmsocket_t *){ NULL });
+
+ r = uv_timer_init(&worker->loop, &sock->read_timer);
+ UV_RUNTIME_CHECK(uv_timer_init, r);
+ uv_handle_set_data((uv_handle_t *)&sock->read_timer, sock);
+
+ /*
+ * From here on the parent's fd, uv flags, result and child
+ * counter are read and updated, so hold the parent's lock.
+ */
+ LOCK(&sock->parent->lock);
+
+ r = uv_udp_open(&sock->uv_handle.udp, sock->fd);
+ if (r < 0) {
+ isc__nm_closesocket(sock->fd);
+ isc__nm_incstats(sock, STATID_OPENFAIL);
+ goto done;
+ }
+ isc__nm_incstats(sock, STATID_OPEN);
+
+ if (sa_family == AF_INET6) {
+ uv_bind_flags |= UV_UDP_IPV6ONLY;
+ }
+
+ if (mgr->load_balance_sockets) {
+ /* Every child has its own descriptor and binds it itself. */
+ r = isc_uv_udp_freebind(&sock->uv_handle.udp,
+ &sock->parent->iface.type.sa,
+ uv_bind_flags);
+ if (r < 0) {
+ isc__nm_incstats(sock, STATID_BINDFAIL);
+ goto done;
+ }
+ } else {
+ if (sock->parent->fd == -1) {
+ /* This thread is first, bind the socket */
+ r = isc_uv_udp_freebind(&sock->uv_handle.udp,
+ &sock->parent->iface.type.sa,
+ uv_bind_flags);
+ if (r < 0) {
+ isc__nm_incstats(sock, STATID_BINDFAIL);
+ goto done;
+ }
+ sock->parent->uv_handle.udp.flags =
+ sock->uv_handle.udp.flags;
+ sock->parent->fd = sock->fd;
+ } else {
+ /* The socket is already bound, just copy the flags */
+ sock->uv_handle.udp.flags =
+ sock->parent->uv_handle.udp.flags;
+ }
+ }
+
+ isc__nm_set_network_buffers(sock->mgr, &sock->uv_handle.handle);
+
+ r = uv_udp_recv_start(&sock->uv_handle.udp, isc__nm_alloc_cb,
+ udp_recv_cb);
+ if (r != 0) {
+ /* NOTE(review): a recv-start failure is counted as BINDFAIL. */
+ isc__nm_incstats(sock, STATID_BINDFAIL);
+ goto done;
+ }
+
+ atomic_store(&sock->listening, true);
+
+done:
+ /*
+ * Count this child as started, record the result on the parent
+ * unless one has already been recorded, and wake up the thread
+ * waiting on the parent's condition variable.
+ */
+ result = isc__nm_uverr2result(r);
+ atomic_fetch_add(&sock->parent->rchildren, 1);
+ if (sock->parent->result == ISC_R_UNSET) {
+ sock->parent->result = result;
+ }
+ SIGNAL(&sock->parent->cond);
+ UNLOCK(&sock->parent->lock);
+
+ isc_barrier_wait(&sock->parent->startlistening);
+}
+
+/*
+ * Stop a UDP listener.  The listener is marked as closing (it must
+ * not be closing already) and is then stopped directly when called on
+ * a network thread, or through a queued 'udpstop' event otherwise.
+ */
+void
+isc__nm_udp_stoplistening(isc_nmsocket_t *sock) {
+	bool expected = false;
+
+	REQUIRE(VALID_NMSOCK(sock));
+	REQUIRE(sock->type == isc_nm_udplistener);
+
+	if (!atomic_compare_exchange_strong(&sock->closing, &expected, true))
+	{
+		UNREACHABLE();
+	}
+
+	if (isc__nm_in_netthread()) {
+		stop_udp_parent(sock);
+	} else {
+		enqueue_stoplistening(sock);
+	}
+}
+
+/*
+ * Asynchronous 'udpstop' call handler: stop listening on a UDP socket.
+ * The event is used both for the listener itself (no parent) and for
+ * its per-worker children.
+ */
+void
+isc__nm_async_udpstop(isc__networker_t *worker, isc__netievent_t *ev0) {
+	isc__netievent_udpstop_t *stop_ev = (isc__netievent_udpstop_t *)ev0;
+	isc_nmsocket_t *sock = stop_ev->sock;
+
+	UNUSED(worker);
+
+	REQUIRE(VALID_NMSOCK(sock));
+	REQUIRE(sock->tid == isc_nm_tid());
+
+	if (sock->parent == NULL) {
+		stop_udp_parent(sock);
+	} else {
+		stop_udp_child(sock);
+	}
+}
+
+/*
+ * udp_recv_cb handles an incoming UDP packet from libuv.
+ *
+ * A datagram that passes the checks below is handed to the socket's
+ * read callback synchronously, using the libuv buffer directly.  Read
+ * errors, end of stream (addr == NULL) and an inactive socket are
+ * reported through isc__nm_failed_read_cb().  Unless the buffer is a
+ * recvmmsg(2) chunk or a null buffer delivered with an error, it is
+ * released before returning.
+ */
+static void
+udp_recv_cb(uv_udp_t *handle, ssize_t nrecv, const uv_buf_t *buf,
+	    const struct sockaddr *addr, unsigned flags) {
+	isc_nmsocket_t *sock = uv_handle_get_data((uv_handle_t *)handle);
+	isc__nm_uvreq_t *req = NULL;
+	uint32_t maxudp;
+	isc_result_t result;
+	isc_sockaddr_t sockaddr, *sa = NULL;
+
+	REQUIRE(VALID_NMSOCK(sock));
+	REQUIRE(sock->tid == isc_nm_tid());
+	REQUIRE(atomic_load(&sock->reading));
+
+	/*
+	 * When using recvmmsg(2), if no errors occur, there will be a final
+	 * callback with nrecv set to 0, addr set to NULL and the buffer
+	 * pointing at the initially allocated data with the UV_UDP_MMSG_CHUNK
+	 * flag cleared and the UV_UDP_MMSG_FREE flag set.
+	 */
+#if HAVE_DECL_UV_UDP_MMSG_FREE
+	if ((flags & UV_UDP_MMSG_FREE) == UV_UDP_MMSG_FREE) {
+		INSIST(nrecv == 0);
+		INSIST(addr == NULL);
+		goto free;
+	}
+#else
+	UNUSED(flags);
+#endif
+
+	/*
+	 * - If we're simulating a firewall blocking UDP packets
+	 *   bigger than 'maxudp' bytes for testing purposes.
+	 *
+	 * Only a real datagram length is compared: a negative 'nrecv'
+	 * is an error code and would otherwise turn into a huge
+	 * unsigned value, so the error would be dropped here instead
+	 * of being reported below.
+	 */
+	maxudp = atomic_load(&sock->mgr->maxudp);
+	if (maxudp != 0 && nrecv > 0 && (uint32_t)nrecv > maxudp) {
+		/*
+		 * The read callback is left intact, so that the read
+		 * timeout callback can still trigger and does not crash
+		 * because of a missing read request.
+		 */
+		goto free;
+	}
+
+	/*
+	 * - If there was a networking error.
+	 */
+	if (nrecv < 0) {
+		isc__nm_failed_read_cb(sock, isc__nm_uverr2result(nrecv),
+				       false);
+		goto free;
+	}
+
+	/*
+	 * - If addr == NULL, in which case it's the end of stream;
+	 *   we can free the buffer and bail.
+	 */
+	if (addr == NULL) {
+		isc__nm_failed_read_cb(sock, ISC_R_EOF, false);
+		goto free;
+	}
+
+	/*
+	 * - If the socket is no longer active.
+	 */
+	if (!isc__nmsocket_active(sock)) {
+		isc__nm_failed_read_cb(sock, ISC_R_CANCELED, false);
+		goto free;
+	}
+
+	/* Route sockets do not carry a peer address. */
+	if (!sock->route_sock) {
+		result = isc_sockaddr_fromsockaddr(&sockaddr, addr);
+		RUNTIME_CHECK(result == ISC_R_SUCCESS);
+		sa = &sockaddr;
+	}
+
+	req = isc__nm_get_read_req(sock, sa);
+
+	/*
+	 * The callback will be called synchronously, because result is
+	 * ISC_R_SUCCESS, so it is fine to pass the buf directly.
+	 */
+	req->uvbuf.base = buf->base;
+	req->uvbuf.len = nrecv;
+
+	sock->recv_read = false;
+
+	REQUIRE(!sock->processing);
+	sock->processing = true;
+	isc__nm_readcb(sock, req, ISC_R_SUCCESS);
+	sock->processing = false;
+
+free:
+#if HAVE_DECL_UV_UDP_MMSG_CHUNK
+	/*
+	 * When using recvmmsg(2), chunks will have the UV_UDP_MMSG_CHUNK flag
+	 * set, those must not be freed.
+	 */
+	if ((flags & UV_UDP_MMSG_CHUNK) == UV_UDP_MMSG_CHUNK) {
+		return;
+	}
+#endif
+
+	/*
+	 * When using recvmmsg(2), if a UDP socket error occurs, nrecv will be <
+	 * 0. In either scenario, the callee can now safely free the provided
+	 * buffer.
+	 */
+	if (nrecv < 0) {
+		/*
+		 * The buffer may be a null buffer on error.
+		 */
+		if (buf->base == NULL && buf->len == 0) {
+			return;
+		}
+	}
+
+	isc__nm_free_uvbuf(sock, buf);
+}
+
+/*
+ * Send the data in 'region' to the handle's peer via a UDP socket.
+ * For server sockets we pick the sibling/child socket of the current
+ * network thread where possible, so that the send does not have to
+ * jump to another thread.
+ */
+void
+isc__nm_udp_send(isc_nmhandle_t *handle, const isc_region_t *region,
+		 isc_nm_cb_t cb, void *cbarg) {
+	isc_nmsocket_t *sock = handle->sock;
+	isc_nmsocket_t *rsock = NULL;
+	isc_sockaddr_t *peer = &handle->peer;
+	isc__nm_uvreq_t *uvreq = NULL;
+	uint32_t maxudp = atomic_load(&sock->mgr->maxudp);
+	int ntid;
+
+	INSIST(sock->type == isc_nm_udpsocket);
+
+	/*
+	 * We're simulating a firewall blocking UDP packets bigger than
+	 * 'maxudp' bytes, for testing purposes.
+	 *
+	 * The client would ordinarily have unreferenced the handle
+	 * in the callback, but that won't happen in this case, so
+	 * we need to do so here.
+	 */
+	if (maxudp != 0 && region->length > maxudp) {
+		isc_nmhandle_detach(&handle);
+		return;
+	}
+
+	if (atomic_load(&sock->client)) {
+		/*
+		 * A client socket sends on the very socket it was given.
+		 */
+		rsock = sock;
+	} else {
+		/*
+		 * A server socket sends on the child that belongs to
+		 * the network thread we are running in, or, when not
+		 * in a network thread, on the child of the thread the
+		 * handle's socket is bound to.
+		 */
+		INSIST(sock->parent != NULL);
+
+		ntid = isc__nm_in_netthread() ? isc_nm_tid() : sock->tid;
+		rsock = &sock->parent->children[ntid];
+	}
+
+	uvreq = isc__nm_uvreq_get(rsock->mgr, rsock);
+	uvreq->uvbuf.base = (char *)region->base;
+	uvreq->uvbuf.len = region->length;
+
+	isc_nmhandle_attach(handle, &uvreq->handle);
+
+	uvreq->cb.send = cb;
+	uvreq->cbarg = cbarg;
+
+	if (isc_nm_tid() == rsock->tid) {
+		/* Already on the right thread: send without queueing. */
+		REQUIRE(rsock->tid == isc_nm_tid());
+		isc__netievent_udpsend_t direct_ev = { .sock = rsock,
+						       .req = uvreq,
+						       .peer = *peer };
+
+		isc__nm_async_udpsend(NULL, (isc__netievent_t *)&direct_ev);
+	} else {
+		isc__netievent_udpsend_t *queued_ev =
+			isc__nm_get_netievent_udpsend(sock->mgr, rsock);
+		queued_ev->peer = *peer;
+		queued_ev->req = uvreq;
+
+		isc__nm_enqueue_ievent(&sock->mgr->workers[rsock->tid],
+				       (isc__netievent_t *)queued_ev);
+	}
+}
+
+/*
+ * Asynchronous 'udpsend' event handler: send a packet on a UDP socket.
+ * A closing socket fails the request with ISC_R_CANCELED; a failed
+ * send is counted and reported through the failed-send callback.
+ */
+void
+isc__nm_async_udpsend(isc__networker_t *worker, isc__netievent_t *ev0) {
+	isc__netievent_udpsend_t *send_ev = (isc__netievent_udpsend_t *)ev0;
+	isc_nmsocket_t *sock = send_ev->sock;
+	isc__nm_uvreq_t *uvreq = send_ev->req;
+	isc_result_t result;
+
+	REQUIRE(sock->type == isc_nm_udpsocket);
+	REQUIRE(sock->tid == isc_nm_tid());
+	UNUSED(worker);
+
+	if (isc__nmsocket_closing(sock)) {
+		isc__nm_failed_send_cb(sock, uvreq, ISC_R_CANCELED);
+		return;
+	}
+
+	result = udp_send_direct(sock, uvreq, &send_ev->peer);
+	if (result == ISC_R_SUCCESS) {
+		return;
+	}
+
+	isc__nm_incstats(sock, STATID_SENDFAIL);
+	isc__nm_failed_send_cb(sock, uvreq, result);
+}
+
+/*
+ * libuv completion callback for uv_udp_send(): translate 'status',
+ * count a failure if there was one, and invoke the send callback.
+ */
+static void
+udp_send_cb(uv_udp_send_t *req, int status) {
+	isc__nm_uvreq_t *uvreq = uv_handle_get_data((uv_handle_t *)req);
+	isc_nmsocket_t *sock = NULL;
+	isc_result_t result = ISC_R_SUCCESS;
+
+	REQUIRE(VALID_UVREQ(uvreq));
+	REQUIRE(VALID_NMHANDLE(uvreq->handle));
+
+	sock = uvreq->sock;
+
+	REQUIRE(sock->tid == isc_nm_tid());
+
+	if (status < 0) {
+		result = isc__nm_uverr2result(status);
+		isc__nm_incstats(sock, STATID_SENDFAIL);
+	}
+
+	isc__nm_sendcb(sock, uvreq, result, false);
+}
+
+/*
+ * udp_send_direct sends the request's buffer to 'peer' on a socket.
+ * The socket has to belong to the same thread as the caller.
+ *
+ * Returns ISC_R_CANCELED when the socket is closing, the translated
+ * libuv error when uv_udp_send() fails, and ISC_R_SUCCESS otherwise.
+ */
+static isc_result_t
+udp_send_direct(isc_nmsocket_t *sock, isc__nm_uvreq_t *req,
+		isc_sockaddr_t *peer) {
+	const struct sockaddr *dest = &peer->type.sa;
+	int rv;
+
+	REQUIRE(VALID_NMSOCK(sock));
+	REQUIRE(VALID_UVREQ(req));
+	REQUIRE(sock->tid == isc_nm_tid());
+	REQUIRE(sock->type == isc_nm_udpsocket);
+
+	if (isc__nmsocket_closing(sock)) {
+		return (ISC_R_CANCELED);
+	}
+
+#if UV_VERSION_HEX >= UV_VERSION(1, 27, 0)
+	/*
+	 * If we used uv_udp_connect() (and not the shim version for
+	 * older versions of libuv), then the peer address has to be
+	 * set to NULL or else uv_udp_send() could fail or assert,
+	 * depending on the libuv version.
+	 */
+	if (atomic_load(&sock->connected)) {
+		dest = NULL;
+	}
+#endif
+
+	rv = uv_udp_send(&req->uv_req.udp_send, &sock->uv_handle.udp,
+			 &req->uvbuf, 1, dest, udp_send_cb);
+	if (rv >= 0) {
+		return (ISC_R_SUCCESS);
+	}
+
+	return (isc__nm_uverr2result(rv));
+}
+
+/*
+ * Set up a client UDP socket on the current worker thread: initialise
+ * the UDP handle and the read timer, adopt 'sock->fd', bind it to the
+ * socket's local interface and connect it to 'req->peer'.
+ *
+ * The outcome is stored in 'sock->result' and signalled on
+ * 'sock->cond'; the function then waits until the socket has been
+ * marked active before returning.
+ *
+ * Returns ISC_R_SUCCESS, ISC_R_SHUTTINGDOWN when the network manager
+ * is closing, or the translated libuv error of the step that failed.
+ */
+static isc_result_t
+udp_connect_direct(isc_nmsocket_t *sock, isc__nm_uvreq_t *req) {
+	isc__networker_t *worker = NULL;
+	int uv_bind_flags = UV_UDP_REUSEADDR;
+	isc_result_t result = ISC_R_UNSET;
+	int r;
+
+	REQUIRE(isc__nm_in_netthread());
+	REQUIRE(sock->tid == isc_nm_tid());
+
+	worker = &sock->mgr->workers[isc_nm_tid()];
+
+	atomic_store(&sock->connecting, true);
+
+	r = uv_udp_init(&worker->loop, &sock->uv_handle.udp);
+	UV_RUNTIME_CHECK(uv_udp_init, r);
+	uv_handle_set_data(&sock->uv_handle.handle, sock);
+
+	r = uv_timer_init(&worker->loop, &sock->read_timer);
+	UV_RUNTIME_CHECK(uv_timer_init, r);
+	uv_handle_set_data((uv_handle_t *)&sock->read_timer, sock);
+
+	if (isc__nm_closing(sock)) {
+		result = ISC_R_SHUTTINGDOWN;
+		goto error;
+	}
+
+	r = uv_udp_open(&sock->uv_handle.udp, sock->fd);
+	if (r != 0) {
+		isc__nm_incstats(sock, STATID_OPENFAIL);
+		goto done;
+	}
+	isc__nm_incstats(sock, STATID_OPEN);
+
+	if (sock->iface.type.sa.sa_family == AF_INET6) {
+		uv_bind_flags |= UV_UDP_IPV6ONLY;
+	}
+
+	r = uv_udp_bind(&sock->uv_handle.udp, &sock->iface.type.sa,
+			uv_bind_flags);
+	if (r != 0) {
+		isc__nm_incstats(sock, STATID_BINDFAIL);
+		goto done;
+	}
+
+	isc__nm_set_network_buffers(sock->mgr, &sock->uv_handle.handle);
+
+	/*
+	 * On FreeBSD the UDP connect() call sometimes results in a
+	 * spurious transient EADDRINUSE. Try a few more times before
+	 * giving up.
+	 */
+	do {
+		r = isc_uv_udp_connect(&sock->uv_handle.udp,
+				       &req->peer.type.sa);
+	} while (r == UV_EADDRINUSE && --req->connect_tries > 0);
+	if (r != 0) {
+		isc__nm_incstats(sock, STATID_CONNECTFAIL);
+		goto done;
+	}
+	isc__nm_incstats(sock, STATID_CONNECT);
+
+	atomic_store(&sock->connecting, false);
+	atomic_store(&sock->connected, true);
+
+done:
+	result = isc__nm_uverr2result(r);
+error:
+
+	LOCK(&sock->lock);
+	sock->result = result;
+	SIGNAL(&sock->cond);
+	/*
+	 * Condition waits may wake up spuriously, so re-check the
+	 * predicate in a loop rather than waiting exactly once.
+	 */
+	while (!atomic_load(&sock->active)) {
+		WAIT(&sock->scond, &sock->lock);
+	}
+	INSIST(atomic_load(&sock->active));
+	UNLOCK(&sock->lock);
+
+	return (result);
+}
+
+/*
+ * Asynchronous 'udpconnect' call handler: set up the new UDP socket on
+ * its worker thread and call the connect callback with the outcome.
+ */
+void
+isc__nm_async_udpconnect(isc__networker_t *worker, isc__netievent_t *ev0) {
+	isc__netievent_udpconnect_t *connect_ev =
+		(isc__netievent_udpconnect_t *)ev0;
+	isc_nmsocket_t *sock = connect_ev->sock;
+	isc__nm_uvreq_t *req = connect_ev->req;
+	isc_result_t result;
+
+	UNUSED(worker);
+
+	REQUIRE(VALID_NMSOCK(sock));
+	REQUIRE(sock->type == isc_nm_udpsocket);
+	REQUIRE(sock->parent == NULL);
+	REQUIRE(sock->tid == isc_nm_tid());
+
+	result = udp_connect_direct(sock, req);
+	if (result != ISC_R_SUCCESS) {
+		/* Setup failed: deactivate and close before reporting. */
+		atomic_store(&sock->active, false);
+		isc__nm_udp_close(sock);
+	}
+
+	/*
+	 * In the successful case the callback is only called now that
+	 * the socket has been initialized.
+	 */
+	isc__nm_connectcb(sock, req, result, true);
+
+	/*
+	 * The sock is now attached to the handle.
+	 */
+	isc__nmsocket_detach(&sock);
+}
+
+/*
+ * Create a client UDP socket bound to 'local' and connected to 'peer';
+ * the outcome is reported through 'cb' (called with 'cbarg').
+ *
+ * The descriptor is created and configured here; the libuv part of
+ * the setup runs on the current worker when called from a network
+ * thread, or on a randomly chosen worker through a queued event
+ * otherwise.  The function blocks until the setup result has been
+ * recorded on the socket.  'timeout' becomes the socket's read timeout.
+ */
+void
+isc_nm_udpconnect(isc_nm_t *mgr, isc_sockaddr_t *local, isc_sockaddr_t *peer,
+ isc_nm_cb_t cb, void *cbarg, unsigned int timeout,
+ size_t extrahandlesize) {
+ isc_result_t result = ISC_R_SUCCESS;
+ isc_nmsocket_t *sock = NULL;
+ isc__netievent_udpconnect_t *event = NULL;
+ isc__nm_uvreq_t *req = NULL;
+ sa_family_t sa_family;
+
+ REQUIRE(VALID_NM(mgr));
+ REQUIRE(local != NULL);
+ REQUIRE(peer != NULL);
+
+ sa_family = peer->type.sa.sa_family;
+
+ sock = isc_mem_get(mgr->mctx, sizeof(isc_nmsocket_t));
+ isc__nmsocket_init(sock, mgr, isc_nm_udpsocket, local);
+
+ sock->connect_cb = cb;
+ sock->connect_cbarg = cbarg;
+ sock->read_timeout = timeout;
+ sock->extrahandlesize = extrahandlesize;
+ sock->peer = *peer;
+ sock->result = ISC_R_UNSET;
+ atomic_init(&sock->client, true);
+
+ req = isc__nm_uvreq_get(mgr, sock);
+ req->cb.connect = cb;
+ req->cbarg = cbarg;
+ req->peer = *peer;
+ req->local = *local;
+ req->handle = isc__nmhandle_get(sock, &req->peer, &sock->iface);
+
+ result = isc__nm_socket(sa_family, SOCK_DGRAM, 0, &sock->fd);
+ if (result != ISC_R_SUCCESS) {
+ /* No descriptor: report the failure and drop the socket. */
+ if (isc__nm_in_netthread()) {
+ sock->tid = isc_nm_tid();
+ }
+ isc__nmsocket_clearcb(sock);
+ isc__nm_connectcb(sock, req, result, true);
+ atomic_store(&sock->closed, true);
+ isc__nmsocket_detach(&sock);
+ return;
+ }
+
+ /* Address reuse may be unsupported, but must not fail otherwise. */
+ result = isc__nm_socket_reuse(sock->fd);
+ RUNTIME_CHECK(result == ISC_R_SUCCESS ||
+ result == ISC_R_NOTIMPLEMENTED);
+
+ result = isc__nm_socket_reuse_lb(sock->fd);
+ RUNTIME_CHECK(result == ISC_R_SUCCESS ||
+ result == ISC_R_NOTIMPLEMENTED);
+
+ /* The remaining options are best effort; failures are ignored. */
+ (void)isc__nm_socket_incoming_cpu(sock->fd);
+
+ (void)isc__nm_socket_disable_pmtud(sock->fd, sa_family);
+
+ (void)isc__nm_socket_min_mtu(sock->fd, sa_family);
+
+ event = isc__nm_get_netievent_udpconnect(mgr, sock, req);
+
+ if (isc__nm_in_netthread()) {
+ /* Run the connect handler right here and recycle the event. */
+ atomic_store(&sock->active, true);
+ sock->tid = isc_nm_tid();
+ isc__nm_async_udpconnect(&mgr->workers[sock->tid],
+ (isc__netievent_t *)event);
+ isc__nm_put_netievent_udpconnect(mgr, event);
+ } else {
+ /* Hand the connect over to a randomly chosen worker. */
+ atomic_init(&sock->active, false);
+ sock->tid = isc_random_uniform(mgr->nworkers);
+ isc__nm_enqueue_ievent(&mgr->workers[sock->tid],
+ (isc__netievent_t *)event);
+ }
+ /*
+ * Wait for the result to be recorded, then mark the socket
+ * active and wake up any thread waiting on 'scond' for that.
+ */
+ LOCK(&sock->lock);
+ while (sock->result == ISC_R_UNSET) {
+ WAIT(&sock->cond, &sock->lock);
+ }
+ atomic_store(&sock->active, true);
+ BROADCAST(&sock->scond);
+ UNLOCK(&sock->lock);
+}
+
+/*
+ * libuv receive callback: process the datagram via udp_recv_cb() and
+ * afterwards stop reading on sockets that have no parent.
+ */
+void
+isc__nm_udp_read_cb(uv_udp_t *handle, ssize_t nrecv, const uv_buf_t *buf,
+		    const struct sockaddr *addr, unsigned flags) {
+	isc_nmsocket_t *sock = uv_handle_get_data((uv_handle_t *)handle);
+	REQUIRE(VALID_NMSOCK(sock));
+
+	udp_recv_cb(handle, nrecv, buf, addr, flags);
+
+	/*
+	 * If a caller calls isc_nm_read() on a listening socket, we can
+	 * get here, but we MUST NOT stop reading from the listener
+	 * socket. The only difference between listener and connected
+	 * sockets is that the former has sock->parent set and the
+	 * latter does not.
+	 */
+	if (sock->parent) {
+		return;
+	}
+
+	isc__nmsocket_timer_stop(sock);
+	isc__nm_stop_reading(sock);
+}
+
+/*
+ * Report a failed read with 'result' on a UDP socket.
+ *
+ * Client sockets stop their timer and reading, pass the failure to a
+ * pending read callback (clearing the callbacks first) and are then
+ * prepared for destruction.
+ *
+ * For a UDP server socket we don't have a child socket via "accept",
+ * so we:
+ *  - continue to read
+ *  - don't clear the callbacks
+ *  - don't destroy it (only stoplistening could do that)
+ */
+void
+isc__nm_udp_failed_read_cb(isc_nmsocket_t *sock, isc_result_t result) {
+	bool is_client;
+
+	REQUIRE(VALID_NMSOCK(sock));
+	REQUIRE(result != ISC_R_SUCCESS);
+
+	is_client = atomic_load(&sock->client);
+
+	if (is_client) {
+		isc__nmsocket_timer_stop(sock);
+		isc__nm_stop_reading(sock);
+	}
+
+	/* Only a read that is actually pending gets the callback. */
+	if (sock->recv_read) {
+		sock->recv_read = false;
+
+		if (sock->recv_cb != NULL) {
+			isc__nm_uvreq_t *req = isc__nm_get_read_req(sock,
+								    NULL);
+			if (is_client) {
+				isc__nmsocket_clearcb(sock);
+			}
+			isc__nm_readcb(sock, req, result);
+		}
+	}
+
+	if (is_client) {
+		isc__nmsocket_prep_destroy(sock);
+	}
+}
+
+/*
+ * Asynchronous 'udpread' call handler: start or resume reading on a
+ * socket and arm its read timer.  When the manager or the socket is
+ * closing, or reading cannot be started, the failure is reported
+ * through the failed-read callback instead.
+ */
+void
+isc__nm_async_udpread(isc__networker_t *worker, isc__netievent_t *ev0) {
+	isc__netievent_udpread_t *read_ev = (isc__netievent_udpread_t *)ev0;
+	isc_nmsocket_t *sock = read_ev->sock;
+	isc_result_t result = ISC_R_SHUTTINGDOWN;
+
+	UNUSED(worker);
+
+	REQUIRE(VALID_NMSOCK(sock));
+	REQUIRE(sock->tid == isc_nm_tid());
+
+	if (!isc__nm_closing(sock)) {
+		result = isc__nmsocket_closing(sock)
+				 ? ISC_R_CANCELED
+				 : isc__nm_start_reading(sock);
+	}
+
+	if (result == ISC_R_SUCCESS) {
+		isc__nmsocket_timer_start(sock);
+		return;
+	}
+
+	atomic_store(&sock->reading, true);
+	isc__nm_failed_read_cb(sock, result, false);
+}
+
+/*
+ * Register 'cb'/'cbarg' as the receive callback of the handle's
+ * socket and request a read.  The read is started immediately when
+ * the socket is not reading yet and we are on its thread; otherwise a
+ * 'udpread' event is queued on the socket's worker.
+ */
+void
+isc__nm_udp_read(isc_nmhandle_t *handle, isc_nm_recv_cb_t cb, void *cbarg) {
+	REQUIRE(VALID_NMHANDLE(handle));
+	REQUIRE(VALID_NMSOCK(handle->sock));
+
+	isc_nmsocket_t *sock = handle->sock;
+
+	REQUIRE(sock->type == isc_nm_udpsocket);
+	REQUIRE(sock->statichandle == handle);
+	REQUIRE(!sock->recv_read);
+
+	sock->recv_cb = cb;
+	sock->recv_cbarg = cbarg;
+	sock->recv_read = true;
+
+	if (atomic_load(&sock->reading) || sock->tid != isc_nm_tid()) {
+		isc__netievent_udpread_t *queued_ev =
+			isc__nm_get_netievent_udpread(sock->mgr, sock);
+		isc__nm_enqueue_ievent(&sock->mgr->workers[sock->tid],
+				       (isc__netievent_t *)queued_ev);
+	} else {
+		isc__netievent_udpread_t direct_ev = { .sock = sock };
+		isc__nm_async_udpread(NULL, (isc__netievent_t *)&direct_ev);
+	}
+}
+
+/*
+ * libuv close callback for the UDP handle of a listener's child
+ * socket: mark the socket closed, count the close, clear 'listening'
+ * and detach the reference taken from the handle's data pointer.
+ */
+static void
+udp_stop_cb(uv_handle_t *handle) {
+	isc_nmsocket_t *sock = uv_handle_get_data(handle);
+	bool expected = false;
+
+	uv_handle_set_data(handle, NULL);
+
+	REQUIRE(VALID_NMSOCK(sock));
+	REQUIRE(sock->tid == isc_nm_tid());
+	REQUIRE(atomic_load(&sock->closing));
+
+	/* The socket must not have been marked closed before. */
+	if (!atomic_compare_exchange_strong(&sock->closed, &expected, true)) {
+		UNREACHABLE();
+	}
+
+	isc__nm_incstats(sock, STATID_CLOSE);
+
+	atomic_store(&sock->listening, false);
+
+	isc__nmsocket_detach(&sock);
+}
+
+/*
+ * libuv close callback for the UDP handle of a socket without a
+ * parent: mark the socket closed, count the close, drop the reference
+ * to the server socket (if any), clear the connected/listening state
+ * and prepare the socket for destruction.
+ */
+static void
+udp_close_cb(uv_handle_t *handle) {
+	isc_nmsocket_t *sock = uv_handle_get_data(handle);
+	bool expected = false;
+
+	uv_handle_set_data(handle, NULL);
+
+	REQUIRE(VALID_NMSOCK(sock));
+	REQUIRE(sock->tid == isc_nm_tid());
+	REQUIRE(atomic_load(&sock->closing));
+
+	/* The socket must not have been marked closed before. */
+	if (!atomic_compare_exchange_strong(&sock->closed, &expected, true)) {
+		UNREACHABLE();
+	}
+
+	isc__nm_incstats(sock, STATID_CLOSE);
+
+	if (sock->server != NULL) {
+		isc__nmsocket_detach(&sock->server);
+	}
+
+	atomic_store(&sock->connected, false);
+	atomic_store(&sock->listening, false);
+
+	isc__nmsocket_prep_destroy(sock);
+}
+
+/*
+ * libuv close callback for the read timer: once the timer is closed,
+ * close the UDP handle as well, using the callback that matches the
+ * kind of socket (child of a listener or not).
+ */
+static void
+read_timer_close_cb(uv_handle_t *handle) {
+	isc_nmsocket_t *sock = uv_handle_get_data(handle);
+	uv_close_cb next_cb = NULL;
+
+	uv_handle_set_data(handle, NULL);
+
+	next_cb = sock->parent ? udp_stop_cb : udp_close_cb;
+	uv_close(&sock->uv_handle.handle, next_cb);
+}
+
+/*
+ * Stop a single child socket of a UDP listener on the current thread.
+ *
+ * Only the caller that manages to flip 'closing' from false to true does
+ * the work: it starts closing the socket, decrements the parent's count of
+ * running children and then waits on the parent's 'stoplistening' barrier.
+ */
+static void
+stop_udp_child(isc_nmsocket_t *sock) {
+	bool expected = false;
+
+	REQUIRE(sock->type == isc_nm_udpsocket);
+	REQUIRE(sock->tid == isc_nm_tid());
+
+	if (atomic_compare_exchange_strong(&sock->closing, &expected, true)) {
+		udp_close_direct(sock);
+
+		atomic_fetch_sub(&sock->parent->rchildren, 1);
+
+		isc_barrier_wait(&sock->parent->stoplistening);
+	}
+}
+
+/*
+ * Stop a UDP listener: deactivate every child socket, ask the other
+ * threads to stop their children, stop the child that belongs to the
+ * current thread directly, and finally mark the listener itself closed
+ * and prepare it for destruction.
+ */
+static void
+stop_udp_parent(isc_nmsocket_t *sock) {
+	REQUIRE(VALID_NMSOCK(sock));
+	REQUIRE(sock->tid == isc_nm_tid());
+	REQUIRE(sock->type == isc_nm_udplistener);
+
+	const int self = isc_nm_tid();
+	isc_nmsocket_t *own = NULL;
+
+	isc_barrier_init(&sock->stoplistening, sock->nchildren);
+
+	for (size_t i = 0; i < sock->nchildren; i++) {
+		isc_nmsocket_t *child = &sock->children[i];
+		REQUIRE(VALID_NMSOCK(child));
+
+		/*
+		 * The child of the current thread is handled after the
+		 * loop; closing of all the other children has to be
+		 * scheduled first.
+		 */
+		if ((int)i != self) {
+			atomic_store(&child->active, false);
+			enqueue_stoplistening(child);
+		}
+	}
+
+	own = &sock->children[self];
+	atomic_store(&own->active, false);
+	stop_udp_child(own);
+
+	atomic_store(&sock->closed, true);
+	isc__nmsocket_prep_destroy(sock);
+}
+
+/*
+ * Start closing a UDP socket on its own thread.  The read timer is closed
+ * first; read_timer_close_cb() then closes the socket's libuv handle.
+ */
+static void
+udp_close_direct(isc_nmsocket_t *sock) {
+	uv_handle_t *timer = NULL;
+
+	REQUIRE(VALID_NMSOCK(sock));
+	REQUIRE(sock->tid == isc_nm_tid());
+
+	timer = (uv_handle_t *)&sock->read_timer;
+
+	/* Let the timer's close callback find its way back to the socket. */
+	uv_handle_set_data(timer, sock);
+	uv_close(timer, read_timer_close_cb);
+}
+
+/*
+ * Handler for the 'udpclose' event: closes the socket carried by the
+ * event.  Must run on the thread that owns the socket.
+ */
+void
+isc__nm_async_udpclose(isc__networker_t *worker, isc__netievent_t *ev0) {
+	isc_nmsocket_t *sock = ((isc__netievent_udpclose_t *)ev0)->sock;
+
+	REQUIRE(VALID_NMSOCK(sock));
+	REQUIRE(sock->tid == isc_nm_tid());
+	UNUSED(worker);
+
+	udp_close_direct(sock);
+}
+
+/*
+ * Close an (already inactive) UDP socket.
+ *
+ * Only the first call for a given socket has any effect.  When called on
+ * the thread owning the socket, closing starts immediately; otherwise a
+ * 'udpclose' event is queued to the worker that owns the socket.
+ */
+void
+isc__nm_udp_close(isc_nmsocket_t *sock) {
+	bool expected = false;
+
+	REQUIRE(VALID_NMSOCK(sock));
+	REQUIRE(sock->type == isc_nm_udpsocket);
+	REQUIRE(!isc__nmsocket_active(sock));
+
+	if (!atomic_compare_exchange_strong(&sock->closing, &expected, true)) {
+		/* Somebody else is already closing this socket. */
+		return;
+	}
+
+	if (sock->tid != isc_nm_tid()) {
+		isc__netievent_udpclose_t *ievent =
+			isc__nm_get_netievent_udpclose(sock->mgr, sock);
+		isc__nm_enqueue_ievent(&sock->mgr->workers[sock->tid],
+				       (isc__netievent_t *)ievent);
+		return;
+	}
+
+	udp_close_direct(sock);
+}
+
+/*
+ * Shut down a UDP socket on the thread that owns it.
+ *
+ * An active socket is marked inactive; after that, any pending read is
+ * failed (ISC_R_SHUTTINGDOWN when the network manager is closing,
+ * ISC_R_CANCELED otherwise) or, when nobody holds a handle any more, a
+ * socket without a parent is prepared for destruction.
+ */
+void
+isc__nm_udp_shutdown(isc_nmsocket_t *sock) {
+	REQUIRE(VALID_NMSOCK(sock));
+	REQUIRE(sock->tid == isc_nm_tid());
+	REQUIRE(sock->type == isc_nm_udpsocket);
+
+	/*
+	 * Mark an active socket inactive and carry on; a socket that was
+	 * not active needs no further work.
+	 */
+	if (!isc__nmsocket_deactivate(sock)) {
+		return;
+	}
+
+	/*
+	 * A socket that is still connecting is cancelled in
+	 * async_udpconnect(), due to the socket being inactive now.
+	 */
+	if (atomic_load(&sock->connecting)) {
+		return;
+	}
+
+	/*
+	 * sock->statichandle is NULL once the client has detached the
+	 * last handle; in that case nobody is interested in the callback.
+	 */
+	if (sock->statichandle != NULL) {
+		isc__nm_failed_read_cb(sock,
+				       isc__nm_closing(sock)
+					       ? ISC_R_SHUTTINGDOWN
+					       : ISC_R_CANCELED,
+				       false);
+		return;
+	}
+
+	/*
+	 * Otherwise, we just send the socket to abyss...
+	 */
+	if (sock->parent == NULL) {
+		isc__nmsocket_prep_destroy(sock);
+	}
+}
+
+/*
+ * Request cancellation of the read on the UDP socket behind 'handle'.
+ * The cancellation itself is done asynchronously: a 'udpcancel' event is
+ * queued to the worker that owns the socket.
+ */
+void
+isc__nm_udp_cancelread(isc_nmhandle_t *handle) {
+	REQUIRE(VALID_NMHANDLE(handle));
+
+	isc_nmsocket_t *sock = handle->sock;
+
+	REQUIRE(VALID_NMSOCK(sock));
+	REQUIRE(sock->type == isc_nm_udpsocket);
+
+	isc__netievent_udpcancel_t *ievent =
+		isc__nm_get_netievent_udpcancel(sock->mgr, sock, handle);
+
+	isc__nm_enqueue_ievent(&sock->mgr->workers[sock->tid],
+			       (isc__netievent_t *)ievent);
+}
+
+/*
+ * Handler for the 'udpcancel' event: fails the read on the client socket
+ * carried by the event with ISC_R_EOF.  Must run on the thread that owns
+ * the socket.
+ */
+void
+isc__nm_async_udpcancel(isc__networker_t *worker, isc__netievent_t *ev0) {
+	isc_nmsocket_t *sock = ((isc__netievent_udpcancel_t *)ev0)->sock;
+
+	UNUSED(worker);
+
+	REQUIRE(VALID_NMSOCK(sock));
+	REQUIRE(sock->tid == isc_nm_tid());
+	REQUIRE(atomic_load(&sock->client));
+
+	isc__nm_failed_read_cb(sock, ISC_R_EOF, false);
+}
diff --git a/lib/isc/netmgr/uv-compat.c b/lib/isc/netmgr/uv-compat.c
new file mode 100644
index 0000000..b7c0f7b
--- /dev/null
+++ b/lib/isc/netmgr/uv-compat.c
@@ -0,0 +1,140 @@
+/*
+ * Copyright (C) Internet Systems Consortium, Inc. ("ISC")
+ *
+ * SPDX-License-Identifier: MPL-2.0
+ *
+ * This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, you can obtain one at https://mozilla.org/MPL/2.0/.
+ *
+ * See the COPYRIGHT file distributed with this work for additional
+ * information regarding copyright ownership.
+ */
+
+#include "uv-compat.h"
+#include <unistd.h>
+
+#include <isc/util.h>
+
+#include "netmgr-int.h"
+
+#if UV_VERSION_HEX < UV_VERSION(1, 27, 0)
+/*
+ * Emulation of uv_udp_connect() for libuv older than 1.27.0: connect(2)
+ * the descriptor behind 'handle' to 'addr', retrying for as long as the
+ * call is interrupted by a signal.
+ *
+ * Returns 0 on success or a negative error code on failure (translated
+ * with uv_translate_sys_error() where libuv provides it, -errno
+ * otherwise).
+ */
+int
+isc_uv_udp_connect(uv_udp_t *handle, const struct sockaddr *addr) {
+	int err = 0;
+	/*
+	 * The length does not change between retries, so compute it once.
+	 * Any family other than AF_INET is treated as AF_INET6.
+	 */
+	const socklen_t addrlen = (addr->sa_family == AF_INET)
+					  ? sizeof(struct sockaddr_in)
+					  : sizeof(struct sockaddr_in6);
+
+	do {
+		err = connect(handle->io_watcher.fd, addr, addrlen);
+	} while (err == -1 && errno == EINTR);
+
+	if (err != 0) {
+#if UV_VERSION_HEX >= UV_VERSION(1, 10, 0)
+		return (uv_translate_sys_error(errno));
+#else
+		return (-errno);
+#endif /* UV_VERSION_HEX >= UV_VERSION(1, 10, 0) */
+	}
+
+	return (0);
+}
+#endif /* UV_VERSION_HEX < UV_VERSION(1, 27, 0) */
+
+#if UV_VERSION_HEX < UV_VERSION(1, 32, 0)
+/*
+ * Emulation of uv_tcp_close_reset() for libuv older than 1.32.0: set
+ * SO_LINGER to { 1, 0 } on the socket behind 'handle' and then close the
+ * handle with 'close_cb'.
+ *
+ * Returns 0 on success.  When setsockopt() fails, a negative error code is
+ * returned and the handle is NOT closed.
+ */
+int
+uv_tcp_close_reset(uv_tcp_t *handle, uv_close_cb close_cb) {
+	struct linger lng = { 1, 0 };
+	int rc = setsockopt(handle->io_watcher.fd, SOL_SOCKET, SO_LINGER, &lng,
+			    sizeof(lng));
+
+	if (rc != -1) {
+		uv_close((uv_handle_t *)handle, close_cb);
+		return (0);
+	}
+
+#if UV_VERSION_HEX >= UV_VERSION(1, 10, 0)
+	return (uv_translate_sys_error(errno));
+#else
+	return (-errno);
+#endif /* UV_VERSION_HEX >= UV_VERSION(1, 10, 0) */
+}
+#endif /* UV_VERSION_HEX < UV_VERSION(1, 32, 0) */
+
+/*
+ * Bind a UDP handle to 'addr'.  When the first attempt fails because the
+ * address is not available, try to enable the "freebind" socket option
+ * and, if that works, bind once more.
+ *
+ * Returns the result of uv_fileno() when the descriptor cannot be
+ * obtained, otherwise the result of the last uv_udp_bind() call.
+ */
+int
+isc_uv_udp_freebind(uv_udp_t *handle, const struct sockaddr *addr,
+		    unsigned int flags) {
+	uv_os_sock_t fd;
+	int result = uv_fileno((const uv_handle_t *)handle, (uv_os_fd_t *)&fd);
+
+	if (result < 0) {
+		return (result);
+	}
+
+	result = uv_udp_bind(handle, addr, flags);
+	if (result != UV_EADDRNOTAVAIL) {
+		return (result);
+	}
+
+	if (isc__nm_socket_freebind(fd, addr->sa_family) != ISC_R_SUCCESS) {
+		return (result);
+	}
+
+	/*
+	 * Retry binding with IP_FREEBIND (or equivalent option) if the
+	 * address is not available. This helps with IPv6 tentative
+	 * addresses which are reported by the route socket, although
+	 * named is not yet able to properly bind to them.
+	 */
+	return (uv_udp_bind(handle, addr, flags));
+}
+
+/*
+ * Bind a TCP handle and report a failed bind() immediately.
+ *
+ * Returns 0 on success or the negative error code from uv_tcp_bind() or
+ * uv_tcp_getsockname().
+ */
+static int
+isc__uv_tcp_bind_now(uv_tcp_t *handle, const struct sockaddr *addr,
+		     unsigned int flags) {
+	struct sockaddr_storage bound;
+	int boundlen = sizeof(bound);
+	int result = uv_tcp_bind(handle, addr, flags);
+
+	if (result < 0) {
+		return (result);
+	}
+
+	/*
+	 * uv_tcp_bind() uses a delayed error, initially returning
+	 * success even if bind() fails. By calling uv_tcp_getsockname()
+	 * here we can find out whether the bind() call was successful.
+	 */
+	result = uv_tcp_getsockname(handle, (struct sockaddr *)&bound,
+				    &boundlen);
+
+	return ((result < 0) ? result : 0);
+}
+
+/*
+ * Bind a TCP handle to 'addr'.  When the first attempt fails because the
+ * address is not available, try to enable the "freebind" socket option
+ * and, if that works, bind once more.
+ *
+ * Returns the result of uv_fileno() when the descriptor cannot be
+ * obtained, otherwise the result of the last isc__uv_tcp_bind_now() call.
+ */
+int
+isc_uv_tcp_freebind(uv_tcp_t *handle, const struct sockaddr *addr,
+		    unsigned int flags) {
+	uv_os_sock_t fd;
+	int result = uv_fileno((const uv_handle_t *)handle, (uv_os_fd_t *)&fd);
+
+	if (result < 0) {
+		return (result);
+	}
+
+	result = isc__uv_tcp_bind_now(handle, addr, flags);
+	if (result != UV_EADDRNOTAVAIL) {
+		return (result);
+	}
+
+	if (isc__nm_socket_freebind(fd, addr->sa_family) != ISC_R_SUCCESS) {
+		return (result);
+	}
+
+	/*
+	 * Retry binding with IP_FREEBIND (or equivalent option) if the
+	 * address is not available. This helps with IPv6 tentative
+	 * addresses which are reported by the route socket, although
+	 * named is not yet able to properly bind to them.
+	 */
+	return (isc__uv_tcp_bind_now(handle, addr, flags));
+}
diff --git a/lib/isc/netmgr/uv-compat.h b/lib/isc/netmgr/uv-compat.h
new file mode 100644
index 0000000..3a10387
--- /dev/null
+++ b/lib/isc/netmgr/uv-compat.h
@@ -0,0 +1,126 @@
+/*
+ * Copyright (C) Internet Systems Consortium, Inc. ("ISC")
+ *
+ * SPDX-License-Identifier: MPL-2.0
+ *
+ * This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, you can obtain one at https://mozilla.org/MPL/2.0/.
+ *
+ * See the COPYRIGHT file distributed with this work for additional
+ * information regarding copyright ownership.
+ */
+
+#pragma once
+#include <uv.h>
+
+/*
+ * These functions were introduced in newer libuv, but we still
+ * want BIND9 to compile on older ones so we emulate them.
+ * They're inline to avoid conflicts when running with a newer
+ * library version.
+ */
+
+/*
+ * Pack a (major, minor, patch) version triple into a single integer so
+ * that it can be compared against UV_VERSION_HEX.  Every argument is
+ * parenthesized so that expressions can be passed safely.
+ */
+#define UV_VERSION(major, minor, patch) \
+	(((major) << 16) | ((minor) << 8) | (patch))
+
+/*
+ * Copied verbatim from libuv/src/version.c
+ *
+ * UV_VERSION_STRING is the "MAJOR.MINOR.PATCH" string assembled from the
+ * UV_VERSION_MAJOR/MINOR/PATCH macros, with "-" and UV_VERSION_SUFFIX
+ * appended when UV_VERSION_IS_RELEASE is zero.  UV_STRINGIFY goes through
+ * UV_STRINGIFY_HELPER so that its argument is macro-expanded before it is
+ * turned into a string.
+ */
+
+#define UV_STRINGIFY(v) UV_STRINGIFY_HELPER(v)
+#define UV_STRINGIFY_HELPER(v) #v
+
+#define UV_VERSION_STRING_BASE \
+ UV_STRINGIFY(UV_VERSION_MAJOR) \
+ "." UV_STRINGIFY(UV_VERSION_MINOR) "." UV_STRINGIFY(UV_VERSION_PATCH)
+
+#if UV_VERSION_IS_RELEASE
+#define UV_VERSION_STRING UV_VERSION_STRING_BASE
+#else
+#define UV_VERSION_STRING UV_VERSION_STRING_BASE "-" UV_VERSION_SUFFIX
+#endif
+
+/* Provide UV__ERR() unless it is already defined; it negates its argument. */
+#if !defined(UV__ERR)
+#define UV__ERR(x) (-(x))
+#endif
+
+#if UV_VERSION_HEX < UV_VERSION(1, 19, 0)
+/*
+ * Accessors for the user data pointer of libuv handles and requests,
+ * provided here when building against libuv older than 1.19.0.  They read
+ * and write the 'data' member of the handle/request directly.
+ */
+static inline void *
+uv_handle_get_data(const uv_handle_t *handle) {
+ return (handle->data);
+}
+
+static inline void
+uv_handle_set_data(uv_handle_t *handle, void *data) {
+ handle->data = data;
+}
+
+static inline void *
+uv_req_get_data(const uv_req_t *req) {
+ return (req->data);
+}
+
+static inline void
+uv_req_set_data(uv_req_t *req, void *data) {
+ req->data = data;
+}
+#endif /* UV_VERSION_HEX < UV_VERSION(1, 19, 0) */
+
+#if UV_VERSION_HEX < UV_VERSION(1, 32, 0)
+int
+uv_tcp_close_reset(uv_tcp_t *handle, uv_close_cb close_cb);
+/*%<
+ * Emulation of uv_tcp_close_reset() for libuv older than 1.32.0: sets
+ * SO_LINGER to { 1, 0 } on the socket and then closes the handle with
+ * 'close_cb'.
+ *
+ * Returns 0 on success; if setting the socket option fails, a negative
+ * error code is returned and the handle is not closed.
+ */
+#endif
+
+#if UV_VERSION_HEX < UV_VERSION(1, 34, 0)
+/*
+ * Emulation of uv_sleep() on top of usleep(); 'msec' is in milliseconds.
+ * The argument is parenthesized so that expressions such as
+ * uv_sleep(a + b) are scaled as a whole.
+ */
+#define uv_sleep(msec) usleep((msec) * 1000)
+#endif /* UV_VERSION_HEX < UV_VERSION(1, 34, 0) */
+
+#if UV_VERSION_HEX < UV_VERSION(1, 27, 0)
+int
+isc_uv_udp_connect(uv_udp_t *handle, const struct sockaddr *addr);
+/*%<
+ * Associate the UDP handle to a remote address and port, so every message sent
+ * by this handle is automatically sent to that destination.
+ *
+ * NOTE: This is just a limited shim for uv_udp_connect() as it requires the
+ * handle to be bound.
+ *
+ * Returns 0 on success or a negative error code on failure.  With libuv
+ * 1.27.0 or newer, isc_uv_udp_connect is simply an alias for
+ * uv_udp_connect().
+ */
+#else /* UV_VERSION_HEX < UV_VERSION(1, 27, 0) */
+#define isc_uv_udp_connect uv_udp_connect
+#endif /* UV_VERSION_HEX < UV_VERSION(1, 27, 0) */
+
+#if UV_VERSION_HEX < UV_VERSION(1, 12, 0)
+#include <errno.h>
+#include <stdlib.h>
+#include <string.h>
+
+/*
+ * Emulation of uv_os_getenv() for libuv older than 1.12.0.
+ *
+ * Copies the value of the environment variable 'name' into 'buffer'.  On
+ * entry '*size' is the amount of storage available in 'buffer', including
+ * room for the terminating NUL.
+ *
+ * Returns:
+ *	0		success; '*size' is set to the string length of the
+ *			value (not counting the terminating NUL)
+ *	UV_EINVAL	'name', 'buffer' or 'size' is NULL, or '*size' is 0
+ *	UV_ENOENT	the variable is not set
+ *	UV_ENOBUFS	'buffer' is too small; '*size' is set to the amount
+ *			of storage required (including the terminating NUL)
+ */
+static inline int
+uv_os_getenv(const char *name, char *buffer, size_t *size) {
+	size_t len;
+	char *value;
+
+	if (name == NULL || buffer == NULL || size == NULL || *size == 0) {
+		return (UV_EINVAL);
+	}
+
+	value = getenv(name);
+	if (value == NULL) {
+		return (UV_ENOENT);
+	}
+
+	len = strlen(value);
+	if (len >= *size) {
+		*size = len + 1;
+		return (UV_ENOBUFS);
+	}
+
+	/* Copy the value together with its terminating NUL. */
+	memmove(buffer, value, len + 1);
+	*size = len;
+
+	return (0);
+}
+
+/*
+ * Emulation of uv_os_setenv() for libuv older than 1.12.0: creates the
+ * environment variable 'name' or updates it when it already exists.
+ *
+ * Returns 0 on success, UV_EINVAL when 'name' or 'value' is NULL, or the
+ * negated errno value when setenv() fails.
+ */
+static inline int
+uv_os_setenv(const char *name, const char *value) {
+	if (name == NULL || value == NULL) {
+		return (UV_EINVAL);
+	}
+
+	if (setenv(name, value, 1) != 0) {
+		return (UV__ERR(errno));
+	}
+
+	return (0);
+}
+#endif /* UV_VERSION_HEX < UV_VERSION(1, 12, 0) */
+
+int
+isc_uv_udp_freebind(uv_udp_t *handle, const struct sockaddr *addr,
+ unsigned int flags);
+/*%<
+ * Bind the UDP handle to 'addr'.  If the bind fails because the address is
+ * not available, the "freebind" socket option is enabled and, when that
+ * succeeds, the bind is attempted a second time.
+ *
+ * Returns 0 on success or a negative libuv error code.
+ */
+
+int
+isc_uv_tcp_freebind(uv_tcp_t *handle, const struct sockaddr *addr,
+ unsigned int flags);
+/*%<
+ * Bind the TCP handle to 'addr', detecting a failed bind() right away.  If
+ * the bind fails because the address is not available, the "freebind"
+ * socket option is enabled and, when that succeeds, the bind is attempted
+ * a second time.
+ *
+ * Returns 0 on success or a negative libuv error code.
+ */
diff --git a/lib/isc/netmgr/uverr2result.c b/lib/isc/netmgr/uverr2result.c
new file mode 100644
index 0000000..9f16ea8
--- /dev/null
+++ b/lib/isc/netmgr/uverr2result.c
@@ -0,0 +1,105 @@
+/*
+ * Copyright (C) Internet Systems Consortium, Inc. ("ISC")
+ *
+ * SPDX-License-Identifier: MPL-2.0
+ *
+ * This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, you can obtain one at https://mozilla.org/MPL/2.0/.
+ *
+ * See the COPYRIGHT file distributed with this work for additional
+ * information regarding copyright ownership.
+ */
+
+#include <stdbool.h>
+#include <uv.h>
+
+#include <isc/result.h>
+#include <isc/string.h>
+#include <isc/util.h>
+
+#include "netmgr-int.h"
+
+/*%
+ * Convert a libuv error value into an isc_result_t. The
+ * list of supported error values is not complete; new users
+ * of this function should add any expected errors that are
+ * not already there.
+ *
+ * 'uverr' is the libuv error code to convert.  'file', 'line' and 'func'
+ * identify the call site and are only used in the message emitted when
+ * 'uverr' has no mapping and 'dolog' is true; in that case (logged or
+ * not) ISC_R_UNEXPECTED is returned.
+ */
+isc_result_t
+isc___nm_uverr2result(int uverr, bool dolog, const char *file,
+		      unsigned int line, const char *func) {
+	switch (uverr) {
+	case 0:
+		return (ISC_R_SUCCESS);
+	case UV_ENOTDIR:
+	case UV_ELOOP:
+	case UV_EINVAL: /* XXX sometimes this is not for files */
+	case UV_ENAMETOOLONG:
+	case UV_EBADF:
+		return (ISC_R_INVALIDFILE);
+	case UV_ENOENT:
+		return (ISC_R_FILENOTFOUND);
+	case UV_EAGAIN:
+		return (ISC_R_NOCONN);
+	case UV_EACCES:
+	case UV_EPERM:
+		return (ISC_R_NOPERM);
+	case UV_EEXIST:
+		return (ISC_R_FILEEXISTS);
+	case UV_EIO:
+		return (ISC_R_IOERROR);
+	case UV_ENOMEM:
+		return (ISC_R_NOMEMORY);
+	case UV_ENFILE:
+	case UV_EMFILE:
+		return (ISC_R_TOOMANYOPENFILES);
+	case UV_ENOSPC:
+		return (ISC_R_DISCFULL);
+	case UV_EPIPE:
+	case UV_ECONNRESET:
+	case UV_ECONNABORTED:
+		return (ISC_R_CONNECTIONRESET);
+	case UV_ENOTCONN:
+		return (ISC_R_NOTCONNECTED);
+	case UV_ETIMEDOUT:
+		return (ISC_R_TIMEDOUT);
+	case UV_ENOBUFS:
+		return (ISC_R_NORESOURCES);
+	case UV_EAFNOSUPPORT:
+		return (ISC_R_FAMILYNOSUPPORT);
+	case UV_ENETDOWN:
+		return (ISC_R_NETDOWN);
+	case UV_EHOSTDOWN:
+		return (ISC_R_HOSTDOWN);
+	case UV_ENETUNREACH:
+		return (ISC_R_NETUNREACH);
+	case UV_EHOSTUNREACH:
+		return (ISC_R_HOSTUNREACH);
+	case UV_EADDRINUSE:
+		return (ISC_R_ADDRINUSE);
+	case UV_EADDRNOTAVAIL:
+		return (ISC_R_ADDRNOTAVAIL);
+	case UV_ECONNREFUSED:
+		return (ISC_R_CONNREFUSED);
+	case UV_ECANCELED:
+		return (ISC_R_CANCELED);
+	case UV_EOF:
+		return (ISC_R_EOF);
+	case UV_EMSGSIZE:
+		return (ISC_R_MAXSIZE);
+	case UV_ENOTSUP:
+		return (ISC_R_FAMILYNOSUPPORT);
+	case UV_ENOPROTOOPT:
+	case UV_EPROTONOSUPPORT:
+		return (ISC_R_INVALIDPROTO);
+	default:
+		if (dolog) {
+			/* 'line' is unsigned, hence %u rather than %d. */
+			UNEXPECTED_ERROR("unable to convert libuv error code "
+					 "in %s (%s:%u) to isc_result: %d: %s",
+					 func, file, line, uverr,
+					 uv_strerror(uverr));
+		}
+		return (ISC_R_UNEXPECTED);
+	}
+}