diff options
Diffstat (limited to 'lib/dns/dispatch.c')
-rw-r--r-- | lib/dns/dispatch.c | 3591 |
1 files changed, 3591 insertions, 0 deletions
diff --git a/lib/dns/dispatch.c b/lib/dns/dispatch.c new file mode 100644 index 0000000..5f27d63 --- /dev/null +++ b/lib/dns/dispatch.c @@ -0,0 +1,3591 @@ +/* + * Copyright (C) Internet Systems Consortium, Inc. ("ISC") + * + * SPDX-License-Identifier: MPL-2.0 + * + * This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, you can obtain one at https://mozilla.org/MPL/2.0/. + * + * See the COPYRIGHT file distributed with this work for additional + * information regarding copyright ownership. + */ + +/*! \file */ + +#include <inttypes.h> +#include <stdbool.h> +#include <stdlib.h> +#include <sys/types.h> +#include <unistd.h> + +#include <isc/mem.h> +#include <isc/mutex.h> +#include <isc/portset.h> +#include <isc/print.h> +#include <isc/random.h> +#include <isc/socket.h> +#include <isc/stats.h> +#include <isc/string.h> +#include <isc/task.h> +#include <isc/time.h> +#include <isc/util.h> + +#include <dns/acl.h> +#include <dns/dispatch.h> +#include <dns/events.h> +#include <dns/log.h> +#include <dns/message.h> +#include <dns/portlist.h> +#include <dns/stats.h> +#include <dns/tcpmsg.h> +#include <dns/types.h> + +typedef ISC_LIST(dns_dispentry_t) dns_displist_t; + +typedef struct dispsocket dispsocket_t; +typedef ISC_LIST(dispsocket_t) dispsocketlist_t; + +typedef struct dispportentry dispportentry_t; +typedef ISC_LIST(dispportentry_t) dispportlist_t; + +typedef struct dns_qid { + unsigned int magic; + unsigned int qid_nbuckets; /*%< hash table size */ + unsigned int qid_increment; /*%< id increment on collision */ + isc_mutex_t lock; + dns_displist_t *qid_table; /*%< the table itself */ + dispsocketlist_t *sock_table; /*%< socket table */ +} dns_qid_t; + +struct dns_dispatchmgr { + /* Unlocked. */ + unsigned int magic; + isc_mem_t *mctx; + dns_acl_t *blackhole; + dns_portlist_t *portlist; + isc_stats_t *stats; + + /* Locked by "lock". */ + isc_mutex_t lock; + unsigned int state; + ISC_LIST(dns_dispatch_t) list; + + /* locked by buffer_lock */ + dns_qid_t *qid; + isc_mutex_t buffer_lock; + unsigned int buffers; /*%< allocated buffers */ + unsigned int buffersize; /*%< size of each buffer */ + unsigned int maxbuffers; /*%< max buffers */ + + isc_refcount_t irefs; + + /*% + * Locked by qid->lock if qid exists; otherwise, can be used without + * being locked. + * Memory footprint considerations: this is a simple implementation of + * available ports, i.e., an ordered array of the actual port numbers. + * This will require about 256KB of memory in the worst case (128KB for + * each of IPv4 and IPv6). We could reduce it by representing it as a + * more sophisticated way such as a list (or array) of ranges that are + * searched to identify a specific port. Our decision here is the saved + * memory isn't worth the implementation complexity, considering the + * fact that the whole BIND9 process (which is mainly named) already + * requires a pretty large memory footprint. We may, however, have to + * revisit the decision when we want to use it as a separate module for + * an environment where memory requirement is severer. + */ + in_port_t *v4ports; /*%< available ports for IPv4 */ + unsigned int nv4ports; /*%< # of available ports for IPv4 */ + in_port_t *v6ports; /*%< available ports for IPv4 */ + unsigned int nv6ports; /*%< # of available ports for IPv4 */ +}; + +#define MGR_SHUTTINGDOWN 0x00000001U +#define MGR_IS_SHUTTINGDOWN(l) (((l)->state & MGR_SHUTTINGDOWN) != 0) + +#define IS_PRIVATE(d) (((d)->attributes & DNS_DISPATCHATTR_PRIVATE) != 0) + +struct dns_dispentry { + unsigned int magic; + dns_dispatch_t *disp; + dns_messageid_t id; + in_port_t port; + unsigned int bucket; + isc_sockaddr_t host; + isc_task_t *task; + isc_taskaction_t action; + void *arg; + bool item_out; + dispsocket_t *dispsocket; + ISC_LIST(dns_dispatchevent_t) items; + ISC_LINK(dns_dispentry_t) link; +}; + +/*% + * Maximum number of dispatch sockets that can be pooled for reuse. The + * appropriate value may vary, but experiments have shown a busy caching server + * may need more than 1000 sockets concurrently opened. The maximum allowable + * number of dispatch sockets (per manager) will be set to the double of this + * value. + */ +#ifndef DNS_DISPATCH_POOLSOCKS +#define DNS_DISPATCH_POOLSOCKS 2048 +#endif /* ifndef DNS_DISPATCH_POOLSOCKS */ + +/*% + * Quota to control the number of dispatch sockets. If a dispatch has more + * than the quota of sockets, new queries will purge oldest ones, so that + * a massive number of outstanding queries won't prevent subsequent queries + * (especially if the older ones take longer time and result in timeout). + */ +#ifndef DNS_DISPATCH_SOCKSQUOTA +#define DNS_DISPATCH_SOCKSQUOTA 3072 +#endif /* ifndef DNS_DISPATCH_SOCKSQUOTA */ + +struct dispsocket { + unsigned int magic; + isc_socket_t *socket; + dns_dispatch_t *disp; + isc_sockaddr_t host; + in_port_t localport; /* XXX: should be removed later */ + dispportentry_t *portentry; + dns_dispentry_t *resp; + isc_task_t *task; + ISC_LINK(dispsocket_t) link; + unsigned int bucket; + ISC_LINK(dispsocket_t) blink; +}; + +/*% + * A port table entry. We remember every port we first open in a table with a + * reference counter so that we can 'reuse' the same port (with different + * destination addresses) using the SO_REUSEADDR socket option. + */ +struct dispportentry { + in_port_t port; + isc_refcount_t refs; + ISC_LINK(struct dispportentry) link; +}; + +#ifndef DNS_DISPATCH_PORTTABLESIZE +#define DNS_DISPATCH_PORTTABLESIZE 1024 +#endif /* ifndef DNS_DISPATCH_PORTTABLESIZE */ + +#define INVALID_BUCKET (0xffffdead) + +/*% + * Number of tasks for each dispatch that use separate sockets for different + * transactions. This must be a power of 2 as it will divide 32 bit numbers + * to get an uniformly random tasks selection. See get_dispsocket(). + */ +#define MAX_INTERNAL_TASKS 64 + +struct dns_dispatch { + /* Unlocked. */ + unsigned int magic; /*%< magic */ + dns_dispatchmgr_t *mgr; /*%< dispatch manager */ + int ntasks; + /*% + * internal task buckets. We use multiple tasks to distribute various + * socket events well when using separate dispatch sockets. We use the + * 1st task (task[0]) for internal control events. + */ + isc_task_t *task[MAX_INTERNAL_TASKS]; + isc_socket_t *socket; /*%< isc socket attached to */ + isc_sockaddr_t local; /*%< local address */ + in_port_t localport; /*%< local UDP port */ + isc_sockaddr_t peer; /*%< peer address (TCP) */ + isc_dscp_t dscp; /*%< "listen-on" DSCP value */ + unsigned int maxrequests; /*%< max requests */ + isc_event_t *ctlevent; + + isc_mem_t *sepool; /*%< pool for socket events */ + + /*% Locked by mgr->lock. */ + ISC_LINK(dns_dispatch_t) link; + + /* Locked by "lock". */ + isc_mutex_t lock; /*%< locks all below */ + isc_sockettype_t socktype; + unsigned int attributes; + unsigned int refcount; /*%< number of users */ + dns_dispatchevent_t *failsafe_ev; /*%< failsafe cancel event */ + unsigned int shutting_down : 1, shutdown_out : 1, connected : 1, + tcpmsg_valid : 1, recv_pending : 1; /*%< is a + * recv() + * pending? + * */ + isc_result_t shutdown_why; + ISC_LIST(dispsocket_t) activesockets; + ISC_LIST(dispsocket_t) inactivesockets; + unsigned int nsockets; + unsigned int requests; /*%< how many requests we have */ + unsigned int tcpbuffers; /*%< allocated buffers */ + dns_tcpmsg_t tcpmsg; /*%< for tcp streams */ + dns_qid_t *qid; + dispportlist_t *port_table; /*%< hold ports 'owned' by us */ +}; + +#define QID_MAGIC ISC_MAGIC('Q', 'i', 'd', ' ') +#define VALID_QID(e) ISC_MAGIC_VALID((e), QID_MAGIC) + +#define RESPONSE_MAGIC ISC_MAGIC('D', 'r', 's', 'p') +#define VALID_RESPONSE(e) ISC_MAGIC_VALID((e), RESPONSE_MAGIC) + +#define DISPSOCK_MAGIC ISC_MAGIC('D', 's', 'o', 'c') +#define VALID_DISPSOCK(e) ISC_MAGIC_VALID((e), DISPSOCK_MAGIC) + +#define DISPATCH_MAGIC ISC_MAGIC('D', 'i', 's', 'p') +#define VALID_DISPATCH(e) ISC_MAGIC_VALID((e), DISPATCH_MAGIC) + +#define DNS_DISPATCHMGR_MAGIC ISC_MAGIC('D', 'M', 'g', 'r') +#define VALID_DISPATCHMGR(e) ISC_MAGIC_VALID((e), DNS_DISPATCHMGR_MAGIC) + +#define DNS_QID(disp) \ + ((disp)->socktype == isc_sockettype_tcp) ? (disp)->qid \ + : (disp)->mgr->qid + +/*% + * Locking a query port buffer is a bit tricky. We access the buffer without + * locking until qid is created. Technically, there is a possibility of race + * between the creation of qid and access to the port buffer; in practice, + * however, this should be safe because qid isn't created until the first + * dispatch is created and there should be no contending situation until then. + */ +#define PORTBUFLOCK(mgr) \ + if ((mgr)->qid != NULL) \ + LOCK(&((mgr)->qid->lock)) +#define PORTBUFUNLOCK(mgr) \ + if ((mgr)->qid != NULL) \ + UNLOCK((&(mgr)->qid->lock)) + +/* + * Statics. + */ +static dns_dispentry_t * +entry_search(dns_qid_t *, const isc_sockaddr_t *, dns_messageid_t, in_port_t, + unsigned int); +static bool +destroy_disp_ok(dns_dispatch_t *); +static void +destroy_disp(isc_task_t *task, isc_event_t *event); +static void +destroy_dispsocket(dns_dispatch_t *, dispsocket_t **); +static void +deactivate_dispsocket(dns_dispatch_t *, dispsocket_t *); +static void +udp_exrecv(isc_task_t *, isc_event_t *); +static void +udp_shrecv(isc_task_t *, isc_event_t *); +static void +udp_recv(isc_event_t *, dns_dispatch_t *, dispsocket_t *); +static void +tcp_recv(isc_task_t *, isc_event_t *); +static isc_result_t +startrecv(dns_dispatch_t *, dispsocket_t *); +static uint32_t +dns_hash(dns_qid_t *, const isc_sockaddr_t *, dns_messageid_t, in_port_t); +static void +free_buffer(dns_dispatch_t *disp, void *buf, unsigned int len); +static void * +allocate_udp_buffer(dns_dispatch_t *disp); +static void +free_devent(dns_dispatch_t *disp, dns_dispatchevent_t *ev); +static dns_dispatchevent_t * +allocate_devent(dns_dispatch_t *disp); +static void +do_cancel(dns_dispatch_t *disp); +static dns_dispentry_t * +linear_first(dns_qid_t *disp); +static dns_dispentry_t * +linear_next(dns_qid_t *disp, dns_dispentry_t *resp); +static void +dispatch_free(dns_dispatch_t **dispp); +static isc_result_t +get_udpsocket(dns_dispatchmgr_t *mgr, dns_dispatch_t *disp, + isc_socketmgr_t *sockmgr, const isc_sockaddr_t *localaddr, + isc_socket_t **sockp, isc_socket_t *dup_socket, bool duponly); +static isc_result_t +dispatch_createudp(dns_dispatchmgr_t *mgr, isc_socketmgr_t *sockmgr, + isc_taskmgr_t *taskmgr, const isc_sockaddr_t *localaddr, + unsigned int maxrequests, unsigned int attributes, + dns_dispatch_t **dispp, isc_socket_t *dup_socket); +static bool +destroy_mgr_ok(dns_dispatchmgr_t *mgr); +static void +destroy_mgr(dns_dispatchmgr_t **mgrp); +static isc_result_t +qid_allocate(dns_dispatchmgr_t *mgr, unsigned int buckets, + unsigned int increment, dns_qid_t **qidp, bool needaddrtable); +static void +qid_destroy(isc_mem_t *mctx, dns_qid_t **qidp); +static isc_result_t +open_socket(isc_socketmgr_t *mgr, const isc_sockaddr_t *local, + unsigned int options, isc_socket_t **sockp, + isc_socket_t *dup_socket, bool duponly); +static bool +portavailable(dns_dispatchmgr_t *mgr, isc_socket_t *sock, + isc_sockaddr_t *sockaddrp); + +#define LVL(x) ISC_LOG_DEBUG(x) + +static void +mgr_log(dns_dispatchmgr_t *mgr, int level, const char *fmt, ...) + ISC_FORMAT_PRINTF(3, 4); + +static void +mgr_log(dns_dispatchmgr_t *mgr, int level, const char *fmt, ...) { + char msgbuf[2048]; + va_list ap; + + if (!isc_log_wouldlog(dns_lctx, level)) { + return; + } + + va_start(ap, fmt); + vsnprintf(msgbuf, sizeof(msgbuf), fmt, ap); + va_end(ap); + + isc_log_write(dns_lctx, DNS_LOGCATEGORY_DISPATCH, + DNS_LOGMODULE_DISPATCH, level, "dispatchmgr %p: %s", mgr, + msgbuf); +} + +static void +inc_stats(dns_dispatchmgr_t *mgr, isc_statscounter_t counter) { + if (mgr->stats != NULL) { + isc_stats_increment(mgr->stats, counter); + } +} + +static void +dec_stats(dns_dispatchmgr_t *mgr, isc_statscounter_t counter) { + if (mgr->stats != NULL) { + isc_stats_decrement(mgr->stats, counter); + } +} + +static void +dispatch_log(dns_dispatch_t *disp, int level, const char *fmt, ...) + ISC_FORMAT_PRINTF(3, 4); + +static void +dispatch_log(dns_dispatch_t *disp, int level, const char *fmt, ...) { + char msgbuf[2048]; + va_list ap; + + if (!isc_log_wouldlog(dns_lctx, level)) { + return; + } + + va_start(ap, fmt); + vsnprintf(msgbuf, sizeof(msgbuf), fmt, ap); + va_end(ap); + + isc_log_write(dns_lctx, DNS_LOGCATEGORY_DISPATCH, + DNS_LOGMODULE_DISPATCH, level, "dispatch %p: %s", disp, + msgbuf); +} + +static void +request_log(dns_dispatch_t *disp, dns_dispentry_t *resp, int level, + const char *fmt, ...) ISC_FORMAT_PRINTF(4, 5); + +static void +request_log(dns_dispatch_t *disp, dns_dispentry_t *resp, int level, + const char *fmt, ...) { + char msgbuf[2048]; + char peerbuf[256]; + va_list ap; + + if (!isc_log_wouldlog(dns_lctx, level)) { + return; + } + + va_start(ap, fmt); + vsnprintf(msgbuf, sizeof(msgbuf), fmt, ap); + va_end(ap); + + if (VALID_RESPONSE(resp)) { + isc_sockaddr_format(&resp->host, peerbuf, sizeof(peerbuf)); + isc_log_write(dns_lctx, DNS_LOGCATEGORY_DISPATCH, + DNS_LOGMODULE_DISPATCH, level, + "dispatch %p response %p %s: %s", disp, resp, + peerbuf, msgbuf); + } else { + isc_log_write(dns_lctx, DNS_LOGCATEGORY_DISPATCH, + DNS_LOGMODULE_DISPATCH, level, + "dispatch %p req/resp %p: %s", disp, resp, + msgbuf); + } +} + +/* + * Return a hash of the destination and message id. + */ +static uint32_t +dns_hash(dns_qid_t *qid, const isc_sockaddr_t *dest, dns_messageid_t id, + in_port_t port) { + uint32_t ret; + + ret = isc_sockaddr_hash(dest, true); + ret ^= ((uint32_t)id << 16) | port; + ret %= qid->qid_nbuckets; + + INSIST(ret < qid->qid_nbuckets); + + return (ret); +} + +/* + * Find the first entry in 'qid'. Returns NULL if there are no entries. + */ +static dns_dispentry_t * +linear_first(dns_qid_t *qid) { + dns_dispentry_t *ret; + unsigned int bucket; + + bucket = 0; + + while (bucket < qid->qid_nbuckets) { + ret = ISC_LIST_HEAD(qid->qid_table[bucket]); + if (ret != NULL) { + return (ret); + } + bucket++; + } + + return (NULL); +} + +/* + * Find the next entry after 'resp' in 'qid'. Return NULL if there are + * no more entries. + */ +static dns_dispentry_t * +linear_next(dns_qid_t *qid, dns_dispentry_t *resp) { + dns_dispentry_t *ret; + unsigned int bucket; + + ret = ISC_LIST_NEXT(resp, link); + if (ret != NULL) { + return (ret); + } + + bucket = resp->bucket; + bucket++; + while (bucket < qid->qid_nbuckets) { + ret = ISC_LIST_HEAD(qid->qid_table[bucket]); + if (ret != NULL) { + return (ret); + } + bucket++; + } + + return (NULL); +} + +/* + * The dispatch must be locked. + */ +static bool +destroy_disp_ok(dns_dispatch_t *disp) { + if (disp->refcount != 0) { + return (false); + } + + if (disp->recv_pending != 0) { + return (false); + } + + if (!ISC_LIST_EMPTY(disp->activesockets)) { + return (false); + } + + if (disp->shutting_down == 0) { + return (false); + } + + return (true); +} + +/* + * Called when refcount reaches 0 (and safe to destroy). + * + * The dispatcher must be locked. + * The manager must not be locked. + */ +static void +destroy_disp(isc_task_t *task, isc_event_t *event) { + dns_dispatch_t *disp; + dns_dispatchmgr_t *mgr; + bool killmgr; + dispsocket_t *dispsocket; + int i; + + INSIST(event->ev_type == DNS_EVENT_DISPATCHCONTROL); + + UNUSED(task); + + disp = event->ev_arg; + mgr = disp->mgr; + + LOCK(&mgr->lock); + ISC_LIST_UNLINK(mgr->list, disp, link); + + dispatch_log(disp, LVL(90), + "shutting down; detaching from sock %p, task %p", + disp->socket, disp->task[0]); /* XXXX */ + + if (disp->sepool != NULL) { + isc_mem_destroy(&disp->sepool); + } + + if (disp->socket != NULL) { + isc_socket_detach(&disp->socket); + } + while ((dispsocket = ISC_LIST_HEAD(disp->inactivesockets)) != NULL) { + ISC_LIST_UNLINK(disp->inactivesockets, dispsocket, link); + destroy_dispsocket(disp, &dispsocket); + } + for (i = 0; i < disp->ntasks; i++) { + isc_task_detach(&disp->task[i]); + } + isc_event_free(&event); + + dispatch_free(&disp); + + killmgr = destroy_mgr_ok(mgr); + UNLOCK(&mgr->lock); + if (killmgr) { + destroy_mgr(&mgr); + } +} + +/*% + * Manipulate port table per dispatch: find an entry for a given port number, + * create a new entry, and decrement a given entry with possible clean-up. + */ +static dispportentry_t * +port_search(dns_dispatch_t *disp, in_port_t port) { + dispportentry_t *portentry; + + REQUIRE(disp->port_table != NULL); + + portentry = ISC_LIST_HEAD( + disp->port_table[port % DNS_DISPATCH_PORTTABLESIZE]); + while (portentry != NULL) { + if (portentry->port == port) { + return (portentry); + } + portentry = ISC_LIST_NEXT(portentry, link); + } + + return (NULL); +} + +static dispportentry_t * +new_portentry(dns_dispatch_t *disp, in_port_t port) { + dispportentry_t *portentry; + dns_qid_t *qid; + + REQUIRE(disp->port_table != NULL); + + portentry = isc_mem_get(disp->mgr->mctx, sizeof(*portentry)); + + portentry->port = port; + isc_refcount_init(&portentry->refs, 1); + ISC_LINK_INIT(portentry, link); + qid = DNS_QID(disp); + LOCK(&qid->lock); + ISC_LIST_APPEND(disp->port_table[port % DNS_DISPATCH_PORTTABLESIZE], + portentry, link); + UNLOCK(&qid->lock); + + return (portentry); +} + +/*% + * The caller must hold the qid->lock. + */ +static void +deref_portentry(dns_dispatch_t *disp, dispportentry_t **portentryp) { + dispportentry_t *portentry = *portentryp; + *portentryp = NULL; + + REQUIRE(disp->port_table != NULL); + REQUIRE(portentry != NULL); + + if (isc_refcount_decrement(&portentry->refs) == 1) { + ISC_LIST_UNLINK(disp->port_table[portentry->port % + DNS_DISPATCH_PORTTABLESIZE], + portentry, link); + isc_mem_put(disp->mgr->mctx, portentry, sizeof(*portentry)); + } +} + +/*% + * Find a dispsocket for socket address 'dest', and port number 'port'. + * Return NULL if no such entry exists. Requires qid->lock to be held. + */ +static dispsocket_t * +socket_search(dns_qid_t *qid, const isc_sockaddr_t *dest, in_port_t port, + unsigned int bucket) { + dispsocket_t *dispsock; + + REQUIRE(VALID_QID(qid)); + REQUIRE(bucket < qid->qid_nbuckets); + + dispsock = ISC_LIST_HEAD(qid->sock_table[bucket]); + + while (dispsock != NULL) { + if (dispsock->portentry != NULL && + dispsock->portentry->port == port && + isc_sockaddr_equal(dest, &dispsock->host)) + { + return (dispsock); + } + dispsock = ISC_LIST_NEXT(dispsock, blink); + } + + return (NULL); +} + +/*% + * Make a new socket for a single dispatch with a random port number. + * The caller must hold the disp->lock + */ +static isc_result_t +get_dispsocket(dns_dispatch_t *disp, const isc_sockaddr_t *dest, + isc_socketmgr_t *sockmgr, dispsocket_t **dispsockp, + in_port_t *portp) { + int i; + dns_dispatchmgr_t *mgr = disp->mgr; + isc_socket_t *sock = NULL; + isc_result_t result = ISC_R_FAILURE; + in_port_t port; + isc_sockaddr_t localaddr; + unsigned int bucket = 0; + dispsocket_t *dispsock; + unsigned int nports; + in_port_t *ports; + isc_socket_options_t bindoptions; + dispportentry_t *portentry = NULL; + dns_qid_t *qid; + + if (isc_sockaddr_pf(&disp->local) == AF_INET) { + nports = disp->mgr->nv4ports; + ports = disp->mgr->v4ports; + } else { + nports = disp->mgr->nv6ports; + ports = disp->mgr->v6ports; + } + if (nports == 0) { + return (ISC_R_ADDRNOTAVAIL); + } + + dispsock = ISC_LIST_HEAD(disp->inactivesockets); + if (dispsock != NULL) { + ISC_LIST_UNLINK(disp->inactivesockets, dispsock, link); + sock = dispsock->socket; + dispsock->socket = NULL; + } else { + dispsock = isc_mem_get(mgr->mctx, sizeof(*dispsock)); + + disp->nsockets++; + dispsock->socket = NULL; + dispsock->disp = disp; + dispsock->resp = NULL; + dispsock->portentry = NULL; + dispsock->task = NULL; + isc_task_attach(disp->task[isc_random_uniform(disp->ntasks)], + &dispsock->task); + ISC_LINK_INIT(dispsock, link); + ISC_LINK_INIT(dispsock, blink); + dispsock->magic = DISPSOCK_MAGIC; + } + + /* + * Pick up a random UDP port and open a new socket with it. Avoid + * choosing ports that share the same destination because it will be + * very likely to fail in bind(2) or connect(2). + */ + localaddr = disp->local; + qid = DNS_QID(disp); + + for (i = 0; i < 64; i++) { + port = ports[isc_random_uniform(nports)]; + isc_sockaddr_setport(&localaddr, port); + + LOCK(&qid->lock); + bucket = dns_hash(qid, dest, 0, port); + if (socket_search(qid, dest, port, bucket) != NULL) { + UNLOCK(&qid->lock); + continue; + } + UNLOCK(&qid->lock); + bindoptions = 0; + portentry = port_search(disp, port); + + if (portentry != NULL) { + bindoptions |= ISC_SOCKET_REUSEADDRESS; + } + result = open_socket(sockmgr, &localaddr, bindoptions, &sock, + NULL, false); + if (result == ISC_R_SUCCESS) { + if (portentry == NULL) { + portentry = new_portentry(disp, port); + if (portentry == NULL) { + result = ISC_R_NOMEMORY; + break; + } + } else { + isc_refcount_increment(&portentry->refs); + } + break; + } else if (result == ISC_R_NOPERM) { + char buf[ISC_SOCKADDR_FORMATSIZE]; + isc_sockaddr_format(&localaddr, buf, sizeof(buf)); + dispatch_log(disp, ISC_LOG_WARNING, + "open_socket(%s) -> %s: continuing", buf, + isc_result_totext(result)); + } else if (result != ISC_R_ADDRINUSE) { + break; + } + } + + if (result == ISC_R_SUCCESS) { + dispsock->socket = sock; + dispsock->host = *dest; + dispsock->bucket = bucket; + LOCK(&qid->lock); + dispsock->portentry = portentry; + ISC_LIST_APPEND(qid->sock_table[bucket], dispsock, blink); + UNLOCK(&qid->lock); + *dispsockp = dispsock; + *portp = port; + } else { + /* + * We could keep it in the inactive list, but since this should + * be an exceptional case and might be resource shortage, we'd + * rather destroy it. + */ + if (sock != NULL) { + isc_socket_detach(&sock); + } + destroy_dispsocket(disp, &dispsock); + } + + return (result); +} + +/*% + * Destroy a dedicated dispatch socket. + */ +static void +destroy_dispsocket(dns_dispatch_t *disp, dispsocket_t **dispsockp) { + dispsocket_t *dispsock; + dns_qid_t *qid = DNS_QID(disp); + + /* + * The dispatch must be locked. + */ + + REQUIRE(dispsockp != NULL && *dispsockp != NULL); + dispsock = *dispsockp; + *dispsockp = NULL; + REQUIRE(!ISC_LINK_LINKED(dispsock, link)); + + disp->nsockets--; + dispsock->magic = 0; + if (dispsock->portentry != NULL) { + /* socket_search() tests and dereferences portentry. */ + LOCK(&qid->lock); + deref_portentry(disp, &dispsock->portentry); + UNLOCK(&qid->lock); + } + if (dispsock->socket != NULL) { + isc_socket_detach(&dispsock->socket); + } + if (ISC_LINK_LINKED(dispsock, blink)) { + LOCK(&qid->lock); + ISC_LIST_UNLINK(qid->sock_table[dispsock->bucket], dispsock, + blink); + UNLOCK(&qid->lock); + } + if (dispsock->task != NULL) { + isc_task_detach(&dispsock->task); + } + isc_mem_put(disp->mgr->mctx, dispsock, sizeof(*dispsock)); +} + +/*% + * Deactivate a dedicated dispatch socket. Move it to the inactive list for + * future reuse unless the total number of sockets are exceeding the maximum. + */ +static void +deactivate_dispsocket(dns_dispatch_t *disp, dispsocket_t *dispsock) { + isc_result_t result; + dns_qid_t *qid = DNS_QID(disp); + + /* + * The dispatch must be locked. + */ + ISC_LIST_UNLINK(disp->activesockets, dispsock, link); + if (dispsock->resp != NULL) { + INSIST(dispsock->resp->dispsocket == dispsock); + dispsock->resp->dispsocket = NULL; + } + + INSIST(dispsock->portentry != NULL); + /* socket_search() tests and dereferences portentry. */ + LOCK(&qid->lock); + deref_portentry(disp, &dispsock->portentry); + UNLOCK(&qid->lock); + + if (disp->nsockets > DNS_DISPATCH_POOLSOCKS) { + destroy_dispsocket(disp, &dispsock); + } else { + result = isc_socket_close(dispsock->socket); + + LOCK(&qid->lock); + ISC_LIST_UNLINK(qid->sock_table[dispsock->bucket], dispsock, + blink); + UNLOCK(&qid->lock); + + if (result == ISC_R_SUCCESS) { + ISC_LIST_APPEND(disp->inactivesockets, dispsock, link); + } else { + /* + * If the underlying system does not allow this + * optimization, destroy this temporary structure (and + * create a new one for a new transaction). + */ + INSIST(result == ISC_R_NOTIMPLEMENTED); + destroy_dispsocket(disp, &dispsock); + } + } +} + +/* + * Find an entry for query ID 'id', socket address 'dest', and port number + * 'port'. + * Return NULL if no such entry exists. + */ +static dns_dispentry_t * +entry_search(dns_qid_t *qid, const isc_sockaddr_t *dest, dns_messageid_t id, + in_port_t port, unsigned int bucket) { + dns_dispentry_t *res; + + REQUIRE(VALID_QID(qid)); + REQUIRE(bucket < qid->qid_nbuckets); + + res = ISC_LIST_HEAD(qid->qid_table[bucket]); + + while (res != NULL) { + if (res->id == id && isc_sockaddr_equal(dest, &res->host) && + res->port == port) + { + return (res); + } + res = ISC_LIST_NEXT(res, link); + } + + return (NULL); +} + +static void +free_buffer(dns_dispatch_t *disp, void *buf, unsigned int len) { + unsigned int buffersize; + INSIST(buf != NULL && len != 0); + + switch (disp->socktype) { + case isc_sockettype_tcp: + INSIST(disp->tcpbuffers > 0); + disp->tcpbuffers--; + isc_mem_put(disp->mgr->mctx, buf, len); + break; + case isc_sockettype_udp: + LOCK(&disp->mgr->buffer_lock); + INSIST(disp->mgr->buffers > 0); + INSIST(len == disp->mgr->buffersize); + disp->mgr->buffers--; + buffersize = disp->mgr->buffersize; + UNLOCK(&disp->mgr->buffer_lock); + isc_mem_put(disp->mgr->mctx, buf, buffersize); + break; + default: + UNREACHABLE(); + } +} + +static void * +allocate_udp_buffer(dns_dispatch_t *disp) { + unsigned int buffersize; + + LOCK(&disp->mgr->buffer_lock); + if (disp->mgr->buffers >= disp->mgr->maxbuffers) { + UNLOCK(&disp->mgr->buffer_lock); + return (NULL); + } + buffersize = disp->mgr->buffersize; + disp->mgr->buffers++; + UNLOCK(&disp->mgr->buffer_lock); + + return (isc_mem_get(disp->mgr->mctx, buffersize)); +} + +static void +free_sevent(isc_event_t *ev) { + isc_mem_t *pool = ev->ev_destroy_arg; + isc_socketevent_t *sev = (isc_socketevent_t *)ev; + isc_mem_put(pool, sev, sizeof(*sev)); +} + +static isc_socketevent_t * +allocate_sevent(dns_dispatch_t *disp, isc_socket_t *sock, isc_eventtype_t type, + isc_taskaction_t action, const void *arg) { + isc_socketevent_t *ev; + void *deconst_arg; + + ev = isc_mem_get(disp->sepool, sizeof(*ev)); + DE_CONST(arg, deconst_arg); + ISC_EVENT_INIT(ev, sizeof(*ev), 0, NULL, type, action, deconst_arg, + sock, free_sevent, disp->sepool); + ev->result = ISC_R_UNSET; + ISC_LINK_INIT(ev, ev_link); + ev->region.base = NULL; + ev->n = 0; + ev->offset = 0; + ev->attributes = 0; + + return (ev); +} + +static void +free_devent(dns_dispatch_t *disp, dns_dispatchevent_t *ev) { + if (disp->failsafe_ev == ev) { + INSIST(disp->shutdown_out == 1); + disp->shutdown_out = 0; + + return; + } + + isc_refcount_decrement(&disp->mgr->irefs); + isc_mem_put(disp->mgr->mctx, ev, sizeof(*ev)); +} + +static dns_dispatchevent_t * +allocate_devent(dns_dispatch_t *disp) { + dns_dispatchevent_t *ev; + + ev = isc_mem_get(disp->mgr->mctx, sizeof(*ev)); + isc_refcount_increment0(&disp->mgr->irefs); + ISC_EVENT_INIT(ev, sizeof(*ev), 0, NULL, 0, NULL, NULL, NULL, NULL, + NULL); + + return (ev); +} + +static void +udp_exrecv(isc_task_t *task, isc_event_t *ev) { + dispsocket_t *dispsock = ev->ev_arg; + + UNUSED(task); + + REQUIRE(VALID_DISPSOCK(dispsock)); + udp_recv(ev, dispsock->disp, dispsock); +} + +static void +udp_shrecv(isc_task_t *task, isc_event_t *ev) { + dns_dispatch_t *disp = ev->ev_arg; + + UNUSED(task); + + REQUIRE(VALID_DISPATCH(disp)); + udp_recv(ev, disp, NULL); +} + +/* + * General flow: + * + * If I/O result == CANCELED or error, free the buffer. + * + * If query, free the buffer, restart. + * + * If response: + * Allocate event, fill in details. + * If cannot allocate, free buffer, restart. + * find target. If not found, free buffer, restart. + * if event queue is not empty, queue. else, send. + * restart. + */ +static void +udp_recv(isc_event_t *ev_in, dns_dispatch_t *disp, dispsocket_t *dispsock) { + isc_socketevent_t *ev = (isc_socketevent_t *)ev_in; + dns_messageid_t id; + isc_result_t dres; + isc_buffer_t source; + unsigned int flags; + dns_dispentry_t *resp = NULL; + dns_dispatchevent_t *rev; + unsigned int bucket; + bool killit; + bool queue_response; + dns_dispatchmgr_t *mgr; + dns_qid_t *qid; + isc_netaddr_t netaddr; + int match; + int result; + bool qidlocked = false; + + LOCK(&disp->lock); + + mgr = disp->mgr; + qid = mgr->qid; + + LOCK(&disp->mgr->buffer_lock); + dispatch_log(disp, LVL(90), + "got packet: requests %d, buffers %d, recvs %d", + disp->requests, disp->mgr->buffers, disp->recv_pending); + UNLOCK(&disp->mgr->buffer_lock); + + if (dispsock == NULL && ev->ev_type == ISC_SOCKEVENT_RECVDONE) { + /* + * Unless the receive event was imported from a listening + * interface, in which case the event type is + * DNS_EVENT_IMPORTRECVDONE, receive operation must be pending. + */ + INSIST(disp->recv_pending != 0); + disp->recv_pending = 0; + } + + if (dispsock != NULL && + (ev->result == ISC_R_CANCELED || dispsock->resp == NULL)) + { + /* + * dispsock->resp can be NULL if this transaction was canceled + * just after receiving a response. Since this socket is + * exclusively used and there should be at most one receive + * event the canceled event should have been no effect. So + * we can (and should) deactivate the socket right now. + */ + deactivate_dispsocket(disp, dispsock); + dispsock = NULL; + } + + if (disp->shutting_down) { + /* + * This dispatcher is shutting down. + */ + free_buffer(disp, ev->region.base, ev->region.length); + + isc_event_free(&ev_in); + ev = NULL; + + killit = destroy_disp_ok(disp); + UNLOCK(&disp->lock); + if (killit) { + isc_task_send(disp->task[0], &disp->ctlevent); + } + + return; + } + + if ((disp->attributes & DNS_DISPATCHATTR_EXCLUSIVE) != 0) { + if (dispsock != NULL) { + resp = dispsock->resp; + id = resp->id; + if (ev->result != ISC_R_SUCCESS) { + /* + * This is most likely a network error on a + * connected socket. It makes no sense to + * check the address or parse the packet, but it + * will help to return the error to the caller. + */ + goto sendresponse; + } + } else { + free_buffer(disp, ev->region.base, ev->region.length); + + isc_event_free(&ev_in); + UNLOCK(&disp->lock); + return; + } + } else if (ev->result != ISC_R_SUCCESS) { + free_buffer(disp, ev->region.base, ev->region.length); + + if (ev->result != ISC_R_CANCELED) { + dispatch_log(disp, ISC_LOG_ERROR, + "odd socket result in udp_recv(): %s", + isc_result_totext(ev->result)); + } + + isc_event_free(&ev_in); + UNLOCK(&disp->lock); + return; + } + + /* + * If this is from a blackholed address, drop it. + */ + isc_netaddr_fromsockaddr(&netaddr, &ev->address); + if (disp->mgr->blackhole != NULL && + dns_acl_match(&netaddr, NULL, disp->mgr->blackhole, NULL, &match, + NULL) == ISC_R_SUCCESS && + match > 0) + { + if (isc_log_wouldlog(dns_lctx, LVL(10))) { + char netaddrstr[ISC_NETADDR_FORMATSIZE]; + isc_netaddr_format(&netaddr, netaddrstr, + sizeof(netaddrstr)); + dispatch_log(disp, LVL(10), "blackholed packet from %s", + netaddrstr); + } + free_buffer(disp, ev->region.base, ev->region.length); + goto restart; + } + + /* + * Peek into the buffer to see what we can see. + */ + isc_buffer_init(&source, ev->region.base, ev->region.length); + isc_buffer_add(&source, ev->n); + dres = dns_message_peekheader(&source, &id, &flags); + if (dres != ISC_R_SUCCESS) { + free_buffer(disp, ev->region.base, ev->region.length); + dispatch_log(disp, LVL(10), "got garbage packet"); + goto restart; + } + + dispatch_log(disp, LVL(92), + "got valid DNS message header, /QR %c, id %u", + (((flags & DNS_MESSAGEFLAG_QR) != 0) ? '1' : '0'), id); + + /* + * Look at flags. If query, drop it. If response, + * look to see where it goes. + */ + if ((flags & DNS_MESSAGEFLAG_QR) == 0) { + /* query */ + free_buffer(disp, ev->region.base, ev->region.length); + goto restart; + } + + /* + * Search for the corresponding response. If we are using an exclusive + * socket, we've already identified it and we can skip the search; but + * the ID and the address must match the expected ones. + */ + if (resp == NULL) { + bucket = dns_hash(qid, &ev->address, id, disp->localport); + LOCK(&qid->lock); + qidlocked = true; + resp = entry_search(qid, &ev->address, id, disp->localport, + bucket); + dispatch_log(disp, LVL(90), + "search for response in bucket %d: %s", bucket, + (resp == NULL ? "not found" : "found")); + + } else if (resp->id != id || + !isc_sockaddr_equal(&ev->address, &resp->host)) + { + dispatch_log(disp, LVL(90), + "response to an exclusive socket doesn't match"); + inc_stats(mgr, dns_resstatscounter_mismatch); + free_buffer(disp, ev->region.base, ev->region.length); + goto unlock; + } + + if (resp == NULL) { + inc_stats(mgr, dns_resstatscounter_mismatch); + free_buffer(disp, ev->region.base, ev->region.length); + goto unlock; + } + + /* + * Now that we have the original dispatch the query was sent + * from check that the address and port the response was + * sent to make sense. + */ + if (disp != resp->disp) { + isc_sockaddr_t a1; + isc_sockaddr_t a2; + + /* + * Check that the socket types and ports match. + */ + if (disp->socktype != resp->disp->socktype || + isc_sockaddr_getport(&disp->local) != + isc_sockaddr_getport(&resp->disp->local)) + { + free_buffer(disp, ev->region.base, ev->region.length); + goto unlock; + } + + /* + * If each dispatch is bound to a different address + * then fail. + * + * Note under Linux a packet can be sent out via IPv4 socket + * and the response be received via a IPv6 socket. + * + * Requests sent out via IPv6 should always come back in + * via IPv6. + */ + if (isc_sockaddr_pf(&resp->disp->local) == PF_INET6 && + isc_sockaddr_pf(&disp->local) != PF_INET6) + { + free_buffer(disp, ev->region.base, ev->region.length); + goto unlock; + } + isc_sockaddr_anyofpf(&a1, isc_sockaddr_pf(&resp->disp->local)); + isc_sockaddr_anyofpf(&a2, isc_sockaddr_pf(&disp->local)); + if (!isc_sockaddr_eqaddr(&disp->local, &resp->disp->local) && + !isc_sockaddr_eqaddr(&a1, &resp->disp->local) && + !isc_sockaddr_eqaddr(&a2, &disp->local)) + { + free_buffer(disp, ev->region.base, ev->region.length); + goto unlock; + } + } + +sendresponse: + queue_response = resp->item_out; + rev = allocate_devent(resp->disp); + if (rev == NULL) { + free_buffer(disp, ev->region.base, ev->region.length); + goto unlock; + } + + /* + * At this point, rev contains the event we want to fill in, and + * resp contains the information on the place to send it to. + * Send the event off. + */ + isc_buffer_init(&rev->buffer, ev->region.base, ev->region.length); + isc_buffer_add(&rev->buffer, ev->n); + rev->result = ev->result; + rev->id = id; + rev->addr = ev->address; + rev->pktinfo = ev->pktinfo; + rev->attributes = ev->attributes; + if (queue_response) { + ISC_LIST_APPEND(resp->items, rev, ev_link); + } else { + ISC_EVENT_INIT(rev, sizeof(*rev), 0, NULL, DNS_EVENT_DISPATCH, + resp->action, resp->arg, resp, NULL, NULL); + request_log(disp, resp, LVL(90), + "[a] Sent event %p buffer %p len %d to task %p", + rev, rev->buffer.base, rev->buffer.length, + resp->task); + resp->item_out = true; + isc_task_send(resp->task, ISC_EVENT_PTR(&rev)); + } +unlock: + if (qidlocked) { + UNLOCK(&qid->lock); + } + + /* + * Restart recv() to get the next packet. + */ +restart: + result = startrecv(disp, dispsock); + if (result != ISC_R_SUCCESS && dispsock != NULL) { + /* + * XXX: wired. There seems to be no recovery process other than + * deactivate this socket anyway (since we cannot start + * receiving, we won't be able to receive a cancel event + * from the user). + */ + deactivate_dispsocket(disp, dispsock); + } + isc_event_free(&ev_in); + UNLOCK(&disp->lock); +} + +/* + * General flow: + * + * If I/O result == CANCELED, EOF, or error, notify everyone as the + * various queues drain. + * + * If query, restart. + * + * If response: + * Allocate event, fill in details. + * If cannot allocate, restart. + * find target. If not found, restart. + * if event queue is not empty, queue. else, send. + * restart. + */ +static void +tcp_recv(isc_task_t *task, isc_event_t *ev_in) { + dns_dispatch_t *disp = ev_in->ev_arg; + dns_tcpmsg_t *tcpmsg = &disp->tcpmsg; + dns_messageid_t id; + isc_result_t dres; + unsigned int flags; + dns_dispentry_t *resp; + dns_dispatchevent_t *rev; + unsigned int bucket; + bool killit; + bool queue_response; + dns_qid_t *qid; + int level; + char buf[ISC_SOCKADDR_FORMATSIZE]; + + UNUSED(task); + + REQUIRE(VALID_DISPATCH(disp)); + + qid = disp->qid; + + LOCK(&disp->lock); + + dispatch_log(disp, LVL(90), + "got TCP packet: requests %d, buffers %d, recvs %d", + disp->requests, disp->tcpbuffers, disp->recv_pending); + + INSIST(disp->recv_pending != 0); + disp->recv_pending = 0; + + if (disp->refcount == 0) { + /* + * This dispatcher is shutting down. Force cancellation. + */ + tcpmsg->result = ISC_R_CANCELED; + } + + if (tcpmsg->result != ISC_R_SUCCESS) { + switch (tcpmsg->result) { + case ISC_R_CANCELED: + break; + + case ISC_R_EOF: + dispatch_log(disp, LVL(90), "shutting down on EOF"); + do_cancel(disp); + break; + + case ISC_R_CONNECTIONRESET: + level = ISC_LOG_INFO; + goto logit; + + default: + level = ISC_LOG_ERROR; + logit: + isc_sockaddr_format(&tcpmsg->address, buf, sizeof(buf)); + dispatch_log(disp, level, + "shutting down due to TCP " + "receive error: %s: %s", + buf, isc_result_totext(tcpmsg->result)); + do_cancel(disp); + break; + } + + /* + * The event is statically allocated in the tcpmsg + * structure, and destroy_disp() frees the tcpmsg, so we must + * free the event *before* calling destroy_disp(). + */ + isc_event_free(&ev_in); + + disp->shutting_down = 1; + disp->shutdown_why = tcpmsg->result; + + /* + * If the recv() was canceled pass the word on. + */ + killit = destroy_disp_ok(disp); + UNLOCK(&disp->lock); + if (killit) { + isc_task_send(disp->task[0], &disp->ctlevent); + } + return; + } + + dispatch_log(disp, LVL(90), "result %d, length == %d, addr = %p", + tcpmsg->result, tcpmsg->buffer.length, + tcpmsg->buffer.base); + + /* + * Peek into the buffer to see what we can see. + */ + dres = dns_message_peekheader(&tcpmsg->buffer, &id, &flags); + if (dres != ISC_R_SUCCESS) { + dispatch_log(disp, LVL(10), "got garbage packet"); + goto restart; + } + + dispatch_log(disp, LVL(92), + "got valid DNS message header, /QR %c, id %u", + (((flags & DNS_MESSAGEFLAG_QR) != 0) ? '1' : '0'), id); + + /* + * Allocate an event to send to the query or response client, and + * allocate a new buffer for our use. + */ + + /* + * Look at flags. If query, drop it. If response, + * look to see where it goes. + */ + if ((flags & DNS_MESSAGEFLAG_QR) == 0) { + /* + * Query. + */ + goto restart; + } + + /* + * Response. + */ + bucket = dns_hash(qid, &tcpmsg->address, id, disp->localport); + LOCK(&qid->lock); + resp = entry_search(qid, &tcpmsg->address, id, disp->localport, bucket); + dispatch_log(disp, LVL(90), "search for response in bucket %d: %s", + bucket, (resp == NULL ? "not found" : "found")); + + if (resp == NULL) { + goto unlock; + } + queue_response = resp->item_out; + rev = allocate_devent(disp); + if (rev == NULL) { + goto unlock; + } + + /* + * At this point, rev contains the event we want to fill in, and + * resp contains the information on the place to send it to. + * Send the event off. + */ + dns_tcpmsg_keepbuffer(tcpmsg, &rev->buffer); + disp->tcpbuffers++; + rev->result = ISC_R_SUCCESS; + rev->id = id; + rev->addr = tcpmsg->address; + if (queue_response) { + ISC_LIST_APPEND(resp->items, rev, ev_link); + } else { + ISC_EVENT_INIT(rev, sizeof(*rev), 0, NULL, DNS_EVENT_DISPATCH, + resp->action, resp->arg, resp, NULL, NULL); + request_log(disp, resp, LVL(90), + "[b] Sent event %p buffer %p len %d to task %p", + rev, rev->buffer.base, rev->buffer.length, + resp->task); + resp->item_out = true; + isc_task_send(resp->task, ISC_EVENT_PTR(&rev)); + } +unlock: + UNLOCK(&qid->lock); + + /* + * Restart recv() to get the next packet. + */ +restart: + (void)startrecv(disp, NULL); + + isc_event_free(&ev_in); + UNLOCK(&disp->lock); +} + +/* + * disp must be locked. + */ +static isc_result_t +startrecv(dns_dispatch_t *disp, dispsocket_t *dispsock) { + isc_result_t res; + isc_region_t region; + isc_socket_t *sock; + + if (disp->shutting_down == 1) { + return (ISC_R_SUCCESS); + } + + if ((disp->attributes & DNS_DISPATCHATTR_NOLISTEN) != 0) { + return (ISC_R_SUCCESS); + } + + if (disp->recv_pending != 0 && dispsock == NULL) { + return (ISC_R_SUCCESS); + } + + if ((disp->attributes & DNS_DISPATCHATTR_EXCLUSIVE) != 0 && + dispsock == NULL) + { + return (ISC_R_SUCCESS); + } + + if (dispsock != NULL) { + sock = dispsock->socket; + } else { + sock = disp->socket; + } + INSIST(sock != NULL); + + switch (disp->socktype) { + /* + * UDP reads are always maximal. + */ + case isc_sockettype_udp: + region.length = disp->mgr->buffersize; + region.base = allocate_udp_buffer(disp); + if (region.base == NULL) { + return (ISC_R_NOMEMORY); + } + if (dispsock != NULL) { + isc_task_t *dt = dispsock->task; + isc_socketevent_t *sev = allocate_sevent( + disp, sock, ISC_SOCKEVENT_RECVDONE, udp_exrecv, + dispsock); + if (sev == NULL) { + free_buffer(disp, region.base, region.length); + return (ISC_R_NOMEMORY); + } + + res = isc_socket_recv2(sock, ®ion, 1, dt, sev, 0); + if (res != ISC_R_SUCCESS) { + free_buffer(disp, region.base, region.length); + return (res); + } + } else { + isc_task_t *dt = disp->task[0]; + isc_socketevent_t *sev = allocate_sevent( + disp, sock, ISC_SOCKEVENT_RECVDONE, udp_shrecv, + disp); + if (sev == NULL) { + free_buffer(disp, region.base, region.length); + return (ISC_R_NOMEMORY); + } + + res = isc_socket_recv2(sock, ®ion, 1, dt, sev, 0); + if (res != ISC_R_SUCCESS) { + free_buffer(disp, region.base, region.length); + disp->shutdown_why = res; + disp->shutting_down = 1; + do_cancel(disp); + return (ISC_R_SUCCESS); /* recover by cancel */ + } + INSIST(disp->recv_pending == 0); + disp->recv_pending = 1; + } + break; + + case isc_sockettype_tcp: + res = dns_tcpmsg_readmessage(&disp->tcpmsg, disp->task[0], + tcp_recv, disp); + if (res != ISC_R_SUCCESS) { + disp->shutdown_why = res; + disp->shutting_down = 1; + do_cancel(disp); + return (ISC_R_SUCCESS); /* recover by cancel */ + } + INSIST(disp->recv_pending == 0); + disp->recv_pending = 1; + break; + default: + UNREACHABLE(); + } + + return (ISC_R_SUCCESS); +} + +/* + * Mgr must be locked when calling this function. + */ +static bool +destroy_mgr_ok(dns_dispatchmgr_t *mgr) { + mgr_log(mgr, LVL(90), + "destroy_mgr_ok: shuttingdown=%d, listnonempty=%d, ", + MGR_IS_SHUTTINGDOWN(mgr), !ISC_LIST_EMPTY(mgr->list)); + if (!MGR_IS_SHUTTINGDOWN(mgr)) { + return (false); + } + if (!ISC_LIST_EMPTY(mgr->list)) { + return (false); + } + if (isc_refcount_current(&mgr->irefs) != 0) { + return (false); + } + + return (true); +} + +/* + * Mgr must be unlocked when calling this function. + */ +static void +destroy_mgr(dns_dispatchmgr_t **mgrp) { + dns_dispatchmgr_t *mgr; + + mgr = *mgrp; + *mgrp = NULL; + + mgr->magic = 0; + isc_mutex_destroy(&mgr->lock); + mgr->state = 0; + + if (mgr->qid != NULL) { + qid_destroy(mgr->mctx, &mgr->qid); + } + + isc_mutex_destroy(&mgr->buffer_lock); + + if (mgr->blackhole != NULL) { + dns_acl_detach(&mgr->blackhole); + } + + if (mgr->stats != NULL) { + isc_stats_detach(&mgr->stats); + } + + if (mgr->v4ports != NULL) { + isc_mem_put(mgr->mctx, mgr->v4ports, + mgr->nv4ports * sizeof(in_port_t)); + } + if (mgr->v6ports != NULL) { + isc_mem_put(mgr->mctx, mgr->v6ports, + mgr->nv6ports * sizeof(in_port_t)); + } + isc_mem_putanddetach(&mgr->mctx, mgr, sizeof(dns_dispatchmgr_t)); +} + +static isc_result_t +open_socket(isc_socketmgr_t *mgr, const isc_sockaddr_t *local, + unsigned int options, isc_socket_t **sockp, + isc_socket_t *dup_socket, bool duponly) { + isc_socket_t *sock; + isc_result_t result; + + sock = *sockp; + if (sock != NULL) { + result = isc_socket_open(sock); + if (result != ISC_R_SUCCESS) { + return (result); + } + } else if (dup_socket != NULL && + (!isc_socket_hasreuseport() || duponly)) + { + result = isc_socket_dup(dup_socket, &sock); + if (result != ISC_R_SUCCESS) { + return (result); + } + + isc_socket_setname(sock, "dispatcher", NULL); + *sockp = sock; + return (ISC_R_SUCCESS); + } else { + result = isc_socket_create(mgr, isc_sockaddr_pf(local), + isc_sockettype_udp, &sock); + if (result != ISC_R_SUCCESS) { + return (result); + } + } + + isc_socket_setname(sock, "dispatcher", NULL); + +#ifndef ISC_ALLOW_MAPPED + isc_socket_ipv6only(sock, true); +#endif /* ifndef ISC_ALLOW_MAPPED */ + result = isc_socket_bind(sock, local, options); + if (result != ISC_R_SUCCESS) { + if (*sockp == NULL) { + isc_socket_detach(&sock); + } else { + isc_socket_close(sock); + } + return (result); + } + + *sockp = sock; + return (ISC_R_SUCCESS); +} + +/*% + * Create a temporary port list to set the initial default set of dispatch + * ports: [1024, 65535]. This is almost meaningless as the application will + * normally set the ports explicitly, but is provided to fill some minor corner + * cases. + */ +static isc_result_t +create_default_portset(isc_mem_t *mctx, isc_portset_t **portsetp) { + isc_result_t result; + + result = isc_portset_create(mctx, portsetp); + if (result != ISC_R_SUCCESS) { + return (result); + } + isc_portset_addrange(*portsetp, 1024, 65535); + + return (ISC_R_SUCCESS); +} + +/* + * Publics. + */ + +isc_result_t +dns_dispatchmgr_create(isc_mem_t *mctx, dns_dispatchmgr_t **mgrp) { + dns_dispatchmgr_t *mgr; + isc_result_t result; + isc_portset_t *v4portset = NULL; + isc_portset_t *v6portset = NULL; + + REQUIRE(mctx != NULL); + REQUIRE(mgrp != NULL && *mgrp == NULL); + + mgr = isc_mem_get(mctx, sizeof(dns_dispatchmgr_t)); + *mgr = (dns_dispatchmgr_t){ 0 }; + + isc_mem_attach(mctx, &mgr->mctx); + + isc_mutex_init(&mgr->lock); + isc_mutex_init(&mgr->buffer_lock); + + isc_refcount_init(&mgr->irefs, 0); + + ISC_LIST_INIT(mgr->list); + + mgr->magic = DNS_DISPATCHMGR_MAGIC; + + result = create_default_portset(mctx, &v4portset); + if (result == ISC_R_SUCCESS) { + result = create_default_portset(mctx, &v6portset); + if (result == ISC_R_SUCCESS) { + result = dns_dispatchmgr_setavailports(mgr, v4portset, + v6portset); + } + } + if (v4portset != NULL) { + isc_portset_destroy(mctx, &v4portset); + } + if (v6portset != NULL) { + isc_portset_destroy(mctx, &v6portset); + } + if (result != ISC_R_SUCCESS) { + goto kill_dpool; + } + + *mgrp = mgr; + return (ISC_R_SUCCESS); + +kill_dpool: + isc_mutex_destroy(&mgr->buffer_lock); + isc_mutex_destroy(&mgr->lock); + isc_mem_putanddetach(&mctx, mgr, sizeof(dns_dispatchmgr_t)); + + return (result); +} + +void +dns_dispatchmgr_setblackhole(dns_dispatchmgr_t *mgr, dns_acl_t *blackhole) { + REQUIRE(VALID_DISPATCHMGR(mgr)); + if (mgr->blackhole != NULL) { + dns_acl_detach(&mgr->blackhole); + } + dns_acl_attach(blackhole, &mgr->blackhole); +} + +dns_acl_t * +dns_dispatchmgr_getblackhole(dns_dispatchmgr_t *mgr) { + REQUIRE(VALID_DISPATCHMGR(mgr)); + return (mgr->blackhole); +} + +void +dns_dispatchmgr_setblackportlist(dns_dispatchmgr_t *mgr, + dns_portlist_t *portlist) { + REQUIRE(VALID_DISPATCHMGR(mgr)); + UNUSED(portlist); + + /* This function is deprecated: use dns_dispatchmgr_setavailports(). */ + return; +} + +dns_portlist_t * +dns_dispatchmgr_getblackportlist(dns_dispatchmgr_t *mgr) { + REQUIRE(VALID_DISPATCHMGR(mgr)); + return (NULL); /* this function is deprecated */ +} + +isc_result_t +dns_dispatchmgr_setavailports(dns_dispatchmgr_t *mgr, isc_portset_t *v4portset, + isc_portset_t *v6portset) { + in_port_t *v4ports, *v6ports, p; + unsigned int nv4ports, nv6ports, i4, i6; + + REQUIRE(VALID_DISPATCHMGR(mgr)); + + nv4ports = isc_portset_nports(v4portset); + nv6ports = isc_portset_nports(v6portset); + + v4ports = NULL; + if (nv4ports != 0) { + v4ports = isc_mem_get(mgr->mctx, sizeof(in_port_t) * nv4ports); + } + v6ports = NULL; + if (nv6ports != 0) { + v6ports = isc_mem_get(mgr->mctx, sizeof(in_port_t) * nv6ports); + } + + p = 0; + i4 = 0; + i6 = 0; + do { + if (isc_portset_isset(v4portset, p)) { + INSIST(i4 < nv4ports); + v4ports[i4++] = p; + } + if (isc_portset_isset(v6portset, p)) { + INSIST(i6 < nv6ports); + v6ports[i6++] = p; + } + } while (p++ < 65535); + INSIST(i4 == nv4ports && i6 == nv6ports); + + PORTBUFLOCK(mgr); + if (mgr->v4ports != NULL) { + isc_mem_put(mgr->mctx, mgr->v4ports, + mgr->nv4ports * sizeof(in_port_t)); + } + mgr->v4ports = v4ports; + mgr->nv4ports = nv4ports; + + if (mgr->v6ports != NULL) { + isc_mem_put(mgr->mctx, mgr->v6ports, + mgr->nv6ports * sizeof(in_port_t)); + } + mgr->v6ports = v6ports; + mgr->nv6ports = nv6ports; + PORTBUFUNLOCK(mgr); + + return (ISC_R_SUCCESS); +} + +static isc_result_t +dns_dispatchmgr_setudp(dns_dispatchmgr_t *mgr, unsigned int buffersize, + unsigned int maxbuffers, unsigned int maxrequests, + unsigned int buckets, unsigned int increment) { + isc_result_t result; + + REQUIRE(VALID_DISPATCHMGR(mgr)); + REQUIRE(buffersize >= 512 && buffersize < (64 * 1024)); + REQUIRE(maxbuffers > 0); + REQUIRE(buckets < 2097169); /* next prime > 65536 * 32 */ + REQUIRE(increment > buckets); + UNUSED(maxrequests); + + /* + * Keep some number of items around. This should be a config + * option. For now, keep 8, but later keep at least two even + * if the caller wants less. This allows us to ensure certain + * things, like an event can be "freed" and the next allocation + * will always succeed. + * + * Note that if limits are placed on anything here, we use one + * event internally, so the actual limit should be "wanted + 1." + * + * XXXMLG + */ + + if (maxbuffers < 8) { + maxbuffers = 8; + } + + LOCK(&mgr->buffer_lock); + + if (maxbuffers > mgr->maxbuffers) { + mgr->maxbuffers = maxbuffers; + } + + /* Create or adjust socket pool */ + if (mgr->qid != NULL) { + UNLOCK(&mgr->buffer_lock); + return (ISC_R_SUCCESS); + } + + result = qid_allocate(mgr, buckets, increment, &mgr->qid, true); + if (result != ISC_R_SUCCESS) { + goto cleanup; + } + + mgr->buffersize = buffersize; + mgr->maxbuffers = maxbuffers; + UNLOCK(&mgr->buffer_lock); + return (ISC_R_SUCCESS); + +cleanup: + UNLOCK(&mgr->buffer_lock); + return (result); +} + +void +dns_dispatchmgr_destroy(dns_dispatchmgr_t **mgrp) { + dns_dispatchmgr_t *mgr; + bool killit; + + REQUIRE(mgrp != NULL); + REQUIRE(VALID_DISPATCHMGR(*mgrp)); + + mgr = *mgrp; + *mgrp = NULL; + + LOCK(&mgr->lock); + mgr->state |= MGR_SHUTTINGDOWN; + killit = destroy_mgr_ok(mgr); + UNLOCK(&mgr->lock); + + mgr_log(mgr, LVL(90), "destroy: killit=%d", killit); + + if (killit) { + destroy_mgr(&mgr); + } +} + +void +dns_dispatchmgr_setstats(dns_dispatchmgr_t *mgr, isc_stats_t *stats) { + REQUIRE(VALID_DISPATCHMGR(mgr)); + REQUIRE(ISC_LIST_EMPTY(mgr->list)); + REQUIRE(mgr->stats == NULL); + + isc_stats_attach(stats, &mgr->stats); +} + +static int +port_cmp(const void *key, const void *ent) { + in_port_t p1 = *(const in_port_t *)key; + in_port_t p2 = *(const in_port_t *)ent; + + if (p1 < p2) { + return (-1); + } else if (p1 == p2) { + return (0); + } else { + return (1); + } +} + +static bool +portavailable(dns_dispatchmgr_t *mgr, isc_socket_t *sock, + isc_sockaddr_t *sockaddrp) { + isc_sockaddr_t sockaddr; + isc_result_t result; + in_port_t *ports, port; + unsigned int nports; + bool available = false; + + REQUIRE(sock != NULL || sockaddrp != NULL); + + PORTBUFLOCK(mgr); + if (sock != NULL) { + sockaddrp = &sockaddr; + result = isc_socket_getsockname(sock, sockaddrp); + if (result != ISC_R_SUCCESS) { + goto unlock; + } + } + + if (isc_sockaddr_pf(sockaddrp) == AF_INET) { + ports = mgr->v4ports; + nports = mgr->nv4ports; + } else { + ports = mgr->v6ports; + nports = mgr->nv6ports; + } + if (ports == NULL) { + goto unlock; + } + + port = isc_sockaddr_getport(sockaddrp); + if (bsearch(&port, ports, nports, sizeof(in_port_t), port_cmp) != NULL) + { + available = true; + } + +unlock: + PORTBUFUNLOCK(mgr); + return (available); +} + +#define ATTRMATCH(_a1, _a2, _mask) (((_a1) & (_mask)) == ((_a2) & (_mask))) + +static bool +local_addr_match(dns_dispatch_t *disp, const isc_sockaddr_t *addr) { + isc_sockaddr_t sockaddr; + isc_result_t result; + + REQUIRE(disp->socket != NULL); + + if (addr == NULL) { + return (true); + } + + /* + * Don't match wildcard ports unless the port is available in the + * current configuration. + */ + if (isc_sockaddr_getport(addr) == 0 && + isc_sockaddr_getport(&disp->local) == 0 && + !portavailable(disp->mgr, disp->socket, NULL)) + { + return (false); + } + + /* + * Check if we match the binding <address,port>. + * Wildcard ports match/fail here. + */ + if (isc_sockaddr_equal(&disp->local, addr)) { + return (true); + } + if (isc_sockaddr_getport(addr) == 0) { + return (false); + } + + /* + * Check if we match a bound wildcard port <address,port>. + */ + if (!isc_sockaddr_eqaddr(&disp->local, addr)) { + return (false); + } + result = isc_socket_getsockname(disp->socket, &sockaddr); + if (result != ISC_R_SUCCESS) { + return (false); + } + + return (isc_sockaddr_equal(&sockaddr, addr)); +} + +/* + * Requires mgr be locked. + * + * No dispatcher can be locked by this thread when calling this function. + * + * + * NOTE: + * If a matching dispatcher is found, it is locked after this function + * returns, and must be unlocked by the caller. + */ +static isc_result_t +dispatch_find(dns_dispatchmgr_t *mgr, const isc_sockaddr_t *local, + unsigned int attributes, unsigned int mask, + dns_dispatch_t **dispp) { + dns_dispatch_t *disp; + isc_result_t result; + + /* + * Make certain that we will not match a private or exclusive dispatch. + */ + attributes &= ~(DNS_DISPATCHATTR_PRIVATE | DNS_DISPATCHATTR_EXCLUSIVE); + mask |= (DNS_DISPATCHATTR_PRIVATE | DNS_DISPATCHATTR_EXCLUSIVE); + + disp = ISC_LIST_HEAD(mgr->list); + while (disp != NULL) { + LOCK(&disp->lock); + if ((disp->shutting_down == 0) && + ATTRMATCH(disp->attributes, attributes, mask) && + local_addr_match(disp, local)) + { + break; + } + UNLOCK(&disp->lock); + disp = ISC_LIST_NEXT(disp, link); + } + + if (disp == NULL) { + result = ISC_R_NOTFOUND; + goto out; + } + + *dispp = disp; + result = ISC_R_SUCCESS; +out: + + return (result); +} + +static isc_result_t +qid_allocate(dns_dispatchmgr_t *mgr, unsigned int buckets, + unsigned int increment, dns_qid_t **qidp, bool needsocktable) { + dns_qid_t *qid; + unsigned int i; + + REQUIRE(VALID_DISPATCHMGR(mgr)); + REQUIRE(buckets < 2097169); /* next prime > 65536 * 32 */ + REQUIRE(increment > buckets); + REQUIRE(qidp != NULL && *qidp == NULL); + + qid = isc_mem_get(mgr->mctx, sizeof(*qid)); + + qid->qid_table = isc_mem_get(mgr->mctx, + buckets * sizeof(dns_displist_t)); + + qid->sock_table = NULL; + if (needsocktable) { + qid->sock_table = isc_mem_get( + mgr->mctx, buckets * sizeof(dispsocketlist_t)); + } + + isc_mutex_init(&qid->lock); + + for (i = 0; i < buckets; i++) { + ISC_LIST_INIT(qid->qid_table[i]); + if (qid->sock_table != NULL) { + ISC_LIST_INIT(qid->sock_table[i]); + } + } + + qid->qid_nbuckets = buckets; + qid->qid_increment = increment; + qid->magic = QID_MAGIC; + *qidp = qid; + return (ISC_R_SUCCESS); +} + +static void +qid_destroy(isc_mem_t *mctx, dns_qid_t **qidp) { + dns_qid_t *qid; + + REQUIRE(qidp != NULL); + qid = *qidp; + *qidp = NULL; + + REQUIRE(VALID_QID(qid)); + + qid->magic = 0; + isc_mem_put(mctx, qid->qid_table, + qid->qid_nbuckets * sizeof(dns_displist_t)); + if (qid->sock_table != NULL) { + isc_mem_put(mctx, qid->sock_table, + qid->qid_nbuckets * sizeof(dispsocketlist_t)); + } + isc_mutex_destroy(&qid->lock); + isc_mem_put(mctx, qid, sizeof(*qid)); +} + +/* + * Allocate and set important limits. + */ +static isc_result_t +dispatch_allocate(dns_dispatchmgr_t *mgr, unsigned int maxrequests, + dns_dispatch_t **dispp) { + dns_dispatch_t *disp; + isc_result_t result; + + REQUIRE(VALID_DISPATCHMGR(mgr)); + REQUIRE(dispp != NULL && *dispp == NULL); + + /* + * Set up the dispatcher, mostly. Don't bother setting some of + * the options that are controlled by tcp vs. udp, etc. + */ + + disp = isc_mem_get(mgr->mctx, sizeof(*disp)); + isc_refcount_increment0(&mgr->irefs); + + disp->magic = 0; + disp->mgr = mgr; + disp->maxrequests = maxrequests; + disp->attributes = 0; + ISC_LINK_INIT(disp, link); + disp->refcount = 1; + disp->recv_pending = 0; + memset(&disp->local, 0, sizeof(disp->local)); + memset(&disp->peer, 0, sizeof(disp->peer)); + disp->localport = 0; + disp->shutting_down = 0; + disp->shutdown_out = 0; + disp->connected = 0; + disp->tcpmsg_valid = 0; + disp->shutdown_why = ISC_R_UNEXPECTED; + disp->requests = 0; + disp->tcpbuffers = 0; + disp->qid = NULL; + ISC_LIST_INIT(disp->activesockets); + ISC_LIST_INIT(disp->inactivesockets); + disp->nsockets = 0; + disp->port_table = NULL; + disp->dscp = -1; + + isc_mutex_init(&disp->lock); + + disp->failsafe_ev = allocate_devent(disp); + if (disp->failsafe_ev == NULL) { + result = ISC_R_NOMEMORY; + goto kill_lock; + } + + disp->magic = DISPATCH_MAGIC; + + *dispp = disp; + return (ISC_R_SUCCESS); + + /* + * error returns + */ +kill_lock: + isc_mutex_destroy(&disp->lock); + isc_refcount_decrement(&mgr->irefs); + isc_mem_put(mgr->mctx, disp, sizeof(*disp)); + + return (result); +} + +/* + * MUST be unlocked, and not used by anything. + */ +static void +dispatch_free(dns_dispatch_t **dispp) { + dns_dispatch_t *disp; + dns_dispatchmgr_t *mgr; + + REQUIRE(VALID_DISPATCH(*dispp)); + disp = *dispp; + *dispp = NULL; + + mgr = disp->mgr; + REQUIRE(VALID_DISPATCHMGR(mgr)); + + if (disp->tcpmsg_valid) { + dns_tcpmsg_invalidate(&disp->tcpmsg); + disp->tcpmsg_valid = 0; + } + + INSIST(disp->tcpbuffers == 0); + INSIST(disp->requests == 0); + INSIST(disp->recv_pending == 0); + INSIST(ISC_LIST_EMPTY(disp->activesockets)); + INSIST(ISC_LIST_EMPTY(disp->inactivesockets)); + + isc_refcount_decrement(&mgr->irefs); + isc_mem_put(mgr->mctx, disp->failsafe_ev, sizeof(*disp->failsafe_ev)); + disp->failsafe_ev = NULL; + + if (disp->qid != NULL) { + qid_destroy(mgr->mctx, &disp->qid); + } + + if (disp->port_table != NULL) { + for (int i = 0; i < DNS_DISPATCH_PORTTABLESIZE; i++) { + INSIST(ISC_LIST_EMPTY(disp->port_table[i])); + } + isc_mem_put(mgr->mctx, disp->port_table, + sizeof(disp->port_table[0]) * + DNS_DISPATCH_PORTTABLESIZE); + } + + disp->mgr = NULL; + isc_mutex_destroy(&disp->lock); + disp->magic = 0; + isc_refcount_decrement(&mgr->irefs); + isc_mem_put(mgr->mctx, disp, sizeof(*disp)); +} + +isc_result_t +dns_dispatch_createtcp(dns_dispatchmgr_t *mgr, isc_socket_t *sock, + isc_taskmgr_t *taskmgr, const isc_sockaddr_t *localaddr, + const isc_sockaddr_t *destaddr, unsigned int buffersize, + unsigned int maxbuffers, unsigned int maxrequests, + unsigned int buckets, unsigned int increment, + unsigned int attributes, dns_dispatch_t **dispp) { + isc_result_t result; + dns_dispatch_t *disp; + + UNUSED(maxbuffers); + UNUSED(buffersize); + + REQUIRE(VALID_DISPATCHMGR(mgr)); + REQUIRE(isc_socket_gettype(sock) == isc_sockettype_tcp); + REQUIRE((attributes & DNS_DISPATCHATTR_TCP) != 0); + REQUIRE((attributes & DNS_DISPATCHATTR_UDP) == 0); + + if (destaddr == NULL) { + attributes |= DNS_DISPATCHATTR_PRIVATE; /* XXXMLG */ + } + + LOCK(&mgr->lock); + + /* + * dispatch_allocate() checks mgr for us. + * qid_allocate() checks buckets and increment for us. + */ + disp = NULL; + result = dispatch_allocate(mgr, maxrequests, &disp); + if (result != ISC_R_SUCCESS) { + UNLOCK(&mgr->lock); + return (result); + } + + result = qid_allocate(mgr, buckets, increment, &disp->qid, false); + if (result != ISC_R_SUCCESS) { + goto deallocate_dispatch; + } + + disp->socktype = isc_sockettype_tcp; + disp->socket = NULL; + isc_socket_attach(sock, &disp->socket); + + disp->sepool = NULL; + + disp->ntasks = 1; + disp->task[0] = NULL; + result = isc_task_create(taskmgr, 50, &disp->task[0]); + if (result != ISC_R_SUCCESS) { + goto kill_socket; + } + + disp->ctlevent = + isc_event_allocate(mgr->mctx, disp, DNS_EVENT_DISPATCHCONTROL, + destroy_disp, disp, sizeof(isc_event_t)); + + isc_task_setname(disp->task[0], "tcpdispatch", disp); + + dns_tcpmsg_init(mgr->mctx, disp->socket, &disp->tcpmsg); + disp->tcpmsg_valid = 1; + + disp->attributes = attributes; + + if (localaddr == NULL) { + if (destaddr != NULL) { + switch (isc_sockaddr_pf(destaddr)) { + case AF_INET: + isc_sockaddr_any(&disp->local); + break; + case AF_INET6: + isc_sockaddr_any6(&disp->local); + break; + } + } + } else { + disp->local = *localaddr; + } + + if (destaddr != NULL) { + disp->peer = *destaddr; + } + + /* + * Append it to the dispatcher list. + */ + ISC_LIST_APPEND(mgr->list, disp, link); + UNLOCK(&mgr->lock); + + mgr_log(mgr, LVL(90), "created TCP dispatcher %p", disp); + dispatch_log(disp, LVL(90), "created task %p", disp->task[0]); + *dispp = disp; + + return (ISC_R_SUCCESS); + +kill_socket: + isc_socket_detach(&disp->socket); +deallocate_dispatch: + dispatch_free(&disp); + + UNLOCK(&mgr->lock); + + return (result); +} + +isc_result_t +dns_dispatch_gettcp(dns_dispatchmgr_t *mgr, const isc_sockaddr_t *destaddr, + const isc_sockaddr_t *localaddr, bool *connected, + dns_dispatch_t **dispp) { + dns_dispatch_t *disp; + isc_result_t result; + isc_sockaddr_t peeraddr; + isc_sockaddr_t sockname; + unsigned int attributes, mask; + bool match = false; + + REQUIRE(VALID_DISPATCHMGR(mgr)); + REQUIRE(destaddr != NULL); + REQUIRE(dispp != NULL && *dispp == NULL); + + /* First pass */ + attributes = DNS_DISPATCHATTR_TCP | DNS_DISPATCHATTR_CONNECTED; + mask = DNS_DISPATCHATTR_TCP | DNS_DISPATCHATTR_PRIVATE | + DNS_DISPATCHATTR_EXCLUSIVE | DNS_DISPATCHATTR_CONNECTED; + + LOCK(&mgr->lock); + disp = ISC_LIST_HEAD(mgr->list); + while (disp != NULL && !match) { + LOCK(&disp->lock); + if ((disp->shutting_down == 0) && + ATTRMATCH(disp->attributes, attributes, mask) && + (localaddr == NULL || + isc_sockaddr_eqaddr(localaddr, &disp->local))) + { + result = isc_socket_getsockname(disp->socket, + &sockname); + if (result == ISC_R_SUCCESS) { + result = isc_socket_getpeername(disp->socket, + &peeraddr); + } + if (result == ISC_R_SUCCESS && + isc_sockaddr_equal(destaddr, &peeraddr) && + (localaddr == NULL || + isc_sockaddr_eqaddr(localaddr, &sockname))) + { + /* attach */ + disp->refcount++; + *dispp = disp; + match = true; + if (connected != NULL) { + *connected = true; + } + } + } + UNLOCK(&disp->lock); + disp = ISC_LIST_NEXT(disp, link); + } + if (match || connected == NULL) { + UNLOCK(&mgr->lock); + return (match ? ISC_R_SUCCESS : ISC_R_NOTFOUND); + } + + /* Second pass, only if connected != NULL */ + attributes = DNS_DISPATCHATTR_TCP; + + disp = ISC_LIST_HEAD(mgr->list); + while (disp != NULL && !match) { + LOCK(&disp->lock); + if ((disp->shutting_down == 0) && + ATTRMATCH(disp->attributes, attributes, mask) && + (localaddr == NULL || + isc_sockaddr_eqaddr(localaddr, &disp->local)) && + isc_sockaddr_equal(destaddr, &disp->peer)) + { + /* attach */ + disp->refcount++; + *dispp = disp; + match = true; + } + UNLOCK(&disp->lock); + disp = ISC_LIST_NEXT(disp, link); + } + UNLOCK(&mgr->lock); + return (match ? ISC_R_SUCCESS : ISC_R_NOTFOUND); +} + +isc_result_t +dns_dispatch_getudp_dup(dns_dispatchmgr_t *mgr, isc_socketmgr_t *sockmgr, + isc_taskmgr_t *taskmgr, const isc_sockaddr_t *localaddr, + unsigned int buffersize, unsigned int maxbuffers, + unsigned int maxrequests, unsigned int buckets, + unsigned int increment, unsigned int attributes, + unsigned int mask, dns_dispatch_t **dispp, + dns_dispatch_t *dup_dispatch) { + isc_result_t result; + dns_dispatch_t *disp = NULL; + + REQUIRE(VALID_DISPATCHMGR(mgr)); + REQUIRE(sockmgr != NULL); + REQUIRE(localaddr != NULL); + REQUIRE(taskmgr != NULL); + REQUIRE(buffersize >= 512 && buffersize < (64 * 1024)); + REQUIRE(maxbuffers > 0); + REQUIRE(buckets < 2097169); /* next prime > 65536 * 32 */ + REQUIRE(increment > buckets); + REQUIRE(dispp != NULL && *dispp == NULL); + REQUIRE((attributes & DNS_DISPATCHATTR_TCP) == 0); + + result = dns_dispatchmgr_setudp(mgr, buffersize, maxbuffers, + maxrequests, buckets, increment); + if (result != ISC_R_SUCCESS) { + return (result); + } + + LOCK(&mgr->lock); + + if ((attributes & DNS_DISPATCHATTR_EXCLUSIVE) != 0) { + REQUIRE(isc_sockaddr_getport(localaddr) == 0); + goto createudp; + } + + /* + * See if we have a dispatcher that matches. + */ + if (dup_dispatch == NULL) { + result = dispatch_find(mgr, localaddr, attributes, mask, &disp); + if (result == ISC_R_SUCCESS) { + disp->refcount++; + + if (disp->maxrequests < maxrequests) { + disp->maxrequests = maxrequests; + } + + if ((disp->attributes & DNS_DISPATCHATTR_NOLISTEN) == + 0 && + (attributes & DNS_DISPATCHATTR_NOLISTEN) != 0) + { + disp->attributes |= DNS_DISPATCHATTR_NOLISTEN; + if (disp->recv_pending != 0) { + isc_socket_cancel(disp->socket, + disp->task[0], + ISC_SOCKCANCEL_RECV); + } + } + + UNLOCK(&disp->lock); + UNLOCK(&mgr->lock); + + *dispp = disp; + + return (ISC_R_SUCCESS); + } + } + +createudp: + /* + * Nope, create one. + */ + result = dispatch_createudp( + mgr, sockmgr, taskmgr, localaddr, maxrequests, attributes, + &disp, dup_dispatch == NULL ? NULL : dup_dispatch->socket); + + if (result != ISC_R_SUCCESS) { + UNLOCK(&mgr->lock); + return (result); + } + + UNLOCK(&mgr->lock); + *dispp = disp; + + return (ISC_R_SUCCESS); +} + +isc_result_t +dns_dispatch_getudp(dns_dispatchmgr_t *mgr, isc_socketmgr_t *sockmgr, + isc_taskmgr_t *taskmgr, const isc_sockaddr_t *localaddr, + unsigned int buffersize, unsigned int maxbuffers, + unsigned int maxrequests, unsigned int buckets, + unsigned int increment, unsigned int attributes, + unsigned int mask, dns_dispatch_t **dispp) { + return (dns_dispatch_getudp_dup(mgr, sockmgr, taskmgr, localaddr, + buffersize, maxbuffers, maxrequests, + buckets, increment, attributes, mask, + dispp, NULL)); +} + +/* + * mgr should be locked. + */ + +#ifndef DNS_DISPATCH_HELD +#define DNS_DISPATCH_HELD 20U +#endif /* ifndef DNS_DISPATCH_HELD */ + +static isc_result_t +get_udpsocket(dns_dispatchmgr_t *mgr, dns_dispatch_t *disp, + isc_socketmgr_t *sockmgr, const isc_sockaddr_t *localaddr, + isc_socket_t **sockp, isc_socket_t *dup_socket, bool duponly) { + unsigned int i, j; + isc_socket_t *held[DNS_DISPATCH_HELD]; + isc_sockaddr_t localaddr_bound; + isc_socket_t *sock = NULL; + isc_result_t result = ISC_R_SUCCESS; + bool anyport; + + INSIST(sockp != NULL && *sockp == NULL); + + localaddr_bound = *localaddr; + anyport = (isc_sockaddr_getport(localaddr) == 0); + + if (anyport) { + unsigned int nports; + in_port_t *ports; + + /* + * If no port is specified, we first try to pick up a random + * port by ourselves. + */ + if (isc_sockaddr_pf(localaddr) == AF_INET) { + nports = disp->mgr->nv4ports; + ports = disp->mgr->v4ports; + } else { + nports = disp->mgr->nv6ports; + ports = disp->mgr->v6ports; + } + if (nports == 0) { + return (ISC_R_ADDRNOTAVAIL); + } + + for (i = 0; i < 1024; i++) { + in_port_t prt; + + prt = ports[isc_random_uniform(nports)]; + isc_sockaddr_setport(&localaddr_bound, prt); + result = open_socket(sockmgr, &localaddr_bound, 0, + &sock, NULL, false); + /* + * Continue if the port chosen is already in use + * or the OS has reserved it. + */ + if (result == ISC_R_NOPERM || result == ISC_R_ADDRINUSE) + { + continue; + } + disp->localport = prt; + *sockp = sock; + return (result); + } + + /* + * If this fails 1024 times, we then ask the kernel for + * choosing one. + */ + } else { + /* Allow to reuse address for non-random ports. */ + result = open_socket(sockmgr, localaddr, + ISC_SOCKET_REUSEADDRESS, &sock, dup_socket, + duponly); + + if (result == ISC_R_SUCCESS) { + *sockp = sock; + } + + return (result); + } + + memset(held, 0, sizeof(held)); + i = 0; + + for (j = 0; j < 0xffffU; j++) { + result = open_socket(sockmgr, localaddr, 0, &sock, NULL, false); + if (result != ISC_R_SUCCESS) { + goto end; + } else if (portavailable(mgr, sock, NULL)) { + break; + } + if (held[i] != NULL) { + isc_socket_detach(&held[i]); + } + held[i++] = sock; + sock = NULL; + if (i == DNS_DISPATCH_HELD) { + i = 0; + } + } + if (j == 0xffffU) { + mgr_log(mgr, ISC_LOG_ERROR, + "avoid-v%s-udp-ports: unable to allocate " + "an available port", + isc_sockaddr_pf(localaddr) == AF_INET ? "4" : "6"); + result = ISC_R_FAILURE; + goto end; + } + *sockp = sock; + +end: + for (i = 0; i < DNS_DISPATCH_HELD; i++) { + if (held[i] != NULL) { + isc_socket_detach(&held[i]); + } + } + + return (result); +} + +static isc_result_t +dispatch_createudp(dns_dispatchmgr_t *mgr, isc_socketmgr_t *sockmgr, + isc_taskmgr_t *taskmgr, const isc_sockaddr_t *localaddr, + unsigned int maxrequests, unsigned int attributes, + dns_dispatch_t **dispp, isc_socket_t *dup_socket) { + isc_result_t result; + dns_dispatch_t *disp; + isc_socket_t *sock = NULL; + int i = 0; + bool duponly = ((attributes & DNS_DISPATCHATTR_CANREUSE) == 0); + + /* This is an attribute needed only at creation time */ + attributes &= ~DNS_DISPATCHATTR_CANREUSE; + /* + * dispatch_allocate() checks mgr for us. + */ + disp = NULL; + result = dispatch_allocate(mgr, maxrequests, &disp); + if (result != ISC_R_SUCCESS) { + return (result); + } + + disp->socktype = isc_sockettype_udp; + + if ((attributes & DNS_DISPATCHATTR_EXCLUSIVE) == 0) { + result = get_udpsocket(mgr, disp, sockmgr, localaddr, &sock, + dup_socket, duponly); + if (result != ISC_R_SUCCESS) { + goto deallocate_dispatch; + } + + if (isc_log_wouldlog(dns_lctx, 90)) { + char addrbuf[ISC_SOCKADDR_FORMATSIZE]; + + isc_sockaddr_format(localaddr, addrbuf, + ISC_SOCKADDR_FORMATSIZE); + mgr_log(mgr, LVL(90), + "dns_dispatch_createudp: Created" + " UDP dispatch for %s with socket fd %d", + addrbuf, isc_socket_getfd(sock)); + } + } else { + isc_sockaddr_t sa_any; + + /* + * For dispatches using exclusive sockets with a specific + * source address, we only check if the specified address is + * available on the system. Query sockets will be created later + * on demand. + */ + isc_sockaddr_anyofpf(&sa_any, isc_sockaddr_pf(localaddr)); + if (!isc_sockaddr_eqaddr(&sa_any, localaddr)) { + result = open_socket(sockmgr, localaddr, 0, &sock, NULL, + false); + if (sock != NULL) { + isc_socket_detach(&sock); + } + if (result != ISC_R_SUCCESS) { + goto deallocate_dispatch; + } + } + + disp->port_table = isc_mem_get( + mgr->mctx, sizeof(disp->port_table[0]) * + DNS_DISPATCH_PORTTABLESIZE); + for (i = 0; i < DNS_DISPATCH_PORTTABLESIZE; i++) { + ISC_LIST_INIT(disp->port_table[i]); + } + } + disp->socket = sock; + disp->local = *localaddr; + + if ((attributes & DNS_DISPATCHATTR_EXCLUSIVE) != 0) { + disp->ntasks = MAX_INTERNAL_TASKS; + } else { + disp->ntasks = 1; + } + for (i = 0; i < disp->ntasks; i++) { + disp->task[i] = NULL; + result = isc_task_create(taskmgr, 0, &disp->task[i]); + if (result != ISC_R_SUCCESS) { + while (--i >= 0) { + isc_task_shutdown(disp->task[i]); + isc_task_detach(&disp->task[i]); + } + goto kill_socket; + } + isc_task_setname(disp->task[i], "udpdispatch", disp); + } + + disp->ctlevent = + isc_event_allocate(mgr->mctx, disp, DNS_EVENT_DISPATCHCONTROL, + destroy_disp, disp, sizeof(isc_event_t)); + + disp->sepool = NULL; + isc_mem_create(&disp->sepool); + isc_mem_setname(disp->sepool, "disp_sepool", NULL); + + attributes &= ~DNS_DISPATCHATTR_TCP; + attributes |= DNS_DISPATCHATTR_UDP; + disp->attributes = attributes; + + /* + * Append it to the dispatcher list. + */ + ISC_LIST_APPEND(mgr->list, disp, link); + + mgr_log(mgr, LVL(90), "created UDP dispatcher %p", disp); + dispatch_log(disp, LVL(90), "created task %p", disp->task[0]); /* XXX */ + if (disp->socket != NULL) { + dispatch_log(disp, LVL(90), "created socket %p", disp->socket); + } + + *dispp = disp; + + return (result); + + /* + * Error returns. + */ +kill_socket: + if (disp->socket != NULL) { + isc_socket_detach(&disp->socket); + } +deallocate_dispatch: + dispatch_free(&disp); + + return (result); +} + +void +dns_dispatch_attach(dns_dispatch_t *disp, dns_dispatch_t **dispp) { + REQUIRE(VALID_DISPATCH(disp)); + REQUIRE(dispp != NULL && *dispp == NULL); + + LOCK(&disp->lock); + disp->refcount++; + UNLOCK(&disp->lock); + + *dispp = disp; +} + +/* + * It is important to lock the manager while we are deleting the dispatch, + * since dns_dispatch_getudp will call dispatch_find, which returns to + * the caller a dispatch but does not attach to it until later. _getudp + * locks the manager, however, so locking it here will keep us from attaching + * to a dispatcher that is in the process of going away. + */ +void +dns_dispatch_detach(dns_dispatch_t **dispp) { + dns_dispatch_t *disp; + dispsocket_t *dispsock; + bool killit; + + REQUIRE(dispp != NULL && VALID_DISPATCH(*dispp)); + + disp = *dispp; + *dispp = NULL; + + LOCK(&disp->lock); + + INSIST(disp->refcount > 0); + disp->refcount--; + if (disp->refcount == 0) { + if (disp->recv_pending > 0) { + isc_socket_cancel(disp->socket, disp->task[0], + ISC_SOCKCANCEL_RECV); + } + for (dispsock = ISC_LIST_HEAD(disp->activesockets); + dispsock != NULL; dispsock = ISC_LIST_NEXT(dispsock, link)) + { + isc_socket_cancel(dispsock->socket, dispsock->task, + ISC_SOCKCANCEL_RECV); + } + disp->shutting_down = 1; + } + + dispatch_log(disp, LVL(90), "detach: refcount %d", disp->refcount); + + killit = destroy_disp_ok(disp); + UNLOCK(&disp->lock); + if (killit) { + isc_task_send(disp->task[0], &disp->ctlevent); + } +} + +isc_result_t +dns_dispatch_addresponse(dns_dispatch_t *disp, unsigned int options, + const isc_sockaddr_t *dest, isc_task_t *task, + isc_taskaction_t action, void *arg, + dns_messageid_t *idp, dns_dispentry_t **resp, + isc_socketmgr_t *sockmgr) { + dns_dispentry_t *res; + unsigned int bucket; + in_port_t localport = 0; + dns_messageid_t id; + int i; + bool ok; + dns_qid_t *qid; + dispsocket_t *dispsocket = NULL; + isc_result_t result; + + REQUIRE(VALID_DISPATCH(disp)); + REQUIRE(task != NULL); + REQUIRE(dest != NULL); + REQUIRE(resp != NULL && *resp == NULL); + REQUIRE(idp != NULL); + if ((disp->attributes & DNS_DISPATCHATTR_EXCLUSIVE) != 0) { + REQUIRE(sockmgr != NULL); + } + + LOCK(&disp->lock); + + if (disp->shutting_down == 1) { + UNLOCK(&disp->lock); + return (ISC_R_SHUTTINGDOWN); + } + + if (disp->requests >= disp->maxrequests) { + UNLOCK(&disp->lock); + return (ISC_R_QUOTA); + } + + if ((disp->attributes & DNS_DISPATCHATTR_EXCLUSIVE) != 0 && + disp->nsockets > DNS_DISPATCH_SOCKSQUOTA) + { + dispsocket_t *oldestsocket; + dns_dispentry_t *oldestresp; + dns_dispatchevent_t *rev; + + /* + * Kill oldest outstanding query if the number of sockets + * exceeds the quota to keep the room for new queries. + */ + oldestsocket = ISC_LIST_HEAD(disp->activesockets); + oldestresp = oldestsocket->resp; + if (oldestresp != NULL && !oldestresp->item_out) { + rev = allocate_devent(oldestresp->disp); + if (rev != NULL) { + rev->buffer.base = NULL; + rev->result = ISC_R_CANCELED; + rev->id = oldestresp->id; + ISC_EVENT_INIT(rev, sizeof(*rev), 0, NULL, + DNS_EVENT_DISPATCH, + oldestresp->action, + oldestresp->arg, oldestresp, + NULL, NULL); + oldestresp->item_out = true; + isc_task_send(oldestresp->task, + ISC_EVENT_PTR(&rev)); + inc_stats(disp->mgr, + dns_resstatscounter_dispabort); + } + } + + /* + * Move this entry to the tail so that it won't (easily) be + * examined before actually being canceled. + */ + ISC_LIST_UNLINK(disp->activesockets, oldestsocket, link); + ISC_LIST_APPEND(disp->activesockets, oldestsocket, link); + } + + qid = DNS_QID(disp); + + if ((disp->attributes & DNS_DISPATCHATTR_EXCLUSIVE) != 0) { + /* + * Get a separate UDP socket with a random port number. + */ + result = get_dispsocket(disp, dest, sockmgr, &dispsocket, + &localport); + if (result != ISC_R_SUCCESS) { + UNLOCK(&disp->lock); + inc_stats(disp->mgr, dns_resstatscounter_dispsockfail); + return (result); + } + } else { + localport = disp->localport; + } + + /* + * Try somewhat hard to find an unique ID unless FIXEDID is set + * in which case we use the id passed in via *idp. + */ + LOCK(&qid->lock); + if ((options & DNS_DISPATCHOPT_FIXEDID) != 0) { + id = *idp; + } else { + id = (dns_messageid_t)isc_random16(); + } + ok = false; + i = 0; + do { + bucket = dns_hash(qid, dest, id, localport); + if (entry_search(qid, dest, id, localport, bucket) == NULL) { + ok = true; + break; + } + if ((disp->attributes & DNS_DISPATCHATTR_FIXEDID) != 0) { + break; + } + id += qid->qid_increment; + id &= 0x0000ffff; + } while (i++ < 64); + UNLOCK(&qid->lock); + + if (!ok) { + UNLOCK(&disp->lock); + return (ISC_R_NOMORE); + } + + res = isc_mem_get(disp->mgr->mctx, sizeof(*res)); + isc_refcount_increment0(&disp->mgr->irefs); + + disp->refcount++; + disp->requests++; + res->task = NULL; + isc_task_attach(task, &res->task); + res->disp = disp; + res->id = id; + res->port = localport; + res->bucket = bucket; + res->host = *dest; + res->action = action; + res->arg = arg; + res->dispsocket = dispsocket; + if (dispsocket != NULL) { + dispsocket->resp = res; + } + res->item_out = false; + ISC_LIST_INIT(res->items); + ISC_LINK_INIT(res, link); + res->magic = RESPONSE_MAGIC; + + LOCK(&qid->lock); + ISC_LIST_APPEND(qid->qid_table[bucket], res, link); + UNLOCK(&qid->lock); + + inc_stats(disp->mgr, (qid == disp->mgr->qid) + ? dns_resstatscounter_disprequdp + : dns_resstatscounter_dispreqtcp); + + request_log(disp, res, LVL(90), "attached to task %p", res->task); + + if (((disp->attributes & DNS_DISPATCHATTR_UDP) != 0) || + ((disp->attributes & DNS_DISPATCHATTR_CONNECTED) != 0)) + { + result = startrecv(disp, dispsocket); + if (result != ISC_R_SUCCESS) { + LOCK(&qid->lock); + ISC_LIST_UNLINK(qid->qid_table[bucket], res, link); + UNLOCK(&qid->lock); + + if (dispsocket != NULL) { + destroy_dispsocket(disp, &dispsocket); + } + + disp->refcount--; + disp->requests--; + + dec_stats(disp->mgr, + (qid == disp->mgr->qid) + ? dns_resstatscounter_disprequdp + : dns_resstatscounter_dispreqtcp); + + UNLOCK(&disp->lock); + isc_task_detach(&res->task); + isc_refcount_decrement(&disp->mgr->irefs); + isc_mem_put(disp->mgr->mctx, res, sizeof(*res)); + return (result); + } + } + + if (dispsocket != NULL) { + ISC_LIST_APPEND(disp->activesockets, dispsocket, link); + } + + UNLOCK(&disp->lock); + + *idp = id; + *resp = res; + + if ((disp->attributes & DNS_DISPATCHATTR_EXCLUSIVE) != 0) { + INSIST(res->dispsocket != NULL); + } + + return (ISC_R_SUCCESS); +} + +void +dns_dispatch_starttcp(dns_dispatch_t *disp) { + REQUIRE(VALID_DISPATCH(disp)); + + dispatch_log(disp, LVL(90), "starttcp %p", disp->task[0]); + + LOCK(&disp->lock); + if ((disp->attributes & DNS_DISPATCHATTR_CONNECTED) == 0) { + disp->attributes |= DNS_DISPATCHATTR_CONNECTED; + (void)startrecv(disp, NULL); + } + UNLOCK(&disp->lock); +} + +isc_result_t +dns_dispatch_getnext(dns_dispentry_t *resp, dns_dispatchevent_t **sockevent) { + dns_dispatch_t *disp; + dns_dispatchevent_t *ev; + + REQUIRE(VALID_RESPONSE(resp)); + REQUIRE(sockevent != NULL && *sockevent != NULL); + + disp = resp->disp; + REQUIRE(VALID_DISPATCH(disp)); + + ev = *sockevent; + *sockevent = NULL; + + LOCK(&disp->lock); + + REQUIRE(resp->item_out); + resp->item_out = false; + + if (ev->buffer.base != NULL) { + free_buffer(disp, ev->buffer.base, ev->buffer.length); + } + free_devent(disp, ev); + + if (disp->shutting_down == 1) { + UNLOCK(&disp->lock); + return (ISC_R_SHUTTINGDOWN); + } + ev = ISC_LIST_HEAD(resp->items); + if (ev != NULL) { + ISC_LIST_UNLINK(resp->items, ev, ev_link); + ISC_EVENT_INIT(ev, sizeof(*ev), 0, NULL, DNS_EVENT_DISPATCH, + resp->action, resp->arg, resp, NULL, NULL); + request_log(disp, resp, LVL(90), + "[c] Sent event %p buffer %p len %d to task %p", ev, + ev->buffer.base, ev->buffer.length, resp->task); + resp->item_out = true; + isc_task_send(resp->task, ISC_EVENT_PTR(&ev)); + } + UNLOCK(&disp->lock); + return (ISC_R_SUCCESS); +} + +void +dns_dispatch_removeresponse(dns_dispentry_t **resp, + dns_dispatchevent_t **sockevent) { + dns_dispatchmgr_t *mgr; + dns_dispatch_t *disp; + dns_dispentry_t *res; + dispsocket_t *dispsock; + dns_dispatchevent_t *ev; + unsigned int bucket; + bool killit; + unsigned int n; + isc_eventlist_t events; + dns_qid_t *qid; + + REQUIRE(resp != NULL); + REQUIRE(VALID_RESPONSE(*resp)); + + res = *resp; + *resp = NULL; + + disp = res->disp; + REQUIRE(VALID_DISPATCH(disp)); + mgr = disp->mgr; + REQUIRE(VALID_DISPATCHMGR(mgr)); + + qid = DNS_QID(disp); + + if (sockevent != NULL) { + REQUIRE(*sockevent != NULL); + ev = *sockevent; + *sockevent = NULL; + } else { + ev = NULL; + } + + LOCK(&disp->lock); + + INSIST(disp->requests > 0); + disp->requests--; + dec_stats(disp->mgr, (qid == disp->mgr->qid) + ? dns_resstatscounter_disprequdp + : dns_resstatscounter_dispreqtcp); + INSIST(disp->refcount > 0); + disp->refcount--; + if (disp->refcount == 0) { + if (disp->recv_pending > 0) { + isc_socket_cancel(disp->socket, disp->task[0], + ISC_SOCKCANCEL_RECV); + } + for (dispsock = ISC_LIST_HEAD(disp->activesockets); + dispsock != NULL; dispsock = ISC_LIST_NEXT(dispsock, link)) + { + isc_socket_cancel(dispsock->socket, dispsock->task, + ISC_SOCKCANCEL_RECV); + } + disp->shutting_down = 1; + } + + bucket = res->bucket; + + LOCK(&qid->lock); + ISC_LIST_UNLINK(qid->qid_table[bucket], res, link); + UNLOCK(&qid->lock); + + if (ev == NULL && res->item_out) { + /* + * We've posted our event, but the caller hasn't gotten it + * yet. Take it back. + */ + ISC_LIST_INIT(events); + n = isc_task_unsend(res->task, res, DNS_EVENT_DISPATCH, NULL, + &events); + /* + * We had better have gotten it back. + */ + INSIST(n == 1); + ev = (dns_dispatchevent_t *)ISC_LIST_HEAD(events); + } + + if (ev != NULL) { + REQUIRE(res->item_out); + res->item_out = false; + if (ev->buffer.base != NULL) { + free_buffer(disp, ev->buffer.base, ev->buffer.length); + } + free_devent(disp, ev); + } + + request_log(disp, res, LVL(90), "detaching from task %p", res->task); + isc_task_detach(&res->task); + + if (res->dispsocket != NULL) { + isc_socket_cancel(res->dispsocket->socket, + res->dispsocket->task, ISC_SOCKCANCEL_RECV); + res->dispsocket->resp = NULL; + } + + /* + * Free any buffered responses as well + */ + ev = ISC_LIST_HEAD(res->items); + while (ev != NULL) { + ISC_LIST_UNLINK(res->items, ev, ev_link); + if (ev->buffer.base != NULL) { + free_buffer(disp, ev->buffer.base, ev->buffer.length); + } + free_devent(disp, ev); + ev = ISC_LIST_HEAD(res->items); + } + res->magic = 0; + isc_refcount_decrement(&disp->mgr->irefs); + isc_mem_put(disp->mgr->mctx, res, sizeof(*res)); + if (disp->shutting_down == 1) { + do_cancel(disp); + } else { + (void)startrecv(disp, NULL); + } + + killit = destroy_disp_ok(disp); + UNLOCK(&disp->lock); + if (killit) { + isc_task_send(disp->task[0], &disp->ctlevent); + } +} + +/* + * disp must be locked. + */ +static void +do_cancel(dns_dispatch_t *disp) { + dns_dispatchevent_t *ev; + dns_dispentry_t *resp; + dns_qid_t *qid; + + if (disp->shutdown_out == 1) { + return; + } + + qid = DNS_QID(disp); + + /* + * Search for the first response handler without packets outstanding + * unless a specific handler is given. + */ + LOCK(&qid->lock); + for (resp = linear_first(qid); resp != NULL && resp->item_out; + /* Empty. */) + { + resp = linear_next(qid, resp); + } + + /* + * No one to send the cancel event to, so nothing to do. + */ + if (resp == NULL) { + goto unlock; + } + + /* + * Send the shutdown failsafe event to this resp. + */ + ev = disp->failsafe_ev; + ISC_EVENT_INIT(ev, sizeof(*ev), 0, NULL, DNS_EVENT_DISPATCH, + resp->action, resp->arg, resp, NULL, NULL); + ev->result = disp->shutdown_why; + ev->buffer.base = NULL; + ev->buffer.length = 0; + disp->shutdown_out = 1; + request_log(disp, resp, LVL(10), "cancel: failsafe event %p -> task %p", + ev, resp->task); + resp->item_out = true; + isc_task_send(resp->task, ISC_EVENT_PTR(&ev)); +unlock: + UNLOCK(&qid->lock); +} + +isc_socket_t * +dns_dispatch_getsocket(dns_dispatch_t *disp) { + REQUIRE(VALID_DISPATCH(disp)); + + return (disp->socket); +} + +isc_socket_t * +dns_dispatch_getentrysocket(dns_dispentry_t *resp) { + REQUIRE(VALID_RESPONSE(resp)); + + if (resp->dispsocket != NULL) { + return (resp->dispsocket->socket); + } else { + return (NULL); + } +} + +isc_result_t +dns_dispatch_getlocaladdress(dns_dispatch_t *disp, isc_sockaddr_t *addrp) { + REQUIRE(VALID_DISPATCH(disp)); + REQUIRE(addrp != NULL); + + if (disp->socktype == isc_sockettype_udp) { + *addrp = disp->local; + return (ISC_R_SUCCESS); + } + return (ISC_R_NOTIMPLEMENTED); +} + +void +dns_dispatch_cancel(dns_dispatch_t *disp) { + REQUIRE(VALID_DISPATCH(disp)); + + LOCK(&disp->lock); + + if (disp->shutting_down == 1) { + UNLOCK(&disp->lock); + return; + } + + disp->shutdown_why = ISC_R_CANCELED; + disp->shutting_down = 1; + do_cancel(disp); + + UNLOCK(&disp->lock); + + return; +} + +unsigned int +dns_dispatch_getattributes(dns_dispatch_t *disp) { + REQUIRE(VALID_DISPATCH(disp)); + + /* + * We don't bother locking disp here; it's the caller's responsibility + * to use only non volatile flags. + */ + return (disp->attributes); +} + +void +dns_dispatch_changeattributes(dns_dispatch_t *disp, unsigned int attributes, + unsigned int mask) { + REQUIRE(VALID_DISPATCH(disp)); + /* Exclusive attribute can only be set on creation */ + REQUIRE((attributes & DNS_DISPATCHATTR_EXCLUSIVE) == 0); + /* Also, a dispatch with randomport specified cannot start listening */ + REQUIRE((disp->attributes & DNS_DISPATCHATTR_EXCLUSIVE) == 0 || + (attributes & DNS_DISPATCHATTR_NOLISTEN) == 0); + + /* XXXMLG + * Should check for valid attributes here! + */ + + LOCK(&disp->lock); + + if ((mask & DNS_DISPATCHATTR_NOLISTEN) != 0) { + if ((disp->attributes & DNS_DISPATCHATTR_NOLISTEN) != 0 && + (attributes & DNS_DISPATCHATTR_NOLISTEN) == 0) + { + disp->attributes &= ~DNS_DISPATCHATTR_NOLISTEN; + (void)startrecv(disp, NULL); + } else if ((disp->attributes & DNS_DISPATCHATTR_NOLISTEN) == + 0 && + (attributes & DNS_DISPATCHATTR_NOLISTEN) != 0) + { + disp->attributes |= DNS_DISPATCHATTR_NOLISTEN; + if (disp->recv_pending != 0) { + isc_socket_cancel(disp->socket, disp->task[0], + ISC_SOCKCANCEL_RECV); + } + } + } + + disp->attributes &= ~mask; + disp->attributes |= (attributes & mask); + UNLOCK(&disp->lock); +} + +void +dns_dispatch_importrecv(dns_dispatch_t *disp, isc_event_t *event) { + void *buf; + isc_socketevent_t *sevent, *newsevent; + + REQUIRE(VALID_DISPATCH(disp)); + REQUIRE(event != NULL); + + if ((disp->attributes & DNS_DISPATCHATTR_NOLISTEN) == 0) { + return; + } + + sevent = (isc_socketevent_t *)event; + INSIST(sevent->n <= disp->mgr->buffersize); + + newsevent = (isc_socketevent_t *)isc_event_allocate( + disp->mgr->mctx, NULL, DNS_EVENT_IMPORTRECVDONE, udp_shrecv, + disp, sizeof(isc_socketevent_t)); + + buf = allocate_udp_buffer(disp); + if (buf == NULL) { + isc_event_free(ISC_EVENT_PTR(&newsevent)); + return; + } + memmove(buf, sevent->region.base, sevent->n); + newsevent->region.base = buf; + newsevent->region.length = disp->mgr->buffersize; + newsevent->n = sevent->n; + newsevent->result = sevent->result; + newsevent->address = sevent->address; + newsevent->timestamp = sevent->timestamp; + newsevent->pktinfo = sevent->pktinfo; + newsevent->attributes = sevent->attributes; + + isc_task_send(disp->task[0], ISC_EVENT_PTR(&newsevent)); +} + +dns_dispatch_t * +dns_dispatchset_get(dns_dispatchset_t *dset) { + dns_dispatch_t *disp; + + /* check that dispatch set is configured */ + if (dset == NULL || dset->ndisp == 0) { + return (NULL); + } + + LOCK(&dset->lock); + disp = dset->dispatches[dset->cur]; + dset->cur++; + if (dset->cur == dset->ndisp) { + dset->cur = 0; + } + UNLOCK(&dset->lock); + + return (disp); +} + +isc_result_t +dns_dispatchset_create(isc_mem_t *mctx, isc_socketmgr_t *sockmgr, + isc_taskmgr_t *taskmgr, dns_dispatch_t *source, + dns_dispatchset_t **dsetp, int n) { + isc_result_t result; + dns_dispatchset_t *dset; + dns_dispatchmgr_t *mgr; + int i, j; + + REQUIRE(VALID_DISPATCH(source)); + REQUIRE((source->attributes & DNS_DISPATCHATTR_UDP) != 0); + REQUIRE(dsetp != NULL && *dsetp == NULL); + + mgr = source->mgr; + + dset = isc_mem_get(mctx, sizeof(dns_dispatchset_t)); + memset(dset, 0, sizeof(*dset)); + + isc_mutex_init(&dset->lock); + + dset->dispatches = isc_mem_get(mctx, sizeof(dns_dispatch_t *) * n); + + isc_mem_attach(mctx, &dset->mctx); + dset->ndisp = n; + dset->cur = 0; + + dset->dispatches[0] = NULL; + dns_dispatch_attach(source, &dset->dispatches[0]); + + LOCK(&mgr->lock); + for (i = 1; i < n; i++) { + dset->dispatches[i] = NULL; + result = dispatch_createudp( + mgr, sockmgr, taskmgr, &source->local, + source->maxrequests, source->attributes, + &dset->dispatches[i], source->socket); + if (result != ISC_R_SUCCESS) { + goto fail; + } + } + + UNLOCK(&mgr->lock); + *dsetp = dset; + + return (ISC_R_SUCCESS); + +fail: + UNLOCK(&mgr->lock); + + for (j = 0; j < i; j++) { + dns_dispatch_detach(&(dset->dispatches[j])); + } + isc_mem_put(mctx, dset->dispatches, sizeof(dns_dispatch_t *) * n); + if (dset->mctx == mctx) { + isc_mem_detach(&dset->mctx); + } + + isc_mutex_destroy(&dset->lock); + isc_mem_put(mctx, dset, sizeof(dns_dispatchset_t)); + return (result); +} + +void +dns_dispatchset_cancelall(dns_dispatchset_t *dset, isc_task_t *task) { + int i; + + REQUIRE(dset != NULL); + + for (i = 0; i < dset->ndisp; i++) { + isc_socket_t *sock; + sock = dns_dispatch_getsocket(dset->dispatches[i]); + isc_socket_cancel(sock, task, ISC_SOCKCANCEL_ALL); + } +} + +void +dns_dispatchset_destroy(dns_dispatchset_t **dsetp) { + dns_dispatchset_t *dset; + int i; + + REQUIRE(dsetp != NULL && *dsetp != NULL); + + dset = *dsetp; + *dsetp = NULL; + for (i = 0; i < dset->ndisp; i++) { + dns_dispatch_detach(&(dset->dispatches[i])); + } + isc_mem_put(dset->mctx, dset->dispatches, + sizeof(dns_dispatch_t *) * dset->ndisp); + isc_mutex_destroy(&dset->lock); + isc_mem_putanddetach(&dset->mctx, dset, sizeof(dns_dispatchset_t)); +} + +void +dns_dispatch_setdscp(dns_dispatch_t *disp, isc_dscp_t dscp) { + REQUIRE(VALID_DISPATCH(disp)); + disp->dscp = dscp; +} + +isc_dscp_t +dns_dispatch_getdscp(dns_dispatch_t *disp) { + REQUIRE(VALID_DISPATCH(disp)); + return (disp->dscp); +} + +#if 0 +void +dns_dispatchmgr_dump(dns_dispatchmgr_t *mgr) { + dns_dispatch_t *disp; + char foo[1024]; + + disp = ISC_LIST_HEAD(mgr->list); + while (disp != NULL) { + isc_sockaddr_format(&disp->local, foo, sizeof(foo)); + printf("\tdispatch %p, addr %s\n", disp, foo); + disp = ISC_LIST_NEXT(disp, link); + } +} +#endif /* if 0 */ |