/* * Copyright (C) Internet Systems Consortium, Inc. ("ISC") * * This Source Code Form is subject to the terms of the Mozilla Public * License, v. 2.0. If a copy of the MPL was not distributed with this * file, You can obtain one at http://mozilla.org/MPL/2.0/. * * See the COPYRIGHT file distributed with this work for additional * information regarding copyright ownership. */ /*! \file */ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include typedef ISC_LIST(dns_dispentry_t) dns_displist_t; typedef struct dispsocket dispsocket_t; typedef ISC_LIST(dispsocket_t) dispsocketlist_t; typedef struct dispportentry dispportentry_t; typedef ISC_LIST(dispportentry_t) dispportlist_t; typedef struct dns_qid { unsigned int magic; unsigned int qid_nbuckets; /*%< hash table size */ unsigned int qid_increment; /*%< id increment on collision */ isc_mutex_t lock; dns_displist_t *qid_table; /*%< the table itself */ dispsocketlist_t *sock_table; /*%< socket table */ } dns_qid_t; struct dns_dispatchmgr { /* Unlocked. */ unsigned int magic; isc_mem_t *mctx; dns_acl_t *blackhole; dns_portlist_t *portlist; isc_stats_t *stats; isc_entropy_t *entropy; /*%< entropy source */ /* Locked by "lock". */ isc_mutex_t lock; unsigned int state; ISC_LIST(dns_dispatch_t) list; /* Locked by rng_lock. */ isc_mutex_t rng_lock; isc_rng_t *rngctx; /*%< RNG context for QID */ /* locked by buffer_lock */ dns_qid_t *qid; isc_mutex_t buffer_lock; unsigned int buffers; /*%< allocated buffers */ unsigned int buffersize; /*%< size of each buffer */ unsigned int maxbuffers; /*%< max buffers */ /* Locked internally. */ isc_mutex_t depool_lock; isc_mempool_t *depool; /*%< pool for dispatch events */ isc_mutex_t rpool_lock; isc_mempool_t *rpool; /*%< pool for replies */ isc_mutex_t dpool_lock; isc_mempool_t *dpool; /*%< dispatch allocations */ isc_mutex_t bpool_lock; isc_mempool_t *bpool; /*%< pool for buffers */ isc_mutex_t spool_lock; isc_mempool_t *spool; /*%< pool for dispsocks */ /*% * Locked by qid->lock if qid exists; otherwise, can be used without * being locked. * Memory footprint considerations: this is a simple implementation of * available ports, i.e., an ordered array of the actual port numbers. * This will require about 256KB of memory in the worst case (128KB for * each of IPv4 and IPv6). We could reduce it by representing it as a * more sophisticated way such as a list (or array) of ranges that are * searched to identify a specific port. Our decision here is the saved * memory isn't worth the implementation complexity, considering the * fact that the whole BIND9 process (which is mainly named) already * requires a pretty large memory footprint. We may, however, have to * revisit the decision when we want to use it as a separate module for * an environment where memory requirement is severer. */ in_port_t *v4ports; /*%< available ports for IPv4 */ unsigned int nv4ports; /*%< # of available ports for IPv4 */ in_port_t *v6ports; /*%< available ports for IPv4 */ unsigned int nv6ports; /*%< # of available ports for IPv4 */ }; #define MGR_SHUTTINGDOWN 0x00000001U #define MGR_IS_SHUTTINGDOWN(l) (((l)->state & MGR_SHUTTINGDOWN) != 0) #define IS_PRIVATE(d) (((d)->attributes & DNS_DISPATCHATTR_PRIVATE) != 0) struct dns_dispentry { unsigned int magic; dns_dispatch_t *disp; dns_messageid_t id; in_port_t port; unsigned int bucket; isc_sockaddr_t host; isc_task_t *task; isc_taskaction_t action; void *arg; bool item_out; dispsocket_t *dispsocket; ISC_LIST(dns_dispatchevent_t) items; ISC_LINK(dns_dispentry_t) link; }; /*% * Maximum number of dispatch sockets that can be pooled for reuse. The * appropriate value may vary, but experiments have shown a busy caching server * may need more than 1000 sockets concurrently opened. The maximum allowable * number of dispatch sockets (per manager) will be set to the double of this * value. */ #ifndef DNS_DISPATCH_POOLSOCKS #define DNS_DISPATCH_POOLSOCKS 2048 #endif /*% * Quota to control the number of dispatch sockets. If a dispatch has more * than the quota of sockets, new queries will purge oldest ones, so that * a massive number of outstanding queries won't prevent subsequent queries * (especially if the older ones take longer time and result in timeout). */ #ifndef DNS_DISPATCH_SOCKSQUOTA #define DNS_DISPATCH_SOCKSQUOTA 3072 #endif struct dispsocket { unsigned int magic; isc_socket_t *socket; dns_dispatch_t *disp; isc_sockaddr_t host; in_port_t localport; /* XXX: should be removed later */ dispportentry_t *portentry; dns_dispentry_t *resp; isc_task_t *task; ISC_LINK(dispsocket_t) link; unsigned int bucket; ISC_LINK(dispsocket_t) blink; }; /*% * A port table entry. We remember every port we first open in a table with a * reference counter so that we can 'reuse' the same port (with different * destination addresses) using the SO_REUSEADDR socket option. */ struct dispportentry { in_port_t port; unsigned int refs; ISC_LINK(struct dispportentry) link; }; #ifndef DNS_DISPATCH_PORTTABLESIZE #define DNS_DISPATCH_PORTTABLESIZE 1024 #endif #define INVALID_BUCKET (0xffffdead) /*% * Number of tasks for each dispatch that use separate sockets for different * transactions. This must be a power of 2 as it will divide 32 bit numbers * to get an uniformly random tasks selection. See get_dispsocket(). */ #define MAX_INTERNAL_TASKS 64 struct dns_dispatch { /* Unlocked. */ unsigned int magic; /*%< magic */ dns_dispatchmgr_t *mgr; /*%< dispatch manager */ int ntasks; /*% * internal task buckets. We use multiple tasks to distribute various * socket events well when using separate dispatch sockets. We use the * 1st task (task[0]) for internal control events. */ isc_task_t *task[MAX_INTERNAL_TASKS]; isc_socket_t *socket; /*%< isc socket attached to */ isc_sockaddr_t local; /*%< local address */ in_port_t localport; /*%< local UDP port */ isc_sockaddr_t peer; /*%< peer address (TCP) */ isc_dscp_t dscp; /*%< "listen-on" DSCP value */ unsigned int maxrequests; /*%< max requests */ isc_event_t *ctlevent; isc_mutex_t sepool_lock; isc_mempool_t *sepool; /*%< pool for socket events */ /*% Locked by mgr->lock. */ ISC_LINK(dns_dispatch_t) link; /* Locked by "lock". */ isc_mutex_t lock; /*%< locks all below */ isc_sockettype_t socktype; unsigned int attributes; unsigned int refcount; /*%< number of users */ dns_dispatchevent_t *failsafe_ev; /*%< failsafe cancel event */ unsigned int shutting_down : 1, shutdown_out : 1, connected : 1, tcpmsg_valid : 1, recv_pending : 1; /*%< is a recv() pending? */ isc_result_t shutdown_why; ISC_LIST(dispsocket_t) activesockets; ISC_LIST(dispsocket_t) inactivesockets; unsigned int nsockets; unsigned int requests; /*%< how many requests we have */ unsigned int tcpbuffers; /*%< allocated buffers */ dns_tcpmsg_t tcpmsg; /*%< for tcp streams */ dns_qid_t *qid; isc_rng_t *rngctx; /*%< for QID/UDP port num */ dispportlist_t *port_table; /*%< hold ports 'owned' by us */ isc_mempool_t *portpool; /*%< port table entries */ }; #define QID_MAGIC ISC_MAGIC('Q', 'i', 'd', ' ') #define VALID_QID(e) ISC_MAGIC_VALID((e), QID_MAGIC) #define RESPONSE_MAGIC ISC_MAGIC('D', 'r', 's', 'p') #define VALID_RESPONSE(e) ISC_MAGIC_VALID((e), RESPONSE_MAGIC) #define DISPSOCK_MAGIC ISC_MAGIC('D', 's', 'o', 'c') #define VALID_DISPSOCK(e) ISC_MAGIC_VALID((e), DISPSOCK_MAGIC) #define DISPATCH_MAGIC ISC_MAGIC('D', 'i', 's', 'p') #define VALID_DISPATCH(e) ISC_MAGIC_VALID((e), DISPATCH_MAGIC) #define DNS_DISPATCHMGR_MAGIC ISC_MAGIC('D', 'M', 'g', 'r') #define VALID_DISPATCHMGR(e) ISC_MAGIC_VALID((e), DNS_DISPATCHMGR_MAGIC) #define DNS_QID(disp) ((disp)->socktype == isc_sockettype_tcp) ? \ (disp)->qid : (disp)->mgr->qid #define DISP_RNGCTX(disp) ((disp)->socktype == isc_sockettype_udp) ? \ ((disp)->rngctx) : ((disp)->mgr->rngctx) /*% * Locking a query port buffer is a bit tricky. We access the buffer without * locking until qid is created. Technically, there is a possibility of race * between the creation of qid and access to the port buffer; in practice, * however, this should be safe because qid isn't created until the first * dispatch is created and there should be no contending situation until then. */ #define PORTBUFLOCK(mgr) if ((mgr)->qid != NULL) LOCK(&((mgr)->qid->lock)) #define PORTBUFUNLOCK(mgr) if ((mgr)->qid != NULL) UNLOCK((&(mgr)->qid->lock)) /* * Statics. */ static dns_dispentry_t *entry_search(dns_qid_t *, isc_sockaddr_t *, dns_messageid_t, in_port_t, unsigned int); static bool destroy_disp_ok(dns_dispatch_t *); static void destroy_disp(isc_task_t *task, isc_event_t *event); static void destroy_dispsocket(dns_dispatch_t *, dispsocket_t **); static void deactivate_dispsocket(dns_dispatch_t *, dispsocket_t *); static void udp_exrecv(isc_task_t *, isc_event_t *); static void udp_shrecv(isc_task_t *, isc_event_t *); static void udp_recv(isc_event_t *, dns_dispatch_t *, dispsocket_t *); static void tcp_recv(isc_task_t *, isc_event_t *); static isc_result_t startrecv(dns_dispatch_t *, dispsocket_t *); static uint32_t dns_hash(dns_qid_t *, isc_sockaddr_t *, dns_messageid_t, in_port_t); static void free_buffer(dns_dispatch_t *disp, void *buf, unsigned int len); static void *allocate_udp_buffer(dns_dispatch_t *disp); static inline void free_devent(dns_dispatch_t *disp, dns_dispatchevent_t *ev); static inline dns_dispatchevent_t *allocate_devent(dns_dispatch_t *disp); static void do_cancel(dns_dispatch_t *disp); static dns_dispentry_t *linear_first(dns_qid_t *disp); static dns_dispentry_t *linear_next(dns_qid_t *disp, dns_dispentry_t *resp); static void dispatch_free(dns_dispatch_t **dispp); static isc_result_t get_udpsocket(dns_dispatchmgr_t *mgr, dns_dispatch_t *disp, isc_socketmgr_t *sockmgr, isc_sockaddr_t *localaddr, isc_socket_t **sockp, isc_socket_t *dup_socket); static isc_result_t dispatch_createudp(dns_dispatchmgr_t *mgr, isc_socketmgr_t *sockmgr, isc_taskmgr_t *taskmgr, isc_sockaddr_t *localaddr, unsigned int maxrequests, unsigned int attributes, dns_dispatch_t **dispp, isc_socket_t *dup_socket); static bool destroy_mgr_ok(dns_dispatchmgr_t *mgr); static void destroy_mgr(dns_dispatchmgr_t **mgrp); static isc_result_t qid_allocate(dns_dispatchmgr_t *mgr, unsigned int buckets, unsigned int increment, dns_qid_t **qidp, bool needaddrtable); static void qid_destroy(isc_mem_t *mctx, dns_qid_t **qidp); static isc_result_t open_socket(isc_socketmgr_t *mgr, isc_sockaddr_t *local, unsigned int options, isc_socket_t **sockp, isc_socket_t *dup_socket); static bool portavailable(dns_dispatchmgr_t *mgr, isc_socket_t *sock, isc_sockaddr_t *sockaddrp); #define LVL(x) ISC_LOG_DEBUG(x) static void mgr_log(dns_dispatchmgr_t *mgr, int level, const char *fmt, ...) ISC_FORMAT_PRINTF(3, 4); static void mgr_log(dns_dispatchmgr_t *mgr, int level, const char *fmt, ...) { char msgbuf[2048]; va_list ap; if (! isc_log_wouldlog(dns_lctx, level)) return; va_start(ap, fmt); vsnprintf(msgbuf, sizeof(msgbuf), fmt, ap); va_end(ap); isc_log_write(dns_lctx, DNS_LOGCATEGORY_DISPATCH, DNS_LOGMODULE_DISPATCH, level, "dispatchmgr %p: %s", mgr, msgbuf); } static inline void inc_stats(dns_dispatchmgr_t *mgr, isc_statscounter_t counter) { if (mgr->stats != NULL) isc_stats_increment(mgr->stats, counter); } static inline void dec_stats(dns_dispatchmgr_t *mgr, isc_statscounter_t counter) { if (mgr->stats != NULL) isc_stats_decrement(mgr->stats, counter); } static void dispatch_log(dns_dispatch_t *disp, int level, const char *fmt, ...) ISC_FORMAT_PRINTF(3, 4); static void dispatch_log(dns_dispatch_t *disp, int level, const char *fmt, ...) { char msgbuf[2048]; va_list ap; if (! isc_log_wouldlog(dns_lctx, level)) return; va_start(ap, fmt); vsnprintf(msgbuf, sizeof(msgbuf), fmt, ap); va_end(ap); isc_log_write(dns_lctx, DNS_LOGCATEGORY_DISPATCH, DNS_LOGMODULE_DISPATCH, level, "dispatch %p: %s", disp, msgbuf); } static void request_log(dns_dispatch_t *disp, dns_dispentry_t *resp, int level, const char *fmt, ...) ISC_FORMAT_PRINTF(4, 5); static void request_log(dns_dispatch_t *disp, dns_dispentry_t *resp, int level, const char *fmt, ...) { char msgbuf[2048]; char peerbuf[256]; va_list ap; if (! isc_log_wouldlog(dns_lctx, level)) return; va_start(ap, fmt); vsnprintf(msgbuf, sizeof(msgbuf), fmt, ap); va_end(ap); if (VALID_RESPONSE(resp)) { isc_sockaddr_format(&resp->host, peerbuf, sizeof(peerbuf)); isc_log_write(dns_lctx, DNS_LOGCATEGORY_DISPATCH, DNS_LOGMODULE_DISPATCH, level, "dispatch %p response %p %s: %s", disp, resp, peerbuf, msgbuf); } else { isc_log_write(dns_lctx, DNS_LOGCATEGORY_DISPATCH, DNS_LOGMODULE_DISPATCH, level, "dispatch %p req/resp %p: %s", disp, resp, msgbuf); } } /* * Return a hash of the destination and message id. */ static uint32_t dns_hash(dns_qid_t *qid, isc_sockaddr_t *dest, dns_messageid_t id, in_port_t port) { uint32_t ret; ret = isc_sockaddr_hash(dest, true); ret ^= ((uint32_t)id << 16) | port; ret %= qid->qid_nbuckets; INSIST(ret < qid->qid_nbuckets); return (ret); } /* * Find the first entry in 'qid'. Returns NULL if there are no entries. */ static dns_dispentry_t * linear_first(dns_qid_t *qid) { dns_dispentry_t *ret; unsigned int bucket; bucket = 0; while (bucket < qid->qid_nbuckets) { ret = ISC_LIST_HEAD(qid->qid_table[bucket]); if (ret != NULL) return (ret); bucket++; } return (NULL); } /* * Find the next entry after 'resp' in 'qid'. Return NULL if there are * no more entries. */ static dns_dispentry_t * linear_next(dns_qid_t *qid, dns_dispentry_t *resp) { dns_dispentry_t *ret; unsigned int bucket; ret = ISC_LIST_NEXT(resp, link); if (ret != NULL) return (ret); bucket = resp->bucket; bucket++; while (bucket < qid->qid_nbuckets) { ret = ISC_LIST_HEAD(qid->qid_table[bucket]); if (ret != NULL) return (ret); bucket++; } return (NULL); } /* * The dispatch must be locked. */ static bool destroy_disp_ok(dns_dispatch_t *disp) { if (disp->refcount != 0) return (false); if (disp->recv_pending != 0) return (false); if (!ISC_LIST_EMPTY(disp->activesockets)) return (false); if (disp->shutting_down == 0) return (false); return (true); } /* * Called when refcount reaches 0 (and safe to destroy). * * The dispatcher must be locked. * The manager must not be locked. */ static void destroy_disp(isc_task_t *task, isc_event_t *event) { dns_dispatch_t *disp; dns_dispatchmgr_t *mgr; bool killmgr; dispsocket_t *dispsocket; int i; INSIST(event->ev_type == DNS_EVENT_DISPATCHCONTROL); UNUSED(task); disp = event->ev_arg; mgr = disp->mgr; LOCK(&mgr->lock); ISC_LIST_UNLINK(mgr->list, disp, link); dispatch_log(disp, LVL(90), "shutting down; detaching from sock %p, task %p", disp->socket, disp->task[0]); /* XXXX */ if (disp->sepool != NULL) { isc_mempool_destroy(&disp->sepool); (void)isc_mutex_destroy(&disp->sepool_lock); } if (disp->socket != NULL) isc_socket_detach(&disp->socket); while ((dispsocket = ISC_LIST_HEAD(disp->inactivesockets)) != NULL) { ISC_LIST_UNLINK(disp->inactivesockets, dispsocket, link); destroy_dispsocket(disp, &dispsocket); } for (i = 0; i < disp->ntasks; i++) isc_task_detach(&disp->task[i]); isc_event_free(&event); dispatch_free(&disp); killmgr = destroy_mgr_ok(mgr); UNLOCK(&mgr->lock); if (killmgr) destroy_mgr(&mgr); } /*% * Manipulate port table per dispatch: find an entry for a given port number, * create a new entry, and decrement a given entry with possible clean-up. */ static dispportentry_t * port_search(dns_dispatch_t *disp, in_port_t port) { dispportentry_t *portentry; REQUIRE(disp->port_table != NULL); portentry = ISC_LIST_HEAD(disp->port_table[port % DNS_DISPATCH_PORTTABLESIZE]); while (portentry != NULL) { if (portentry->port == port) return (portentry); portentry = ISC_LIST_NEXT(portentry, link); } return (NULL); } static dispportentry_t * new_portentry(dns_dispatch_t *disp, in_port_t port) { dispportentry_t *portentry; dns_qid_t *qid; REQUIRE(disp->port_table != NULL); portentry = isc_mempool_get(disp->portpool); if (portentry == NULL) return (portentry); portentry->port = port; portentry->refs = 1; ISC_LINK_INIT(portentry, link); qid = DNS_QID(disp); LOCK(&qid->lock); ISC_LIST_APPEND(disp->port_table[port % DNS_DISPATCH_PORTTABLESIZE], portentry, link); UNLOCK(&qid->lock); return (portentry); } /*% * The caller must not hold the qid->lock. */ static void deref_portentry(dns_dispatch_t *disp, dispportentry_t **portentryp) { dispportentry_t *portentry = *portentryp; dns_qid_t *qid; REQUIRE(disp->port_table != NULL); REQUIRE(portentry != NULL && portentry->refs > 0); qid = DNS_QID(disp); LOCK(&qid->lock); portentry->refs--; if (portentry->refs == 0) { ISC_LIST_UNLINK(disp->port_table[portentry->port % DNS_DISPATCH_PORTTABLESIZE], portentry, link); isc_mempool_put(disp->portpool, portentry); } /* * Set '*portentryp' to NULL inside the lock so that * dispsock->portentry does not change in socket_search. */ *portentryp = NULL; UNLOCK(&qid->lock); } /*% * Find a dispsocket for socket address 'dest', and port number 'port'. * Return NULL if no such entry exists. Requires qid->lock to be held. */ static dispsocket_t * socket_search(dns_qid_t *qid, isc_sockaddr_t *dest, in_port_t port, unsigned int bucket) { dispsocket_t *dispsock; REQUIRE(VALID_QID(qid)); REQUIRE(bucket < qid->qid_nbuckets); dispsock = ISC_LIST_HEAD(qid->sock_table[bucket]); while (dispsock != NULL) { if (dispsock->portentry != NULL && dispsock->portentry->port == port && isc_sockaddr_equal(dest, &dispsock->host)) return (dispsock); dispsock = ISC_LIST_NEXT(dispsock, blink); } return (NULL); } /*% * Make a new socket for a single dispatch with a random port number. * The caller must hold the disp->lock */ static isc_result_t get_dispsocket(dns_dispatch_t *disp, isc_sockaddr_t *dest, isc_socketmgr_t *sockmgr, dispsocket_t **dispsockp, in_port_t *portp) { int i; uint32_t r; dns_dispatchmgr_t *mgr = disp->mgr; isc_socket_t *sock = NULL; isc_result_t result = ISC_R_FAILURE; in_port_t port; isc_sockaddr_t localaddr; unsigned int bucket = 0; dispsocket_t *dispsock; unsigned int nports; in_port_t *ports; unsigned int bindoptions; dispportentry_t *portentry = NULL; dns_qid_t *qid; if (isc_sockaddr_pf(&disp->local) == AF_INET) { nports = disp->mgr->nv4ports; ports = disp->mgr->v4ports; } else { nports = disp->mgr->nv6ports; ports = disp->mgr->v6ports; } if (nports == 0) return (ISC_R_ADDRNOTAVAIL); dispsock = ISC_LIST_HEAD(disp->inactivesockets); if (dispsock != NULL) { ISC_LIST_UNLINK(disp->inactivesockets, dispsock, link); sock = dispsock->socket; dispsock->socket = NULL; } else { dispsock = isc_mempool_get(mgr->spool); if (dispsock == NULL) return (ISC_R_NOMEMORY); disp->nsockets++; dispsock->socket = NULL; dispsock->disp = disp; dispsock->resp = NULL; dispsock->portentry = NULL; isc_random_get(&r); dispsock->task = NULL; isc_task_attach(disp->task[r % disp->ntasks], &dispsock->task); ISC_LINK_INIT(dispsock, link); ISC_LINK_INIT(dispsock, blink); dispsock->magic = DISPSOCK_MAGIC; } /* * Pick up a random UDP port and open a new socket with it. Avoid * choosing ports that share the same destination because it will be * very likely to fail in bind(2) or connect(2). */ localaddr = disp->local; qid = DNS_QID(disp); for (i = 0; i < 64; i++) { port = ports[isc_rng_uniformrandom(DISP_RNGCTX(disp), nports)]; isc_sockaddr_setport(&localaddr, port); LOCK(&qid->lock); bucket = dns_hash(qid, dest, 0, port); if (socket_search(qid, dest, port, bucket) != NULL) { UNLOCK(&qid->lock); continue; } UNLOCK(&qid->lock); bindoptions = 0; portentry = port_search(disp, port); if (portentry != NULL) bindoptions |= ISC_SOCKET_REUSEADDRESS; result = open_socket(sockmgr, &localaddr, bindoptions, &sock, NULL); if (result == ISC_R_SUCCESS) { if (portentry == NULL) { portentry = new_portentry(disp, port); if (portentry == NULL) { result = ISC_R_NOMEMORY; break; } } else { LOCK(&qid->lock); portentry->refs++; UNLOCK(&qid->lock); } break; } else if (result == ISC_R_NOPERM) { char buf[ISC_SOCKADDR_FORMATSIZE]; isc_sockaddr_format(&localaddr, buf, sizeof(buf)); dispatch_log(disp, ISC_LOG_WARNING, "open_socket(%s) -> %s: continuing", buf, isc_result_totext(result)); } else if (result != ISC_R_ADDRINUSE) break; } if (result == ISC_R_SUCCESS) { dispsock->socket = sock; dispsock->host = *dest; dispsock->portentry = portentry; dispsock->bucket = bucket; LOCK(&qid->lock); ISC_LIST_APPEND(qid->sock_table[bucket], dispsock, blink); UNLOCK(&qid->lock); *dispsockp = dispsock; *portp = port; } else { /* * We could keep it in the inactive list, but since this should * be an exceptional case and might be resource shortage, we'd * rather destroy it. */ if (sock != NULL) isc_socket_detach(&sock); destroy_dispsocket(disp, &dispsock); } return (result); } /*% * Destroy a dedicated dispatch socket. */ static void destroy_dispsocket(dns_dispatch_t *disp, dispsocket_t **dispsockp) { dispsocket_t *dispsock; dns_qid_t *qid; /* * The dispatch must be locked. */ REQUIRE(dispsockp != NULL && *dispsockp != NULL); dispsock = *dispsockp; REQUIRE(!ISC_LINK_LINKED(dispsock, link)); disp->nsockets--; dispsock->magic = 0; if (dispsock->portentry != NULL) deref_portentry(disp, &dispsock->portentry); if (dispsock->socket != NULL) isc_socket_detach(&dispsock->socket); if (ISC_LINK_LINKED(dispsock, blink)) { qid = DNS_QID(disp); LOCK(&qid->lock); ISC_LIST_UNLINK(qid->sock_table[dispsock->bucket], dispsock, blink); UNLOCK(&qid->lock); } if (dispsock->task != NULL) isc_task_detach(&dispsock->task); isc_mempool_put(disp->mgr->spool, dispsock); *dispsockp = NULL; } /*% * Deactivate a dedicated dispatch socket. Move it to the inactive list for * future reuse unless the total number of sockets are exceeding the maximum. */ static void deactivate_dispsocket(dns_dispatch_t *disp, dispsocket_t *dispsock) { isc_result_t result; dns_qid_t *qid; /* * The dispatch must be locked. */ ISC_LIST_UNLINK(disp->activesockets, dispsock, link); if (dispsock->resp != NULL) { INSIST(dispsock->resp->dispsocket == dispsock); dispsock->resp->dispsocket = NULL; } INSIST(dispsock->portentry != NULL); deref_portentry(disp, &dispsock->portentry); if (disp->nsockets > DNS_DISPATCH_POOLSOCKS) destroy_dispsocket(disp, &dispsock); else { result = isc_socket_close(dispsock->socket); qid = DNS_QID(disp); LOCK(&qid->lock); ISC_LIST_UNLINK(qid->sock_table[dispsock->bucket], dispsock, blink); UNLOCK(&qid->lock); if (result == ISC_R_SUCCESS) ISC_LIST_APPEND(disp->inactivesockets, dispsock, link); else { /* * If the underlying system does not allow this * optimization, destroy this temporary structure (and * create a new one for a new transaction). */ INSIST(result == ISC_R_NOTIMPLEMENTED); destroy_dispsocket(disp, &dispsock); } } } /* * Find an entry for query ID 'id', socket address 'dest', and port number * 'port'. * Return NULL if no such entry exists. */ static dns_dispentry_t * entry_search(dns_qid_t *qid, isc_sockaddr_t *dest, dns_messageid_t id, in_port_t port, unsigned int bucket) { dns_dispentry_t *res; REQUIRE(VALID_QID(qid)); REQUIRE(bucket < qid->qid_nbuckets); res = ISC_LIST_HEAD(qid->qid_table[bucket]); while (res != NULL) { if (res->id == id && isc_sockaddr_equal(dest, &res->host) && res->port == port) { return (res); } res = ISC_LIST_NEXT(res, link); } return (NULL); } static void free_buffer(dns_dispatch_t *disp, void *buf, unsigned int len) { isc_mempool_t *bpool; INSIST(buf != NULL && len != 0); switch (disp->socktype) { case isc_sockettype_tcp: INSIST(disp->tcpbuffers > 0); disp->tcpbuffers--; isc_mem_put(disp->mgr->mctx, buf, len); break; case isc_sockettype_udp: LOCK(&disp->mgr->buffer_lock); INSIST(disp->mgr->buffers > 0); INSIST(len == disp->mgr->buffersize); disp->mgr->buffers--; bpool = disp->mgr->bpool; UNLOCK(&disp->mgr->buffer_lock); isc_mempool_put(bpool, buf); break; default: INSIST(0); break; } } static void * allocate_udp_buffer(dns_dispatch_t *disp) { isc_mempool_t *bpool; void *temp; LOCK(&disp->mgr->buffer_lock); bpool = disp->mgr->bpool; disp->mgr->buffers++; UNLOCK(&disp->mgr->buffer_lock); temp = isc_mempool_get(bpool); if (temp == NULL) { LOCK(&disp->mgr->buffer_lock); disp->mgr->buffers--; UNLOCK(&disp->mgr->buffer_lock); } return (temp); } static inline void free_sevent(isc_event_t *ev) { isc_mempool_t *pool = ev->ev_destroy_arg; isc_socketevent_t *sev = (isc_socketevent_t *) ev; isc_mempool_put(pool, sev); } static inline isc_socketevent_t * allocate_sevent(dns_dispatch_t *disp, isc_socket_t *sock, isc_eventtype_t type, isc_taskaction_t action, const void *arg) { isc_socketevent_t *ev; void *deconst_arg; ev = isc_mempool_get(disp->sepool); if (ev == NULL) return (NULL); DE_CONST(arg, deconst_arg); ISC_EVENT_INIT(ev, sizeof(*ev), 0, NULL, type, action, deconst_arg, sock, free_sevent, disp->sepool); ev->result = ISC_R_UNSET; ISC_LINK_INIT(ev, ev_link); ISC_LIST_INIT(ev->bufferlist); ev->region.base = NULL; ev->n = 0; ev->offset = 0; ev->attributes = 0; return (ev); } static inline void free_devent(dns_dispatch_t *disp, dns_dispatchevent_t *ev) { if (disp->failsafe_ev == ev) { INSIST(disp->shutdown_out == 1); disp->shutdown_out = 0; return; } isc_mempool_put(disp->mgr->depool, ev); } static inline dns_dispatchevent_t * allocate_devent(dns_dispatch_t *disp) { dns_dispatchevent_t *ev; ev = isc_mempool_get(disp->mgr->depool); if (ev == NULL) return (NULL); ISC_EVENT_INIT(ev, sizeof(*ev), 0, NULL, 0, NULL, NULL, NULL, NULL, NULL); return (ev); } static void udp_exrecv(isc_task_t *task, isc_event_t *ev) { dispsocket_t *dispsock = ev->ev_arg; UNUSED(task); REQUIRE(VALID_DISPSOCK(dispsock)); udp_recv(ev, dispsock->disp, dispsock); } static void udp_shrecv(isc_task_t *task, isc_event_t *ev) { dns_dispatch_t *disp = ev->ev_arg; UNUSED(task); REQUIRE(VALID_DISPATCH(disp)); udp_recv(ev, disp, NULL); } /* * General flow: * * If I/O result == CANCELED or error, free the buffer. * * If query, free the buffer, restart. * * If response: * Allocate event, fill in details. * If cannot allocate, free buffer, restart. * find target. If not found, free buffer, restart. * if event queue is not empty, queue. else, send. * restart. */ static void udp_recv(isc_event_t *ev_in, dns_dispatch_t *disp, dispsocket_t *dispsock) { isc_socketevent_t *ev = (isc_socketevent_t *)ev_in; dns_messageid_t id; isc_result_t dres; isc_buffer_t source; unsigned int flags; dns_dispentry_t *resp = NULL; dns_dispatchevent_t *rev; unsigned int bucket; bool killit; bool queue_response; dns_dispatchmgr_t *mgr; dns_qid_t *qid; isc_netaddr_t netaddr; int match; int result; bool qidlocked = false; LOCK(&disp->lock); mgr = disp->mgr; qid = mgr->qid; dispatch_log(disp, LVL(90), "got packet: requests %d, buffers %d, recvs %d", disp->requests, disp->mgr->buffers, disp->recv_pending); if (dispsock == NULL && ev->ev_type == ISC_SOCKEVENT_RECVDONE) { /* * Unless the receive event was imported from a listening * interface, in which case the event type is * DNS_EVENT_IMPORTRECVDONE, receive operation must be pending. */ INSIST(disp->recv_pending != 0); disp->recv_pending = 0; } if (dispsock != NULL && (ev->result == ISC_R_CANCELED || dispsock->resp == NULL)) { /* * dispsock->resp can be NULL if this transaction was canceled * just after receiving a response. Since this socket is * exclusively used and there should be at most one receive * event the canceled event should have been no effect. So * we can (and should) deactivate the socket right now. */ deactivate_dispsocket(disp, dispsock); dispsock = NULL; } if (disp->shutting_down) { /* * This dispatcher is shutting down. */ free_buffer(disp, ev->region.base, ev->region.length); isc_event_free(&ev_in); ev = NULL; killit = destroy_disp_ok(disp); UNLOCK(&disp->lock); if (killit) isc_task_send(disp->task[0], &disp->ctlevent); return; } if ((disp->attributes & DNS_DISPATCHATTR_EXCLUSIVE) != 0) { if (dispsock != NULL) { resp = dispsock->resp; id = resp->id; if (ev->result != ISC_R_SUCCESS) { /* * This is most likely a network error on a * connected socket. It makes no sense to * check the address or parse the packet, but it * will help to return the error to the caller. */ goto sendresponse; } } else { free_buffer(disp, ev->region.base, ev->region.length); isc_event_free(&ev_in); UNLOCK(&disp->lock); return; } } else if (ev->result != ISC_R_SUCCESS) { free_buffer(disp, ev->region.base, ev->region.length); if (ev->result != ISC_R_CANCELED) dispatch_log(disp, ISC_LOG_ERROR, "odd socket result in udp_recv(): %s", isc_result_totext(ev->result)); isc_event_free(&ev_in); UNLOCK(&disp->lock); return; } /* * If this is from a blackholed address, drop it. */ isc_netaddr_fromsockaddr(&netaddr, &ev->address); if (disp->mgr->blackhole != NULL && dns_acl_match(&netaddr, NULL, disp->mgr->blackhole, NULL, &match, NULL) == ISC_R_SUCCESS && match > 0) { if (isc_log_wouldlog(dns_lctx, LVL(10))) { char netaddrstr[ISC_NETADDR_FORMATSIZE]; isc_netaddr_format(&netaddr, netaddrstr, sizeof(netaddrstr)); dispatch_log(disp, LVL(10), "blackholed packet from %s", netaddrstr); } free_buffer(disp, ev->region.base, ev->region.length); goto restart; } /* * Peek into the buffer to see what we can see. */ isc_buffer_init(&source, ev->region.base, ev->region.length); isc_buffer_add(&source, ev->n); dres = dns_message_peekheader(&source, &id, &flags); if (dres != ISC_R_SUCCESS) { free_buffer(disp, ev->region.base, ev->region.length); dispatch_log(disp, LVL(10), "got garbage packet"); goto restart; } dispatch_log(disp, LVL(92), "got valid DNS message header, /QR %c, id %u", ((flags & DNS_MESSAGEFLAG_QR) ? '1' : '0'), id); /* * Look at flags. If query, drop it. If response, * look to see where it goes. */ if ((flags & DNS_MESSAGEFLAG_QR) == 0) { /* query */ free_buffer(disp, ev->region.base, ev->region.length); goto restart; } /* * Search for the corresponding response. If we are using an exclusive * socket, we've already identified it and we can skip the search; but * the ID and the address must match the expected ones. */ if (resp == NULL) { bucket = dns_hash(qid, &ev->address, id, disp->localport); LOCK(&qid->lock); qidlocked = true; resp = entry_search(qid, &ev->address, id, disp->localport, bucket); dispatch_log(disp, LVL(90), "search for response in bucket %d: %s", bucket, (resp == NULL ? "not found" : "found")); if (resp == NULL) { inc_stats(mgr, dns_resstatscounter_mismatch); free_buffer(disp, ev->region.base, ev->region.length); goto unlock; } } else if (resp->id != id || !isc_sockaddr_equal(&ev->address, &resp->host)) { dispatch_log(disp, LVL(90), "response to an exclusive socket doesn't match"); inc_stats(mgr, dns_resstatscounter_mismatch); free_buffer(disp, ev->region.base, ev->region.length); goto unlock; } /* * Now that we have the original dispatch the query was sent * from check that the address and port the response was * sent to make sense. */ if (disp != resp->disp) { isc_sockaddr_t a1; isc_sockaddr_t a2; /* * Check that the socket types and ports match. */ if (disp->socktype != resp->disp->socktype || isc_sockaddr_getport(&disp->local) != isc_sockaddr_getport(&resp->disp->local)) { free_buffer(disp, ev->region.base, ev->region.length); goto unlock; } /* * If each dispatch is bound to a different address * then fail. * * Note under Linux a packet can be sent out via IPv4 socket * and the response be received via a IPv6 socket. * * Requests sent out via IPv6 should always come back in * via IPv6. */ if (isc_sockaddr_pf(&resp->disp->local) == PF_INET6 && isc_sockaddr_pf(&disp->local) != PF_INET6) { free_buffer(disp, ev->region.base, ev->region.length); goto unlock; } isc_sockaddr_anyofpf(&a1, isc_sockaddr_pf(&resp->disp->local)); isc_sockaddr_anyofpf(&a2, isc_sockaddr_pf(&disp->local)); if (!isc_sockaddr_eqaddr(&disp->local, &resp->disp->local) && !isc_sockaddr_eqaddr(&a1, &resp->disp->local) && !isc_sockaddr_eqaddr(&a2, &disp->local)) { free_buffer(disp, ev->region.base, ev->region.length); goto unlock; } } sendresponse: queue_response = resp->item_out; rev = allocate_devent(resp->disp); if (rev == NULL) { free_buffer(disp, ev->region.base, ev->region.length); goto unlock; } /* * At this point, rev contains the event we want to fill in, and * resp contains the information on the place to send it to. * Send the event off. */ isc_buffer_init(&rev->buffer, ev->region.base, ev->region.length); isc_buffer_add(&rev->buffer, ev->n); rev->result = ev->result; rev->id = id; rev->addr = ev->address; rev->pktinfo = ev->pktinfo; rev->attributes = ev->attributes; if (queue_response) { ISC_LIST_APPEND(resp->items, rev, ev_link); } else { ISC_EVENT_INIT(rev, sizeof(*rev), 0, NULL, DNS_EVENT_DISPATCH, resp->action, resp->arg, resp, NULL, NULL); request_log(disp, resp, LVL(90), "[a] Sent event %p buffer %p len %d to task %p", rev, rev->buffer.base, rev->buffer.length, resp->task); resp->item_out = true; isc_task_send(resp->task, ISC_EVENT_PTR(&rev)); } unlock: if (qidlocked) UNLOCK(&qid->lock); /* * Restart recv() to get the next packet. */ restart: result = startrecv(disp, dispsock); if (result != ISC_R_SUCCESS && dispsock != NULL) { /* * XXX: wired. There seems to be no recovery process other than * deactivate this socket anyway (since we cannot start * receiving, we won't be able to receive a cancel event * from the user). */ deactivate_dispsocket(disp, dispsock); } isc_event_free(&ev_in); UNLOCK(&disp->lock); } /* * General flow: * * If I/O result == CANCELED, EOF, or error, notify everyone as the * various queues drain. * * If query, restart. * * If response: * Allocate event, fill in details. * If cannot allocate, restart. * find target. If not found, restart. * if event queue is not empty, queue. else, send. * restart. */ static void tcp_recv(isc_task_t *task, isc_event_t *ev_in) { dns_dispatch_t *disp = ev_in->ev_arg; dns_tcpmsg_t *tcpmsg = &disp->tcpmsg; dns_messageid_t id; isc_result_t dres; unsigned int flags; dns_dispentry_t *resp; dns_dispatchevent_t *rev; unsigned int bucket; bool killit; bool queue_response; dns_qid_t *qid; int level; char buf[ISC_SOCKADDR_FORMATSIZE]; UNUSED(task); REQUIRE(VALID_DISPATCH(disp)); qid = disp->qid; dispatch_log(disp, LVL(90), "got TCP packet: requests %d, buffers %d, recvs %d", disp->requests, disp->tcpbuffers, disp->recv_pending); LOCK(&disp->lock); INSIST(disp->recv_pending != 0); disp->recv_pending = 0; if (disp->refcount == 0) { /* * This dispatcher is shutting down. Force cancelation. */ tcpmsg->result = ISC_R_CANCELED; } if (tcpmsg->result != ISC_R_SUCCESS) { switch (tcpmsg->result) { case ISC_R_CANCELED: break; case ISC_R_EOF: dispatch_log(disp, LVL(90), "shutting down on EOF"); do_cancel(disp); break; case ISC_R_CONNECTIONRESET: level = ISC_LOG_INFO; goto logit; default: level = ISC_LOG_ERROR; logit: isc_sockaddr_format(&tcpmsg->address, buf, sizeof(buf)); dispatch_log(disp, level, "shutting down due to TCP " "receive error: %s: %s", buf, isc_result_totext(tcpmsg->result)); do_cancel(disp); break; } /* * The event is statically allocated in the tcpmsg * structure, and destroy_disp() frees the tcpmsg, so we must * free the event *before* calling destroy_disp(). */ isc_event_free(&ev_in); disp->shutting_down = 1; disp->shutdown_why = tcpmsg->result; /* * If the recv() was canceled pass the word on. */ killit = destroy_disp_ok(disp); UNLOCK(&disp->lock); if (killit) isc_task_send(disp->task[0], &disp->ctlevent); return; } dispatch_log(disp, LVL(90), "result %d, length == %d, addr = %p", tcpmsg->result, tcpmsg->buffer.length, tcpmsg->buffer.base); /* * Peek into the buffer to see what we can see. */ dres = dns_message_peekheader(&tcpmsg->buffer, &id, &flags); if (dres != ISC_R_SUCCESS) { dispatch_log(disp, LVL(10), "got garbage packet"); goto restart; } dispatch_log(disp, LVL(92), "got valid DNS message header, /QR %c, id %u", ((flags & DNS_MESSAGEFLAG_QR) ? '1' : '0'), id); /* * Allocate an event to send to the query or response client, and * allocate a new buffer for our use. */ /* * Look at flags. If query, drop it. If response, * look to see where it goes. */ if ((flags & DNS_MESSAGEFLAG_QR) == 0) { /* * Query. */ goto restart; } /* * Response. */ bucket = dns_hash(qid, &tcpmsg->address, id, disp->localport); LOCK(&qid->lock); resp = entry_search(qid, &tcpmsg->address, id, disp->localport, bucket); dispatch_log(disp, LVL(90), "search for response in bucket %d: %s", bucket, (resp == NULL ? "not found" : "found")); if (resp == NULL) goto unlock; queue_response = resp->item_out; rev = allocate_devent(disp); if (rev == NULL) goto unlock; /* * At this point, rev contains the event we want to fill in, and * resp contains the information on the place to send it to. * Send the event off. */ dns_tcpmsg_keepbuffer(tcpmsg, &rev->buffer); disp->tcpbuffers++; rev->result = ISC_R_SUCCESS; rev->id = id; rev->addr = tcpmsg->address; if (queue_response) { ISC_LIST_APPEND(resp->items, rev, ev_link); } else { ISC_EVENT_INIT(rev, sizeof(*rev), 0, NULL, DNS_EVENT_DISPATCH, resp->action, resp->arg, resp, NULL, NULL); request_log(disp, resp, LVL(90), "[b] Sent event %p buffer %p len %d to task %p", rev, rev->buffer.base, rev->buffer.length, resp->task); resp->item_out = true; isc_task_send(resp->task, ISC_EVENT_PTR(&rev)); } unlock: UNLOCK(&qid->lock); /* * Restart recv() to get the next packet. */ restart: (void)startrecv(disp, NULL); isc_event_free(&ev_in); UNLOCK(&disp->lock); } /* * disp must be locked. */ static isc_result_t startrecv(dns_dispatch_t *disp, dispsocket_t *dispsock) { isc_result_t res; isc_region_t region; isc_socket_t *sock; if (disp->shutting_down == 1) return (ISC_R_SUCCESS); if ((disp->attributes & DNS_DISPATCHATTR_NOLISTEN) != 0) return (ISC_R_SUCCESS); if (disp->recv_pending != 0 && dispsock == NULL) return (ISC_R_SUCCESS); if (disp->mgr->buffers >= disp->mgr->maxbuffers) return (ISC_R_NOMEMORY); if ((disp->attributes & DNS_DISPATCHATTR_EXCLUSIVE) != 0 && dispsock == NULL) return (ISC_R_SUCCESS); if (dispsock != NULL) sock = dispsock->socket; else sock = disp->socket; INSIST(sock != NULL); switch (disp->socktype) { /* * UDP reads are always maximal. */ case isc_sockettype_udp: region.length = disp->mgr->buffersize; region.base = allocate_udp_buffer(disp); if (region.base == NULL) return (ISC_R_NOMEMORY); if (dispsock != NULL) { isc_task_t *dt = dispsock->task; isc_socketevent_t *sev = allocate_sevent(disp, sock, ISC_SOCKEVENT_RECVDONE, udp_exrecv, dispsock); if (sev == NULL) { free_buffer(disp, region.base, region.length); return (ISC_R_NOMEMORY); } res = isc_socket_recv2(sock, ®ion, 1, dt, sev, 0); if (res != ISC_R_SUCCESS) { free_buffer(disp, region.base, region.length); return (res); } } else { isc_task_t *dt = disp->task[0]; isc_socketevent_t *sev = allocate_sevent(disp, sock, ISC_SOCKEVENT_RECVDONE, udp_shrecv, disp); if (sev == NULL) { free_buffer(disp, region.base, region.length); return (ISC_R_NOMEMORY); } res = isc_socket_recv2(sock, ®ion, 1, dt, sev, 0); if (res != ISC_R_SUCCESS) { free_buffer(disp, region.base, region.length); disp->shutdown_why = res; disp->shutting_down = 1; do_cancel(disp); return (ISC_R_SUCCESS); /* recover by cancel */ } INSIST(disp->recv_pending == 0); disp->recv_pending = 1; } break; case isc_sockettype_tcp: res = dns_tcpmsg_readmessage(&disp->tcpmsg, disp->task[0], tcp_recv, disp); if (res != ISC_R_SUCCESS) { disp->shutdown_why = res; disp->shutting_down = 1; do_cancel(disp); return (ISC_R_SUCCESS); /* recover by cancel */ } INSIST(disp->recv_pending == 0); disp->recv_pending = 1; break; default: INSIST(0); break; } return (ISC_R_SUCCESS); } /* * Mgr must be locked when calling this function. */ static bool destroy_mgr_ok(dns_dispatchmgr_t *mgr) { mgr_log(mgr, LVL(90), "destroy_mgr_ok: shuttingdown=%d, listnonempty=%d, " "depool=%d, rpool=%d, dpool=%d", MGR_IS_SHUTTINGDOWN(mgr), !ISC_LIST_EMPTY(mgr->list), isc_mempool_getallocated(mgr->depool), isc_mempool_getallocated(mgr->rpool), isc_mempool_getallocated(mgr->dpool)); if (!MGR_IS_SHUTTINGDOWN(mgr)) return (false); if (!ISC_LIST_EMPTY(mgr->list)) return (false); if (isc_mempool_getallocated(mgr->depool) != 0) return (false); if (isc_mempool_getallocated(mgr->rpool) != 0) return (false); if (isc_mempool_getallocated(mgr->dpool) != 0) return (false); return (true); } /* * Mgr must be unlocked when calling this function. */ static void destroy_mgr(dns_dispatchmgr_t **mgrp) { isc_mem_t *mctx; dns_dispatchmgr_t *mgr; mgr = *mgrp; *mgrp = NULL; mctx = mgr->mctx; mgr->magic = 0; mgr->mctx = NULL; DESTROYLOCK(&mgr->lock); mgr->state = 0; if (mgr->rngctx != NULL) isc_rng_detach(&mgr->rngctx); DESTROYLOCK(&mgr->rng_lock); isc_mempool_destroy(&mgr->depool); isc_mempool_destroy(&mgr->rpool); isc_mempool_destroy(&mgr->dpool); if (mgr->bpool != NULL) isc_mempool_destroy(&mgr->bpool); if (mgr->spool != NULL) isc_mempool_destroy(&mgr->spool); DESTROYLOCK(&mgr->spool_lock); DESTROYLOCK(&mgr->bpool_lock); DESTROYLOCK(&mgr->dpool_lock); DESTROYLOCK(&mgr->rpool_lock); DESTROYLOCK(&mgr->depool_lock); if (mgr->entropy != NULL) isc_entropy_detach(&mgr->entropy); if (mgr->qid != NULL) qid_destroy(mctx, &mgr->qid); DESTROYLOCK(&mgr->buffer_lock); if (mgr->blackhole != NULL) dns_acl_detach(&mgr->blackhole); if (mgr->stats != NULL) isc_stats_detach(&mgr->stats); if (mgr->v4ports != NULL) { isc_mem_put(mctx, mgr->v4ports, mgr->nv4ports * sizeof(in_port_t)); } if (mgr->v6ports != NULL) { isc_mem_put(mctx, mgr->v6ports, mgr->nv6ports * sizeof(in_port_t)); } isc_mem_put(mctx, mgr, sizeof(dns_dispatchmgr_t)); isc_mem_detach(&mctx); } static isc_result_t open_socket(isc_socketmgr_t *mgr, isc_sockaddr_t *local, unsigned int options, isc_socket_t **sockp, isc_socket_t *dup_socket) { isc_socket_t *sock; isc_result_t result; sock = *sockp; if (sock != NULL) { result = isc_socket_open(sock); if (result != ISC_R_SUCCESS) return (result); } else if (dup_socket != NULL) { result = isc_socket_dup(dup_socket, &sock); if (result != ISC_R_SUCCESS) return (result); isc_socket_setname(sock, "dispatcher", NULL); *sockp = sock; return (ISC_R_SUCCESS); } else { result = isc_socket_create(mgr, isc_sockaddr_pf(local), isc_sockettype_udp, &sock); if (result != ISC_R_SUCCESS) return (result); } isc_socket_setname(sock, "dispatcher", NULL); #ifndef ISC_ALLOW_MAPPED isc_socket_ipv6only(sock, true); #endif result = isc_socket_bind(sock, local, options); if (result != ISC_R_SUCCESS) { if (*sockp == NULL) isc_socket_detach(&sock); else { isc_socket_close(sock); } return (result); } *sockp = sock; return (ISC_R_SUCCESS); } /*% * Create a temporary port list to set the initial default set of dispatch * ports: [1024, 65535]. This is almost meaningless as the application will * normally set the ports explicitly, but is provided to fill some minor corner * cases. */ static isc_result_t create_default_portset(isc_mem_t *mctx, isc_portset_t **portsetp) { isc_result_t result; result = isc_portset_create(mctx, portsetp); if (result != ISC_R_SUCCESS) return (result); isc_portset_addrange(*portsetp, 1024, 65535); return (ISC_R_SUCCESS); } /* * Publics. */ isc_result_t dns_dispatchmgr_create(isc_mem_t *mctx, isc_entropy_t *entropy, dns_dispatchmgr_t **mgrp) { dns_dispatchmgr_t *mgr; isc_result_t result; isc_portset_t *v4portset = NULL; isc_portset_t *v6portset = NULL; REQUIRE(mctx != NULL); REQUIRE(mgrp != NULL && *mgrp == NULL); mgr = isc_mem_get(mctx, sizeof(dns_dispatchmgr_t)); if (mgr == NULL) return (ISC_R_NOMEMORY); mgr->mctx = NULL; isc_mem_attach(mctx, &mgr->mctx); mgr->blackhole = NULL; mgr->stats = NULL; mgr->rngctx = NULL; result = isc_mutex_init(&mgr->lock); if (result != ISC_R_SUCCESS) goto deallocate; result = isc_mutex_init(&mgr->rng_lock); if (result != ISC_R_SUCCESS) goto kill_lock; result = isc_mutex_init(&mgr->buffer_lock); if (result != ISC_R_SUCCESS) goto kill_rng_lock; result = isc_mutex_init(&mgr->depool_lock); if (result != ISC_R_SUCCESS) goto kill_buffer_lock; result = isc_mutex_init(&mgr->rpool_lock); if (result != ISC_R_SUCCESS) goto kill_depool_lock; result = isc_mutex_init(&mgr->dpool_lock); if (result != ISC_R_SUCCESS) goto kill_rpool_lock; result = isc_mutex_init(&mgr->bpool_lock); if (result != ISC_R_SUCCESS) goto kill_dpool_lock; result = isc_mutex_init(&mgr->spool_lock); if (result != ISC_R_SUCCESS) goto kill_bpool_lock; mgr->depool = NULL; if (isc_mempool_create(mgr->mctx, sizeof(dns_dispatchevent_t), &mgr->depool) != ISC_R_SUCCESS) { result = ISC_R_NOMEMORY; goto kill_spool_lock; } mgr->rpool = NULL; if (isc_mempool_create(mgr->mctx, sizeof(dns_dispentry_t), &mgr->rpool) != ISC_R_SUCCESS) { result = ISC_R_NOMEMORY; goto kill_depool; } mgr->dpool = NULL; if (isc_mempool_create(mgr->mctx, sizeof(dns_dispatch_t), &mgr->dpool) != ISC_R_SUCCESS) { result = ISC_R_NOMEMORY; goto kill_rpool; } isc_mempool_setname(mgr->depool, "dispmgr_depool"); isc_mempool_setmaxalloc(mgr->depool, 32768); isc_mempool_setfreemax(mgr->depool, 32768); isc_mempool_associatelock(mgr->depool, &mgr->depool_lock); isc_mempool_setfillcount(mgr->depool, 32); isc_mempool_setname(mgr->rpool, "dispmgr_rpool"); isc_mempool_setmaxalloc(mgr->rpool, 32768); isc_mempool_setfreemax(mgr->rpool, 32768); isc_mempool_associatelock(mgr->rpool, &mgr->rpool_lock); isc_mempool_setfillcount(mgr->rpool, 32); isc_mempool_setname(mgr->dpool, "dispmgr_dpool"); isc_mempool_setmaxalloc(mgr->dpool, 32768); isc_mempool_setfreemax(mgr->dpool, 32768); isc_mempool_associatelock(mgr->dpool, &mgr->dpool_lock); isc_mempool_setfillcount(mgr->dpool, 32); mgr->buffers = 0; mgr->buffersize = 0; mgr->maxbuffers = 0; mgr->bpool = NULL; mgr->spool = NULL; mgr->entropy = NULL; mgr->qid = NULL; mgr->state = 0; ISC_LIST_INIT(mgr->list); mgr->v4ports = NULL; mgr->v6ports = NULL; mgr->nv4ports = 0; mgr->nv6ports = 0; mgr->magic = DNS_DISPATCHMGR_MAGIC; result = create_default_portset(mctx, &v4portset); if (result == ISC_R_SUCCESS) { result = create_default_portset(mctx, &v6portset); if (result == ISC_R_SUCCESS) { result = dns_dispatchmgr_setavailports(mgr, v4portset, v6portset); } } if (v4portset != NULL) isc_portset_destroy(mctx, &v4portset); if (v6portset != NULL) isc_portset_destroy(mctx, &v6portset); if (result != ISC_R_SUCCESS) goto kill_dpool; if (entropy != NULL) isc_entropy_attach(entropy, &mgr->entropy); result = isc_rng_create(mctx, mgr->entropy, &mgr->rngctx); if (result != ISC_R_SUCCESS) goto kill_dpool; *mgrp = mgr; return (ISC_R_SUCCESS); kill_dpool: isc_mempool_destroy(&mgr->dpool); kill_rpool: isc_mempool_destroy(&mgr->rpool); kill_depool: isc_mempool_destroy(&mgr->depool); kill_spool_lock: DESTROYLOCK(&mgr->spool_lock); kill_bpool_lock: DESTROYLOCK(&mgr->bpool_lock); kill_dpool_lock: DESTROYLOCK(&mgr->dpool_lock); kill_rpool_lock: DESTROYLOCK(&mgr->rpool_lock); kill_depool_lock: DESTROYLOCK(&mgr->depool_lock); kill_buffer_lock: DESTROYLOCK(&mgr->buffer_lock); kill_rng_lock: DESTROYLOCK(&mgr->rng_lock); kill_lock: DESTROYLOCK(&mgr->lock); deallocate: isc_mem_put(mctx, mgr, sizeof(dns_dispatchmgr_t)); isc_mem_detach(&mctx); return (result); } void dns_dispatchmgr_setblackhole(dns_dispatchmgr_t *mgr, dns_acl_t *blackhole) { REQUIRE(VALID_DISPATCHMGR(mgr)); if (mgr->blackhole != NULL) dns_acl_detach(&mgr->blackhole); dns_acl_attach(blackhole, &mgr->blackhole); } dns_acl_t * dns_dispatchmgr_getblackhole(dns_dispatchmgr_t *mgr) { REQUIRE(VALID_DISPATCHMGR(mgr)); return (mgr->blackhole); } void dns_dispatchmgr_setblackportlist(dns_dispatchmgr_t *mgr, dns_portlist_t *portlist) { REQUIRE(VALID_DISPATCHMGR(mgr)); UNUSED(portlist); /* This function is deprecated: use dns_dispatchmgr_setavailports(). */ return; } dns_portlist_t * dns_dispatchmgr_getblackportlist(dns_dispatchmgr_t *mgr) { REQUIRE(VALID_DISPATCHMGR(mgr)); return (NULL); /* this function is deprecated */ } isc_result_t dns_dispatchmgr_setavailports(dns_dispatchmgr_t *mgr, isc_portset_t *v4portset, isc_portset_t *v6portset) { in_port_t *v4ports, *v6ports, p; unsigned int nv4ports, nv6ports, i4, i6; REQUIRE(VALID_DISPATCHMGR(mgr)); nv4ports = isc_portset_nports(v4portset); nv6ports = isc_portset_nports(v6portset); v4ports = NULL; if (nv4ports != 0) { v4ports = isc_mem_get(mgr->mctx, sizeof(in_port_t) * nv4ports); if (v4ports == NULL) return (ISC_R_NOMEMORY); } v6ports = NULL; if (nv6ports != 0) { v6ports = isc_mem_get(mgr->mctx, sizeof(in_port_t) * nv6ports); if (v6ports == NULL) { if (v4ports != NULL) { isc_mem_put(mgr->mctx, v4ports, sizeof(in_port_t) * isc_portset_nports(v4portset)); } return (ISC_R_NOMEMORY); } } p = 0; i4 = 0; i6 = 0; do { if (isc_portset_isset(v4portset, p)) { INSIST(i4 < nv4ports); v4ports[i4++] = p; } if (isc_portset_isset(v6portset, p)) { INSIST(i6 < nv6ports); v6ports[i6++] = p; } } while (p++ < 65535); INSIST(i4 == nv4ports && i6 == nv6ports); PORTBUFLOCK(mgr); if (mgr->v4ports != NULL) { isc_mem_put(mgr->mctx, mgr->v4ports, mgr->nv4ports * sizeof(in_port_t)); } mgr->v4ports = v4ports; mgr->nv4ports = nv4ports; if (mgr->v6ports != NULL) { isc_mem_put(mgr->mctx, mgr->v6ports, mgr->nv6ports * sizeof(in_port_t)); } mgr->v6ports = v6ports; mgr->nv6ports = nv6ports; PORTBUFUNLOCK(mgr); return (ISC_R_SUCCESS); } static isc_result_t dns_dispatchmgr_setudp(dns_dispatchmgr_t *mgr, unsigned int buffersize, unsigned int maxbuffers, unsigned int maxrequests, unsigned int buckets, unsigned int increment) { isc_result_t result; REQUIRE(VALID_DISPATCHMGR(mgr)); REQUIRE(buffersize >= 512 && buffersize < (64 * 1024)); REQUIRE(maxbuffers > 0); REQUIRE(buckets < 2097169); /* next prime > 65536 * 32 */ REQUIRE(increment > buckets); /* * Keep some number of items around. This should be a config * option. For now, keep 8, but later keep at least two even * if the caller wants less. This allows us to ensure certain * things, like an event can be "freed" and the next allocation * will always succeed. * * Note that if limits are placed on anything here, we use one * event internally, so the actual limit should be "wanted + 1." * * XXXMLG */ if (maxbuffers < 8) maxbuffers = 8; LOCK(&mgr->buffer_lock); /* Create or adjust buffer pool */ if (mgr->bpool != NULL) { /* * We only increase the maxbuffers to avoid accidental buffer * shortage. Ideally we'd separate the manager-wide maximum * from per-dispatch limits and respect the latter within the * global limit. But at this moment that's deemed to be * overkilling and isn't worth additional implementation * complexity. */ if (maxbuffers > mgr->maxbuffers) { isc_mempool_setmaxalloc(mgr->bpool, maxbuffers); isc_mempool_setfreemax(mgr->bpool, maxbuffers); mgr->maxbuffers = maxbuffers; } } else { result = isc_mempool_create(mgr->mctx, buffersize, &mgr->bpool); if (result != ISC_R_SUCCESS) { UNLOCK(&mgr->buffer_lock); return (result); } isc_mempool_setname(mgr->bpool, "dispmgr_bpool"); isc_mempool_setmaxalloc(mgr->bpool, maxbuffers); isc_mempool_setfreemax(mgr->bpool, maxbuffers); isc_mempool_associatelock(mgr->bpool, &mgr->bpool_lock); isc_mempool_setfillcount(mgr->bpool, 32); } /* Create or adjust socket pool */ if (mgr->spool != NULL) { if (maxrequests < DNS_DISPATCH_POOLSOCKS * 2) { isc_mempool_setmaxalloc(mgr->spool, DNS_DISPATCH_POOLSOCKS * 2); isc_mempool_setfreemax(mgr->spool, DNS_DISPATCH_POOLSOCKS * 2); } UNLOCK(&mgr->buffer_lock); return (ISC_R_SUCCESS); } result = isc_mempool_create(mgr->mctx, sizeof(dispsocket_t), &mgr->spool); if (result != ISC_R_SUCCESS) goto cleanup; isc_mempool_setname(mgr->spool, "dispmgr_spool"); isc_mempool_setmaxalloc(mgr->spool, maxrequests); isc_mempool_setfreemax(mgr->spool, maxrequests); isc_mempool_associatelock(mgr->spool, &mgr->spool_lock); isc_mempool_setfillcount(mgr->spool, 32); result = qid_allocate(mgr, buckets, increment, &mgr->qid, true); if (result != ISC_R_SUCCESS) goto cleanup; mgr->buffersize = buffersize; mgr->maxbuffers = maxbuffers; UNLOCK(&mgr->buffer_lock); return (ISC_R_SUCCESS); cleanup: isc_mempool_destroy(&mgr->bpool); if (mgr->spool != NULL) isc_mempool_destroy(&mgr->spool); UNLOCK(&mgr->buffer_lock); return (result); } void dns_dispatchmgr_destroy(dns_dispatchmgr_t **mgrp) { dns_dispatchmgr_t *mgr; bool killit; REQUIRE(mgrp != NULL); REQUIRE(VALID_DISPATCHMGR(*mgrp)); mgr = *mgrp; *mgrp = NULL; LOCK(&mgr->lock); mgr->state |= MGR_SHUTTINGDOWN; killit = destroy_mgr_ok(mgr); UNLOCK(&mgr->lock); mgr_log(mgr, LVL(90), "destroy: killit=%d", killit); if (killit) destroy_mgr(&mgr); } void dns_dispatchmgr_setstats(dns_dispatchmgr_t *mgr, isc_stats_t *stats) { REQUIRE(VALID_DISPATCHMGR(mgr)); REQUIRE(ISC_LIST_EMPTY(mgr->list)); REQUIRE(mgr->stats == NULL); isc_stats_attach(stats, &mgr->stats); } static int port_cmp(const void *key, const void *ent) { in_port_t p1 = *(const in_port_t *)key; in_port_t p2 = *(const in_port_t *)ent; if (p1 < p2) return (-1); else if (p1 == p2) return (0); else return (1); } static bool portavailable(dns_dispatchmgr_t *mgr, isc_socket_t *sock, isc_sockaddr_t *sockaddrp) { isc_sockaddr_t sockaddr; isc_result_t result; in_port_t *ports, port; unsigned int nports; bool available = false; REQUIRE(sock != NULL || sockaddrp != NULL); PORTBUFLOCK(mgr); if (sock != NULL) { sockaddrp = &sockaddr; result = isc_socket_getsockname(sock, sockaddrp); if (result != ISC_R_SUCCESS) goto unlock; } if (isc_sockaddr_pf(sockaddrp) == AF_INET) { ports = mgr->v4ports; nports = mgr->nv4ports; } else { ports = mgr->v6ports; nports = mgr->nv6ports; } if (ports == NULL) goto unlock; port = isc_sockaddr_getport(sockaddrp); if (bsearch(&port, ports, nports, sizeof(in_port_t), port_cmp) != NULL) available = true; unlock: PORTBUFUNLOCK(mgr); return (available); } #define ATTRMATCH(_a1, _a2, _mask) (((_a1) & (_mask)) == ((_a2) & (_mask))) static bool local_addr_match(dns_dispatch_t *disp, isc_sockaddr_t *addr) { isc_sockaddr_t sockaddr; isc_result_t result; REQUIRE(disp->socket != NULL); if (addr == NULL) return (true); /* * Don't match wildcard ports unless the port is available in the * current configuration. */ if (isc_sockaddr_getport(addr) == 0 && isc_sockaddr_getport(&disp->local) == 0 && !portavailable(disp->mgr, disp->socket, NULL)) { return (false); } /* * Check if we match the binding . * Wildcard ports match/fail here. */ if (isc_sockaddr_equal(&disp->local, addr)) return (true); if (isc_sockaddr_getport(addr) == 0) return (false); /* * Check if we match a bound wildcard port . */ if (!isc_sockaddr_eqaddr(&disp->local, addr)) return (false); result = isc_socket_getsockname(disp->socket, &sockaddr); if (result != ISC_R_SUCCESS) return (false); return (isc_sockaddr_equal(&sockaddr, addr)); } /* * Requires mgr be locked. * * No dispatcher can be locked by this thread when calling this function. * * * NOTE: * If a matching dispatcher is found, it is locked after this function * returns, and must be unlocked by the caller. */ static isc_result_t dispatch_find(dns_dispatchmgr_t *mgr, isc_sockaddr_t *local, unsigned int attributes, unsigned int mask, dns_dispatch_t **dispp) { dns_dispatch_t *disp; isc_result_t result; /* * Make certain that we will not match a private or exclusive dispatch. */ attributes &= ~(DNS_DISPATCHATTR_PRIVATE|DNS_DISPATCHATTR_EXCLUSIVE); mask |= (DNS_DISPATCHATTR_PRIVATE|DNS_DISPATCHATTR_EXCLUSIVE); disp = ISC_LIST_HEAD(mgr->list); while (disp != NULL) { LOCK(&disp->lock); if ((disp->shutting_down == 0) && ATTRMATCH(disp->attributes, attributes, mask) && local_addr_match(disp, local)) break; UNLOCK(&disp->lock); disp = ISC_LIST_NEXT(disp, link); } if (disp == NULL) { result = ISC_R_NOTFOUND; goto out; } *dispp = disp; result = ISC_R_SUCCESS; out: return (result); } static isc_result_t qid_allocate(dns_dispatchmgr_t *mgr, unsigned int buckets, unsigned int increment, dns_qid_t **qidp, bool needsocktable) { dns_qid_t *qid; unsigned int i; isc_result_t result; REQUIRE(VALID_DISPATCHMGR(mgr)); REQUIRE(buckets < 2097169); /* next prime > 65536 * 32 */ REQUIRE(increment > buckets); REQUIRE(qidp != NULL && *qidp == NULL); qid = isc_mem_get(mgr->mctx, sizeof(*qid)); if (qid == NULL) return (ISC_R_NOMEMORY); qid->qid_table = isc_mem_get(mgr->mctx, buckets * sizeof(dns_displist_t)); if (qid->qid_table == NULL) { isc_mem_put(mgr->mctx, qid, sizeof(*qid)); return (ISC_R_NOMEMORY); } qid->sock_table = NULL; if (needsocktable) { qid->sock_table = isc_mem_get(mgr->mctx, buckets * sizeof(dispsocketlist_t)); if (qid->sock_table == NULL) { isc_mem_put(mgr->mctx, qid->qid_table, buckets * sizeof(dns_displist_t)); isc_mem_put(mgr->mctx, qid, sizeof(*qid)); return (ISC_R_NOMEMORY); } } result = isc_mutex_init(&qid->lock); if (result != ISC_R_SUCCESS) { if (qid->sock_table != NULL) { isc_mem_put(mgr->mctx, qid->sock_table, buckets * sizeof(dispsocketlist_t)); } isc_mem_put(mgr->mctx, qid->qid_table, buckets * sizeof(dns_displist_t)); isc_mem_put(mgr->mctx, qid, sizeof(*qid)); return (result); } for (i = 0; i < buckets; i++) { ISC_LIST_INIT(qid->qid_table[i]); if (qid->sock_table != NULL) ISC_LIST_INIT(qid->sock_table[i]); } qid->qid_nbuckets = buckets; qid->qid_increment = increment; qid->magic = QID_MAGIC; *qidp = qid; return (ISC_R_SUCCESS); } static void qid_destroy(isc_mem_t *mctx, dns_qid_t **qidp) { dns_qid_t *qid; REQUIRE(qidp != NULL); qid = *qidp; REQUIRE(VALID_QID(qid)); *qidp = NULL; qid->magic = 0; isc_mem_put(mctx, qid->qid_table, qid->qid_nbuckets * sizeof(dns_displist_t)); if (qid->sock_table != NULL) { isc_mem_put(mctx, qid->sock_table, qid->qid_nbuckets * sizeof(dispsocketlist_t)); } DESTROYLOCK(&qid->lock); isc_mem_put(mctx, qid, sizeof(*qid)); } /* * Allocate and set important limits. */ static isc_result_t dispatch_allocate(dns_dispatchmgr_t *mgr, unsigned int maxrequests, dns_dispatch_t **dispp) { dns_dispatch_t *disp; isc_result_t result; REQUIRE(VALID_DISPATCHMGR(mgr)); REQUIRE(dispp != NULL && *dispp == NULL); /* * Set up the dispatcher, mostly. Don't bother setting some of * the options that are controlled by tcp vs. udp, etc. */ disp = isc_mempool_get(mgr->dpool); if (disp == NULL) return (ISC_R_NOMEMORY); disp->magic = 0; disp->mgr = mgr; disp->maxrequests = maxrequests; disp->attributes = 0; ISC_LINK_INIT(disp, link); disp->refcount = 1; disp->recv_pending = 0; memset(&disp->local, 0, sizeof(disp->local)); memset(&disp->peer, 0, sizeof(disp->peer)); disp->localport = 0; disp->shutting_down = 0; disp->shutdown_out = 0; disp->connected = 0; disp->tcpmsg_valid = 0; disp->shutdown_why = ISC_R_UNEXPECTED; disp->requests = 0; disp->tcpbuffers = 0; disp->qid = NULL; ISC_LIST_INIT(disp->activesockets); ISC_LIST_INIT(disp->inactivesockets); disp->nsockets = 0; disp->rngctx = NULL; isc_rng_attach(mgr->rngctx, &disp->rngctx); disp->port_table = NULL; disp->portpool = NULL; disp->dscp = -1; result = isc_mutex_init(&disp->lock); if (result != ISC_R_SUCCESS) goto deallocate; disp->failsafe_ev = allocate_devent(disp); if (disp->failsafe_ev == NULL) { result = ISC_R_NOMEMORY; goto kill_lock; } disp->magic = DISPATCH_MAGIC; *dispp = disp; return (ISC_R_SUCCESS); /* * error returns */ kill_lock: DESTROYLOCK(&disp->lock); deallocate: if (disp->rngctx != NULL) isc_rng_detach(&disp->rngctx); isc_mempool_put(mgr->dpool, disp); return (result); } /* * MUST be unlocked, and not used by anything. */ static void dispatch_free(dns_dispatch_t **dispp) { dns_dispatch_t *disp; dns_dispatchmgr_t *mgr; int i; REQUIRE(VALID_DISPATCH(*dispp)); disp = *dispp; *dispp = NULL; mgr = disp->mgr; REQUIRE(VALID_DISPATCHMGR(mgr)); if (disp->tcpmsg_valid) { dns_tcpmsg_invalidate(&disp->tcpmsg); disp->tcpmsg_valid = 0; } INSIST(disp->tcpbuffers == 0); INSIST(disp->requests == 0); INSIST(disp->recv_pending == 0); INSIST(ISC_LIST_EMPTY(disp->activesockets)); INSIST(ISC_LIST_EMPTY(disp->inactivesockets)); isc_mempool_put(mgr->depool, disp->failsafe_ev); disp->failsafe_ev = NULL; if (disp->qid != NULL) qid_destroy(mgr->mctx, &disp->qid); if (disp->port_table != NULL) { for (i = 0; i < DNS_DISPATCH_PORTTABLESIZE; i++) INSIST(ISC_LIST_EMPTY(disp->port_table[i])); isc_mem_put(mgr->mctx, disp->port_table, sizeof(disp->port_table[0]) * DNS_DISPATCH_PORTTABLESIZE); } if (disp->portpool != NULL) isc_mempool_destroy(&disp->portpool); if (disp->rngctx != NULL) isc_rng_detach(&disp->rngctx); disp->mgr = NULL; DESTROYLOCK(&disp->lock); disp->magic = 0; isc_mempool_put(mgr->dpool, disp); } isc_result_t dns_dispatch_createtcp(dns_dispatchmgr_t *mgr, isc_socket_t *sock, isc_taskmgr_t *taskmgr, unsigned int buffersize, unsigned int maxbuffers, unsigned int maxrequests, unsigned int buckets, unsigned int increment, unsigned int attributes, dns_dispatch_t **dispp) { attributes |= DNS_DISPATCHATTR_PRIVATE; /* XXXMLG */ return (dns_dispatch_createtcp2(mgr, sock, taskmgr, NULL, NULL, buffersize, maxbuffers, maxrequests, buckets, increment, attributes, dispp)); } isc_result_t dns_dispatch_createtcp2(dns_dispatchmgr_t *mgr, isc_socket_t *sock, isc_taskmgr_t *taskmgr, isc_sockaddr_t *localaddr, isc_sockaddr_t *destaddr, unsigned int buffersize, unsigned int maxbuffers, unsigned int maxrequests, unsigned int buckets, unsigned int increment, unsigned int attributes, dns_dispatch_t **dispp) { isc_result_t result; dns_dispatch_t *disp; UNUSED(maxbuffers); UNUSED(buffersize); REQUIRE(VALID_DISPATCHMGR(mgr)); REQUIRE(isc_socket_gettype(sock) == isc_sockettype_tcp); REQUIRE((attributes & DNS_DISPATCHATTR_TCP) != 0); REQUIRE((attributes & DNS_DISPATCHATTR_UDP) == 0); if (destaddr == NULL) attributes |= DNS_DISPATCHATTR_PRIVATE; /* XXXMLG */ LOCK(&mgr->lock); /* * dispatch_allocate() checks mgr for us. * qid_allocate() checks buckets and increment for us. */ disp = NULL; result = dispatch_allocate(mgr, maxrequests, &disp); if (result != ISC_R_SUCCESS) { UNLOCK(&mgr->lock); return (result); } result = qid_allocate(mgr, buckets, increment, &disp->qid, false); if (result != ISC_R_SUCCESS) goto deallocate_dispatch; disp->socktype = isc_sockettype_tcp; disp->socket = NULL; isc_socket_attach(sock, &disp->socket); disp->sepool = NULL; disp->ntasks = 1; disp->task[0] = NULL; result = isc_task_create(taskmgr, 0, &disp->task[0]); if (result != ISC_R_SUCCESS) goto kill_socket; disp->ctlevent = isc_event_allocate(mgr->mctx, disp, DNS_EVENT_DISPATCHCONTROL, destroy_disp, disp, sizeof(isc_event_t)); if (disp->ctlevent == NULL) { result = ISC_R_NOMEMORY; goto kill_task; } isc_task_setname(disp->task[0], "tcpdispatch", disp); dns_tcpmsg_init(mgr->mctx, disp->socket, &disp->tcpmsg); disp->tcpmsg_valid = 1; disp->attributes = attributes; if (localaddr == NULL) { if (destaddr != NULL) { switch (isc_sockaddr_pf(destaddr)) { case AF_INET: isc_sockaddr_any(&disp->local); break; case AF_INET6: isc_sockaddr_any6(&disp->local); break; } } } else disp->local = *localaddr; if (destaddr != NULL) disp->peer = *destaddr; /* * Append it to the dispatcher list. */ ISC_LIST_APPEND(mgr->list, disp, link); UNLOCK(&mgr->lock); mgr_log(mgr, LVL(90), "created TCP dispatcher %p", disp); dispatch_log(disp, LVL(90), "created task %p", disp->task[0]); *dispp = disp; return (ISC_R_SUCCESS); /* * Error returns. */ kill_task: isc_task_detach(&disp->task[0]); kill_socket: isc_socket_detach(&disp->socket); deallocate_dispatch: dispatch_free(&disp); UNLOCK(&mgr->lock); return (result); } isc_result_t dns_dispatch_gettcp(dns_dispatchmgr_t *mgr, isc_sockaddr_t *destaddr, isc_sockaddr_t *localaddr, dns_dispatch_t **dispp) { dns_dispatch_t *disp; isc_result_t result; isc_sockaddr_t peeraddr; isc_sockaddr_t sockname; unsigned int attributes, mask; bool match = false; REQUIRE(VALID_DISPATCHMGR(mgr)); REQUIRE(destaddr != NULL); REQUIRE(dispp != NULL && *dispp == NULL); attributes = DNS_DISPATCHATTR_TCP | DNS_DISPATCHATTR_CONNECTED; mask = DNS_DISPATCHATTR_TCP | DNS_DISPATCHATTR_PRIVATE | DNS_DISPATCHATTR_EXCLUSIVE | DNS_DISPATCHATTR_CONNECTED; LOCK(&mgr->lock); disp = ISC_LIST_HEAD(mgr->list); while (disp != NULL && !match) { LOCK(&disp->lock); if ((disp->shutting_down == 0) && ATTRMATCH(disp->attributes, attributes, mask) && (localaddr == NULL || isc_sockaddr_eqaddr(localaddr, &disp->local))) { result = isc_socket_getsockname(disp->socket, &sockname); if (result == ISC_R_SUCCESS) result = isc_socket_getpeername(disp->socket, &peeraddr); if (result == ISC_R_SUCCESS && isc_sockaddr_equal(destaddr, &peeraddr) && (localaddr == NULL || isc_sockaddr_eqaddr(localaddr, &sockname))) { /* attach */ disp->refcount++; *dispp = disp; match = true; } } UNLOCK(&disp->lock); disp = ISC_LIST_NEXT(disp, link); } UNLOCK(&mgr->lock); return (match ? ISC_R_SUCCESS : ISC_R_NOTFOUND); } isc_result_t dns_dispatch_gettcp2(dns_dispatchmgr_t *mgr, isc_sockaddr_t *destaddr, isc_sockaddr_t *localaddr, bool *connected, dns_dispatch_t **dispp) { dns_dispatch_t *disp; isc_result_t result; isc_sockaddr_t peeraddr; isc_sockaddr_t sockname; unsigned int attributes, mask; bool match = false; REQUIRE(VALID_DISPATCHMGR(mgr)); REQUIRE(destaddr != NULL); REQUIRE(dispp != NULL && *dispp == NULL); REQUIRE(connected != NULL); /* First pass (same as dns_dispatch_gettcp()) */ attributes = DNS_DISPATCHATTR_TCP | DNS_DISPATCHATTR_CONNECTED; mask = DNS_DISPATCHATTR_TCP | DNS_DISPATCHATTR_PRIVATE | DNS_DISPATCHATTR_EXCLUSIVE | DNS_DISPATCHATTR_CONNECTED; LOCK(&mgr->lock); disp = ISC_LIST_HEAD(mgr->list); while (disp != NULL && !match) { LOCK(&disp->lock); if ((disp->shutting_down == 0) && ATTRMATCH(disp->attributes, attributes, mask) && (localaddr == NULL || isc_sockaddr_eqaddr(localaddr, &disp->local))) { result = isc_socket_getsockname(disp->socket, &sockname); if (result == ISC_R_SUCCESS) result = isc_socket_getpeername(disp->socket, &peeraddr); if (result == ISC_R_SUCCESS && isc_sockaddr_equal(destaddr, &peeraddr) && (localaddr == NULL || isc_sockaddr_eqaddr(localaddr, &sockname))) { /* attach */ disp->refcount++; *dispp = disp; match = true; *connected = true; } } UNLOCK(&disp->lock); disp = ISC_LIST_NEXT(disp, link); } if (match) { UNLOCK(&mgr->lock); return (ISC_R_SUCCESS); } /* Second pass */ attributes = DNS_DISPATCHATTR_TCP; disp = ISC_LIST_HEAD(mgr->list); while (disp != NULL && !match) { LOCK(&disp->lock); if ((disp->shutting_down == 0) && ATTRMATCH(disp->attributes, attributes, mask) && (localaddr == NULL || isc_sockaddr_eqaddr(localaddr, &disp->local)) && isc_sockaddr_equal(destaddr, &disp->peer)) { /* attach */ disp->refcount++; *dispp = disp; match = true; } UNLOCK(&disp->lock); disp = ISC_LIST_NEXT(disp, link); } UNLOCK(&mgr->lock); return (match ? ISC_R_SUCCESS : ISC_R_NOTFOUND); } isc_result_t dns_dispatch_getudp_dup(dns_dispatchmgr_t *mgr, isc_socketmgr_t *sockmgr, isc_taskmgr_t *taskmgr, isc_sockaddr_t *localaddr, unsigned int buffersize, unsigned int maxbuffers, unsigned int maxrequests, unsigned int buckets, unsigned int increment, unsigned int attributes, unsigned int mask, dns_dispatch_t **dispp, dns_dispatch_t *dup_dispatch) { isc_result_t result; dns_dispatch_t *disp = NULL; REQUIRE(VALID_DISPATCHMGR(mgr)); REQUIRE(sockmgr != NULL); REQUIRE(localaddr != NULL); REQUIRE(taskmgr != NULL); REQUIRE(buffersize >= 512 && buffersize < (64 * 1024)); REQUIRE(maxbuffers > 0); REQUIRE(buckets < 2097169); /* next prime > 65536 * 32 */ REQUIRE(increment > buckets); REQUIRE(dispp != NULL && *dispp == NULL); REQUIRE((attributes & DNS_DISPATCHATTR_TCP) == 0); result = dns_dispatchmgr_setudp(mgr, buffersize, maxbuffers, maxrequests, buckets, increment); if (result != ISC_R_SUCCESS) return (result); LOCK(&mgr->lock); if ((attributes & DNS_DISPATCHATTR_EXCLUSIVE) != 0) { REQUIRE(isc_sockaddr_getport(localaddr) == 0); goto createudp; } /* * See if we have a dispatcher that matches. */ if (dup_dispatch == NULL) { result = dispatch_find(mgr, localaddr, attributes, mask, &disp); if (result == ISC_R_SUCCESS) { disp->refcount++; if (disp->maxrequests < maxrequests) disp->maxrequests = maxrequests; if ((disp->attributes & DNS_DISPATCHATTR_NOLISTEN) == 0 && (attributes & DNS_DISPATCHATTR_NOLISTEN) != 0) { disp->attributes |= DNS_DISPATCHATTR_NOLISTEN; if (disp->recv_pending != 0) isc_socket_cancel(disp->socket, disp->task[0], ISC_SOCKCANCEL_RECV); } UNLOCK(&disp->lock); UNLOCK(&mgr->lock); *dispp = disp; return (ISC_R_SUCCESS); } } createudp: /* * Nope, create one. */ result = dispatch_createudp(mgr, sockmgr, taskmgr, localaddr, maxrequests, attributes, &disp, dup_dispatch == NULL ? NULL : dup_dispatch->socket); if (result != ISC_R_SUCCESS) { UNLOCK(&mgr->lock); return (result); } UNLOCK(&mgr->lock); *dispp = disp; return (ISC_R_SUCCESS); } isc_result_t dns_dispatch_getudp(dns_dispatchmgr_t *mgr, isc_socketmgr_t *sockmgr, isc_taskmgr_t *taskmgr, isc_sockaddr_t *localaddr, unsigned int buffersize, unsigned int maxbuffers, unsigned int maxrequests, unsigned int buckets, unsigned int increment, unsigned int attributes, unsigned int mask, dns_dispatch_t **dispp) { return (dns_dispatch_getudp_dup(mgr, sockmgr, taskmgr, localaddr, buffersize, maxbuffers, maxrequests, buckets, increment, attributes, mask, dispp, NULL)); } /* * mgr should be locked. */ #ifndef DNS_DISPATCH_HELD #define DNS_DISPATCH_HELD 20U #endif static isc_result_t get_udpsocket(dns_dispatchmgr_t *mgr, dns_dispatch_t *disp, isc_socketmgr_t *sockmgr, isc_sockaddr_t *localaddr, isc_socket_t **sockp, isc_socket_t *dup_socket) { unsigned int i, j; isc_socket_t *held[DNS_DISPATCH_HELD]; isc_sockaddr_t localaddr_bound; isc_socket_t *sock = NULL; isc_result_t result = ISC_R_SUCCESS; bool anyport; INSIST(sockp != NULL && *sockp == NULL); localaddr_bound = *localaddr; anyport = (isc_sockaddr_getport(localaddr) == 0); if (anyport) { unsigned int nports; in_port_t *ports; /* * If no port is specified, we first try to pick up a random * port by ourselves. */ if (isc_sockaddr_pf(localaddr) == AF_INET) { nports = disp->mgr->nv4ports; ports = disp->mgr->v4ports; } else { nports = disp->mgr->nv6ports; ports = disp->mgr->v6ports; } if (nports == 0) return (ISC_R_ADDRNOTAVAIL); for (i = 0; i < 1024; i++) { in_port_t prt; prt = ports[isc_rng_uniformrandom(DISP_RNGCTX(disp), nports)]; isc_sockaddr_setport(&localaddr_bound, prt); result = open_socket(sockmgr, &localaddr_bound, 0, &sock, NULL); /* * Continue if the port choosen is already in use * or the OS has reserved it. */ if (result == ISC_R_NOPERM || result == ISC_R_ADDRINUSE) continue; disp->localport = prt; *sockp = sock; return (result); } /* * If this fails 1024 times, we then ask the kernel for * choosing one. */ } else { /* Allow to reuse address for non-random ports. */ result = open_socket(sockmgr, localaddr, ISC_SOCKET_REUSEADDRESS, &sock, dup_socket); if (result == ISC_R_SUCCESS) *sockp = sock; return (result); } memset(held, 0, sizeof(held)); i = 0; for (j = 0; j < 0xffffU; j++) { result = open_socket(sockmgr, localaddr, 0, &sock, NULL); if (result != ISC_R_SUCCESS) goto end; else if (portavailable(mgr, sock, NULL)) break; if (held[i] != NULL) isc_socket_detach(&held[i]); held[i++] = sock; sock = NULL; if (i == DNS_DISPATCH_HELD) i = 0; } if (j == 0xffffU) { mgr_log(mgr, ISC_LOG_ERROR, "avoid-v%s-udp-ports: unable to allocate " "an available port", isc_sockaddr_pf(localaddr) == AF_INET ? "4" : "6"); result = ISC_R_FAILURE; goto end; } *sockp = sock; end: for (i = 0; i < DNS_DISPATCH_HELD; i++) { if (held[i] != NULL) isc_socket_detach(&held[i]); } return (result); } static isc_result_t dispatch_createudp(dns_dispatchmgr_t *mgr, isc_socketmgr_t *sockmgr, isc_taskmgr_t *taskmgr, isc_sockaddr_t *localaddr, unsigned int maxrequests, unsigned int attributes, dns_dispatch_t **dispp, isc_socket_t *dup_socket) { isc_result_t result; dns_dispatch_t *disp; isc_socket_t *sock = NULL; int i = 0; /* * dispatch_allocate() checks mgr for us. */ disp = NULL; result = dispatch_allocate(mgr, maxrequests, &disp); if (result != ISC_R_SUCCESS) return (result); disp->socktype = isc_sockettype_udp; if ((attributes & DNS_DISPATCHATTR_EXCLUSIVE) == 0) { result = get_udpsocket(mgr, disp, sockmgr, localaddr, &sock, dup_socket); if (result != ISC_R_SUCCESS) goto deallocate_dispatch; if (isc_log_wouldlog(dns_lctx, 90)) { char addrbuf[ISC_SOCKADDR_FORMATSIZE]; isc_sockaddr_format(localaddr, addrbuf, ISC_SOCKADDR_FORMATSIZE); mgr_log(mgr, LVL(90), "dns_dispatch_createudp: Created" " UDP dispatch for %s with socket fd %d", addrbuf, isc_socket_getfd(sock)); } } else { isc_sockaddr_t sa_any; /* * For dispatches using exclusive sockets with a specific * source address, we only check if the specified address is * available on the system. Query sockets will be created later * on demand. */ isc_sockaddr_anyofpf(&sa_any, isc_sockaddr_pf(localaddr)); if (!isc_sockaddr_eqaddr(&sa_any, localaddr)) { result = open_socket(sockmgr, localaddr, 0, &sock, NULL); if (sock != NULL) isc_socket_detach(&sock); if (result != ISC_R_SUCCESS) goto deallocate_dispatch; } disp->port_table = isc_mem_get(mgr->mctx, sizeof(disp->port_table[0]) * DNS_DISPATCH_PORTTABLESIZE); if (disp->port_table == NULL) goto deallocate_dispatch; for (i = 0; i < DNS_DISPATCH_PORTTABLESIZE; i++) ISC_LIST_INIT(disp->port_table[i]); result = isc_mempool_create(mgr->mctx, sizeof(dispportentry_t), &disp->portpool); if (result != ISC_R_SUCCESS) goto deallocate_dispatch; isc_mempool_setname(disp->portpool, "disp_portpool"); isc_mempool_setfreemax(disp->portpool, 128); } disp->socket = sock; disp->local = *localaddr; if ((attributes & DNS_DISPATCHATTR_EXCLUSIVE) != 0) disp->ntasks = MAX_INTERNAL_TASKS; else disp->ntasks = 1; for (i = 0; i < disp->ntasks; i++) { disp->task[i] = NULL; result = isc_task_create(taskmgr, 0, &disp->task[i]); if (result != ISC_R_SUCCESS) { while (--i >= 0) { isc_task_shutdown(disp->task[i]); isc_task_detach(&disp->task[i]); } goto kill_socket; } isc_task_setname(disp->task[i], "udpdispatch", disp); } disp->ctlevent = isc_event_allocate(mgr->mctx, disp, DNS_EVENT_DISPATCHCONTROL, destroy_disp, disp, sizeof(isc_event_t)); if (disp->ctlevent == NULL) { result = ISC_R_NOMEMORY; goto kill_task; } disp->sepool = NULL; if (isc_mempool_create(mgr->mctx, sizeof(isc_socketevent_t), &disp->sepool) != ISC_R_SUCCESS) { result = ISC_R_NOMEMORY; goto kill_ctlevent; } result = isc_mutex_init(&disp->sepool_lock); if (result != ISC_R_SUCCESS) goto kill_sepool; isc_mempool_setname(disp->sepool, "disp_sepool"); isc_mempool_setmaxalloc(disp->sepool, 32768); isc_mempool_setfreemax(disp->sepool, 32768); isc_mempool_associatelock(disp->sepool, &disp->sepool_lock); isc_mempool_setfillcount(disp->sepool, 16); attributes &= ~DNS_DISPATCHATTR_TCP; attributes |= DNS_DISPATCHATTR_UDP; disp->attributes = attributes; /* * Append it to the dispatcher list. */ ISC_LIST_APPEND(mgr->list, disp, link); mgr_log(mgr, LVL(90), "created UDP dispatcher %p", disp); dispatch_log(disp, LVL(90), "created task %p", disp->task[0]); /* XXX */ if (disp->socket != NULL) dispatch_log(disp, LVL(90), "created socket %p", disp->socket); *dispp = disp; return (result); /* * Error returns. */ kill_sepool: isc_mempool_destroy(&disp->sepool); kill_ctlevent: isc_event_free(&disp->ctlevent); kill_task: for (i = 0; i < disp->ntasks; i++) isc_task_detach(&disp->task[i]); kill_socket: if (disp->socket != NULL) isc_socket_detach(&disp->socket); deallocate_dispatch: dispatch_free(&disp); return (result); } void dns_dispatch_attach(dns_dispatch_t *disp, dns_dispatch_t **dispp) { REQUIRE(VALID_DISPATCH(disp)); REQUIRE(dispp != NULL && *dispp == NULL); LOCK(&disp->lock); disp->refcount++; UNLOCK(&disp->lock); *dispp = disp; } /* * It is important to lock the manager while we are deleting the dispatch, * since dns_dispatch_getudp will call dispatch_find, which returns to * the caller a dispatch but does not attach to it until later. _getudp * locks the manager, however, so locking it here will keep us from attaching * to a dispatcher that is in the process of going away. */ void dns_dispatch_detach(dns_dispatch_t **dispp) { dns_dispatch_t *disp; dispsocket_t *dispsock; bool killit; REQUIRE(dispp != NULL && VALID_DISPATCH(*dispp)); disp = *dispp; *dispp = NULL; LOCK(&disp->lock); INSIST(disp->refcount > 0); disp->refcount--; if (disp->refcount == 0) { if (disp->recv_pending > 0) isc_socket_cancel(disp->socket, disp->task[0], ISC_SOCKCANCEL_RECV); for (dispsock = ISC_LIST_HEAD(disp->activesockets); dispsock != NULL; dispsock = ISC_LIST_NEXT(dispsock, link)) { isc_socket_cancel(dispsock->socket, dispsock->task, ISC_SOCKCANCEL_RECV); } disp->shutting_down = 1; } dispatch_log(disp, LVL(90), "detach: refcount %d", disp->refcount); killit = destroy_disp_ok(disp); UNLOCK(&disp->lock); if (killit) isc_task_send(disp->task[0], &disp->ctlevent); } isc_result_t dns_dispatch_addresponse2(dns_dispatch_t *disp, isc_sockaddr_t *dest, isc_task_t *task, isc_taskaction_t action, void *arg, dns_messageid_t *idp, dns_dispentry_t **resp, isc_socketmgr_t *sockmgr) { return (dns_dispatch_addresponse3(disp, 0, dest, task, action, arg, idp, resp, sockmgr)); } isc_result_t dns_dispatch_addresponse3(dns_dispatch_t *disp, unsigned int options, isc_sockaddr_t *dest, isc_task_t *task, isc_taskaction_t action, void *arg, dns_messageid_t *idp, dns_dispentry_t **resp, isc_socketmgr_t *sockmgr) { dns_dispentry_t *res; unsigned int bucket; in_port_t localport = 0; dns_messageid_t id; int i; bool ok; dns_qid_t *qid; dispsocket_t *dispsocket = NULL; isc_result_t result; REQUIRE(VALID_DISPATCH(disp)); REQUIRE(task != NULL); REQUIRE(dest != NULL); REQUIRE(resp != NULL && *resp == NULL); REQUIRE(idp != NULL); if ((disp->attributes & DNS_DISPATCHATTR_EXCLUSIVE) != 0) REQUIRE(sockmgr != NULL); LOCK(&disp->lock); if (disp->shutting_down == 1) { UNLOCK(&disp->lock); return (ISC_R_SHUTTINGDOWN); } if (disp->requests >= disp->maxrequests) { UNLOCK(&disp->lock); return (ISC_R_QUOTA); } if ((disp->attributes & DNS_DISPATCHATTR_EXCLUSIVE) != 0 && disp->nsockets > DNS_DISPATCH_SOCKSQUOTA) { dispsocket_t *oldestsocket; dns_dispentry_t *oldestresp; dns_dispatchevent_t *rev; /* * Kill oldest outstanding query if the number of sockets * exceeds the quota to keep the room for new queries. */ oldestsocket = ISC_LIST_HEAD(disp->activesockets); oldestresp = oldestsocket->resp; if (oldestresp != NULL && !oldestresp->item_out) { rev = allocate_devent(oldestresp->disp); if (rev != NULL) { rev->buffer.base = NULL; rev->result = ISC_R_CANCELED; rev->id = oldestresp->id; ISC_EVENT_INIT(rev, sizeof(*rev), 0, NULL, DNS_EVENT_DISPATCH, oldestresp->action, oldestresp->arg, oldestresp, NULL, NULL); oldestresp->item_out = true; isc_task_send(oldestresp->task, ISC_EVENT_PTR(&rev)); inc_stats(disp->mgr, dns_resstatscounter_dispabort); } } /* * Move this entry to the tail so that it won't (easily) be * examined before actually being canceled. */ ISC_LIST_UNLINK(disp->activesockets, oldestsocket, link); ISC_LIST_APPEND(disp->activesockets, oldestsocket, link); } qid = DNS_QID(disp); if ((disp->attributes & DNS_DISPATCHATTR_EXCLUSIVE) != 0) { /* * Get a separate UDP socket with a random port number. */ result = get_dispsocket(disp, dest, sockmgr, &dispsocket, &localport); if (result != ISC_R_SUCCESS) { UNLOCK(&disp->lock); inc_stats(disp->mgr, dns_resstatscounter_dispsockfail); return (result); } } else { localport = disp->localport; } /* * Try somewhat hard to find an unique ID unless FIXEDID is set * in which case we use the id passed in via *idp. */ LOCK(&qid->lock); if ((options & DNS_DISPATCHOPT_FIXEDID) != 0) id = *idp; else id = (dns_messageid_t)isc_rng_random(DISP_RNGCTX(disp)); ok = false; i = 0; do { bucket = dns_hash(qid, dest, id, localport); if (entry_search(qid, dest, id, localport, bucket) == NULL) { ok = true; break; } if ((disp->attributes & DNS_DISPATCHATTR_FIXEDID) != 0) break; id += qid->qid_increment; id &= 0x0000ffff; } while (i++ < 64); UNLOCK(&qid->lock); if (!ok) { UNLOCK(&disp->lock); return (ISC_R_NOMORE); } res = isc_mempool_get(disp->mgr->rpool); if (res == NULL) { if (dispsocket != NULL) destroy_dispsocket(disp, &dispsocket); UNLOCK(&disp->lock); return (ISC_R_NOMEMORY); } disp->refcount++; disp->requests++; res->task = NULL; isc_task_attach(task, &res->task); res->disp = disp; res->id = id; res->port = localport; res->bucket = bucket; res->host = *dest; res->action = action; res->arg = arg; res->dispsocket = dispsocket; if (dispsocket != NULL) dispsocket->resp = res; res->item_out = false; ISC_LIST_INIT(res->items); ISC_LINK_INIT(res, link); res->magic = RESPONSE_MAGIC; LOCK(&qid->lock); ISC_LIST_APPEND(qid->qid_table[bucket], res, link); UNLOCK(&qid->lock); inc_stats(disp->mgr, (qid == disp->mgr->qid) ? dns_resstatscounter_disprequdp : dns_resstatscounter_dispreqtcp); request_log(disp, res, LVL(90), "attached to task %p", res->task); if (((disp->attributes & DNS_DISPATCHATTR_UDP) != 0) || ((disp->attributes & DNS_DISPATCHATTR_CONNECTED) != 0)) { result = startrecv(disp, dispsocket); if (result != ISC_R_SUCCESS) { LOCK(&qid->lock); ISC_LIST_UNLINK(qid->qid_table[bucket], res, link); UNLOCK(&qid->lock); if (dispsocket != NULL) destroy_dispsocket(disp, &dispsocket); disp->refcount--; disp->requests--; dec_stats(disp->mgr, (qid == disp->mgr->qid) ? dns_resstatscounter_disprequdp : dns_resstatscounter_dispreqtcp); UNLOCK(&disp->lock); isc_task_detach(&res->task); isc_mempool_put(disp->mgr->rpool, res); return (result); } } if (dispsocket != NULL) ISC_LIST_APPEND(disp->activesockets, dispsocket, link); UNLOCK(&disp->lock); *idp = id; *resp = res; if ((disp->attributes & DNS_DISPATCHATTR_EXCLUSIVE) != 0) INSIST(res->dispsocket != NULL); return (ISC_R_SUCCESS); } isc_result_t dns_dispatch_addresponse(dns_dispatch_t *disp, isc_sockaddr_t *dest, isc_task_t *task, isc_taskaction_t action, void *arg, dns_messageid_t *idp, dns_dispentry_t **resp) { REQUIRE(VALID_DISPATCH(disp)); REQUIRE((disp->attributes & DNS_DISPATCHATTR_EXCLUSIVE) == 0); return (dns_dispatch_addresponse3(disp, 0, dest, task, action, arg, idp, resp, NULL)); } void dns_dispatch_starttcp(dns_dispatch_t *disp) { REQUIRE(VALID_DISPATCH(disp)); dispatch_log(disp, LVL(90), "starttcp %p", disp->task[0]); LOCK(&disp->lock); if ((disp->attributes & DNS_DISPATCHATTR_CONNECTED) == 0) { disp->attributes |= DNS_DISPATCHATTR_CONNECTED; (void)startrecv(disp, NULL); } UNLOCK(&disp->lock); } isc_result_t dns_dispatch_getnext(dns_dispentry_t *resp, dns_dispatchevent_t **sockevent) { dns_dispatch_t *disp; dns_dispatchevent_t *ev; REQUIRE(VALID_RESPONSE(resp)); REQUIRE(sockevent != NULL && *sockevent != NULL); disp = resp->disp; REQUIRE(VALID_DISPATCH(disp)); REQUIRE(resp->item_out == true); resp->item_out = false; ev = *sockevent; *sockevent = NULL; LOCK(&disp->lock); if (ev->buffer.base != NULL) free_buffer(disp, ev->buffer.base, ev->buffer.length); free_devent(disp, ev); if (disp->shutting_down == 1) { UNLOCK(&disp->lock); return (ISC_R_SHUTTINGDOWN); } ev = ISC_LIST_HEAD(resp->items); if (ev != NULL) { ISC_LIST_UNLINK(resp->items, ev, ev_link); ISC_EVENT_INIT(ev, sizeof(*ev), 0, NULL, DNS_EVENT_DISPATCH, resp->action, resp->arg, resp, NULL, NULL); request_log(disp, resp, LVL(90), "[c] Sent event %p buffer %p len %d to task %p", ev, ev->buffer.base, ev->buffer.length, resp->task); resp->item_out = true; isc_task_send(resp->task, ISC_EVENT_PTR(&ev)); } UNLOCK(&disp->lock); return (ISC_R_SUCCESS); } void dns_dispatch_removeresponse(dns_dispentry_t **resp, dns_dispatchevent_t **sockevent) { dns_dispatchmgr_t *mgr; dns_dispatch_t *disp; dns_dispentry_t *res; dispsocket_t *dispsock; dns_dispatchevent_t *ev; unsigned int bucket; bool killit; unsigned int n; isc_eventlist_t events; dns_qid_t *qid; REQUIRE(resp != NULL); REQUIRE(VALID_RESPONSE(*resp)); res = *resp; *resp = NULL; disp = res->disp; REQUIRE(VALID_DISPATCH(disp)); mgr = disp->mgr; REQUIRE(VALID_DISPATCHMGR(mgr)); qid = DNS_QID(disp); if (sockevent != NULL) { REQUIRE(*sockevent != NULL); ev = *sockevent; *sockevent = NULL; } else { ev = NULL; } LOCK(&disp->lock); INSIST(disp->requests > 0); disp->requests--; dec_stats(disp->mgr, (qid == disp->mgr->qid) ? dns_resstatscounter_disprequdp : dns_resstatscounter_dispreqtcp); INSIST(disp->refcount > 0); disp->refcount--; if (disp->refcount == 0) { if (disp->recv_pending > 0) isc_socket_cancel(disp->socket, disp->task[0], ISC_SOCKCANCEL_RECV); for (dispsock = ISC_LIST_HEAD(disp->activesockets); dispsock != NULL; dispsock = ISC_LIST_NEXT(dispsock, link)) { isc_socket_cancel(dispsock->socket, dispsock->task, ISC_SOCKCANCEL_RECV); } disp->shutting_down = 1; } bucket = res->bucket; LOCK(&qid->lock); ISC_LIST_UNLINK(qid->qid_table[bucket], res, link); UNLOCK(&qid->lock); if (ev == NULL && res->item_out) { /* * We've posted our event, but the caller hasn't gotten it * yet. Take it back. */ ISC_LIST_INIT(events); n = isc_task_unsend(res->task, res, DNS_EVENT_DISPATCH, NULL, &events); /* * We had better have gotten it back. */ INSIST(n == 1); ev = (dns_dispatchevent_t *)ISC_LIST_HEAD(events); } if (ev != NULL) { REQUIRE(res->item_out == true); res->item_out = false; if (ev->buffer.base != NULL) free_buffer(disp, ev->buffer.base, ev->buffer.length); free_devent(disp, ev); } request_log(disp, res, LVL(90), "detaching from task %p", res->task); isc_task_detach(&res->task); if (res->dispsocket != NULL) { isc_socket_cancel(res->dispsocket->socket, res->dispsocket->task, ISC_SOCKCANCEL_RECV); res->dispsocket->resp = NULL; } /* * Free any buffered responses as well */ ev = ISC_LIST_HEAD(res->items); while (ev != NULL) { ISC_LIST_UNLINK(res->items, ev, ev_link); if (ev->buffer.base != NULL) free_buffer(disp, ev->buffer.base, ev->buffer.length); free_devent(disp, ev); ev = ISC_LIST_HEAD(res->items); } res->magic = 0; isc_mempool_put(disp->mgr->rpool, res); if (disp->shutting_down == 1) do_cancel(disp); else (void)startrecv(disp, NULL); killit = destroy_disp_ok(disp); UNLOCK(&disp->lock); if (killit) isc_task_send(disp->task[0], &disp->ctlevent); } static void do_cancel(dns_dispatch_t *disp) { dns_dispatchevent_t *ev; dns_dispentry_t *resp; dns_qid_t *qid; if (disp->shutdown_out == 1) return; qid = DNS_QID(disp); /* * Search for the first response handler without packets outstanding * unless a specific hander is given. */ LOCK(&qid->lock); for (resp = linear_first(qid); resp != NULL && resp->item_out; /* Empty. */) resp = linear_next(qid, resp); /* * No one to send the cancel event to, so nothing to do. */ if (resp == NULL) goto unlock; /* * Send the shutdown failsafe event to this resp. */ ev = disp->failsafe_ev; ISC_EVENT_INIT(ev, sizeof(*ev), 0, NULL, DNS_EVENT_DISPATCH, resp->action, resp->arg, resp, NULL, NULL); ev->result = disp->shutdown_why; ev->buffer.base = NULL; ev->buffer.length = 0; disp->shutdown_out = 1; request_log(disp, resp, LVL(10), "cancel: failsafe event %p -> task %p", ev, resp->task); resp->item_out = true; isc_task_send(resp->task, ISC_EVENT_PTR(&ev)); unlock: UNLOCK(&qid->lock); } isc_socket_t * dns_dispatch_getsocket(dns_dispatch_t *disp) { REQUIRE(VALID_DISPATCH(disp)); return (disp->socket); } isc_socket_t * dns_dispatch_getentrysocket(dns_dispentry_t *resp) { REQUIRE(VALID_RESPONSE(resp)); if (resp->dispsocket != NULL) return (resp->dispsocket->socket); else return (NULL); } isc_result_t dns_dispatch_getlocaladdress(dns_dispatch_t *disp, isc_sockaddr_t *addrp) { REQUIRE(VALID_DISPATCH(disp)); REQUIRE(addrp != NULL); if (disp->socktype == isc_sockettype_udp) { *addrp = disp->local; return (ISC_R_SUCCESS); } return (ISC_R_NOTIMPLEMENTED); } void dns_dispatch_cancel(dns_dispatch_t *disp) { REQUIRE(VALID_DISPATCH(disp)); LOCK(&disp->lock); if (disp->shutting_down == 1) { UNLOCK(&disp->lock); return; } disp->shutdown_why = ISC_R_CANCELED; disp->shutting_down = 1; do_cancel(disp); UNLOCK(&disp->lock); return; } unsigned int dns_dispatch_getattributes(dns_dispatch_t *disp) { REQUIRE(VALID_DISPATCH(disp)); /* * We don't bother locking disp here; it's the caller's responsibility * to use only non volatile flags. */ return (disp->attributes); } void dns_dispatch_changeattributes(dns_dispatch_t *disp, unsigned int attributes, unsigned int mask) { REQUIRE(VALID_DISPATCH(disp)); /* Exclusive attribute can only be set on creation */ REQUIRE((attributes & DNS_DISPATCHATTR_EXCLUSIVE) == 0); /* Also, a dispatch with randomport specified cannot start listening */ REQUIRE((disp->attributes & DNS_DISPATCHATTR_EXCLUSIVE) == 0 || (attributes & DNS_DISPATCHATTR_NOLISTEN) == 0); /* XXXMLG * Should check for valid attributes here! */ LOCK(&disp->lock); if ((mask & DNS_DISPATCHATTR_NOLISTEN) != 0) { if ((disp->attributes & DNS_DISPATCHATTR_NOLISTEN) != 0 && (attributes & DNS_DISPATCHATTR_NOLISTEN) == 0) { disp->attributes &= ~DNS_DISPATCHATTR_NOLISTEN; (void)startrecv(disp, NULL); } else if ((disp->attributes & DNS_DISPATCHATTR_NOLISTEN) == 0 && (attributes & DNS_DISPATCHATTR_NOLISTEN) != 0) { disp->attributes |= DNS_DISPATCHATTR_NOLISTEN; if (disp->recv_pending != 0) isc_socket_cancel(disp->socket, disp->task[0], ISC_SOCKCANCEL_RECV); } } disp->attributes &= ~mask; disp->attributes |= (attributes & mask); UNLOCK(&disp->lock); } void dns_dispatch_importrecv(dns_dispatch_t *disp, isc_event_t *event) { void *buf; isc_socketevent_t *sevent, *newsevent; REQUIRE(VALID_DISPATCH(disp)); REQUIRE(event != NULL); if ((disp->attributes & DNS_DISPATCHATTR_NOLISTEN) == 0) return; sevent = (isc_socketevent_t *)event; INSIST(sevent->n <= disp->mgr->buffersize); newsevent = (isc_socketevent_t *) isc_event_allocate(disp->mgr->mctx, NULL, DNS_EVENT_IMPORTRECVDONE, udp_shrecv, disp, sizeof(isc_socketevent_t)); if (newsevent == NULL) return; buf = allocate_udp_buffer(disp); if (buf == NULL) { isc_event_free(ISC_EVENT_PTR(&newsevent)); return; } memmove(buf, sevent->region.base, sevent->n); newsevent->region.base = buf; newsevent->region.length = disp->mgr->buffersize; newsevent->n = sevent->n; newsevent->result = sevent->result; newsevent->address = sevent->address; newsevent->timestamp = sevent->timestamp; newsevent->pktinfo = sevent->pktinfo; newsevent->attributes = sevent->attributes; isc_task_send(disp->task[0], ISC_EVENT_PTR(&newsevent)); } dns_dispatch_t * dns_dispatchset_get(dns_dispatchset_t *dset) { dns_dispatch_t *disp; /* check that dispatch set is configured */ if (dset == NULL || dset->ndisp == 0) return (NULL); LOCK(&dset->lock); disp = dset->dispatches[dset->cur]; dset->cur++; if (dset->cur == dset->ndisp) dset->cur = 0; UNLOCK(&dset->lock); return (disp); } isc_result_t dns_dispatchset_create(isc_mem_t *mctx, isc_socketmgr_t *sockmgr, isc_taskmgr_t *taskmgr, dns_dispatch_t *source, dns_dispatchset_t **dsetp, int n) { isc_result_t result; dns_dispatchset_t *dset; dns_dispatchmgr_t *mgr; int i, j; REQUIRE(VALID_DISPATCH(source)); REQUIRE((source->attributes & DNS_DISPATCHATTR_UDP) != 0); REQUIRE(dsetp != NULL && *dsetp == NULL); mgr = source->mgr; dset = isc_mem_get(mctx, sizeof(dns_dispatchset_t)); if (dset == NULL) return (ISC_R_NOMEMORY); memset(dset, 0, sizeof(*dset)); result = isc_mutex_init(&dset->lock); if (result != ISC_R_SUCCESS) goto fail_alloc; dset->dispatches = isc_mem_get(mctx, sizeof(dns_dispatch_t *) * n); if (dset->dispatches == NULL) { result = ISC_R_NOMEMORY; goto fail_lock; } isc_mem_attach(mctx, &dset->mctx); dset->ndisp = n; dset->cur = 0; dset->dispatches[0] = NULL; dns_dispatch_attach(source, &dset->dispatches[0]); LOCK(&mgr->lock); for (i = 1; i < n; i++) { dset->dispatches[i] = NULL; result = dispatch_createudp(mgr, sockmgr, taskmgr, &source->local, source->maxrequests, source->attributes, &dset->dispatches[i], source->socket); if (result != ISC_R_SUCCESS) goto fail; } UNLOCK(&mgr->lock); *dsetp = dset; return (ISC_R_SUCCESS); fail: UNLOCK(&mgr->lock); for (j = 0; j < i; j++) dns_dispatch_detach(&(dset->dispatches[j])); isc_mem_put(mctx, dset->dispatches, sizeof(dns_dispatch_t *) * n); if (dset->mctx == mctx) isc_mem_detach(&dset->mctx); fail_lock: DESTROYLOCK(&dset->lock); fail_alloc: isc_mem_put(mctx, dset, sizeof(dns_dispatchset_t)); return (result); } void dns_dispatchset_cancelall(dns_dispatchset_t *dset, isc_task_t *task) { int i; REQUIRE(dset != NULL); for (i = 0; i < dset->ndisp; i++) { isc_socket_t *sock; sock = dns_dispatch_getsocket(dset->dispatches[i]); isc_socket_cancel(sock, task, ISC_SOCKCANCEL_ALL); } } void dns_dispatchset_destroy(dns_dispatchset_t **dsetp) { dns_dispatchset_t *dset; int i; REQUIRE(dsetp != NULL && *dsetp != NULL); dset = *dsetp; for (i = 0; i < dset->ndisp; i++) dns_dispatch_detach(&(dset->dispatches[i])); isc_mem_put(dset->mctx, dset->dispatches, sizeof(dns_dispatch_t *) * dset->ndisp); DESTROYLOCK(&dset->lock); isc_mem_putanddetach(&dset->mctx, dset, sizeof(dns_dispatchset_t)); *dsetp = NULL; } void dns_dispatch_setdscp(dns_dispatch_t *disp, isc_dscp_t dscp) { REQUIRE(VALID_DISPATCH(disp)); disp->dscp = dscp; } isc_dscp_t dns_dispatch_getdscp(dns_dispatch_t *disp) { REQUIRE(VALID_DISPATCH(disp)); return (disp->dscp); } #if 0 void dns_dispatchmgr_dump(dns_dispatchmgr_t *mgr) { dns_dispatch_t *disp; char foo[1024]; disp = ISC_LIST_HEAD(mgr->list); while (disp != NULL) { isc_sockaddr_format(&disp->local, foo, sizeof(foo)); printf("\tdispatch %p, addr %s\n", disp, foo); disp = ISC_LIST_NEXT(disp, link); } } #endif