diff options
Diffstat (limited to 'debian/patches/0012-CVE-2018-5743-Limiting-simultaneous-TCP-clients-is-i.patch')
-rw-r--r-- | debian/patches/0012-CVE-2018-5743-Limiting-simultaneous-TCP-clients-is-i.patch | 912 |
1 files changed, 912 insertions, 0 deletions
diff --git a/debian/patches/0012-CVE-2018-5743-Limiting-simultaneous-TCP-clients-is-i.patch b/debian/patches/0012-CVE-2018-5743-Limiting-simultaneous-TCP-clients-is-i.patch new file mode 100644 index 0000000..b451238 --- /dev/null +++ b/debian/patches/0012-CVE-2018-5743-Limiting-simultaneous-TCP-clients-is-i.patch @@ -0,0 +1,912 @@ +From: =?utf-8?q?Witold_Kr=C4=99cicki?= <wpk@isc.org> +Date: Thu, 3 Jan 2019 14:17:43 +0100 +Subject: [CVE-2018-5743]: Limiting simultaneous TCP clients is ineffective + +--- + bin/named/client.c | 427 ++++++++++++++++++++++++++------- + bin/named/include/named/client.h | 23 +- + bin/named/include/named/interfacemgr.h | 13 +- + bin/named/interfacemgr.c | 9 +- + doc/arm/Bv9ARM-book.xml | 3 +- + lib/isc/include/isc/quota.h | 7 + + lib/isc/quota.c | 33 ++- + lib/isc/win32/libisc.def.in | 1 + + 8 files changed, 396 insertions(+), 120 deletions(-) + +diff --git a/bin/named/client.c b/bin/named/client.c +index 4d26eff..020603d 100644 +--- a/bin/named/client.c ++++ b/bin/named/client.c +@@ -246,10 +246,11 @@ static void ns_client_dumpmessage(ns_client_t *client, const char *reason); + static isc_result_t get_client(ns_clientmgr_t *manager, ns_interface_t *ifp, + dns_dispatch_t *disp, bool tcp); + static isc_result_t get_worker(ns_clientmgr_t *manager, ns_interface_t *ifp, +- isc_socket_t *sock); ++ isc_socket_t *sock, ns_client_t *oldclient); + static inline bool +-allowed(isc_netaddr_t *addr, dns_name_t *signer, isc_netaddr_t *ecs_addr, +- uint8_t ecs_addrlen, uint8_t *ecs_scope, dns_acl_t *acl); ++allowed(isc_netaddr_t *addr, dns_name_t *signer, ++ isc_netaddr_t *ecs_addr, uint8_t ecs_addrlen, ++ uint8_t *ecs_scope, dns_acl_t *acl); + static void compute_cookie(ns_client_t *client, uint32_t when, + uint32_t nonce, const unsigned char *secret, + isc_buffer_t *buf); +@@ -298,6 +299,119 @@ ns_client_settimeout(ns_client_t *client, unsigned int seconds) { + } + } + ++/*% ++ * Allocate a reference-counted object that will maintain a single pointer to ++ * the (also reference-counted) TCP client quota, shared between all the ++ * clients processing queries on a single TCP connection, so that all ++ * clients sharing the one socket will together consume only one slot in ++ * the 'tcp-clients' quota. ++ */ ++static isc_result_t ++tcpconn_init(ns_client_t *client, bool force) { ++ isc_result_t result; ++ isc_quota_t *quota = NULL; ++ ns_tcpconn_t *tconn = NULL; ++ ++ REQUIRE(client->tcpconn == NULL); ++ ++ /* ++ * Try to attach to the quota first, so we won't pointlessly ++ * allocate memory for a tcpconn object if we can't get one. ++ */ ++ if (force) { ++ result = isc_quota_force(&ns_g_server->tcpquota, "a); ++ } else { ++ result = isc_quota_attach(&ns_g_server->tcpquota, "a); ++ } ++ if (result != ISC_R_SUCCESS) { ++ return (result); ++ } ++ ++ /* ++ * A global memory context is used for the allocation as different ++ * client structures may have different memory contexts assigned and a ++ * reference counter allocated here might need to be freed by a ++ * different client. The performance impact caused by memory context ++ * contention here is expected to be negligible, given that this code ++ * is only executed for TCP connections. ++ */ ++ tconn = isc_mem_allocate(ns_g_mctx, sizeof(*tconn)); ++ ++ isc_refcount_init(&tconn->refs, 1); ++ tconn->tcpquota = quota; ++ quota = NULL; ++ tconn->pipelined = false; ++ ++ client->tcpconn = tconn; ++ ++ return (ISC_R_SUCCESS); ++} ++ ++/*% ++ * Increase the count of client structures sharing the TCP connection ++ * that 'source' is associated with; add a pointer to the same tcpconn ++ * to 'target', thus associating it with the same TCP connection. ++ */ ++static void ++tcpconn_attach(ns_client_t *source, ns_client_t *target) { ++ int refs; ++ ++ REQUIRE(source->tcpconn != NULL); ++ REQUIRE(target->tcpconn == NULL); ++ REQUIRE(source->tcpconn->pipelined); ++ ++ isc_refcount_increment(&source->tcpconn->refs, &refs); ++ INSIST(refs > 1); ++ target->tcpconn = source->tcpconn; ++} ++ ++/*% ++ * Decrease the count of client structures sharing the TCP connection that ++ * 'client' is associated with. If this is the last client using this TCP ++ * connection, we detach from the TCP quota and free the tcpconn ++ * object. Either way, client->tcpconn is set to NULL. ++ */ ++static void ++tcpconn_detach(ns_client_t *client) { ++ ns_tcpconn_t *tconn = NULL; ++ int refs; ++ ++ REQUIRE(client->tcpconn != NULL); ++ ++ tconn = client->tcpconn; ++ client->tcpconn = NULL; ++ ++ isc_refcount_decrement(&tconn->refs, &refs); ++ if (refs == 0) { ++ isc_quota_detach(&tconn->tcpquota); ++ isc_mem_free(ns_g_mctx, tconn); ++ } ++} ++ ++/*% ++ * Mark a client as active and increment the interface's 'ntcpactive' ++ * counter, as a signal that there is at least one client servicing ++ * TCP queries for the interface. If we reach the TCP client quota at ++ * some point, this will be used to determine whether a quota overrun ++ * should be permitted. ++ * ++ * Marking the client active with the 'tcpactive' flag ensures proper ++ * accounting, by preventing us from incrementing or decrementing ++ * 'ntcpactive' more than once per client. ++ */ ++static void ++mark_tcp_active(ns_client_t *client, bool active) { ++ if (active && !client->tcpactive) { ++ isc_atomic_xadd(&client->interface->ntcpactive, 1); ++ client->tcpactive = active; ++ } else if (!active && client->tcpactive) { ++ uint32_t old = ++ isc_atomic_xadd(&client->interface->ntcpactive, -1); ++ INSIST(old > 0); ++ client->tcpactive = active; ++ } ++} ++ + /*% + * Check for a deactivation or shutdown request and take appropriate + * action. Returns true if either is in progress; in this case +@@ -387,7 +501,8 @@ exit_check(ns_client_t *client) { + INSIST(client->recursionquota == NULL); + + if (NS_CLIENTSTATE_READING == client->newstate) { +- if (!client->pipelined) { ++ INSIST(client->tcpconn != NULL); ++ if (!client->tcpconn->pipelined) { + client_read(client); + client->newstate = NS_CLIENTSTATE_MAX; + return (true); /* We're done. */ +@@ -405,10 +520,13 @@ exit_check(ns_client_t *client) { + */ + INSIST(client->recursionquota == NULL); + INSIST(client->newstate <= NS_CLIENTSTATE_READY); +- if (client->nreads > 0) ++ ++ if (client->nreads > 0) { + dns_tcpmsg_cancelread(&client->tcpmsg); +- if (client->nreads != 0) { +- /* Still waiting for read cancel completion. */ ++ } ++ ++ /* Still waiting for read cancel completion. */ ++ if (client->nreads > 0) { + return (true); + } + +@@ -416,14 +534,49 @@ exit_check(ns_client_t *client) { + dns_tcpmsg_invalidate(&client->tcpmsg); + client->tcpmsg_valid = false; + } ++ ++ /* ++ * Soon the client will be ready to accept a new TCP ++ * connection or UDP request, but we may have enough ++ * clients doing that already. Check whether this client ++ * needs to remain active and allow it go inactive if ++ * not. ++ * ++ * UDP clients always go inactive at this point, but a TCP ++ * client may need to stay active and return to READY ++ * state if no other clients are available to listen ++ * for TCP requests on this interface. ++ * ++ * Regardless, if we're going to FREED state, that means ++ * the system is shutting down and we don't need to ++ * retain clients. ++ */ ++ if (client->mortal && TCP_CLIENT(client) && ++ client->newstate != NS_CLIENTSTATE_FREED && ++ !ns_g_clienttest && ++ isc_atomic_xadd(&client->interface->ntcpaccepting, 0) == 0) ++ { ++ /* Nobody else is accepting */ ++ client->mortal = false; ++ client->newstate = NS_CLIENTSTATE_READY; ++ } ++ ++ /* ++ * Detach from TCP connection and TCP client quota, ++ * if appropriate. If this is the last reference to ++ * the TCP connection in our pipeline group, the ++ * TCP quota slot will be released. ++ */ ++ if (client->tcpconn) { ++ tcpconn_detach(client); ++ } ++ + if (client->tcpsocket != NULL) { + CTRACE("closetcp"); + isc_socket_detach(&client->tcpsocket); ++ mark_tcp_active(client, false); + } + +- if (client->tcpquota != NULL) +- isc_quota_detach(&client->tcpquota); +- + if (client->timerset) { + (void)isc_timer_reset(client->timer, + isc_timertype_inactive, +@@ -431,45 +584,26 @@ exit_check(ns_client_t *client) { + client->timerset = false; + } + +- client->pipelined = false; +- + client->peeraddr_valid = false; + + client->state = NS_CLIENTSTATE_READY; +- INSIST(client->recursionquota == NULL); +- +- /* +- * Now the client is ready to accept a new TCP connection +- * or UDP request, but we may have enough clients doing +- * that already. Check whether this client needs to remain +- * active and force it to go inactive if not. +- * +- * UDP clients go inactive at this point, but TCP clients +- * may remain active if we have fewer active TCP client +- * objects than desired due to an earlier quota exhaustion. +- */ +- if (client->mortal && TCP_CLIENT(client) && !ns_g_clienttest) { +- LOCK(&client->interface->lock); +- if (client->interface->ntcpcurrent < +- client->interface->ntcptarget) +- client->mortal = false; +- UNLOCK(&client->interface->lock); +- } + + /* + * We don't need the client; send it to the inactive + * queue for recycling. + */ + if (client->mortal) { +- if (client->newstate > NS_CLIENTSTATE_INACTIVE) ++ if (client->newstate > NS_CLIENTSTATE_INACTIVE) { + client->newstate = NS_CLIENTSTATE_INACTIVE; ++ } + } + + if (NS_CLIENTSTATE_READY == client->newstate) { + if (TCP_CLIENT(client)) { + client_accept(client); +- } else ++ } else { + client_udprecv(client); ++ } + client->newstate = NS_CLIENTSTATE_MAX; + return (true); + } +@@ -481,41 +615,50 @@ exit_check(ns_client_t *client) { + /* + * We are trying to enter the inactive state. + */ +- if (client->naccepts > 0) ++ if (client->naccepts > 0) { + isc_socket_cancel(client->tcplistener, client->task, + ISC_SOCKCANCEL_ACCEPT); ++ } + + /* Still waiting for accept cancel completion. */ +- if (! (client->naccepts == 0)) ++ if (client->naccepts > 0) { + return (true); ++ } + + /* Accept cancel is complete. */ +- if (client->nrecvs > 0) ++ if (client->nrecvs > 0) { + isc_socket_cancel(client->udpsocket, client->task, + ISC_SOCKCANCEL_RECV); ++ } + + /* Still waiting for recv cancel completion. */ +- if (! (client->nrecvs == 0)) ++ if (client->nrecvs > 0) { + return (true); ++ } + + /* Still waiting for control event to be delivered */ +- if (client->nctls > 0) ++ if (client->nctls > 0) { + return (true); +- +- /* Deactivate the client. */ +- if (client->interface) +- ns_interface_detach(&client->interface); ++ } + + INSIST(client->naccepts == 0); + INSIST(client->recursionquota == NULL); +- if (client->tcplistener != NULL) ++ if (client->tcplistener != NULL) { + isc_socket_detach(&client->tcplistener); +- +- if (client->udpsocket != NULL) ++ mark_tcp_active(client, false); ++ } ++ if (client->udpsocket != NULL) { + isc_socket_detach(&client->udpsocket); ++ } ++ ++ /* Deactivate the client. */ ++ if (client->interface != NULL) { ++ ns_interface_detach(&client->interface); ++ } + +- if (client->dispatch != NULL) ++ if (client->dispatch != NULL) { + dns_dispatch_detach(&client->dispatch); ++ } + + client->attributes = 0; + client->mortal = false; +@@ -540,10 +683,13 @@ exit_check(ns_client_t *client) { + client->newstate = NS_CLIENTSTATE_MAX; + if (!ns_g_clienttest && manager != NULL && + !manager->exiting) ++ { + ISC_QUEUE_PUSH(manager->inactive, client, + ilink); +- if (client->needshutdown) ++ } ++ if (client->needshutdown) { + isc_task_shutdown(client->task); ++ } + return (true); + } + } +@@ -653,7 +799,7 @@ client_start(isc_task_t *task, isc_event_t *event) { + return; + + if (TCP_CLIENT(client)) { +- if (client->pipelined) { ++ if (client->tcpconn != NULL) { + client_read(client); + } else { + client_accept(client); +@@ -663,7 +809,6 @@ client_start(isc_task_t *task, isc_event_t *event) { + } + } + +- + /*% + * The client's task has received a shutdown event. + */ +@@ -2304,6 +2449,7 @@ client_request(isc_task_t *task, isc_event_t *event) { + client->nrecvs--; + } else { + INSIST(TCP_CLIENT(client)); ++ INSIST(client->tcpconn != NULL); + REQUIRE(event->ev_type == DNS_EVENT_TCPMSG); + REQUIRE(event->ev_sender == &client->tcpmsg); + buffer = &client->tcpmsg.buffer; +@@ -2487,18 +2633,27 @@ client_request(isc_task_t *task, isc_event_t *event) { + /* + * Pipeline TCP query processing. + */ +- if (client->message->opcode != dns_opcode_query) +- client->pipelined = false; +- if (TCP_CLIENT(client) && client->pipelined) { +- result = isc_quota_reserve(&ns_g_server->tcpquota); +- if (result == ISC_R_SUCCESS) +- result = ns_client_replace(client); ++ if (TCP_CLIENT(client) && ++ client->message->opcode != dns_opcode_query) ++ { ++ client->tcpconn->pipelined = false; ++ } ++ if (TCP_CLIENT(client) && client->tcpconn->pipelined) { ++ /* ++ * We're pipelining. Replace the client; the ++ * replacement can read the TCP socket looking ++ * for new messages and this one can process the ++ * current message asynchronously. ++ * ++ * There will now be at least three clients using this ++ * TCP socket - one accepting new connections, ++ * one reading an existing connection to get new ++ * messages, and one answering the message already ++ * received. ++ */ ++ result = ns_client_replace(client); + if (result != ISC_R_SUCCESS) { +- ns_client_log(client, NS_LOGCATEGORY_CLIENT, +- NS_LOGMODULE_CLIENT, ISC_LOG_WARNING, +- "no more TCP clients(read): %s", +- isc_result_totext(result)); +- client->pipelined = false; ++ client->tcpconn->pipelined = false; + } + } + +@@ -3054,8 +3209,7 @@ client_create(ns_clientmgr_t *manager, ns_client_t **clientp) { + client->signer = NULL; + dns_name_init(&client->signername, NULL); + client->mortal = false; +- client->pipelined = false; +- client->tcpquota = NULL; ++ client->tcpconn = NULL; + client->recursionquota = NULL; + client->interface = NULL; + client->peeraddr_valid = false; +@@ -3065,6 +3219,7 @@ client_create(ns_clientmgr_t *manager, ns_client_t **clientp) { + client->filter_aaaa = dns_aaaa_ok; + #endif + client->needshutdown = ns_g_clienttest; ++ client->tcpactive = false; + + ISC_EVENT_INIT(&client->ctlevent, sizeof(client->ctlevent), 0, NULL, + NS_EVENT_CLIENTCONTROL, client_start, client, client, +@@ -3159,9 +3314,10 @@ client_read(ns_client_t *client) { + + static void + client_newconn(isc_task_t *task, isc_event_t *event) { ++ isc_result_t result; + ns_client_t *client = event->ev_arg; + isc_socket_newconnev_t *nevent = (isc_socket_newconnev_t *)event; +- isc_result_t result; ++ uint32_t old; + + REQUIRE(event->ev_type == ISC_SOCKEVENT_NEWCONN); + REQUIRE(NS_CLIENT_VALID(client)); +@@ -3171,13 +3327,18 @@ client_newconn(isc_task_t *task, isc_event_t *event) { + + INSIST(client->state == NS_CLIENTSTATE_READY); + ++ /* ++ * The accept() was successful and we're now establishing a new ++ * connection. We need to make note of it in the client and ++ * interface objects so client objects can do the right thing ++ * when going inactive in exit_check() (see comments in ++ * client_accept() for details). ++ */ + INSIST(client->naccepts == 1); + client->naccepts--; + +- LOCK(&client->interface->lock); +- INSIST(client->interface->ntcpcurrent > 0); +- client->interface->ntcpcurrent--; +- UNLOCK(&client->interface->lock); ++ old = isc_atomic_xadd(&client->interface->ntcpaccepting, -1); ++ INSIST(old > 0); + + /* + * We must take ownership of the new socket before the exit +@@ -3210,6 +3371,7 @@ client_newconn(isc_task_t *task, isc_event_t *event) { + NS_LOGMODULE_CLIENT, ISC_LOG_DEBUG(3), + "accept failed: %s", + isc_result_totext(nevent->result)); ++ tcpconn_detach(client); + } + + if (exit_check(client)) +@@ -3247,20 +3409,13 @@ client_newconn(isc_task_t *task, isc_event_t *event) { + * telnetting to port 53 (once per CPU) will + * deny service to legitimate TCP clients. + */ +- client->pipelined = false; +- result = isc_quota_attach(&ns_g_server->tcpquota, +- &client->tcpquota); +- if (result == ISC_R_SUCCESS) +- result = ns_client_replace(client); +- if (result != ISC_R_SUCCESS) { +- ns_client_log(client, NS_LOGCATEGORY_CLIENT, +- NS_LOGMODULE_CLIENT, ISC_LOG_WARNING, +- "no more TCP clients(accept): %s", +- isc_result_totext(result)); +- } else if (ns_g_server->keepresporder == NULL || +- !allowed(&netaddr, NULL, NULL, 0, NULL, +- ns_g_server->keepresporder)) { +- client->pipelined = true; ++ result = ns_client_replace(client); ++ if (result == ISC_R_SUCCESS && ++ (ns_g_server->keepresporder == NULL || ++ !allowed(&netaddr, NULL, NULL, 0, NULL, ++ ns_g_server->keepresporder))) ++ { ++ client->tcpconn->pipelined = true; + } + + client_read(client); +@@ -3276,12 +3431,66 @@ client_accept(ns_client_t *client) { + + CTRACE("accept"); + ++ /* ++ * Set up a new TCP connection. This means try to attach to the ++ * TCP client quota (tcp-clients), but fail if we're over quota. ++ */ ++ result = tcpconn_init(client, false); ++ if (result != ISC_R_SUCCESS) { ++ bool exit; ++ ++ ns_client_log(client, NS_LOGCATEGORY_CLIENT, ++ NS_LOGMODULE_CLIENT, ISC_LOG_WARNING, ++ "TCP client quota reached: %s", ++ isc_result_totext(result)); ++ ++ /* ++ * We have exceeded the system-wide TCP client quota. But, ++ * we can't just block this accept in all cases, because if ++ * we did, a heavy TCP load on other interfaces might cause ++ * this interface to be starved, with no clients able to ++ * accept new connections. ++ * ++ * So, we check here to see if any other clients are ++ * already servicing TCP queries on this interface (whether ++ * accepting, reading, or processing). If we find that at ++ * least one client other than this one is active, then ++ * it's okay *not* to call accept - we can let this ++ * client go inactive and another will take over when it's ++ * done. ++ * ++ * If there aren't enough active clients on the interface, ++ * then we can be a little bit flexible about the quota. ++ * We'll allow *one* extra client through to ensure we're ++ * listening on every interface; we do this by setting the ++ * 'force' option to tcpconn_init(). ++ * ++ * (Note: In practice this means that the real TCP client ++ * quota is tcp-clients plus the number of listening ++ * interfaces plus 1.) ++ */ ++ exit = (isc_atomic_xadd(&client->interface->ntcpactive, 0) > ++ (client->tcpactive ? 1 : 0)); ++ if (exit) { ++ client->newstate = NS_CLIENTSTATE_INACTIVE; ++ (void)exit_check(client); ++ return; ++ } ++ ++ result = tcpconn_init(client, true); ++ RUNTIME_CHECK(result == ISC_R_SUCCESS); ++ } ++ ++ /* ++ * If this client was set up using get_client() or get_worker(), ++ * then TCP is already marked active. However, if it was restarted ++ * from exit_check(), it might not be, so we take care of it now. ++ */ ++ mark_tcp_active(client, true); ++ + result = isc_socket_accept(client->tcplistener, client->task, + client_newconn, client); + if (result != ISC_R_SUCCESS) { +- UNEXPECTED_ERROR(__FILE__, __LINE__, +- "isc_socket_accept() failed: %s", +- isc_result_totext(result)); + /* + * XXXRTH What should we do? We're trying to accept but + * it didn't work. If we just give up, then TCP +@@ -3289,13 +3498,37 @@ client_accept(ns_client_t *client) { + * + * For now, we just go idle. + */ ++ UNEXPECTED_ERROR(__FILE__, __LINE__, ++ "isc_socket_accept() failed: %s", ++ isc_result_totext(result)); ++ ++ tcpconn_detach(client); ++ mark_tcp_active(client, false); + return; + } ++ ++ /* ++ * The client's 'naccepts' counter indicates that this client has ++ * called accept() and is waiting for a new connection. It should ++ * never exceed 1. ++ */ + INSIST(client->naccepts == 0); + client->naccepts++; +- LOCK(&client->interface->lock); +- client->interface->ntcpcurrent++; +- UNLOCK(&client->interface->lock); ++ ++ /* ++ * The interface's 'ntcpaccepting' counter is incremented when ++ * any client calls accept(), and decremented in client_newconn() ++ * once the connection is established. ++ * ++ * When the client object is shutting down after handling a TCP ++ * request (see exit_check()), if this value is at least one, that ++ * means another client has called accept() and is waiting to ++ * establish the next connection. That means the client may be ++ * be free to become inactive; otherwise it may need to start ++ * listening for connections itself to prevent the interface ++ * going dead. ++ */ ++ isc_atomic_xadd(&client->interface->ntcpaccepting, 1); + } + + static void +@@ -3366,15 +3599,17 @@ ns_client_replace(ns_client_t *client) { + REQUIRE(client->manager != NULL); + + tcp = TCP_CLIENT(client); +- if (tcp && client->pipelined) { ++ if (tcp && client->tcpconn != NULL && client->tcpconn->pipelined) { + result = get_worker(client->manager, client->interface, +- client->tcpsocket); ++ client->tcpsocket, client); + } else { + result = get_client(client->manager, client->interface, + client->dispatch, tcp); ++ + } +- if (result != ISC_R_SUCCESS) ++ if (result != ISC_R_SUCCESS) { + return (result); ++ } + + /* + * The responsibility for listening for new requests is hereby +@@ -3560,9 +3795,12 @@ get_client(ns_clientmgr_t *manager, ns_interface_t *ifp, + client->dscp = ifp->dscp; + + if (tcp) { ++ mark_tcp_active(client, true); ++ + client->attributes |= NS_CLIENTATTR_TCP; + isc_socket_attach(ifp->tcpsocket, + &client->tcplistener); ++ + } else { + isc_socket_t *sock; + +@@ -3580,7 +3818,8 @@ get_client(ns_clientmgr_t *manager, ns_interface_t *ifp, + } + + static isc_result_t +-get_worker(ns_clientmgr_t *manager, ns_interface_t *ifp, isc_socket_t *sock) ++get_worker(ns_clientmgr_t *manager, ns_interface_t *ifp, isc_socket_t *sock, ++ ns_client_t *oldclient) + { + isc_result_t result = ISC_R_SUCCESS; + isc_event_t *ev; +@@ -3588,6 +3827,7 @@ get_worker(ns_clientmgr_t *manager, ns_interface_t *ifp, isc_socket_t *sock) + MTRACE("get worker"); + + REQUIRE(manager != NULL); ++ REQUIRE(oldclient != NULL); + + if (manager->exiting) + return (ISC_R_SHUTTINGDOWN); +@@ -3620,14 +3860,15 @@ get_worker(ns_clientmgr_t *manager, ns_interface_t *ifp, isc_socket_t *sock) + ns_interface_attach(ifp, &client->interface); + client->newstate = client->state = NS_CLIENTSTATE_WORKING; + INSIST(client->recursionquota == NULL); +- client->tcpquota = &ns_g_server->tcpquota; + + client->dscp = ifp->dscp; + + client->attributes |= NS_CLIENTATTR_TCP; +- client->pipelined = true; + client->mortal = true; + ++ tcpconn_attach(oldclient, client); ++ mark_tcp_active(client, true); ++ + isc_socket_attach(ifp->tcpsocket, &client->tcplistener); + isc_socket_attach(sock, &client->tcpsocket); + isc_socket_setname(client->tcpsocket, "worker-tcp", NULL); +diff --git a/bin/named/include/named/client.h b/bin/named/include/named/client.h +index b23a7b1..969ee4c 100644 +--- a/bin/named/include/named/client.h ++++ b/bin/named/include/named/client.h +@@ -9,8 +9,6 @@ + * information regarding copyright ownership. + */ + +-/* $Id: client.h,v 1.96 2012/01/31 23:47:31 tbox Exp $ */ +- + #ifndef NAMED_CLIENT_H + #define NAMED_CLIENT_H 1 + +@@ -80,6 +78,13 @@ + *** Types + ***/ + ++/*% reference-counted TCP connection object */ ++typedef struct ns_tcpconn { ++ isc_refcount_t refs; ++ isc_quota_t *tcpquota; ++ bool pipelined; ++} ns_tcpconn_t; ++ + /*% nameserver client structure */ + struct ns_client { + unsigned int magic; +@@ -94,7 +99,8 @@ struct ns_client { + int nupdates; + int nctls; + int references; +- bool needshutdown; /* ++ bool tcpactive; ++ bool needshutdown; /* + * Used by clienttest to get + * the client to go from + * inactive to free state +@@ -130,10 +136,9 @@ struct ns_client { + isc_stdtime_t now; + isc_time_t tnow; + dns_name_t signername; /*%< [T]SIG key name */ +- dns_name_t * signer; /*%< NULL if not valid sig */ +- bool mortal; /*%< Die after handling request */ +- bool pipelined; /*%< TCP queries not in sequence */ +- isc_quota_t *tcpquota; ++ dns_name_t *signer; /*%< NULL if not valid sig */ ++ bool mortal; /*%< Die after handling request */ ++ ns_tcpconn_t *tcpconn; + isc_quota_t *recursionquota; + ns_interface_t *interface; + +@@ -143,8 +148,8 @@ struct ns_client { + isc_sockaddr_t destsockaddr; + + isc_netaddr_t ecs_addr; /*%< EDNS client subnet */ +- uint8_t ecs_addrlen; +- uint8_t ecs_scope; ++ uint8_t ecs_addrlen; ++ uint8_t ecs_scope; + + struct in6_pktinfo pktinfo; + isc_dscp_t dscp; +diff --git a/bin/named/include/named/interfacemgr.h b/bin/named/include/named/interfacemgr.h +index 7d1883e..3535ef2 100644 +--- a/bin/named/include/named/interfacemgr.h ++++ b/bin/named/include/named/interfacemgr.h +@@ -9,8 +9,6 @@ + * information regarding copyright ownership. + */ + +-/* $Id: interfacemgr.h,v 1.35 2011/07/28 23:47:58 tbox Exp $ */ +- + #ifndef NAMED_INTERFACEMGR_H + #define NAMED_INTERFACEMGR_H 1 + +@@ -77,9 +75,14 @@ struct ns_interface { + /*%< UDP dispatchers. */ + isc_socket_t * tcpsocket; /*%< TCP socket. */ + isc_dscp_t dscp; /*%< "listen-on" DSCP value */ +- int ntcptarget; /*%< Desired number of concurrent +- TCP accepts */ +- int ntcpcurrent; /*%< Current ditto, locked */ ++ int32_t ntcpaccepting; /*%< Number of clients ++ ready to accept new ++ TCP connections on this ++ interface */ ++ int32_t ntcpactive; /*%< Number of clients ++ servicing TCP queries ++ (whether accepting or ++ connected) */ + int nudpdispatch; /*%< Number of UDP dispatches */ + ns_clientmgr_t * clientmgr; /*%< Client manager. */ + ISC_LINK(ns_interface_t) link; +diff --git a/bin/named/interfacemgr.c b/bin/named/interfacemgr.c +index 419927b..d9f6df5 100644 +--- a/bin/named/interfacemgr.c ++++ b/bin/named/interfacemgr.c +@@ -386,8 +386,9 @@ ns_interface_create(ns_interfacemgr_t *mgr, isc_sockaddr_t *addr, + * connections will be handled in parallel even though there is + * only one client initially. + */ +- ifp->ntcptarget = 1; +- ifp->ntcpcurrent = 0; ++ ifp->ntcpaccepting = 0; ++ ifp->ntcpactive = 0; ++ + ifp->nudpdispatch = 0; + + ifp->dscp = -1; +@@ -522,9 +523,7 @@ ns_interface_accepttcp(ns_interface_t *ifp) { + */ + (void)isc_socket_filter(ifp->tcpsocket, "dataready"); + +- result = ns_clientmgr_createclients(ifp->clientmgr, +- ifp->ntcptarget, ifp, +- true); ++ result = ns_clientmgr_createclients(ifp->clientmgr, 1, ifp, true); + if (result != ISC_R_SUCCESS) { + UNEXPECTED_ERROR(__FILE__, __LINE__, + "TCP ns_clientmgr_createclients(): %s", +diff --git a/doc/arm/Bv9ARM-book.xml b/doc/arm/Bv9ARM-book.xml +index 719b074..4b36bd0 100644 +--- a/doc/arm/Bv9ARM-book.xml ++++ b/doc/arm/Bv9ARM-book.xml +@@ -8487,7 +8487,8 @@ avoid-v6-udp-ports { 40000; range 50000 60000; }; + <para> + The number of file descriptors reserved for TCP, stdio, + etc. This needs to be big enough to cover the number of +- interfaces <command>named</command> listens on, <command>tcp-clients</command> as well as ++ interfaces <command>named</command> listens on plus ++ <command>tcp-clients</command>, as well as + to provide room for outgoing TCP queries and incoming zone + transfers. The default is <literal>512</literal>. + The minimum value is <literal>128</literal> and the +diff --git a/lib/isc/include/isc/quota.h b/lib/isc/include/isc/quota.h +index b9bf598..36c5830 100644 +--- a/lib/isc/include/isc/quota.h ++++ b/lib/isc/include/isc/quota.h +@@ -100,6 +100,13 @@ isc_quota_attach(isc_quota_t *quota, isc_quota_t **p); + * quota if successful (ISC_R_SUCCESS or ISC_R_SOFTQUOTA). + */ + ++isc_result_t ++isc_quota_force(isc_quota_t *quota, isc_quota_t **p); ++/*%< ++ * Like isc_quota_attach, but will attach '*p' to the quota ++ * even if the hard quota has been exceeded. ++ */ ++ + void + isc_quota_detach(isc_quota_t **p); + /*%< +diff --git a/lib/isc/quota.c b/lib/isc/quota.c +index 3ddff0d..556a61f 100644 +--- a/lib/isc/quota.c ++++ b/lib/isc/quota.c +@@ -74,20 +74,39 @@ isc_quota_release(isc_quota_t *quota) { + UNLOCK("a->lock); + } + +-isc_result_t +-isc_quota_attach(isc_quota_t *quota, isc_quota_t **p) +-{ ++static isc_result_t ++doattach(isc_quota_t *quota, isc_quota_t **p, bool force) { + isc_result_t result; +- INSIST(p != NULL && *p == NULL); ++ REQUIRE(p != NULL && *p == NULL); ++ + result = isc_quota_reserve(quota); +- if (result == ISC_R_SUCCESS || result == ISC_R_SOFTQUOTA) ++ if (result == ISC_R_SUCCESS || result == ISC_R_SOFTQUOTA) { ++ *p = quota; ++ } else if (result == ISC_R_QUOTA && force) { ++ /* attach anyway */ ++ LOCK("a->lock); ++ quota->used++; ++ UNLOCK("a->lock); ++ + *p = quota; ++ result = ISC_R_SUCCESS; ++ } ++ + return (result); + } + ++isc_result_t ++isc_quota_attach(isc_quota_t *quota, isc_quota_t **p) { ++ return (doattach(quota, p, false)); ++} ++ ++isc_result_t ++isc_quota_force(isc_quota_t *quota, isc_quota_t **p) { ++ return (doattach(quota, p, true)); ++} ++ + void +-isc_quota_detach(isc_quota_t **p) +-{ ++isc_quota_detach(isc_quota_t **p) { + INSIST(p != NULL && *p != NULL); + isc_quota_release(*p); + *p = NULL; +diff --git a/lib/isc/win32/libisc.def.in b/lib/isc/win32/libisc.def.in +index a82face..7b9f23d 100644 +--- a/lib/isc/win32/libisc.def.in ++++ b/lib/isc/win32/libisc.def.in +@@ -519,6 +519,7 @@ isc_portset_removerange + isc_quota_attach + isc_quota_destroy + isc_quota_detach ++isc_quota_force + isc_quota_init + isc_quota_max + isc_quota_release |