diff options
author | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-04-27 23:51:28 +0000 |
---|---|---|
committer | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-04-27 23:51:28 +0000 |
commit | 754c7de1e91eeb28c6d3766900ead0c3e44fdb85 (patch) | |
tree | 51433c0e38731dc9ecd342555a56f876980b1834 /lib/dns/rbtdb.c | |
parent | Adding debian version 1:9.16.44-1~deb11u1. (diff) | |
download | bind9-754c7de1e91eeb28c6d3766900ead0c3e44fdb85.tar.xz bind9-754c7de1e91eeb28c6d3766900ead0c3e44fdb85.zip |
Merging upstream version 1:9.16.48.
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'lib/dns/rbtdb.c')
-rw-r--r-- | lib/dns/rbtdb.c | 153 |
1 files changed, 101 insertions, 52 deletions
diff --git a/lib/dns/rbtdb.c b/lib/dns/rbtdb.c index ee06c51..723d169 100644 --- a/lib/dns/rbtdb.c +++ b/lib/dns/rbtdb.c @@ -116,7 +116,7 @@ struct rbtdb_file_header { typedef uint32_t rbtdb_serial_t; typedef uint32_t rbtdb_rdatatype_t; -#define RBTDB_RDATATYPE_BASE(type) ((dns_rdatatype_t)((type)&0xFFFF)) +#define RBTDB_RDATATYPE_BASE(type) ((dns_rdatatype_t)((type) & 0xFFFF)) #define RBTDB_RDATATYPE_EXT(type) ((dns_rdatatype_t)((type) >> 16)) #define RBTDB_RDATATYPE_VALUE(base, ext) \ ((rbtdb_rdatatype_t)(((uint32_t)ext) << 16) | \ @@ -521,6 +521,10 @@ struct dns_rbtdb { */ rbtnodelist_t *deadnodes; + /* List of nodes from which recursive tree pruning can be started from. + * Locked by tree_lock. */ + rbtnodelist_t prunenodes; + /* * Heaps. These are used for TTL based expiry in a cache, * or for zone resigning in a zone DB. hmctx is the memory @@ -1067,6 +1071,7 @@ free_rbtdb(dns_rbtdb_t *rbtdb, bool log, isc_event_t *event) { unsigned int i; isc_result_t result; char buf[DNS_NAME_FORMATSIZE]; + dns_rbtnode_t *node = NULL; dns_rbt_t **treep; isc_time_t start; @@ -1092,8 +1097,6 @@ free_rbtdb(dns_rbtdb_t *rbtdb, bool log, isc_event_t *event) { * the overhead of unlinking all nodes here should be negligible. */ for (i = 0; i < rbtdb->node_lock_count; i++) { - dns_rbtnode_t *node; - node = ISC_LIST_HEAD(rbtdb->deadnodes[i]); while (node != NULL) { ISC_LIST_UNLINK(rbtdb->deadnodes[i], node, deadlink); @@ -1101,6 +1104,12 @@ free_rbtdb(dns_rbtdb_t *rbtdb, bool log, isc_event_t *event) { } } + node = ISC_LIST_HEAD(rbtdb->prunenodes); + while (node != NULL) { + ISC_LIST_UNLINK(rbtdb->prunenodes, node, prunelink); + node = ISC_LIST_HEAD(rbtdb->prunenodes); + } + if (event == NULL) { rbtdb->quantum = (rbtdb->task != NULL) ? 100 : 0; } @@ -1935,19 +1944,32 @@ is_leaf(dns_rbtnode_t *node) { node->left == NULL && node->right == NULL); } +/*% + * The tree lock must be held when this function is called as it reads and + * updates rbtdb->prunenodes. + */ static void send_to_prune_tree(dns_rbtdb_t *rbtdb, dns_rbtnode_t *node, isc_rwlocktype_t locktype) { - isc_event_t *ev; - dns_db_t *db; + bool pruning_queued = (ISC_LIST_HEAD(rbtdb->prunenodes) != NULL); + + INSIST(locktype == isc_rwlocktype_write); - ev = isc_event_allocate(rbtdb->common.mctx, NULL, DNS_EVENT_RBTPRUNE, - prune_tree, node, sizeof(isc_event_t)); new_reference(rbtdb, node, locktype); - db = NULL; - attach((dns_db_t *)rbtdb, &db); - ev->ev_sender = db; - isc_task_send(rbtdb->task, &ev); + INSIST(!ISC_LINK_LINKED(node, prunelink)); + ISC_LIST_APPEND(rbtdb->prunenodes, node, prunelink); + + if (!pruning_queued) { + isc_event_t *ev = NULL; + dns_db_t *db = NULL; + + attach((dns_db_t *)rbtdb, &db); + + ev = isc_event_allocate(rbtdb->common.mctx, NULL, + DNS_EVENT_RBTPRUNE, prune_tree, db, + sizeof(isc_event_t)); + isc_task_send(rbtdb->task, &ev); + } } /*% @@ -2222,17 +2244,26 @@ restore_locks: } /* - * Prune the tree by recursively cleaning-up single leaves. In the worst - * case, the number of iteration is the number of tree levels, which is at - * most the maximum number of domain name labels, i.e, 127. In practice, this - * should be much smaller (only a few times), and even the worst case would be - * acceptable for a single event. + * Prune the tree by recursively cleaning up single leaves. Go through all + * nodes stored in the rbtdb->prunenodes list; for each of them, in the worst + * case, it will be necessary to traverse a number of tree levels equal to the + * maximum legal number of domain name labels (127); in practice, the number of + * tree levels to traverse will virtually always be much smaller (a few levels + * at most). While holding the tree lock throughout this entire operation is + * less than ideal, so is splitting the latter up by queueing a separate + * prune_tree() run for each node to start pruning from (as queueing requires + * allocating memory and can therefore potentially be exploited to exhaust + * available memory). Also note that actually freeing up the memory used by + * RBTDB nodes (which is what this function does) is essential to keeping cache + * memory use in check, so since the tree lock needs to be acquired anyway, + * freeing as many nodes as possible before the tree lock gets released is + * prudent. */ static void prune_tree(isc_task_t *task, isc_event_t *event) { - dns_rbtdb_t *rbtdb = event->ev_sender; - dns_rbtnode_t *node = event->ev_arg; - dns_rbtnode_t *parent; + dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)event->ev_arg; + dns_rbtnode_t *node = NULL; + dns_rbtnode_t *parent = NULL; unsigned int locknum; UNUSED(task); @@ -2240,44 +2271,60 @@ prune_tree(isc_task_t *task, isc_event_t *event) { isc_event_free(&event); RWLOCK(&rbtdb->tree_lock, isc_rwlocktype_write); - locknum = node->locknum; - NODE_LOCK(&rbtdb->node_locks[locknum].lock, isc_rwlocktype_write); - do { - parent = node->parent; - decrement_reference(rbtdb, node, 0, isc_rwlocktype_write, - isc_rwlocktype_write, true); - if (parent != NULL && parent->down == NULL) { - /* - * node was the only down child of the parent and has - * just been removed. We'll then need to examine the - * parent. Keep the lock if possible; otherwise, - * release the old lock and acquire one for the parent. - */ - if (parent->locknum != locknum) { - NODE_UNLOCK(&rbtdb->node_locks[locknum].lock, - isc_rwlocktype_write); - locknum = parent->locknum; - NODE_LOCK(&rbtdb->node_locks[locknum].lock, - isc_rwlocktype_write); + while ((node = ISC_LIST_HEAD(rbtdb->prunenodes)) != NULL) { + locknum = node->locknum; + NODE_LOCK(&rbtdb->node_locks[locknum].lock, + isc_rwlocktype_write); + do { + if (ISC_LINK_LINKED(node, prunelink)) { + ISC_LIST_UNLINK(rbtdb->prunenodes, node, + prunelink); } - /* - * We need to gain a reference to the node before - * decrementing it in the next iteration. - */ - if (ISC_LINK_LINKED(parent, deadlink)) { - ISC_LIST_UNLINK(rbtdb->deadnodes[locknum], + parent = node->parent; + decrement_reference(rbtdb, node, 0, + isc_rwlocktype_write, + isc_rwlocktype_write, true); + + if (parent != NULL && parent->down == NULL) { + /* + * node was the only down child of the parent + * and has just been removed. We'll then need + * to examine the parent. Keep the lock if + * possible; otherwise, release the old lock and + * acquire one for the parent. + */ + if (parent->locknum != locknum) { + NODE_UNLOCK( + &rbtdb->node_locks[locknum].lock, + isc_rwlocktype_write); + locknum = parent->locknum; + NODE_LOCK( + &rbtdb->node_locks[locknum].lock, + isc_rwlocktype_write); + } + + /* + * We need to gain a reference to the node + * before decrementing it in the next iteration. + */ + if (ISC_LINK_LINKED(parent, deadlink)) { + ISC_LIST_UNLINK( + rbtdb->deadnodes[locknum], parent, deadlink); + } + new_reference(rbtdb, parent, + isc_rwlocktype_write); + } else { + parent = NULL; } - new_reference(rbtdb, parent, isc_rwlocktype_write); - } else { - parent = NULL; - } - node = parent; - } while (node != NULL); - NODE_UNLOCK(&rbtdb->node_locks[locknum].lock, isc_rwlocktype_write); + node = parent; + } while (node != NULL); + NODE_UNLOCK(&rbtdb->node_locks[locknum].lock, + isc_rwlocktype_write); + } RWUNLOCK(&rbtdb->tree_lock, isc_rwlocktype_write); detach((dns_db_t **)&rbtdb); @@ -8735,6 +8782,8 @@ dns_rbtdb_create(isc_mem_t *mctx, const dns_name_t *origin, dns_dbtype_t type, ISC_LIST_INIT(rbtdb->deadnodes[i]); } + ISC_LIST_INIT(rbtdb->prunenodes); + rbtdb->active = rbtdb->node_lock_count; for (i = 0; i < (int)(rbtdb->node_lock_count); i++) { @@ -10070,7 +10119,7 @@ rehash_bits(rbtdb_version_t *version, size_t newcount) { uint32_t newbits = oldbits; while (newcount >= HASHSIZE(newbits) && - newbits <= RBTDB_GLUE_TABLE_MAX_BITS) + newbits < RBTDB_GLUE_TABLE_MAX_BITS) { newbits += 1; } |