diff options
Diffstat (limited to 'modules/stats')
-rw-r--r-- | modules/stats/.packaging/test.config | 4 | ||||
-rw-r--r-- | modules/stats/README.rst | 211 | ||||
-rw-r--r-- | modules/stats/meson.build | 25 | ||||
-rw-r--r-- | modules/stats/stats.c | 534 | ||||
-rw-r--r-- | modules/stats/test.integr/deckard.yaml | 12 | ||||
-rw-r--r-- | modules/stats/test.integr/kresd_config.j2 | 114 | ||||
-rw-r--r-- | modules/stats/test.integr/stats.rpl | 194 |
7 files changed, 1094 insertions, 0 deletions
diff --git a/modules/stats/.packaging/test.config b/modules/stats/.packaging/test.config new file mode 100644 index 0000000..fd25460 --- /dev/null +++ b/modules/stats/.packaging/test.config @@ -0,0 +1,4 @@ +-- SPDX-License-Identifier: GPL-3.0-or-later +modules.load('stats') +assert(stats) +quit() diff --git a/modules/stats/README.rst b/modules/stats/README.rst new file mode 100644 index 0000000..7d423aa --- /dev/null +++ b/modules/stats/README.rst @@ -0,0 +1,211 @@ +.. SPDX-License-Identifier: GPL-3.0-or-later + +.. _mod-stats: + +Statistics collector +==================== + +Module ``stats`` gathers various counters from the query resolution +and server internals, and offers them as a key-value storage. +These metrics can be either exported to :ref:`mod-graphite`, +exposed as :ref:`mod-http-prometheus`, or processed using user-provided script +as described in chapter :ref:`async-events`. + +.. note:: Please remember that each Knot Resolver instance keeps its own + statistics, and instances can be started and stopped dynamically. This might + affect your data postprocessing procedures if you are using + :ref:`systemd-multiple-instances`. + +.. _mod-stats-list: + +Built-in statistics +------------------- + +Built-in counters keep track of number of queries and answers matching specific criteria. + ++-----------------------------------------------------------------+ +| **Global request counters** | ++------------------+----------------------------------------------+ +| request.total | total number of DNS requests | +| | (including internal client requests) | ++------------------+----------------------------------------------+ +| request.internal | internal requests generated by Knot Resolver | +| | (e.g. 
DNSSEC trust anchor updates) | ++------------------+----------------------------------------------+ +| request.udp | external requests received over plain UDP | +| | (:rfc:`1035`) | ++------------------+----------------------------------------------+ +| request.tcp | external requests received over plain TCP | +| | (:rfc:`1035`) | ++------------------+----------------------------------------------+ +| request.dot | external requests received over | +| | DNS-over-TLS (:rfc:`7858`) | ++------------------+----------------------------------------------+ +| request.doh | external requests received over | +| | DNS-over-HTTPS (:rfc:`8484`) | ++------------------+----------------------------------------------+ +| request.xdp | external requests received over plain UDP | +| | via an AF_XDP socket | ++------------------+----------------------------------------------+ + ++----------------------------------------------------+ +| **Global answer counters** | ++-----------------+----------------------------------+ +| answer.total | total number of answered queries | ++-----------------+----------------------------------+ +| answer.cached | queries answered from cache | ++-----------------+----------------------------------+ + ++-----------------+----------------------------------+ +| **Answers categorized by RCODE** | ++-----------------+----------------------------------+ +| answer.noerror | NOERROR answers | ++-----------------+----------------------------------+ +| answer.nodata | NOERROR, but empty answers | ++-----------------+----------------------------------+ +| answer.nxdomain | NXDOMAIN answers | ++-----------------+----------------------------------+ +| answer.servfail | SERVFAIL answers | ++-----------------+----------------------------------+ + ++-----------------+----------------------------------+ +| **Answer latency** | ++-----------------+----------------------------------+ +| answer.1ms | completed in 1ms | ++-----------------+----------------------------------+ 
+| answer.10ms | completed in 10ms | ++-----------------+----------------------------------+ +| answer.50ms | completed in 50ms | ++-----------------+----------------------------------+ +| answer.100ms | completed in 100ms | ++-----------------+----------------------------------+ +| answer.250ms | completed in 250ms | ++-----------------+----------------------------------+ +| answer.500ms | completed in 500ms | ++-----------------+----------------------------------+ +| answer.1000ms | completed in 1000ms | ++-----------------+----------------------------------+ +| answer.1500ms | completed in 1500ms | ++-----------------+----------------------------------+ +| answer.slow | completed in more than 1500ms | ++-----------------+----------------------------------+ + ++-----------------+----------------------------------+ +| **Answer flags** | ++-----------------+----------------------------------+ +| answer.aa | authoritative answer | ++-----------------+----------------------------------+ +| answer.tc | truncated answer | ++-----------------+----------------------------------+ +| answer.ra | recursion available | ++-----------------+----------------------------------+ +| answer.rd | recursion desired (in answer!) 
| ++-----------------+----------------------------------+ +| answer.ad | authentic data (DNSSEC) | ++-----------------+----------------------------------+ +| answer.cd | checking disabled (DNSSEC) | ++-----------------+----------------------------------+ +| answer.do | DNSSEC answer OK | ++-----------------+----------------------------------+ +| answer.edns0 | EDNS0 present | ++-----------------+----------------------------------+ + ++-----------------+----------------------------------+ +| **Query flags** | ++-----------------+----------------------------------+ +| query.edns | queries with EDNS present | ++-----------------+----------------------------------+ +| query.dnssec | queries with DNSSEC DO=1 | ++-----------------+----------------------------------+ + +Example: + +.. code-block:: none + + modules.load('stats') + + -- Enumerate metrics + > stats.list() + [answer.cached] => 486178 + [iterator.tcp] => 490 + [answer.noerror] => 507367 + [answer.total] => 618631 + [iterator.udp] => 102408 + [query.concurrent] => 149 + + -- Query metrics by prefix + > stats.list('iter') + [iterator.udp] => 105104 + [iterator.tcp] => 490 + + -- Fetch most common queries + > stats.frequent() + [1] => { + [type] => 2 + [count] => 4 + [name] => cz. + } + + -- Fetch most common queries (sorted by frequency) + > table.sort(stats.frequent(), function (a, b) return a.count > b.count end) + + -- Show recently contacted authoritative servers + > stats.upstreams() + [2a01:618:404::1] => { + [1] => 26 -- RTT + } + [128.241.220.33] => { + [1] => 31 -- RTT + } + + -- Set custom metrics from modules + > stats['filter.match'] = 5 + > stats['filter.match'] + 5 + +Module reference +---------------- + +.. function:: stats.get(key) + + :param string key: i.e. ``"answer.total"`` + :return: ``number`` + +Return nominal value of given metric. + +.. function:: stats.set('key val') + +Set nominal value of given metric. + +Example: + +.. 
code-block:: lua + + stats.set('answer.total 5') + -- or syntactic sugar + stats['answer.total'] = 5 + + +.. function:: stats.list([prefix]) + + :param string prefix: optional metric prefix, i.e. ``"answer"`` shows only metrics beginning with "answer" + +Outputs collected metrics as a JSON dictionary. + +.. function:: stats.upstreams() + +Outputs a list of recent upstreams and their RTT. It is sorted by time and stored in a ring buffer of +a fixed size. This means it's not aggregated and readable by multiple consumers, but also that +you may lose entries if you don't read quickly enough. The default ring size is 512 entries, and may be overridden on compile time by ``-DUPSTREAMS_COUNT=X``. + +.. function:: stats.frequent() + +Outputs list of most frequent iterative queries as a JSON array. The queries are sampled probabilistically, +and include subrequests. The list maximum size is 5000 entries, make diffs if you want to track it over time. + +.. function:: stats.clear_frequent() + +Clear the list of most frequent iterative queries. + +.. include:: ../modules/graphite/README.rst +.. include:: ../modules/http/prometheus.rst diff --git a/modules/stats/meson.build b/modules/stats/meson.build new file mode 100644 index 0000000..4f2d41e --- /dev/null +++ b/modules/stats/meson.build @@ -0,0 +1,25 @@ +# SPDX-License-Identifier: GPL-3.0-or-later +# C module: stats + +stats_src = files([ + 'stats.c', +]) +c_src_lint += stats_src + +integr_tests += [ + ['stats', meson.current_source_dir() / 'test.integr'], +] + + +stats_mod = shared_module( + 'stats', + stats_src, + dependencies: [ + libknot, + luajit_inc, + ], + include_directories: mod_inc_dir, + name_prefix: '', + install: true, + install_dir: modules_dir, +) diff --git a/modules/stats/stats.c b/modules/stats/stats.c new file mode 100644 index 0000000..ebb2877 --- /dev/null +++ b/modules/stats/stats.c @@ -0,0 +1,534 @@ +/* Copyright (C) CZ.NIC, z.s.p.o. 
<knot-resolver@labs.nic.cz> + * SPDX-License-Identifier: GPL-3.0-or-later + */ + +/** + * @file stats.c + * @brief Storage for various counters and metrics from query resolution. + * + * You can either reuse this module to compute statistics or store custom metrics + * in it via the extensions. + */ + +#include <libknot/packet/pkt.h> +#include <libknot/packet/wire.h> +#include <libknot/descriptor.h> +#include <ccan/json/json.h> +#include <contrib/cleanup.h> +#include <arpa/inet.h> +#include <lua.h> + +#include "lib/generic/trie.h" +#include "lib/layer/iterate.h" +#include "lib/rplan.h" +#include "lib/module.h" +#include "lib/layer.h" +#include "lib/resolve.h" + +/* Defaults */ +#define VERBOSE_MSG(qry, ...) kr_log_q(qry, STATISTICS, __VA_ARGS__) +#define FREQUENT_PSAMPLE 10 /* Sampling rate, 1 in N */ +#ifdef LRU_REP_SIZE + #define FREQUENT_COUNT LRU_REP_SIZE /* Size of frequent tables */ +#else + #define FREQUENT_COUNT 5000 /* Size of frequent tables */ +#endif +#ifndef UPSTREAMS_COUNT + #define UPSTREAMS_COUNT 512 /* Size of recent upstreams */ +#endif + +/** @cond internal Fixed-size map of predefined metrics. */ +#define CONST_METRICS(X) \ + X(answer,total) X(answer,noerror) X(answer,nodata) X(answer,nxdomain) X(answer,servfail) \ + X(answer,cached) X(answer,1ms) X(answer,10ms) X(answer,50ms) X(answer,100ms) \ + X(answer,250ms) X(answer,500ms) X(answer,1000ms) X(answer,1500ms) X(answer,slow) \ + X(answer,aa) X(answer,tc) X(answer,rd) X(answer,ra) X(answer, ad) X(answer,cd) \ + X(answer,edns0) X(answer,do) \ + X(query,edns) X(query,dnssec) \ + X(request,total) X(request,udp) X(request,tcp) X(request,xdp) \ + X(request,dot) X(request,doh) X(request,internal) \ + X(const,end) + +enum const_metric { + #define X(a,b) metric_ ## a ## _ ## b, + CONST_METRICS(X) + #undef X +}; +struct const_metric_elm { + const char *key; + size_t val; +}; +static struct const_metric_elm const_metrics[] = { + #define X(a,b) [metric_ ## a ## _ ## b] = { #a "." 
#b, 0 }, + CONST_METRICS(X) + #undef X +}; +/** @endcond */ + +/** @internal LRU hash of most frequent names. */ +typedef lru_t(unsigned) namehash_t; +typedef array_t(struct sockaddr_in6) addrlist_t; + +/** @internal Stats data structure. */ +struct stat_data { + trie_t *trie; + struct { + namehash_t *frequent; + } queries; + struct { + addrlist_t q; + size_t head; + } upstreams; +}; + +/** @internal We don't store/publish port, repurpose it for RTT instead. */ +#define sin6_rtt sin6_port + +/** @internal Add to const map counter */ +static inline void stat_const_add(struct stat_data *data, enum const_metric key, ssize_t incr) +{ + const_metrics[key].val += incr; +} + +static int collect_answer(struct stat_data *data, knot_pkt_t *pkt) +{ + stat_const_add(data, metric_answer_total, 1); + /* Count per-rcode */ + switch(knot_wire_get_rcode(pkt->wire)) { + case KNOT_RCODE_NOERROR: + if (knot_wire_get_ancount(pkt->wire) > 0) + stat_const_add(data, metric_answer_noerror, 1); + else + stat_const_add(data, metric_answer_nodata, 1); + break; + case KNOT_RCODE_NXDOMAIN: stat_const_add(data, metric_answer_nxdomain, 1); break; + case KNOT_RCODE_SERVFAIL: stat_const_add(data, metric_answer_servfail, 1); break; + default: break; + } + + return kr_ok(); +} + +static inline int collect_key(char *key, const knot_dname_t *name, uint16_t type) +{ + memcpy(key, &type, sizeof(type)); + int key_len = knot_dname_to_wire((uint8_t *)key + sizeof(type), name, KNOT_DNAME_MAXLEN); + if (key_len < 0) { + return kr_error(key_len); + } + return key_len + sizeof(type); +} + +static void collect_sample(struct stat_data *data, struct kr_rplan *rplan) +{ + /* Sample key = {[2] type, [1-255] owner} */ + char key[sizeof(uint16_t) + KNOT_DNAME_MAXLEN]; + for (size_t i = 0; i < rplan->resolved.len; ++i) { + /* Sample queries leading to iteration */ + struct kr_query *qry = rplan->resolved.at[i]; + if (qry->flags.CACHED) { + continue; + } + /* Consider 1 in N for frequent sampling. 
+ * TODO: redesign the sampling approach. */ + if (kr_rand_coin(1, FREQUENT_PSAMPLE)) { + int key_len = collect_key(key, qry->sname, qry->stype); + if (kr_fails_assert(key_len >= 0)) + continue; + unsigned *count = lru_get_new(data->queries.frequent, key, key_len, NULL); + if (count) + *count += 1; + } + } +} + +static int collect_rtt(kr_layer_t *ctx, knot_pkt_t *pkt) +{ + struct kr_request *req = ctx->req; + struct kr_query *qry = req->current_query; + if (qry->flags.CACHED || !req->upstream.transport) { + return ctx->state; + } + + /* Push address and RTT to the ring buffer head */ + struct kr_module *module = ctx->api->data; + struct stat_data *data = module->data; + + /* Socket address is encoded into sockaddr_in6 struct that + * unions with sockaddr_in and differ in sa_family */ + struct sockaddr_in6 *e = &data->upstreams.q.at[data->upstreams.head]; + const union kr_sockaddr *src = &req->upstream.transport->address; + switch (src->ip.sa_family) { + case AF_INET: memcpy(e, &src->ip4, sizeof(src->ip4)); break; + case AF_INET6: memcpy(e, &src->ip6, sizeof(src->ip6)); break; + default: return ctx->state; + } + /* Replace port number with the RTT information (cap is UINT16_MAX milliseconds) */ + e->sin6_rtt = req->upstream.rtt; + + /* Advance ring buffer head */ + data->upstreams.head = (data->upstreams.head + 1) % UPSTREAMS_COUNT; + return ctx->state; +} + +static int collect_transport(kr_layer_t *ctx) +{ + struct kr_request *req = ctx->req; + struct kr_module *module = ctx->api->data; + struct stat_data *data = module->data; + + stat_const_add(data, metric_request_total, 1); + if (req->qsource.dst_addr == NULL) { + stat_const_add(data, metric_request_internal, 1); + return ctx->state; + } + + /** + * Count each transport only once, + * i.e. DoT does not count as TCP and XDP does not count as UDP. 
+ */ + if (req->qsource.flags.http) + stat_const_add(data, metric_request_doh, 1); + else if (req->qsource.flags.tls) + stat_const_add(data, metric_request_dot, 1); + else if (req->qsource.flags.tcp) + stat_const_add(data, metric_request_tcp, 1); + else if (req->qsource.flags.xdp) + stat_const_add(data, metric_request_xdp, 1); + else + stat_const_add(data, metric_request_udp, 1); + return ctx->state; +} + +static int collect(kr_layer_t *ctx) +{ + struct kr_request *param = ctx->req; + struct kr_module *module = ctx->api->data; + struct kr_rplan *rplan = &param->rplan; + struct stat_data *data = module->data; + + collect_sample(data, rplan); + if (!param->answer) { + /* The answer is being dropped. TODO: perhaps add some stat for this? */ + return ctx->state; + } + + /* Collect data on final answer */ + collect_answer(data, param->answer); + /* Count cached and unresolved */ + if (rplan->resolved.len > 0) { + /* Histogram of answer latency. */ + struct kr_query *first = rplan->resolved.at[0]; + uint64_t elapsed = kr_now() - first->timestamp_mono; + if (elapsed <= 1) { + stat_const_add(data, metric_answer_1ms, 1); + } else if (elapsed <= 10) { + stat_const_add(data, metric_answer_10ms, 1); + } else if (elapsed <= 50) { + stat_const_add(data, metric_answer_50ms, 1); + } else if (elapsed <= 100) { + stat_const_add(data, metric_answer_100ms, 1); + } else if (elapsed <= 250) { + stat_const_add(data, metric_answer_250ms, 1); + } else if (elapsed <= 500) { + stat_const_add(data, metric_answer_500ms, 1); + } else if (elapsed <= 1000) { + stat_const_add(data, metric_answer_1000ms, 1); + } else if (elapsed <= 1500) { + stat_const_add(data, metric_answer_1500ms, 1); + } else { + stat_const_add(data, metric_answer_slow, 1); + } + /* Observe the final query. */ + struct kr_query *last = kr_rplan_last(rplan); + stat_const_add(data, metric_answer_cached, last->flags.CACHED); + } + + /* Keep stats of all response header flags; + * these don't return bool, so that's why we use !!
*/ + stat_const_add(data, metric_answer_aa, !!knot_wire_get_aa(param->answer->wire)); + stat_const_add(data, metric_answer_tc, !!knot_wire_get_tc(param->answer->wire)); + stat_const_add(data, metric_answer_rd, !!knot_wire_get_rd(param->answer->wire)); + stat_const_add(data, metric_answer_ra, !!knot_wire_get_ra(param->answer->wire)); + stat_const_add(data, metric_answer_ad, !!knot_wire_get_ad(param->answer->wire)); + stat_const_add(data, metric_answer_cd, !!knot_wire_get_cd(param->answer->wire)); + + /* EDNS0 stats */ + stat_const_add(data, metric_answer_edns0, knot_pkt_has_edns(param->answer)); + stat_const_add(data, metric_answer_do, knot_pkt_has_dnssec(param->answer)); + + /* Query parameters and transport mode */ + /* + DEPRECATED + use new names metric_answer_edns0 and metric_answer_do + */ + stat_const_add(data, metric_query_edns, knot_pkt_has_edns(param->answer)); + stat_const_add(data, metric_query_dnssec, knot_pkt_has_dnssec(param->answer)); + + return ctx->state; +} + +/** + * Set nominal value of a key. + * + * Input: { key, val } + * + */ +static char* stats_set(void *env, struct kr_module *module, const char *args) +{ + if (args == NULL) + return NULL; + + struct stat_data *data = module->data; + + auto_free char *pair = strdup(args); + char *val = strchr(pair, ' '); + if (val) { + *val = '\0'; + size_t number = strtoul(val + 1, NULL, 10); + for (unsigned i = 0; i < metric_const_end; ++i) { + if (strcmp(const_metrics[i].key, pair) == 0) { + const_metrics[i].val = number; + return NULL; + } + } + trie_val_t *trie_val = trie_get_ins(data->trie, pair, strlen(pair)); + *trie_val = (void *)number; + } + + return NULL; +} + +/** + * Retrieve metrics by key. 
+ * + * Input: string key + * Output: number value + */ +static char* stats_get(void *env, struct kr_module *module, const char *args) +{ + if (args == NULL) + return NULL; + + struct stat_data *data = module->data; + + /* Expecting CHAR_BIT to be 8, this is a safe bet */ + char *ret = malloc(3 * sizeof(size_t) + 2); + if (!ret) { + return NULL; + } + + /* Check if it exists in const map. */ + for (unsigned i = 0; i < metric_const_end; ++i) { + if (strcmp(const_metrics[i].key, args) == 0) { + sprintf(ret, "%zu", const_metrics[i].val); + return ret; + } + } + /* Check in variable map */ + trie_val_t *val = trie_get_try(data->trie, args, strlen(args)); + if (!val) { + free(ret); + return NULL; + } + sprintf(ret, "%zu", (size_t) *val); + return ret; +} + +/** Checks whether: + * - `key` starts with `prefix`; OR + * - The prefix is a wildcard, which is indicated by `prefix_len` being zero. */ +static inline bool key_matches_prefix(const char *key, size_t key_len, + const char *prefix, size_t prefix_len) +{ + return prefix_len == 0 || (prefix_len <= key_len && memcmp(key, prefix, prefix_len) == 0); +} + +struct list_entry_context { + JsonNode *root; /**< JSON object into which matching entries will be inserted. */ + const char *key_prefix; /**< The prefix against which entries will be matched. */ + size_t key_prefix_len; /**< Prefix length. Prefix is a wildcard if zero. */ +}; + +/** Inserts the entry with a matching key into the JSON object. */ +static int list_entry(const char *key, uint32_t key_len, trie_val_t *val, void *baton) +{ + struct list_entry_context *ctx = baton; + if (!key_matches_prefix(key, key_len, ctx->key_prefix, ctx->key_prefix_len)) + return 0; + size_t number = (size_t) *val; + auto_free char *key_nt = strndup(key, key_len); + json_append_member(ctx->root, key_nt, json_mknumber(number)); + return 0; +} + +/** + * List observed metrics. + * + * Output: { key: val, ... 
} + */ +static char* stats_list(void *env, struct kr_module *module, const char *args) +{ + JsonNode *root = json_mkobject(); + /* Walk const metrics map */ + size_t args_len = args ? strlen(args) : 0; + for (unsigned i = 0; i < metric_const_end; ++i) { + struct const_metric_elm *elm = &const_metrics[i]; + if (!args || strncmp(elm->key, args, args_len) == 0) { + json_append_member(root, elm->key, json_mknumber(elm->val)); + } + } + struct list_entry_context ctx = { + .root = root, + .key_prefix = args, + .key_prefix_len = args_len + }; + struct stat_data *data = module->data; + trie_apply_with_key(data->trie, list_entry, &ctx); + char *ret = json_encode(root); + json_delete(root); + return ret; +} + +/** @internal Helper for dump_list: add a single namehash_t item to JSON. */ +static enum lru_apply_do dump_value(const char *key, uint len, unsigned *val, void *baton) +{ + uint16_t key_type = 0; + /* Extract query name, type and counter */ + memcpy(&key_type, key, sizeof(key_type)); + KR_DNAME_GET_STR(key_name, (uint8_t *)key + sizeof(key_type)); + KR_RRTYPE_GET_STR(type_str, key_type); + + /* Convert to JSON object */ + JsonNode *json_val = json_mkobject(); + json_append_member(json_val, "count", json_mknumber(*val)); + json_append_member(json_val, "name", json_mkstring(key_name)); + json_append_member(json_val, "type", json_mkstring(type_str)); + json_append_element((JsonNode *)baton, json_val); + return LRU_APPLY_DO_NOTHING; // keep the item +} +/** + * List frequent names. + * + * Output: [{ count: <counter>, name: <qname>, type: <qtype>}, ... 
] + */ +static char* dump_list(void *env, struct kr_module *module, const char *args, namehash_t *table) +{ + if (!table) { + return NULL; + } + JsonNode *root = json_mkarray(); + lru_apply(table, dump_value, root); + char *ret = json_encode(root); + json_delete(root); + return ret; +} + +static char* dump_frequent(void *env, struct kr_module *module, const char *args) +{ + struct stat_data *data = module->data; + return dump_list(env, module, args, data->queries.frequent); +} + +static char* clear_frequent(void *env, struct kr_module *module, const char *args) +{ + struct stat_data *data = module->data; + lru_reset(data->queries.frequent); + return NULL; +} + +static char* dump_upstreams(void *env, struct kr_module *module, const char *args) +{ + struct stat_data *data = module->data; + if (!data) { + return NULL; + } + + /* Walk the ring backwards until AF_UNSPEC or we hit head. */ + JsonNode *root = json_mkobject(); + size_t head = data->upstreams.head; + for (size_t i = 1; i < UPSTREAMS_COUNT; ++i) { + size_t h = (UPSTREAMS_COUNT + head - i) % UPSTREAMS_COUNT; + struct sockaddr_in6 *e = &data->upstreams.q.at[h]; + if (e->sin6_family == AF_UNSPEC) { + break; + } + /* Convert address to string */ + char addr_str[INET6_ADDRSTRLEN]; + const char *ret = inet_ntop(e->sin6_family, kr_inaddr((const struct sockaddr *)e), addr_str, sizeof(addr_str)); + if (!ret) { + break; + } + /* Append to map with an array encoding RTTs */ + JsonNode *json_val = json_find_member(root, addr_str); + if (!json_val) { + json_val = json_mkarray(); + json_append_member(root, addr_str, json_val); + } + json_append_element(json_val, json_mknumber(e->sin6_rtt)); + } + + /* Encode and return */ + char *ret = json_encode(root); + json_delete(root); + return ret; +} + +KR_EXPORT +int stats_init(struct kr_module *module) +{ + static kr_layer_api_t layer = { + .consume = &collect_rtt, + .finish = &collect, + .begin = &collect_transport, + }; + /* Store module reference */ + layer.data = module; + 
module->layer = &layer; + + static const struct kr_prop props[] = { + { &stats_set, "set", "Set {key, val} metrics.", }, + { &stats_get, "get", "Get metrics for given key.", }, + { &stats_list, "list", "List observed metrics.", }, + { &dump_frequent, "frequent", "List most frequent queries.", }, + { &clear_frequent,"clear_frequent", "Clear frequent queries log.", }, + { &dump_upstreams, "upstreams", "List recently seen authoritatives.", }, + { NULL, NULL, NULL } + }; + module->props = props; + + struct stat_data *data = calloc(1, sizeof(*data)); + if (!data) { + return kr_error(ENOMEM); + } + data->trie = trie_create(NULL); + module->data = data; + lru_create(&data->queries.frequent, FREQUENT_COUNT, NULL, NULL); + /* Initialize ring buffer of recently visited upstreams */ + array_init(data->upstreams.q); + if (array_reserve(data->upstreams.q, UPSTREAMS_COUNT) != 0) { + return kr_error(ENOMEM); + } + data->upstreams.q.len = UPSTREAMS_COUNT; /* signify we use the entries */ + for (size_t i = 0; i < UPSTREAMS_COUNT; ++i) { + data->upstreams.q.at[i].sin6_family = AF_UNSPEC; + } + return kr_ok(); +} + +KR_EXPORT +int stats_deinit(struct kr_module *module) +{ + struct stat_data *data = module->data; + if (data) { + trie_free(data->trie); + lru_free(data->queries.frequent); + array_clear(data->upstreams.q); + free(data); + } + return kr_ok(); +} + +KR_MODULE_EXPORT(stats) + +#undef VERBOSE_MSG diff --git a/modules/stats/test.integr/deckard.yaml b/modules/stats/test.integr/deckard.yaml new file mode 100644 index 0000000..6dd0c22 --- /dev/null +++ b/modules/stats/test.integr/deckard.yaml @@ -0,0 +1,12 @@ +# SPDX-License-Identifier: GPL-3.0-or-later +programs: +- name: kresd + binary: kresd + additional: + - --noninteractive + templates: + - modules/stats/test.integr/kresd_config.j2 + - tests/integration/hints_zone.j2 + configs: + - config + - hints diff --git a/modules/stats/test.integr/kresd_config.j2 b/modules/stats/test.integr/kresd_config.j2 new file mode 100644 index 
0000000..4db7caa --- /dev/null +++ b/modules/stats/test.integr/kresd_config.j2 @@ -0,0 +1,114 @@ +-- SPDX-License-Identifier: GPL-3.0-or-later +local ffi = require('ffi') + +{% raw %} +modules.load('stats') + +FWD_TARGET = policy.FORWARD('192.0.2.1') + +function check_stats(got) + log_info(ffi.C.LOG_GRP_TESTS, 'checking if stat values match expected values:') + local expected = { + ['answer.cd'] = 2, + ['answer.cached'] = 1, + ['answer.nodata'] = 1, + ['answer.noerror'] = 2, + ['answer.nxdomain'] = 1, + ['answer.servfail'] = 2, + ['answer.edns0'] = 6, + ['answer.ra'] = 6, + ['answer.rd'] = 5, + ['answer.do'] = 1, + ['answer.ad'] = 0, + ['answer.tc'] = 0, + ['answer.aa'] = 0, + ['answer.total'] = 6 + } + print(table_print(expected)) + + local ok = true + for key, expval in pairs(expected) do + if got[key] ~= expval then + log_info(ffi.C.LOG_GRP_TESTS, + 'ERROR: stats key ' .. key + .. ' has unexpected value' + .. ' (expected ' .. tostring(expval) + .. ' got ' .. tostring(got[key] .. ')')) + ok = false + end + end + if ok then + log_info(ffi.C.LOG_GRP_TESTS, 'no problem found') + return FWD_TARGET + else + return policy.DENY_MSG('Stats test failure') + end +end + +function reply_result(state, req) + local got = stats.list() + log_info(ffi.C.LOG_GRP_TESTS, 'current stats.list() values:') + print(table_print(got)) + local result = check_stats(got) + return result(state, req) +end +policy.add(policy.pattern(reply_result, 'stats.test.')) +policy.add(policy.all(FWD_TARGET)) -- avoid iteration + +-- make sure DNSSEC is turned off for tests +trust_anchors.remove('.') + +-- Enable queries without RD bit +pcall(modules.unload, 'refuse_nord') + +-- Disable RFC5011 TA update +if ta_update then + modules.unload('ta_update') +end + +-- Disable RFC8145 signaling, scenario doesn't provide expected answers +if ta_signal_query then + modules.unload('ta_signal_query') +end + +-- Disable RFC8109 priming, scenario doesn't provide expected answers +if priming then + 
modules.unload('priming') +end + +-- Disable this module because it make one priming query +if detect_time_skew then + modules.unload('detect_time_skew') +end + +_hint_root_file('hints') +cache.size = 2*MB +log_level('debug') +{% endraw %} + +net = { '{{SELF_ADDR}}' } + + +{% if QMIN == "false" %} +option('NO_MINIMIZE', true) +{% else %} +option('NO_MINIMIZE', false) +{% endif %} + + +-- Self-checks on globals +assert(help() ~= nil) +assert(worker.id ~= nil) +-- Self-checks on facilities +assert(cache.count() == 0) +assert(cache.stats() ~= nil) +assert(cache.backends() ~= nil) +assert(worker.stats() ~= nil) +assert(net.interfaces() ~= nil) +-- Self-checks on loaded stuff +assert(net.list()[1].transport.ip == '{{SELF_ADDR}}') +assert(#modules.list() > 0) +-- Self-check timers +ev = event.recurrent(1 * sec, function (ev) return 1 end) +event.cancel(ev) +ev = event.after(0, function (ev) return 1 end) diff --git a/modules/stats/test.integr/stats.rpl b/modules/stats/test.integr/stats.rpl new file mode 100644 index 0000000..ecab062 --- /dev/null +++ b/modules/stats/test.integr/stats.rpl @@ -0,0 +1,194 @@ +; SPDX-License-Identifier: GPL-3.0-or-later + trust-anchor: "example. DNSKEY 257 3 7 AwEAAcUlFV1vhmqx6NSOUOq2R/dsR7Xm3upJ ( j7IommWSpJABVfW8Q0rOvXdM6kzt+TAu92L9 AbsUdblMFin8CVF3n4s= )" +CONFIG_END + +SCENARIO_BEGIN Test stats module + +RANGE_BEGIN 0 100 + ADDRESS 192.0.2.1 + +ENTRY_BEGIN +REPLY QR RA RD CD NOERROR +MATCH opcode question rcode +ADJUST copy_id +SECTION QUESTION +cd.test. IN TXT +SECTION ANSWER +cd.test. IN TXT "CD is set" +ENTRY_END + +ENTRY_BEGIN +REPLY QR RA RD CD NOERROR +MATCH opcode question rcode +ADJUST copy_id +SECTION QUESTION +nodata.test. IN TXT +ENTRY_END + +ENTRY_BEGIN +REPLY QR RA RD CD NXDOMAIN +MATCH opcode question +ADJUST copy_id +SECTION QUESTION +nxdomain.test. 
IN TXT +ENTRY_END + +; failing DNSSEC-signed subdomain +ENTRY_BEGIN +REPLY QR RA RD CD SERVFAIL +MATCH opcode subdomain +ADJUST copy_id copy_query +SECTION QUESTION +bogus.test. IN TXT +ENTRY_END + +; query for this name triggers check in Lua config +ENTRY_BEGIN +REPLY QR RA RD CD NOERROR +MATCH opcode question rcode +ADJUST copy_id +SECTION QUESTION +stats.test. IN TXT +SECTION ANSWER +stats.test. IN TXT "Ok, trigger query was not intercepted!" +ENTRY_END + +ENTRY_BEGIN +REPLY QR RD RA CD TC NOERROR +MATCH opcode question rcode +ADJUST copy_id +SECTION QUESTION +tc.test. IN URI +ENTRY_END + +RANGE_END + + +; +cd +rd +STEP 10 QUERY +ENTRY_BEGIN +REPLY RD CD NOERROR +SECTION QUESTION +cd.test. IN TXT +ENTRY_END + +STEP 11 CHECK_ANSWER +ENTRY_BEGIN +MATCH all +REPLY QR RD RA CD NOERROR +SECTION QUESTION +cd.test. IN TXT +SECTION ANSWER +cd.test. IN TXT "CD is set" +ENTRY_END + +; +cd +cached +rd +STEP 12 QUERY +ENTRY_BEGIN +REPLY RD CD NOERROR +SECTION QUESTION +cd.test. IN TXT +ENTRY_END + +STEP 13 CHECK_ANSWER +ENTRY_BEGIN +MATCH all +REPLY QR RD RA CD NOERROR +SECTION QUESTION +cd.test. IN TXT +SECTION ANSWER +cd.test. IN TXT "CD is set" +ENTRY_END + +; +nodata +rd +STEP 20 QUERY +ENTRY_BEGIN +REPLY RD NOERROR +SECTION QUESTION +nodata.test. IN TXT +SECTION ADDITIONAL +ENTRY_END + +STEP 21 CHECK_ANSWER +ENTRY_BEGIN +REPLY QR RD RA NOERROR +MATCH all +SECTION QUESTION +nodata.test. IN TXT +ENTRY_END + +; +nxdomain +rd +STEP 30 QUERY +ENTRY_BEGIN +REPLY RD NOERROR +SECTION QUESTION +nxdomain.test. IN TXT +SECTION ADDITIONAL +ENTRY_END + +STEP 31 CHECK_ANSWER +ENTRY_BEGIN +REPLY QR RD RA NXDOMAIN +MATCH all +SECTION QUESTION +nxdomain.test. IN TXT +ENTRY_END + +; +servfail +do +rd +STEP 40 QUERY +ENTRY_BEGIN +REPLY RD DO NOERROR +SECTION QUESTION +bogus.test. IN TXT +SECTION ADDITIONAL +ENTRY_END + +STEP 41 CHECK_ANSWER +ENTRY_BEGIN +REPLY QR RD RA DO SERVFAIL +MATCH all +SECTION QUESTION +bogus.test. 
IN TXT +ENTRY_END + +; no rd +STEP 50 QUERY +ENTRY_BEGIN +REPLY NOERROR +SECTION QUESTION +bogus.test. IN TXT +SECTION ADDITIONAL +ENTRY_END + +STEP 51 CHECK_ANSWER +ENTRY_BEGIN +REPLY QR RA SERVFAIL +MATCH all +SECTION QUESTION +bogus.test. IN TXT +ENTRY_END + + + + +STEP 100 QUERY +ENTRY_BEGIN +REPLY RD NOERROR +SECTION QUESTION +stats.test. IN TXT +SECTION ADDITIONAL +ENTRY_END + +STEP 101 CHECK_ANSWER +ENTRY_BEGIN +REPLY NOERROR +MATCH opcode question additional rcode answer +; AD must not be set in the answer +SECTION QUESTION +stats.test. IN TXT +SECTION ANSWER +stats.test. IN TXT "Ok, trigger query was not intercepted!" +ENTRY_END + + +SCENARIO_END |