diff options
author | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-04-07 15:26:00 +0000 |
---|---|---|
committer | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-04-07 15:26:00 +0000 |
commit | 830407e88f9d40d954356c3754f2647f91d5c06a (patch) | |
tree | d6a0ece6feea91f3c656166dbaa884ef8a29740e /daemon/bindings | |
parent | Initial commit. (diff) | |
download | knot-resolver-upstream/5.6.0.tar.xz knot-resolver-upstream/5.6.0.zip |
Adding upstream version 5.6.0.upstream/5.6.0upstream
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'daemon/bindings')
-rw-r--r-- | daemon/bindings/api.h | 12 | ||||
-rw-r--r-- | daemon/bindings/cache.c | 382 | ||||
-rw-r--r-- | daemon/bindings/cache.rst | 338 | ||||
-rw-r--r-- | daemon/bindings/event.c | 209 | ||||
-rw-r--r-- | daemon/bindings/event.rst | 139 | ||||
-rw-r--r-- | daemon/bindings/impl.c | 95 | ||||
-rw-r--r-- | daemon/bindings/impl.h | 90 | ||||
-rw-r--r-- | daemon/bindings/modules.c | 77 | ||||
-rw-r--r-- | daemon/bindings/modules.rst | 43 | ||||
-rw-r--r-- | daemon/bindings/net.c | 1260 | ||||
-rw-r--r-- | daemon/bindings/net_client.rst | 34 | ||||
-rw-r--r-- | daemon/bindings/net_dns_tweaks.rst | 35 | ||||
-rw-r--r-- | daemon/bindings/net_server.rst | 225 | ||||
-rw-r--r-- | daemon/bindings/net_tlssrv.rst | 188 | ||||
-rw-r--r-- | daemon/bindings/net_xdpsrv.rst | 140 | ||||
-rw-r--r-- | daemon/bindings/worker.c | 81 | ||||
-rw-r--r-- | daemon/bindings/worker.rst | 35 |
17 files changed, 3383 insertions, 0 deletions
diff --git a/daemon/bindings/api.h b/daemon/bindings/api.h new file mode 100644 index 0000000..2b43385 --- /dev/null +++ b/daemon/bindings/api.h @@ -0,0 +1,12 @@ +/* Copyright (C) CZ.NIC, z.s.p.o. <knot-resolver@labs.nic.cz> + * SPDX-License-Identifier: GPL-3.0-or-later + */ + +#pragma once + +#include <lua.h> + +/** Make all the bindings accessible from the lua state, + * .i.e. define those lua tables. */ +void kr_bindings_register(lua_State *L); + diff --git a/daemon/bindings/cache.c b/daemon/bindings/cache.c new file mode 100644 index 0000000..d42ff62 --- /dev/null +++ b/daemon/bindings/cache.c @@ -0,0 +1,382 @@ +/* Copyright (C) CZ.NIC, z.s.p.o. <knot-resolver@labs.nic.cz> + * SPDX-License-Identifier: GPL-3.0-or-later + */ + +#include "daemon/bindings/impl.h" + +/** @internal return cache, or throw lua error if not open */ +static struct kr_cache * cache_assert_open(lua_State *L) +{ + struct kr_cache *cache = &the_worker->engine->resolver.cache; + if (kr_fails_assert(cache) || !kr_cache_is_open(cache)) + lua_error_p(L, "no cache is open yet, use cache.open() or cache.size, etc."); + return cache; +} + +/** Return available cached backends. */ +static int cache_backends(lua_State *L) +{ + struct engine *engine = the_worker->engine; + + lua_newtable(L); + for (unsigned i = 0; i < engine->backends.len; ++i) { + const struct kr_cdb_api *api = engine->backends.at[i]; + lua_pushboolean(L, api == engine->resolver.cache.api); + lua_setfield(L, -2, api->name); + } + return 1; +} + +/** Return number of cached records. */ +static int cache_count(lua_State *L) +{ + struct kr_cache *cache = cache_assert_open(L); + + int count = cache->api->count(cache->db, &cache->stats); + if (count >= 0) { + /* First key is a version counter, omit it if nonempty. */ + lua_pushinteger(L, count ? count - 1 : 0); + return 1; + } + return 0; +} + +/** Return time of last checkpoint, or re-set it if passed `true`. 
*/ +static int cache_checkpoint(lua_State *L) +{ + struct kr_cache *cache = cache_assert_open(L); + + if (lua_gettop(L) == 0) { /* Return the current value. */ + lua_newtable(L); + lua_pushnumber(L, cache->checkpoint_monotime); + lua_setfield(L, -2, "monotime"); + lua_newtable(L); + lua_pushnumber(L, cache->checkpoint_walltime.tv_sec); + lua_setfield(L, -2, "sec"); + lua_pushnumber(L, cache->checkpoint_walltime.tv_usec); + lua_setfield(L, -2, "usec"); + lua_setfield(L, -2, "walltime"); + return 1; + } + + if (lua_gettop(L) != 1 || !lua_isboolean(L, 1) || !lua_toboolean(L, 1)) + lua_error_p(L, "cache.checkpoint() takes no parameters or a true value"); + + kr_cache_make_checkpoint(cache); + return 1; +} + +/** Return cache statistics. */ +static int cache_stats(lua_State *L) +{ + struct kr_cache *cache = cache_assert_open(L); + lua_newtable(L); +#define add_stat(name) \ + lua_pushinteger(L, (cache->stats.name)); \ + lua_setfield(L, -2, #name) + add_stat(open); + add_stat(close); + add_stat(count); + cache->stats.count_entries = cache->api->count(cache->db, &cache->stats); + add_stat(count_entries); + add_stat(clear); + add_stat(commit); + add_stat(read); + add_stat(read_miss); + add_stat(write); + add_stat(remove); + add_stat(remove_miss); + add_stat(match); + add_stat(match_miss); + add_stat(read_leq); + add_stat(read_leq_miss); + /* usage_percent statistics special case - double */ + cache->stats.usage_percent = cache->api->usage_percent(cache->db); + lua_pushnumber(L, cache->stats.usage_percent); + lua_setfield(L, -2, "usage_percent"); +#undef add_stat + + return 1; +} + +static const struct kr_cdb_api *cache_select(struct engine *engine, const char **conf) +{ + /* Return default backend */ + if (*conf == NULL || !strstr(*conf, "://")) { + return engine->backends.at[0]; + } + + /* Find storage backend from config prefix */ + for (unsigned i = 0; i < engine->backends.len; ++i) { + const struct kr_cdb_api *api = engine->backends.at[i]; + if (strncmp(*conf, 
api->name, strlen(api->name)) == 0) { + *conf += strlen(api->name) + strlen("://"); + return api; + } + } + + return NULL; +} + +static int cache_max_ttl(lua_State *L) +{ + struct kr_cache *cache = cache_assert_open(L); + + int n = lua_gettop(L); + if (n > 0) { + if (!lua_isnumber(L, 1) || n > 1) + lua_error_p(L, "expected 'max_ttl(number ttl)'"); + uint32_t min = cache->ttl_min; + int64_t ttl = lua_tointeger(L, 1); + if (ttl < 1 || ttl < min || ttl > TTL_MAX_MAX) { + lua_error_p(L, + "max_ttl must be larger than minimum TTL, and in range <1, " + STR(TTL_MAX_MAX) ">'"); + } + cache->ttl_max = ttl; + } + lua_pushinteger(L, cache->ttl_max); + return 1; +} + + +static int cache_min_ttl(lua_State *L) +{ + struct kr_cache *cache = cache_assert_open(L); + + int n = lua_gettop(L); + if (n > 0) { + if (!lua_isnumber(L, 1)) + lua_error_p(L, "expected 'min_ttl(number ttl)'"); + uint32_t max = cache->ttl_max; + int64_t ttl = lua_tointeger(L, 1); + if (ttl < 0 || ttl > max || ttl > TTL_MAX_MAX) { + lua_error_p(L, + "min_ttl must be smaller than maximum TTL, and in range <0, " + STR(TTL_MAX_MAX) ">'"); + } + cache->ttl_min = ttl; + } + lua_pushinteger(L, cache->ttl_min); + return 1; +} + +/** Open cache */ +static int cache_open(lua_State *L) +{ + /* Check parameters */ + int n = lua_gettop(L); + if (n < 1 || !lua_isnumber(L, 1)) + lua_error_p(L, "expected 'open(number max_size, string config = \"\")'"); + + /* Select cache storage backend */ + struct engine *engine = the_worker->engine; + + lua_Integer csize_lua = lua_tointeger(L, 1); + if (!(csize_lua >= 8192 && csize_lua < SIZE_MAX)) { /* min. is basically arbitrary */ + lua_error_p(L, "invalid cache size specified, it must be in range <8192, " + STR(SIZE_MAX) ">"); + } + size_t cache_size = csize_lua; + + const char *conf = n > 1 ? 
lua_tostring(L, 2) : NULL; + const char *uri = conf; + const struct kr_cdb_api *api = cache_select(engine, &conf); + if (!api) + lua_error_p(L, "unsupported cache backend"); + + /* Close if already open */ + kr_cache_close(&engine->resolver.cache); + + /* Reopen cache */ + struct kr_cdb_opts opts = { + (conf && strlen(conf)) ? conf : ".", + cache_size + }; + int ret = kr_cache_open(&engine->resolver.cache, api, &opts, engine->pool); + if (ret != 0) { + char cwd[PATH_MAX]; + get_workdir(cwd, sizeof(cwd)); + return luaL_error(L, "can't open cache path '%s'; working directory '%s'; %s", + opts.path, cwd, kr_strerror(ret)); + } + /* Let's check_health() every five seconds to avoid keeping old cache alive + * even in case of not having any work to do. */ + ret = kr_cache_check_health(&engine->resolver.cache, 5000); + if (ret != 0) { + kr_log_error(CACHE, "periodic health check failed (ignored): %s\n", + kr_strerror(ret)); + } + + /* Store current configuration */ + lua_getglobal(L, "cache"); + lua_pushstring(L, "current_size"); + lua_pushnumber(L, cache_size); + lua_rawset(L, -3); + lua_pushstring(L, "current_storage"); + lua_pushstring(L, uri); + lua_rawset(L, -3); + lua_pop(L, 1); + + lua_pushboolean(L, 1); + return 1; +} + +static int cache_close(lua_State *L) +{ + struct kr_cache *cache = &the_worker->engine->resolver.cache; + if (!kr_cache_is_open(cache)) { + return 0; + } + + kr_cache_close(cache); + lua_getglobal(L, "cache"); + lua_pushstring(L, "current_size"); + lua_pushnumber(L, 0); + lua_rawset(L, -3); + lua_pop(L, 1); + lua_pushboolean(L, 1); + return 1; +} + +#if 0 +/** @internal Prefix walk. 
*/ +static int cache_prefixed(struct kr_cache *cache, const char *prefix, bool exact_name, + knot_db_val_t keyval[][2], int maxcount) +{ + /* Convert to domain name */ + uint8_t buf[KNOT_DNAME_MAXLEN]; + if (!knot_dname_from_str(buf, prefix, sizeof(buf))) { + return kr_error(EINVAL); + } + /* Start prefix search */ + return kr_cache_match(cache, buf, exact_name, keyval, maxcount); +} +#endif + +/** Clear everything. */ +static int cache_clear_everything(lua_State *L) +{ + struct kr_cache *cache = cache_assert_open(L); + + /* Clear records and packets. */ + int ret = kr_cache_clear(cache); + lua_error_maybe(L, ret); + + /* Clear reputation tables */ + struct kr_context *ctx = &the_worker->engine->resolver; + lru_reset(ctx->cache_cookie); + lua_pushboolean(L, true); + return 1; +} + +#if 0 +/** @internal Dump cache key into table on Lua stack. */ +static void cache_dump(lua_State *L, knot_db_val_t keyval[]) +{ + knot_dname_t dname[KNOT_DNAME_MAXLEN]; + char name[KNOT_DNAME_TXT_MAXLEN]; + uint16_t type; + + int ret = kr_unpack_cache_key(keyval[0], dname, &type); + if (ret < 0) { + return; + } + + ret = !knot_dname_to_str(name, dname, sizeof(name)); + if (kr_fails_assert(!ret)) return; + + /* If name typemap doesn't exist yet, create it */ + lua_getfield(L, -1, name); + if (lua_isnil(L, -1)) { + lua_pop(L, 1); + lua_newtable(L); + } + /* Append to typemap */ + char type_buf[KR_RRTYPE_STR_MAXLEN] = { '\0' }; + knot_rrtype_to_string(type, type_buf, sizeof(type_buf)); + lua_pushboolean(L, true); + lua_setfield(L, -2, type_buf); + /* Set name typemap */ + lua_setfield(L, -2, name); +} + +/** Query cached records. TODO: fix caveats in ./README.rst documentation? 
*/ +static int cache_get(lua_State *L) +{ + //struct kr_cache *cache = cache_assert_open(L); // to be fixed soon + + /* Check parameters */ + int n = lua_gettop(L); + if (n < 1 || !lua_isstring(L, 1)) + lua_error_p(L, "expected 'cache.get(string key)'"); + + /* Retrieve set of keys */ + const char *prefix = lua_tostring(L, 1); + knot_db_val_t keyval[100][2]; + int ret = cache_prefixed(cache, prefix, false/*FIXME*/, keyval, 100); + lua_error_maybe(L, ret); + /* Format output */ + lua_newtable(L); + for (int i = 0; i < ret; ++i) { + cache_dump(L, keyval[i]); + } + return 1; +} +#endif +static int cache_get(lua_State *L) +{ + lua_error_maybe(L, ENOSYS); + return kr_error(ENOSYS); /* doesn't happen */ +} + +/** Set time interval for cleaning rtt cache. + * Servers with score >= KR_NS_TIMEOUT will be cleaned after + * this interval ended up, so that they will be able to participate + * in NS elections again. */ +static int cache_ns_tout(lua_State *L) +{ + struct kr_context *ctx = &the_worker->engine->resolver; + + /* Check parameters */ + int n = lua_gettop(L); + if (n < 1) { + lua_pushinteger(L, ctx->cache_rtt_tout_retry_interval); + return 1; + } + + if (!lua_isnumber(L, 1)) + lua_error_p(L, "expected 'cache.ns_tout(interval in ms)'"); + + lua_Integer interval_lua = lua_tointeger(L, 1); + if (!(interval_lua > 0 && interval_lua < UINT_MAX)) { + lua_error_p(L, "invalid interval specified, it must be in range > 0, < " + STR(UINT_MAX)); + } + + ctx->cache_rtt_tout_retry_interval = interval_lua; + lua_pushinteger(L, ctx->cache_rtt_tout_retry_interval); + return 1; +} + +int kr_bindings_cache(lua_State *L) +{ + static const luaL_Reg lib[] = { + { "backends", cache_backends }, + { "count", cache_count }, + { "stats", cache_stats }, + { "checkpoint", cache_checkpoint }, + { "open", cache_open }, + { "close", cache_close }, + { "clear_everything", cache_clear_everything }, + { "get", cache_get }, + { "max_ttl", cache_max_ttl }, + { "min_ttl", cache_min_ttl }, + { "ns_tout", 
cache_ns_tout }, + { NULL, NULL } + }; + + luaL_register(L, "cache", lib); + return 1; +} + diff --git a/daemon/bindings/cache.rst b/daemon/bindings/cache.rst new file mode 100644 index 0000000..36114d2 --- /dev/null +++ b/daemon/bindings/cache.rst @@ -0,0 +1,338 @@ +.. SPDX-License-Identifier: GPL-3.0-or-later + +Cache +===== + +Cache in Knot Resolver is stored on disk and also shared between +:ref:`systemd-multiple-instances` so resolver doesn't lose the cached data on +restart or crash. + +To improve performance even further the resolver implements so-called aggressive caching +for DNSSEC-validated data (:rfc:`8198`), which improves performance and also protects +against some types of Random Subdomain Attacks. + + +.. _`cache_sizing`: + +Sizing +------ + +For personal and small office use-cases cache size around 100 MB is more than enough. + +For large deployments we recommend to run Knot Resolver on a dedicated machine, +and to allocate 90% of machine's free memory for resolver's cache. + +.. note:: Choosing a cache size that can fit into RAM is important even if the + cache is stored on disk (default). Otherwise, the extra I/O caused by disk + access for missing pages can cause performance issues. + +For example, imagine you have a machine with 16 GB of memory. +After machine restart you use command ``free -m`` to determine +amount of free memory (without swap): + +.. code-block:: bash + + $ free -m + total used free + Mem: 15907 979 14928 + +Now you can configure cache size to be 90% of the free memory 14 928 MB, i.e. 13 453 MB: + +.. code-block:: lua + + -- 90 % of free memory after machine restart + cache.size = 13453 * MB + +It is also possible to set the cache size based on the file system size. This is useful +if you use a dedicated partition for cache (e.g. non-persistent tmpfs). It is recommended +to leave some free space for special files, such as locks.: + +.. code-block:: lua + + cache.size = cache.fssize() - 10*MB + +.. 
note:: The :ref:`garbage-collector` can be used to periodically trim the + cache. It is enabled and configured by default when running kresd with + systemd integration. + +.. _`cache_persistence`: + +Persistence +----------- +.. tip:: Using tmpfs for cache improves performance and reduces disk I/O. + +By default the cache is saved on a persistent storage device +so the content of the cache is persisted during system reboot. +This usually leads to smaller latency after restart etc., +however in certain situations a non-persistent cache storage might be preferred, e.g.: + + - Resolver handles high volume of queries and I/O performance to disk is too low. + - Threat model includes attacker getting access to disk content in power-off state. + - Disk has limited number of writes (e.g. flash memory in routers). + +If non-persistent cache is desired configure cache directory to be on +tmpfs_ filesystem, a temporary in-memory file storage. +The cache content will be saved in memory, and thus have faster access +and will be lost on power-off or reboot. + + +.. note:: In most of the Unix-like systems ``/tmp`` and ``/var/run`` are + commonly mounted as tmpfs. While it is technically possible to move the + cache to an existing tmpfs filesystem, it is *not recommended*, since the + path to cache is configured in multiple places. + +Mounting the cache directory as tmpfs_ is the recommended approach. Make sure +to use appropriate ``size=`` option and don't forget to adjust the size in the +config file as well. + +.. code-block:: none + + # /etc/fstab + tmpfs /var/cache/knot-resolver tmpfs rw,size=2G,uid=knot-resolver,gid=knot-resolver,nosuid,nodev,noexec,mode=0700 0 0 + +.. code-block:: lua + + -- /etc/knot-resolver/kresd.conf + cache.size = cache.fssize() - 10*MB + +.. _tmpfs: https://en.wikipedia.org/wiki/Tmpfs + +Configuration reference +----------------------- + +.. function:: cache.open(max_size[, config_uri]) + + :param number max_size: Maximum cache size in bytes. 
+ :return: ``true`` if cache was opened + + Open cache with a size limit. The cache will be reopened if already open. + Note that the max_size cannot be lowered, only increased due to how cache is implemented. + + .. tip:: Use ``kB, MB, GB`` constants as a multiplier, e.g. ``100*MB``. + + The URI ``lmdb://path`` allows you to change the cache directory. + + Example: + + .. code-block:: lua + + cache.open(100 * MB, 'lmdb:///var/cache/knot-resolver') + +.. envvar:: cache.size + + Set the cache maximum size in bytes. Note that this is only a hint to the backend, + which may or may not respect it. See :func:`cache.open()`. + + .. code-block:: lua + + cache.size = 100 * MB -- equivalent to `cache.open(100 * MB)` + +.. envvar:: cache.current_size + + Get the maximum size in bytes. + + .. code-block:: lua + + print(cache.current_size) + +.. envvar:: cache.storage + + Set the cache storage backend configuration, see :func:`cache.backends()` for + more information. If the new storage configuration is invalid, it is not set. + + .. code-block:: lua + + cache.storage = 'lmdb://.' + +.. envvar:: cache.current_storage + + Get the storage backend configuration. + + .. code-block:: lua + + print(cache.current_storage) + +.. function:: cache.backends() + + :return: map of backends + + .. note:: For now there is only one backend implementation, even though the APIs are ready for different (synchronous) backends. + + The cache supports runtime-changeable backends, using the optional :rfc:`3986` URI, where the scheme + represents backend protocol and the rest of the URI backend-specific configuration. By default, it + is a ``lmdb`` backend in working directory, i.e. ``lmdb://``. + + Example output: + + .. code-block:: lua + + [lmdb://] => true + +.. function:: cache.count() + + :return: Number of entries in the cache. Meaning of the number is an implementation detail and is subject to change. + +..
function:: cache.close() + + :return: ``true`` if cache was closed + + Close the cache. + + .. note:: This may or may not clear the cache, depending on the cache backend. + +.. function:: cache.fssize() + + :return: Partition size of cache storage. + +.. function:: cache.stats() + + Return table with low-level statistics for internal cache operation and storage. + This counts each access to cache and does not directly map to individual + DNS queries or resource records. + For query-level statistics see :ref:`stats module <mod-stats>`. + + Example: + + .. code-block:: lua + + > cache.stats() + [clear] => 0 + [close] => 0 + [commit] => 117 + [count] => 2 + [count_entries] => 6187 + [match] => 21 + [match_miss] => 2 + [open] => 0 + [read] => 4313 + [read_leq] => 9 + [read_leq_miss] => 4 + [read_miss] => 1143 + [remove] => 17 + [remove_miss] => 0 + [usage_percent] => 15.625 + [write] => 189 + + + Cache operation `read_leq` (*read less or equal*, i.e. range search) was requested 9 times, + and 4 out of 9 operations were finished with *cache miss*. + Cache contains 6187 internal entries which occupy 15.625 % cache size. + + +.. function:: cache.max_ttl([ttl]) + + :param number ttl: maximum TTL in seconds (default: 1 day) + + .. KR_CACHE_DEFAULT_TTL_MAX ^^ + + :return: current maximum TTL + + Get or set upper TTL bound applied to all received records. + + .. note:: The `ttl` value must be in range `(min_ttl, 2147483647)`. + + .. code-block:: lua + + -- Get maximum TTL + cache.max_ttl() + 518400 + -- Set maximum TTL + cache.max_ttl(172800) + 172800 + +.. function:: cache.min_ttl([ttl]) + + :param number ttl: minimum TTL in seconds (default: 5 seconds) + + .. KR_CACHE_DEFAULT_TTL_MIN ^^ + + :return: current minimum TTL + + Get or set lower TTL bound applied to all received records. + Forcing TTL higher than specified violates DNS standards, so use higher values with care. + TTL still won't be extended beyond expiration of the corresponding DNSSEC signature. + + .. 
note:: The `ttl` value must be in range `<0, max_ttl)`. + + .. code-block:: lua + + -- Get minimum TTL + cache.min_ttl() + 0 + -- Set minimum TTL + cache.min_ttl(5) + 5 + +.. function:: cache.ns_tout([timeout]) + + :param number timeout: NS retry interval in milliseconds (default: :c:macro:`KR_NS_TIMEOUT_RETRY_INTERVAL`) + :return: current timeout + + Get or set time interval for which a nameserver address will be ignored after determining that it doesn't return (useful) answers. + The intention is to avoid waiting if there's little hope; instead, kresd can immediately SERVFAIL or immediately use stale records (with :ref:`serve_stale <mod-serve_stale>` module). + + .. warning:: This setting applies only to the current kresd process. + +.. function:: cache.get([domain]) + + This function is not implemented at this moment. + We plan to re-introduce it soon, probably with a slightly different API. + +.. function:: cache.clear([name], [exact_name], [rr_type], [chunk_size], [callback], [prev_state]) + + Purge cache records matching specified criteria. There are two specifics: + + * To reliably remove **negative** cache entries you need to clear subtree with the whole zone. E.g. to clear negative cache entries for (formerly non-existing) record `www.example.com. A` you need to flush whole subtree starting at zone apex, e.g. `example.com.` [#]_. + * This operation is asynchronous and might not yet be finished when the call to ``cache.clear()`` function returns. Return value indicates if clearing continues asynchronously or not. + + :param string name: subtree to purge; if the name isn't provided, whole cache is purged + (and any other parameters are disregarded). + :param bool exact_name: if set to ``true``, only records with *the same* name are removed; + default: false. + :param kres.type rr_type: you may additionally specify the type to remove, + but that is only supported with ``exact_name == true``; default: nil.
+ :param integer chunk_size: the number of records to remove in one round; default: 100. + The purpose is not to block the resolver for long. + The default ``callback`` repeats the command after one millisecond + until all matching data are cleared. + :param function callback: a custom code to handle result of the underlying C call. + Its parameters are copies of those passed to `cache.clear()` with one additional + parameter ``rettable`` containing table with return value from current call. + ``count`` field contains a return code from :func:`kr_cache_remove_subtree()`. + :param table prev_state: return value from previous run (can be used by callback) + + :rtype: table + :return: ``count`` key is always present. Other keys are optional and their presence indicate special conditions. + + * **count** *(integer)* - number of items removed from cache by this call (can be 0 if no entry matched criteria) + * **not_apex** - cleared subtree is not cached as zone apex; proofs of non-existence were probably not removed + * **subtree** *(string)* - hint where zone apex lies (this is estimation from cache content and might not be accurate) + * **chunk_limit** - more than ``chunk_size`` items needs to be cleared, clearing will continue asynchronously + + + Examples: + + .. code-block:: lua + + -- Clear whole cache + > cache.clear() + [count] => 76 + + -- Clear records at and below 'com.' + > cache.clear('com.') + [chunk_limit] => chunk size limit reached; the default callback will continue asynchronously + [not_apex] => to clear proofs of non-existence call cache.clear('com.') + [count] => 100 + [round] => 1 + [subtree] => com. + > worker.sleep(0.1) + [cache] asynchronous cache.clear('com', false) finished + + -- Clear only 'www.example.com.' + > cache.clear('www.example.com.', true) + [round] => 1 + [count] => 1 + [not_apex] => to clear proofs of non-existence call cache.clear('example.com.') + [subtree] => example.com. + +.. 
[#] This is a consequence of DNSSEC negative cache which relies on proofs of non-existence on various owner nodes. It is impossible to efficiently flush part of DNS zones signed with NSEC3. diff --git a/daemon/bindings/event.c b/daemon/bindings/event.c new file mode 100644 index 0000000..4cefa13 --- /dev/null +++ b/daemon/bindings/event.c @@ -0,0 +1,209 @@ +/* Copyright (C) CZ.NIC, z.s.p.o. <knot-resolver@labs.nic.cz> + * SPDX-License-Identifier: GPL-3.0-or-later + */ + +#include "daemon/bindings/impl.h" + +#include <unistd.h> +#include <uv.h> + +static void event_free(uv_timer_t *timer) +{ + lua_State *L = the_worker->engine->L; + int ref = (intptr_t) timer->data; + luaL_unref(L, LUA_REGISTRYINDEX, ref); + free(timer); +} + +static void event_callback(uv_timer_t *timer) +{ + lua_State *L = the_worker->engine->L; + + /* Retrieve callback and execute */ + lua_rawgeti(L, LUA_REGISTRYINDEX, (intptr_t) timer->data); + lua_rawgeti(L, -1, 1); + lua_pushinteger(L, (intptr_t) timer->data); + int ret = execute_callback(L, 1); + /* Free callback if not recurrent or an error */ + if (ret != 0 || (uv_timer_get_repeat(timer) == 0 && uv_is_active((uv_handle_t *)timer) == 0)) { + if (!uv_is_closing((uv_handle_t *)timer)) { + uv_close((uv_handle_t *)timer, (uv_close_cb) event_free); + } + } +} + +static void event_fdcallback(uv_poll_t* handle, int status, int events) +{ + lua_State *L = the_worker->engine->L; + + /* Retrieve callback and execute */ + lua_rawgeti(L, LUA_REGISTRYINDEX, (intptr_t) handle->data); + lua_rawgeti(L, -1, 1); + lua_pushinteger(L, (intptr_t) handle->data); + lua_pushinteger(L, status); + lua_pushinteger(L, events); + int ret = execute_callback(L, 3); + /* Free callback if not recurrent or an error */ + if (ret != 0) { + if (!uv_is_closing((uv_handle_t *)handle)) { + uv_close((uv_handle_t *)handle, (uv_close_cb) event_free); + } + } +} + +static int event_sched(lua_State *L, unsigned timeout, unsigned repeat) +{ + uv_timer_t *timer = malloc(sizeof(*timer)); 
+ if (!timer) + lua_error_p(L, "out of memory"); + + /* Start timer with the reference */ + uv_loop_t *loop = uv_default_loop(); + uv_timer_init(loop, timer); + int ret = uv_timer_start(timer, event_callback, timeout, repeat); + if (ret != 0) { + free(timer); + lua_error_p(L, "couldn't start the event"); + } + + /* Save callback and timer in registry */ + lua_newtable(L); + lua_pushvalue(L, 2); + lua_rawseti(L, -2, 1); + lua_pushpointer(L, timer); + lua_rawseti(L, -2, 2); + int ref = luaL_ref(L, LUA_REGISTRYINDEX); + + /* Save reference to the timer */ + timer->data = (void *) (intptr_t)ref; + lua_pushinteger(L, ref); + return 1; +} + +static int event_after(lua_State *L) +{ + /* Check parameters */ + int n = lua_gettop(L); + if (n < 2 || !lua_isnumber(L, 1) || !lua_isfunction(L, 2)) + lua_error_p(L, "expected 'after(number timeout, function)'"); + + return event_sched(L, lua_tointeger(L, 1), 0); +} + +static int event_recurrent(lua_State *L) +{ + /* Check parameters */ + int n = lua_gettop(L); + if (n < 2 || !lua_isnumber(L, 1) || lua_tointeger(L, 1) == 0 + || !lua_isfunction(L, 2)) + lua_error_p(L, "expected 'recurrent(number interval, function)'"); + + return event_sched(L, 0, lua_tointeger(L, 1)); +} + +static int event_cancel(lua_State *L) +{ + int n = lua_gettop(L); + if (n < 1 || !lua_isnumber(L, 1)) + lua_error_p(L, "expected 'cancel(number event)'"); + + /* Fetch event if it exists */ + lua_rawgeti(L, LUA_REGISTRYINDEX, lua_tointeger(L, 1)); + bool ok = lua_istable(L, -1); + + /* Close the timer */ + uv_handle_t **timer_pp = NULL; + if (ok) { + lua_rawgeti(L, -1, 2); + timer_pp = lua_touserdata(L, -1); + ok = timer_pp && *timer_pp; + /* That have been sufficient safety checks, hopefully. 
*/ + } + if (ok && !uv_is_closing(*timer_pp)) { + uv_close(*timer_pp, (uv_close_cb)event_free); + } + lua_pushboolean(L, ok); + return 1; +} + +static int event_reschedule(lua_State *L) +{ + int n = lua_gettop(L); + if (n < 2 || !lua_isnumber(L, 1) || !lua_isnumber(L, 2)) + lua_error_p(L, "expected 'reschedule(number event, number timeout)'"); + + /* Fetch event if it exists */ + lua_rawgeti(L, LUA_REGISTRYINDEX, lua_tointeger(L, 1)); + bool ok = lua_istable(L, -1); + + /* Reschedule the timer */ + uv_handle_t **timer_pp = NULL; + if (ok) { + lua_rawgeti(L, -1, 2); + timer_pp = lua_touserdata(L, -1); + ok = timer_pp && *timer_pp; + /* That have been sufficient safety checks, hopefully. */ + } + if (ok && !uv_is_closing(*timer_pp)) { + int ret = uv_timer_start((uv_timer_t *)*timer_pp, + event_callback, lua_tointeger(L, 2), 0); + if (ret != 0) { + uv_close(*timer_pp, (uv_close_cb)event_free); + ok = false; + } + } + lua_pushboolean(L, ok); + return 1; +} + +static int event_fdwatch(lua_State *L) +{ + /* Check parameters */ + int n = lua_gettop(L); + if (n < 2 || !lua_isnumber(L, 1) || !lua_isfunction(L, 2)) + lua_error_p(L, "expected 'socket(number fd, function)'"); + + uv_poll_t *handle = malloc(sizeof(*handle)); + if (!handle) + lua_error_p(L, "out of memory"); + + /* Start timer with the reference */ + int sock = lua_tointeger(L, 1); + uv_loop_t *loop = uv_default_loop(); + int ret = uv_poll_init(loop, handle, sock); + if (ret == 0) + ret = uv_poll_start(handle, UV_READABLE, event_fdcallback); + if (ret != 0) { + free(handle); + lua_error_p(L, "couldn't start event poller"); + } + + /* Save callback and timer in registry */ + lua_newtable(L); + lua_pushvalue(L, 2); + lua_rawseti(L, -2, 1); + lua_pushpointer(L, handle); + lua_rawseti(L, -2, 2); + int ref = luaL_ref(L, LUA_REGISTRYINDEX); + + /* Save reference to the timer */ + handle->data = (void *) (intptr_t)ref; + lua_pushinteger(L, ref); + return 1; +} + +int kr_bindings_event(lua_State *L) +{ + static const 
luaL_Reg lib[] = { + { "after", event_after }, + { "recurrent", event_recurrent }, + { "cancel", event_cancel }, + { "socket", event_fdwatch }, + { "reschedule", event_reschedule }, + { NULL, NULL } + }; + + luaL_register(L, "event", lib); + return 1; +} + diff --git a/daemon/bindings/event.rst b/daemon/bindings/event.rst new file mode 100644 index 0000000..a96f299 --- /dev/null +++ b/daemon/bindings/event.rst @@ -0,0 +1,139 @@ +.. SPDX-License-Identifier: GPL-3.0-or-later + +Timers and events reference +^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +The timer represents exactly the thing described in the examples - it allows you to execute closures_ +after a specified time, or even recurrent events. Time is always described in milliseconds, +but there are convenient variables that you can use - ``sec, minute, hour``. +For example, ``5 * hour`` represents five hours, or 5*60*60*1000 milliseconds. + +.. function:: event.after(time, function) + + :return: event id + + Execute function after the specified time has passed. + The first parameter of the callback is the event itself. + + Example: + + .. code-block:: lua + + event.after(1 * minute, function() print('Hi!') end) + +.. function:: event.recurrent(interval, function) + + :return: event id + + Execute function immediately and then periodically after each ``interval``. + + Example: + + .. code-block:: lua + + msg_count = 0 + event.recurrent(5 * sec, function(e) + msg_count = msg_count + 1 + print('Hi #'..msg_count) + end) + +.. function:: event.reschedule(event_id, timeout) + + Reschedule a running event, it has no effect on canceled events. + New events may reuse the event_id, so the behaviour is undefined if the function + is called after another event is started. + + Example: + + .. code-block:: lua + + local interval = 1 * minute + event.after(1 * minute, function (ev) + print('Good morning!') + -- Halve the interval for each iteration + interval = interval / 2 + event.reschedule(ev, interval) + end) + +..
function:: event.cancel(event_id) + + Cancel a running event; it has no effect on already canceled events. + New events may reuse the event_id, so the behaviour is undefined if the function + is called after another event is started. + + Example: + + .. code-block:: lua + + e = event.after(1 * minute, function() print('Hi!') end) + event.cancel(e) + +Watch for file descriptor activity. This allows embedding other event loops or simply +firing events when a pipe endpoint becomes active. In other words, asynchronous +notifications for the daemon. + +.. function:: event.socket(fd, cb) + + :param number fd: file descriptor to watch + :param cb: closure or callback to execute when fd becomes active + :return: event id + + Execute the function when there is activity on the file descriptor; the closure is called + with the event id as the first parameter, status as the second and number of events as the third. + + Example: + + .. code-block:: lua + + e = event.socket(0, function(e, status, nevents) + print('activity detected') + end) + event.cancel(e) + +Asynchronous function execution +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +The `event` package provides a very basic means for non-blocking execution - it allows running code when activity on a file descriptor is detected, and when a certain amount of time passes. It doesn't however provide an easy-to-use abstraction for non-blocking I/O. This is instead exposed through the `worker` package (if `cqueues` Lua package is installed in the system). + +.. function:: worker.coroutine(function) + + Start a new coroutine with given function (closure). The function can do I/O or run timers without blocking the main thread. See cqueues_ for documentation of possible operations and synchronization primitives. The main limitation is that you can't wait for a coroutine to finish from processing layers, because it's not currently possible to suspend and resume execution of processing layers. + + Example: + + .. 
code-block:: lua + + worker.coroutine(function () + for i = 0, 10 do + print('executing', i) + worker.sleep(1) + end + end) + +.. function:: worker.sleep(seconds) + + Pause execution of the current function (asynchronously if running inside a worker coroutine). + +Example: + +.. code-block:: lua + + function async_print(testname, sleep) + log(testname .. ': system time before sleep ' .. tostring(os.time())) + worker.sleep(sleep) -- other coroutines continue execution now + log(testname .. ': system time AFTER sleep ' .. tostring(os.time())) + end + + worker.coroutine(function() async_print('call #1', 5) end) + worker.coroutine(function() async_print('call #2', 3) end) + +Output from this example demonstrates that both calls to function ``async_print`` were executed asynchronously: + + +.. code-block:: none + + call #2: system time before sleep 1578065073 + call #1: system time before sleep 1578065073 + call #2: system time AFTER sleep 1578065076 + call #1: system time AFTER sleep 1578065078 + diff --git a/daemon/bindings/impl.c b/daemon/bindings/impl.c new file mode 100644 index 0000000..8c48df8 --- /dev/null +++ b/daemon/bindings/impl.c @@ -0,0 +1,95 @@ +/* Copyright (C) CZ.NIC, z.s.p.o. <knot-resolver@labs.nic.cz> + * SPDX-License-Identifier: GPL-3.0-or-later + */ + +#include <dirent.h> +#include <lua.h> +#include <lauxlib.h> +#include <string.h> + + +const char * lua_table_checkindices(lua_State *L, const char *keys[]) +{ + /* Iterate over table at the top of the stack. + * http://www.lua.org/manual/5.1/manual.html#lua_next */ + for (lua_pushnil(L); lua_next(L, -2); lua_pop(L, 1)) { + lua_pop(L, 1); /* we don't need the value */ + /* We need to copy the key, as _tostring() confuses _next(). 
+ * https://www.lua.org/manual/5.1/manual.html#lua_tolstring */ + lua_pushvalue(L, -1); + const char *key = lua_tostring(L, -1); + if (!key) + return "<NON-STRING_INDEX>"; + for (const char **k = keys; ; ++k) { + if (*k == NULL) + return key; + if (strcmp(*k, key) == 0) + break; + } + } + return NULL; +} + +/** Return table listing filenames in a given directory (ls -A). */ +static int kluautil_list_dir(lua_State *L) +{ + lua_newtable(L); // empty table even on errors + + const char *path = lua_tolstring(L, 1, NULL); + if (!path) return 1; + DIR *dir = opendir(path); + if (!dir) return 1; + + struct dirent *entry; + int lua_i = 1; + while ((entry = readdir(dir)) != NULL) { + if (strcmp(entry->d_name, ".") != 0 && strcmp(entry->d_name, "..") != 0) { + lua_pushstring(L, entry->d_name); + lua_rawseti(L, -2, lua_i++); + } + } + + closedir(dir); + return 1; +} + + +/* Each of these just creates the correspondingly named lua table of functions. */ +int kr_bindings_cache (lua_State *L); /* ./cache.c */ +int kr_bindings_event (lua_State *L); /* ./event.c */ +int kr_bindings_modules (lua_State *L); /* ./modules.c */ +int kr_bindings_net (lua_State *L); /* ./net.c */ +int kr_bindings_worker (lua_State *L); /* ./worker.c */ + +void kr_bindings_register(lua_State *L) +{ + kr_bindings_cache(L); + kr_bindings_event(L); + kr_bindings_modules(L); + kr_bindings_net(L); + kr_bindings_worker(L); + + /* Finally some lua utils *written in C*, not really a binding. */ + lua_register(L, "kluautil_list_dir", kluautil_list_dir); +} + +void lua_error_p(lua_State *L, const char *fmt, ...) +{ + /* Add a stack trace and throw the result as a lua error. */ + luaL_traceback(L, L, "error occurred here (config filename:lineno is at the bottom, if config is involved):", 0); + /* Push formatted custom message, prepended with "ERROR: ". 
*/ + lua_pushliteral(L, "\nERROR: "); + { + va_list args; + va_start(args, fmt); + lua_pushvfstring(L, fmt, args); + va_end(args); + } + lua_concat(L, 3); + lua_error(L); + /* TODO: we might construct a little more friendly trace by using luaL_where(). + * In particular, in case the error happens in a function that was called + * directly from a config file (the most common case), there isn't much need + * to format the trace in this heavy way. */ +} + diff --git a/daemon/bindings/impl.h b/daemon/bindings/impl.h new file mode 100644 index 0000000..d522756 --- /dev/null +++ b/daemon/bindings/impl.h @@ -0,0 +1,90 @@ +/* Copyright (C) CZ.NIC, z.s.p.o. <knot-resolver@labs.nic.cz> + * SPDX-License-Identifier: GPL-3.0-or-later + */ + +#pragma once + +#include "daemon/engine.h" +#include "daemon/worker.h" /* the_worker is often useful */ + +#include <lua.h> +#include <lauxlib.h> +/* It may happen that include files are messed up and we're hitting a header + * e.g. from vanilla Lua. Even 5.1 won't work due to missing luaL_traceback() in <lauxlib.h>. */ +#if (LUA_VERSION_NUM) != 501 || !defined(LUA_LJDIR) + #error "Incorrect Lua version in #include <lua.h> - LuaJIT compatible with Lua 5.1 is required" +#endif + + +/** Useful to stringify macros into error strings. */ +#define STR(s) STRINGIFY_TOKEN(s) +#define STRINGIFY_TOKEN(s) #s + + +/** Check lua table at the top of the stack for allowed keys. + * \param keys NULL-terminated array of 0-terminated strings + * \return NULL if passed or the offending string (pushed on top of lua stack) + * \note Future work: if non-NULL is returned, there's extra stuff on the lua stack. + * \note Brute-force complexity: table length * summed length of keys. + */ +const char * lua_table_checkindices(lua_State *L, const char *keys[]); + +/** If the value at the top of the stack isn't a table, make it a single-element list. 
*/ +static inline void lua_listify(lua_State *L) +{ + if (lua_istable(L, -1)) + return; + lua_createtable(L, 1, 0); + lua_insert(L, lua_gettop(L) - 1); /* swap the top two stack elements */ + lua_pushinteger(L, 1); + lua_insert(L, lua_gettop(L) - 1); /* swap the top two stack elements */ + lua_settable(L, -3); +} + + +/** Throw a formatted lua error. + * + * The message will get prefixed by "ERROR: " and supplemented by stack trace. + * \return never! It calls lua_error(). + * + * Example: + ERROR: not a valid pin_sha256: 'a1Z/3ek=', raw length 5 instead of 32 + stack traceback: + [C]: in function 'tls_client' + /PathToPREFIX/lib/kdns_modules/policy.lua:175: in function 'TLS_FORWARD' + /PathToConfig.lua:46: in main chunk + */ +KR_PRINTF(2) KR_NORETURN KR_COLD +void lua_error_p(lua_State *L, const char *fmt, ...); +/** @internal Annotate for static checkers. */ +KR_NORETURN int lua_error(lua_State *L); + +/** Shortcut for common case. */ +static inline void lua_error_maybe(lua_State *L, int err) +{ + if (err) lua_error_p(L, "%s", kr_strerror(err)); +} + +static inline int execute_callback(lua_State *L, int argc) +{ + int ret = engine_pcall(L, argc); + if (ret != 0) { + kr_log_error(SYSTEM, "error: %s\n", lua_tostring(L, -1)); + } + /* Clear the stack, there may be event a/o anything returned */ + lua_settop(L, 0); + return ret; +} + +/** Push a pointer as heavy/full userdata. + * + * It's useful as a replacement of lua_pushlightuserdata(), + * but note that it behaves differently in lua (converts to pointer-to-pointer). + */ +static inline void lua_pushpointer(lua_State *L, void *p) +{ + void **addr = lua_newuserdata(L, sizeof(void *)); + kr_require(addr); + memcpy(addr, &p, sizeof(void *)); +} + diff --git a/daemon/bindings/modules.c b/daemon/bindings/modules.c new file mode 100644 index 0000000..acae270 --- /dev/null +++ b/daemon/bindings/modules.c @@ -0,0 +1,77 @@ +/* Copyright (C) CZ.NIC, z.s.p.o. 
<knot-resolver@labs.nic.cz> + * SPDX-License-Identifier: GPL-3.0-or-later + */ + +#include "daemon/bindings/impl.h" + + +/** List loaded modules */ +static int mod_list(lua_State *L) +{ + const module_array_t * const modules = &the_worker->engine->modules; + lua_newtable(L); + for (unsigned i = 0; i < modules->len; ++i) { + struct kr_module *module = modules->at[i]; + lua_pushstring(L, module->name); + lua_rawseti(L, -2, i + 1); + } + return 1; +} + +/** Load module. */ +static int mod_load(lua_State *L) +{ + /* Check parameters */ + int n = lua_gettop(L); + if (n != 1 || !lua_isstring(L, 1)) + lua_error_p(L, "expected 'load(string name)'"); + /* Parse precedence declaration */ + char *declaration = strdup(lua_tostring(L, 1)); + if (!declaration) + return kr_error(ENOMEM); + const char *name = strtok(declaration, " "); + const char *precedence = strtok(NULL, " "); + const char *ref = strtok(NULL, " "); + /* Load engine module */ + int ret = engine_register(the_worker->engine, name, precedence, ref); + free(declaration); + if (ret != 0) { + if (ret == kr_error(EIDRM)) { + lua_error_p(L, "referenced module not found"); + } else { + lua_error_maybe(L, ret); + } + } + + lua_pushboolean(L, 1); + return 1; +} + +/** Unload module. 
*/ +static int mod_unload(lua_State *L) +{ + /* Check parameters */ + int n = lua_gettop(L); + if (n != 1 || !lua_isstring(L, 1)) + lua_error_p(L, "expected 'unload(string name)'"); + /* Unload engine module */ + int ret = engine_unregister(the_worker->engine, lua_tostring(L, 1)); + lua_error_maybe(L, ret); + + lua_pushboolean(L, 1); + return 1; +} + +int kr_bindings_modules(lua_State *L) +{ + static const luaL_Reg lib[] = { + { "list", mod_list }, + { "load", mod_load }, + { "unload", mod_unload }, + { NULL, NULL } + }; + + luaL_register(L, "modules", lib); + return 1; +} + diff --git a/daemon/bindings/modules.rst b/daemon/bindings/modules.rst new file mode 100644 index 0000000..09df6ff --- /dev/null +++ b/daemon/bindings/modules.rst @@ -0,0 +1,43 @@ +.. SPDX-License-Identifier: GPL-3.0-or-later + +Modules +======= + +Knot Resolver functionality consists of separate modules, which allow you +to mix-and-match features you need without slowing down operation +by features you do not use. + +This practically means that you need to load a module before using features contained in it, for example: + +.. code-block:: lua + + -- load module and make dnstap features available + modules.load('dnstap') + -- configure dnstap features + dnstap.config({ + socket_path = "/tmp/dnstap.sock" + }) + +Obviously ordering matters, so you have to load the module first and configure it after it is loaded. + +Here is the full reference manual for module configuration: + + +.. function:: modules.list() + + :return: List of loaded modules. + +.. function:: modules.load(name) + + :param string name: Module name, e.g. "hints" + :return: ``true`` if the module was (or already is) loaded, error otherwise. + + Load a module by name. + +.. function:: modules.unload(name) + + :param string name: Module name, e.g. "detect_time_jump" + :return: ``true`` if the module was unloaded, error otherwise. + + Unload a module by name. This is useful for unloading modules loaded by default, mainly for debugging purposes. 
+ diff --git a/daemon/bindings/net.c b/daemon/bindings/net.c new file mode 100644 index 0000000..f1fa6f3 --- /dev/null +++ b/daemon/bindings/net.c @@ -0,0 +1,1260 @@ +/* Copyright (C) CZ.NIC, z.s.p.o. <knot-resolver@labs.nic.cz> + * SPDX-License-Identifier: GPL-3.0-or-later + */ + +#include "daemon/bindings/impl.h" + +#include "contrib/base64.h" +#include "contrib/cleanup.h" +#include "daemon/network.h" +#include "daemon/tls.h" +#include "lib/utils.h" + +#include <stdlib.h> + +#define PROXY_DATA_STRLEN (INET6_ADDRSTRLEN + 1 + 3 + 1) + +/** Table and next index on top of stack -> append entries for given endpoint_array_t. */ +static int net_list_add(const char *b_key, uint32_t key_len, trie_val_t *val, void *ext) +{ + endpoint_array_t *ep_array = *val; + lua_State *L = (lua_State *)ext; + lua_Integer i = lua_tointeger(L, -1); + for (int j = 0; j < ep_array->len; ++j) { + struct endpoint *ep = &ep_array->at[j]; + lua_newtable(L); // connection tuple + + if (ep->flags.kind) { + lua_pushstring(L, ep->flags.kind); + } else if (ep->flags.http && ep->flags.tls) { + lua_pushliteral(L, "doh2"); + } else if (ep->flags.tls) { + lua_pushliteral(L, "tls"); + } else if (ep->flags.xdp) { + lua_pushliteral(L, "xdp"); + } else { + lua_pushliteral(L, "dns"); + } + lua_setfield(L, -2, "kind"); + + lua_newtable(L); // "transport" table + + switch (ep->family) { + case AF_INET: + lua_pushliteral(L, "inet4"); + break; + case AF_INET6: + lua_pushliteral(L, "inet6"); + break; + case AF_XDP: + lua_pushliteral(L, "inet4+inet6"); // both UDP ports at once + break; + case AF_UNIX: + lua_pushliteral(L, "unix"); + break; + default: + kr_assert(false); + lua_pushliteral(L, "invalid"); + } + lua_setfield(L, -2, "family"); + + const char *ip_str_const = network_endpoint_key_str((struct endpoint_key *) b_key); + kr_require(ip_str_const); + auto_free char *ip_str = strdup(ip_str_const); + kr_require(ip_str); + char *hm = strchr(ip_str, '#'); + if (hm) /* Omit port */ + *hm = '\0'; + 
lua_pushstring(L, ip_str); + + if (ep->family == AF_INET || ep->family == AF_INET6) { + lua_setfield(L, -2, "ip"); + lua_pushboolean(L, ep->flags.freebind); + lua_setfield(L, -2, "freebind"); + } else if (ep->family == AF_UNIX) { + lua_setfield(L, -2, "path"); + } else if (ep->family == AF_XDP) { + lua_setfield(L, -2, "interface"); + lua_pushinteger(L, ep->nic_queue); + lua_setfield(L, -2, "nic_queue"); + } + + if (ep->family != AF_UNIX) { + lua_pushinteger(L, ep->port); + lua_setfield(L, -2, "port"); + } + + if (ep->family == AF_UNIX) { + lua_pushliteral(L, "stream"); + } else if (ep->flags.sock_type == SOCK_STREAM) { + lua_pushliteral(L, "tcp"); + } else if (ep->flags.sock_type == SOCK_DGRAM) { + lua_pushliteral(L, "udp"); + } else { + kr_assert(false); + lua_pushliteral(L, "invalid"); + } + lua_setfield(L, -2, "protocol"); + + lua_setfield(L, -2, "transport"); + + lua_settable(L, -3); + i++; + lua_pushinteger(L, i); + } + return kr_ok(); +} + +/** List active endpoints. */ +static int net_list(lua_State *L) +{ + lua_newtable(L); + lua_pushinteger(L, 1); + trie_apply_with_key(the_worker->engine->net.endpoints, net_list_add, L); + lua_pop(L, 1); + return 1; +} + +/** Listen on an address list represented by the top of lua stack. + * \note flags.kind ownership is not transferred, and flags.sock_type doesn't make sense + * \return success */ +static bool net_listen_addrs(lua_State *L, int port, endpoint_flags_t flags, int16_t nic_queue) +{ + if (kr_fails_assert(flags.xdp || nic_queue == -1)) + return false; + + /* Case: table with 'addr' field; only follow that field directly. */ + lua_getfield(L, -1, "addr"); + if (!lua_isnil(L, -1)) { + lua_replace(L, -2); + } else { + lua_pop(L, 1); + } + + /* Case: string, representing a single address. 
*/ + const char *str = lua_tostring(L, -1); + if (str != NULL) { + struct network *net = &the_worker->engine->net; + const bool is_unix = str[0] == '/'; + int ret = 0; + if (!flags.kind && !flags.tls) { /* normal UDP or XDP */ + flags.sock_type = SOCK_DGRAM; + ret = network_listen(net, str, port, nic_queue, flags); + } + if (!flags.kind && !flags.xdp && ret == 0) { /* common for TCP, DoT and DoH (v2) */ + flags.sock_type = SOCK_STREAM; + ret = network_listen(net, str, port, nic_queue, flags); + } + if (flags.kind) { + flags.kind = strdup(flags.kind); + flags.sock_type = SOCK_STREAM; /* TODO: allow to override this? */ + ret = network_listen(net, str, (is_unix ? 0 : port), nic_queue, flags); + } + if (ret == 0) return true; /* success */ + + if (is_unix) { + kr_log_error(NETWORK, "bind to '%s' (UNIX): %s\n", + str, kr_strerror(ret)); + } else if (flags.xdp) { + const char *err_str = knot_strerror(ret); + if (ret == KNOT_ELIMIT) { + if ((strcmp(str, "::") == 0 || strcmp(str, "0.0.0.0") == 0)) { + err_str = "wildcard addresses not supported with XDP"; + } else { + err_str = "address matched multiple network interfaces"; + } + } else if (ret == kr_error(ENODEV)) { + err_str = "invalid address or interface name"; + } + /* Notable OK strerror: KNOT_EPERM Operation not permitted */ + + if (nic_queue == -1) { + kr_log_error(NETWORK, "failed to initialize XDP for '%s@%d'" + " (nic_queue = <auto>): %s\n", + str, port, err_str); + } else { + kr_log_error(NETWORK, "failed to initialize XDP for '%s@%d'" + " (nic_queue = %d): %s\n", + str, port, nic_queue, err_str); + } + + } else { + const char *stype = flags.sock_type == SOCK_DGRAM ? "UDP" : "TCP"; + kr_log_error(NETWORK, "bind to '%s@%d' (%s): %s\n", + str, port, stype, kr_strerror(ret)); + } + return false; /* failure */ + } + + /* Last case: table where all entries are added recursively. 
*/ + if (!lua_istable(L, -1)) + lua_error_p(L, "bad type for address"); + lua_pushnil(L); + while (lua_next(L, -2)) { + if (!net_listen_addrs(L, port, flags, nic_queue)) + return false; + lua_pop(L, 1); + } + return true; +} + +static bool table_get_flag(lua_State *L, int index, const char *key, bool def) +{ + bool result = def; + lua_getfield(L, index, key); + if (lua_isboolean(L, -1)) { + result = lua_toboolean(L, -1); + } + lua_pop(L, 1); + return result; +} + +/** Listen on endpoint. */ +static int net_listen(lua_State *L) +{ + /* Check parameters */ + int n = lua_gettop(L); + if (n < 1 || n > 3) { + lua_error_p(L, "expected one to three arguments; usage:\n" + "net.listen(addresses, [port = " STR(KR_DNS_PORT) + ", flags = {tls = (port == " STR(KR_DNS_TLS_PORT) ")}])\n"); + } + + int port = KR_DNS_PORT; + if (n > 1) { + if (lua_isnumber(L, 2)) { + port = lua_tointeger(L, 2); + } else + if (!lua_isnil(L, 2)) { + lua_error_p(L, "wrong type of second parameter (port number)"); + } + } + + endpoint_flags_t flags = { 0 }; + if (port == KR_DNS_TLS_PORT) { + flags.tls = true; + } else if (port == KR_DNS_DOH_PORT) { + flags.http = flags.tls = true; + } + + int16_t nic_queue = -1; + if (n > 2 && !lua_isnil(L, 3)) { + if (!lua_istable(L, 3)) + lua_error_p(L, "wrong type of third parameter (table expected)"); + flags.tls = table_get_flag(L, 3, "tls", flags.tls); + flags.freebind = table_get_flag(L, 3, "freebind", false); + + lua_getfield(L, 3, "kind"); + const char *k = lua_tostring(L, -1); + if (k && strcasecmp(k, "dns") == 0) { + flags.tls = flags.http = false; + } else if (k && strcasecmp(k, "xdp") == 0) { + flags.tls = flags.http = false; + flags.xdp = true; + } else if (k && strcasecmp(k, "tls") == 0) { + flags.tls = true; + flags.http = false; + } else if (k && strcasecmp(k, "doh2") == 0) { + flags.tls = flags.http = true; + } else if (k) { + flags.kind = k; + if (strcasecmp(k, "doh") == 0) { + lua_error_p(L, "kind=\"doh\" was renamed to kind=\"doh_legacy\", switch 
to the new implementation with kind=\"doh2\" or update your config"); + } + } + + lua_getfield(L, 3, "nic_queue"); + if (lua_isnumber(L, -1)) { + if (flags.xdp) { + nic_queue = lua_tointeger(L, -1); + } else { + lua_error_p(L, "nic_queue only supported with kind = 'xdp'"); + } + } else if (!lua_isnil(L, -1)) { + lua_error_p(L, "wrong value of nic_queue (integer expected)"); + } + } + + /* Memory management of `kind` string is difficult due to longjmp etc. + * Pop will unreference the lua value, so we store it on C stack instead (!) */ + const int kind_alen = flags.kind ? strlen(flags.kind) + 1 : 1 /* 0 length isn't C standard */; + char kind_buf[kind_alen]; + if (flags.kind) { + memcpy(kind_buf, flags.kind, kind_alen); + flags.kind = kind_buf; + } + + /* Now focus on the first argument. */ + lua_settop(L, 1); + if (!net_listen_addrs(L, port, flags, nic_queue)) + lua_error_p(L, "net.listen() failed to bind"); + lua_pushboolean(L, true); + return 1; +} + +/** Prints the specified `data` into the specified `dst` buffer. */ +static char *proxy_data_to_string(int af, const struct net_proxy_data *data, + char *dst, size_t size) +{ + kr_assert(size >= PROXY_DATA_STRLEN); + const void *in_addr = (af == AF_INET) + ? (void *) &data->addr.ip4 + : (void *) &data->addr.ip6; + char *cur = dst; + + const char *ret = inet_ntop(af, in_addr, cur, size); + if (!ret) + return NULL; + + cur += strlen(cur); /*< advance cursor to after the address */ + *(cur++) = '/'; + int masklen = snprintf(cur, 3 + 1, "%u", data->netmask); + cur[masklen] = '\0'; + return dst; +} + +/** Put all IP addresses from `trie` into the table at the top of the Lua stack. + * For each address, increment the integer at `i`. All addresses in `trie` must + * be from the specified `family`. 
*/ +static void net_proxy_addr_put(lua_State *L, int family, trie_t *trie, int *i) +{ + char addrbuf[PROXY_DATA_STRLEN]; + const char *addr; + trie_it_t *it; + for (it = trie_it_begin(trie); !trie_it_finished(it); trie_it_next(it)) { + lua_pushinteger(L, *i); + struct net_proxy_data *data = *trie_it_val(it); + addr = proxy_data_to_string(family, data, + addrbuf, sizeof(addrbuf)); + lua_pushstring(L, addr); + lua_settable(L, -3); + *i += 1; + } + trie_it_free(it); +} + +/** Allow PROXYv2 headers for IP address. */ +static int net_proxy_allowed(lua_State *L) +{ + struct network *net = &the_worker->engine->net; + int n = lua_gettop(L); + int i = 1; + const char *addr; + + /* Return current state */ + if (n == 0) { + lua_newtable(L); + i = 1; + + if (net->proxy_all4) { + lua_pushinteger(L, i); + lua_pushstring(L, "0.0.0.0/0"); + lua_settable(L, -3); + i += 1; + } else { + net_proxy_addr_put(L, AF_INET, net->proxy_addrs4, &i); + } + + if (net->proxy_all6) { + lua_pushinteger(L, i); + lua_pushstring(L, "::/0"); + lua_settable(L, -3); + i += 1; + } else { + net_proxy_addr_put(L, AF_INET6, net->proxy_addrs6, &i); + } + + return 1; + } + + if (n != 1) + lua_error_p(L, "net.proxy_allowed() takes one parameter (string or table)"); + + if (!lua_istable(L, 1) && !lua_isstring(L, 1)) + lua_error_p(L, "net.proxy_allowed() argument must be string or table"); + + /* Reset allowed proxy addresses */ + network_proxy_reset(net); + + /* Add new proxy addresses */ + if (lua_istable(L, 1)) { + for (i = 1; !lua_isnil(L, -1); i++) { + lua_pushinteger(L, i); + lua_gettable(L, 1); + if (lua_isnil(L, -1)) /* missing value - end iteration */ + break; + if (!lua_isstring(L, -1)) + lua_error_p(L, "net.proxy_allowed() argument may only contain strings"); + addr = lua_tostring(L, -1); + int ret = network_proxy_allow(net, addr); + if (ret) + lua_error_p(L, "invalid argument"); + } + } else if (lua_isstring(L, 1)) { + addr = lua_tostring(L, 1); + int ret = network_proxy_allow(net, addr); + if (ret) 
+ lua_error_p(L, "invalid argument"); + } + + return 0; +} + +/** Close endpoint. */ +static int net_close(lua_State *L) +{ + /* Check parameters */ + const int n = lua_gettop(L); + bool ok = (n == 1 || n == 2) && lua_isstring(L, 1); + const char *addr = lua_tostring(L, 1); + int port; + if (ok && (n < 2 || lua_isnil(L, 2))) { + port = -1; + } else if (ok) { + ok = lua_isnumber(L, 2); + port = lua_tointeger(L, 2); + ok = ok && port >= 0 && port <= 65535; + } + if (!ok) + lua_error_p(L, "expected 'close(string addr, [number port])'"); + + int ret = network_close(&the_worker->engine->net, addr, port); + lua_pushboolean(L, ret == 0); + return 1; +} + +/** List available interfaces. */ +static int net_interfaces(lua_State *L) +{ + /* Retrieve interface list */ + int count = 0; + char buf[INET6_ADDRSTRLEN]; /* https://tools.ietf.org/html/rfc4291 */ + uv_interface_address_t *info = NULL; + uv_interface_addresses(&info, &count); + lua_newtable(L); + for (int i = 0; i < count; ++i) { + uv_interface_address_t iface = info[i]; + lua_getfield(L, -1, iface.name); + if (lua_isnil(L, -1)) { + lua_pop(L, 1); + lua_newtable(L); + } + + /* Address */ + lua_getfield(L, -1, "addr"); + if (lua_isnil(L, -1)) { + lua_pop(L, 1); + lua_newtable(L); + } + if (iface.address.address4.sin_family == AF_INET) { + uv_ip4_name(&iface.address.address4, buf, sizeof(buf)); + } else if (iface.address.address4.sin_family == AF_INET6) { + uv_ip6_name(&iface.address.address6, buf, sizeof(buf)); + } else { + buf[0] = '\0'; + } + + if (kr_sockaddr_link_local((struct sockaddr *) &iface.address)) { + /* Link-local IPv6: add %interface prefix */ + auto_free char *str = NULL; + int ret = asprintf(&str, "%s%%%s", buf, iface.name); + kr_assert(ret > 0); + lua_pushstring(L, str); + } else { + lua_pushstring(L, buf); + } + + lua_rawseti(L, -2, lua_objlen(L, -2) + 1); + lua_setfield(L, -2, "addr"); + + /* Hardware address. 
*/ + char *p = buf; + for (int k = 0; k < sizeof(iface.phys_addr); ++k) { + sprintf(p, "%.2x:", (uint8_t)iface.phys_addr[k]); + p += 3; + } + p[-1] = '\0'; + lua_pushstring(L, buf); + lua_setfield(L, -2, "mac"); + + /* Push table */ + lua_setfield(L, -2, iface.name); + } + uv_free_interface_addresses(info, count); + + return 1; +} + +/** Set UDP maximum payload size. */ +static int net_bufsize(lua_State *L) +{ + struct kr_context *ctx = &the_worker->engine->resolver; + const int argc = lua_gettop(L); + if (argc == 0) { + lua_pushinteger(L, knot_edns_get_payload(ctx->downstream_opt_rr)); + lua_pushinteger(L, knot_edns_get_payload(ctx->upstream_opt_rr)); + return 2; + } + + if (argc == 1) { + int bufsize = lua_tointeger(L, 1); + if (bufsize < 512 || bufsize > UINT16_MAX) + lua_error_p(L, "bufsize must be within <512, " STR(UINT16_MAX) ">"); + knot_edns_set_payload(ctx->downstream_opt_rr, (uint16_t)bufsize); + knot_edns_set_payload(ctx->upstream_opt_rr, (uint16_t)bufsize); + } else if (argc == 2) { + int bufsize_downstream = lua_tointeger(L, 1); + int bufsize_upstream = lua_tointeger(L, 2); + if (bufsize_downstream < 512 || bufsize_upstream < 512 + || bufsize_downstream > UINT16_MAX || bufsize_upstream > UINT16_MAX) { + lua_error_p(L, "bufsize must be within <512, " STR(UINT16_MAX) ">"); + } + knot_edns_set_payload(ctx->downstream_opt_rr, (uint16_t)bufsize_downstream); + knot_edns_set_payload(ctx->upstream_opt_rr, (uint16_t)bufsize_upstream); + } + return 0; +} + +/** Set TCP pipelining size. 
*/ +static int net_pipeline(lua_State *L) +{ + struct worker_ctx *worker = the_worker; + if (!worker) { + return 0; + } + if (!lua_isnumber(L, 1)) { + lua_pushinteger(L, worker->tcp_pipeline_max); + return 1; + } + int len = lua_tointeger(L, 1); + if (len < 0 || len > UINT16_MAX) + lua_error_p(L, "tcp_pipeline must be within <0, " STR(UINT16_MAX) ">"); + worker->tcp_pipeline_max = len; + lua_pushinteger(L, len); + return 1; +} + +static int net_tls(lua_State *L) +{ + struct network *net = &the_worker->engine->net; + if (!net) { + return 0; + } + + /* Only return current credentials. */ + if (lua_gettop(L) == 0) { + /* No credentials configured yet. */ + if (!net->tls_credentials) { + return 0; + } + lua_newtable(L); + lua_pushstring(L, net->tls_credentials->tls_cert); + lua_setfield(L, -2, "cert_file"); + lua_pushstring(L, net->tls_credentials->tls_key); + lua_setfield(L, -2, "key_file"); + return 1; + } + + if ((lua_gettop(L) != 2) || !lua_isstring(L, 1) || !lua_isstring(L, 2)) + lua_error_p(L, "net.tls takes two parameters: (\"cert_file\", \"key_file\")"); + + int r = tls_certificate_set(net, lua_tostring(L, 1), lua_tostring(L, 2)); + lua_error_maybe(L, r); + + lua_pushboolean(L, true); + return 1; +} + +/** Configure HTTP headers for DoH requests. */ +static int net_doh_headers(lua_State *L) +{ + doh_headerlist_t *headers = &the_worker->doh_qry_headers; + int i; + const char *name; + + /* Only return current configuration. */ + if (lua_gettop(L) == 0) { + lua_newtable(L); + for (i = 0; i < headers->len; i++) { + lua_pushinteger(L, i + 1); + name = headers->at[i]; + lua_pushlstring(L, name, strlen(name)); + lua_settable(L, -3); + } + return 1; + } + + if (lua_gettop(L) != 1) + lua_error_p(L, "net.doh_headers() takes one parameter (string or table)"); + + if (!lua_istable(L, 1) && !lua_isstring(L, 1)) + lua_error_p(L, "net.doh_headers() argument must be string or table"); + + /* Clear existing headers. 
*/ + for (i = 0; i < headers->len; i++) + free((void *)headers->at[i]); + array_clear(*headers); + + if (lua_istable(L, 1)) { + for (i = 1; !lua_isnil(L, -1); i++) { + lua_pushinteger(L, i); + lua_gettable(L, 1); + if (lua_isnil(L, -1)) /* missing value - end iteration */ + break; + if (!lua_isstring(L, -1)) + lua_error_p(L, "net.doh_headers() argument table can only contain strings"); + name = lua_tostring(L, -1); + array_push(*headers, strdup(name)); + } + } else if (lua_isstring(L, 1)) { + name = lua_tostring(L, 1); + array_push(*headers, strdup(name)); + } + + return 0; +} + +/** Return a lua table with TLS authentication parameters. + * The format is the same as passed to policy.TLS_FORWARD(); + * more precisely, it's in a compatible canonical form. */ +static int tls_params2lua(lua_State *L, trie_t *params) +{ + lua_newtable(L); + if (!params) /* Allowed special case. */ + return 1; + trie_it_t *it; + size_t list_index = 0; + for (it = trie_it_begin(params); !trie_it_finished(it); trie_it_next(it)) { + /* Prepare table for the current address + * and its index in the returned list. */ + lua_pushinteger(L, ++list_index); + lua_createtable(L, 0, 2); + + /* Get the "addr#port" string... */ + size_t ia_len; + const char *key = trie_it_key(it, &ia_len); + int af = AF_UNSPEC; + if (ia_len == 2 + sizeof(struct in_addr)) { + af = AF_INET; + } else if (ia_len == 2 + sizeof(struct in6_addr)) { + af = AF_INET6; + } + if (kr_fails_assert(key && af != AF_UNSPEC)) + lua_error_p(L, "internal error: bad IP address"); + uint16_t port; + memcpy(&port, key, sizeof(port)); + port = ntohs(port); + const char *ia = key + sizeof(port); + char str[INET6_ADDRSTRLEN + 1 + 5 + 1]; + size_t len = sizeof(str); + if (kr_fails_assert(kr_ntop_str(af, ia, port, str, &len) == kr_ok())) + lua_error_p(L, "internal error: bad IP address conversion"); + /* ...and push it as [1]. 
*/ + lua_pushinteger(L, 1); + lua_pushlstring(L, str, len - 1 /* len includes '\0' */); + lua_settable(L, -3); + + const tls_client_param_t *e = *trie_it_val(it); + if (kr_fails_assert(e)) + lua_error_p(L, "internal problem - NULL entry for %s", str); + + /* .hostname = */ + if (e->hostname) { + lua_pushstring(L, e->hostname); + lua_setfield(L, -2, "hostname"); + } + + /* .ca_files = */ + if (e->ca_files.len) { + lua_createtable(L, e->ca_files.len, 0); + for (size_t i = 0; i < e->ca_files.len; ++i) { + lua_pushinteger(L, i + 1); + lua_pushstring(L, e->ca_files.at[i]); + lua_settable(L, -3); + } + lua_setfield(L, -2, "ca_files"); + } + + /* .pin_sha256 = ... ; keep sane indentation via goto. */ + if (!e->pins.len) goto no_pins; + lua_createtable(L, e->pins.len, 0); + for (size_t i = 0; i < e->pins.len; ++i) { + uint8_t pin_base64[TLS_SHA256_BASE64_BUFLEN]; + int err = kr_base64_encode(e->pins.at[i], TLS_SHA256_RAW_LEN, + pin_base64, sizeof(pin_base64)); + if (kr_fails_assert(err >= 0)) + lua_error_p(L, + "internal problem when converting pin_sha256: %s", + kr_strerror(err)); + lua_pushinteger(L, i + 1); + lua_pushlstring(L, (const char *)pin_base64, err); + /* pin_base64 isn't 0-terminated ^^^ */ + lua_settable(L, -3); + } + lua_setfield(L, -2, "pin_sha256"); + + no_pins:/* .insecure = */ + if (e->insecure) { + lua_pushboolean(L, true); + lua_setfield(L, -2, "insecure"); + } + /* Now the whole table is pushed atop the returned list. */ + lua_settable(L, -3); + } + trie_it_free(it); + return 1; +} + +static inline int cmp_sha256(const void *p1, const void *p2) +{ + return memcmp(*(char * const *)p1, *(char * const *)p2, TLS_SHA256_RAW_LEN); +}
/**
 * Lua binding net.tls_client(): read or set TLS authentication parameters
 * used towards upstream servers.
 *
 * - No argument: returns the table produced by tls_params2lua() for the
 *   currently stored parameters.
 * - One table argument with the keys [1] (address string; port defaults
 *   to 853), "hostname", "ca_file" (string or list), "pin_sha256" (string or
 *   list) and "insecure": builds a new tls_client_param_t and stores it in
 *   net->tls_client_params under that address, replacing any previous entry.
 *   A warning is logged when the replaced entry differs from the new one.
 *
 * Any validation failure raises a Lua error.  Returns 0 Lua values on a
 * successful set.
 */
static int net_tls_client(lua_State *L)
{
	/* TODO idea: allow starting the lua table with *multiple* IP targets,
	 * meaning the authentication config should be applied to each.
	 */
	struct network *net = &the_worker->engine->net;
	if (lua_gettop(L) == 0)
		return tls_params2lua(L, net->tls_client_params);
	/* Various basic sanity-checking. */
	if (lua_gettop(L) != 1 || !lua_istable(L, 1))
		lua_error_maybe(L, EINVAL);
	/* check that only allowed keys are present */
	{
		const char *bad_key = lua_table_checkindices(L, (const char *[])
			{ "1", "hostname", "ca_file", "pin_sha256", "insecure", NULL });
		if (bad_key)
			lua_error_p(L, "found unexpected key '%s'", bad_key);
	}

	/**** Phase 1: get the parameter into a C struct, incl. parse of CA files,
	 * 	 regardless of the address-pair having an entry already. */

	tls_client_param_t *newcfg = tls_client_param_new();
	if (!newcfg)
		lua_error_p(L, "out of memory or something like that :-/");
	/* Shortcut for cleanup actions needed until newcfg is handed over to
	 * the parameter map.  The entry is released through
	 * tls_client_param_unref() -- the same call used for the replaced entry
	 * at the end of this function -- instead of a bare free(), which would
	 * leak the hostname, CA paths, pins and credentials attached so far.
	 * NOTE(review): assumes tls_client_param_new() returns the entry holding
	 * exactly one reference -- confirm against its definition.
	 * None of the format arguments passed to ERROR() point into newcfg,
	 * so releasing it before formatting the message is safe. */
	#define ERROR(...) do { \
		tls_client_param_unref(newcfg); \
		lua_error_p(L, __VA_ARGS__); \
	} while (false)

	/* .hostname - always accepted. */
	lua_getfield(L, 1, "hostname");
	if (!lua_isnil(L, -1)) {
		const char *hn_str = lua_tostring(L, -1);
		/* Convert to lower-case dname and back, for checking etc. */
		knot_dname_t dname[KNOT_DNAME_MAXLEN];
		if (!hn_str || !knot_dname_from_str(dname, hn_str, sizeof(dname)))
			ERROR("invalid hostname");
		knot_dname_to_lower(dname);
		char *h = knot_dname_to_str_alloc(dname);
		if (!h)
			ERROR("%s", kr_strerror(ENOMEM));
		/* Strip the final dot produced by knot_dname_*() */
		h[strlen(h) - 1] = '\0';
		newcfg->hostname = h;
	}
	lua_pop(L, 1);

	/* .ca_file - it can be a list of paths, contrary to the name. */
	bool has_ca_file = false;
	lua_getfield(L, 1, "ca_file");
	if (!lua_isnil(L, -1)) {
		if (!newcfg->hostname)
			ERROR("missing hostname but specifying ca_file");
		lua_listify(L);
		array_init(newcfg->ca_files); /*< placate apparently confused scan-build */
		if (array_reserve(newcfg->ca_files, lua_objlen(L, -1)) != 0) /*< optim. */
			ERROR("%s", kr_strerror(ENOMEM));
		/* Iterate over table at the top of the stack.
		 * http://www.lua.org/manual/5.1/manual.html#lua_next */
		for (lua_pushnil(L); lua_next(L, -2); lua_pop(L, 1)) {
			has_ca_file = true; /* deferred here so that {} -> false */
			const char *ca_file = lua_tostring(L, -1);
			if (!ca_file)
				ERROR("ca_file contains a non-string");
			/* Let gnutls process it immediately, so garbage gets detected. */
			int ret = gnutls_certificate_set_x509_trust_file(
				newcfg->credentials, ca_file, GNUTLS_X509_FMT_PEM);
			if (ret < 0) {
				ERROR("failed to import certificate file '%s': %s - %s\n",
					ca_file, gnutls_strerror_name(ret),
					gnutls_strerror(ret));
			} else {
				kr_log_debug(TLSCLIENT, "imported %d certs from file '%s'\n",
						ret, ca_file);
			}

			ca_file = strdup(ca_file);
			if (!ca_file || array_push(newcfg->ca_files, ca_file) < 0)
				ERROR("%s", kr_strerror(ENOMEM));
		}
		/* Sort the strings for easier comparison later. */
		if (newcfg->ca_files.len) {
			qsort(&newcfg->ca_files.at[0], newcfg->ca_files.len,
				sizeof(newcfg->ca_files.at[0]), strcmp_p);
		}
	}
	lua_pop(L, 1);

	/* .pin_sha256 */
	lua_getfield(L, 1, "pin_sha256");
	if (!lua_isnil(L, -1)) {
		if (has_ca_file)
			ERROR("mixing pin_sha256 with ca_file is not supported");
		lua_listify(L);
		array_init(newcfg->pins); /*< placate apparently confused scan-build */
		if (array_reserve(newcfg->pins, lua_objlen(L, -1)) != 0) /*< optim. */
			ERROR("%s", kr_strerror(ENOMEM));
		/* Capacity handed to the decoder.  The buffer must really be this
		 * large: allocating only TLS_SHA256_RAW_LEN while announcing
		 * TLS_SHA256_RAW_LEN + 8 lets an over-long pin write past the end
		 * of the allocation.  Presumably the slack exists so that slightly
		 * over-long pins get the "raw length" message below instead of a
		 * decoder error -- confirm. */
		enum { PIN_DECODE_CAP = TLS_SHA256_RAW_LEN + 8 };
		/* Iterate over table at the top of the stack. */
		for (lua_pushnil(L); lua_next(L, -2); lua_pop(L, 1)) {
			const char *pin = lua_tostring(L, -1);
			if (!pin)
				ERROR("pin_sha256 is not a string");
			uint8_t *pin_raw = malloc(PIN_DECODE_CAP);
			/* Push the string early to simplify error processing:
			 * once stored in newcfg->pins it is released together
			 * with newcfg. */
			if (kr_fails_assert(pin_raw && array_push(newcfg->pins, pin_raw) >= 0)) {
				free(pin_raw);
				ERROR("%s", kr_strerror(ENOMEM));
			}
			int ret = kr_base64_decode((const uint8_t *)pin, strlen(pin),
						pin_raw, PIN_DECODE_CAP);
			if (ret < 0) {
				ERROR("not a valid pin_sha256: '%s' (length %d), %s\n",
					pin, (int)strlen(pin), knot_strerror(ret));
			} else if (ret != TLS_SHA256_RAW_LEN) {
				ERROR("not a valid pin_sha256: '%s', "
						"raw length %d instead of "
						STR(TLS_SHA256_RAW_LEN)"\n",
					pin, ret);
			}
		}
		/* Sort the raw strings for easier comparison later. */
		if (newcfg->pins.len) {
			qsort(&newcfg->pins.at[0], newcfg->pins.len,
				sizeof(newcfg->pins.at[0]), cmp_sha256);
		}
	}
	lua_pop(L, 1);

	/* .insecure */
	lua_getfield(L, 1, "insecure");
	if (lua_isnil(L, -1)) {
		if (!newcfg->hostname && !newcfg->pins.len)
			ERROR("no way to authenticate and not set as insecure");
	} else if (lua_isboolean(L, -1) && lua_toboolean(L, -1)) {
		newcfg->insecure = true;
		if (has_ca_file || newcfg->pins.len)
			ERROR("set as insecure but provided authentication config");
	} else {
		ERROR("incorrect value in the 'insecure' field");
	}
	lua_pop(L, 1);

	/* Init CAs from system trust store, if needed. */
	if (!newcfg->insecure && !newcfg->pins.len && !has_ca_file) {
		int ret = gnutls_certificate_set_x509_system_trust(newcfg->credentials);
		if (ret <= 0) {
			ERROR("failed to use system CA certificate store: %s",
				ret ? gnutls_strerror(ret) : kr_strerror(ENOENT));
		} else {
			kr_log_debug(TLSCLIENT, "imported %d certs from system store\n",
				ret);
		}
	}

	/**** Phase 2: deal with the C authentication "table". */
	/* Parse address and port.  newcfg is still owned by this function here,
	 * so failures keep going through ERROR() to release it. */
	lua_pushinteger(L, 1);
	lua_gettable(L, 1);
	const char *addr_str = lua_tostring(L, -1);
	if (!addr_str)
		ERROR("address is not a string");
	char buf[INET6_ADDRSTRLEN + 1];
	uint16_t port = 853;
	const struct sockaddr *addr = NULL;
	if (kr_straddr_split(addr_str, buf, &port) == kr_ok())
		addr = kr_straddr_socket(buf, port, NULL);
	/* Add newcfg into the C map, saving the original into oldcfg. */
	if (!addr)
		ERROR("address '%s' could not be converted", addr_str);
	tls_client_param_t **oldcfgp = tls_client_param_getptr(
			&net->tls_client_params, addr, true);
	free_const(addr);
	if (!oldcfgp)
		ERROR("internal error when extending tls_client_params map");
	#undef ERROR
	tls_client_param_t *oldcfg = *oldcfgp;
	*oldcfgp = newcfg; /* replace old config in trie with the new one */
	/* If there was no original entry, it's easy! */
	if (!oldcfg)
		return 0;

	/* Check for equality (newcfg vs. oldcfg), and print a warning if not equal.*/
	const bool ok_h = (!newcfg->hostname && !oldcfg->hostname)
		|| (newcfg->hostname && oldcfg->hostname && strcmp(newcfg->hostname, oldcfg->hostname) == 0);
	bool ok_ca = newcfg->ca_files.len == oldcfg->ca_files.len;
	for (size_t i = 0; ok_ca && i < newcfg->ca_files.len; ++i)
		ok_ca = strcmp(newcfg->ca_files.at[i], oldcfg->ca_files.at[i]) == 0;
	bool ok_pins = newcfg->pins.len == oldcfg->pins.len;
	/* The result belongs in ok_pins; writing it to ok_ca left this loop
	 * unable to terminate early or to flag differing pins. */
	for (size_t i = 0; ok_pins && i < newcfg->pins.len; ++i)
		ok_pins = memcmp(newcfg->pins.at[i], oldcfg->pins.at[i], TLS_SHA256_RAW_LEN) == 0;
	const bool ok_insecure = newcfg->insecure == oldcfg->insecure;
	if (!(ok_h && ok_ca && ok_pins && ok_insecure)) {
		kr_log_warning(TLSCLIENT,
			"warning: re-defining TLS authentication parameters for %s\n",
			addr_str);
	}
	tls_client_param_unref(oldcfg);
	return 0;
}

+int net_tls_client_clear(lua_State *L) +{ + /* One parameter: address -> convert it to a struct sockaddr. 
*/ + if (lua_gettop(L) != 1 || !lua_isstring(L, 1)) + lua_error_p(L, "net.tls_client_clear() requires one parameter (\"address\")"); + const char *addr_str = lua_tostring(L, 1); + char buf[INET6_ADDRSTRLEN + 1]; + uint16_t port = 853; + const struct sockaddr *addr = NULL; + if (kr_straddr_split(addr_str, buf, &port) == kr_ok()) + addr = kr_straddr_socket(buf, port, NULL); + if (!addr) + lua_error_p(L, "invalid IP address"); + /* Do the actual removal. */ + struct network *net = &the_worker->engine->net; + int r = tls_client_param_remove(net->tls_client_params, addr); + free_const(addr); + lua_error_maybe(L, r); + lua_pushboolean(L, true); + return 1; +} + +static int net_tls_padding(lua_State *L) +{ + struct kr_context *ctx = &the_worker->engine->resolver; + + /* Only return current padding. */ + if (lua_gettop(L) == 0) { + if (ctx->tls_padding < 0) { + lua_pushboolean(L, true); + return 1; + } else if (ctx->tls_padding == 0) { + lua_pushboolean(L, false); + return 1; + } + lua_pushinteger(L, ctx->tls_padding); + return 1; + } + + const char *errstr = "net.tls_padding parameter has to be true, false," + " or a number between <0, " STR(MAX_TLS_PADDING) ">"; + if (lua_gettop(L) != 1) + lua_error_p(L, "%s", errstr); + if (lua_isboolean(L, 1)) { + bool x = lua_toboolean(L, 1); + if (x) { + ctx->tls_padding = -1; + } else { + ctx->tls_padding = 0; + } + } else if (lua_isnumber(L, 1)) { + int padding = lua_tointeger(L, 1); + if ((padding < 0) || (padding > MAX_TLS_PADDING)) + lua_error_p(L, "%s", errstr); + ctx->tls_padding = padding; + } else { + lua_error_p(L, "%s", errstr); + } + lua_pushboolean(L, true); + return 1; +} + +/** Shorter salt can't contain much entropy. */ +#define net_tls_sticket_MIN_SECRET_LEN 32 + +static int net_tls_sticket_secret_string(lua_State *L) +{ + struct network *net = &the_worker->engine->net; + + size_t secret_len; + const char *secret; + + if (lua_gettop(L) == 0) { + /* Zero-length secret, implying random key. 
*/ + secret_len = 0; + secret = NULL; + } else { + if (lua_gettop(L) != 1 || !lua_isstring(L, 1)) { + lua_error_p(L, + "net.tls_sticket_secret takes one parameter: (\"secret string\")"); + } + secret = lua_tolstring(L, 1, &secret_len); + if (secret_len < net_tls_sticket_MIN_SECRET_LEN || !secret) { + lua_error_p(L, "net.tls_sticket_secret - the secret is shorter than " + STR(net_tls_sticket_MIN_SECRET_LEN) " bytes"); + } + } + + tls_session_ticket_ctx_destroy(net->tls_session_ticket_ctx); + net->tls_session_ticket_ctx = + tls_session_ticket_ctx_create(net->loop, secret, secret_len); + if (net->tls_session_ticket_ctx == NULL) { + lua_error_p(L, + "net.tls_sticket_secret_string - can't create session ticket context"); + } + + lua_pushboolean(L, true); + return 1; +} +
/**
 * Lua binding net.tls_sticket_secret_file(): read a session-ticket secret
 * from a file and install a new session ticket context built from it.
 *
 * Takes exactly one string argument, the file name.  At most
 * TLS_SESSION_TICKET_SECRET_MAX_LEN bytes are read; fewer than
 * net_tls_sticket_MIN_SECRET_LEN bytes is rejected.  On success the previous
 * net->tls_session_ticket_ctx is destroyed and replaced, and `true` is
 * returned to Lua.  Every failure raises a Lua error.
 */
static int net_tls_sticket_secret_file(lua_State *L)
{
	if (lua_gettop(L) != 1 || !lua_isstring(L, 1)) {
		lua_error_p(L,
			"net.tls_sticket_secret_file takes one parameter: (\"file name\")");
	}

	const char *file_name = lua_tostring(L, 1);
	if (strlen(file_name) == 0)
		lua_error_p(L, "net.tls_sticket_secret_file - empty file name");

	FILE *fp = fopen(file_name, "r");
	if (fp == NULL) {
		lua_error_p(L, "net.tls_sticket_secret_file - can't open file '%s': %s",
				file_name, strerror(errno));
	}

	char secret_buf[TLS_SESSION_TICKET_SECRET_MAX_LEN];
	errno = 0;
	const size_t secret_len = fread(secret_buf, 1, sizeof(secret_buf), fp);
	/* ferror() only yields a flag, not an error code, so the errno left by
	 * the failed read is what gets reported; EIO is the fallback when the
	 * C library did not set one. */
	const bool read_failed = ferror(fp) != 0;
	const int read_errno = errno ? errno : EIO;
	/* Close before any error exit: lua_error_p() is used in this file as a
	 * non-returning exit, so a later fclose() would never run and the
	 * stream would leak. */
	fclose(fp);
	if (read_failed) {
		lua_error_p(L,
			"net.tls_sticket_secret_file - error reading from file '%s': %s",
			file_name, strerror(read_errno));
	}
	if (secret_len < net_tls_sticket_MIN_SECRET_LEN) {
		lua_error_p(L,
			"net.tls_sticket_secret_file - file '%s' is shorter than "
			STR(net_tls_sticket_MIN_SECRET_LEN) " bytes",
			file_name);
	}

	struct network *net = &the_worker->engine->net;

	tls_session_ticket_ctx_destroy(net->tls_session_ticket_ctx);
	net->tls_session_ticket_ctx =
		tls_session_ticket_ctx_create(net->loop, secret_buf, secret_len);
	if (net->tls_session_ticket_ctx == NULL) {
		lua_error_p(L,
			"net.tls_sticket_secret_file - can't create session ticket context");
	}
	lua_pushboolean(L, true);
	return 1;
}

+static int net_outgoing(lua_State *L, int family) +{ + union kr_sockaddr *addr; + if (family == AF_INET) + addr = (union kr_sockaddr*)&the_worker->out_addr4; + else + addr = (union kr_sockaddr*)&the_worker->out_addr6; + + if (lua_gettop(L) == 0) { /* Return the current value. */ + if (addr->ip.sa_family == AF_UNSPEC) { + lua_pushnil(L); + return 1; + } + if (kr_fails_assert(addr->ip.sa_family == family)) + lua_error_p(L, "bad address family"); + char addr_buf[INET6_ADDRSTRLEN]; + int err; + if (family == AF_INET) + err = uv_ip4_name(&addr->ip4, addr_buf, sizeof(addr_buf)); + else + err = uv_ip6_name(&addr->ip6, addr_buf, sizeof(addr_buf)); + lua_error_maybe(L, err); + lua_pushstring(L, addr_buf); + return 1; + } + + if ((lua_gettop(L) != 1) || (!lua_isstring(L, 1) && !lua_isnil(L, 1))) + lua_error_p(L, "net.outgoing_vX takes one address string parameter or nil"); + + if (lua_isnil(L, 1)) { + addr->ip.sa_family = AF_UNSPEC; + return 1; + } + + const char *addr_str = lua_tostring(L, 1); + int err; + if (family == AF_INET) + err = uv_ip4_addr(addr_str, 0, &addr->ip4); + else + err = uv_ip6_addr(addr_str, 0, &addr->ip6); + if (err) + lua_error_p(L, "net.outgoing_vX: failed to parse the address"); + lua_pushboolean(L, true); + return 1; +} + +static int net_outgoing_v4(lua_State *L) { return net_outgoing(L, AF_INET); } +static int net_outgoing_v6(lua_State *L) { return net_outgoing(L, AF_INET6); } + +static int net_update_timeout(lua_State *L, uint64_t *timeout, const char *name) +{ + /* Only return current idle timeout. 
*/ + if (lua_gettop(L) == 0) { + lua_pushinteger(L, *timeout); + return 1; + } + + if ((lua_gettop(L) != 1)) + lua_error_p(L, "%s takes one parameter: (\"idle timeout\")", name); + + if (lua_isnumber(L, 1)) { + int idle_timeout = lua_tointeger(L, 1); + if (idle_timeout <= 0) + lua_error_p(L, "%s parameter has to be positive number", name); + *timeout = idle_timeout; + } else { + lua_error_p(L, "%s parameter has to be positive number", name); + } + lua_pushboolean(L, true); + return 1; +} + +static int net_tcp_in_idle(lua_State *L) +{ + struct network *net = &the_worker->engine->net; + return net_update_timeout(L, &net->tcp.in_idle_timeout, "net.tcp_in_idle"); +} + +static int net_tls_handshake_timeout(lua_State *L) +{ + struct network *net = &the_worker->engine->net; + return net_update_timeout(L, &net->tcp.tls_handshake_timeout, "net.tls_handshake_timeout"); +} + +static int net_bpf_set(lua_State *L) +{ + if (lua_gettop(L) != 1 || !lua_isnumber(L, 1)) { + lua_error_p(L, "net.bpf_set(fd) takes one parameter:" + " the open file descriptor of a loaded BPF program"); + } + +#if __linux__ + + int progfd = lua_tointeger(L, 1); + if (progfd == 0) { + /* conversion error despite that fact + * that lua_isnumber(L, 1) has returned true. + * Real or stdin? 
*/ + lua_error_p(L, "failed to convert parameter"); + } + lua_pop(L, 1); + + if (network_set_bpf(&the_worker->engine->net, progfd) == 0) { + lua_error_p(L, "failed to attach BPF program to some networks: %s", + kr_strerror(errno)); + } + + lua_pushboolean(L, 1); + return 1; + +#endif + lua_error_p(L, "BPF is not supported on this operating system"); +} + +static int net_bpf_clear(lua_State *L) +{ + if (lua_gettop(L) != 0) + lua_error_p(L, "net.bpf_clear() does not take any parameters"); + +#if __linux__ + + network_clear_bpf(&the_worker->engine->net); + + lua_pushboolean(L, 1); + return 1; + +#endif + lua_error_p(L, "BPF is not supported on this operating system"); +} + +static int net_register_endpoint_kind(lua_State *L) +{ + const int param_count = lua_gettop(L); + if (param_count != 1 && param_count != 2) + lua_error_p(L, "expected one or two parameters"); + if (!lua_isstring(L, 1)) { + lua_error_p(L, "incorrect kind '%s'", lua_tostring(L, 1)); + } + size_t kind_len; + const char *kind = lua_tolstring(L, 1, &kind_len); + struct network *net = &the_worker->engine->net; + + /* Unregistering */ + if (param_count == 1) { + void *val; + if (trie_del(net->endpoint_kinds, kind, kind_len, &val) == KNOT_EOK) { + const int fun_id = (char *)val - (char *)NULL; + luaL_unref(L, LUA_REGISTRYINDEX, fun_id); + return 0; + } + lua_error_p(L, "attempt to unregister unknown kind '%s'\n", kind); + } /* else -> param_count == 2 */ + + /* Registering */ + if (!lua_isfunction(L, 2)) { + lua_error_p(L, "second parameter: expected function but got %s\n", + lua_typename(L, lua_type(L, 2))); + } + const int fun_id = luaL_ref(L, LUA_REGISTRYINDEX); + /* ^^ The function is on top of the stack, incidentally. 
*/ + void **pp = trie_get_ins(net->endpoint_kinds, kind, kind_len); + if (!pp) lua_error_maybe(L, kr_error(ENOMEM)); + if (*pp != NULL || !strcasecmp(kind, "dns") || !strcasecmp(kind, "tls")) + lua_error_p(L, "attempt to register known kind '%s'\n", kind); + *pp = (char *)NULL + fun_id; + /* We don't attempt to engage corresponding endpoints now. + * That's the job for network_engage_endpoints() later. */ + return 0; +} + +int kr_bindings_net(lua_State *L) +{ + static const luaL_Reg lib[] = { + { "list", net_list }, + { "listen", net_listen }, + { "proxy_allowed", net_proxy_allowed }, + { "close", net_close }, + { "interfaces", net_interfaces }, + { "bufsize", net_bufsize }, + { "tcp_pipeline", net_pipeline }, + { "tls", net_tls }, + { "tls_server", net_tls }, + { "tls_client", net_tls_client }, + { "tls_client_clear", net_tls_client_clear }, + { "tls_padding", net_tls_padding }, + { "tls_sticket_secret", net_tls_sticket_secret_string }, + { "tls_sticket_secret_file", net_tls_sticket_secret_file }, + { "outgoing_v4", net_outgoing_v4 }, + { "outgoing_v6", net_outgoing_v6 }, + { "tcp_in_idle", net_tcp_in_idle }, + { "tls_handshake_timeout", net_tls_handshake_timeout }, + { "bpf_set", net_bpf_set }, + { "bpf_clear", net_bpf_clear }, + { "register_endpoint_kind", net_register_endpoint_kind }, + { "doh_headers", net_doh_headers }, + { NULL, NULL } + }; + luaL_register(L, "net", lib); + return 1; +} + diff --git a/daemon/bindings/net_client.rst b/daemon/bindings/net_client.rst new file mode 100644 index 0000000..34e6236 --- /dev/null +++ b/daemon/bindings/net_client.rst @@ -0,0 +1,34 @@ +.. SPDX-License-Identifier: GPL-3.0-or-later + +IPv4 and IPv6 usage +------------------- + +Following settings affect client part of the resolver, +i.e. communication between the resolver itself and other DNS servers. + +IPv4 and IPv6 protocols are used by default. 
For performance reasons it is +recommended to explicitly disable protocols which are not available +on your system, though the impact of IPv6 outage is lowered since release 5.3.0. + +.. envvar:: net.ipv4 = true|false + + :return: boolean (default: true) + + Enable/disable using IPv4 for contacting upstream nameservers. + +.. envvar:: net.ipv6 = true|false + + :return: boolean (default: true) + + Enable/disable using IPv6 for contacting upstream nameservers. + +.. function:: net.outgoing_v4([string address]) + + Get/set the IPv4 address used to perform queries. + The default is ``nil``, which lets the OS choose any address. + +.. function:: net.outgoing_v6([string address]) + + Get/set the IPv6 address used to perform queries. + The default is ``nil``, which lets the OS choose any address. + diff --git a/daemon/bindings/net_dns_tweaks.rst b/daemon/bindings/net_dns_tweaks.rst new file mode 100644 index 0000000..4cfeba6 --- /dev/null +++ b/daemon/bindings/net_dns_tweaks.rst @@ -0,0 +1,35 @@ +.. SPDX-License-Identifier: GPL-3.0-or-later + +DNS protocol tweaks +------------------- + +Following settings change low-level details of DNS protocol implementation. +Default values should not be changed except for very special cases. + +.. function:: net.bufsize([udp_downstream_bufsize][, udp_upstream_bufsize]) + + Get/set maximum EDNS payload size advertised in DNS packets. Different values can be configured for communication downstream (towards clients) and upstream (towards other DNS servers). Set and also get operations use values in this order. + + Default is 1232 bytes which was chosen to minimize risk of `issues caused by IP fragmentation <https://blog.apnic.net/2019/07/12/its-time-to-consider-avoiding-ip-fragmentation-in-the-dns/>`_. Further details can be found at `DNS Flag Day 2020 <https://www.dnsflagday.net/2020/>`_ web site. + + Minimal value allowed by standard :rfc:`6891` is 512 bytes, which is equal to DNS packet size without Extension Mechanisms for DNS. 
Value 1220 bytes is minimum size required by DNSSEC standard :rfc:`4035`. + + Example output: + + .. code-block:: lua + + -- set downstream and upstream bufsize to value 4096 + > net.bufsize(4096) + -- get configured downstream and upstream bufsizes, respectively + > net.bufsize() + 4096 -- result # 1 + 4096 -- result # 2 + + -- set downstream bufsize to 4096 and upstream bufsize to 1232 + > net.bufsize(4096, 1232) + -- get configured downstream and upstream bufsizes, respectively + > net.bufsize() + 4096 -- result # 1 + 1232 -- result # 2 + +.. include:: ../modules/workarounds/README.rst diff --git a/daemon/bindings/net_server.rst b/daemon/bindings/net_server.rst new file mode 100644 index 0000000..f346aeb --- /dev/null +++ b/daemon/bindings/net_server.rst @@ -0,0 +1,225 @@ +.. SPDX-License-Identifier: GPL-3.0-or-later + +Addresses and services +---------------------- + +Addresses, ports, protocols, and API calls available for clients communicating +with resolver are configured using :func:`net.listen`. + +First you need to decide what service should be available on given IP address ++ port combination. + +.. csv-table:: + :header: "Protocol/service", "net.listen *kind*" + + "DNS (unencrypted UDP+TCP, :rfc:`1034`)","``dns``" + "DNS (unencrypted UDP, :ref:`using XDP Linux API <dns-over-xdp>`)","``xdp``" + ":ref:`dns-over-tls`","``tls``" + ":ref:`dns-over-https`","``doh2``" + ":ref:`Web management <mod-http-built-in-services>`","``webmgmt``" + ":ref:`Control socket <control-sockets>`","``control``" + ":ref:`mod-http-doh`","``doh_legacy``" + +.. note:: By default, **unencrypted DNS and DNS-over-TLS** are configured to **listen + on localhost**. + + Control sockets are created either in + ``/run/knot-resolver/control/`` (when using systemd) or ``$PWD/control/``. + +.. function:: net.listen(addresses, [port = 53, { kind = 'dns', freebind = false }]) + + :return: ``true`` if port is bound, an error otherwise + + Listen on addresses; port and flags are optional. 
+ The addresses can be specified as a string or device. + Port 853 implies ``kind = 'tls'`` but it is always better to be explicit. + Freebind allows binding to a non-local or not yet available address. + +.. csv-table:: + :header: "**Network protocol**", "**Configuration command**" + + "DNS (UDP+TCP, :rfc:`1034`)","``net.listen('192.0.2.123', 53)``" + "DNS (UDP, :ref:`using XDP <dns-over-xdp>`)","``net.listen('192.0.2.123', 53, { kind = 'xdp' })``" + ":ref:`dns-over-tls`","``net.listen('192.0.2.123', 853, { kind = 'tls' })``" + ":ref:`dns-over-https`","``net.listen('192.0.2.123', 443, { kind = 'doh2' })``" + ":ref:`Web management <mod-http-built-in-services>`","``net.listen('192.0.2.123', 8453, { kind = 'webmgmt' })``" + ":ref:`Control socket <control-sockets>`","``net.listen('/tmp/kres.control', nil, { kind = 'control' })``" + + +Examples: + + .. code-block:: lua + + net.listen('::1') + net.listen(net.lo, 53) + net.listen(net.eth0, 853, { kind = 'tls' }) + net.listen('192.0.2.1', 53, { freebind = true }) + net.listen({'127.0.0.1', '::1'}, 53, { kind = 'dns' }) + net.listen('::', 443, { kind = 'doh2' }) + net.listen('::', 8453, { kind = 'webmgmt' }) -- see http module + net.listen('/tmp/kresd-socket', nil, { kind = 'webmgmt' }) -- http module supports AF_UNIX + net.listen('eth0', 53, { kind = 'xdp' }) + net.listen('192.0.2.123', 53, { kind = 'xdp', nic_queue = 0 }) + +.. warning:: On machines with multiple IP addresses avoid listening on wildcards + ``0.0.0.0`` or ``::``. Knot Resolver could answer from different IP + addresses if the network address ranges overlap, + and clients would probably refuse such a response. + +.. _proxyv2: + +PROXYv2 protocol +^^^^^^^^^^^^^^^^ + +Knot Resolver supports proxies that utilize the `PROXYv2 protocol <https://www.haproxy.org/download/2.5/doc/proxy-protocol.txt>`_ +to identify clients. + +A PROXY header contains the IP address of the original client who sent a query. 
+This allows the resolver to treat queries as if they actually came from +the client's IP address rather than the address of the proxy they came through. +For example, :ref:`Views and ACLs <mod-view>` are able to work properly when +PROXYv2 is in use. + +Since allowing usage of the PROXYv2 protocol for all clients would be a security +vulnerability, because clients would then be able to spoof their IP addresses via +the PROXYv2 header, the resolver requires you to specify explicitly which clients +are allowed to send PROXYv2 headers via the :func:`net.proxy_allowed` function. + +PROXYv2 queries from clients who are not explicitly allowed to use this protocol +will be discarded. + +.. function:: net.proxy_allowed([addresses]) + + Allow usage of the PROXYv2 protocol headers by clients on the specified + ``addresses``. It is possible to permit whole networks to send PROXYv2 headers + by specifying the network mask using the CIDR notation + (e.g. ``172.22.0.0/16``). IPv4 as well as IPv6 addresses are supported. + + If you wish to allow all clients to use PROXYv2 (e.g. because you have this + kind of security handled on another layer of your network infrastructure), + you can specify a netmask of ``/0``. Please note that this setting is + address-family-specific, so this needs to be applied to both IPv4 and IPv6 + separately. + + Subsequent calls to the function overwrite the effects of all previous calls. + Providing a table of strings as the function parameter allows multiple + distinct addresses to use the PROXYv2 protocol. + + When called without arguments, ``net.proxy_allowed`` returns a table of all + addresses currently allowed to use the PROXYv2 protocol and does not change + the configuration. + +Examples: + + .. 
code-block:: lua + + net.proxy_allowed('172.22.0.1') -- allows '172.22.0.1' specifically + net.proxy_allowed('172.18.1.0/24') -- allows everyone at '172.18.1.*' + net.proxy_allowed({ + '172.22.0.1', '172.18.1.0/24' + }) -- allows both of the above at once + net.proxy_allowed({ 'fe80::/10' }) -- allows everyone at IPv6 link-local + net.proxy_allowed({ + '::/0', '0.0.0.0/0' + }) -- allows everyone + net.proxy_allowed('::/0') -- allows all IPv6 (but no IPv4) + net.proxy_allowed({}) -- prevents everyone from using PROXYv2 + net.proxy_allowed() -- returns a list of all currently allowed addresses + +Features for scripting +^^^^^^^^^^^^^^^^^^^^^^ +The following configuration functions are useful mainly for scripting or :ref:`runtime-cfg`. + +.. function:: net.close(address, [port]) + + :return: boolean (at least one endpoint closed) + + Close all endpoints listening on the specified address, optionally restricted by port as well. + + +.. function:: net.list() + + :return: Table of bound interfaces. + + Example output: + + .. code-block:: none + + [1] => { + [kind] => tls + [transport] => { + [family] => inet4 + [ip] => 127.0.0.1 + [port] => 853 + [protocol] => tcp + } + } + [2] => { + [kind] => dns + [transport] => { + [family] => inet6 + [ip] => ::1 + [port] => 53 + [protocol] => udp + } + } + [3] => { + [kind] => dns + [transport] => { + [family] => inet6 + [ip] => ::1 + [port] => 53 + [protocol] => tcp + } + } + [4] => { + [kind] => xdp + [transport] => { + [family] => inet4+inet6 + [interface] => eth2 + [nic_queue] => 0 + [port] => 53 + [protocol] => udp + } + } + +.. function:: net.interfaces() + + :return: Table of available interfaces and their addresses. + + Example output: + + .. code-block:: none + + [lo0] => { + [addr] => { + [1] => ::1 + [2] => 127.0.0.1 + } + [mac] => 00:00:00:00:00:00 + } + [eth0] => { + [addr] => { + [1] => 192.168.0.1 + } + [mac] => de:ad:be:ef:aa:bb + } + + .. tip:: You can use ``net.<iface>`` as a shortcut for a specific interface, e.g. 
``net.eth0`` + +.. function:: net.tcp_pipeline([len]) + + Get/set per-client TCP pipeline limit, i.e. the number of outstanding queries that a single client connection can make in parallel. Default is 100. + + .. code-block:: lua + + > net.tcp_pipeline() + 100 + > net.tcp_pipeline(50) + 50 + + .. warning:: Please note that a too large limit may have a negative impact on performance and can lead to an increased number of SERVFAIL answers. + +.. _`dnsproxy module`: https://www.knot-dns.cz/docs/2.7/html/modules.html#dnsproxy-tiny-dns-proxy + + diff --git a/daemon/bindings/net_tlssrv.rst b/daemon/bindings/net_tlssrv.rst new file mode 100644 index 0000000..f496cd7 --- /dev/null +++ b/daemon/bindings/net_tlssrv.rst @@ -0,0 +1,188 @@ +.. SPDX-License-Identifier: GPL-3.0-or-later + +.. _tls-server-config: + +DoT and DoH (encrypted DNS) +--------------------------- + +.. warning:: + + It is important to understand **limits of encrypting only DNS traffic**. + Relevant security analysis can be found in article + *Simran Patil and Nikita Borisov. 2019. What can you learn from an IP?* + See `slides <https://irtf.org/anrw/2019/slides-anrw19-final44.pdf>`_ + or `the article itself <https://dl.acm.org/authorize?N687437>`_. + +DoT and DoH encrypt DNS traffic with the Transport Layer Security (TLS) protocol +and thus protect DNS traffic from certain types of attacks. + +You can learn more about DoT and DoH and their implementation in Knot Resolver +in `this article +<https://en.blog.nic.cz/2020/11/25/encrypted-dns-in-knot-resolver-dot-and-doh/>`_. + +.. _dns-over-tls: + +DNS-over-TLS (DoT) +^^^^^^^^^^^^^^^^^^ + +DNS-over-TLS server (:rfc:`7858`) can be configured using ``tls`` kind in +:func:`net.listen()`. It is enabled on localhost by default. + +For certificate configuration, refer to :ref:`dot-doh-config-options`. + +.. _dns-over-https: + +DNS-over-HTTPS (DoH) +^^^^^^^^^^^^^^^^^^^^ + +.. note:: Knot Resolver currently offers two DoH implementations. 
It is + recommended to use this new implementation, which is more reliable, scalable + and has fewer dependencies. Make sure to use ``doh2`` kind in + :func:`net.listen()` to select this implementation. + +.. tip:: Independent information about political controversies around the + DoH deployment by default can be found in blog posts `DNS Privacy at IETF + 104 <http://www.potaroo.net/ispcol/2019-04/angst.html>`_ and `More DOH + <http://www.potaroo.net/ispcol/2019-04/moredoh.html>`_ by Geoff Huston and + `Centralised DoH is bad for Privacy, in 2019 and beyond + <https://labs.ripe.net/Members/bert_hubert/centralised-doh-is-bad-for-privacy-in-2019-and-beyond>`_ + by Bert Hubert. + +DNS-over-HTTPS server (:rfc:`8484`) can be configured using ``doh2`` kind in +:func:`net.listen()`. + +This implementation supports HTTP/2 (:rfc:`7540`). Queries can be sent to the +``/dns-query`` endpoint, e.g.: + +.. code-block:: bash + + $ kdig @127.0.0.1 +https www.knot-resolver.cz AAAA + +**Only TLS version 1.3 (or higher) is supported with DNS-over-HTTPS.** The +additional considerations for TLS 1.2 required by HTTP/2 are not implemented +(:rfc:`7540#section-9.2`). + +.. warning:: Take care when configuring your server to listen on well known + HTTPS port. If an unrelated HTTPS service is running on the same port with + REUSEPORT enabled, you will end up with both services malfunctioning. + +.. _dot-doh-config-options: + +HTTP status codes +""""""""""""""""" + +As specified by :rfc:`8484`, the resolver responds with status **200 OK** whenever +it can produce a valid DNS reply for a given query, even in cases where the DNS +``rcode`` indicates an error (like ``NXDOMAIN``, ``SERVFAIL``, etc.). 
+ +For DoH queries malformed at the HTTP level, the resolver may respond with +the following status codes: + + * **400 Bad Request** for a generally malformed query, like one not containing + a valid DNS packet + * **404 Not Found** when an incorrect HTTP endpoint is queried - the only + supported ones are ``/dns-query`` and ``/doh`` + * **413 Payload Too Large** when the DNS query exceeds its maximum size + * **415 Unsupported Media Type** when the query's ``Content-Type`` header + is not ``application/dns-message`` + * **431 Request Header Fields Too Large** when a header in the query is too + large to process + * **501 Not Implemented** when the query uses a method other than + ``GET``, ``POST``, or ``HEAD`` + +Configuration options for DoT and DoH +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +.. note:: These settings affect both DNS-over-TLS and DNS-over-HTTPS (except + the legacy implementation). + +A self-signed certificate is generated by default. For serious deployments +it is strongly recommended to configure your own TLS certificates signed +by a trusted CA. This is done using function :c:func:`net.tls()`. + +.. function:: net.tls([cert_path], [key_path]) + + When called with path arguments, the function loads the server TLS + certificate and private key for DoT and DoH. + + When called without arguments, the command returns the currently configured paths. + + Example output: + + .. code-block:: lua + + > net.tls("/etc/knot-resolver/server-cert.pem", "/etc/knot-resolver/server-key.pem") + > net.tls() -- print configured paths + [cert_file] => '/etc/knot-resolver/server-cert.pem' + [key_file] => '/etc/knot-resolver/server-key.pem' + + .. tip:: The certificate files aren't automatically reloaded on change. If + you update the certificate files, e.g. using ACME, you have to either + restart the service(s) or call this function again using + :ref:`control-sockets`. + +.. 
function:: net.tls_sticket_secret([string with pre-shared secret]) + + Set the secret for TLS session resumption via tickets, as specified by :rfc:`5077`. + + The server-side key is rotated roughly once per hour. + By default or if called without secret, the key is random. + That is good for long-term forward secrecy, but multiple kresd instances + won't be able to resume each other's sessions. + + If you provide the same secret to multiple instances, they will be able to resume + each other's sessions *without* any further communication between them. + This synchronization works only among instances having the same endianness + and time_t structure and size (`sizeof(time_t)`). + +.. _pfs: https://en.wikipedia.org/wiki/Forward_secrecy + + **For good security** the secret must have enough entropy to be hard to guess, + and it should still be occasionally rotated manually and securely forgotten, + to reduce the scope of a privacy leak in case the + `secret leaks eventually <pfs_>`_. + + .. warning:: **Setting the secret is probably too risky with TLS <= 1.2 and + GnuTLS < 3.7.5**. GnuTLS 3.7.5 adds an option to disable resumption via + tickets for TLS <= 1.2, enabling them only for protocols that do guarantee + `PFS <pfs_>`_. Knot Resolver makes use of this new option when linked + against GnuTLS >= 3.7.5. + +.. function:: net.tls_sticket_secret_file([string with path to a file containing pre-shared secret]) + + The same as :func:`net.tls_sticket_secret`, + except the secret is read from a (binary) file. + +.. function:: net.tls_padding([true | false]) + + Get/set EDNS(0) padding of answers to queries that arrive over TLS + transport. If set to `true` (the default), it will use a sensible + default padding scheme, as implemented by libknot if available at + compile time. If set to a numeric value >= 2 it will pad the + answers to the nearest *padding* boundary, e.g. if set to `64`, the + answer size will be a multiple of 64 (64, 128, 192, ...).
If + set to `false` (or a number < 2), it will disable padding entirely. + +Configuration options for DoH +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +.. function:: net.doh_headers([string or table of strings]) + + Selects the headers to be exposed. These headers and their values are + available in ``request.qsource.headers``. Comparison + is case-insensitive and pseudo-headers are supported as well. + + The following snippet can be used in the lua module to access headers + ``:method`` and ``user-agent``: + + .. code-block:: lua + + net.doh_headers({':method', 'user-agent'}) + + ... + + for i = 1, tonumber(req.qsource.headers.len) do + local name = ffi.string(req.qsource.headers.at[i - 1].name) + local value = ffi.string(req.qsource.headers.at[i - 1].value) + print(name, value) + end diff --git a/daemon/bindings/net_xdpsrv.rst b/daemon/bindings/net_xdpsrv.rst new file mode 100644 index 0000000..e3014fe --- /dev/null +++ b/daemon/bindings/net_xdpsrv.rst @@ -0,0 +1,140 @@ +.. SPDX-License-Identifier: GPL-3.0-or-later + +.. _dns-over-xdp: + +XDP for higher UDP performance +------------------------------ + +.. warning:: + As of version 5.2.0, XDP support in Knot Resolver is considered + experimental. The impact on overall throughput and performance may not + always be beneficial. + +Using XDP allows significant speedup of UDP packet processing in recent Linux kernels, +especially with some network drivers that implement good support. +The basic idea is that for selected packets the Linux networking stack is bypassed, +and some drivers can even directly use the user-space buffers for reading and writing. + +.. TODO perhaps some hint/link about how significant speedup one might get? (link to some talk video?) + +Prerequisites +^^^^^^^^^^^^^ +.. this is mostly copied from knot-dns doc/operations.rst + +.. warning:: + Bypassing the network stack has significant implications, such as bypassing the firewall + and monitoring solutions. 
+ Make sure you're familiar with the trade-offs before using this feature. + Read more in :ref:`dns-over-xdp_limitations`. + +* Linux kernel 4.18+ (5.x+ is recommended for optimal performance) compiled with + the `CONFIG_XDP_SOCKETS=y` option. XDP isn't supported in other operating systems. +* libknot compiled with XDP support +* **A multiqueue network card with native XDP support is highly recommended**, + otherwise the performance gain will be much lower and you may encounter + issues due to XDP emulation. + Successfully tested cards: + + * Intel series 700 (driver `i40e`), maximum number of queues per interface is 64. + * Intel series 500 (driver `ixgbe`), maximum number of queues per interface is 64. + The number of CPUs available has to be at most 64! + + +Set up +^^^^^^ +.. first parts are mostly copied from knot-dns doc/operations.rst + +The server instances need additional Linux **capabilities** during startup. +(Or you could start them as `root`.) +Execute command + +.. code-block:: bash + + systemctl edit kresd@.service + +And insert these lines: + +.. code-block:: ini + + [Service] + CapabilityBoundingSet=CAP_NET_RAW CAP_NET_ADMIN CAP_SYS_ADMIN CAP_IPC_LOCK CAP_SYS_RESOURCE + AmbientCapabilities=CAP_NET_RAW CAP_NET_ADMIN CAP_SYS_ADMIN CAP_IPC_LOCK CAP_SYS_RESOURCE + +The ``CAP_SYS_RESOURCE`` is only needed on Linux < 5.11. + +.. TODO suggest some way for ethtool -L? Perhaps via systemd units? + +You want the same number of kresd instances and network **queues** on your card; +you can use ``ethtool -L`` before the services start. +With XDP this is more important than with vanilla UDP, as we only support one instance +per queue and unclaimed queues will fall back to vanilla UDP. +Ideally you can set these numbers as high as the number of CPUs that you want kresd to use. + +Modification of ``/etc/knot-resolver/kresd.conf`` may often be quite simple, for example: + +.. 
code-block:: lua + + net.listen('eth2', 53, { kind = 'xdp' }) + net.listen('203.0.113.53', 53, { kind = 'dns' }) + +Note that you want to also keep the vanilla DNS line to service TCP +and possibly any fallback UDP (e.g. from unclaimed queues). +XDP listening is in principle done on queues of whole network interfaces +and the target addresses of incoming packets aren't checked in any way, +but you are still allowed to specify interface by an address +(if it's unambiguous at that moment): + +.. code-block:: lua + + net.listen('203.0.113.53', 53, { kind = 'xdp' }) + net.listen('203.0.113.53', 53, { kind = 'dns' }) + +The default selection of queues is tailored for the usual naming convention: +``kresd@1.service``, ``kresd@2.service``, ... +but you can still specify them explicitly, e.g. the default is effectively the same as: + +.. code-block:: lua + + net.listen('eth2', 53, { kind = 'xdp', nic_queue = env.SYSTEMD_INSTANCE - 1 }) + + +Optimizations +^^^^^^^^^^^^^ +.. this is basically copied from knot-dns doc/operations.rst + +Some helpful commands: + +.. code-block:: text + + ethtool -N <interface> rx-flow-hash udp4 sdfn + ethtool -N <interface> rx-flow-hash udp6 sdfn + ethtool -L <interface> combined <queue-number> + ethtool -G <interface> rx <ring-size> tx <ring-size> + renice -n 19 -p $(pgrep '^ksoftirqd/[0-9]*$') + +.. TODO CPU affinities? `CPUAffinity=%i` in systemd unit sounds good. + + +.. _dns-over-xdp_limitations: + +Limitations +^^^^^^^^^^^ +.. this is basically copied from knot-dns doc/operations.rst + +* VLAN segmentation is not supported. +* MTU higher than 1792 bytes is not supported. +* Multiple BPF filters per one network device are not supported. +* Symmetrical routing is required (query source MAC/IP addresses and + reply destination MAC/IP addresses are the same). +* Systems with big-endian byte ordering require special recompilation of libknot. +* IPv4 header and UDP checksums are not verified on received DNS messages. 
+* DNS over XDP traffic is not visible to common system tools (e.g. firewall, tcpdump etc.). +* BPF filter is not automatically unloaded from the network device. Manual filter unload:: + + ip link set dev <interface> xdp off + +* Knot Resolver only supports using XDP towards clients currently (not towards upstreams). +* When starting up an XDP socket you may get a harmless warning:: + + libbpf: Kernel error message: XDP program already attached + diff --git a/daemon/bindings/worker.c b/daemon/bindings/worker.c new file mode 100644 index 0000000..d985000 --- /dev/null +++ b/daemon/bindings/worker.c @@ -0,0 +1,81 @@ +/* Copyright (C) CZ.NIC, z.s.p.o. <knot-resolver@labs.nic.cz> + * SPDX-License-Identifier: GPL-3.0-or-later + */ + +#include "daemon/bindings/impl.h" + + +/** Convert a libuv timeval to seconds as a double: tv_sec plus tv_usec scaled by 1e-6. */ +static inline double getseconds(uv_timeval_t *tv) +{ + return (double)tv->tv_sec + 0.000001*((double)tv->tv_usec); +} + +/** Return worker statistics as a Lua table: the counters from worker->stats, plus resource-usage values and RSS whenever the corresponding libuv call succeeds. Returns no Lua values if the_worker is NULL. */ +static int wrk_stats(lua_State *L) +{ + struct worker_ctx *worker = the_worker; + if (!worker) { + /* No worker: hand back zero results to Lua. */ + return 0; + } + /* Result table; every lua_setfield(L, -2, ...) below stores the value just pushed into it. */ + lua_newtable(L); + lua_pushnumber(L, worker->stats.queries); + lua_setfield(L, -2, "queries"); + lua_pushnumber(L, worker->stats.concurrent); + lua_setfield(L, -2, "concurrent"); + lua_pushnumber(L, worker->stats.dropped); + lua_setfield(L, -2, "dropped"); + + lua_pushnumber(L, worker->stats.timeout); + lua_setfield(L, -2, "timeout"); + lua_pushnumber(L, worker->stats.udp); + lua_setfield(L, -2, "udp"); + lua_pushnumber(L, worker->stats.tcp); + lua_setfield(L, -2, "tcp"); + lua_pushnumber(L, worker->stats.tls); + lua_setfield(L, -2, "tls"); + lua_pushnumber(L, worker->stats.ipv4); + lua_setfield(L, -2, "ipv4"); + lua_pushnumber(L, worker->stats.ipv6); + lua_setfield(L, -2, "ipv6"); + lua_pushnumber(L, worker->stats.err_udp); + lua_setfield(L, -2, "err_udp"); + lua_pushnumber(L, worker->stats.err_tcp); + lua_setfield(L, -2, "err_tcp"); + lua_pushnumber(L, worker->stats.err_tls); + lua_setfield(L, -2, "err_tls"); + lua_pushnumber(L,
worker->stats.err_http); + lua_setfield(L, -2, "err_http"); + + /* Add subset of rusage that represents counters; these fields are omitted if uv_getrusage() fails. */ + uv_rusage_t rusage; + if (uv_getrusage(&rusage) == 0) { + lua_pushnumber(L, getseconds(&rusage.ru_utime)); + lua_setfield(L, -2, "usertime"); + lua_pushnumber(L, getseconds(&rusage.ru_stime)); + lua_setfield(L, -2, "systime"); + lua_pushnumber(L, rusage.ru_majflt); + lua_setfield(L, -2, "pagefaults"); + lua_pushnumber(L, rusage.ru_nswap); + lua_setfield(L, -2, "swaps"); + /* "csw" is the sum of voluntary and involuntary context switches. */ + lua_pushnumber(L, rusage.ru_nvcsw + rusage.ru_nivcsw); + lua_setfield(L, -2, "csw"); + } + /* Get RSS; the "rss" field is omitted if uv_resident_set_memory() fails. */ + size_t rss = 0; + if (uv_resident_set_memory(&rss) == 0) { + lua_pushnumber(L, rss); + lua_setfield(L, -2, "rss"); + } + /* One result: the statistics table. */ + return 1; +} + +/** Register the C functions of the Lua "worker" library (here only "stats" -> wrk_stats) via luaL_register(). Always returns 1. */ +int kr_bindings_worker(lua_State *L) +{ + static const luaL_Reg lib[] = { + { "stats", wrk_stats }, + { NULL, NULL } + }; + luaL_register(L, "worker", lib); + return 1; +} + diff --git a/daemon/bindings/worker.rst b/daemon/bindings/worker.rst new file mode 100644 index 0000000..9dfcbe8 --- /dev/null +++ b/daemon/bindings/worker.rst @@ -0,0 +1,35 @@ +.. SPDX-License-Identifier: GPL-3.0-or-later + +Scripting worker +^^^^^^^^^^^^^^^^ + +Worker is a service over event loop that tracks and schedules outstanding queries, +you can see the statistics or schedule new queries. It also contains information about +specified worker count and process rank. + +.. envvar:: worker.id + + Value from environment variable ``SYSTEMD_INSTANCE``, + or if it is not set, :envvar:`PID <worker.pid>` (string). + +.. envvar:: worker.pid + + Current worker process PID (number). + +.. function:: worker.stats() + + Return table of statistics. See member descriptions in :c:type:`worker_stats`. + A few fields are added, mainly from POSIX ``getrusage()``: + + * ``usertime`` and ``systime`` -- CPU time used, in seconds + * ``pagefaults`` -- the number of hard page faults, i.e.
those that required I/O activity + * ``swaps`` -- the number of times the process was “swapped” out of main memory; unused on Linux + * ``csw`` -- the number of context switches, both voluntary and involuntary + * ``rss`` -- current memory usage in bytes, including whole cache (resident set size) + + Example: + + .. code-block:: lua + + print(worker.stats().concurrent) + |