1
0
Fork 0
knot-resolver/daemon/main.c
Daniel Baumann fbc604e215
Adding upstream version 5.7.5.
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
2025-06-21 13:56:17 +02:00

613 lines
18 KiB
C

/* Copyright (C) CZ.NIC, z.s.p.o. <knot-resolver@labs.nic.cz>
* SPDX-License-Identifier: GPL-3.0-or-later
*/
#include "kresconfig.h"
#include "contrib/ucw/mempool.h"
#include "daemon/engine.h"
#include "daemon/io.h"
#include "daemon/network.h"
#include "daemon/udp_queue.h"
#include "daemon/worker.h"
#include "lib/defines.h"
#include "lib/dnssec.h"
#include "lib/log.h"
#include <arpa/inet.h>
#include <errno.h>
#include <getopt.h>
#include <libgen.h>
#include <limits.h>
#include <signal.h>
#include <stdlib.h>
#include <string.h>
#include <strings.h>
#include <sys/resource.h>
#include <sys/wait.h>
#include <unistd.h>
#if ENABLE_CAP_NG
#include <cap-ng.h>
#endif
#include <lua.h>
#include <uv.h>
#if ENABLE_LIBSYSTEMD
#include <systemd/sd-daemon.h>
#endif
#include <libknot/error.h>
#if ENABLE_JEMALLOC
/* Make the jemalloc library needed.
 *
 * The problem is with --as-needed for linker which is added by default by meson.
 * If we don't use any jemalloc-specific calls, linker will decide that
 * it is not needed and won't link it.  Making it needed seems better than
 * trying to override the flag which might be useful in some other cases, etc.
 *
 * Exporting the function is a very easy way of ensuring that it's not optimized out.
 */
#include <jemalloc/jemalloc.h>
/** Exported only so that a jemalloc-specific symbol is referenced from this binary;
 * nothing in this file calls it. */
KR_EXPORT void kr_jemalloc_unused(void)
{
malloc_stats_print(NULL, NULL, NULL);
}
/* jemalloc run-time configuration: restrict the allocator to a single arena.
 * We don't use threads (or rarely in some parts), so multiple arenas don't make sense.
 * See https://jemalloc.net/jemalloc.3.html
 */
KR_EXPORT const char *malloc_conf = "narenas:1";
#endif
/** Static allocation for the_args singleton; main() points `the_args` at this object. */
struct args the_args_value;
/** libuv signal callback shared by the SIGINT, SIGTERM and SIGCHLD watchers.
 *
 * @param handle  the signal watcher that fired
 * @param signum  number of the delivered signal
 */
static void signal_handler(uv_signal_t *handle, int signum)
{
	if (signum == SIGINT || signum == SIGTERM) {
		/* Termination request: stop the default loop and this watcher. */
		uv_stop(uv_default_loop());
		uv_signal_stop(handle);
		return;
	}

	if (signum == SIGCHLD) {
		/* Reap every child that has already exited, without blocking. */
		pid_t reaped;
		do {
			reaped = waitpid(-1, NULL, WNOHANG);
		} while (reaped > 0);
		return;
	}

	kr_log_error(SYSTEM, "unhandled signal: %d\n", signum);
}
/** SIGBUS -> attempt to remove the overflowing cache file and abort.
 *
 * The handler never returns: it prints a diagnostic to stderr, tries to unlink
 * kr_cache_emergency_file_to_remove (when set and the cause is BUS_ADRERR)
 * and terminates the process via _exit().
 *
 * @param sig      signal number; used to derive the exit status
 * @param siginfo  signal details; si_code distinguishes BUS_ADRERR from other causes
 * @param ptr      unused
 */
static void sigbus_handler(int sig, siginfo_t *siginfo, void *ptr)
{
	/* We can't safely assume that printf-like functions work, but write() is OK.
	 * See POSIX for the safe functions, e.g. 2017 version just above this link:
	 * http://pubs.opengroup.org/onlinepubs/9699919799/functions/V2_chap02.html#tag_15_04_04
	 */
	/* The argument must be a char *array* (not a pointer); the terminating NUL
	 * counted by sizeof is excluded so that it isn't emitted to stderr. */
	#define WRITE_ERR(err_charray) \
		(void)write(STDERR_FILENO, err_charray, sizeof(err_charray) - 1)
	/* Unfortunately, void-cast on the write isn't enough to avoid the warning. */
	#pragma GCC diagnostic push
	#pragma GCC diagnostic ignored "-Wunused-result"
	const char msg_typical[] =
		"\nSIGBUS received; this is most likely due to filling up the filesystem where cache resides.\n",
		msg_unknown[] = "\nSIGBUS received, cause unknown.\n",
		msg_deleted[] = "Cache file deleted.\n",
		msg_del_fail[] = "Cache file deletion failed.\n",
		msg_final[] = "kresd can not recover reliably by itself, exiting.\n";

	if (siginfo->si_code != BUS_ADRERR) {
		WRITE_ERR(msg_unknown);
		goto end;
	}
	WRITE_ERR(msg_typical);

	/* Nothing registered for emergency removal -> just exit. */
	if (!kr_cache_emergency_file_to_remove)
		goto end;
	if (unlink(kr_cache_emergency_file_to_remove)) {
		WRITE_ERR(msg_del_fail);
	} else {
		WRITE_ERR(msg_deleted);
	}
end:
	WRITE_ERR(msg_final);
	/* NOTE(review): shells conventionally report 128 + sig for signal deaths;
	 * confirm that 128 - sig is the intended status before changing it. */
	_exit(128 - sig); /*< regular return from OS-raised SIGBUS can't work anyway */
	#undef WRITE_ERR
	#pragma GCC diagnostic pop
}
/*
 * Server operation.
 */

/** Fork (forks - 1) additional processes.
 *
 * @param forks  total number of processes wanted, including the caller
 * @return 0 in the original process, the (positive) remaining counter value
 *         in a forked child, or kr_error(errno) when fork() fails
 *         (main() treats a negative value as failure).
 */
static int fork_workers(int forks)
{
	/* Fork subprocesses if requested */
	while (--forks > 0) {
		const pid_t pid = fork();
		if (pid < 0) {
			/* perror() is allowed to modify errno, so remember the cause first. */
			const int fork_errno = errno;
			perror("[system] fork");
			return kr_error(fork_errno);
		}
		/* Forked process */
		if (pid == 0) {
			return forks;
		}
	}
	return 0;
}
/** Print the command-line usage summary to stdout.
 *
 * @param argc  unused
 * @param argv  argv[0] is printed as the program name
 */
static void help(int argc, char *argv[])
{
	static const char *const usage_lines[] = {
		"\nParameters:\n",
		" -a, --addr=[addr] Server address (default: localhost@53).\n",
		" -t, --tls=[addr] Server address for TLS (default: off).\n",
		" -S, --fd=[fd:kind] Listen on given fd (handed out by supervisor, :kind is optional).\n",
		" -c, --config=[path] Config file path (relative to [rundir]) (default: config).\n",
		" -n, --noninteractive Don't start the read-eval-print loop for stdin+stdout.\n",
		" -q, --quiet No command prompt in interactive mode.\n",
		" -v, --verbose Increase logging to debug level.\n",
		" -V, --version Print version of the server.\n",
		" -h, --help Print help and usage.\n",
		"Options:\n",
		" [rundir] Path to the working directory (default: .)\n",
	};
	const size_t line_count = sizeof(usage_lines) / sizeof(usage_lines[0]);

	printf("Usage: %s [parameters] [rundir]\n", argv[0]);
	for (size_t k = 0; k < line_count; ++k) {
		fputs(usage_lines[k], stdout);
	}
}
/** Set up the control pipe / interactive console, then run the event loop until it stops.
 *
 * @param loop    event loop to run
 * @param engine  unused here
 * @param leader  unused here
 * @param args    parsed command-line arguments (interactive, quiet, control_fd are read)
 * \return exit code for main()
 */
static int run_worker(uv_loop_t *loop, struct engine *engine, bool leader, struct args *args)
{
	/* Only some kinds of stdin work with uv_pipe_t.
	 * Otherwise we would abort() from libuv e.g. with </dev/null */
	if (args->interactive) {
		const uv_handle_type stdin_kind = uv_guess_handle(0);
		/* UV_TTY: standard terminal.
		 * TODO: it has worked OK so far, but we'd better use uv_tty_*
		 * for this case instead of uv_pipe_*.
		 * UV_NAMED_PIPE: echo 'quit()' | kresd ... */
		const bool stdin_usable =
			stdin_kind == UV_TTY || stdin_kind == UV_NAMED_PIPE;
		if (!stdin_usable) {
			kr_log_error(SYSTEM,
				"error: standard input is not a terminal or pipe; "
				"use '-n' if you want non-interactive mode.  "
				"Commands can be simply added to your configuration file or sent over the control socket.\n"
				);
			return EXIT_FAILURE;
		}
	}

	/* Control sockets or TTY */
	uv_pipe_t *ctl_pipe = malloc(sizeof(*ctl_pipe));
	if (ctl_pipe == NULL) {
		return EXIT_FAILURE;
	}
	uv_pipe_init(loop, ctl_pipe, 0);

	if (args->interactive) {
		if (!args->quiet) {
			printf("Interactive mode:\n> ");
		}
		ctl_pipe->data = io_tty_alloc_data();
		uv_pipe_open(ctl_pipe, 0);
		uv_read_start((uv_stream_t *)ctl_pipe, io_tty_alloc, io_tty_process_input);
	} else if (args->control_fd != -1) {
		/* Accept control connections only if the supplied fd could be adopted. */
		if (uv_pipe_open(ctl_pipe, args->control_fd) == 0) {
			uv_listen((uv_stream_t *)ctl_pipe, 16, io_tty_accept);
		}
	}

	/* Notify supervisor. */
#if ENABLE_LIBSYSTEMD
	sd_notify(0, "READY=1");
#endif

	/* Run event loop */
	uv_run(loop, UV_RUN_DEFAULT);

	/* Free pipe's data.  Seems OK even on the stopped loop.
	 * In interactive case it may have been done in callbacks already (single leak). */
	if (!args->interactive) {
		uv_close((uv_handle_t *)ctl_pipe, NULL);
		free(ctl_pipe);
	}
	return EXIT_SUCCESS;
}
/** Reset *args to defaults: everything zeroed, a single process,
 * no control fd, interactive console enabled, prompt not silenced. */
static void args_init(struct args *args)
{
	/* Zeroed arrays are OK. */
	memset(args, 0, sizeof(*args));

	args->quiet = false;
	args->interactive = true;
	args->control_fd = -1;
	args->forks = 1;
}
/** Free pointed-to resources of *args (the struct itself is not freed).
 *
 * Releases the `kind` string of every entry in args->fds and then
 * clears the addrs, addrs_tls, fds and config arrays.
 */
static void args_deinit(struct args *args)
{
	array_clear(args->addrs);
	array_clear(args->addrs_tls);
	/* size_t index: matches the other loops over these arrays in this file
	 * and avoids a signed/unsigned comparison against .len */
	for (size_t i = 0; i < args->fds.len; ++i)
		free_const(args->fds.at[i].flags.kind);
	array_clear(args->fds);
	array_clear(args->config);
}
/** Process arguments into struct args.
 *
 * Numeric option values (-f, -S) are validated strictly: the number must be
 * present, in range, and (for -f) not followed by trailing characters.
 * Allocation failures while storing options are reported instead of ignored.
 *
 * @param argc  argument count as passed to main()
 * @param argv  argument vector as passed to main(); stored pointers refer into it
 * @param args  destination; expected to be initialized by args_init()
 * @return >=0 if main() should be exited immediately (the value is the exit code),
 *         negative if startup should continue.
 */
static int parse_args(int argc, char **argv, struct args *args)
{
	/* Long options. */
	int c = 0, li = 0;
	struct option opts[] = {
		{"addr",           required_argument, 0, 'a'},
		{"tls",            required_argument, 0, 't'},
		{"config",         required_argument, 0, 'c'},
		{"forks",          required_argument, 0, 'f'},
		{"noninteractive", no_argument,       0, 'n'},
		{"verbose",        no_argument,       0, 'v'},
		{"quiet",          no_argument,       0, 'q'},
		{"version",        no_argument,       0, 'V'},
		{"help",           no_argument,       0, 'h'},
		{"fd",             required_argument, 0, 'S'},
		{0, 0, 0, 0}
	};
	while ((c = getopt_long(argc, argv, "a:t:c:f:nvqVhS:", opts, &li)) != -1) {
		switch (c)
		{
		case 'a':
			kr_require(optarg);
			if (array_push(args->addrs, optarg) < 0)
				goto oom;
			break;
		case 't':
			kr_require(optarg);
			if (array_push(args->addrs_tls, optarg) < 0)
				goto oom;
			break;
		case 'c':
			kr_require(optarg);
			if (array_push(args->config, optarg) < 0)
				goto oom;
			break;
		case 'f': {
			kr_require(optarg);
			char *end = NULL;
			errno = 0;
			const long forks = strtol(optarg, &end, 10);
			/* Validate first, so that garbage isn't answered by a deprecation hint. */
			if (errno != 0 || end == optarg || *end != '\0'
			    || forks <= 0 || forks > INT_MAX) {
				kr_log_error(SYSTEM, "error '-f' requires a positive"
						" number, not '%s'\n", optarg);
				return EXIT_FAILURE;
			}
			args->forks = (int)forks;
			if (args->forks == 1) {
				kr_log_deprecate(SYSTEM, "use --noninteractive instead of --forks=1\n");
			} else {
				kr_log_deprecate(SYSTEM, "support for running multiple --forks will be removed\n");
			}
			/* --forks implies --noninteractive */
			args->interactive = false;
			break;
		}
		case 'n':
			args->interactive = false;
			break;
		case 'v':
			kr_log_level_set(LOG_DEBUG);
			break;
		case 'q':
			args->quiet = true;
			break;
		case 'V':
			printf("Knot Resolver, version %s\n", PACKAGE_VERSION);
			return EXIT_SUCCESS;
		case 'h':
		case '?':
			help(argc, argv);
			return EXIT_SUCCESS;
		case 'S': {
			kr_require(optarg);
			flagged_fd_t ffd = { 0 };
			char *endptr = NULL;
			errno = 0;
			const long fd_num = strtol(optarg, &endptr, 10);
			/* A descriptor number is mandatory for every variant (plain, :tls, :kind)
			 * and it has to be a valid non-negative int. */
			const bool fd_ok = errno == 0 && endptr != optarg
					&& fd_num >= 0 && fd_num <= INT_MAX;
			if (!fd_ok) {
				kr_log_error(SYSTEM, "incorrect value passed to '-S/--fd': %s\n",
						optarg);
				return EXIT_FAILURE;
			}
			ffd.fd = (int)fd_num;
			if (endptr[0] == '\0') {
				/* Plain DNS */
				ffd.flags.tls = false;
			} else if (endptr[0] == ':' && strcasecmp(endptr + 1, "tls") == 0) {
				/* DoT */
				ffd.flags.tls = true;
				/* We know what .sock_type should be but it wouldn't help. */
			} else if (endptr[0] == ':' && endptr[1] != '\0') {
				/* Some other kind; no checks here. */
				ffd.flags.kind = strdup(endptr + 1);
				if (!ffd.flags.kind)
					goto oom;
			} else {
				kr_log_error(SYSTEM, "incorrect value passed to '-S/--fd': %s\n",
						optarg);
				return EXIT_FAILURE;
			}
			if (array_push(args->fds, ffd) < 0) {
				/* The entry never reached the array, so args_deinit() can't free it. */
				free_const(ffd.flags.kind);
				goto oom;
			}
			break;
		}
		default:
			help(argc, argv);
			return EXIT_FAILURE;
		}
	}
	if (optind < argc) {
		args->rundir = argv[optind];
	}
	return -1;

oom:
	kr_log_error(SYSTEM, "not enough memory while processing command-line arguments\n");
	return EXIT_FAILURE;
}
/** Just convert addresses to file-descriptors; clear *addrs on success.
* @note AF_UNIX is supported (starting with '/').
* @return zero or exit code for main()
*/
static int bind_sockets(addr_array_t *addrs, bool tls, flagged_fd_array_t *fds)
{
bool has_error = false;
for (size_t i = 0; i < addrs->len; ++i) {
/* Get port and separate address string. */
uint16_t port = tls ? KR_DNS_TLS_PORT : KR_DNS_PORT;
char addr_buf[INET6_ADDRSTRLEN + 1];
int ret;
const char *addr_str;
const int family = kr_straddr_family(addrs->at[i]);
if (family == AF_UNIX) {
ret = 0;
addr_str = addrs->at[i];
} else { /* internet socket (or garbage) */
ret = kr_straddr_split(addrs->at[i], addr_buf, &port);
addr_str = addr_buf;
}
/* Get sockaddr. */
struct sockaddr *sa = NULL;
if (ret == 0) {
sa = kr_straddr_socket(addr_str, port, NULL);
if (!sa) ret = kr_error(EINVAL); /* could be ENOMEM but unlikely */
}
flagged_fd_t ffd = { .flags = { .tls = tls } };
if (ret == 0 && !tls && family != AF_UNIX) {
/* AF_UNIX can do SOCK_DGRAM, but let's not support that *here*. */
ffd.fd = io_bind(sa, SOCK_DGRAM, NULL);
if (ffd.fd < 0)
ret = ffd.fd;
else if (array_push(*fds, ffd) < 0)
ret = kr_error(ENOMEM);
}
if (ret == 0) { /* common for TCP and TLS, including AF_UNIX cases */
ffd.fd = io_bind(sa, SOCK_STREAM, NULL);
if (ffd.fd < 0)
ret = ffd.fd;
else if (array_push(*fds, ffd) < 0)
ret = kr_error(ENOMEM);
}
free(sa);
if (ret != 0) {
kr_log_error(NETWORK, "bind to '%s'%s: %s\n",
addrs->at[i], tls ? " (TLS)" : "", kr_strerror(ret));
has_error = true;
}
}
array_clear(*addrs);
return has_error ? EXIT_FAILURE : kr_ok();
}
/** Hand every prepared descriptor over to network_listen_fd().
 *
 * All entries are attempted, so every failure gets logged.
 *
 * @param net  network state passed through to network_listen_fd()
 * @param fds  descriptors with flags; `kind` of each accepted entry is reset to NULL
 * @return 0 if all succeeded, otherwise the error code of the last failing entry
 */
static int start_listening(struct network *net, flagged_fd_array_t *fds) {
	int last_failure = 0;
	for (size_t idx = 0; idx < fds->len; ++idx) {
		flagged_fd_t *const entry = &fds->at[idx];
		const int err = network_listen_fd(net, entry->fd, entry->flags);
		if (err == 0) {
			entry->flags.kind = NULL; /* ownership transferred */
			continue;
		}
		last_failure = err;
		/* TODO: try logging address@port.  It's not too important,
		 * because typical problems happen during binding already.
		 * (invalid address, permission denied) */
		kr_log_error(NETWORK, "listen on fd=%d: %s\n",
				entry->fd, kr_strerror(err));
		/* Continue printing all of these before exiting. */
	}
	return last_failure;
}
/* Drop POSIX 1003.1e capabilities.
 * Does nothing unless built with ENABLE_CAP_NG. */
static void drop_capabilities(void)
{
#if ENABLE_CAP_NG
	/* Capabilities are only dropped when running under a non-root user. */
	if (geteuid() == 0) {
		kr_log_debug(SYSTEM, "running as root, no capabilities dropped\n");
		return;
	}

	if (!capng_have_capability(CAPNG_EFFECTIVE, CAP_SETPCAP)) {
		/* If user() was called, the capabilities were already dropped along with SETPCAP. */
		kr_log_debug(SYSTEM, "process not allowed to set capabilities, skipping\n");
		return;
	}

	capng_clear(CAPNG_SELECT_BOTH);
	/* Apply. */
	if (capng_apply(CAPNG_SELECT_BOTH) >= 0) {
		kr_log_debug(SYSTEM, "all capabilities dropped\n");
	} else {
		kr_log_error(SYSTEM, "failed to set process capabilities: %s\n",
				strerror(errno));
	}
#endif /* ENABLE_CAP_NG */
}
/** Entry point of the daemon.
 *
 * Order of startup: parse arguments, bind the requested sockets, switch to
 * rundir, select config files, raise the file-descriptor limit, fork,
 * initialize engine and worker, install signal handlers, start listening,
 * load the sandbox and configuration, drop capabilities, start the engine
 * and finally run the event loop.
 *
 * @return exit code of the process
 */
int main(int argc, char **argv)
{
	kr_log_group_reset();
	if (setvbuf(stdout, NULL, _IONBF, 0) || setvbuf(stderr, NULL, _IONBF, 0)) {
		kr_log_error(SYSTEM, "failed to set output buffering (ignored): %s\n",
				strerror(errno));
		(void)fflush(stderr);
	}
	if (strcmp("linux", OPERATING_SYSTEM) != 0)
		kr_log_warning(SYSTEM, "Knot Resolver is tested on Linux, other platforms might exhibit bugs.\n"
			"Please report issues to https://gitlab.nic.cz/knot/knot-resolver/issues/\n"
			"Thank you for your time and interest!\n");

	the_args = &the_args_value;
	args_init(the_args);
	int ret = parse_args(argc, argv, the_args);
	if (ret >= 0) goto cleanup_args;

	ret = bind_sockets(&the_args->addrs, false, &the_args->fds);
	if (ret) goto cleanup_args;
	ret = bind_sockets(&the_args->addrs_tls, true, &the_args->fds);
	if (ret) goto cleanup_args;

	/* Switch to rundir. */
	if (the_args->rundir != NULL) {
		/* FIXME: access isn't a good way if we start as root and drop privileges later */
		if (access(the_args->rundir, W_OK) != 0
		    || chdir(the_args->rundir) != 0) {
			kr_log_error(SYSTEM, "rundir '%s': %s\n",
					the_args->rundir, strerror(errno));
			ret = EXIT_FAILURE;
			goto cleanup_args;
		}
	}

	/* Select which config files to load and verify they are read-able. */
	bool load_defaults = true;
	size_t i = 0;
	while (i < the_args->config.len) {
		const char *config = the_args->config.at[i];
		if (strcmp(config, "-") == 0) {
			/* "-" isn't a file: it only disables loading of the defaults. */
			load_defaults = false;
			array_del(the_args->config, i);
			continue; /* don't increment i */
		} else if (access(config, R_OK) != 0) {
			char cwd[PATH_MAX];
			get_workdir(cwd, sizeof(cwd));
			kr_log_error(SYSTEM, "config '%s' (workdir '%s'): %s\n",
				config, cwd, strerror(errno));
			ret = EXIT_FAILURE;
			goto cleanup_args;
		}
		i++;
	}
	if (the_args->config.len == 0 && access("config", R_OK) == 0)
		array_push(the_args->config, "config");
	if (load_defaults)
		array_push(the_args->config, LIBDIR "/postconfig.lua");

	/* File-descriptor count limit: soft->hard. */
	struct rlimit rlim;
	ret = getrlimit(RLIMIT_NOFILE, &rlim);
	if (ret == 0 && rlim.rlim_cur != rlim.rlim_max) {
		kr_log_debug(SYSTEM, "increasing file-descriptor limit: %ld -> %ld\n",
				(long)rlim.rlim_cur, (long)rlim.rlim_max);
		rlim.rlim_cur = rlim.rlim_max;
		ret = setrlimit(RLIMIT_NOFILE, &rlim);
	}
	if (ret) {
		kr_log_error(SYSTEM, "failed to get or set file-descriptor limit: %s\n",
				strerror(errno));
	} else if (rlim.rlim_cur < (rlim_t)512 * 1024) {
		kr_log_warning(SYSTEM, "warning: hard limit for number of file-descriptors is only %ld but recommended value is 524288\n",
				(long)rlim.rlim_cur);
	}

	/* Fork subprocesses if requested */
	int fork_id = fork_workers(the_args->forks);
	if (fork_id < 0) {
		ret = EXIT_FAILURE;
		goto cleanup_args;
	}

	kr_crypto_init();

	/* Create a server engine. */
	knot_mm_t pool;
	mm_ctx_mempool(&pool, MM_DEFAULT_BLKSIZE);
	static struct engine engine;
	ret = engine_init(&engine, &pool);
	if (ret != 0) {
		kr_log_error(SYSTEM, "failed to initialize engine: %s\n", kr_strerror(ret));
		/* NOTE(review): no cleanup here; it isn't visible from this file whether
		 * engine_deinit() may be called after a failed engine_init(). */
		return EXIT_FAILURE;
	}
	/* Initialize the worker */
	ret = worker_init(&engine, the_args->forks);
	if (ret != 0) {
		kr_log_error(SYSTEM, "failed to initialize worker: %s\n", kr_strerror(ret));
		return EXIT_FAILURE;
	}

	uv_loop_t *loop = uv_default_loop();
	/* Catch some signals. */
	uv_signal_t sigint, sigterm, sigchld;
	ret = uv_signal_init(loop, &sigint);
	if (!ret) ret = uv_signal_init(loop, &sigterm);
	if (!ret) ret = uv_signal_init(loop, &sigchld);
	if (!ret) ret = uv_signal_start(&sigint, signal_handler, SIGINT);
	if (!ret) ret = uv_signal_start(&sigterm, signal_handler, SIGTERM);
	if (!ret) ret = uv_signal_start(&sigchld, signal_handler, SIGCHLD);
	/* Ignore SIGPIPE; see https://github.com/libuv/libuv/issues/45 */
	if (!ret && signal(SIGPIPE, SIG_IGN) == SIG_ERR) ret = errno;
	if (!ret) {
		/* Catching SIGBUS via uv_signal_* can't work; see:
		 * https://github.com/libuv/libuv/pull/1987 */
		struct sigaction sa;
		memset(&sa, 0, sizeof(sa));
		sa.sa_sigaction = sigbus_handler;
		sa.sa_flags = SA_SIGINFO;
		if (sigaction(SIGBUS, &sa, NULL)) {
			ret = errno;
		}
	}
	if (ret) {
		/* `ret` holds either a negative libuv code or a positive errno;
		 * errno itself may be stale when a uv_* call was the one that failed. */
		kr_log_error(SYSTEM, "failed to set up signal handlers: %s\n",
				strerror(abs(ret)));
		ret = EXIT_FAILURE;
		goto cleanup;
	}
	/* Profiling: avoid SIGPROF waking up the event loop.  Otherwise the profiles
	 * (of the usual type) may skew results, e.g. epoll_pwait() taking lots of time. */
	ret = uv_loop_configure(loop, UV_LOOP_BLOCK_SIGNAL, SIGPROF);
	if (ret) {
		kr_log_info(SYSTEM, "failed to block SIGPROF in event loop, ignoring: %s\n",
				uv_strerror(ret));
	}

	/* Start listening, in the sense of network_listen_fd(). */
	if (start_listening(&engine.net, &the_args->fds) != 0) {
		ret = EXIT_FAILURE;
		goto cleanup;
	}

	ret = udp_queue_init_global(loop);
	if (ret) {
		kr_log_error(SYSTEM, "failed to initialize UDP queue: %s\n",
				kr_strerror(ret));
		ret = EXIT_FAILURE;
		goto cleanup;
	}

	/* Start the scripting engine */
	if (engine_load_sandbox(&engine) != 0) {
		ret = EXIT_FAILURE;
		goto cleanup;
	}

	for (i = 0; i < the_args->config.len; ++i) {
		const char *config = the_args->config.at[i];
		if (engine_loadconf(&engine, config) != 0) {
			ret = EXIT_FAILURE;
			goto cleanup;
		}
		/* Clear the Lua stack after each config file. */
		lua_settop(engine.L, 0);
	}

	drop_capabilities();

	if (engine_start(&engine) != 0) {
		ret = EXIT_FAILURE;
		goto cleanup;
	}

	if (network_engage_endpoints(&engine.net)) {
		ret = EXIT_FAILURE;
		goto cleanup;
	}

	/* Run the event loop */
	ret = run_worker(loop, &engine, fork_id == 0, the_args);

cleanup:/* Cleanup. */
	engine_deinit(&engine);
	worker_deinit();
	if (loop != NULL) {
		uv_loop_close(loop);
	}
	mp_delete(pool.ctx);
cleanup_args:
	args_deinit(the_args);
	kr_crypto_cleanup();
	return ret;
}