diff options
author | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-04-07 15:26:00 +0000 |
---|---|---|
committer | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-04-07 15:26:00 +0000 |
commit | 830407e88f9d40d954356c3754f2647f91d5c06a (patch) | |
tree | d6a0ece6feea91f3c656166dbaa884ef8a29740e /daemon/main.c | |
parent | Initial commit. (diff) | |
download | knot-resolver-830407e88f9d40d954356c3754f2647f91d5c06a.tar.xz knot-resolver-830407e88f9d40d954356c3754f2647f91d5c06a.zip |
Adding upstream version 5.6.0.upstream/5.6.0upstream
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to '')
-rw-r--r-- | daemon/main.c | 613 |
1 files changed, 613 insertions, 0 deletions
diff --git a/daemon/main.c b/daemon/main.c new file mode 100644 index 0000000..41a55ad --- /dev/null +++ b/daemon/main.c @@ -0,0 +1,613 @@ +/* Copyright (C) CZ.NIC, z.s.p.o. <knot-resolver@labs.nic.cz> + * SPDX-License-Identifier: GPL-3.0-or-later + */ + +#include "kresconfig.h" + +#include "contrib/ucw/mempool.h" +#include "daemon/engine.h" +#include "daemon/io.h" +#include "daemon/network.h" +#include "daemon/udp_queue.h" +#include "daemon/worker.h" +#include "lib/defines.h" +#include "lib/dnssec.h" +#include "lib/log.h" + +#include <arpa/inet.h> +#include <getopt.h> +#include <libgen.h> +#include <signal.h> +#include <stdlib.h> +#include <string.h> +#include <sys/resource.h> +#include <sys/wait.h> +#include <unistd.h> + +#if ENABLE_CAP_NG +#include <cap-ng.h> +#endif + +#include <lua.h> +#include <uv.h> +#if ENABLE_LIBSYSTEMD +#include <systemd/sd-daemon.h> +#endif +#include <libknot/error.h> + +#if ENABLE_JEMALLOC +/* Make the jemalloc library needed. + * + * The problem is with --as-needed for linker which is added by default by meson. + * If we don't use any jemalloc-specific calls, linker will decide that + * it is not needed and won't link it. Making it needed seems better than + * trying to override the flag which might be useful in some other cases, etc. + * + * Exporting the function is a very easy way of ensuring that it's not optimized out. + */ +#include <jemalloc/jemalloc.h> +KR_EXPORT void kr_jemalloc_unused(void) +{ + malloc_stats_print(NULL, NULL, NULL); +} +/* We don't use threads (or rarely in some parts), so multiple arenas don't make sense. + https://jemalloc.net/jemalloc.3.html + */ +KR_EXPORT const char *malloc_conf = "narenas:1"; +#endif + +struct args the_args_value; /** Static allocation for the_args singleton. */ + +static void signal_handler(uv_signal_t *handle, int signum) +{ + switch (signum) { + case SIGINT: /* Fallthrough. */ + case SIGTERM: + uv_stop(uv_default_loop()); + uv_signal_stop(handle); + break; + case SIGCHLD: + /* Wait for all dead processes. */ + while (waitpid(-1, NULL, WNOHANG) > 0); + break; + default: + kr_log_error(SYSTEM, "unhandled signal: %d\n", signum); + break; + } +} + +/** SIGBUS -> attempt to remove the overflowing cache file and abort. */ +static void sigbus_handler(int sig, siginfo_t *siginfo, void *ptr) +{ + /* We can't safely assume that printf-like functions work, but write() is OK. + * See POSIX for the safe functions, e.g. 2017 version just above this link: + * http://pubs.opengroup.org/onlinepubs/9699919799/functions/V2_chap02.html#tag_15_04_04 + */ + #define WRITE_ERR(err_charray) \ + (void)write(STDERR_FILENO, err_charray, sizeof(err_charray)) + /* Unfortunately, void-cast on the write isn't enough to avoid the warning. */ + #pragma GCC diagnostic push + #pragma GCC diagnostic ignored "-Wunused-result" + const char msg_typical[] = + "\nSIGBUS received; this is most likely due to filling up the filesystem where cache resides.\n", + msg_unknown[] = "\nSIGBUS received, cause unknown.\n", + msg_deleted[] = "Cache file deleted.\n", + msg_del_fail[] = "Cache file deletion failed.\n", + msg_final[] = "kresd can not recover reliably by itself, exiting.\n"; + if (siginfo->si_code != BUS_ADRERR) { + WRITE_ERR(msg_unknown); + goto end; + } + WRITE_ERR(msg_typical); + if (!kr_cache_emergency_file_to_remove) goto end; + if (unlink(kr_cache_emergency_file_to_remove)) { + WRITE_ERR(msg_del_fail); + } else { + WRITE_ERR(msg_deleted); + } +end: + WRITE_ERR(msg_final); + _exit(128 - sig); /*< regular return from OS-raised SIGBUS can't work anyway */ + #undef WRITE_ERR + #pragma GCC diagnostic pop +} + + +/* + * Server operation. + */ + +static int fork_workers(int forks) +{ + /* Fork subprocesses if requested */ + while (--forks > 0) { + int pid = fork(); + if (pid < 0) { + perror("[system] fork"); + return kr_error(errno); + } + + /* Forked process */ + if (pid == 0) { + return forks; + } + } + return 0; +} + +static void help(int argc, char *argv[]) +{ + printf("Usage: %s [parameters] [rundir]\n", argv[0]); + printf("\nParameters:\n" + " -a, --addr=[addr] Server address (default: localhost@53).\n" + " -t, --tls=[addr] Server address for TLS (default: off).\n" + " -S, --fd=[fd:kind] Listen on given fd (handed out by supervisor, :kind is optional).\n" + " -c, --config=[path] Config file path (relative to [rundir]) (default: config).\n" + " -n, --noninteractive Don't start the read-eval-print loop for stdin+stdout.\n" + " -q, --quiet No command prompt in interactive mode.\n" + " -v, --verbose Increase logging to debug level.\n" + " -V, --version Print version of the server.\n" + " -h, --help Print help and usage.\n" + "Options:\n" + " [rundir] Path to the working directory (default: .)\n"); +} + +/** \return exit code for main() */ +static int run_worker(uv_loop_t *loop, struct engine *engine, bool leader, struct args *args) +{ + /* Only some kinds of stdin work with uv_pipe_t. + * Otherwise we would abort() from libuv e.g. with </dev/null */ + if (args->interactive) switch (uv_guess_handle(0)) { + case UV_TTY: /* standard terminal */ + /* TODO: it has worked OK so far, but we'd better use uv_tty_* + * for this case instead of uv_pipe_*. */ + case UV_NAMED_PIPE: /* echo 'quit()' | kresd ... */ + break; + default: + kr_log_error(SYSTEM, + "error: standard input is not a terminal or pipe; " + "use '-n' if you want non-interactive mode. " + "Commands can be simply added to your configuration file or sent over the control socket.\n" + ); + return EXIT_FAILURE; + } + + /* Control sockets or TTY */ + uv_pipe_t *pipe = malloc(sizeof(*pipe)); + if (!pipe) + return EXIT_FAILURE; + uv_pipe_init(loop, pipe, 0); + if (args->interactive) { + if (!args->quiet) + printf("Interactive mode:\n" "> "); + pipe->data = io_tty_alloc_data(); + uv_pipe_open(pipe, 0); + uv_read_start((uv_stream_t*)pipe, io_tty_alloc, io_tty_process_input); + } else if (args->control_fd != -1 && uv_pipe_open(pipe, args->control_fd) == 0) { + uv_listen((uv_stream_t *)pipe, 16, io_tty_accept); + } + + /* Notify supervisor. */ +#if ENABLE_LIBSYSTEMD + sd_notify(0, "READY=1"); +#endif + /* Run event loop */ + uv_run(loop, UV_RUN_DEFAULT); + /* Free pipe's data. Seems OK even on the stopped loop. + * In interactive case it may have been done in callbacks already (single leak). */ + if (!args->interactive) { + uv_close((uv_handle_t *)pipe, NULL); + free(pipe); + } + return EXIT_SUCCESS; +} + +static void args_init(struct args *args) +{ + memset(args, 0, sizeof(struct args)); + /* Zeroed arrays are OK. */ + args->forks = 1; + args->control_fd = -1; + args->interactive = true; + args->quiet = false; +} + +/* Free pointed-to resources. */ +static void args_deinit(struct args *args) +{ + array_clear(args->addrs); + array_clear(args->addrs_tls); + for (int i = 0; i < args->fds.len; ++i) + free_const(args->fds.at[i].flags.kind); + array_clear(args->fds); + array_clear(args->config); +} + +/** Process arguments into struct args. + * @return >=0 if main() should be exited immediately. + */ +static int parse_args(int argc, char **argv, struct args *args) +{ + /* Long options. */ + int c = 0, li = 0; + struct option opts[] = { + {"addr", required_argument, 0, 'a'}, + {"tls", required_argument, 0, 't'}, + {"config", required_argument, 0, 'c'}, + {"forks", required_argument, 0, 'f'}, + {"noninteractive", no_argument, 0, 'n'}, + {"verbose", no_argument, 0, 'v'}, + {"quiet", no_argument, 0, 'q'}, + {"version", no_argument, 0, 'V'}, + {"help", no_argument, 0, 'h'}, + {"fd", required_argument, 0, 'S'}, + {0, 0, 0, 0} + }; + while ((c = getopt_long(argc, argv, "a:t:c:f:nvqVhS:", opts, &li)) != -1) { + switch (c) + { + case 'a': + kr_require(optarg); + array_push(args->addrs, optarg); + break; + case 't': + kr_require(optarg); + array_push(args->addrs_tls, optarg); + break; + case 'c': + kr_require(optarg); + array_push(args->config, optarg); + break; + case 'f': + kr_require(optarg); + args->forks = strtol(optarg, NULL, 10); + if (args->forks == 1) { + kr_log_deprecate(SYSTEM, "use --noninteractive instead of --forks=1\n"); + } else { + kr_log_deprecate(SYSTEM, "support for running multiple --forks will be removed\n"); + } + if (args->forks <= 0) { + kr_log_error(SYSTEM, "error '-f' requires a positive" + " number, not '%s'\n", optarg); + return EXIT_FAILURE; + } + /* fall through */ + case 'n': + args->interactive = false; + break; + case 'v': + kr_log_level_set(LOG_DEBUG); + break; + case 'q': + args->quiet = true; + break; + case 'V': + printf("Knot Resolver, version %s\n", PACKAGE_VERSION); + return EXIT_SUCCESS; + case 'h': + case '?': + help(argc, argv); + return EXIT_SUCCESS; + default: + help(argc, argv); + return EXIT_FAILURE; + case 'S': + kr_require(optarg); + flagged_fd_t ffd = { 0 }; + char *endptr; + ffd.fd = strtol(optarg, &endptr, 10); + if (endptr != optarg && endptr[0] == '\0') { + /* Plain DNS */ + ffd.flags.tls = false; + } else if (endptr[0] == ':' && strcasecmp(endptr + 1, "tls") == 0) { + /* DoT */ + ffd.flags.tls = true; + /* We know what .sock_type should be but it wouldn't help. */ + } else if (endptr[0] == ':' && endptr[1] != '\0') { + /* Some other kind; no checks here. */ + ffd.flags.kind = strdup(endptr + 1); + } else { + kr_log_error(SYSTEM, "incorrect value passed to '-S/--fd': %s\n", + optarg); + return EXIT_FAILURE; + } + array_push(args->fds, ffd); + break; + } + } + if (optind < argc) { + args->rundir = argv[optind]; + } + return -1; +} + +/** Just convert addresses to file-descriptors; clear *addrs on success. + * @note AF_UNIX is supported (starting with '/'). + * @return zero or exit code for main() + */ +static int bind_sockets(addr_array_t *addrs, bool tls, flagged_fd_array_t *fds) +{ + bool has_error = false; + for (size_t i = 0; i < addrs->len; ++i) { + /* Get port and separate address string. */ + uint16_t port = tls ? KR_DNS_TLS_PORT : KR_DNS_PORT; + char addr_buf[INET6_ADDRSTRLEN + 1]; + int ret; + const char *addr_str; + const int family = kr_straddr_family(addrs->at[i]); + if (family == AF_UNIX) { + ret = 0; + addr_str = addrs->at[i]; + } else { /* internet socket (or garbage) */ + ret = kr_straddr_split(addrs->at[i], addr_buf, &port); + addr_str = addr_buf; + } + /* Get sockaddr. */ + struct sockaddr *sa = NULL; + if (ret == 0) { + sa = kr_straddr_socket(addr_str, port, NULL); + if (!sa) ret = kr_error(EINVAL); /* could be ENOMEM but unlikely */ + } + flagged_fd_t ffd = { .flags = { .tls = tls } }; + if (ret == 0 && !tls && family != AF_UNIX) { + /* AF_UNIX can do SOCK_DGRAM, but let's not support that *here*. */ + ffd.fd = io_bind(sa, SOCK_DGRAM, NULL); + if (ffd.fd < 0) + ret = ffd.fd; + else if (array_push(*fds, ffd) < 0) + ret = kr_error(ENOMEM); + } + if (ret == 0) { /* common for TCP and TLS, including AF_UNIX cases */ + ffd.fd = io_bind(sa, SOCK_STREAM, NULL); + if (ffd.fd < 0) + ret = ffd.fd; + else if (array_push(*fds, ffd) < 0) + ret = kr_error(ENOMEM); + } + free(sa); + if (ret != 0) { + kr_log_error(NETWORK, "bind to '%s'%s: %s\n", + addrs->at[i], tls ? " (TLS)" : "", kr_strerror(ret)); + has_error = true; + } + } + array_clear(*addrs); + return has_error ? EXIT_FAILURE : kr_ok(); +} + +static int start_listening(struct network *net, flagged_fd_array_t *fds) { + int some_bad_ret = 0; + for (size_t i = 0; i < fds->len; ++i) { + flagged_fd_t *ffd = &fds->at[i]; + int ret = network_listen_fd(net, ffd->fd, ffd->flags); + if (ret != 0) { + some_bad_ret = ret; + /* TODO: try logging address@port. It's not too important, + * because typical problems happen during binding already. + * (invalid address, permission denied) */ + kr_log_error(NETWORK, "listen on fd=%d: %s\n", + ffd->fd, kr_strerror(ret)); + /* Continue printing all of these before exiting. */ + } else { + ffd->flags.kind = NULL; /* ownership transferred */ + } + } + return some_bad_ret; +} + +/* Drop POSIX 1003.1e capabilities. */ +static void drop_capabilities(void) +{ +#if ENABLE_CAP_NG + /* Drop all capabilities when running under non-root user. */ + if (geteuid() == 0) { + kr_log_debug(SYSTEM, "running as root, no capabilities dropped\n"); + return; + } + if (capng_have_capability(CAPNG_EFFECTIVE, CAP_SETPCAP)) { + capng_clear(CAPNG_SELECT_BOTH); + + /* Apply. */ + if (capng_apply(CAPNG_SELECT_BOTH) < 0) { + kr_log_error(SYSTEM, "failed to set process capabilities: %s\n", + strerror(errno)); + } else { + kr_log_debug(SYSTEM, "all capabilities dropped\n"); + } + } else { + /* If user() was called, the capabilities were already dropped along with SETPCAP. */ + kr_log_debug(SYSTEM, "process not allowed to set capabilities, skipping\n"); + } +#endif /* ENABLE_CAP_NG */ +} + +int main(int argc, char **argv) +{ + kr_log_group_reset(); + if (setvbuf(stdout, NULL, _IONBF, 0) || setvbuf(stderr, NULL, _IONBF, 0)) { + kr_log_error(SYSTEM, "failed to to set output buffering (ignored): %s\n", + strerror(errno)); + fflush(stderr); + } + if (strcmp("linux", OPERATING_SYSTEM) != 0) + kr_log_warning(SYSTEM, "Knot Resolver is tested on Linux, other platforms might exhibit bugs.\n" + "Please report issues to https://gitlab.nic.cz/knot/knot-resolver/issues/\n" + "Thank you for your time and interest!\n"); + + the_args = &the_args_value; + args_init(the_args); + int ret = parse_args(argc, argv, the_args); + if (ret >= 0) goto cleanup_args; + + ret = bind_sockets(&the_args->addrs, false, &the_args->fds); + if (ret) goto cleanup_args; + ret = bind_sockets(&the_args->addrs_tls, true, &the_args->fds); + if (ret) goto cleanup_args; + + /* Switch to rundir. */ + if (the_args->rundir != NULL) { + /* FIXME: access isn't a good way if we start as root and drop privileges later */ + if (access(the_args->rundir, W_OK) != 0 + || chdir(the_args->rundir) != 0) { + kr_log_error(SYSTEM, "rundir '%s': %s\n", + the_args->rundir, strerror(errno)); + return EXIT_FAILURE; + } + } + + /* Select which config files to load and verify they are read-able. */ + bool load_defaults = true; + size_t i = 0; + while (i < the_args->config.len) { + const char *config = the_args->config.at[i]; + if (strcmp(config, "-") == 0) { + load_defaults = false; + array_del(the_args->config, i); + continue; /* don't increment i */ + } else if (access(config, R_OK) != 0) { + char cwd[PATH_MAX]; + get_workdir(cwd, sizeof(cwd)); + kr_log_error(SYSTEM, "config '%s' (workdir '%s'): %s\n", + config, cwd, strerror(errno)); + return EXIT_FAILURE; + } + i++; + } + if (the_args->config.len == 0 && access("config", R_OK) == 0) + array_push(the_args->config, "config"); + if (load_defaults) + array_push(the_args->config, LIBDIR "/postconfig.lua"); + + /* File-descriptor count limit: soft->hard. */ + struct rlimit rlim; + ret = getrlimit(RLIMIT_NOFILE, &rlim); + if (ret == 0 && rlim.rlim_cur != rlim.rlim_max) { + kr_log_debug(SYSTEM, "increasing file-descriptor limit: %ld -> %ld\n", + (long)rlim.rlim_cur, (long)rlim.rlim_max); + rlim.rlim_cur = rlim.rlim_max; + ret = setrlimit(RLIMIT_NOFILE, &rlim); + } + if (ret) { + kr_log_error(SYSTEM, "failed to get or set file-descriptor limit: %s\n", + strerror(errno)); + } else if (rlim.rlim_cur < 512*1024) { + kr_log_warning(SYSTEM, "warning: hard limit for number of file-descriptors is only %ld but recommended value is 524288\n", + (long)rlim.rlim_cur); + } + + /* Fork subprocesses if requested */ + int fork_id = fork_workers(the_args->forks); + if (fork_id < 0) { + return EXIT_FAILURE; + } + + kr_crypto_init(); + + /* Create a server engine. */ + knot_mm_t pool; + mm_ctx_mempool(&pool, MM_DEFAULT_BLKSIZE); + static struct engine engine; + ret = engine_init(&engine, &pool); + if (ret != 0) { + kr_log_error(SYSTEM, "failed to initialize engine: %s\n", kr_strerror(ret)); + return EXIT_FAILURE; + } + /* Initialize the worker */ + ret = worker_init(&engine, the_args->forks); + if (ret != 0) { + kr_log_error(SYSTEM, "failed to initialize worker: %s\n", kr_strerror(ret)); + return EXIT_FAILURE; + } + + uv_loop_t *loop = uv_default_loop(); + /* Catch some signals. */ + uv_signal_t sigint, sigterm, sigchld; + if (true) ret = uv_signal_init(loop, &sigint); + if (!ret) ret = uv_signal_init(loop, &sigterm); + if (!ret) ret = uv_signal_init(loop, &sigchld); + if (!ret) ret = uv_signal_start(&sigint, signal_handler, SIGINT); + if (!ret) ret = uv_signal_start(&sigterm, signal_handler, SIGTERM); + if (!ret) ret = uv_signal_start(&sigchld, signal_handler, SIGCHLD); + /* Block SIGPIPE; see https://github.com/libuv/libuv/issues/45 */ + if (!ret && signal(SIGPIPE, SIG_IGN) == SIG_ERR) ret = errno; + if (!ret) { + /* Catching SIGBUS via uv_signal_* can't work; see: + * https://github.com/libuv/libuv/pull/1987 */ + struct sigaction sa; + memset(&sa, 0, sizeof(sa)); + sa.sa_sigaction = sigbus_handler; + sa.sa_flags = SA_SIGINFO; + if (sigaction(SIGBUS, &sa, NULL)) { + ret = errno; + } + } + if (ret) { + kr_log_error(SYSTEM, "failed to set up signal handlers: %s\n", + strerror(abs(errno))); + ret = EXIT_FAILURE; + goto cleanup; + } + /* Profiling: avoid SIGPROF waking up the event loop. Otherwise the profiles + * (of the usual type) may skew results, e.g. epoll_pwait() taking lots of time. */ + ret = uv_loop_configure(loop, UV_LOOP_BLOCK_SIGNAL, SIGPROF); + if (ret) { + kr_log_info(SYSTEM, "failed to block SIGPROF in event loop, ignoring: %s\n", + uv_strerror(ret)); + } + + /* Start listening, in the sense of network_listen_fd(). */ + if (start_listening(&engine.net, &the_args->fds) != 0) { + ret = EXIT_FAILURE; + goto cleanup; + } + + ret = udp_queue_init_global(loop); + if (ret) { + kr_log_error(SYSTEM, "failed to initialize UDP queue: %s\n", + kr_strerror(ret)); + ret = EXIT_FAILURE; + goto cleanup; + } + + /* Start the scripting engine */ + if (engine_load_sandbox(&engine) != 0) { + ret = EXIT_FAILURE; + goto cleanup; + } + + for (i = 0; i < the_args->config.len; ++i) { + const char *config = the_args->config.at[i]; + if (engine_loadconf(&engine, config) != 0) { + ret = EXIT_FAILURE; + goto cleanup; + } + lua_settop(engine.L, 0); + } + + drop_capabilities(); + + if (engine_start(&engine) != 0) { + ret = EXIT_FAILURE; + goto cleanup; + } + + if (network_engage_endpoints(&engine.net)) { + ret = EXIT_FAILURE; + goto cleanup; + } + + /* Run the event loop */ + ret = run_worker(loop, &engine, fork_id == 0, the_args); + +cleanup:/* Cleanup. */ + engine_deinit(&engine); + worker_deinit(); + if (loop != NULL) { + uv_loop_close(loop); + } + mp_delete(pool.ctx); +cleanup_args: + args_deinit(the_args); + kr_crypto_cleanup(); + return ret; +} |