/* Copyright (C) CZ.NIC, z.s.p.o. <knot-resolver@labs.nic.cz> * SPDX-License-Identifier: GPL-3.0-or-later */ #include "kresconfig.h" #include "contrib/ucw/mempool.h" #include "daemon/engine.h" #include "daemon/io.h" #include "daemon/network.h" #include "daemon/udp_queue.h" #include "daemon/worker.h" #include "lib/defines.h" #include "lib/dnssec.h" #include "lib/log.h" #include <arpa/inet.h> #include <getopt.h> #include <libgen.h> #include <signal.h> #include <stdlib.h> #include <string.h> #include <sys/resource.h> #include <sys/wait.h> #include <unistd.h> #if ENABLE_CAP_NG #include <cap-ng.h> #endif #include <lua.h> #include <uv.h> #if ENABLE_LIBSYSTEMD #include <systemd/sd-daemon.h> #endif #include <libknot/error.h> #if ENABLE_JEMALLOC /* Make the jemalloc library needed. * * The problem is with --as-needed for linker which is added by default by meson. * If we don't use any jemalloc-specific calls, linker will decide that * it is not needed and won't link it. Making it needed seems better than * trying to override the flag which might be useful in some other cases, etc. * * Exporting the function is a very easy way of ensuring that it's not optimized out. */ #include <jemalloc/jemalloc.h> KR_EXPORT void kr_jemalloc_unused(void) { malloc_stats_print(NULL, NULL, NULL); } /* We don't use threads (or rarely in some parts), so multiple arenas don't make sense. https://jemalloc.net/jemalloc.3.html */ KR_EXPORT const char *malloc_conf = "narenas:1"; #endif struct args the_args_value; /** Static allocation for the_args singleton. */ static void signal_handler(uv_signal_t *handle, int signum) { switch (signum) { case SIGINT: /* Fallthrough. */ case SIGTERM: uv_stop(uv_default_loop()); uv_signal_stop(handle); break; case SIGCHLD: /* Wait for all dead processes. */ while (waitpid(-1, NULL, WNOHANG) > 0); break; default: kr_log_error(SYSTEM, "unhandled signal: %d\n", signum); break; } } /** SIGBUS -> attempt to remove the overflowing cache file and abort. */ static void sigbus_handler(int sig, siginfo_t *siginfo, void *ptr) { /* We can't safely assume that printf-like functions work, but write() is OK. * See POSIX for the safe functions, e.g. 2017 version just above this link: * http://pubs.opengroup.org/onlinepubs/9699919799/functions/V2_chap02.html#tag_15_04_04 */ #define WRITE_ERR(err_charray) \ (void)write(STDERR_FILENO, err_charray, sizeof(err_charray)) /* Unfortunately, void-cast on the write isn't enough to avoid the warning. */ #pragma GCC diagnostic push #pragma GCC diagnostic ignored "-Wunused-result" const char msg_typical[] = "\nSIGBUS received; this is most likely due to filling up the filesystem where cache resides.\n", msg_unknown[] = "\nSIGBUS received, cause unknown.\n", msg_deleted[] = "Cache file deleted.\n", msg_del_fail[] = "Cache file deletion failed.\n", msg_final[] = "kresd can not recover reliably by itself, exiting.\n"; if (siginfo->si_code != BUS_ADRERR) { WRITE_ERR(msg_unknown); goto end; } WRITE_ERR(msg_typical); if (!kr_cache_emergency_file_to_remove) goto end; if (unlink(kr_cache_emergency_file_to_remove)) { WRITE_ERR(msg_del_fail); } else { WRITE_ERR(msg_deleted); } end: WRITE_ERR(msg_final); _exit(128 - sig); /*< regular return from OS-raised SIGBUS can't work anyway */ #undef WRITE_ERR #pragma GCC diagnostic pop } /* * Server operation. */ static int fork_workers(int forks) { /* Fork subprocesses if requested */ while (--forks > 0) { int pid = fork(); if (pid < 0) { perror("[system] fork"); return kr_error(errno); } /* Forked process */ if (pid == 0) { return forks; } } return 0; } static void help(int argc, char *argv[]) { printf("Usage: %s [parameters] [rundir]\n", argv[0]); printf("\nParameters:\n" " -a, --addr=[addr] Server address (default: localhost@53).\n" " -t, --tls=[addr] Server address for TLS (default: off).\n" " -S, --fd=[fd:kind] Listen on given fd (handed out by supervisor, :kind is optional).\n" " -c, --config=[path] Config file path (relative to [rundir]) (default: config).\n" " -n, --noninteractive Don't start the read-eval-print loop for stdin+stdout.\n" " -q, --quiet No command prompt in interactive mode.\n" " -v, --verbose Increase logging to debug level.\n" " -V, --version Print version of the server.\n" " -h, --help Print help and usage.\n" "Options:\n" " [rundir] Path to the working directory (default: .)\n"); } /** \return exit code for main() */ static int run_worker(uv_loop_t *loop, struct engine *engine, bool leader, struct args *args) { /* Only some kinds of stdin work with uv_pipe_t. * Otherwise we would abort() from libuv e.g. with </dev/null */ if (args->interactive) switch (uv_guess_handle(0)) { case UV_TTY: /* standard terminal */ /* TODO: it has worked OK so far, but we'd better use uv_tty_* * for this case instead of uv_pipe_*. */ case UV_NAMED_PIPE: /* echo 'quit()' | kresd ... */ break; default: kr_log_error(SYSTEM, "error: standard input is not a terminal or pipe; " "use '-n' if you want non-interactive mode. " "Commands can be simply added to your configuration file or sent over the control socket.\n" ); return EXIT_FAILURE; } /* Control sockets or TTY */ uv_pipe_t *pipe = malloc(sizeof(*pipe)); if (!pipe) return EXIT_FAILURE; uv_pipe_init(loop, pipe, 0); if (args->interactive) { if (!args->quiet) printf("Interactive mode:\n" "> "); pipe->data = io_tty_alloc_data(); uv_pipe_open(pipe, 0); uv_read_start((uv_stream_t*)pipe, io_tty_alloc, io_tty_process_input); } else if (args->control_fd != -1 && uv_pipe_open(pipe, args->control_fd) == 0) { uv_listen((uv_stream_t *)pipe, 16, io_tty_accept); } /* Notify supervisor. */ #if ENABLE_LIBSYSTEMD sd_notify(0, "READY=1"); #endif /* Run event loop */ uv_run(loop, UV_RUN_DEFAULT); /* Free pipe's data. Seems OK even on the stopped loop. * In interactive case it may have been done in callbacks already (single leak). */ if (!args->interactive) { uv_close((uv_handle_t *)pipe, NULL); free(pipe); } return EXIT_SUCCESS; } static void args_init(struct args *args) { memset(args, 0, sizeof(struct args)); /* Zeroed arrays are OK. */ args->forks = 1; args->control_fd = -1; args->interactive = true; args->quiet = false; } /* Free pointed-to resources. */ static void args_deinit(struct args *args) { array_clear(args->addrs); array_clear(args->addrs_tls); for (int i = 0; i < args->fds.len; ++i) free_const(args->fds.at[i].flags.kind); array_clear(args->fds); array_clear(args->config); } /** Process arguments into struct args. * @return >=0 if main() should be exited immediately. */ static int parse_args(int argc, char **argv, struct args *args) { /* Long options. */ int c = 0, li = 0; struct option opts[] = { {"addr", required_argument, 0, 'a'}, {"tls", required_argument, 0, 't'}, {"config", required_argument, 0, 'c'}, {"forks", required_argument, 0, 'f'}, {"noninteractive", no_argument, 0, 'n'}, {"verbose", no_argument, 0, 'v'}, {"quiet", no_argument, 0, 'q'}, {"version", no_argument, 0, 'V'}, {"help", no_argument, 0, 'h'}, {"fd", required_argument, 0, 'S'}, {0, 0, 0, 0} }; while ((c = getopt_long(argc, argv, "a:t:c:f:nvqVhS:", opts, &li)) != -1) { switch (c) { case 'a': kr_require(optarg); array_push(args->addrs, optarg); break; case 't': kr_require(optarg); array_push(args->addrs_tls, optarg); break; case 'c': kr_require(optarg); array_push(args->config, optarg); break; case 'f': kr_require(optarg); args->forks = strtol(optarg, NULL, 10); if (args->forks == 1) { kr_log_deprecate(SYSTEM, "use --noninteractive instead of --forks=1\n"); } else { kr_log_deprecate(SYSTEM, "support for running multiple --forks will be removed\n"); } if (args->forks <= 0) { kr_log_error(SYSTEM, "error '-f' requires a positive" " number, not '%s'\n", optarg); return EXIT_FAILURE; } /* fall through */ case 'n': args->interactive = false; break; case 'v': kr_log_level_set(LOG_DEBUG); break; case 'q': args->quiet = true; break; case 'V': printf("Knot Resolver, version %s\n", PACKAGE_VERSION); return EXIT_SUCCESS; case 'h': case '?': help(argc, argv); return EXIT_SUCCESS; default: help(argc, argv); return EXIT_FAILURE; case 'S': kr_require(optarg); flagged_fd_t ffd = { 0 }; char *endptr; ffd.fd = strtol(optarg, &endptr, 10); if (endptr != optarg && endptr[0] == '\0') { /* Plain DNS */ ffd.flags.tls = false; } else if (endptr[0] == ':' && strcasecmp(endptr + 1, "tls") == 0) { /* DoT */ ffd.flags.tls = true; /* We know what .sock_type should be but it wouldn't help. */ } else if (endptr[0] == ':' && endptr[1] != '\0') { /* Some other kind; no checks here. */ ffd.flags.kind = strdup(endptr + 1); } else { kr_log_error(SYSTEM, "incorrect value passed to '-S/--fd': %s\n", optarg); return EXIT_FAILURE; } array_push(args->fds, ffd); break; } } if (optind < argc) { args->rundir = argv[optind]; } return -1; } /** Just convert addresses to file-descriptors; clear *addrs on success. * @note AF_UNIX is supported (starting with '/'). * @return zero or exit code for main() */ static int bind_sockets(addr_array_t *addrs, bool tls, flagged_fd_array_t *fds) { bool has_error = false; for (size_t i = 0; i < addrs->len; ++i) { /* Get port and separate address string. */ uint16_t port = tls ? KR_DNS_TLS_PORT : KR_DNS_PORT; char addr_buf[INET6_ADDRSTRLEN + 1]; int ret; const char *addr_str; const int family = kr_straddr_family(addrs->at[i]); if (family == AF_UNIX) { ret = 0; addr_str = addrs->at[i]; } else { /* internet socket (or garbage) */ ret = kr_straddr_split(addrs->at[i], addr_buf, &port); addr_str = addr_buf; } /* Get sockaddr. */ struct sockaddr *sa = NULL; if (ret == 0) { sa = kr_straddr_socket(addr_str, port, NULL); if (!sa) ret = kr_error(EINVAL); /* could be ENOMEM but unlikely */ } flagged_fd_t ffd = { .flags = { .tls = tls } }; if (ret == 0 && !tls && family != AF_UNIX) { /* AF_UNIX can do SOCK_DGRAM, but let's not support that *here*. */ ffd.fd = io_bind(sa, SOCK_DGRAM, NULL); if (ffd.fd < 0) ret = ffd.fd; else if (array_push(*fds, ffd) < 0) ret = kr_error(ENOMEM); } if (ret == 0) { /* common for TCP and TLS, including AF_UNIX cases */ ffd.fd = io_bind(sa, SOCK_STREAM, NULL); if (ffd.fd < 0) ret = ffd.fd; else if (array_push(*fds, ffd) < 0) ret = kr_error(ENOMEM); } free(sa); if (ret != 0) { kr_log_error(NETWORK, "bind to '%s'%s: %s\n", addrs->at[i], tls ? " (TLS)" : "", kr_strerror(ret)); has_error = true; } } array_clear(*addrs); return has_error ? EXIT_FAILURE : kr_ok(); } static int start_listening(struct network *net, flagged_fd_array_t *fds) { int some_bad_ret = 0; for (size_t i = 0; i < fds->len; ++i) { flagged_fd_t *ffd = &fds->at[i]; int ret = network_listen_fd(net, ffd->fd, ffd->flags); if (ret != 0) { some_bad_ret = ret; /* TODO: try logging address@port. It's not too important, * because typical problems happen during binding already. * (invalid address, permission denied) */ kr_log_error(NETWORK, "listen on fd=%d: %s\n", ffd->fd, kr_strerror(ret)); /* Continue printing all of these before exiting. */ } else { ffd->flags.kind = NULL; /* ownership transferred */ } } return some_bad_ret; } /* Drop POSIX 1003.1e capabilities. */ static void drop_capabilities(void) { #if ENABLE_CAP_NG /* Drop all capabilities when running under non-root user. */ if (geteuid() == 0) { kr_log_debug(SYSTEM, "running as root, no capabilities dropped\n"); return; } if (capng_have_capability(CAPNG_EFFECTIVE, CAP_SETPCAP)) { capng_clear(CAPNG_SELECT_BOTH); /* Apply. */ if (capng_apply(CAPNG_SELECT_BOTH) < 0) { kr_log_error(SYSTEM, "failed to set process capabilities: %s\n", strerror(errno)); } else { kr_log_debug(SYSTEM, "all capabilities dropped\n"); } } else { /* If user() was called, the capabilities were already dropped along with SETPCAP. */ kr_log_debug(SYSTEM, "process not allowed to set capabilities, skipping\n"); } #endif /* ENABLE_CAP_NG */ } int main(int argc, char **argv) { kr_log_group_reset(); if (setvbuf(stdout, NULL, _IONBF, 0) || setvbuf(stderr, NULL, _IONBF, 0)) { kr_log_error(SYSTEM, "failed to to set output buffering (ignored): %s\n", strerror(errno)); fflush(stderr); } if (strcmp("linux", OPERATING_SYSTEM) != 0) kr_log_warning(SYSTEM, "Knot Resolver is tested on Linux, other platforms might exhibit bugs.\n" "Please report issues to https://gitlab.nic.cz/knot/knot-resolver/issues/\n" "Thank you for your time and interest!\n"); the_args = &the_args_value; args_init(the_args); int ret = parse_args(argc, argv, the_args); if (ret >= 0) goto cleanup_args; ret = bind_sockets(&the_args->addrs, false, &the_args->fds); if (ret) goto cleanup_args; ret = bind_sockets(&the_args->addrs_tls, true, &the_args->fds); if (ret) goto cleanup_args; /* Switch to rundir. */ if (the_args->rundir != NULL) { /* FIXME: access isn't a good way if we start as root and drop privileges later */ if (access(the_args->rundir, W_OK) != 0 || chdir(the_args->rundir) != 0) { kr_log_error(SYSTEM, "rundir '%s': %s\n", the_args->rundir, strerror(errno)); return EXIT_FAILURE; } } /* Select which config files to load and verify they are read-able. */ bool load_defaults = true; size_t i = 0; while (i < the_args->config.len) { const char *config = the_args->config.at[i]; if (strcmp(config, "-") == 0) { load_defaults = false; array_del(the_args->config, i); continue; /* don't increment i */ } else if (access(config, R_OK) != 0) { char cwd[PATH_MAX]; get_workdir(cwd, sizeof(cwd)); kr_log_error(SYSTEM, "config '%s' (workdir '%s'): %s\n", config, cwd, strerror(errno)); return EXIT_FAILURE; } i++; } if (the_args->config.len == 0 && access("config", R_OK) == 0) array_push(the_args->config, "config"); if (load_defaults) array_push(the_args->config, LIBDIR "/postconfig.lua"); /* File-descriptor count limit: soft->hard. */ struct rlimit rlim; ret = getrlimit(RLIMIT_NOFILE, &rlim); if (ret == 0 && rlim.rlim_cur != rlim.rlim_max) { kr_log_debug(SYSTEM, "increasing file-descriptor limit: %ld -> %ld\n", (long)rlim.rlim_cur, (long)rlim.rlim_max); rlim.rlim_cur = rlim.rlim_max; ret = setrlimit(RLIMIT_NOFILE, &rlim); } if (ret) { kr_log_error(SYSTEM, "failed to get or set file-descriptor limit: %s\n", strerror(errno)); } else if (rlim.rlim_cur < 512*1024) { kr_log_warning(SYSTEM, "warning: hard limit for number of file-descriptors is only %ld but recommended value is 524288\n", (long)rlim.rlim_cur); } /* Fork subprocesses if requested */ int fork_id = fork_workers(the_args->forks); if (fork_id < 0) { return EXIT_FAILURE; } kr_crypto_init(); /* Create a server engine. */ knot_mm_t pool; mm_ctx_mempool(&pool, MM_DEFAULT_BLKSIZE); static struct engine engine; ret = engine_init(&engine, &pool); if (ret != 0) { kr_log_error(SYSTEM, "failed to initialize engine: %s\n", kr_strerror(ret)); return EXIT_FAILURE; } /* Initialize the worker */ ret = worker_init(&engine, the_args->forks); if (ret != 0) { kr_log_error(SYSTEM, "failed to initialize worker: %s\n", kr_strerror(ret)); return EXIT_FAILURE; } uv_loop_t *loop = uv_default_loop(); /* Catch some signals. */ uv_signal_t sigint, sigterm, sigchld; if (true) ret = uv_signal_init(loop, &sigint); if (!ret) ret = uv_signal_init(loop, &sigterm); if (!ret) ret = uv_signal_init(loop, &sigchld); if (!ret) ret = uv_signal_start(&sigint, signal_handler, SIGINT); if (!ret) ret = uv_signal_start(&sigterm, signal_handler, SIGTERM); if (!ret) ret = uv_signal_start(&sigchld, signal_handler, SIGCHLD); /* Block SIGPIPE; see https://github.com/libuv/libuv/issues/45 */ if (!ret && signal(SIGPIPE, SIG_IGN) == SIG_ERR) ret = errno; if (!ret) { /* Catching SIGBUS via uv_signal_* can't work; see: * https://github.com/libuv/libuv/pull/1987 */ struct sigaction sa; memset(&sa, 0, sizeof(sa)); sa.sa_sigaction = sigbus_handler; sa.sa_flags = SA_SIGINFO; if (sigaction(SIGBUS, &sa, NULL)) { ret = errno; } } if (ret) { kr_log_error(SYSTEM, "failed to set up signal handlers: %s\n", strerror(abs(errno))); ret = EXIT_FAILURE; goto cleanup; } /* Profiling: avoid SIGPROF waking up the event loop. Otherwise the profiles * (of the usual type) may skew results, e.g. epoll_pwait() taking lots of time. */ ret = uv_loop_configure(loop, UV_LOOP_BLOCK_SIGNAL, SIGPROF); if (ret) { kr_log_info(SYSTEM, "failed to block SIGPROF in event loop, ignoring: %s\n", uv_strerror(ret)); } /* Start listening, in the sense of network_listen_fd(). */ if (start_listening(&engine.net, &the_args->fds) != 0) { ret = EXIT_FAILURE; goto cleanup; } ret = udp_queue_init_global(loop); if (ret) { kr_log_error(SYSTEM, "failed to initialize UDP queue: %s\n", kr_strerror(ret)); ret = EXIT_FAILURE; goto cleanup; } /* Start the scripting engine */ if (engine_load_sandbox(&engine) != 0) { ret = EXIT_FAILURE; goto cleanup; } for (i = 0; i < the_args->config.len; ++i) { const char *config = the_args->config.at[i]; if (engine_loadconf(&engine, config) != 0) { ret = EXIT_FAILURE; goto cleanup; } lua_settop(engine.L, 0); } drop_capabilities(); if (engine_start(&engine) != 0) { ret = EXIT_FAILURE; goto cleanup; } if (network_engage_endpoints(&engine.net)) { ret = EXIT_FAILURE; goto cleanup; } /* Run the event loop */ ret = run_worker(loop, &engine, fork_id == 0, the_args); cleanup:/* Cleanup. */ engine_deinit(&engine); worker_deinit(); if (loop != NULL) { uv_loop_close(loop); } mp_delete(pool.ctx); cleanup_args: args_deinit(the_args); kr_crypto_cleanup(); return ret; }