diff options
author | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-04-28 09:51:24 +0000 |
---|---|---|
committer | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-04-28 09:51:24 +0000 |
commit | f7548d6d28c313cf80e6f3ef89aed16a19815df1 (patch) | |
tree | a3f6f2a3f247293bee59ecd28e8cd8ceb6ca064a /src/master/service-monitor.c | |
parent | Initial commit. (diff) | |
download | dovecot-f7548d6d28c313cf80e6f3ef89aed16a19815df1.tar.xz dovecot-f7548d6d28c313cf80e6f3ef89aed16a19815df1.zip |
Adding upstream version 1:2.3.19.1+dfsg1. (refs: upstream/1%2.3.19.1+dfsg1, upstream)
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'src/master/service-monitor.c')
-rw-r--r-- | src/master/service-monitor.c | 766 |
1 files changed, 766 insertions, 0 deletions
diff --git a/src/master/service-monitor.c b/src/master/service-monitor.c new file mode 100644 index 0000000..c10e99c --- /dev/null +++ b/src/master/service-monitor.c @@ -0,0 +1,766 @@ +/* Copyright (c) 2005-2018 Dovecot authors, see the included COPYING file */ + +#include "common.h" +#include "array.h" +#include "ioloop.h" +#include "hash.h" +#include "str.h" +#include "safe-mkstemp.h" +#include "time-util.h" +#include "sleep.h" +#include "master-client.h" +#include "service.h" +#include "service-process.h" +#include "service-process-notify.h" +#include "service-anvil.h" +#include "service-log.h" +#include "service-monitor.h" + +#include <unistd.h> +#include <sys/wait.h> +#include <syslog.h> +#include <signal.h> + +#define SERVICE_DROP_WARN_INTERVAL_SECS 1 +#define SERVICE_DROP_TIMEOUT_MSECS (10*1000) +#define SERVICE_LOG_DROP_WARNING_DELAY_MSECS 500 +#define MAX_DIE_WAIT_MSECS 5000 +#define SERVICE_MAX_EXIT_FAILURES_IN_SEC 10 +#define SERVICE_PREFORK_MAX_AT_ONCE 10 + +static void service_monitor_start_extra_avail(struct service *service); +static void service_status_more(struct service_process *process, + const struct master_status *status); +static void service_monitor_listen_start_force(struct service *service); + +static void service_process_kill_idle(struct service_process *process) +{ + struct service *service = process->service; + struct master_status status; + + i_assert(process->available_count == service->client_limit); + + if (service->process_avail <= service->set->process_min_avail) { + /* we don't have any extra idling processes anymore. 
*/ + timeout_remove(&process->to_idle); + } else if (process->last_kill_sent > process->last_status_update+1) { + service_error(service, "Process %s is ignoring idle SIGINT", + dec2str(process->pid)); + + /* assume this process is busy */ + i_zero(&status); + service_status_more(process, &status); + process->available_count = 0; + } else { + if (kill(process->pid, SIGINT) < 0 && errno != ESRCH) { + service_error(service, "kill(%s, SIGINT) failed: %m", + dec2str(process->pid)); + } + process->last_kill_sent = ioloop_time; + } +} + +static void service_status_more(struct service_process *process, + const struct master_status *status) +{ + struct service *service = process->service; + + process->total_count += + process->available_count - status->available_count; + process->idle_start = 0; + + timeout_remove(&process->to_idle); + + if (status->available_count != 0) + return; + + /* process used up all of its clients */ + i_assert(service->process_avail > 0); + service->process_avail--; + + if (service->type == SERVICE_TYPE_LOGIN && + service->process_avail == 0 && + service->process_count == service->process_limit) + service_login_notify(service, TRUE); + + /* we may need to start more */ + service_monitor_start_extra_avail(service); + service_monitor_listen_start(service); +} + +static void service_check_idle(struct service_process *process) +{ + struct service *service = process->service; + + if (process->available_count != service->client_limit) + return; + process->idle_start = ioloop_time; + if (service->process_avail > service->set->process_min_avail && + process->to_idle == NULL && + service->idle_kill != UINT_MAX) { + /* we have more processes than we really need. 
+ add a bit of randomness so that we don't send the + signal to all of them at once */ + process->to_idle = + timeout_add((service->idle_kill * 1000) + + i_rand_limit(100) * 10, + service_process_kill_idle, + process); + } +} + +static void service_status_less(struct service_process *process) +{ + struct service *service = process->service; + + /* some process got more connections - remove the delayed warning */ + timeout_remove(&service->to_drop_warning); + + if (process->available_count == 0) { + /* process can accept more clients again */ + if (service->process_avail++ == 0) + service_monitor_listen_stop(service); + i_assert(service->process_avail <= service->process_count); + } + if (service->type == SERVICE_TYPE_LOGIN) + service_login_notify(service, FALSE); +} + +static void +service_status_input_one(struct service *service, + const struct master_status *status) +{ + struct service_process *process; + + process = hash_table_lookup(service_pids, POINTER_CAST(status->pid)); + if (process == NULL) { + /* we've probably wait()ed it away already. ignore */ + return; + } + + if (process->uid != status->uid || process->service != service) { + /* a) Process was closed and another process was created with + the same PID, but we're still receiving status update from + the old process. + + b) Some process is trying to corrupt our internal state by + trying to pretend to be someone else. We could use stronger + randomness here, but the worst they can do is DoS and there + are already more serious problems if someone is able to do + this.. 
*/ + service_error(service, "Ignoring invalid update from child %s " + "(UID=%u)", dec2str(status->pid), status->uid); + return; + } + process->last_status_update = ioloop_time; + + /* first status notification */ + timeout_remove(&process->to_status); + + if (process->available_count != status->available_count) { + if (process->available_count > status->available_count) { + /* process started servicing some more clients */ + service_status_more(process, status); + } else { + /* process finished servicing some clients */ + service_status_less(process); + } + process->available_count = status->available_count; + } + service_check_idle(process); +} + +static void service_status_input(struct service *service) +{ + struct master_status status[1024/sizeof(struct master_status)]; + unsigned int i, count; + ssize_t ret; + + ret = read(service->status_fd[0], &status, sizeof(status)); + if (ret <= 0) { + if (ret == 0) + service_error(service, "read(status) failed: EOF"); + else if (errno != EAGAIN) + service_error(service, "read(status) failed: %m"); + else + return; + service_monitor_stop(service); + return; + } + + if ((ret % sizeof(struct master_status)) != 0) { + service_error(service, "service sent partial status update " + "(%d bytes)", (int)ret); + return; + } + + count = ret / sizeof(struct master_status); + for (i = 0; i < count; i++) + service_status_input_one(service, &status[i]); +} + +static void service_log_drop_warning(struct service *service) +{ + const char *limit_name; + unsigned int limit; + + if (service->last_drop_warning + + SERVICE_DROP_WARN_INTERVAL_SECS <= ioloop_time) { + service->last_drop_warning = ioloop_time; + if (service->process_limit > 1) { + limit_name = "process_limit"; + limit = service->process_limit; + } else if (service->set->service_count == 1) { + i_assert(service->client_limit == 1); + limit_name = "client_limit/service_count"; + limit = 1; + } else { + limit_name = "client_limit"; + limit = service->client_limit; + } + 
i_warning("service(%s): %s (%u) reached, " + "client connections are being dropped", + service->set->name, limit_name, limit); + } +} + +static void service_monitor_throttle(struct service *service) +{ + if (service->to_throttle != NULL || service->list->destroying) + return; + + i_assert(service->throttle_msecs > 0); + + service_error(service, + "command startup failed, throttling for %u.%03u secs", + service->throttle_msecs / 1000, + service->throttle_msecs % 1000); + service_throttle(service, service->throttle_msecs); + service->throttle_msecs *= 2; + if (service->throttle_msecs > + SERVICE_STARTUP_FAILURE_THROTTLE_MAX_MSECS) { + service->throttle_msecs = + SERVICE_STARTUP_FAILURE_THROTTLE_MAX_MSECS; + } +} + +static void service_drop_timeout(struct service *service) +{ + struct service_listener *lp; + int fd; + + i_assert(service->process_avail == 0); + + /* drop all pending connections */ + array_foreach_elem(&service->listeners, lp) { + while ((fd = net_accept(lp->fd, NULL, NULL)) > 0) + net_disconnect(fd); + } + + service_monitor_listen_start_force(service); + service->listen_pending = TRUE; +} + +static void service_monitor_listen_pending(struct service *service) +{ + i_assert(service->process_avail == 0); + + service_monitor_listen_stop(service); + service->listen_pending = TRUE; + + service->to_drop = timeout_add(SERVICE_DROP_TIMEOUT_MSECS, + service_drop_timeout, service); +} + +static void service_drop_connections(struct service_listener *l) +{ + struct service *service = l->service; + int fd; + + if (service->type != SERVICE_TYPE_WORKER) + service_log_drop_warning(service); + + if (service->type == SERVICE_TYPE_LOGIN) { + /* reached process limit, notify processes that they + need to start killing existing connections if they + reach connection limit */ + service_login_notify(service, TRUE); + + service_monitor_listen_pending(service); + } else if (!service->listen_pending) { + /* maybe this is a temporary peak, stop for a while and + see if it goes 
away */ + service_monitor_listen_pending(service); + if (service->to_drop_warning == NULL && + service->type == SERVICE_TYPE_WORKER) { + service->to_drop_warning = + timeout_add_short(SERVICE_LOG_DROP_WARNING_DELAY_MSECS, + service_log_drop_warning, service); + } + } else { + /* this has been happening for a while now. just accept and + close the connection, so it's clear that this is happening + because of the limit, rather than because the service + processes aren't answering fast enough */ + fd = net_accept(l->fd, NULL, NULL); + if (fd > 0) + net_disconnect(fd); + } +} + +static void service_accept(struct service_listener *l) +{ + struct service *service = l->service; + + i_assert(service->process_avail == 0); + + if (service->process_count == service->process_limit) { + /* we've reached our limits, new clients will have to + wait until there are more processes available */ + service_drop_connections(l); + return; + } + + /* create a child process and let it accept() this connection */ + if (service_process_create(service) == NULL) + service_monitor_throttle(service); + else + service_monitor_listen_stop(service); +} + +static bool +service_monitor_start_count(struct service *service, unsigned int limit) +{ + unsigned int i, count; + + i_assert(service->set->process_min_avail >= service->process_avail); + + count = service->set->process_min_avail - service->process_avail; + if (service->process_count + count > service->process_limit) + count = service->process_limit - service->process_count; + if (count > limit) + count = limit; + + for (i = 0; i < count; i++) { + if (service_process_create(service) == NULL) { + service_monitor_throttle(service); + break; + } + } + if (i > 0) { + /* we created some processes, they'll do the listening now */ + service_monitor_listen_stop(service); + } + return i >= limit; +} + +static void service_monitor_prefork_timeout(struct service *service) +{ + /* don't prefork more processes if other more important processes had + been 
forked while we were waiting for this timeout (= master seems + busy) */ + if (service->list->fork_counter != service->prefork_counter) { + service->prefork_counter = service->list->fork_counter; + return; + } + if (service->process_avail < service->set->process_min_avail) { + if (service_monitor_start_count(service, SERVICE_PREFORK_MAX_AT_ONCE) && + service->process_avail < service->set->process_min_avail) { + /* All SERVICE_PREFORK_MAX_AT_ONCE were created, but + it still wasn't enough. Launch more in the next + timeout. */ + return; + } + } + timeout_remove(&service->to_prefork); +} + +static void service_monitor_start_extra_avail(struct service *service) +{ + if (service->process_avail >= service->set->process_min_avail || + service->process_count >= service->process_limit || + service->list->destroying) + return; + + if (service->process_avail == 0) { + /* quickly start one process now */ + if (!service_monitor_start_count(service, 1)) + return; + if (service->process_avail >= service->set->process_min_avail) + return; + } + if (service->to_prefork == NULL) { + /* ioloop handles timeouts before fds (= SIGCHLD callback), + so let the first timeout handler call simply update the fork + counter and the second one check if we're busy or not. 
*/ + service->to_prefork = + timeout_add_short(0, service_monitor_prefork_timeout, service); + } +} + +static void service_monitor_listen_start_force(struct service *service) +{ + struct service_listener *l; + + service->listening = TRUE; + service->listen_pending = FALSE; + timeout_remove(&service->to_drop); + timeout_remove(&service->to_drop_warning); + + array_foreach_elem(&service->listeners, l) { + if (l->io == NULL && l->fd != -1) + l->io = io_add(l->fd, IO_READ, service_accept, l); + } +} + +void service_monitor_listen_start(struct service *service) +{ + if (service->process_avail > 0 || service->to_throttle != NULL || + (service->process_count == service->process_limit && + service->listen_pending)) + return; + + service_monitor_listen_start_force(service); +} + +void service_monitor_listen_stop(struct service *service) +{ + struct service_listener *l; + + array_foreach_elem(&service->listeners, l) + io_remove(&l->io); + service->listening = FALSE; + service->listen_pending = FALSE; + timeout_remove(&service->to_drop); + timeout_remove(&service->to_drop_warning); +} + +static int service_login_create_notify_fd(struct service *service) +{ + int fd, ret; + + if (service->login_notify_fd != -1) + return 0; + + T_BEGIN { + string_t *prefix = t_str_new(128); + const char *path; + + str_append(prefix, service->set->master_set->base_dir); + str_append(prefix, "/login-master-notify"); + + fd = safe_mkstemp(prefix, 0600, (uid_t)-1, (gid_t)-1); + path = str_c(prefix); + + if (fd == -1) { + service_error(service, "safe_mkstemp(%s) failed: %m", + path); + } else if (unlink(path) < 0) { + service_error(service, "unlink(%s) failed: %m", path); + } else { + fd_close_on_exec(fd, TRUE); + service->login_notify_fd = fd; + } + } T_END; + + ret = fd == -1 ? 
-1 : 0; + if (fd != service->login_notify_fd) + i_close_fd(&fd); + return ret; +} + +void services_monitor_start(struct service_list *service_list) +{ + ARRAY(struct service *) listener_services; + struct service *service; + + if (services_log_init(service_list) < 0) + return; + service_anvil_monitor_start(service_list); + + if (service_list->io_master == NULL && + service_list->master_fd != -1) { + service_list->io_master = + io_add(service_list->master_fd, IO_READ, + master_client_connected, service_list); + } + + t_array_init(&listener_services, array_count(&service_list->services)); + array_foreach_elem(&service_list->services, service) { + if (service->type == SERVICE_TYPE_LOGIN) { + if (service_login_create_notify_fd(service) < 0) + continue; + } + if (service->master_dead_pipe_fd[0] == -1) { + if (pipe(service->master_dead_pipe_fd) < 0) { + service_error(service, "pipe() failed: %m"); + continue; + } + fd_close_on_exec(service->master_dead_pipe_fd[0], TRUE); + fd_close_on_exec(service->master_dead_pipe_fd[1], TRUE); + } + if (service->status_fd[0] == -1) { + /* we haven't yet created status pipe */ + if (pipe(service->status_fd) < 0) { + service_error(service, "pipe() failed: %m"); + continue; + } + + net_set_nonblock(service->status_fd[0], TRUE); + fd_close_on_exec(service->status_fd[0], TRUE); + net_set_nonblock(service->status_fd[1], TRUE); + fd_close_on_exec(service->status_fd[1], TRUE); + } + if (service->io_status == NULL) { + service->io_status = + io_add(service->status_fd[0], IO_READ, + service_status_input, service); + } + service_monitor_listen_start(service); + array_push_back(&listener_services, &service); + } + + /* create processes only after adding all listeners */ + array_foreach_elem(&listener_services, service) + service_monitor_start_extra_avail(service); + + if (service_list->log->status_fd[0] != -1) { + if (service_process_create(service_list->log) != NULL) + service_monitor_listen_stop(service_list->log); + } + + /* start up a process 
for startup-services */ + array_foreach_elem(&service_list->services, service) { + if (service->type == SERVICE_TYPE_STARTUP && + service->status_fd[0] != -1) { + if (service_process_create(service) != NULL) + service_monitor_listen_stop(service); + } + } +} + +static void service_monitor_close_dead_pipe(struct service *service) +{ + if (service->master_dead_pipe_fd[0] != -1) { + i_close_fd(&service->master_dead_pipe_fd[0]); + i_close_fd(&service->master_dead_pipe_fd[1]); + } +} + +void service_monitor_stop(struct service *service) +{ + int i; + + io_remove(&service->io_status); + + if (service->status_fd[0] != -1 && + service->type != SERVICE_TYPE_ANVIL) { + for (i = 0; i < 2; i++) { + if (close(service->status_fd[i]) < 0) { + service_error(service, + "close(status fd) failed: %m"); + } + service->status_fd[i] = -1; + } + } + service_monitor_close_dead_pipe(service); + if (service->login_notify_fd != -1) { + if (close(service->login_notify_fd) < 0) { + service_error(service, + "close(login notify fd) failed: %m"); + } + service->login_notify_fd = -1; + } + timeout_remove(&service->to_login_notify); + service_monitor_listen_stop(service); + + timeout_remove(&service->to_throttle); + timeout_remove(&service->to_prefork); +} + +void service_monitor_stop_close(struct service *service) +{ + struct service_listener *l; + + service_monitor_stop(service); + + array_foreach_elem(&service->listeners, l) + i_close_fd(&l->fd); +} + +static void services_monitor_wait(struct service_list *service_list) +{ + struct service *service; + struct timeval tv_start; + bool finished; + + io_loop_time_refresh(); + tv_start = ioloop_timeval; + + for (;;) { + finished = TRUE; + services_monitor_reap_children(); + array_foreach_elem(&service_list->services, service) { + if (service->status_fd[0] != -1) + service_status_input(service); + if (service->process_avail > 0) + finished = FALSE; + } + io_loop_time_refresh(); + if (finished || + timeval_diff_msecs(&ioloop_timeval, &tv_start) > 
MAX_DIE_WAIT_MSECS) + break; + i_sleep_msecs(100); + } +} + +static bool service_processes_close_listeners(struct service *service) +{ + struct service_process *process = service->processes; + bool ret = FALSE; + + for (; process != NULL; process = process->next) { + if (kill(process->pid, SIGQUIT) == 0) + ret = TRUE; + else if (errno != ESRCH) { + service_error(service, "kill(%s, SIGQUIT) failed: %m", + dec2str(process->pid)); + } + } + return ret; +} + +static bool +service_list_processes_close_listeners(struct service_list *service_list) +{ + struct service *service; + bool ret = FALSE; + + array_foreach_elem(&service_list->services, service) { + if (service_processes_close_listeners(service)) + ret = TRUE; + } + return ret; +} + +static void services_monitor_wait_and_kill(struct service_list *service_list) +{ + /* we've notified all children that the master is dead. + now wait for the children to either die or to tell that + they're no longer listening for new connections. */ + services_monitor_wait(service_list); + + /* Even if the waiting stopped early because all the process_avail==0, + it can mean that there are processes that have the listener socket + open (just not actively being listened to). We'll need to make sure + that those sockets are closed before we exit, so that a restart + won't fail. Do this by sending SIGQUIT to all the child processes + that are left, which are handled by lib-master to immediately close + the listener in the signal handler itself. */ + if (service_list_processes_close_listeners(service_list)) { + /* SIGQUITs were sent. wait a little bit to make sure they're + also processed before quitting. 
*/ + i_sleep_msecs(1000); + } +} + +void services_monitor_stop(struct service_list *service_list, bool wait) +{ + struct service *service; + + array_foreach_elem(&service_list->services, service) + service_monitor_close_dead_pipe(service); + + if (wait) + services_monitor_wait_and_kill(service_list); + + io_remove(&service_list->io_master); + + array_foreach_elem(&service_list->services, service) + service_monitor_stop(service); + + services_log_deinit(service_list); +} + +static bool +service_process_failure(struct service_process *process, int status) +{ + struct service *service = process->service; + bool throttle; + + service_process_log_status_error(process, status); + throttle = process->to_status != NULL; + if (!throttle && !service->have_successful_exits) { + /* this service has seen no successful exits yet. + try to avoid failure storms by throttling the service if it + only keeps failing rapidly. this is no longer done after + one success to avoid intentional DoSing, in case attacker + finds a way to quickly crash his own session. 
*/ + if (service->exit_failure_last != ioloop_time) { + service->exit_failure_last = ioloop_time; + service->exit_failures_in_sec = 0; + } + if (++service->exit_failures_in_sec > SERVICE_MAX_EXIT_FAILURES_IN_SEC) + throttle = TRUE; + } + service_process_notify_add(service_anvil_global->kills, process); + return throttle; +} + +void services_monitor_reap_children(void) +{ + struct service_process *process; + struct service *service; + pid_t pid; + int status; + bool service_stopped, throttle; + + while ((pid = waitpid(-1, &status, WNOHANG)) > 0) { + process = hash_table_lookup(service_pids, POINTER_CAST(pid)); + if (process == NULL) { + i_error("waitpid() returned unknown PID %s", + dec2str(pid)); + continue; + } + + service = process->service; + if (status == 0) { + /* success - one success resets all failures */ + service->have_successful_exits = TRUE; + service->exit_failures_in_sec = 0; + service->throttle_msecs = + SERVICE_STARTUP_FAILURE_THROTTLE_MIN_MSECS; + throttle = FALSE; + } else { + throttle = service_process_failure(process, status); + } + if (service->type == SERVICE_TYPE_ANVIL) + service_anvil_process_destroyed(process); + + /* if we're reloading, we may get here with a service list + that's going to be destroyed after this process is + destroyed. keep the list referenced until we're done. 
*/ + service_list_ref(service->list); + service_process_destroy(process); + + if (throttle) + service_monitor_throttle(service); + service_stopped = service->status_fd[0] == -1; + if (!service_stopped && !service->list->destroying) { + service_monitor_start_extra_avail(service); + /* if there are no longer listening processes, + start listening for more */ + if (service->to_throttle != NULL) { + /* throttling */ + } else if (service == service->list->log && + service->process_count == 0) { + /* log service must always be running */ + if (service_process_create(service) == NULL) + service_monitor_throttle(service); + } else { + service_monitor_listen_start(service); + } + } + service_list_unref(service->list); + } +} |