diff options
Diffstat (limited to 'src/master/service-process.c')
-rw-r--r-- | src/master/service-process.c | 676 |
1 files changed, 676 insertions, 0 deletions
diff --git a/src/master/service-process.c b/src/master/service-process.c new file mode 100644 index 0000000..24e68bf --- /dev/null +++ b/src/master/service-process.c @@ -0,0 +1,676 @@ +/* Copyright (c) 2005-2018 Dovecot authors, see the included COPYING file */ + +#include "common.h" +#include "array.h" +#include "aqueue.h" +#include "ioloop.h" +#include "istream.h" +#include "ostream.h" +#include "write-full.h" +#include "base64.h" +#include "hash.h" +#include "str.h" +#include "strescape.h" +#include "llist.h" +#include "hostpid.h" +#include "env-util.h" +#include "restrict-access.h" +#include "restrict-process-size.h" +#include "eacces-error.h" +#include "master-service.h" +#include "master-service-settings.h" +#include "dup2-array.h" +#include "service.h" +#include "service-anvil.h" +#include "service-listen.h" +#include "service-log.h" +#include "service-process-notify.h" +#include "service-process.h" + +#include <unistd.h> +#include <fcntl.h> +#include <syslog.h> +#include <signal.h> +#include <sys/wait.h> + +static void service_reopen_inet_listeners(struct service *service) +{ + struct service_listener *const *listeners; + unsigned int i, count; + int old_fd; + + listeners = array_get(&service->listeners, &count); + for (i = 0; i < count; i++) { + if (!listeners[i]->reuse_port || listeners[i]->fd == -1) + continue; + + old_fd = listeners[i]->fd; + listeners[i]->fd = -1; + if (service_listener_listen(listeners[i]) < 0) + listeners[i]->fd = old_fd; + } +} + +static void +service_dup_fds(struct service *service) +{ + struct service_listener *const *listeners; + ARRAY_TYPE(dup2) dups; + string_t *listener_settings; + int fd = MASTER_LISTEN_FD_FIRST; + unsigned int i, count, socket_listener_count; + + /* stdin/stdout is already redirected to /dev/null. Other master fds + should have been opened with fd_close_on_exec() so we don't have to + worry about them. + + because the destination fd might be another one's source fd we have + to be careful not to overwrite anything. dup() the fd when needed */ + + socket_listener_count = 0; + listeners = array_get(&service->listeners, &count); + t_array_init(&dups, count + 10); + + switch (service->type) { + case SERVICE_TYPE_LOG: + i_assert(fd == MASTER_LISTEN_FD_FIRST); + services_log_dup2(&dups, service->list, fd, + &socket_listener_count); + fd += socket_listener_count; + break; + case SERVICE_TYPE_ANVIL: + dup2_append(&dups, service_anvil_global->log_fdpass_fd[0], + MASTER_ANVIL_LOG_FDPASS_FD); + /* nonblocking anvil fd must be the first one. anvil treats it + as the master's fd */ + dup2_append(&dups, service_anvil_global->nonblocking_fd[0], fd++); + dup2_append(&dups, service_anvil_global->blocking_fd[0], fd++); + socket_listener_count += 2; + break; + default: + break; + } + + /* add listeners */ + listener_settings = t_str_new(256); + for (i = 0; i < count; i++) { + if (listeners[i]->fd != -1) { + str_truncate(listener_settings, 0); + str_append_tabescaped(listener_settings, listeners[i]->name); + + if (listeners[i]->type == SERVICE_LISTENER_INET) { + if (listeners[i]->set.inetset.set->ssl) + str_append(listener_settings, "\tssl"); + if (listeners[i]->set.inetset.set->haproxy) + str_append(listener_settings, "\thaproxy"); + } + + dup2_append(&dups, listeners[i]->fd, fd++); + + env_put(t_strdup_printf("SOCKET%d_SETTINGS", + socket_listener_count), + str_c(listener_settings)); + socket_listener_count++; + } + } + + if (service->login_notify_fd != -1) { + dup2_append(&dups, service->login_notify_fd, + MASTER_LOGIN_NOTIFY_FD); + } + switch (service->type) { + case SERVICE_TYPE_LOG: + case SERVICE_TYPE_ANVIL: + case SERVICE_TYPE_CONFIG: + dup2_append(&dups, dev_null_fd, MASTER_ANVIL_FD); + break; + case SERVICE_TYPE_UNKNOWN: + case SERVICE_TYPE_LOGIN: + case SERVICE_TYPE_STARTUP: + case SERVICE_TYPE_WORKER: + dup2_append(&dups, service_anvil_global->blocking_fd[1], + MASTER_ANVIL_FD); + break; + } + dup2_append(&dups, service->status_fd[1], MASTER_STATUS_FD); + if (service->type != SERVICE_TYPE_ANVIL) { + dup2_append(&dups, service->master_dead_pipe_fd[1], + MASTER_DEAD_FD); + } else { + dup2_append(&dups, global_master_dead_pipe_fd[1], + MASTER_DEAD_FD); + } + + if (service->type == SERVICE_TYPE_LOG) { + /* keep stderr as-is. this is especially important when + log_path=/dev/stderr, but might be helpful even in other + situations for logging startup errors */ + } else { + /* set log file to stderr. dup2() here immediately so that + we can set up logging to it without causing any log messages + to be lost. */ + i_assert(service->log_fd[1] != -1); + + env_put("LOG_SERVICE", "1"); + if (dup2(service->log_fd[1], STDERR_FILENO) < 0) + i_fatal("dup2(log fd) failed: %m"); + i_set_failure_internal(); + } + + /* Switch log writing back to stderr before the log fds are closed. + There's no guarantee that writing to stderr is visible anywhere, but + it's better than the process just dying with FATAL_LOGWRITE. */ + i_set_failure_file("/dev/stderr", + t_strdup_printf("service(%s): ", service->set->name)); + + /* make sure we don't leak syslog fd. try to do it as late as possible, + but also before dup2()s in case syslog fd is one of them. */ + closelog(); + + if (dup2_array(&dups) < 0) + i_fatal("service(%s): dup2s failed", service->set->name); + + i_assert(fd == MASTER_LISTEN_FD_FIRST + (int)socket_listener_count); + env_put("SOCKET_COUNT", dec2str(socket_listener_count)); +} + +static void +drop_privileges(struct service *service) +{ + struct restrict_access_settings rset; + bool allow_root; + size_t len; + + if (service->vsz_limit != 0) + restrict_process_size(service->vsz_limit); + + restrict_access_init(&rset); + rset.uid = service->uid; + rset.gid = service->gid; + rset.privileged_gid = service->privileged_gid; + rset.chroot_dir = *service->set->chroot == '\0' ? NULL : + service->set->chroot; + if (rset.chroot_dir != NULL) { + /* drop trailing / if it exists */ + len = strlen(rset.chroot_dir); + if (rset.chroot_dir[len-1] == '/') + rset.chroot_dir = t_strndup(rset.chroot_dir, len-1); + } + rset.extra_groups = service->extra_gids; + + restrict_access_set_env(&rset); + if (service->set->drop_priv_before_exec) { + allow_root = service->type != SERVICE_TYPE_LOGIN; + restrict_access(&rset, + allow_root ? RESTRICT_ACCESS_FLAG_ALLOW_ROOT : 0, + NULL); + } +} + +static void service_process_setup_config_environment(struct service *service) +{ + const struct master_service_settings *set = service->list->service_set; + + switch (service->type) { + case SERVICE_TYPE_CONFIG: + env_put(MASTER_CONFIG_FILE_ENV, service->config_file_path); + break; + case SERVICE_TYPE_LOG: + /* give the log's configuration directly, so it won't depend + on config process */ + env_put("DOVECONF_ENV", "1"); + env_put("LOG_PATH", set->log_path); + env_put("INFO_LOG_PATH", set->info_log_path); + env_put("DEBUG_LOG_PATH", set->debug_log_path); + env_put("LOG_TIMESTAMP", set->log_timestamp); + env_put("SYSLOG_FACILITY", set->syslog_facility); + env_put("INSTANCE_NAME", set->instance_name); + if (set->verbose_proctitle) + env_put("VERBOSE_PROCTITLE", "1"); + env_put("SSL", "no"); + break; + default: + env_put(MASTER_CONFIG_FILE_ENV, + services_get_config_socket_path(service->list)); + break; + } +} + +static void +service_process_setup_environment(struct service *service, unsigned int uid, + const char *hostdomain) +{ + const struct master_service_settings *service_set = + service->list->service_set; + master_service_env_clean(); + + env_put(MASTER_IS_PARENT_ENV, "1"); + service_process_setup_config_environment(service); + env_put(MASTER_SERVICE_ENV, service->set->name); + env_put(MASTER_CLIENT_LIMIT_ENV, dec2str(service->client_limit)); + env_put(MASTER_PROCESS_LIMIT_ENV, dec2str(service->process_limit)); + env_put(MASTER_PROCESS_MIN_AVAIL_ENV, + dec2str(service->set->process_min_avail)); + env_put(MASTER_SERVICE_IDLE_KILL_ENV, dec2str(service->idle_kill)); + if (service->set->service_count != 0) { + env_put(MASTER_SERVICE_COUNT_ENV, + dec2str(service->set->service_count)); + } + env_put(MASTER_UID_ENV, dec2str(uid)); + env_put(MY_HOSTNAME_ENV, my_hostname); + env_put(MY_HOSTDOMAIN_ENV, hostdomain); + + if (service_set->verbose_proctitle) + env_put(MASTER_VERBOSE_PROCTITLE_ENV, "1"); + if (!service->set->master_set->version_ignore) + env_put(MASTER_DOVECOT_VERSION_ENV, PACKAGE_VERSION); + + if (service_set->stats_writer_socket_path[0] == '\0') + ; /* stats-writer socket disabled */ + else if (service->set->chroot[0] != '\0') { + /* In a chroot - expect stats-writer socket to be in the + current directory. */ + env_put(DOVECOT_STATS_WRITER_SOCKET_PATH, + service_set->stats_writer_socket_path); + } else { + env_put(DOVECOT_STATS_WRITER_SOCKET_PATH, + t_strdup_printf("%s/%s", service_set->base_dir, + service_set->stats_writer_socket_path)); + } + if (ssl_manual_key_password != NULL && service->have_inet_listeners) { + /* manually given SSL password. give it only to services + that have inet listeners. */ + env_put(MASTER_SSL_KEY_PASSWORD_ENV, ssl_manual_key_password); + } + if (service->type == SERVICE_TYPE_ANVIL && + service_anvil_global->restarted) + env_put("ANVIL_RESTARTED", "1"); + env_put(DOVECOT_LOG_DEBUG_ENV, service_set->log_debug); +} + +static void service_process_status_timeout(struct service_process *process) +{ + service_error(process->service, + "Initial status notification not received in %d " + "seconds, killing the process", + SERVICE_FIRST_STATUS_TIMEOUT_SECS); + if (kill(process->pid, SIGKILL) < 0 && errno != ESRCH) { + service_error(process->service, "kill(%s, SIGKILL) failed: %m", + dec2str(process->pid)); + } + timeout_remove(&process->to_status); +} + +struct service_process *service_process_create(struct service *service) +{ + static unsigned int uid_counter = 0; + struct service_process *process; + unsigned int uid = ++uid_counter; + const char *hostdomain; + pid_t pid; + bool process_forked; + + i_assert(service->status_fd[0] != -1); + + if (service->to_throttle != NULL) { + /* throttling service, don't create new processes */ + return NULL; + } + if (service->list->destroying) { + /* these services are being destroyed, no point in creating + new processes now */ + return NULL; + } + /* look this up before fork()ing so that it gets cached for all the + future lookups. */ + hostdomain = my_hostdomain(); + + if (service->type == SERVICE_TYPE_ANVIL && + service_anvil_global->pid != 0) { + pid = service_anvil_global->pid; + uid = service_anvil_global->uid; + process_forked = FALSE; + } else { + pid = fork(); + process_forked = TRUE; + service->list->fork_counter++; + } + + if (pid < 0) { + int fork_errno = errno; + rlim_t limit; + const char *limit_str = ""; + + if (fork_errno == EAGAIN && + restrict_get_process_limit(&limit) == 0) { + limit_str = t_strdup_printf(" (ulimit -u %llu reached?)", + (unsigned long long)limit); + } + errno = fork_errno; + service_error(service, "fork() failed: %m%s", limit_str); + return NULL; + } + if (pid == 0) { + /* child */ + service_process_setup_environment(service, uid, hostdomain); + service_reopen_inet_listeners(service); + service_dup_fds(service); + drop_privileges(service); + process_exec(service->executable); + } + i_assert(hash_table_lookup(service_pids, POINTER_CAST(pid)) == NULL); + + process = i_new(struct service_process, 1); + process->service = service; + process->refcount = 1; + process->pid = pid; + process->uid = uid; + if (process_forked) { + process->to_status = + timeout_add(SERVICE_FIRST_STATUS_TIMEOUT_SECS * 1000, + service_process_status_timeout, process); + } + + process->available_count = service->client_limit; + process->idle_start = ioloop_time; + service->process_count_total++; + service->process_count++; + service->process_avail++; + service->process_idling++; + DLLIST2_APPEND(&service->idle_processes_head, + &service->idle_processes_tail, process); + + service_list_ref(service->list); + hash_table_insert(service_pids, POINTER_CAST(process->pid), process); + + if (service->type == SERVICE_TYPE_ANVIL && process_forked) + service_anvil_process_created(process); + return process; +} + +void service_process_destroy(struct service_process *process) +{ + struct service *service = process->service; + struct service_list *service_list = service->list; + + if (process->idle_start == 0) + DLLIST_REMOVE(&service->busy_processes, process); + else { + DLLIST2_REMOVE(&service->idle_processes_head, + &service->idle_processes_tail, process); + i_assert(service->process_idling > 0); + service->process_idling--; + service->process_idling_lowwater_since_kills = + I_MIN(service->process_idling_lowwater_since_kills, + service->process_idling); + } + hash_table_remove(service_pids, POINTER_CAST(process->pid)); + + if (process->available_count > 0) { + i_assert(service->process_avail > 0); + service->process_avail--; + } + i_assert(service->process_count > 0); + service->process_count--; + i_assert(service->process_avail <= service->process_count); + + timeout_remove(&process->to_status); + timeout_remove(&process->to_idle_kill); + if (service->list->log_byes != NULL) + service_process_notify_add(service->list->log_byes, process); + + process->destroyed = TRUE; + service_process_unref(process); + + if (service->process_count < service->process_limit && + service->type == SERVICE_TYPE_LOGIN) + service_login_notify(service, FALSE); + + service_list_unref(service_list); +} + +void service_process_ref(struct service_process *process) +{ + i_assert(process->refcount > 0); + + process->refcount++; +} + +void service_process_unref(struct service_process *process) +{ + i_assert(process->refcount > 0); + + if (--process->refcount > 0) + return; + + i_assert(process->destroyed); + i_free(process); +} + +static const char * +get_exit_status_message(struct service *service, enum fatal_exit_status status) +{ + string_t *str; + + switch (status) { + case FATAL_LOGOPEN: + return "Can't open log file"; + case FATAL_LOGWRITE: + return "Can't write to log file"; + case FATAL_LOGERROR: + return "Internal logging error"; + case FATAL_OUTOFMEM: + str = t_str_new(128); + str_append(str, "Out of memory"); + if (service->vsz_limit != 0) { + str_printfa(str, " (service %s { vsz_limit=%"PRIuUOFF_T" MB }, " + "you may need to increase it)", + service->set->name, + service->vsz_limit/1024/1024); + } + if (getenv("CORE_OUTOFMEM") == NULL) + str_append(str, " - set CORE_OUTOFMEM=1 environment to get core dump"); + return str_c(str); + case FATAL_EXEC: + return "exec() failed"; + + case FATAL_DEFAULT: + return "Fatal failure"; + } + + return NULL; +} + +static bool linux_proc_fs_suid_is_dumpable(unsigned int *value_r) +{ + int fd = open(LINUX_PROC_FS_SUID_DUMPABLE, O_RDONLY); + if (fd == -1) { + /* we already checked that it exists - shouldn't get here */ + i_error("open(%s) failed: %m", LINUX_PROC_FS_SUID_DUMPABLE); + have_proc_fs_suid_dumpable = FALSE; + return FALSE; + } + char buf[10]; + ssize_t ret = read(fd, buf, sizeof(buf)-1); + if (ret < 0) { + i_error("read(%s) failed: %m", LINUX_PROC_FS_SUID_DUMPABLE); + have_proc_fs_suid_dumpable = FALSE; + *value_r = 0; + } else { + buf[ret] = '\0'; + if (ret > 0 && buf[ret-1] == '\n') + buf[ret-1] = '\0'; + if (str_to_uint(buf, value_r) < 0) + *value_r = 0; + } + i_close_fd(&fd); + return *value_r != 0; +} + +static bool linux_is_absolute_core_pattern(void) +{ + int fd = open(LINUX_PROC_SYS_KERNEL_CORE_PATTERN, O_RDONLY); + if (fd == -1) { + /* we already checked that it exists - shouldn't get here */ + i_error("open(%s) failed: %m", LINUX_PROC_SYS_KERNEL_CORE_PATTERN); + have_proc_sys_kernel_core_pattern = FALSE; + return FALSE; + } + char buf[10]; + ssize_t ret = read(fd, buf, sizeof(buf)-1); + if (ret < 0) { + i_error("read(%s) failed: %m", LINUX_PROC_SYS_KERNEL_CORE_PATTERN); + have_proc_sys_kernel_core_pattern = FALSE; + buf[0] = '\0'; + } + i_close_fd(&fd); + return buf[0] == '/' || buf[0] == '|'; +} + +static void +log_coredump(struct service *service, string_t *str, int status) +{ +#define CORE_DUMP_URL "https://dovecot.org/bugreport.html#coredumps" +#ifdef WCOREDUMP + int signum = WTERMSIG(status); + unsigned int dumpable; + + if (WCOREDUMP(status) != 0) { + str_append(str, " (core dumped)"); + return; + } + + if (signum != SIGABRT && signum != SIGSEGV && signum != SIGBUS) + return; + + /* let's try to figure out why we didn't get a core dump */ + if (core_dumps_disabled) { + str_printfa(str, " (core dumps disabled - "CORE_DUMP_URL")"); + return; + } + str_append(str, " (core not dumped - "CORE_DUMP_URL); + + /* If we're running on Linux, the best way to get core dumps is to set + fs.suid_dumpable=2 and sys.kernel.core_pattern to be an absolute + path. */ + if (!have_proc_fs_suid_dumpable) + ; + else if (!linux_proc_fs_suid_is_dumpable(&dumpable)) { + str_printfa(str, " - set %s to 2)", LINUX_PROC_FS_SUID_DUMPABLE); + return; + } else if (dumpable == 2 && have_proc_sys_kernel_core_pattern && + !linux_is_absolute_core_pattern()) { + str_printfa(str, " - set %s to absolute path)", + LINUX_PROC_SYS_KERNEL_CORE_PATTERN); + return; + } else if (dumpable == 1 || have_proc_sys_kernel_core_pattern) { + str_append(str, " - core wasn't writable?)"); + return; + } + +#ifndef HAVE_PR_SET_DUMPABLE + if (!service->set->drop_priv_before_exec && service->uid != 0) { + str_printfa(str, " - set service %s " + "{ drop_priv_before_exec=yes })", + service->set->name); + return; + } + if (*service->set->privileged_group != '\0' && service->uid != 0) { + str_printfa(str, " - service %s " + "{ privileged_group } prevented it)", + service->set->name); + return; + } +#else + if (!service->set->login_dump_core && + service->type == SERVICE_TYPE_LOGIN) { + str_printfa(str, " - add -D parameter to " + "service %s { executable }", service->set->name); + return; + } +#endif + if (service->set->chroot[0] != '\0') { + str_printfa(str, " - try to clear " + "service %s { chroot = } )", service->set->name); + return; + } + str_append_c(str, ')'); +#endif +} + +static void +service_process_get_status_error(string_t *str, struct service_process *process, + int status, bool *default_fatal_r) +{ + struct service *service = process->service; + const char *msg; + + *default_fatal_r = FALSE; + + str_printfa(str, "service(%s): child %s ", service->set->name, + dec2str(process->pid)); + if (WIFSIGNALED(status)) { + str_printfa(str, "killed with signal %d", WTERMSIG(status)); + log_coredump(service, str, status); + return; + } + if (!WIFEXITED(status)) { + str_printfa(str, "died with status %d", status); + return; + } + + status = WEXITSTATUS(status); + if (status == 0) { + str_truncate(str, 0); + return; + } + str_printfa(str, "returned error %d", status); + + msg = get_exit_status_message(service, status); + if (msg != NULL) + str_printfa(str, " (%s)", msg); + + if (status == FATAL_DEFAULT) + *default_fatal_r = TRUE; +} + +static void service_process_log(struct service_process *process, + bool default_fatal, const char *str) +{ + const char *data; + + if (process->service->log_fd[1] == -1) { + i_error("%s", str); + return; + } + + /* log it via the log process in charge of handling + this process's logging */ + data = t_strdup_printf("%d %s %s %s\n", + process->service->log_process_internal_fd, + dec2str(process->pid), + default_fatal ? "DEFAULT-FATAL" : "FATAL", str); + if (write(process->service->list->master_log_fd[1], + data, strlen(data)) < 0) { + i_error("write(log process) failed: %m"); + i_error("%s", str); + } +} + +void service_process_log_status_error(struct service_process *process, + int status) +{ + if (WIFEXITED(status) && WEXITSTATUS(status) == 0) { + /* fast path */ + return; + } + T_BEGIN { + string_t *str = t_str_new(256); + bool default_fatal; + + service_process_get_status_error(str, process, status, + &default_fatal); + if (str_len(str) > 0) + service_process_log(process, default_fatal, str_c(str)); + } T_END; +} |