diff options
Diffstat (limited to 'src/director/director-request.c')
-rw-r--r-- | src/director/director-request.c | 354 |
1 files changed, 354 insertions, 0 deletions
diff --git a/src/director/director-request.c b/src/director/director-request.c
new file mode 100644
index 0000000..af41f7f
--- /dev/null
+++ b/src/director/director-request.c
@@ -0,0 +1,354 @@
+/* Copyright (c) 2010-2018 Dovecot authors, see the included COPYING file */
+
+#include "lib.h"
+#include "ioloop.h"
+#include "array.h"
+#include "str.h"
+#include "mail-host.h"
+#include "director.h"
+#include "director-request.h"
+/* Seconds a request may stay in the pending queue before
+   director_request_timeout() fails it with a timeout error. */
+#define DIRECTOR_REQUEST_TIMEOUT_SECS 30
+#define RING_NOCONN_WARNING_DELAY_MSECS (2*1000) /* delay before ring_noconn_warning() is run */
+/* Why a request could not be finished right away. The value is also used as
+   an index into delay_reason_strings[], so both must stay in the same
+   order. */
+enum director_request_delay_reason {
+	REQUEST_DELAY_NONE = 0, /* request is not counted as delayed */
+	REQUEST_DELAY_RINGNOTHANDSHAKED, /* dir->ring_handshaked is not set */
+	REQUEST_DELAY_RINGNOTSYNCED, /* dir->ring_synced is not set */
+	REQUEST_DELAY_NOHOSTS, /* mail_host_get_by_hash() returned NULL */
+	REQUEST_DELAY_WEAK, /* user is weak, or was just made weak */
+	REQUEST_DELAY_KILL /* USER_IS_BEING_KILLED() is true for the user */
+};
+/* Text for each director_request_delay_reason value, in enum order. Used
+   only for building the timeout error message. */
+static const char *delay_reason_strings[] = {
+	"unknown",
+	"ring not handshaked",
+	"ring not synced",
+	"no hosts",
+	"weak user",
+	"kill waiting"
+};
+/* State of one lookup request, from director_request() until its callback
+   has been called. */
+struct director_request {
+	struct director *dir;
+
+	struct event *event; /* child of dir->event, log prefix has the hash */
+	time_t create_time; /* ioloop_time when the request was created */
+	unsigned int username_hash;
+	/* not set explicitly on creation - presumably i_new() zero-fills, so
+	   this starts as REQUEST_DELAY_NONE (TODO confirm) */
+	enum director_request_delay_reason delay_reason;
+	char *username_tag; /* i_strdup()ed copy, NULL when the tag is empty */
+
+	director_request_callback *callback;
+	void *context; /* passed through to callback unchanged */
+};
+/* Drop the reference to the request event and free the tag copy and the
+   request structure itself. */
+static void director_request_free(struct director_request *request)
+{
+	event_unref(&request->event);
+	i_free(request->username_tag);
+	i_free(request);
+}
+/* Build the error message for a timed out request into str (which is
+   truncated first) and return it as a C string. user may be NULL, in which
+   case the user specific details are left out. */
+static const char *
+director_request_get_timeout_error(struct director_request *request,
+				   struct user *user, string_t *str)
+{
+	unsigned int secs;
+
+	str_truncate(str, 0);
+	str_printfa(str, "Timeout because %s - queued for %u secs (",
+		    delay_reason_strings[request->delay_reason],
+		    (unsigned int)(ioloop_time - request->create_time));
+
+	if (request->dir->ring_last_sync_time == 0)
+		str_append(str, "Ring has never been synced");
+	else {
+		secs = ioloop_time - request->dir->ring_last_sync_time;
+		if (request->dir->ring_synced)
+			str_printfa(str, "Ring synced for %u secs", secs);
+		else
+			str_printfa(str, "Ring not synced for %u secs", secs);
+	}
+
+	if (user != NULL) {
+		if (user->weak)
+			str_append(str, ", weak user");
+		str_printfa(str, ", user refreshed %u secs ago",
+			    (unsigned int)(ioloop_time - user->timestamp));
+	}
+	str_printfa(str, ", hash=%u", request->username_hash);
+	if (request->username_tag != NULL)
+		str_printfa(str, ", tag=%s", request->username_tag);
+	str_append_c(str, ')');
+	return str_c(str);
+}
+/* Timeout handler for dir->to_request. Fails every request at the front of
+   dir->pending_requests that has been queued for at least
+   DIRECTOR_REQUEST_TIMEOUT_SECS: the callback is called with an error
+   message and the request is freed. The timeout is removed once the queue
+   is empty. */
+static void director_request_timeout(struct director *dir)
+{
+	struct director_request **requestp, *request;
+	struct user *user;
+	const char *errormsg;
+	string_t *str = t_str_new(128); /* reused for every error message */
+
+	while (array_count(&dir->pending_requests) > 0) {
+		requestp = array_front_modifiable(&dir->pending_requests);
+		request = *requestp;
+
+		if (request->create_time +
+		    DIRECTOR_REQUEST_TIMEOUT_SECS > ioloop_time)
+			break; /* requests are appended in creation order, so the rest are presumably newer */
+
+		const char *tag_name = request->username_tag == NULL ? "" :
+			request->username_tag;
+		struct mail_tag *tag = mail_tag_find(dir->mail_hosts, tag_name);
+		user = tag == NULL ? NULL :
+			user_directory_lookup(tag->users, request->username_hash);
+
+		errormsg = director_request_get_timeout_error(request,
+							      user, str);
+		if (user != NULL &&
+		    request->delay_reason == REQUEST_DELAY_WEAK) {
+			/* weakness appears to have gotten stuck. this is a
+			   bug, but try to fix it for future requests by
+			   removing the weakness. */
+			user->weak = FALSE;
+		}
+
+		i_assert(dir->requests_delayed_count > 0);
+		dir->requests_delayed_count--; /* undo the increment done by director_request_continue() */
+
+		array_pop_front(&dir->pending_requests);
+		T_BEGIN {
+			request->callback(NULL, NULL, errormsg, request->context);
+		} T_END;
+		director_request_free(request);
+	}
+
+	if (array_count(&dir->pending_requests) == 0 && dir->to_request != NULL)
+		timeout_remove(&dir->to_request);
+}
+/* Start a lookup for username. The callback is called exactly once from the
+   code in this file: immediately with an error if the username hash cannot
+   be computed, with the chosen host once the request can be finished, or
+   with a timeout error from director_request_timeout(). tag must not be
+   NULL (its first byte is read); an empty tag is stored as NULL. */
+void director_request(struct director *dir, const char *username,
+		      const char *tag,
+		      director_request_callback *callback, void *context)
+{
+	struct director_request *request;
+	unsigned int username_hash;
+
+	if (!director_get_username_hash(dir, username,
+					&username_hash)) {
+		callback(NULL, NULL, "Failed to expand director_username_hash", context);
+		return;
+	}
+
+	dir->num_requests++;
+
+	request = i_new(struct director_request, 1);
+	request->dir = dir;
+	request->create_time = ioloop_time;
+	request->username_hash = username_hash;
+	request->username_tag = tag[0] == '\0' ? NULL : i_strdup(tag);
+	request->callback = callback;
+	request->context = context;
+	request->event = event_create(dir->event);
+	event_set_append_log_prefix(request->event,
+		t_strdup_printf("request: Hash %u ", username_hash));
+
+	if (director_request_continue(request))
+		return; /* finished: callback was called and request is already freed */
+
+	/* need to queue it */
+	if (dir->to_request == NULL) {
+		dir->to_request =
+			timeout_add(DIRECTOR_REQUEST_TIMEOUT_SECS * 1000,
+				    director_request_timeout, dir);
+	}
+	array_push_back(&dir->pending_requests, &request);
+}
+/* Timeout handler for dir->to_handshake_warning. Logs why requests are
+   being delayed, marks the warning as sent and removes the timeout, so it
+   runs only once per scheduling. */
+static void ring_noconn_warning(struct director *dir)
+{
+	if (!dir->ring_handshaked) {
+		e_warning(dir->event, "Delaying all requests "
+			  "until all directors have connected");
+	} else {
+		e_warning(dir->event,
+			  "Delaying new user requests until ring is synced");
+	}
+	dir->ring_handshake_warning_sent = TRUE;
+	timeout_remove(&dir->to_handshake_warning);
+}
+/* Schedule ring_noconn_warning() to run after
+   RING_NOCONN_WARNING_DELAY_MSECS, unless the warning was already sent or
+   is already scheduled. */
+static void ring_log_delayed_warning(struct director *dir)
+{
+	if (dir->ring_handshake_warning_sent ||
+	    dir->to_handshake_warning != NULL)
+		return;
+
+	dir->to_handshake_warning = timeout_add(RING_NOCONN_WARNING_DELAY_MSECS,
+						ring_noconn_warning, dir);
+}
+/* Decide whether a request for an already known user can be finished now.
+   Returns TRUE if the caller may go on with user. Returns FALSE if the
+   request has to wait; request->delay_reason is then set to the reason.
+   May change user->weak as a side effect. */
+static bool
+director_request_existing(struct director_request *request, struct user *user)
+{
+	struct director *dir = request->dir;
+	struct mail_host *host;
+
+	if (USER_IS_BEING_KILLED(user)) {
+		/* delay processing this user's connections until
+		   its existing connections have been killed */
+		request->delay_reason = REQUEST_DELAY_KILL;
+		e_debug(request->event, "waiting for kill to finish");
+		return FALSE;
+	}
+	if (dir->right == NULL && dir->ring_synced) {
+		/* looks like all the other directors have died. we can do
+		   whatever we want without breaking anything. remove the
+		   user's weakness just in case it was set to TRUE when we
+		   had more directors. */
+		user->weak = FALSE;
+		return TRUE;
+	}
+
+	if (user->weak) {
+		/* wait for user to become non-weak */
+		request->delay_reason = REQUEST_DELAY_WEAK;
+		e_debug(request->event, "waiting for weakness");
+		return FALSE;
+	}
+	if (!user_directory_user_is_near_expiring(user->host->tag->users, user))
+		return TRUE;
+
+	/* user is close to being expired. another director may have
+	   already expired it. */
+	host = mail_host_get_by_hash(dir->mail_hosts, user->username_hash,
+				     user->host->tag->name);
+	if (!dir->ring_synced) {
+		/* try again later once ring is synced */
+		request->delay_reason = REQUEST_DELAY_RINGNOTSYNCED;
+		e_debug(request->event, "waiting for sync for making weak");
+		return FALSE;
+	}
+	if (user->host == host) {
+		/* doesn't matter, other directors would
+		   assign the user the same way regardless */
+		e_debug(request->event, "would be weak, but host doesn't change");
+		return TRUE;
+	}
+
+	/* We have to worry about two separate timepoints in here:
+
+	   a) some directors think the user isn't expiring, and
+	   others think the user is near expiring
+
+	   b) some directors think the user is near expiring, and
+	   others think the user has already expired
+
+	   What we don't have to worry about is:
+
+	   !c) some directors think the user isn't expiring, and
+	   others think the user has already expired
+
+	   If !c) happens, the user might get redirected to different backends.
+	   We'll use a large enough timeout between a) and b) states, so that
+	   !c) should never happen.
+
+	   So what we'll do here is:
+
+	   1. Send a USER-WEAK notification to all directors with the new host.
+	   2. Each director receiving USER-WEAK refreshes the user's timestamp
+	   and host, but marks the user as being weak.
+	   3. Once USER-WEAK has reached all directors, a real USER update is
+	   sent, which removes the weak-flag.
+	   4. If a director ever receives a USER update for a weak user, the
+	   USER update overrides the host and removes the weak-flag.
+	   5. Director doesn't let any weak user log in, until the weak-flag
+	   gets removed.
+	*/
+	if (dir->ring_min_version < DIRECTOR_VERSION_WEAK_USERS) {
+		/* weak users not supported by ring currently */
+		return TRUE;
+	} else {
+		user->weak = TRUE;
+		director_update_user_weak(dir, dir->self_host, NULL, NULL, user);
+		request->delay_reason = REQUEST_DELAY_WEAK;
+		e_debug(request->event, "set to weak");
+		return FALSE;
+	}
+}
+/* Try to finish the request. Returns TRUE when the request was finished: the
+   callback has been called with the user's host and the request has been
+   freed. Returns FALSE when the request has to wait; request->delay_reason
+   is then set and the request is left untouched otherwise. Does not update
+   dir->requests_delayed_count - director_request_continue() does that. */
+static bool director_request_continue_real(struct director_request *request)
+{
+	struct director *dir = request->dir;
+	struct mail_host *host;
+	struct user *user;
+	const char *tag;
+	struct mail_tag *mail_tag;
+
+	if (!dir->ring_handshaked) {
+		/* delay requests until ring handshaking is complete */
+		e_debug(request->event, "waiting for handshake");
+		ring_log_delayed_warning(dir);
+		request->delay_reason = REQUEST_DELAY_RINGNOTHANDSHAKED;
+		return FALSE;
+	}
+
+	tag = request->username_tag == NULL ? "" : request->username_tag;
+	mail_tag = mail_tag_find(dir->mail_hosts, tag);
+	user = mail_tag == NULL ? NULL :
+		user_directory_lookup(mail_tag->users, request->username_hash);
+
+	if (user != NULL) {
+		i_assert(user->host->tag == mail_tag);
+		if (!director_request_existing(request, user))
+			return FALSE;
+		user_directory_refresh(mail_tag->users, user);
+		e_debug(request->event, "refreshed timeout to %u",
+			user->timestamp);
+	} else {
+		if (!dir->ring_synced) {
+			/* delay adding new users until ring is again synced */
+			ring_log_delayed_warning(dir);
+			request->delay_reason = REQUEST_DELAY_RINGNOTSYNCED;
+			e_debug(request->event, "waiting for sync for adding");
+			return FALSE;
+		}
+		host = mail_host_get_by_hash(dir->mail_hosts,
+					     request->username_hash, tag);
+		if (host == NULL) {
+			/* all hosts have been removed */
+			request->delay_reason = REQUEST_DELAY_NOHOSTS;
+			e_debug(request->event, "waiting for hosts");
+			return FALSE;
+		}
+		user = user_directory_add(host->tag->users,
+					  request->username_hash,
+					  host, ioloop_time);
+		e_debug(request->event, "added timeout to %u (hosts_hash=%u)",
+			user->timestamp, mail_hosts_hash(dir->mail_hosts));
+	}
+
+	i_assert(!user->weak);
+	director_update_user(dir, dir->self_host, user);
+	T_BEGIN {
+		request->callback(user->host, user->host->hostname,
+				  NULL, request->context);
+	} T_END;
+	director_request_free(request); /* request must not be used after this */
+	return TRUE;
+}
+/* Try to finish a new or previously delayed request, keeping
+   dir->requests_delayed_count in step: a request that was counted as delayed
+   is uncounted first and counted again if it still has to wait. Returns
+   TRUE if the request was finished - it has then been freed and must not be
+   used by the caller - and FALSE if it is still delayed. */
+bool director_request_continue(struct director_request *request)
+{
+	if (request->delay_reason != REQUEST_DELAY_NONE) {
+		i_assert(request->dir->requests_delayed_count > 0);
+		request->dir->requests_delayed_count--;
+	}
+	if (!director_request_continue_real(request)) {
+		i_assert(request->delay_reason != REQUEST_DELAY_NONE);
+		request->dir->requests_delayed_count++;
+		return FALSE;
+	}
+	return TRUE;
+} |