1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
|
/* Copyright (c) 2010-2018 Dovecot authors, see the included COPYING file */
#include "lib.h"
#include "ioloop.h"
#include "array.h"
#include "str.h"
#include "mail-host.h"
#include "director.h"
#include "director-request.h"
/* Seconds a request may wait in dir->pending_requests before
director_request_timeout() fails it. The same value (times 1000) is used
as the interval of the dir->to_request timeout. */
#define DIRECTOR_REQUEST_TIMEOUT_SECS 30
/* Milliseconds that ring_log_delayed_warning() waits before
ring_noconn_warning() logs that requests are being delayed. */
#define RING_NOCONN_WARNING_DELAY_MSECS (2*1000)
/* Reason why a request could not be finished right away. The numeric values
are used as indexes into delay_reason_strings[], so the two must be kept in
the same order. */
enum director_request_delay_reason {
/* no delay reason has been recorded for the request */
REQUEST_DELAY_NONE = 0,
/* dir->ring_handshaked is not set yet */
REQUEST_DELAY_RINGNOTHANDSHAKED,
/* dir->ring_synced is not set, and the request needs a synced ring */
REQUEST_DELAY_RINGNOTSYNCED,
/* mail_host_get_by_hash() found no host for the user */
REQUEST_DELAY_NOHOSTS,
/* the user is weak (user->weak) and must become non-weak first */
REQUEST_DELAY_WEAK,
/* USER_IS_BEING_KILLED() is true for the user */
REQUEST_DELAY_KILL
};
/* Human readable texts for enum director_request_delay_reason, indexed by
the enum value. Used when building the timeout error message. */
static const char *delay_reason_strings[] = {
"unknown",
"ring not handshaked",
"ring not synced",
"no hosts",
"weak user",
"kill waiting"
};
/* State of a single user lookup request, from director_request() until its
callback has been called. */
struct director_request {
/* director the request was made to */
struct director *dir;
/* event created from dir->event; released in director_request_free() */
struct event *event;
/* ioloop_time when the request was created; used for timing it out */
time_t create_time;
/* hash returned by director_get_username_hash() for the username */
unsigned int username_hash;
/* why the request is currently waiting, REQUEST_DELAY_NONE if it isn't */
enum director_request_delay_reason delay_reason;
/* i_strdup()ed copy of the tag, or NULL when the tag was an empty string */
char *username_tag;
/* called once with either the chosen host or an error message */
director_request_callback *callback;
/* opaque pointer passed back to callback */
void *context;
};
/* Release everything owned by a request: the duplicated tag, the event
   reference and finally the request structure itself. The caller must not
   use the request afterwards. */
static void director_request_free(struct director_request *request)
{
	i_free(request->username_tag);
	event_unref(&request->event);
	i_free(request);
}
/* Build the error text given to a request's callback when the request has
   timed out.

   request: the timed out request
   user:    the user entry matching the request, or NULL if there is none
   str:     scratch buffer; it is emptied first and then filled

   Returns str's contents, so the pointer is tied to str. */
static const char *
director_request_get_timeout_error(struct director_request *request,
				   struct user *user, string_t *str)
{
	unsigned int queued_secs, sync_secs;

	queued_secs = (unsigned int)(ioloop_time - request->create_time);

	str_truncate(str, 0);
	str_printfa(str, "Timeout because %s - queued for %u secs (",
		    delay_reason_strings[request->delay_reason],
		    queued_secs);

	/* ring state */
	if (request->dir->ring_last_sync_time != 0) {
		sync_secs = (unsigned int)
			(ioloop_time - request->dir->ring_last_sync_time);
		if (!request->dir->ring_synced)
			str_printfa(str, "Ring not synced for %u secs", sync_secs);
		else
			str_printfa(str, "Ring synced for %u secs", sync_secs);
	} else {
		str_append(str, "Ring has never been synced");
	}

	/* user state, when the user is known */
	if (user != NULL) {
		if (user->weak)
			str_append(str, ", weak user");
		str_printfa(str, ", user refreshed %u secs ago",
			    (unsigned int)(ioloop_time - user->timestamp));
	}

	/* request identity */
	str_printfa(str, ", hash=%u", request->username_hash);
	if (request->username_tag != NULL)
		str_printfa(str, ", tag=%s", request->username_tag);
	str_append_c(str, ')');

	return str_c(str);
}
/* Timeout handler installed in dir->to_request. Fails requests from the
   front of dir->pending_requests for as long as they are at least
   DIRECTOR_REQUEST_TIMEOUT_SECS old, calling each callback with an error
   message. Stops at the first request that is not old enough. Removes the
   timeout once the queue is empty. */
static void director_request_timeout(struct director *dir)
{
	string_t *str = t_str_new(128);

	while (array_count(&dir->pending_requests) != 0) {
		struct director_request **frontp, *req;
		struct user *user = NULL;
		const char *errormsg;

		frontp = array_front_modifiable(&dir->pending_requests);
		req = *frontp;

		if (ioloop_time <
		    req->create_time + DIRECTOR_REQUEST_TIMEOUT_SECS) {
			/* the oldest request hasn't timed out yet */
			break;
		}

		const char *tag_name = "";
		if (req->username_tag != NULL)
			tag_name = req->username_tag;
		struct mail_tag *tag = mail_tag_find(dir->mail_hosts, tag_name);
		if (tag != NULL) {
			user = user_directory_lookup(tag->users,
						     req->username_hash);
		}

		/* build the message before touching user->weak, so that the
		   message still shows the weak state */
		errormsg = director_request_get_timeout_error(req, user, str);
		if (req->delay_reason == REQUEST_DELAY_WEAK && user != NULL) {
			/* the weak flag seems to be stuck, which is a bug.
			   clear it so that later requests for this user
			   aren't blocked by it. */
			user->weak = FALSE;
		}

		i_assert(dir->requests_delayed_count > 0);
		dir->requests_delayed_count--;

		/* unlink from the queue before running the callback */
		array_pop_front(&dir->pending_requests);
		T_BEGIN {
			req->callback(NULL, NULL, errormsg, req->context);
		} T_END;
		director_request_free(req);
	}

	if (dir->to_request != NULL &&
	    array_count(&dir->pending_requests) == 0)
		timeout_remove(&dir->to_request);
}
/* Start a lookup for the given username and tag.

   If the username hash can't be produced, callback is called immediately
   with an error message and nothing else happens. Otherwise a request is
   created and tried right away. If it can't be finished yet, it is appended
   to dir->pending_requests and the request timeout is started if it isn't
   already running.

   tag must be non-NULL; an empty string means "no tag". */
void director_request(struct director *dir, const char *username,
		      const char *tag,
		      director_request_callback *callback, void *context)
{
	struct director_request *req;
	unsigned int hash;

	if (!director_get_username_hash(dir, username, &hash)) {
		callback(NULL, NULL, "Failed to expand director_username_hash", context);
		return;
	}
	dir->num_requests++;

	req = i_new(struct director_request, 1);
	req->dir = dir;
	req->callback = callback;
	req->context = context;
	req->create_time = ioloop_time;
	req->username_hash = hash;
	if (tag[0] != '\0')
		req->username_tag = i_strdup(tag);
	else
		req->username_tag = NULL;

	req->event = event_create(dir->event);
	event_set_append_log_prefix(req->event,
		t_strdup_printf("request: Hash %u ", hash));

	if (!director_request_continue(req)) {
		/* couldn't be finished now - put it into the queue */
		if (dir->to_request == NULL) {
			dir->to_request =
				timeout_add(DIRECTOR_REQUEST_TIMEOUT_SECS * 1000,
					    director_request_timeout, dir);
		}
		array_push_back(&dir->pending_requests, &req);
	}
}
/* Timeout handler for dir->to_handshake_warning. Logs one warning telling
   why requests are being delayed, marks the warning as sent and removes
   the timeout. */
static void ring_noconn_warning(struct director *dir)
{
	if (dir->ring_handshaked) {
		e_warning(dir->event,
			  "Delaying new user requests until ring is synced");
	} else {
		e_warning(dir->event, "Delaying all requests "
			  "until all directors have connected");
	}

	timeout_remove(&dir->to_handshake_warning);
	dir->ring_handshake_warning_sent = TRUE;
}
/* Schedule ring_noconn_warning() to run after
   RING_NOCONN_WARNING_DELAY_MSECS. Does nothing if the warning has already
   been sent or is already scheduled. */
static void ring_log_delayed_warning(struct director *dir)
{
	if (!dir->ring_handshake_warning_sent &&
	    dir->to_handshake_warning == NULL) {
		dir->to_handshake_warning =
			timeout_add(RING_NOCONN_WARNING_DELAY_MSECS,
				    ring_noconn_warning, dir);
	}
}
/* Decide whether a request for an already known user can be served now.

   Returns TRUE if the request may continue with the user's current host.
   Returns FALSE if the request has to wait; request->delay_reason is then
   set to the reason. May clear or set user->weak as a side effect. */
static bool
director_request_existing(struct director_request *request, struct user *user)
{
	struct director *dir = request->dir;
	struct mail_host *hashed_host;

	if (USER_IS_BEING_KILLED(user)) {
		/* the user's old connections are still being killed.
		   don't handle new connections before that is done. */
		request->delay_reason = REQUEST_DELAY_KILL;
		e_debug(request->event, "waiting for kill to finish");
		return FALSE;
	}
	if (dir->ring_synced && dir->right == NULL) {
		/* it seems that every other director is gone, so nothing
		   can be broken by our decisions. the weak flag may be a
		   leftover from the time there were more directors, so
		   clear it. */
		user->weak = FALSE;
		return TRUE;
	}
	if (user->weak) {
		/* the user has to become non-weak first */
		request->delay_reason = REQUEST_DELAY_WEAK;
		e_debug(request->event, "waiting for weakness");
		return FALSE;
	}
	if (!user_directory_user_is_near_expiring(user->host->tag->users, user))
		return TRUE;

	/* the user is about to expire, and some other director may have
	   expired it already. see where the hash would place the user now. */
	hashed_host = mail_host_get_by_hash(dir->mail_hosts,
					    user->username_hash,
					    user->host->tag->name);
	if (!dir->ring_synced) {
		/* retry after the ring has been synced */
		request->delay_reason = REQUEST_DELAY_RINGNOTSYNCED;
		e_debug(request->event, "waiting for sync for making weak");
		return FALSE;
	}
	if (hashed_host == user->host) {
		/* the other directors would pick the same host anyway,
		   so there is nothing to worry about */
		e_debug(request->event, "would be weak, but host doesn't change");
		return TRUE;
	}

	/* There are two points in time that need care:

	   a) some directors think the user isn't expiring, while others
	      think it's near expiring
	   b) some directors think the user is near expiring, while others
	      think it has already expired

	   The case that doesn't need handling is:

	   !c) some directors think the user isn't expiring, while others
	       think it has already expired

	   With !c) the user could be redirected to different backends. The
	   timeout between the a) and b) states is large enough that !c)
	   should never happen.

	   The procedure is:

	   1. A USER-WEAK notification with the new host is sent to all
	      directors.
	   2. A director receiving USER-WEAK refreshes the user's timestamp
	      and host, but marks the user weak.
	   3. After USER-WEAK has reached all the directors, a real USER
	      update is sent. It removes the weak flag.
	   4. Whenever a director receives a USER update for a weak user,
	      the update overrides the host and removes the weak flag.
	   5. A director doesn't let a weak user log in until the weak flag
	      has been removed.
	*/
	if (dir->ring_min_version < DIRECTOR_VERSION_WEAK_USERS) {
		/* the ring doesn't currently support weak users */
		return TRUE;
	}

	user->weak = TRUE;
	director_update_user_weak(dir, dir->self_host, NULL, NULL, user);
	request->delay_reason = REQUEST_DELAY_WEAK;
	e_debug(request->event, "set to weak");
	return FALSE;
}
/* Try to finish a request.

   Returns FALSE if the request has to keep waiting; request->delay_reason
   is then set and the request is left intact. Returns TRUE after the
   callback has been called with the user's host; the request has been
   freed by then and must not be used by the caller. */
static bool director_request_continue_real(struct director_request *request)
{
	struct director *dir = request->dir;
	struct mail_host *host;
	struct user *user;
	const char *tag_name;
	struct mail_tag *tag;

	if (!dir->ring_handshaked) {
		/* nothing is handled before the ring handshake is done */
		e_debug(request->event, "waiting for handshake");
		ring_log_delayed_warning(dir);
		request->delay_reason = REQUEST_DELAY_RINGNOTHANDSHAKED;
		return FALSE;
	}

	tag_name = "";
	if (request->username_tag != NULL)
		tag_name = request->username_tag;
	tag = mail_tag_find(dir->mail_hosts, tag_name);
	user = NULL;
	if (tag != NULL)
		user = user_directory_lookup(tag->users, request->username_hash);

	if (user == NULL) {
		/* unknown user - it has to be added */
		if (!dir->ring_synced) {
			/* new users aren't added while the ring is
			   out of sync */
			ring_log_delayed_warning(dir);
			request->delay_reason = REQUEST_DELAY_RINGNOTSYNCED;
			e_debug(request->event, "waiting for sync for adding");
			return FALSE;
		}
		host = mail_host_get_by_hash(dir->mail_hosts,
					     request->username_hash, tag_name);
		if (host == NULL) {
			/* there are no hosts left */
			request->delay_reason = REQUEST_DELAY_NOHOSTS;
			e_debug(request->event, "waiting for hosts");
			return FALSE;
		}
		user = user_directory_add(host->tag->users,
					  request->username_hash,
					  host, ioloop_time);
		e_debug(request->event, "added timeout to %u (hosts_hash=%u)",
			user->timestamp, mail_hosts_hash(dir->mail_hosts));
	} else {
		/* known user - check that it can be used as is */
		i_assert(user->host->tag == tag);
		if (!director_request_existing(request, user))
			return FALSE;
		user_directory_refresh(tag->users, user);
		e_debug(request->event, "refreshed timeout to %u",
			user->timestamp);
	}

	i_assert(!user->weak);
	director_update_user(dir, dir->self_host, user);
	T_BEGIN {
		host = user->host;
		request->callback(host, host->hostname,
				  NULL, request->context);
	} T_END;
	director_request_free(request);
	return TRUE;
}
/* Try to finish a request while keeping dir->requests_delayed_count
   correct.

   A request that was already counted as delayed is first uncounted. If the
   attempt fails, it is counted (again) and FALSE is returned. On TRUE the
   request has been finished and freed, so it must not be accessed by the
   caller anymore. */
bool director_request_continue(struct director_request *request)
{
	if (request->delay_reason != REQUEST_DELAY_NONE) {
		/* was waiting - drop it from the delayed count for now */
		i_assert(request->dir->requests_delayed_count > 0);
		request->dir->requests_delayed_count--;
	}

	if (director_request_continue_real(request))
		return TRUE;

	/* still (or now) waiting; a reason must have been recorded */
	i_assert(request->delay_reason != REQUEST_DELAY_NONE);
	request->dir->requests_delayed_count++;
	return FALSE;
}
|