1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
|
// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
// vim: ts=8 sw=2 smarttab
/*
* Ceph - scalable distributed file system
*
* Copyright (C) 2014 UnitedStack <haomai@unitedstack.com>
*
* Author: Haomai Wang <haomaiwang@gmail.com>
*
* This is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License version 2.1, as published by the Free Software
* Foundation. See file COPYING.
*
*/
#ifndef CEPH_ASYNCMESSENGER_H
#define CEPH_ASYNCMESSENGER_H
#include <map>
#include <mutex>
#include "include/types.h"
#include "include/xlist.h"
#include "include/spinlock.h"
#include "include/unordered_map.h"
#include "include/unordered_set.h"
#include "common/Mutex.h"
#include "common/Cond.h"
#include "common/Thread.h"
#include "msg/SimplePolicyMessenger.h"
#include "msg/DispatchQueue.h"
#include "AsyncConnection.h"
#include "Event.h"
#include "include/ceph_assert.h"
class AsyncMessenger;
/**
 * Listening side of an AsyncMessenger.
 *
 * If the Messenger binds to a specific address, the Processor runs
 * and listens for incoming connections.
 *
 * Ownership: the Processor owns `listen_handler` and deletes it in its
 * destructor. Copying is therefore disabled; an implicit member-wise copy
 * would leave two Processors deleting the same handler.
 */
class Processor {
  AsyncMessenger *msgr;                 ///< messenger passed to the constructor (not deleted here)
  NetHandler net;
  Worker *worker;                       ///< worker passed to the constructor (not deleted here)
  vector<ServerSocket> listen_sockets;
  EventCallbackRef listen_handler;      ///< owned; released in ~Processor()

  class C_processor_accept;

public:
  Processor(AsyncMessenger *r, Worker *w, CephContext *c);
  ~Processor() { delete listen_handler; }

  // Non-copyable: see the ownership note on listen_handler above.
  Processor(const Processor&) = delete;
  Processor& operator=(const Processor&) = delete;

  void stop();

  /**
   * Bind to the given addresses.
   *
   * NOTE(review): parameter roles below are inferred from their names and
   * types only; confirm against the out-of-line definition.
   *
   * @param bind_addrs   addresses to bind to
   * @param avoid_ports  ports that should not be used
   * @param bound_addrs  out-parameter receiving the addresses actually bound
   *
   * @return an int status code (convention not visible in this header)
   */
  int bind(const entity_addrvec_t &bind_addrs,
           const set<int>& avoid_ports,
           entity_addrvec_t* bound_addrs);

  void start();
  void accept();
};
/*
* AsyncMessenger is represented for maintaining a set of asynchronous connections,
* it may own a bind address and the accepted connections will be managed by
* AsyncMessenger.
*
*/
class AsyncMessenger : public SimplePolicyMessenger {
// First we have the public Messenger interface implementation...
public:
/**
* Initialize the AsyncMessenger!
*
* @param cct The CephContext to use
* @param name The name to assign ourselves
* _nonce A unique ID to use for this AsyncMessenger. It should not
* be a value that will be repeated if the daemon restarts.
*/
AsyncMessenger(CephContext *cct, entity_name_t name, const std::string &type,
string mname, uint64_t _nonce);
/**
* Destroy the AsyncMessenger. Pretty simple since all the work is done
* elsewhere.
*/
~AsyncMessenger() override;
/** @defgroup Accessors
* @{
*/
bool set_addr_unknowns(const entity_addrvec_t &addr) override;
void set_addrs(const entity_addrvec_t &addrs) override;
int get_dispatch_queue_len() override {
return dispatch_queue.get_queue_len();
}
double get_dispatch_queue_max_age(utime_t now) override {
return dispatch_queue.get_max_age(now);
}
/** @} Accessors */
/**
* @defgroup Configuration functions
* @{
*/
void set_cluster_protocol(int p) override {
ceph_assert(!started && !did_bind);
cluster_protocol = p;
}
int bind(const entity_addr_t& bind_addr) override;
int rebind(const set<int>& avoid_ports) override;
int client_bind(const entity_addr_t& bind_addr) override;
int bindv(const entity_addrvec_t& bind_addrs) override;
bool should_use_msgr2() override;
/** @} Configuration functions */
/**
* @defgroup Startup/Shutdown
* @{
*/
int start() override;
void wait() override;
int shutdown() override;
/** @} // Startup/Shutdown */
/**
* @defgroup Messaging
* @{
*/
int send_to(Message *m, int type, const entity_addrvec_t& addrs) override;
/** @} // Messaging */
/**
* @defgroup Connection Management
* @{
*/
ConnectionRef connect_to(int type,
const entity_addrvec_t& addrs) override;
ConnectionRef get_loopback_connection() override;
void mark_down(const entity_addr_t& addr) override {
mark_down_addrs(entity_addrvec_t(addr));
}
void mark_down_addrs(const entity_addrvec_t& addrs) override;
void mark_down_all() override {
shutdown_connections(true);
}
/** @} // Connection Management */
/**
* @defgroup Inner classes
* @{
*/
/**
* @} // Inner classes
*/
protected:
/**
* @defgroup Messenger Interfaces
* @{
*/
/**
* Start up the DispatchQueue thread once we have somebody to dispatch to.
*/
void ready() override;
/** @} // Messenger Interfaces */
private:
/**
* @defgroup Utility functions
* @{
*/
/**
* Create a connection associated with the given entity (of the given type).
* Initiate the connection. (This function returning does not guarantee
* connection success.)
*
* @param addrs The address(es) of the entity to connect to.
* @param type The peer type of the entity at the address.
*
* @return a pointer to the newly-created connection. Caller does not own a
* reference; take one if you need it.
*/
AsyncConnectionRef create_connect(const entity_addrvec_t& addrs, int type);
/**
* Queue up a Message for delivery to the entity specified
* by addr and dest_type.
* submit_message() is responsible for creating
* new AsyncConnection (and closing old ones) as necessary.
*
* @param m The Message to queue up. This function eats a reference.
* @param con The existing Connection to use, or NULL if you don't know of one.
* @param dest_addr The address to send the Message to.
* @param dest_type The peer type of the address we're sending to
* just drop silently under failure.
*/
void submit_message(Message *m, AsyncConnectionRef con,
const entity_addrvec_t& dest_addrs, int dest_type);
void _finish_bind(const entity_addrvec_t& bind_addrs,
const entity_addrvec_t& listen_addrs);
entity_addrvec_t _filter_addrs(int type,
const entity_addrvec_t& addrs);
private:
static const uint64_t ReapDeadConnectionThreshold = 5;
NetworkStack *stack;
std::vector<Processor*> processors;
friend class Processor;
DispatchQueue dispatch_queue;
// the worker run messenger's cron jobs
Worker *local_worker;
std::string ms_type;
/// overall lock used for AsyncMessenger data structures
Mutex lock;
// AsyncMessenger stuff
/// approximately unique ID set by the Constructor for use in entity_addr_t
uint64_t nonce;
/// true, specifying we haven't learned our addr; set false when we find it.
// maybe this should be protected by the lock?
bool need_addr;
/**
* set to bind addresses if bind was called before NetworkStack was ready to
* bind
*/
entity_addrvec_t pending_bind_addrs;
/**
* false; set to true if a pending bind exists
*/
bool pending_bind = false;
/**
* The following aren't lock-protected since you shouldn't be able to race
* the only writers.
*/
/**
* false; set to true if the AsyncMessenger bound to a specific address;
* and set false again by Accepter::stop().
*/
bool did_bind;
/// counter for the global seq our connection protocol uses
__u32 global_seq;
/// lock to protect the global_seq
ceph::spinlock global_seq_lock;
/**
* hash map of addresses to Asyncconnection
*
* NOTE: a Asyncconnection* with state CLOSED may still be in the map but is considered
* invalid and can be replaced by anyone holding the msgr lock
*/
ceph::unordered_map<entity_addrvec_t, AsyncConnectionRef> conns;
/**
* list of connection are in the process of accepting
*
* These are not yet in the conns map.
*/
set<AsyncConnectionRef> accepting_conns;
/**
* list of connection are closed which need to be clean up
*
* Because AsyncMessenger and AsyncConnection follow a lock rule that
* we can lock AsyncMesenger::lock firstly then lock AsyncConnection::lock
* but can't reversed. This rule is aimed to avoid dead lock.
* So if AsyncConnection want to unregister itself from AsyncMessenger,
* we pick up this idea that just queue itself to this set and do lazy
* deleted for AsyncConnection. "_lookup_conn" must ensure not return a
* AsyncConnection in this set.
*/
Mutex deleted_lock;
set<AsyncConnectionRef> deleted_conns;
EventCallbackRef reap_handler;
/// internal cluster protocol version, if any, for talking to entities of the same type.
int cluster_protocol;
Cond stop_cond;
bool stopped;
AsyncConnectionRef _lookup_conn(const entity_addrvec_t& k) {
ceph_assert(lock.is_locked());
auto p = conns.find(k);
if (p == conns.end())
return NULL;
// lazy delete, see "deleted_conns"
Mutex::Locker l(deleted_lock);
if (deleted_conns.erase(p->second)) {
conns.erase(p);
return NULL;
}
return p->second;
}
void _init_local_connection() {
ceph_assert(lock.is_locked());
local_connection->peer_addrs = *my_addrs;
local_connection->peer_type = my_name.type();
local_connection->set_features(CEPH_FEATURES_ALL);
ms_deliver_handle_fast_connect(local_connection.get());
}
void shutdown_connections(bool queue_reset);
public:
/// con used for sending messages to ourselves
AsyncConnectionRef local_connection;
/**
* @defgroup AsyncMessenger internals
* @{
*/
/**
* This wraps _lookup_conn.
*/
AsyncConnectionRef lookup_conn(const entity_addrvec_t& k) {
Mutex::Locker l(lock);
return _lookup_conn(k);
}
int accept_conn(AsyncConnectionRef conn);
bool learned_addr(const entity_addr_t &peer_addr_for_me);
void add_accept(Worker *w, ConnectedSocket cli_socket,
const entity_addr_t &listen_addr,
const entity_addr_t &peer_addr);
NetworkStack *get_stack() {
return stack;
}
uint64_t get_nonce() const {
return nonce;
}
/**
* Increment the global sequence for this AsyncMessenger and return it.
* This is for the connect protocol, although it doesn't hurt if somebody
* else calls it.
*
* @return a global sequence ID that nobody else has seen.
*/
__u32 get_global_seq(__u32 old=0) {
std::lock_guard<ceph::spinlock> lg(global_seq_lock);
if (old > global_seq)
global_seq = old;
__u32 ret = ++global_seq;
return ret;
}
/**
* Get the protocol version we support for the given peer type: either
* a peer protocol (if it matches our own), the protocol version for the
* peer (if we're connecting), or our protocol version (if we're accepting).
*/
int get_proto_version(int peer_type, bool connect) const;
/**
* Fill in the address and peer type for the local connection, which
* is used for delivering messages back to ourself.
*/
void init_local_connection() {
Mutex::Locker l(lock);
_init_local_connection();
}
/**
* Unregister connection from `conns`
*
* See "deleted_conns"
*/
void unregister_conn(AsyncConnectionRef conn) {
Mutex::Locker l(deleted_lock);
conn->get_perf_counter()->dec(l_msgr_active_connections);
deleted_conns.emplace(std::move(conn));
if (deleted_conns.size() >= ReapDeadConnectionThreshold) {
local_worker->center.dispatch_event_external(reap_handler);
}
}
/**
* Reap dead connection from `deleted_conns`
*
* @return the number of dead connections
*
* See "deleted_conns"
*/
int reap_dead();
/**
* @} // AsyncMessenger Internals
*/
} ;
#endif /* CEPH_ASYNCMESSENGER_H */
|