1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
|
// SPDX-License-Identifier: GPL-2.0
/* Multipath TCP token management
* Copyright (c) 2017 - 2019, Intel Corporation.
*
* Note: This code is based on mptcp_ctrl.c from multipath-tcp.org,
* authored by:
*
* Sébastien Barré <sebastien.barre@uclouvain.be>
* Christoph Paasch <christoph.paasch@uclouvain.be>
* Jaakko Korkeaniemi <jaakko.korkeaniemi@aalto.fi>
* Gregory Detal <gregory.detal@uclouvain.be>
* Fabien Duchêne <fabien.duchene@uclouvain.be>
* Andreas Seelinger <Andreas.Seelinger@rwth-aachen.de>
* Lavkesh Lahngir <lavkesh51@gmail.com>
* Andreas Ripke <ripke@neclab.eu>
* Vlad Dogaru <vlad.dogaru@intel.com>
* Octavian Purdila <octavian.purdila@intel.com>
* John Ronan <jronan@tssg.org>
* Catalin Nicutar <catalin.nicutar@gmail.com>
* Brandon Heller <brandonh@stanford.edu>
*/
#define pr_fmt(fmt) "MPTCP: " fmt
#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/memblock.h>
#include <linux/ip.h>
#include <linux/tcp.h>
#include <net/sock.h>
#include <net/inet_common.h>
#include <net/protocol.h>
#include <net/mptcp.h>
#include "protocol.h"
#define TOKEN_MAX_CHAIN_LEN 4
struct token_bucket {
spinlock_t lock;
int chain_len;
struct hlist_nulls_head req_chain;
struct hlist_nulls_head msk_chain;
};
static struct token_bucket *token_hash __read_mostly;
static unsigned int token_mask __read_mostly;
static struct token_bucket *token_bucket(u32 token)
{
return &token_hash[token & token_mask];
}
/* called with bucket lock held */
static struct mptcp_subflow_request_sock *
__token_lookup_req(struct token_bucket *t, u32 token)
{
struct mptcp_subflow_request_sock *req;
struct hlist_nulls_node *pos;
hlist_nulls_for_each_entry_rcu(req, pos, &t->req_chain, token_node)
if (req->token == token)
return req;
return NULL;
}
/* called with bucket lock held */
static struct mptcp_sock *
__token_lookup_msk(struct token_bucket *t, u32 token)
{
struct hlist_nulls_node *pos;
struct sock *sk;
sk_nulls_for_each_rcu(sk, pos, &t->msk_chain)
if (mptcp_sk(sk)->token == token)
return mptcp_sk(sk);
return NULL;
}
static bool __token_bucket_busy(struct token_bucket *t, u32 token)
{
return !token || t->chain_len >= TOKEN_MAX_CHAIN_LEN ||
__token_lookup_req(t, token) || __token_lookup_msk(t, token);
}
static void mptcp_crypto_key_gen_sha(u64 *key, u32 *token, u64 *idsn)
{
/* we might consider a faster version that computes the key as a
* hash of some information available in the MPTCP socket. Use
* random data at the moment, as it's probably the safest option
* in case multiple sockets are opened in different namespaces at
* the same time.
*/
get_random_bytes(key, sizeof(u64));
mptcp_crypto_key_sha(*key, token, idsn);
}
/**
* mptcp_token_new_request - create new key/idsn/token for subflow_request
* @req: the request socket
*
* This function is called when a new mptcp connection is coming in.
*
* It creates a unique token to identify the new mptcp connection,
* a secret local key and the initial data sequence number (idsn).
*
* Returns 0 on success.
*/
int mptcp_token_new_request(struct request_sock *req)
{
struct mptcp_subflow_request_sock *subflow_req = mptcp_subflow_rsk(req);
struct token_bucket *bucket;
u32 token;
mptcp_crypto_key_sha(subflow_req->local_key,
&subflow_req->token,
&subflow_req->idsn);
pr_debug("req=%p local_key=%llu, token=%u, idsn=%llu\n",
req, subflow_req->local_key, subflow_req->token,
subflow_req->idsn);
token = subflow_req->token;
bucket = token_bucket(token);
spin_lock_bh(&bucket->lock);
if (__token_bucket_busy(bucket, token)) {
spin_unlock_bh(&bucket->lock);
return -EBUSY;
}
hlist_nulls_add_head_rcu(&subflow_req->token_node, &bucket->req_chain);
bucket->chain_len++;
spin_unlock_bh(&bucket->lock);
return 0;
}
/**
* mptcp_token_new_connect - create new key/idsn/token for subflow
* @ssk: the socket that will initiate a connection
*
* This function is called when a new outgoing mptcp connection is
* initiated.
*
* It creates a unique token to identify the new mptcp connection,
* a secret local key and the initial data sequence number (idsn).
*
* On success, the mptcp connection can be found again using
* the computed token at a later time, this is needed to process
* join requests.
*
* returns 0 on success.
*/
int mptcp_token_new_connect(struct sock *ssk)
{
struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(ssk);
struct mptcp_sock *msk = mptcp_sk(subflow->conn);
int retries = MPTCP_TOKEN_MAX_RETRIES;
struct sock *sk = subflow->conn;
struct token_bucket *bucket;
again:
mptcp_crypto_key_gen_sha(&subflow->local_key, &subflow->token,
&subflow->idsn);
bucket = token_bucket(subflow->token);
spin_lock_bh(&bucket->lock);
if (__token_bucket_busy(bucket, subflow->token)) {
spin_unlock_bh(&bucket->lock);
if (!--retries)
return -EBUSY;
goto again;
}
pr_debug("ssk=%p, local_key=%llu, token=%u, idsn=%llu\n",
ssk, subflow->local_key, subflow->token, subflow->idsn);
WRITE_ONCE(msk->token, subflow->token);
__sk_nulls_add_node_rcu((struct sock *)msk, &bucket->msk_chain);
bucket->chain_len++;
spin_unlock_bh(&bucket->lock);
sock_prot_inuse_add(sock_net(sk), sk->sk_prot, 1);
return 0;
}
/**
* mptcp_token_accept - replace a req sk with full sock in token hash
* @req: the request socket to be removed
* @msk: the just cloned socket linked to the new connection
*
* Called when a SYN packet creates a new logical connection, i.e.
* is not a join request.
*/
void mptcp_token_accept(struct mptcp_subflow_request_sock *req,
struct mptcp_sock *msk)
{
struct mptcp_subflow_request_sock *pos;
struct sock *sk = (struct sock *)msk;
struct token_bucket *bucket;
sock_prot_inuse_add(sock_net(sk), sk->sk_prot, 1);
bucket = token_bucket(req->token);
spin_lock_bh(&bucket->lock);
/* pedantic lookup check for the moved token */
pos = __token_lookup_req(bucket, req->token);
if (!WARN_ON_ONCE(pos != req))
hlist_nulls_del_init_rcu(&req->token_node);
__sk_nulls_add_node_rcu((struct sock *)msk, &bucket->msk_chain);
spin_unlock_bh(&bucket->lock);
}
bool mptcp_token_exists(u32 token)
{
struct hlist_nulls_node *pos;
struct token_bucket *bucket;
struct mptcp_sock *msk;
struct sock *sk;
rcu_read_lock();
bucket = token_bucket(token);
again:
sk_nulls_for_each_rcu(sk, pos, &bucket->msk_chain) {
msk = mptcp_sk(sk);
if (READ_ONCE(msk->token) == token)
goto found;
}
if (get_nulls_value(pos) != (token & token_mask))
goto again;
rcu_read_unlock();
return false;
found:
rcu_read_unlock();
return true;
}
/**
* mptcp_token_get_sock - retrieve mptcp connection sock using its token
* @net: restrict to this namespace
* @token: token of the mptcp connection to retrieve
*
* This function returns the mptcp connection structure with the given token.
* A reference count on the mptcp socket returned is taken.
*
* returns NULL if no connection with the given token value exists.
*/
struct mptcp_sock *mptcp_token_get_sock(struct net *net, u32 token)
{
struct hlist_nulls_node *pos;
struct token_bucket *bucket;
struct mptcp_sock *msk;
struct sock *sk;
rcu_read_lock();
bucket = token_bucket(token);
again:
sk_nulls_for_each_rcu(sk, pos, &bucket->msk_chain) {
msk = mptcp_sk(sk);
if (READ_ONCE(msk->token) != token ||
!net_eq(sock_net(sk), net))
continue;
if (!refcount_inc_not_zero(&sk->sk_refcnt))
goto not_found;
if (READ_ONCE(msk->token) != token ||
!net_eq(sock_net(sk), net)) {
sock_put(sk);
goto again;
}
goto found;
}
if (get_nulls_value(pos) != (token & token_mask))
goto again;
not_found:
msk = NULL;
found:
rcu_read_unlock();
return msk;
}
EXPORT_SYMBOL_GPL(mptcp_token_get_sock);
/**
* mptcp_token_iter_next - iterate over the token container from given pos
* @net: namespace to be iterated
* @s_slot: start slot number
* @s_num: start number inside the given lock
*
* This function returns the first mptcp connection structure found inside the
* token container starting from the specified position, or NULL.
*
* On successful iteration, the iterator is moved to the next position and
* a reference to the returned socket is acquired.
*/
struct mptcp_sock *mptcp_token_iter_next(const struct net *net, long *s_slot,
long *s_num)
{
struct mptcp_sock *ret = NULL;
struct hlist_nulls_node *pos;
int slot, num = 0;
for (slot = *s_slot; slot <= token_mask; *s_num = 0, slot++) {
struct token_bucket *bucket = &token_hash[slot];
struct sock *sk;
num = 0;
if (hlist_nulls_empty(&bucket->msk_chain))
continue;
rcu_read_lock();
sk_nulls_for_each_rcu(sk, pos, &bucket->msk_chain) {
++num;
if (!net_eq(sock_net(sk), net))
continue;
if (num <= *s_num)
continue;
if (!refcount_inc_not_zero(&sk->sk_refcnt))
continue;
if (!net_eq(sock_net(sk), net)) {
sock_put(sk);
continue;
}
ret = mptcp_sk(sk);
rcu_read_unlock();
goto out;
}
rcu_read_unlock();
}
out:
*s_slot = slot;
*s_num = num;
return ret;
}
EXPORT_SYMBOL_GPL(mptcp_token_iter_next);
/**
* mptcp_token_destroy_request - remove mptcp connection/token
* @req: mptcp request socket dropping the token
*
* Remove the token associated to @req.
*/
void mptcp_token_destroy_request(struct request_sock *req)
{
struct mptcp_subflow_request_sock *subflow_req = mptcp_subflow_rsk(req);
struct mptcp_subflow_request_sock *pos;
struct token_bucket *bucket;
if (hlist_nulls_unhashed(&subflow_req->token_node))
return;
bucket = token_bucket(subflow_req->token);
spin_lock_bh(&bucket->lock);
pos = __token_lookup_req(bucket, subflow_req->token);
if (!WARN_ON_ONCE(pos != subflow_req)) {
hlist_nulls_del_init_rcu(&pos->token_node);
bucket->chain_len--;
}
spin_unlock_bh(&bucket->lock);
}
/**
* mptcp_token_destroy - remove mptcp connection/token
* @msk: mptcp connection dropping the token
*
* Remove the token associated to @msk
*/
void mptcp_token_destroy(struct mptcp_sock *msk)
{
struct sock *sk = (struct sock *)msk;
struct token_bucket *bucket;
struct mptcp_sock *pos;
if (sk_unhashed((struct sock *)msk))
return;
sock_prot_inuse_add(sock_net(sk), sk->sk_prot, -1);
bucket = token_bucket(msk->token);
spin_lock_bh(&bucket->lock);
pos = __token_lookup_msk(bucket, msk->token);
if (!WARN_ON_ONCE(pos != msk)) {
__sk_nulls_del_node_init_rcu((struct sock *)pos);
bucket->chain_len--;
}
spin_unlock_bh(&bucket->lock);
WRITE_ONCE(msk->token, 0);
}
void __init mptcp_token_init(void)
{
int i;
token_hash = alloc_large_system_hash("MPTCP token",
sizeof(struct token_bucket),
0,
20,/* one slot per 1MB of memory */
HASH_ZERO,
NULL,
&token_mask,
0,
64 * 1024);
for (i = 0; i < token_mask + 1; ++i) {
INIT_HLIST_NULLS_HEAD(&token_hash[i].req_chain, i);
INIT_HLIST_NULLS_HEAD(&token_hash[i].msk_chain, i);
spin_lock_init(&token_hash[i].lock);
}
}
#if IS_MODULE(CONFIG_MPTCP_KUNIT_TEST)
EXPORT_SYMBOL_GPL(mptcp_token_new_request);
EXPORT_SYMBOL_GPL(mptcp_token_new_connect);
EXPORT_SYMBOL_GPL(mptcp_token_accept);
EXPORT_SYMBOL_GPL(mptcp_token_destroy_request);
EXPORT_SYMBOL_GPL(mptcp_token_destroy);
#endif
|