// SPDX-License-Identifier: GPL-2.0-only
/* Copyright(c) 2017 Jesper Dangaard Brouer, Red Hat, Inc.
*/
#include <time.h>
#include <errno.h>
#include <stdio.h>
#include <getopt.h>
#include <locale.h>
#include <net/if.h>
#include <signal.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>
#include <bpf/bpf.h>
#include <stdbool.h>
#include <arpa/inet.h>
#include <bpf/libbpf.h>
#include <sys/sysinfo.h>
#include <linux/limits.h>
#include <sys/resource.h>
#include <linux/if_link.h>
#include <xdp/libxdp.h>
#include "logging.h"
#include "xdp-bench.h"
#include "xdp_sample.h"
#include "xdp_redirect_cpumap.skel.h"
static int map_fd;
static int avail_fd;
static int count_fd;
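
/* Default set of sample counters to track; do_redirect_cpumap() extends
 * this mask based on the command-line options.
 */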
static int mask = SAMPLE_RX_CNT | SAMPLE_REDIRECT_ERR_MAP_CNT |
SAMPLE_CPUMAP_ENQUEUE_CNT | SAMPLE_CPUMAP_KTHREAD_CNT |
SAMPLE_EXCEPTION_CNT;

const struct cpumap_opts defaults_redirect_cpumap = {
.mode = XDP_MODE_NATIVE,
.interval = 2,
.qsize = 2048,
.program_mode = CPUMAP_CPU_L4_HASH,
};
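
/* Names of the per-CPU BPF programs, indexed by the program_mode option;
 * these must match the program names in the xdp_redirect_cpumap BPF object.
 */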
static const char *cpumap_prog_names[] = {
"cpumap_no_touch",
"cpumap_touch_data",
"cpumap_round_robin",
"cpumap_l4_proto",
"cpumap_l4_filter",
"cpumap_l4_hash",
};
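
/* Wire this skeleton type up to the xdp_sample helpers used below
 * (sample_init_pre_load(), sample_init()); see xdp_sample.h.
 */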
DEFINE_SAMPLE_INIT(xdp_redirect_cpumap);

static int create_cpu_entry(__u32 cpu, struct bpf_cpumap_val *value,
			    __u32 avail_idx, bool new)
{
__u32 curr_cpus_count = 0;
__u32 key = 0;
int ret;
	/* Add a CPU entry to cpumap, as this allocates a cpu entry in
	 * the kernel for that cpu.
	 */
ret = bpf_map_update_elem(map_fd, &cpu, value, 0);
if (ret < 0) {
pr_warn("Create CPU entry failed: %s\n", strerror(errno));
return ret;
}
	/* Inform the bpf_progs that a new CPU is available to select
	 * from via the cpus_available control map.
	 */
ret = bpf_map_update_elem(avail_fd, &avail_idx, &cpu, 0);
if (ret < 0) {
pr_warn("Add to avail CPUs failed: %s\n", strerror(errno));
return ret;
}
	/* Read the current count; when adding a new entry (not
	 * replacing one), bump the count below.
	 */
ret = bpf_map_lookup_elem(count_fd, &key, &curr_cpus_count);
if (ret < 0) {
pr_warn("Failed reading curr cpus_count: %s\n",
strerror(errno));
return ret;
}
if (new) {
curr_cpus_count++;
ret = bpf_map_update_elem(count_fd, &key,
&curr_cpus_count, 0);
if (ret < 0) {
pr_warn("Failed write curr cpus_count: %s\n",
strerror(errno));
return ret;
}
}
pr_debug("%s CPU: %u as idx: %u qsize: %d cpumap_prog_fd: %d (cpus_count: %u)\n",
new ? "Add new" : "Replace", cpu, avail_idx,
value->qsize, value->bpf_prog.fd, curr_cpus_count);
return 0;
}

/* CPUs are zero-indexed. Thus, add a special sentinel default value
 * in map cpus_available to mark CPU indexes that are not configured.
 */
static int mark_cpus_unavailable(void)
{
int ret, i, n_cpus = libbpf_num_possible_cpus();
__u32 invalid_cpu = n_cpus;
for (i = 0; i < n_cpus; i++) {
ret = bpf_map_update_elem(avail_fd, &i,
&invalid_cpu, 0);
if (ret < 0) {
pr_warn("Failed marking CPU unavailable: %s\n",
strerror(errno));
return ret;
}
}
return 0;
}

/* Stress cpumap management code by concurrently changing underlying cpumap */
static void stress_cpumap(void *ctx)
{
struct bpf_cpumap_val *value = ctx;
	/* Changing qsize will cause the kernel to free and allocate a new
	 * bpf_cpu_map_entry, with an associated/complicated tear-down
	 * procedure.
	 */
value->qsize = 1024;
create_cpu_entry(1, value, 0, false);
value->qsize = 8;
create_cpu_entry(1, value, 0, false);
value->qsize = 16000;
create_cpu_entry(1, value, 0, false);
}
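
/* Select the BPF program to run on the remote CPUs, based on the
 * remote-action option. Returns a program fd, 0 when no program should
 * be attached, or a negative error code.
 */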
static int set_cpumap_prog(struct xdp_redirect_cpumap *skel,
enum cpumap_remote_action action,
const struct iface *redir_iface)
{
struct bpf_devmap_val val = {};
__u32 key = 0;
int err;
switch (action) {
case ACTION_DISABLED:
return 0;
case ACTION_DROP:
return bpf_program__fd(skel->progs.cpumap_drop);
case ACTION_PASS:
return bpf_program__fd(skel->progs.cpumap_pass);
case ACTION_REDIRECT:
break;
default:
return -EINVAL;
}
if (!redir_iface->ifindex) {
pr_warn("Must specify redirect device when using --remote-action 'redirect'\n");
return -EINVAL;
}
if (get_mac_addr(redir_iface->ifindex, skel->bss->tx_mac_addr) < 0) {
pr_warn("Couldn't get MAC address for interface %s\n", redir_iface->ifname);
return -EINVAL;
}
val.ifindex = redir_iface->ifindex;
val.bpf_prog.fd = bpf_program__fd(skel->progs.redirect_egress_prog);
err = bpf_map_update_elem(bpf_map__fd(skel->maps.tx_port), &key, &val, 0);
if (err < 0)
return -errno;
return bpf_program__fd(skel->progs.cpumap_redirect);
}
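
/* Entry point for the redirect-cpu command: load the skeleton, attach
 * the selected XDP program to the ingress interface, populate the
 * cpumap, and run the sampling loop.
 */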
int do_redirect_cpumap(const void *cfg, __unused const char *pin_root_path)
{
const struct cpumap_opts *opt = cfg;
DECLARE_LIBBPF_OPTS(xdp_program_opts, opts);
struct xdp_program *xdp_prog = NULL;
struct xdp_redirect_cpumap *skel;
struct bpf_program *prog = NULL;
struct bpf_map_info info = {};
struct bpf_cpumap_val value;
__u32 infosz = sizeof(info);
int ret = EXIT_FAIL_OPTION;
int n_cpus, fd;
size_t i;
if (opt->extended)
sample_switch_mode();
if (opt->stats)
mask |= SAMPLE_REDIRECT_MAP_CNT;
if (opt->redir_iface.ifindex)
mask |= SAMPLE_DEVMAP_XMIT_CNT_MULTI;
n_cpus = libbpf_num_possible_cpus();

	/* Notice: Choosing the queue size is very important when the CPU is
	 * configured with power-saving states.
	 *
	 * If the deepest sleep state takes 133 usec to wake up from
	 * (133/10^6 sec) and the link speed is 10Gbit/s (10*10^9/8
	 * bytes/sec), then (10*10^9/8) * (133/10^6) = 166250 bytes can
	 * arrive within those 133 usec. With MTU-sized packets this is
	 * 110 packets, and with minimum-sized Ethernet frames (84 bytes
	 * including MAC preamble and inter-frame gap) it is 1979 packets.
	 *
	 * The default cpumap queue size is therefore 2048: the worst case
	 * (small packets) of 1979 packets, plus up to 64 packets buffered
	 * before the kthread wakeup call (due to xdp_do_flush), gives a
	 * worst case of 2043 packets.
	 *
	 * A sysadmin can configure the system to avoid deep sleep via:
	 *   tuned-adm profile network-latency
	 */
skel = xdp_redirect_cpumap__open();
if (!skel) {
pr_warn("Failed to xdp_redirect_cpumap__open: %s\n",
strerror(errno));
ret = EXIT_FAIL_BPF;
goto end;
}
/* Make sure we only load the one XDP program we are interested in */
while ((prog = bpf_object__next_program(skel->obj, prog)) != NULL)
if (bpf_program__type(prog) == BPF_PROG_TYPE_XDP &&
bpf_program__expected_attach_type(prog) == BPF_XDP)
bpf_program__set_autoload(prog, false);
prog = bpf_object__find_program_by_name(skel->obj,
cpumap_prog_names[opt->program_mode]);
if (!prog) {
pr_warn("Failed to find program '%s'\n",
cpumap_prog_names[opt->program_mode]);
goto end_destroy;
}
ret = sample_init_pre_load(skel, opt->iface_in.ifname);
if (ret < 0) {
pr_warn("Failed to sample_init_pre_load: %s\n", strerror(-ret));
ret = EXIT_FAIL_BPF;
goto end_destroy;
}
if (bpf_map__set_max_entries(skel->maps.cpu_map, n_cpus) < 0) {
pr_warn("Failed to set max entries for cpu_map map: %s",
strerror(errno));
ret = EXIT_FAIL_BPF;
goto end_destroy;
}
if (bpf_map__set_max_entries(skel->maps.cpus_available, n_cpus) < 0) {
pr_warn("Failed to set max entries for cpus_available map: %s",
strerror(errno));
ret = EXIT_FAIL_BPF;
goto end_destroy;
}
ret = EXIT_FAIL_OPTION;
skel->rodata->from_match[0] = opt->iface_in.ifindex;
if (opt->redir_iface.ifindex)
skel->rodata->to_match[0] = opt->redir_iface.ifindex;
opts.obj = skel->obj;
opts.prog_name = bpf_program__name(prog);
xdp_prog = xdp_program__create(&opts);
if (!xdp_prog) {
ret = -errno;
pr_warn("Couldn't open XDP program: %s\n",
strerror(-ret));
goto end_destroy;
}
ret = xdp_program__attach(xdp_prog, opt->iface_in.ifindex, opt->mode, 0);
if (ret < 0) {
pr_warn("Failed to attach XDP program: %s\n",
strerror(-ret));
goto end_destroy;
}
ret = bpf_obj_get_info_by_fd(bpf_map__fd(skel->maps.cpu_map), &info, &infosz);
if (ret < 0) {
pr_warn("Failed bpf_obj_get_info_by_fd for cpumap: %s\n",
strerror(errno));
goto end_detach;
}
skel->bss->cpumap_map_id = info.id;
map_fd = bpf_map__fd(skel->maps.cpu_map);
avail_fd = bpf_map__fd(skel->maps.cpus_available);
count_fd = bpf_map__fd(skel->maps.cpus_count);
ret = mark_cpus_unavailable();
if (ret < 0) {
pr_warn("Unable to mark CPUs as unavailable\n");
goto end_detach;
}
ret = sample_init(skel, mask, opt->iface_in.ifindex, 0);
if (ret < 0) {
pr_warn("Failed to initialize sample: %s\n", strerror(-ret));
ret = EXIT_FAIL;
goto end_detach;
}
fd = set_cpumap_prog(skel, opt->remote_action, &opt->redir_iface);
if (fd < 0) {
ret = EXIT_FAIL_BPF;
goto end_detach;
}
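
	/* Configure each CPU given on the command line with the chosen
	 * queue size and the remote-action program selected above.
	 */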
value.qsize = opt->qsize;
value.bpf_prog.fd = fd;
for (i = 0; i < opt->cpus.num_vals; i++) {
if (create_cpu_entry(opt->cpus.vals[i], &value, i, true) < 0) {
pr_warn("Cannot proceed, exiting\n");
ret = EXIT_FAIL;
goto end_detach;
}
}
ret = sample_run(opt->interval, opt->stress_mode ? stress_cpumap : NULL, &value);
if (ret < 0) {
pr_warn("Failed during sample run: %s\n", strerror(-ret));
ret = EXIT_FAIL;
goto end_detach;
}
ret = EXIT_OK;
end_detach:
xdp_program__detach(xdp_prog, opt->iface_in.ifindex, opt->mode, 0);
end_destroy:
xdp_program__close(xdp_prog);
xdp_redirect_cpumap__destroy(skel);
end:
sample_teardown();
return ret;
}