1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
|
// SPDX-License-Identifier: GPL-2.0
/*
* xapic_ipi_test
*
* Copyright (C) 2020, Google LLC.
*
* This work is licensed under the terms of the GNU GPL, version 2.
*
* Test that when the APIC is in xAPIC mode, a vCPU can send an IPI to wake
* another vCPU that is halted when KVM's backing page for the APIC access
* address has been moved by mm.
*
* The test starts two vCPUs: one that sends IPIs and one that continually
* executes HLT. The sender checks that the halter has woken from the HLT and
* has reentered HLT before sending the next IPI. While the vCPUs are running,
* the host continually calls migrate_pages to move all of the process' pages
* amongst the available numa nodes on the machine.
*
* Migration is a command line option. When used on non-numa machines will
* exit with error. Test is still usefull on non-numa for testing IPIs.
*/
#define _GNU_SOURCE /* for program_invocation_short_name */
#include <getopt.h>
#include <pthread.h>
#include <inttypes.h>
#include <string.h>
#include <time.h>
#include "kvm_util.h"
#include "numaif.h"
#include "processor.h"
#include "test_util.h"
#include "vmx.h"
/* Default running time for the test */
#define DEFAULT_RUN_SECS 3
/* Default delay between migrate_pages calls (microseconds) */
#define DEFAULT_DELAY_USECS 500000
/*
* Vector for IPI from sender vCPU to halting vCPU.
* Value is arbitrary and was chosen for the alternating bit pattern. Any
* value should work.
*/
#define IPI_VECTOR 0xa5
/*
* Incremented in the IPI handler. Provides evidence to the sender that the IPI
* arrived at the destination
*/
static volatile uint64_t ipis_rcvd;
/* Data struct shared between host main thread and vCPUs */
struct test_data_page {
uint32_t halter_apic_id;
volatile uint64_t hlt_count;
volatile uint64_t wake_count;
uint64_t ipis_sent;
uint64_t migrations_attempted;
uint64_t migrations_completed;
uint32_t icr;
uint32_t icr2;
uint32_t halter_tpr;
uint32_t halter_ppr;
/*
* Record local version register as a cross-check that APIC access
* worked. Value should match what KVM reports (APIC_VERSION in
* arch/x86/kvm/lapic.c). If test is failing, check that values match
* to determine whether APIC access exits are working.
*/
uint32_t halter_lvr;
};
struct thread_params {
struct test_data_page *data;
struct kvm_vcpu *vcpu;
uint64_t *pipis_rcvd; /* host address of ipis_rcvd global */
};
void verify_apic_base_addr(void)
{
uint64_t msr = rdmsr(MSR_IA32_APICBASE);
uint64_t base = GET_APIC_BASE(msr);
GUEST_ASSERT(base == APIC_DEFAULT_GPA);
}
static void halter_guest_code(struct test_data_page *data)
{
verify_apic_base_addr();
xapic_enable();
data->halter_apic_id = GET_APIC_ID_FIELD(xapic_read_reg(APIC_ID));
data->halter_lvr = xapic_read_reg(APIC_LVR);
/*
* Loop forever HLTing and recording halts & wakes. Disable interrupts
* each time around to minimize window between signaling the pending
* halt to the sender vCPU and executing the halt. No need to disable on
* first run as this vCPU executes first and the host waits for it to
* signal going into first halt before starting the sender vCPU. Record
* TPR and PPR for diagnostic purposes in case the test fails.
*/
for (;;) {
data->halter_tpr = xapic_read_reg(APIC_TASKPRI);
data->halter_ppr = xapic_read_reg(APIC_PROCPRI);
data->hlt_count++;
asm volatile("sti; hlt; cli");
data->wake_count++;
}
}
/*
* Runs on halter vCPU when IPI arrives. Write an arbitrary non-zero value to
* enable diagnosing errant writes to the APIC access address backing page in
* case of test failure.
*/
static void guest_ipi_handler(struct ex_regs *regs)
{
ipis_rcvd++;
xapic_write_reg(APIC_EOI, 77);
}
static void sender_guest_code(struct test_data_page *data)
{
uint64_t last_wake_count;
uint64_t last_hlt_count;
uint64_t last_ipis_rcvd_count;
uint32_t icr_val;
uint32_t icr2_val;
uint64_t tsc_start;
verify_apic_base_addr();
xapic_enable();
/*
* Init interrupt command register for sending IPIs
*
* Delivery mode=fixed, per SDM:
* "Delivers the interrupt specified in the vector field to the target
* processor."
*
* Destination mode=physical i.e. specify target by its local APIC
* ID. This vCPU assumes that the halter vCPU has already started and
* set data->halter_apic_id.
*/
icr_val = (APIC_DEST_PHYSICAL | APIC_DM_FIXED | IPI_VECTOR);
icr2_val = SET_APIC_DEST_FIELD(data->halter_apic_id);
data->icr = icr_val;
data->icr2 = icr2_val;
last_wake_count = data->wake_count;
last_hlt_count = data->hlt_count;
last_ipis_rcvd_count = ipis_rcvd;
for (;;) {
/*
* Send IPI to halter vCPU.
* First IPI can be sent unconditionally because halter vCPU
* starts earlier.
*/
xapic_write_reg(APIC_ICR2, icr2_val);
xapic_write_reg(APIC_ICR, icr_val);
data->ipis_sent++;
/*
* Wait up to ~1 sec for halter to indicate that it has:
* 1. Received the IPI
* 2. Woken up from the halt
* 3. Gone back into halt
* Current CPUs typically run at 2.x Ghz which is ~2
* billion ticks per second.
*/
tsc_start = rdtsc();
while (rdtsc() - tsc_start < 2000000000) {
if ((ipis_rcvd != last_ipis_rcvd_count) &&
(data->wake_count != last_wake_count) &&
(data->hlt_count != last_hlt_count))
break;
}
GUEST_ASSERT((ipis_rcvd != last_ipis_rcvd_count) &&
(data->wake_count != last_wake_count) &&
(data->hlt_count != last_hlt_count));
last_wake_count = data->wake_count;
last_hlt_count = data->hlt_count;
last_ipis_rcvd_count = ipis_rcvd;
}
}
static void *vcpu_thread(void *arg)
{
struct thread_params *params = (struct thread_params *)arg;
struct kvm_vcpu *vcpu = params->vcpu;
struct ucall uc;
int old;
int r;
unsigned int exit_reason;
r = pthread_setcanceltype(PTHREAD_CANCEL_ASYNCHRONOUS, &old);
TEST_ASSERT(r == 0,
"pthread_setcanceltype failed on vcpu_id=%u with errno=%d",
vcpu->id, r);
fprintf(stderr, "vCPU thread running vCPU %u\n", vcpu->id);
vcpu_run(vcpu);
exit_reason = vcpu->run->exit_reason;
TEST_ASSERT(exit_reason == KVM_EXIT_IO,
"vCPU %u exited with unexpected exit reason %u-%s, expected KVM_EXIT_IO",
vcpu->id, exit_reason, exit_reason_str(exit_reason));
if (get_ucall(vcpu, &uc) == UCALL_ABORT) {
TEST_ASSERT(false,
"vCPU %u exited with error: %s.\n"
"Sending vCPU sent %lu IPIs to halting vCPU\n"
"Halting vCPU halted %lu times, woke %lu times, received %lu IPIs.\n"
"Halter TPR=%#x PPR=%#x LVR=%#x\n"
"Migrations attempted: %lu\n"
"Migrations completed: %lu\n",
vcpu->id, (const char *)uc.args[0],
params->data->ipis_sent, params->data->hlt_count,
params->data->wake_count,
*params->pipis_rcvd, params->data->halter_tpr,
params->data->halter_ppr, params->data->halter_lvr,
params->data->migrations_attempted,
params->data->migrations_completed);
}
return NULL;
}
static void cancel_join_vcpu_thread(pthread_t thread, struct kvm_vcpu *vcpu)
{
void *retval;
int r;
r = pthread_cancel(thread);
TEST_ASSERT(r == 0,
"pthread_cancel on vcpu_id=%d failed with errno=%d",
vcpu->id, r);
r = pthread_join(thread, &retval);
TEST_ASSERT(r == 0,
"pthread_join on vcpu_id=%d failed with errno=%d",
vcpu->id, r);
TEST_ASSERT(retval == PTHREAD_CANCELED,
"expected retval=%p, got %p", PTHREAD_CANCELED,
retval);
}
void do_migrations(struct test_data_page *data, int run_secs, int delay_usecs,
uint64_t *pipis_rcvd)
{
long pages_not_moved;
unsigned long nodemask = 0;
unsigned long nodemasks[sizeof(nodemask) * 8];
int nodes = 0;
time_t start_time, last_update, now;
time_t interval_secs = 1;
int i, r;
int from, to;
unsigned long bit;
uint64_t hlt_count;
uint64_t wake_count;
uint64_t ipis_sent;
fprintf(stderr, "Calling migrate_pages every %d microseconds\n",
delay_usecs);
/* Get set of first 64 numa nodes available */
r = get_mempolicy(NULL, &nodemask, sizeof(nodemask) * 8,
0, MPOL_F_MEMS_ALLOWED);
TEST_ASSERT(r == 0, "get_mempolicy failed errno=%d", errno);
fprintf(stderr, "Numa nodes found amongst first %lu possible nodes "
"(each 1-bit indicates node is present): %#lx\n",
sizeof(nodemask) * 8, nodemask);
/* Init array of masks containing a single-bit in each, one for each
* available node. migrate_pages called below requires specifying nodes
* as bit masks.
*/
for (i = 0, bit = 1; i < sizeof(nodemask) * 8; i++, bit <<= 1) {
if (nodemask & bit) {
nodemasks[nodes] = nodemask & bit;
nodes++;
}
}
TEST_ASSERT(nodes > 1,
"Did not find at least 2 numa nodes. Can't do migration\n");
fprintf(stderr, "Migrating amongst %d nodes found\n", nodes);
from = 0;
to = 1;
start_time = time(NULL);
last_update = start_time;
ipis_sent = data->ipis_sent;
hlt_count = data->hlt_count;
wake_count = data->wake_count;
while ((int)(time(NULL) - start_time) < run_secs) {
data->migrations_attempted++;
/*
* migrate_pages with PID=0 will migrate all pages of this
* process between the nodes specified as bitmasks. The page
* backing the APIC access address belongs to this process
* because it is allocated by KVM in the context of the
* KVM_CREATE_VCPU ioctl. If that assumption ever changes this
* test may break or give a false positive signal.
*/
pages_not_moved = migrate_pages(0, sizeof(nodemasks[from]),
&nodemasks[from],
&nodemasks[to]);
if (pages_not_moved < 0)
fprintf(stderr,
"migrate_pages failed, errno=%d\n", errno);
else if (pages_not_moved > 0)
fprintf(stderr,
"migrate_pages could not move %ld pages\n",
pages_not_moved);
else
data->migrations_completed++;
from = to;
to++;
if (to == nodes)
to = 0;
now = time(NULL);
if (((now - start_time) % interval_secs == 0) &&
(now != last_update)) {
last_update = now;
fprintf(stderr,
"%lu seconds: Migrations attempted=%lu completed=%lu, "
"IPIs sent=%lu received=%lu, HLTs=%lu wakes=%lu\n",
now - start_time, data->migrations_attempted,
data->migrations_completed,
data->ipis_sent, *pipis_rcvd,
data->hlt_count, data->wake_count);
TEST_ASSERT(ipis_sent != data->ipis_sent &&
hlt_count != data->hlt_count &&
wake_count != data->wake_count,
"IPI, HLT and wake count have not increased "
"in the last %lu seconds. "
"HLTer is likely hung.\n", interval_secs);
ipis_sent = data->ipis_sent;
hlt_count = data->hlt_count;
wake_count = data->wake_count;
}
usleep(delay_usecs);
}
}
void get_cmdline_args(int argc, char *argv[], int *run_secs,
bool *migrate, int *delay_usecs)
{
for (;;) {
int opt = getopt(argc, argv, "s:d:m");
if (opt == -1)
break;
switch (opt) {
case 's':
*run_secs = parse_size(optarg);
break;
case 'm':
*migrate = true;
break;
case 'd':
*delay_usecs = parse_size(optarg);
break;
default:
TEST_ASSERT(false,
"Usage: -s <runtime seconds>. Default is %d seconds.\n"
"-m adds calls to migrate_pages while vCPUs are running."
" Default is no migrations.\n"
"-d <delay microseconds> - delay between migrate_pages() calls."
" Default is %d microseconds.\n",
DEFAULT_RUN_SECS, DEFAULT_DELAY_USECS);
}
}
}
int main(int argc, char *argv[])
{
int r;
int wait_secs;
const int max_halter_wait = 10;
int run_secs = 0;
int delay_usecs = 0;
struct test_data_page *data;
vm_vaddr_t test_data_page_vaddr;
bool migrate = false;
pthread_t threads[2];
struct thread_params params[2];
struct kvm_vm *vm;
uint64_t *pipis_rcvd;
get_cmdline_args(argc, argv, &run_secs, &migrate, &delay_usecs);
if (run_secs <= 0)
run_secs = DEFAULT_RUN_SECS;
if (delay_usecs <= 0)
delay_usecs = DEFAULT_DELAY_USECS;
vm = vm_create_with_one_vcpu(¶ms[0].vcpu, halter_guest_code);
vm_init_descriptor_tables(vm);
vcpu_init_descriptor_tables(params[0].vcpu);
vm_install_exception_handler(vm, IPI_VECTOR, guest_ipi_handler);
virt_pg_map(vm, APIC_DEFAULT_GPA, APIC_DEFAULT_GPA);
params[1].vcpu = vm_vcpu_add(vm, 1, sender_guest_code);
test_data_page_vaddr = vm_vaddr_alloc_page(vm);
data = addr_gva2hva(vm, test_data_page_vaddr);
memset(data, 0, sizeof(*data));
params[0].data = data;
params[1].data = data;
vcpu_args_set(params[0].vcpu, 1, test_data_page_vaddr);
vcpu_args_set(params[1].vcpu, 1, test_data_page_vaddr);
pipis_rcvd = (uint64_t *)addr_gva2hva(vm, (uint64_t)&ipis_rcvd);
params[0].pipis_rcvd = pipis_rcvd;
params[1].pipis_rcvd = pipis_rcvd;
/* Start halter vCPU thread and wait for it to execute first HLT. */
r = pthread_create(&threads[0], NULL, vcpu_thread, ¶ms[0]);
TEST_ASSERT(r == 0,
"pthread_create halter failed errno=%d", errno);
fprintf(stderr, "Halter vCPU thread started\n");
wait_secs = 0;
while ((wait_secs < max_halter_wait) && !data->hlt_count) {
sleep(1);
wait_secs++;
}
TEST_ASSERT(data->hlt_count,
"Halter vCPU did not execute first HLT within %d seconds",
max_halter_wait);
fprintf(stderr,
"Halter vCPU thread reported its APIC ID: %u after %d seconds.\n",
data->halter_apic_id, wait_secs);
r = pthread_create(&threads[1], NULL, vcpu_thread, ¶ms[1]);
TEST_ASSERT(r == 0, "pthread_create sender failed errno=%d", errno);
fprintf(stderr,
"IPI sender vCPU thread started. Letting vCPUs run for %d seconds.\n",
run_secs);
if (!migrate)
sleep(run_secs);
else
do_migrations(data, run_secs, delay_usecs, pipis_rcvd);
/*
* Cancel threads and wait for them to stop.
*/
cancel_join_vcpu_thread(threads[0], params[0].vcpu);
cancel_join_vcpu_thread(threads[1], params[1].vcpu);
fprintf(stderr,
"Test successful after running for %d seconds.\n"
"Sending vCPU sent %lu IPIs to halting vCPU\n"
"Halting vCPU halted %lu times, woke %lu times, received %lu IPIs.\n"
"Halter APIC ID=%#x\n"
"Sender ICR value=%#x ICR2 value=%#x\n"
"Halter TPR=%#x PPR=%#x LVR=%#x\n"
"Migrations attempted: %lu\n"
"Migrations completed: %lu\n",
run_secs, data->ipis_sent,
data->hlt_count, data->wake_count, *pipis_rcvd,
data->halter_apic_id,
data->icr, data->icr2,
data->halter_tpr, data->halter_ppr, data->halter_lvr,
data->migrations_attempted, data->migrations_completed);
kvm_vm_free(vm);
return 0;
}
|