summaryrefslogtreecommitdiffstats
path: root/tools/testing/selftests/x86/test_syscall_vdso.c
blob: 8965c311bd65b689ae5158c24ffa41471619fa1a (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
// SPDX-License-Identifier: GPL-2.0-only
/*
 * 32-bit syscall ABI conformance test.
 *
 * Copyright (c) 2015 Denys Vlasenko
 */
/*
 * Can be built statically:
 * gcc -Os -Wall -static -m32 test_syscall_vdso.c thunks_32.S
 */
#undef _GNU_SOURCE
#define _GNU_SOURCE 1
#undef __USE_GNU
#define __USE_GNU 1
#include <unistd.h>
#include <stdlib.h>
#include <string.h>
#include <stdio.h>
#include <signal.h>
#include <sys/types.h>
#include <sys/select.h>
#include <sys/time.h>
#include <elf.h>
#include <sys/ptrace.h>
#include <sys/wait.h>

#if !defined(__i386__)
/*
 * Fallback entry point used when the file is not compiled for 32-bit x86:
 * report that the test is skipped and exit with status 0.
 */
int main(int argc, char **argv, char **envp)
{
	static const char skip_notice[] = "[SKIP]\tNot a 32-bit x86 userspace\n";

	fputs(skip_notice, stdout);
	return 0;
}
#else

/* Address of the syscall entry point that run_syscall() calls through. */
long syscall_addr;

/*
 * Look up the AT_SYSINFO entry of the ELF auxiliary vector.
 *
 * The vector is read from the memory that directly follows the NULL
 * terminator of the envp array.
 *
 * Returns the AT_SYSINFO value, or 0 (after printing a warning) when the
 * vector ends without such an entry.
 */
long get_syscall(char **envp)
{
	char **env_slot = envp;
	const Elf32_auxv_t *entry;

	/* Step over every environment string pointer ... */
	while (*env_slot != NULL)
		env_slot++;
	/* ... and over the terminating NULL itself. */
	env_slot++;

	entry = (const void *)env_slot;
	while (entry->a_type != AT_NULL) {
		if (entry->a_type == AT_SYSINFO)
			return entry->a_un.a_val;
		entry++;
	}

	printf("[WARN]\tAT_SYSINFO not supplied\n");
	return 0;
}

/*
 * Tiny assembly routine "int80": executes INT $0x80 and returns.
 * Its address is stored in syscall_addr by run_syscall_twice() so the
 * same "call *syscall_addr" sequence can exercise the INT 80 entry path.
 */
asm (
	"	.pushsection .text\n"
	"	.global	int80\n"
	"int80:\n"
	"	int	$0x80\n"
	"	ret\n"
	"	.popsection\n"
);
/* Only the address of this symbol (&int80) is used from C. */
extern char int80;

/*
 * Snapshot of sixteen 64-bit registers.
 *
 * The field order is significant: get_regs64 stores each register at a
 * fixed 8-byte slot (0*8 for rax up to 15*8 for r15), and check_regs64()
 * walks from r8 onwards with pointer arithmetic.
 */
struct regs64 {
	uint64_t rax, rbx, rcx, rdx;
	uint64_t rsi, rdi, rbp, rsp;
	uint64_t r8,  r9,  r10, r11;
	uint64_t r12, r13, r14, r15;
};
/* The single snapshot buffer; referenced by symbol name from get_regs64. */
struct regs64 regs64;
/* Non-zero when the R8..R15 checks should run; set in main() from CS. */
int kernel_is_64bit;

/*
 * Two helpers assembled as 64-bit code (.code64) inside this 32-bit
 * program.  They are invoked through call64_from_32().
 *
 *   get_regs64    - store RAX..R15 into the global regs64 snapshot.
 *   poison_regs64 - load R8 with 0x7f7f7f7f7f7f7f7f and R9..R15 with
 *                   that value plus 1..7, the pattern check_regs64()
 *                   later compares against.
 */
asm (
	"	.pushsection .text\n"
	"	.code64\n"
	"get_regs64:\n"
	/* Save the caller's RAX on the stack so RAX can hold &regs64. */
	"	push	%rax\n"
	"	mov	$regs64, %eax\n"
	/* Pop the saved RAX straight into slot 0 of the snapshot. */
	"	pop	0*8(%rax)\n"
	"	movq	%rbx, 1*8(%rax)\n"
	"	movq	%rcx, 2*8(%rax)\n"
	"	movq	%rdx, 3*8(%rax)\n"
	"	movq	%rsi, 4*8(%rax)\n"
	"	movq	%rdi, 5*8(%rax)\n"
	"	movq	%rbp, 6*8(%rax)\n"
	"	movq	%rsp, 7*8(%rax)\n"
	"	movq	%r8,  8*8(%rax)\n"
	"	movq	%r9,  9*8(%rax)\n"
	"	movq	%r10, 10*8(%rax)\n"
	"	movq	%r11, 11*8(%rax)\n"
	"	movq	%r12, 12*8(%rax)\n"
	"	movq	%r13, 13*8(%rax)\n"
	"	movq	%r14, 14*8(%rax)\n"
	"	movq	%r15, 15*8(%rax)\n"
	"	ret\n"
	"poison_regs64:\n"
	/* Build 0x7f7f7f7f7f7f7f7f in R8 from two 32-bit halves. */
	"	movq	$0x7f7f7f7f, %r8\n"
	"	shl	$32, %r8\n"
	"	orq	$0x7f7f7f7f, %r8\n"
	/* Each following register is the previous one plus one. */
	"	movq	%r8, %r9\n"
	"	incq	%r9\n"
	"	movq	%r9, %r10\n"
	"	incq	%r10\n"
	"	movq	%r10, %r11\n"
	"	incq	%r11\n"
	"	movq	%r11, %r12\n"
	"	incq	%r12\n"
	"	movq	%r12, %r13\n"
	"	incq	%r13\n"
	"	movq	%r13, %r14\n"
	"	incq	%r14\n"
	"	movq	%r14, %r15\n"
	"	incq	%r15\n"
	"	ret\n"
	/* Switch the assembler back to 32-bit mode for the rest of the file. */
	"	.code32\n"
	"	.popsection\n"
);
extern void get_regs64(void);
extern void poison_regs64(void);
/*
 * Defined outside this file; presumably in thunks_32.S, which the build
 * note at the top of the file lists alongside it - TODO confirm.
 */
extern unsigned long call64_from_32(void (*function)(void));
void print_regs64(void)
{
	if (!kernel_is_64bit)
		return;
	printf("ax:%016llx bx:%016llx cx:%016llx dx:%016llx\n", regs64.rax,  regs64.rbx,  regs64.rcx,  regs64.rdx);
	printf("si:%016llx di:%016llx bp:%016llx sp:%016llx\n", regs64.rsi,  regs64.rdi,  regs64.rbp,  regs64.rsp);
	printf(" 8:%016llx  9:%016llx 10:%016llx 11:%016llx\n", regs64.r8 ,  regs64.r9 ,  regs64.r10,  regs64.r11);
	printf("12:%016llx 13:%016llx 14:%016llx 15:%016llx\n", regs64.r12,  regs64.r13,  regs64.r14,  regs64.r15);
}

/*
 * Compare the R8..R15 slots of the regs64 snapshot with the pattern
 * written by poison_regs64 (0x7f7f7f7f7f7f7f7f for R8, one more for each
 * following register).
 *
 * Returns the number of registers reported as [FAIL]; always 0 when
 * kernel_is_64bit is not set.
 */
int check_regs64(void)
{
	const uint64_t *slot = &regs64.r8;
	const int via_int80 = (syscall_addr == (long)&int80);
	int failures = 0;
	int idx;

	if (!kernel_is_64bit)
		return 0;

	for (idx = 0; idx < 8; idx++) {
		const int regno = 8 + idx;
		const uint64_t seen = slot[idx];

		/* Register still holds its poison value: nothing to report. */
		if (seen == 0x7f7f7f7f7f7f7f7fULL + idx)
			continue;

		if (!via_int80) {
			/*
			 * Non-INT80 syscall entrypoints are allowed to clobber R8+ regs:
			 * either clear them to 0, or for R11, load EFLAGS.
			 */
			if (seen == 0)
				continue;
			if (regno == 11) {
				printf("[NOTE]\tR11 has changed:%016llx - assuming clobbered by SYSRET insn\n", seen);
				continue;
			}
		}
		/*
		 * For INT80 there are no exemptions: that entrypoint can be
		 * used by 64-bit programs too, unlike SYSCALL/SYSENTER, so it
		 * must preserve R12+ (callee-saved in the 64-bit C ABI).
		 *
		 * Starting in Linux 4.17 (and any kernel that backports the
		 * change), R8..11 are preserved as well.  Historically (and
		 * probably unintentionally), they were clobbered or zeroed.
		 */
		printf("[FAIL]\tR%d has changed:%016llx\n", regno, seen);
		failures++;
	}

	if (failures == 0)
		printf("[OK]\tR8..R15 did not leak kernel data\n");
	return failures;
}

/*
 * Argument storage for the pselect call made in run_syscall().
 * These are plain globals because the inline assembly there refers to
 * them by symbol name.
 */
int nfds;
fd_set rfds;
fd_set wfds;
fd_set efds;
struct timespec timeout;
sigset_t sigmask;
/* Sixth argument: pointer to the signal mask plus a size field. */
struct {
	sigset_t *sp;
	int sz;
} sigmask_desc;

/*
 * Load known values into every argument global above:
 * nfds = 42, fd 0/1/2 in rfds/wfds/efds respectively, a 123 ns timeout,
 * and a signal mask holding SIGINT, SIGUSR2 and SIGRTMAX.
 */
void prep_args()
{
	fd_set *const fdsets[] = { &rfds, &wfds, &efds };
	const int masked[] = { SIGINT, SIGUSR2, SIGRTMAX };
	int i;

	nfds = 42;

	/* Set number i ends up containing only descriptor i. */
	for (i = 0; i < 3; i++) {
		FD_ZERO(fdsets[i]);
		FD_SET(i, fdsets[i]);
	}

	timeout.tv_nsec = 123;
	timeout.tv_sec = 0;

	sigemptyset(&sigmask);
	for (i = 0; i < 3; i++)
		sigaddset(&sigmask, masked[i]);

	sigmask_desc.sz = 8; /* bytes */
	sigmask_desc.sp = &sigmask;
}

/*
 * Print "<name>=<r as 16 hex digits> " followed by a symbolic decoding of
 * the low 22 bits of r, from bit 21 down to bit 0.  The string for bit 0
 * ends the line.  If any bit above 21 is set, "(extra bits are set) " is
 * printed before the decoding.
 *
 * name: label printed in front of the value.
 * r:    flags value to decode.
 */
static void print_flags(const char *name, unsigned long r)
{
	/*
	 * Two strings per bit, lowest bit first: entry [2*bit] is printed
	 * when the bit is clear, entry [2*bit + 1] when it is set.  Empty
	 * strings are skipped.
	 */
	static const char *const bitarray[] = {
	"\n" ,"c\n" ,/* Carry Flag */
	"0 " ,"1 "  ,/* Bit 1 - always on */
	""   ,"p "  ,/* Parity Flag */
	"0 " ,"3? " ,
	""   ,"a "  ,/* Auxiliary carry Flag */
	"0 " ,"5? " ,
	""   ,"z "  ,/* Zero Flag */
	""   ,"s "  ,/* Sign Flag */
	""   ,"t "  ,/* Trap Flag */
	""   ,"i "  ,/* Interrupt Flag */
	""   ,"d "  ,/* Direction Flag */
	""   ,"o "  ,/* Overflow Flag */
	"0 " ,"1 "  ,/* I/O Privilege Level (2 bits) */
	"0"  ,"1"   ,/* I/O Privilege Level (2 bits) */
	""   ,"n "  ,/* Nested Task */
	"0 " ,"15? ",
	""   ,"r "  ,/* Resume Flag */
	""   ,"v "  ,/* Virtual Mode */
	""   ,"ac " ,/* Alignment Check/Access Control */
	""   ,"vif ",/* Virtual Interrupt Flag */
	""   ,"vip ",/* Virtual Interrupt Pending */
	""   ,"id " ,/* CPUID detection */
	};
	/* Number of decoded bits, derived from the table (22). */
	enum { NUM_FLAG_BITS = sizeof(bitarray) / sizeof(bitarray[0]) / 2 };
	int bit;

	printf("%s=%016lx ", name, r);
	if ((r >> NUM_FLAG_BITS) != 0)
		printf("(extra bits are set) ");

	/*
	 * Index the table directly instead of walking a pointer downwards:
	 * decrementing a pointer past the first element would form an
	 * out-of-bounds pointer, which is undefined behaviour.
	 */
	for (bit = NUM_FLAG_BITS - 1; bit >= 0; bit--) {
		const char *text = bitarray[2 * bit + ((r >> bit) & 1)];

		if (text[0])
			fputs(text, stdout);
	}
}

/*
 * Issue one 6-argument syscall (number 308, pselect) through the entry
 * point stored in syscall_addr and check what came back.
 *
 * The six argument registers are loaded with the values/addresses set up
 * by prep_args(), EFLAGS is loaded with 0x200ed7, and after the call each
 * argument register is compared with what was loaded.  Any flag changes
 * are reported as warnings only.
 *
 * Returns 1 if an argument register was clobbered; otherwise the result
 * of check_regs64().
 */
int run_syscall(void)
{
	/*
	 * flags:   EFLAGS as read right after the call.
	 * bad_arg: 1..6 for the first argument register found changed,
	 *          0 when all six still match.
	 */
	long flags, bad_arg;

	prep_args();

	/* Fill R8..R15 with the known pattern before entering the kernel. */
	if (kernel_is_64bit)
		call64_from_32(poison_regs64);
	/*print_regs64();*/

	asm("\n"
	/* Try 6-arg syscall: pselect. It should return quickly */
	/* EBP carries arg6 below, so keep the compiler's value on the stack. */
	"	push	%%ebp\n"
	"	mov	$308, %%eax\n"     /* PSELECT */
	"	mov	nfds, %%ebx\n"     /* ebx  arg1 */
	"	mov	$rfds, %%ecx\n"    /* ecx  arg2 */
	"	mov	$wfds, %%edx\n"    /* edx  arg3 */
	"	mov	$efds, %%esi\n"    /* esi  arg4 */
	"	mov	$timeout, %%edi\n" /* edi  arg5 */
	"	mov	$sigmask_desc, %%ebp\n" /* %ebp arg6 */
	"	push	$0x200ed7\n"      /* set almost all flags */
	"	popf\n"		/* except TF, IOPL, NT, RF, VM, AC, VIF, VIP */
	"	call	*syscall_addr\n"
	/* Check that registers are not clobbered */
	/* Capture EFLAGS in EAX first; this becomes the "flags" output. */
	"	pushf\n"
	"	pop	%%eax\n"
	/* The direction flag was set by the popf above; clear it again. */
	"	cld\n"
	/*
	 * Each check is cmp / mov / jne: the mov loads the argument number
	 * into EBX without touching the comparison result, so on a mismatch
	 * the jump leaves with EBX identifying the offending argument.
	 */
	"	cmp	nfds, %%ebx\n"     /* ebx  arg1 */
	"	mov	$1, %%ebx\n"
	"	jne	1f\n"
	"	cmp	$rfds, %%ecx\n"    /* ecx  arg2 */
	"	mov	$2, %%ebx\n"
	"	jne	1f\n"
	"	cmp	$wfds, %%edx\n"    /* edx  arg3 */
	"	mov	$3, %%ebx\n"
	"	jne	1f\n"
	"	cmp	$efds, %%esi\n"    /* esi  arg4 */
	"	mov	$4, %%ebx\n"
	"	jne	1f\n"
	"	cmp	$timeout, %%edi\n" /* edi  arg5 */
	"	mov	$5, %%ebx\n"
	"	jne	1f\n"
	"	cmpl	$sigmask_desc, %%ebp\n" /* %ebp arg6 */
	"	mov	$6, %%ebx\n"
	"	jne	1f\n"
	/* All six matched: report 0. */
	"	mov	$0, %%ebx\n"
	"1:\n"
	"	pop	%%ebp\n"
	: "=a" (flags), "=b" (bad_arg)
	:
	: "cx", "dx", "si", "di"
	);

	if (kernel_is_64bit) {
		/* Pre-fill the snapshot with 0x77 bytes, then capture the registers. */
		memset(&regs64, 0x77, sizeof(regs64));
		call64_from_32(get_regs64);
		/*print_regs64();*/
	}

	/*
	 * On paravirt kernels, flags are not preserved across syscalls.
	 * Thus, we do not consider it a bug if some are changed.
	 * We just show ones which do.
	 */
	if ((0x200ed7 ^ flags) != 0) {
		print_flags("[WARN]\tFlags before", 0x200ed7);
		print_flags("[WARN]\tFlags  after", flags);
		print_flags("[WARN]\tFlags change", (0x200ed7 ^ flags));
	}

	if (bad_arg) {
		printf("[FAIL]\targ#%ld clobbered\n", bad_arg);
		return 1;
	}
	printf("[OK]\tArguments are preserved across syscall\n");

	return check_regs64();
}

/*
 * Run the syscall test through both entry points: first through the
 * address currently held in syscall_addr (skipped when that is 0), then
 * through the int80 stub.  syscall_addr is restored before returning.
 *
 * Returns the sum of the run_syscall() results.
 */
int run_syscall_twice()
{
	const long saved_entry = syscall_addr;
	int failures = 0;

	if (saved_entry != 0) {
		printf("[RUN]\tExecuting 6-argument 32-bit syscall via VDSO\n");
		failures += run_syscall();
	}

	/* Temporarily redirect the entry point to the INT 80 stub. */
	syscall_addr = (long)&int80;
	printf("[RUN]\tExecuting 6-argument 32-bit syscall via INT 80\n");
	failures += run_syscall();
	syscall_addr = saved_entry;

	return failures;
}

/*
 * Fork so that the rest of the program runs in a child traced by its
 * parent.
 *
 * Child:  requests tracing, stops itself with SIGSTOP and then returns to
 *         the caller.  If PTRACE_TRACEME fails it exits with status 0.
 * Parent: never returns.  It resumes the child with PTRACE_SYSCALL after
 *         every stop and finally exits with the child's exit status (or
 *         the number of the signal that terminated it).  It exits with 1
 *         if fork() fails and with 255 if waitpid() fails or reports an
 *         unexpected state.
 */
void ptrace_me(void)
{
	pid_t pid;

	/* Flush all stdio streams before forking. */
	fflush(NULL);
	pid = fork();
	if (pid < 0)
		exit(1);
	if (pid == 0) {
		/* child */
		if (ptrace(PTRACE_TRACEME, 0L, 0L, 0L) != 0)
			exit(0);
		raise(SIGSTOP);
		return;
	}
	/* parent */
	printf("[RUN]\tRunning tests under ptrace\n");
	while (1) {
		int status;

		pid = waitpid(-1, &status, __WALL);
		/*
		 * Check the result first: status is only meaningful when
		 * waitpid() actually reported a child.
		 */
		if (pid <= 0)
			exit(255);
		if (WIFEXITED(status))
			exit(WEXITSTATUS(status));
		if (WIFSIGNALED(status))
			exit(WTERMSIG(status));
		if (!WIFSTOPPED(status)) /* paranoia */
			exit(255);
		/*
		 * Note: we do not inject sig = WSTOPSIG(status).
		 * We probably should, but careful: do not inject SIGTRAP
		 * generated by syscall entry/exit stops.
		 * That kills the child.
		 */
		ptrace(PTRACE_SYSCALL, pid, 0L, 0L /*sig*/);
	}
}

/*
 * Test driver: decide from the CS selector whether the R8..R15 checks
 * apply, look up the AT_SYSINFO entry point, then run both syscall tests
 * once directly and once more after ptrace_me().
 *
 * Returns the accumulated results of the run_syscall_twice() calls.
 */
int main(int argc, char **argv, char **envp)
{
	int failures;
	int cs;

	asm("\n"
	"	movl	%%cs, %%eax\n"
	: "=a" (cs)
	);

	/* A CS selector of 0x23 is taken to mean a 64-bit kernel. */
	kernel_is_64bit = (cs == 0x23);
	if (!kernel_is_64bit)
		printf("[NOTE]\tNot a 64-bit kernel, won't test R8..R15 leaks\n");

	/* This only works for non-static builds:
	 * syscall_addr = dlsym(dlopen("linux-gate.so.1", RTLD_NOW), "__kernel_vsyscall");
	 */
	syscall_addr = get_syscall(envp);

	failures = run_syscall_twice();
	/* Only the traced child returns from ptrace_me(). */
	ptrace_me();
	failures += run_syscall_twice();

	return failures;
}
#endif