// Copyright (c) 2012 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.

#include "sandbox/linux/seccomp-bpf/syscall.h"

#include <errno.h>
#include <stdint.h>

#include "base/logging.h"
#include "build/build_config.h"
#include "sandbox/linux/bpf_dsl/seccomp_macros.h"

namespace sandbox {

namespace {

#if defined(ARCH_CPU_X86_FAMILY) || defined(ARCH_CPU_ARM_FAMILY) || \
    defined(ARCH_CPU_MIPS_FAMILY)
// Number that's not currently used by any Linux kernel ABIs.
const int kInvalidSyscallNumber = 0x351d3;
#else
#error Unrecognized architecture
#endif

asm(// We need to be able to tell the kernel exactly where we made a
    // system call. The C++ compiler sometimes likes to clone or
    // inline code, which would inadvertently end up duplicating
    // the entry point.
    // "gcc" can suppress code duplication with suitable function
    // attributes, but "clang" doesn't have this ability.
    // The "clang" developer mailing list suggested that the correct
    // and portable solution is a file-scope assembly block.
    // N.B. We do mark our code as a proper function so that backtraces
    // work correctly. But we make absolutely no attempt to use the
    // ABI's calling conventions for passing arguments. We will only
    // ever be called from assembly code and thus can pick more
    // suitable calling conventions.
#if defined(__i386__)
    ".text\n"
    ".align 16, 0x90\n"
    ".type SyscallAsm, @function\n"
    "SyscallAsm:.cfi_startproc\n"
    // Check if "%eax" is negative. If so, do not attempt to make a
    // system call. Instead, compute the return address that is visible
    // to the kernel after we execute "int $0x80". This address can be
    // used as a marker that BPF code inspects.
    "test %eax, %eax\n"
    "jge  1f\n"
    // Always make sure that our code is position-independent, or
    // address space randomization might not work on i386. This means
    // we can't use "lea", but instead have to rely on "call/pop".
    "call 0f;   .cfi_adjust_cfa_offset  4\n"
    "0:pop  %eax; .cfi_adjust_cfa_offset -4\n"
    "addl $2f-0b, %eax\n"
    "ret\n"
    // Save registers that we don't want to clobber. On i386, we need to
    // save relatively aggressively, as there are a couple of registers
    // that are used internally (e.g. %ebx for position-independent
    // code, and %ebp for the frame pointer), and we need to keep at
    // least a few registers available for the register allocator.
    "1:push %esi; .cfi_adjust_cfa_offset 4; .cfi_rel_offset esi, 0\n"
    "push %edi; .cfi_adjust_cfa_offset 4; .cfi_rel_offset edi, 0\n"
    "push %ebx; .cfi_adjust_cfa_offset 4; .cfi_rel_offset ebx, 0\n"
    "push %ebp; .cfi_adjust_cfa_offset 4; .cfi_rel_offset ebp, 0\n"
    // Copy entries from the array holding the arguments into the
    // correct CPU registers.
    "movl  0(%edi), %ebx\n"
    "movl  4(%edi), %ecx\n"
    "movl  8(%edi), %edx\n"
    "movl 12(%edi), %esi\n"
    "movl 20(%edi), %ebp\n"
    "movl 16(%edi), %edi\n"
    // Enter the kernel.
    "int  $0x80\n"
    // This is our "magic" return address that the BPF filter sees.
    "2:"
    // Restore any clobbered registers that we didn't declare to the
    // compiler.
    "pop  %ebp; .cfi_restore ebp; .cfi_adjust_cfa_offset -4\n"
    "pop  %ebx; .cfi_restore ebx; .cfi_adjust_cfa_offset -4\n"
    "pop  %edi; .cfi_restore edi; .cfi_adjust_cfa_offset -4\n"
    "pop  %esi; .cfi_restore esi; .cfi_adjust_cfa_offset -4\n"
    "ret\n"
    ".cfi_endproc\n"
    "9:.size SyscallAsm, 9b-SyscallAsm\n"
#elif defined(__x86_64__)
    ".text\n"
    ".align 16, 0x90\n"
    ".type SyscallAsm, @function\n"
    "SyscallAsm:.cfi_startproc\n"
    // Check if "%rdi" is negative. If so, do not attempt to make a
    // system call. Instead, compute the return address that is visible
    // to the kernel after we execute "syscall". This address can be
    // used as a marker that BPF code inspects.
    "test %rdi, %rdi\n"
    "jge  1f\n"
    // Always make sure that our code is position-independent, or the
    // linker will throw a hissy fit on x86-64.
    "lea 2f(%rip), %rax\n"
    "ret\n"
    // Now we load the registers used to pass arguments to the system
    // call: system call number in %rax, and arguments in %rdi, %rsi,
    // %rdx, %r10, %r8, %r9. Note: These are all caller-save registers
    // (only %rbx, %rbp, %rsp, and %r12-%r15 are callee-save), so no
    // need to worry here about spilling registers or CFI directives.
    "1:movq %rdi, %rax\n"
    "movq  0(%rsi), %rdi\n"
    "movq 16(%rsi), %rdx\n"
    "movq 24(%rsi), %r10\n"
    "movq 32(%rsi), %r8\n"
    "movq 40(%rsi), %r9\n"
    "movq  8(%rsi), %rsi\n"
    // Enter the kernel.
    "syscall\n"
    // This is our "magic" return address that the BPF filter sees.
    "2:ret\n"
    ".cfi_endproc\n"
    "9:.size SyscallAsm, 9b-SyscallAsm\n"
#elif defined(__arm__)
    // Throughout this file, we use the same mode (ARM vs. thumb)
    // that the C++ compiler uses. This means that, when transferring
    // control from C++ to assembly code, we do not need to switch modes (e.g.
    // by using the "bx" instruction). It also means that our assembly
    // code should not be invoked directly from code that lives in
    // other compilation units, as we don't bother implementing thumb
    // interworking. That's OK, as we don't make any of the assembly
    // symbols public. They are all local to this file.
    ".text\n"
    ".align 2\n"
    ".type SyscallAsm, %function\n"
#if defined(__thumb__)
    ".thumb_func\n"
#else
    ".arm\n"
#endif
    "SyscallAsm:\n"
#if !defined(__native_client_nonsfi__)
    // The .fnstart and .fnend pseudo ops create an unwind table. They also
    // create a reference to the symbol __aeabi_unwind_cpp_pr0, which is not
    // provided by the PNaCl toolchain, so disable them there.
    ".fnstart\n"
#endif
    "@ args = 0, pretend = 0, frame = 8\n"
    "@ frame_needed = 1, uses_anonymous_args = 0\n"
#if defined(__thumb__)
    ".cfi_startproc\n"
    "push {r7, lr}\n"
    ".save {r7, lr}\n"
    ".cfi_offset 14, -4\n"
    ".cfi_offset  7, -8\n"
    ".cfi_def_cfa_offset 8\n"
#else
    "stmfd sp!, {fp, lr}\n"
    "add fp, sp, #4\n"
#endif
    // Check if "r0" is negative. If so, do not attempt to make a
    // system call. Instead, compute the return address that is visible
    // to the kernel after we execute "swi 0". This address can be
    // used as a marker that BPF code inspects.
    "cmp r0, #0\n"
    "bge 1f\n"
    "adr r0, 2f\n"
    "b   2f\n"
    // We declared (almost) all clobbered registers to the compiler. On
    // ARM there is no particular register pressure. So, we can go
    // ahead and directly copy the entries from the arguments array
    // into the appropriate CPU registers.
    "1:ldr r5, [r6, #20]\n"
    "ldr r4, [r6, #16]\n"
    "ldr r3, [r6, #12]\n"
    "ldr r2, [r6, #8]\n"
    "ldr r1, [r6, #4]\n"
    "mov r7, r0\n"
    "ldr r0, [r6, #0]\n"
    // Enter the kernel
    "swi 0\n"
// Restore the frame pointer. Also restore the program counter from
// the link register; this makes us return to the caller.
#if defined(__thumb__)
    "2:pop {r7, pc}\n"
    ".cfi_endproc\n"
#else
    "2:ldmfd sp!, {fp, pc}\n"
#endif
#if !defined(__native_client_nonsfi__)
    // Do not use .fnstart and .fnend for the PNaCl toolchain; see the
    // comment above for more details.
    ".fnend\n"
#endif
    "9:.size SyscallAsm, 9b-SyscallAsm\n"
#elif (defined(ARCH_CPU_MIPS_FAMILY) && defined(ARCH_CPU_32_BITS))
    ".text\n"
    ".option pic2\n"
    ".align 4\n"
    ".global SyscallAsm\n"
    ".type SyscallAsm, @function\n"
    "SyscallAsm:.ent SyscallAsm\n"
    ".frame  $sp, 40, $ra\n"
    ".set   push\n"
    ".set   noreorder\n"
    ".cpload $t9\n"
    "addiu  $sp, $sp, -40\n"
    "sw     $ra, 36($sp)\n"
    // Check if "v0" is negative. If so, do not attempt to make a
    // system call. Instead, compute the return address that is visible
    // to the kernel after we execute "syscall". This address can be
    // used as a marker that BPF code inspects.
    "bgez   $v0, 1f\n"
    " nop\n"
    // This is equivalent to "la $v0, 2f". The LA macro has to be avoided
    // since LLVM-AS has an issue with LA in PIC mode:
    // https://llvm.org/bugs/show_bug.cgi?id=27644
    "lw     $v0, %got(2f)($gp)\n"
    "addiu  $v0, $v0, %lo(2f)\n"
    "b      2f\n"
    " nop\n"
    // On MIPS the first four arguments go in registers a0 - a3 and any
    // arguments after that go on the stack. We can go ahead and directly
    // copy the entries from the arguments array into the appropriate CPU
    // registers and onto the stack.
    "1:lw     $a3, 28($a0)\n"
    "lw     $a2, 24($a0)\n"
    "lw     $a1, 20($a0)\n"
    "lw     $t0, 16($a0)\n"
    "sw     $a3, 28($sp)\n"
    "sw     $a2, 24($sp)\n"
    "sw     $a1, 20($sp)\n"
    "sw     $t0, 16($sp)\n"
    "lw     $a3, 12($a0)\n"
    "lw     $a2, 8($a0)\n"
    "lw     $a1, 4($a0)\n"
    "lw     $a0, 0($a0)\n"
    // Enter the kernel
    "syscall\n"
    // This is our "magic" return address that the BPF filter sees.
    // Restore the return address from the stack.
    "2:lw     $ra, 36($sp)\n"
    "jr     $ra\n"
    " addiu  $sp, $sp, 40\n"
    ".set    pop\n"
    ".end    SyscallAsm\n"
    ".size   SyscallAsm,.-SyscallAsm\n"
#elif defined(ARCH_CPU_MIPS_FAMILY) && defined(ARCH_CPU_64_BITS)
    ".text\n"
    ".option pic2\n"
    ".global SyscallAsm\n"
    ".type SyscallAsm, @function\n"
    "SyscallAsm:.ent SyscallAsm\n"
    ".frame  $sp, 16, $ra\n"
    ".set   push\n"
    ".set   noreorder\n"
    "daddiu  $sp, $sp, -16\n"
    ".cpsetup $25, 0, SyscallAsm\n"
    "sd     $ra, 8($sp)\n"
    // Check if "v0" is negative. If so, do not attempt to make a
    // system call. Instead, compute the return address that is visible
    // to the kernel after we execute "syscall". This address can be
    // used as a marker that BPF code inspects.
    "bgez   $v0, 1f\n"
    " nop\n"
    // This is equivalent to "la $v0, 2f". The LA macro has to be avoided
    // since LLVM-AS has an issue with LA in PIC mode:
    // https://llvm.org/bugs/show_bug.cgi?id=27644
    "ld     $v0, %got(2f)($gp)\n"
    "daddiu  $v0, $v0, %lo(2f)\n"
    "b      2f\n"
    " nop\n"
    // On MIPS N64 all eight arguments go in registers a0 - a7. We can go
    // ahead and directly copy the entries from the arguments array into
    // the appropriate CPU registers.
    "1:ld     $a7, 56($a0)\n"
    "ld     $a6, 48($a0)\n"
    "ld     $a5, 40($a0)\n"
    "ld     $a4, 32($a0)\n"
    "ld     $a3, 24($a0)\n"
    "ld     $a2, 16($a0)\n"
    "ld     $a1, 8($a0)\n"
    "ld     $a0, 0($a0)\n"
    // Enter the kernel
    "syscall\n"
    // This is our "magic" return address that the BPF filter sees.
    // Restore the return address from the stack.
    "2:ld     $ra, 8($sp)\n"
    ".cpreturn\n"
    "jr     $ra\n"
    "daddiu  $sp, $sp, 16\n"
    ".set    pop\n"
    ".end    SyscallAsm\n"
    ".size   SyscallAsm,.-SyscallAsm\n"
#elif defined(__aarch64__)
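    // AArch64 version. As on the other architectures, a negative value in the
    // register holding the system call number (here x0) means "do not enter
    // the kernel; return the magic address of label 2 instead". Otherwise the
    // arguments are copied from the array pointed to by x6 into x0..x5 and
    // the system call number is moved into x8 before executing "svc 0".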
    ".text\n"
    ".align 2\n"
    ".type SyscallAsm, %function\n"
    "SyscallAsm:\n"
    ".cfi_startproc\n"
    "cmp x0, #0\n"
    "b.ge 1f\n"
    "adr x0,2f\n"
    "b 2f\n"
    "1:ldr x5, [x6, #40]\n"
    "ldr x4, [x6, #32]\n"
    "ldr x3, [x6, #24]\n"
    "ldr x2, [x6, #16]\n"
    "ldr x1, [x6, #8]\n"
    "mov x8, x0\n"
    "ldr x0, [x6, #0]\n"
    // Enter the kernel
    "svc 0\n"
    "2:ret\n"
    ".cfi_endproc\n"
    ".size SyscallAsm, .-SyscallAsm\n"
#endif
    );  // asm
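
// SyscallAsm is invoked directly from C++ only on x86-64, and its address is
// taken on MIPS (where it is reached via $t9 from inline assembly); both need
// the C declarations below. The remaining architectures branch to the symbol
// by name from inline assembly and need no declaration.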

#if defined(__x86_64__)
extern "C" {
intptr_t SyscallAsm(intptr_t nr, const intptr_t args[6]);
}
#elif defined(__mips__)
extern "C" {
intptr_t SyscallAsm(intptr_t nr, const intptr_t args[8]);
}
#endif

}  // namespace

intptr_t Syscall::InvalidCall() {
  // Explicitly pass eight zero arguments just in case.
  return Call(kInvalidSyscallNumber, 0, 0, 0, 0, 0, 0, 0, 0);
}
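
#if 0
// Illustrative sketch: a plausible use of InvalidCall() is in a SIGSYS trap
// handler (the TrapFnc form used by this sandbox; arch_seccomp_data comes
// from the sandbox's system headers) that wants an unexpected system call to
// fail cleanly. kInvalidSyscallNumber is not implemented by any Linux ABI, so
// the kernel is expected to return -ENOSYS, provided the seccomp filter lets
// the call through.
intptr_t EnosysHandler(const struct arch_seccomp_data& args, void* aux) {
  return Syscall::InvalidCall();
}
#endif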

intptr_t Syscall::Call(int nr,
                       intptr_t p0,
                       intptr_t p1,
                       intptr_t p2,
                       intptr_t p3,
                       intptr_t p4,
                       intptr_t p5,
                       intptr_t p6,
                       intptr_t p7) {
  // We rely on "intptr_t" being exactly the same size as a "void *". This
  // is typically true, but just in case, we add a check. The language
  // specification allows platforms some leeway in cases where
  // "sizeof(void *)" is not the same as "sizeof(void (*)())". We expect
  // that this would only be an issue for IA64, which we are currently not
  // planning on supporting. It is even possible that this would work on
  // IA64, but for lack of actual hardware, we cannot test it.
  static_assert(sizeof(void*) == sizeof(intptr_t),
                "pointer types and intptr_t must be exactly the same size");

  // TODO(nedeljko): Enable use of more than six parameters on architectures
  //                 where that makes sense.
#if defined(__mips__)
  const intptr_t args[8] = {p0, p1, p2, p3, p4, p5, p6, p7};
#else
  DCHECK_EQ(p6, 0) << " Support for syscalls with more than six arguments not "
                      "added for this architecture";
  DCHECK_EQ(p7, 0) << " Support for syscalls with more than six arguments not "
                      "added for this architecture";
  const intptr_t args[6] = {p0, p1, p2, p3, p4, p5};
#endif  // defined(__mips__)

// Invoke our file-scope assembly code. The constraints have been picked
// carefully to match what the rest of the assembly code expects in input,
// output, and clobbered registers.
#if defined(__i386__)
  intptr_t ret = nr;
  asm volatile(
      "call SyscallAsm\n"
      // N.B. These are not the calling conventions normally used by the ABI.
      : "=a"(ret)
      : "0"(ret), "D"(args)
      : "cc", "esp", "memory", "ecx", "edx");
#elif defined(__x86_64__)
  intptr_t ret = SyscallAsm(nr, args);
#elif defined(__arm__)
  intptr_t ret;
  {
    register intptr_t inout __asm__("r0") = nr;
    register const intptr_t* data __asm__("r6") = args;
    asm volatile(
        "bl SyscallAsm\n"
        // N.B. These are not the calling conventions normally used by the ABI.
        : "=r"(inout)
        : "0"(inout), "r"(data)
        : "cc",
          "lr",
          "memory",
          "r1",
          "r2",
          "r3",
          "r4",
          "r5"
#if !defined(__thumb__)
          // In thumb mode, we cannot use "r7" as a general purpose register, as
          // it is our frame pointer. We have to manually manage and preserve
          // it.
          // In ARM mode, we have a dedicated frame pointer register and "r7" is
          // thus available as a general purpose register. We don't preserve it,
          // but instead mark it as clobbered.
          ,
          "r7"
#endif  // !defined(__thumb__)
        );
    ret = inout;
  }
#elif defined(__mips__)
  intptr_t err_status;
  intptr_t ret = Syscall::SandboxSyscallRaw(nr, args, &err_status);

  if (err_status) {
    // On error, MIPS returns errno from the syscall instead of -errno.
    // The purpose of this negation is to make Syscall::Call() behave
    // more like it would on other architectures.
    ret = -ret;
  }
#elif defined(__aarch64__)
  intptr_t ret;
  {
    register intptr_t inout __asm__("x0") = nr;
    register const intptr_t* data __asm__("x6") = args;
    asm volatile("bl SyscallAsm\n"
                 : "=r"(inout)
                 : "0"(inout), "r"(data)
                 : "memory", "x1", "x2", "x3", "x4", "x5", "x8", "x30");
    ret = inout;
  }

#else
#error "Unimplemented architecture"
#endif
  return ret;
}
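
#if 0
// Illustrative sketch: a negative "syscall number" never enters the kernel.
// SyscallAsm instead returns the address just past its syscall instruction,
// which BPF code can compare against the reported instruction pointer to
// recognize system calls issued from this file (for example, ones forwarded
// on behalf of an unsafe trap handler).
const intptr_t kMagicReturnAddress = Syscall::Call(-1);
#endif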

void Syscall::PutValueInUcontext(intptr_t ret_val, ucontext_t* ctx) {
#if defined(__mips__)
  // The MIPS ABI states that the a3 CPU register is non-zero on error and
  // zero when there is no error.
  if (ret_val <= -1 && ret_val >= -4095) {
    // |ret_val| follows the Syscall::Call() convention of being -errno on
    // errors. In order to write the correct value to the return register
    // this sign needs to be changed back.
    ret_val = -ret_val;
    SECCOMP_PARM4(ctx) = 1;
  } else {
    SECCOMP_PARM4(ctx) = 0;
  }
#endif
  SECCOMP_RESULT(ctx) = static_cast<greg_t>(ret_val);
}
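
#if 0
// Illustrative sketch (assuming an SA_SIGINFO-style SIGSYS handler similar to
// the one in trap.cc): the value computed for an intercepted system call is
// written back into the interrupted thread's context, so the thread observes
// it as the system call's return value once the handler returns.
void SigSysAction(int, siginfo_t*, void* void_context) {
  ucontext_t* ctx = static_cast<ucontext_t*>(void_context);
  intptr_t rv = -EPERM;  // whatever the policy decided the call should return
  Syscall::PutValueInUcontext(rv, ctx);
}
#endif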

#if defined(__mips__)
intptr_t Syscall::SandboxSyscallRaw(int nr,
                                    const intptr_t* args,
                                    intptr_t* err_ret) {
  register intptr_t ret __asm__("v0") = nr;
  register intptr_t syscallasm __asm__("t9") = (intptr_t) &SyscallAsm;
  // The a3 register becomes non-zero on error.
  register intptr_t err_stat __asm__("a3") = 0;
  {
    register const intptr_t* data __asm__("a0") = args;
    asm volatile(
        "jalr $t9\n"
        " nop\n"
        : "=r"(ret), "=r"(err_stat)
        : "0"(ret),
          "r"(data),
          "r"(syscallasm)
          // a2 is in the clobber list so inline assembly cannot change its
          // value.
        : "memory", "ra", "a2");
  }

  // Set the error status so that it can be used outside of this function.
  *err_ret = err_stat;

  return ret;
}
#endif  // defined(__mips__)

}  // namespace sandbox