summaryrefslogtreecommitdiffstats
path: root/man2/syscall.2
diff options
context:
space:
mode:
Diffstat (limited to 'man2/syscall.2')
-rw-r--r--man2/syscall.2367
1 files changed, 367 insertions, 0 deletions
diff --git a/man2/syscall.2 b/man2/syscall.2
new file mode 100644
index 0000000..43f054a
--- /dev/null
+++ b/man2/syscall.2
@@ -0,0 +1,367 @@
+'\" t
+.\" Copyright (c) 1980, 1991, 1993
+.\" The Regents of the University of California. All rights reserved.
+.\"
+.\" SPDX-License-Identifier: BSD-4-Clause-UC
+.\"
+.\" @(#)syscall.2 8.1 (Berkeley) 6/16/93
+.\"
+.\"
+.\" 2002-03-20 Christoph Hellwig <hch@infradead.org>
+.\" - adopted for Linux
+.\" 2015-01-17, Kees Cook <keescook@chromium.org>
+.\" Added mips and arm64.
+.\"
+.TH syscall 2 2023-05-03 "Linux man-pages 6.05.01"
+.SH NAME
+syscall \- indirect system call
+.SH LIBRARY
+Standard C library
+.RI ( libc ", " \-lc )
+.SH SYNOPSIS
+.nf
+.BR "#include <sys/syscall.h>" " /* Definition of " SYS_* " constants */"
+.B #include <unistd.h>
+.PP
+.BI "long syscall(long " number ", ...);"
+.fi
+.PP
+.RS -4
+Feature Test Macro Requirements for glibc (see
+.BR feature_test_macros (7)):
+.RE
+.PP
+.BR syscall ():
+.nf
+ Since glibc 2.19:
+ _DEFAULT_SOURCE
+ Before glibc 2.19:
+ _BSD_SOURCE || _SVID_SOURCE
+.fi
+.SH DESCRIPTION
+.BR syscall ()
+is a small library function that invokes
+the system call whose assembly language
+interface has the specified
+.I number
+with the specified arguments.
+Employing
+.BR syscall ()
+is useful, for example,
+when invoking a system call that has no wrapper function in the C library.
+.PP
+.BR syscall ()
+saves CPU registers before making the system call,
+restores the registers upon return from the system call,
+and stores any error returned by the system call in
+.BR errno (3).
+.PP
+Symbolic constants for system call numbers can be found in the header file
+.IR <sys/syscall.h> .
+.SH RETURN VALUE
+The return value is defined by the system call being invoked.
+In general, a 0 return value indicates success.
+A \-1 return value indicates an error,
+and an error number is stored in
+.IR errno .
+.SH ERRORS
+.TP
+.B ENOSYS
+The requested system call number is not implemented.
+.PP
+Other errors are specific to the invoked system call.
+.SH NOTES
+.BR syscall ()
+first appeared in
+4BSD.
+.SS Architecture-specific requirements
+Each architecture ABI has its own requirements on how
+system call arguments are passed to the kernel.
+For system calls that have a glibc wrapper (e.g., most system calls),
+glibc handles the details of copying arguments to the right registers
+in a manner suitable for the architecture.
+However, when using
+.BR syscall ()
+to make a system call,
+the caller might need to handle architecture-dependent details;
+this requirement is most commonly encountered on certain 32-bit architectures.
+.PP
+For example, on the ARM architecture Embedded ABI (EABI), a
+64-bit value (e.g.,
+.IR "long long" )
+must be aligned to an even register pair.
+Thus, using
+.BR syscall ()
+instead of the wrapper provided by glibc,
+the
+.BR readahead (2)
+system call would be invoked as follows on the ARM architecture with the EABI
+in little endian mode:
+.PP
+.in +4n
+.EX
+syscall(SYS_readahead, fd, 0,
+ (unsigned int) (offset & 0xFFFFFFFF),
+ (unsigned int) (offset >> 32),
+ count);
+.EE
+.in
+.PP
+Since the offset argument is 64 bits, and the first argument
+.RI ( fd )
+is passed in
+.IR r0 ,
+the caller must manually split and align the 64-bit value
+so that it is passed in the
+.IR r2 / r3
+register pair.
+That means inserting a dummy value into
+.I r1
+(the second argument of 0).
+Care also must be taken so that the split follows endian conventions
+(according to the C ABI for the platform).
+.PP
+Similar issues can occur on MIPS with the O32 ABI,
+on PowerPC and parisc with the 32-bit ABI, and on Xtensa.
+.\" Mike Frysinger: this issue ends up forcing MIPS
+.\" O32 to take 7 arguments to syscall()
+.PP
+.\" See arch/parisc/kernel/sys_parisc.c.
+Note that while the parisc C ABI also uses aligned register pairs,
+it uses a shim layer to hide the issue from user space.
+.PP
+The affected system calls are
+.BR fadvise64_64 (2),
+.BR ftruncate64 (2),
+.BR posix_fadvise (2),
+.BR pread64 (2),
+.BR pwrite64 (2),
+.BR readahead (2),
+.BR sync_file_range (2),
+and
+.BR truncate64 (2).
+.PP
+.\" You need to look up the syscalls directly in the kernel source to see if
+.\" they should be in this list. For example, look at fs/read_write.c and
+.\" the function signatures that do:
+.\" ..., unsigned long, pos_l, unsigned long, pos_h, ...
+.\" If they use off_t, then they most likely do not belong in this list.
+This does not affect syscalls that manually split and assemble 64-bit values
+such as
+.BR _llseek (2),
+.BR preadv (2),
+.BR preadv2 (2),
+.BR pwritev (2),
+and
+.BR pwritev2 (2).
+Welcome to the wonderful world of historical baggage.
+.SS Architecture calling conventions
+Every architecture has its own way of invoking and passing arguments to the
+kernel.
+The details for various architectures are listed in the two tables below.
+.PP
+The first table lists the instruction used to transition to kernel mode
+(which might not be the fastest or best way to transition to the kernel,
+so you might have to refer to
+.BR vdso (7)),
+the register used to indicate the system call number,
+the register(s) used to return the system call result,
+and the register used to signal an error.
+.if t \{\
+.ft CW
+\}
+.TS
+l2 l2 l2 l2 l1 l2 l.
+Arch/ABI Instruction System Ret Ret Error Notes
+ call # val val2
+_
+alpha callsys v0 v0 a4 a3 1, 6
+arc trap0 r8 r0 - -
+arm/OABI swi NR - r0 - - 2
+arm/EABI swi 0x0 r7 r0 r1 -
+arm64 svc #0 w8 x0 x1 -
+blackfin excpt 0x0 P0 R0 - -
+i386 int $0x80 eax eax edx -
+ia64 break 0x100000 r15 r8 r9 r10 1, 6
+loongarch syscall 0 a7 a0 - -
+m68k trap #0 d0 d0 - -
+microblaze brki r14,8 r12 r3 - -
+mips syscall v0 v0 v1 a3 1, 6
+nios2 trap r2 r2 - r7
+parisc ble 0x100(%sr2, %r0) r20 r28 - -
+powerpc sc r0 r3 - r0 1
+powerpc64 sc r0 r3 - cr0.SO 1
+riscv ecall a7 a0 a1 -
+s390 svc 0 r1 r2 r3 - 3
+s390x svc 0 r1 r2 r3 - 3
+superh trapa #31 r3 r0 r1 - 4, 6
+sparc/32 t 0x10 g1 o0 o1 psr/csr 1, 6
+sparc/64 t 0x6d g1 o0 o1 psr/csr 1, 6
+tile swint1 R10 R00 - R01 1
+x86-64 syscall rax rax rdx - 5
+x32 syscall rax rax rdx - 5
+xtensa syscall a2 a2 - -
+.TE
+.PP
+Notes:
+.IP \[bu] 3
+On a few architectures,
+a register is used as a boolean
+(0 indicating no error, and \-1 indicating an error) to signal that the
+system call failed.
+The actual error value is still contained in the return register.
+On sparc, the carry bit
+.RI ( csr )
+in the processor status register
+.RI ( psr )
+is used instead of a full register.
+On powerpc64, the summary overflow bit
+.RI ( SO )
+in field 0 of the condition register
+.RI ( cr0 )
+is used.
+.IP \[bu]
+.I NR
+is the system call number.
+.IP \[bu]
+For s390 and s390x,
+.I NR
+(the system call number) may be passed directly with
+.I "svc\ NR"
+if it is less than 256.
+.IP \[bu]
+On SuperH additional trap numbers are supported for historic reasons, but
+.BR trapa #31
+is the recommended "unified" ABI.
+.IP \[bu]
+The x32 ABI shares syscall table with x86-64 ABI, but there are some
+nuances:
+.RS
+.IP \[bu] 3
+In order to indicate that a system call is called under the x32 ABI,
+an additional bit,
+.BR __X32_SYSCALL_BIT ,
+is bitwise ORed with the system call number.
+The ABI used by a process affects some process behaviors,
+including signal handling or system call restarting.
+.IP \[bu]
+Since x32 has different sizes for
+.I long
+and pointer types, layouts of some (but not all;
+.I struct timeval
+or
+.I struct rlimit
+are 64-bit, for example) structures are different.
+In order to handle this,
+additional system calls are added to the system call table,
+starting from number 512
+(without the
+.BR __X32_SYSCALL_BIT ).
+For example,
+.B __NR_readv
+is defined as 19 for the x86-64 ABI and as
+.IR __X32_SYSCALL_BIT " | " \fB515\fP
+for the x32 ABI.
+Most of these additional system calls are actually identical
+to the system calls used for providing i386 compat.
+There are some notable exceptions, however, such as
+.BR preadv2 (2),
+which uses
+.I struct iovec
+entities with 4-byte pointers and sizes ("compat_iovec" in kernel terms),
+but passes an 8-byte
+.I pos
+argument in a single register and not two, as is done in every other ABI.
+.RE
+.IP \[bu]
+Some architectures
+(namely, Alpha, IA-64, MIPS, SuperH, sparc/32, and sparc/64)
+use an additional register ("Retval2" in the above table)
+to pass back a second return value from the
+.BR pipe (2)
+system call;
+Alpha uses this technique in the architecture-specific
+.BR getxpid (2),
+.BR getxuid (2),
+and
+.BR getxgid (2)
+system calls as well.
+Other architectures do not use the second return value register
+in the system call interface, even if it is defined in the System V ABI.
+.if t \{\
+.in
+.ft P
+\}
+.PP
+The second table shows the registers used to pass the system call arguments.
+.if t \{\
+.ft CW
+\}
+.TS
+l l2 l2 l2 l2 l2 l2 l2 l.
+Arch/ABI arg1 arg2 arg3 arg4 arg5 arg6 arg7 Notes
+_
+alpha a0 a1 a2 a3 a4 a5 -
+arc r0 r1 r2 r3 r4 r5 -
+arm/OABI r0 r1 r2 r3 r4 r5 r6
+arm/EABI r0 r1 r2 r3 r4 r5 r6
+arm64 x0 x1 x2 x3 x4 x5 -
+blackfin R0 R1 R2 R3 R4 R5 -
+i386 ebx ecx edx esi edi ebp -
+ia64 out0 out1 out2 out3 out4 out5 -
+loongarch a0 a1 a2 a3 a4 a5 a6
+m68k d1 d2 d3 d4 d5 a0 -
+microblaze r5 r6 r7 r8 r9 r10 -
+mips/o32 a0 a1 a2 a3 - - - 1
+mips/n32,64 a0 a1 a2 a3 a4 a5 -
+nios2 r4 r5 r6 r7 r8 r9 -
+parisc r26 r25 r24 r23 r22 r21 -
+powerpc r3 r4 r5 r6 r7 r8 r9
+powerpc64 r3 r4 r5 r6 r7 r8 -
+riscv a0 a1 a2 a3 a4 a5 -
+s390 r2 r3 r4 r5 r6 r7 -
+s390x r2 r3 r4 r5 r6 r7 -
+superh r4 r5 r6 r7 r0 r1 r2
+sparc/32 o0 o1 o2 o3 o4 o5 -
+sparc/64 o0 o1 o2 o3 o4 o5 -
+tile R00 R01 R02 R03 R04 R05 -
+x86-64 rdi rsi rdx r10 r8 r9 -
+x32 rdi rsi rdx r10 r8 r9 -
+xtensa a6 a3 a4 a5 a8 a9 -
+.TE
+.PP
+Notes:
+.IP \[bu] 3
+The mips/o32 system call convention passes
+arguments 5 through 8 on the user stack.
+.if t \{\
+.in
+.ft P
+\}
+.PP
+Note that these tables don't cover the entire calling convention\[em]some
+architectures may indiscriminately clobber other registers not listed here.
+.SH EXAMPLES
+.\" SRC BEGIN (syscall.c)
+.EX
+#define _GNU_SOURCE
+#include <signal.h>
+#include <sys/syscall.h>
+#include <unistd.h>
+\&
+int
+main(void)
+{
+ pid_t tid;
+\&
+ tid = syscall(SYS_gettid);
+ syscall(SYS_tgkill, getpid(), tid, SIGHUP);
+}
+.EE
+.\" SRC END
+.SH SEE ALSO
+.BR _syscall (2),
+.BR intro (2),
+.BR syscalls (2),
+.BR errno (3),
+.BR vdso (7)