From 3d08cd331c1adcf0d917392f7e527b3f00511748 Mon Sep 17 00:00:00 2001 From: Daniel Baumann Date: Fri, 24 May 2024 06:52:22 +0200 Subject: Merging upstream version 6.8. Signed-off-by: Daniel Baumann --- man2/membarrier.2 | 460 ------------------------------------------------------ 1 file changed, 460 deletions(-) delete mode 100644 man2/membarrier.2 (limited to 'man2/membarrier.2') diff --git a/man2/membarrier.2 b/man2/membarrier.2 deleted file mode 100644 index ce5a9de..0000000 --- a/man2/membarrier.2 +++ /dev/null @@ -1,460 +0,0 @@ -'\" t -.\" Copyright 2015-2017 Mathieu Desnoyers -.\" -.\" SPDX-License-Identifier: Linux-man-pages-copyleft -.\" -.TH membarrier 2 2023-10-31 "Linux man-pages 6.7" -.SH NAME -membarrier \- issue memory barriers on a set of threads -.SH LIBRARY -Standard C library -.RI ( libc ", " \-lc ) -.SH SYNOPSIS -.nf -.P -.BR "#include " \ -" /* Definition of " MEMBARRIER_* " constants */" -.BR "#include " " /* Definition of " SYS_* " constants */" -.B #include -.P -.BI "int syscall(SYS_membarrier, int " cmd ", unsigned int " flags \ -", int " cpu_id ); -.fi -.P -.IR Note : -glibc provides no wrapper for -.BR membarrier (), -necessitating the use of -.BR syscall (2). -.SH DESCRIPTION -The -.BR membarrier () -system call helps reducing the overhead of the memory barrier -instructions required to order memory accesses on multi-core systems. -However, this system call is heavier than a memory barrier, so using it -effectively is -.I not -as simple as replacing memory barriers with this -system call, but requires understanding of the details below. -.P -Use of memory barriers needs to be done taking into account that a -memory barrier always needs to be either matched with its memory barrier -counterparts, or that the architecture's memory model doesn't require the -matching barriers. -.P -There are cases where one side of the matching barriers (which we will -refer to as "fast side") is executed much more often than the other -(which we will refer to as "slow side"). -This is a prime target for the use of -.BR membarrier (). -The key idea is to replace, for these matching -barriers, the fast-side memory barriers by simple compiler barriers, -for example: -.P -.in +4n -.EX -asm volatile ("" : : : "memory") -.EE -.in -.P -and replace the slow-side memory barriers by calls to -.BR membarrier (). -.P -This will add overhead to the slow side, and remove overhead from the -fast side, thus resulting in an overall performance increase as long as -the slow side is infrequent enough that the overhead of the -.BR membarrier () -calls does not outweigh the performance gain on the fast side. -.P -The -.I cmd -argument is one of the following: -.TP -.BR MEMBARRIER_CMD_QUERY " (since Linux 4.3)" -Query the set of supported commands. -The return value of the call is a bit mask of supported -commands. -.BR MEMBARRIER_CMD_QUERY , -which has the value 0, -is not itself included in this bit mask. -This command is always supported (on kernels where -.BR membarrier () -is provided). -.TP -.BR MEMBARRIER_CMD_GLOBAL " (since Linux 4.16)" -Ensure that all threads from all processes on the system pass through a -state where all memory accesses to user-space addresses match program -order between entry to and return from the -.BR membarrier () -system call. -All threads on the system are targeted by this command. -.TP -.BR MEMBARRIER_CMD_GLOBAL_EXPEDITED " (since Linux 4.16)" -Execute a memory barrier on all running threads of all processes that -previously registered with -.BR MEMBARRIER_CMD_REGISTER_GLOBAL_EXPEDITED . -.IP -Upon return from the system call, the calling thread has a guarantee that all -running threads have passed through a state where all memory accesses to -user-space addresses match program order between entry to and return -from the system call (non-running threads are de facto in such a state). -This guarantee is provided only for the threads of processes that -previously registered with -.BR MEMBARRIER_CMD_REGISTER_GLOBAL_EXPEDITED . -.IP -Given that registration is about the intent to receive the barriers, it -is valid to invoke -.B MEMBARRIER_CMD_GLOBAL_EXPEDITED -from a process that has not employed -.BR MEMBARRIER_CMD_REGISTER_GLOBAL_EXPEDITED . -.IP -The "expedited" commands complete faster than the non-expedited ones; -they never block, but have the downside of causing extra overhead. -.TP -.BR MEMBARRIER_CMD_REGISTER_GLOBAL_EXPEDITED " (since Linux 4.16)" -Register the process's intent to receive -.B MEMBARRIER_CMD_GLOBAL_EXPEDITED -memory barriers. -.TP -.BR MEMBARRIER_CMD_PRIVATE_EXPEDITED " (since Linux 4.14)" -Execute a memory barrier on each running thread belonging to the same -process as the calling thread. -.IP -Upon return from the system call, the calling -thread has a guarantee that all its running thread siblings have passed -through a state where all memory accesses to user-space addresses match -program order between entry to and return from the system call -(non-running threads are de facto in such a state). -This guarantee is provided only for threads in -the same process as the calling thread. -.IP -The "expedited" commands complete faster than the non-expedited ones; -they never block, but have the downside of causing extra overhead. -.IP -A process must register its intent to use the private -expedited command prior to using it. -.TP -.BR MEMBARRIER_CMD_REGISTER_PRIVATE_EXPEDITED " (since Linux 4.14)" -Register the process's intent to use -.BR MEMBARRIER_CMD_PRIVATE_EXPEDITED . -.TP -.BR MEMBARRIER_CMD_PRIVATE_EXPEDITED_SYNC_CORE " (since Linux 4.16)" -In addition to providing the memory ordering guarantees described in -.BR MEMBARRIER_CMD_PRIVATE_EXPEDITED , -upon return from system call the calling thread has a guarantee that all its -running thread siblings have executed a core serializing instruction. -This guarantee is provided only for threads in -the same process as the calling thread. -.IP -The "expedited" commands complete faster than the non-expedited ones, -they never block, but have the downside of causing extra overhead. -.IP -A process must register its intent to use the private expedited sync -core command prior to using it. -.TP -.BR MEMBARRIER_CMD_REGISTER_PRIVATE_EXPEDITED_SYNC_CORE " (since Linux 4.16)" -Register the process's intent to use -.BR MEMBARRIER_CMD_PRIVATE_EXPEDITED_SYNC_CORE . -.TP -.BR MEMBARRIER_CMD_PRIVATE_EXPEDITED_RSEQ " (since Linux 5.10)" -Ensure the caller thread, upon return from system call, that all its -running thread siblings have any currently running rseq critical sections -restarted if -.I flags -parameter is 0; if -.I flags -parameter is -.BR MEMBARRIER_CMD_FLAG_CPU , -then this operation is performed only on CPU indicated by -.IR cpu_id . -This guarantee is provided only for threads in -the same process as the calling thread. -.IP -RSEQ membarrier is only available in the "private expedited" form. -.IP -A process must register its intent to use the private expedited rseq -command prior to using it. -.TP -.BR MEMBARRIER_CMD_REGISTER_PRIVATE_EXPEDITED_RSEQ " (since Linux 5.10)" -Register the process's intent to use -.BR MEMBARRIER_CMD_PRIVATE_EXPEDITED_RSEQ . -.TP -.BR MEMBARRIER_CMD_SHARED " (since Linux 4.3)" -This is an alias for -.B MEMBARRIER_CMD_GLOBAL -that exists for header backward compatibility. -.P -The -.I flags -argument must be specified as 0 unless the command is -.BR MEMBARRIER_CMD_PRIVATE_EXPEDITED_RSEQ , -in which case -.I flags -can be either 0 or -.BR MEMBARRIER_CMD_FLAG_CPU . -.P -The -.I cpu_id -argument is ignored unless -.I flags -is -.BR MEMBARRIER_CMD_FLAG_CPU , -in which case it must specify the CPU targeted by this membarrier -command. -.P -All memory accesses performed in program order from each targeted thread -are guaranteed to be ordered with respect to -.BR membarrier (). -.P -If we use the semantic -.I barrier() -to represent a compiler barrier forcing memory -accesses to be performed in program order across the barrier, and -.I smp_mb() -to represent explicit memory barriers forcing full memory -ordering across the barrier, we have the following ordering table for -each pairing of -.IR barrier() , -.BR membarrier (), -and -.IR smp_mb() . -The pair ordering is detailed as (O: ordered, X: not ordered): -.P -.RS -.TS -l c c c. -\& barrier() smp_mb() membarrier() -barrier() X X O -smp_mb() X O O -membarrier() O O O -.TE -.RE -.SH RETURN VALUE -On success, the -.B MEMBARRIER_CMD_QUERY -operation returns a bit mask of supported commands, and the -.BR MEMBARRIER_CMD_GLOBAL , -.BR MEMBARRIER_CMD_GLOBAL_EXPEDITED , -.BR MEMBARRIER_CMD_REGISTER_GLOBAL_EXPEDITED , -.BR MEMBARRIER_CMD_PRIVATE_EXPEDITED , -.BR MEMBARRIER_CMD_REGISTER_PRIVATE_EXPEDITED , -.BR MEMBARRIER_CMD_PRIVATE_EXPEDITED_SYNC_CORE , -and -.B MEMBARRIER_CMD_REGISTER_PRIVATE_EXPEDITED_SYNC_CORE -operations return zero. -On error, \-1 is returned, -and -.I errno -is set to indicate the error. -.P -For a given command, with -.I flags -set to 0, this system call is -guaranteed to always return the same value until reboot. -Further calls with the same arguments will lead to the same result. -Therefore, with -.I flags -set to 0, error handling is required only for the first call to -.BR membarrier (). -.SH ERRORS -.TP -.B EINVAL -.I cmd -is invalid, or -.I flags -is nonzero, or the -.B MEMBARRIER_CMD_GLOBAL -command is disabled because the -.I nohz_full -CPU parameter has been set, or the -.B MEMBARRIER_CMD_PRIVATE_EXPEDITED_SYNC_CORE -and -.B MEMBARRIER_CMD_REGISTER_PRIVATE_EXPEDITED_SYNC_CORE -commands are not implemented by the architecture. -.TP -.B ENOSYS -The -.BR membarrier () -system call is not implemented by this kernel. -.TP -.B EPERM -The current process was not registered prior to using private expedited -commands. -.SH STANDARDS -Linux. -.SH HISTORY -Linux 4.3. -.P -Before Linux 5.10, the prototype was: -.P -.in +4n -.EX -.BI "int membarrier(int " cmd ", int " flags ); -.EE -.in -.SH NOTES -A memory barrier instruction is part of the instruction set of -architectures with weakly ordered memory models. -It orders memory -accesses prior to the barrier and after the barrier with respect to -matching barriers on other cores. -For instance, a load fence can order -loads prior to and following that fence with respect to stores ordered -by store fences. -.P -Program order is the order in which instructions are ordered in the -program assembly code. -.P -Examples where -.BR membarrier () -can be useful include implementations -of Read-Copy-Update libraries and garbage collectors. -.SH EXAMPLES -Assuming a multithreaded application where "fast_path()" is executed -very frequently, and where "slow_path()" is executed infrequently, the -following code (x86) can be transformed using -.BR membarrier (): -.P -.in +4n -.\" SRC BEGIN (membarrier.c) -.EX -#include -\& -static volatile int a, b; -\& -static void -fast_path(int *read_b) -{ - a = 1; - asm volatile ("mfence" : : : "memory"); - *read_b = b; -} -\& -static void -slow_path(int *read_a) -{ - b = 1; - asm volatile ("mfence" : : : "memory"); - *read_a = a; -} -\& -int -main(void) -{ - int read_a, read_b; -\& - /* - * Real applications would call fast_path() and slow_path() - * from different threads. Call those from main() to keep - * this example short. - */ -\& - slow_path(&read_a); - fast_path(&read_b); -\& - /* - * read_b == 0 implies read_a == 1 and - * read_a == 0 implies read_b == 1. - */ -\& - if (read_b == 0 && read_a == 0) - abort(); -\& - exit(EXIT_SUCCESS); -} -.EE -.\" SRC END -.in -.P -The code above transformed to use -.BR membarrier () -becomes: -.P -.in +4n -.EX -#define _GNU_SOURCE -#include -#include -#include -#include -#include -\& -static volatile int a, b; -\& -static int -membarrier(int cmd, unsigned int flags, int cpu_id) -{ - return syscall(__NR_membarrier, cmd, flags, cpu_id); -} -\& -static int -init_membarrier(void) -{ - int ret; -\& - /* Check that membarrier() is supported. */ -\& - ret = membarrier(MEMBARRIER_CMD_QUERY, 0, 0); - if (ret < 0) { - perror("membarrier"); - return \-1; - } -\& - if (!(ret & MEMBARRIER_CMD_GLOBAL)) { - fprintf(stderr, - "membarrier does not support MEMBARRIER_CMD_GLOBAL\en"); - return \-1; - } -\& - return 0; -} -\& -static void -fast_path(int *read_b) -{ - a = 1; - asm volatile ("" : : : "memory"); - *read_b = b; -} -\& -static void -slow_path(int *read_a) -{ - b = 1; - membarrier(MEMBARRIER_CMD_GLOBAL, 0, 0); - *read_a = a; -} -\& -int -main(int argc, char *argv[]) -{ - int read_a, read_b; -\& - if (init_membarrier()) - exit(EXIT_FAILURE); -\& - /* - * Real applications would call fast_path() and slow_path() - * from different threads. Call those from main() to keep - * this example short. - */ -\& - slow_path(&read_a); - fast_path(&read_b); -\& - /* - * read_b == 0 implies read_a == 1 and - * read_a == 0 implies read_b == 1. - */ -\& - if (read_b == 0 && read_a == 0) - abort(); -\& - exit(EXIT_SUCCESS); -} -.EE -.in -.\" .SH SEE ALSO -.\" FIXME See if the following syscalls make it into Linux 4.15 or later -.\" .BR cpu_opv (2), -.\" .BR rseq (2) -- cgit v1.2.3