summaryrefslogtreecommitdiffstats
path: root/man2/membarrier.2
diff options
context:
space:
mode:
Diffstat (limited to 'man2/membarrier.2')
-rw-r--r--man2/membarrier.2460
1 files changed, 0 insertions, 460 deletions
diff --git a/man2/membarrier.2 b/man2/membarrier.2
deleted file mode 100644
index ce5a9de..0000000
--- a/man2/membarrier.2
+++ /dev/null
@@ -1,460 +0,0 @@
-'\" t
-.\" Copyright 2015-2017 Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
-.\"
-.\" SPDX-License-Identifier: Linux-man-pages-copyleft
-.\"
-.TH membarrier 2 2023-10-31 "Linux man-pages 6.7"
-.SH NAME
-membarrier \- issue memory barriers on a set of threads
-.SH LIBRARY
-Standard C library
-.RI ( libc ", " \-lc )
-.SH SYNOPSIS
-.nf
-.P
-.BR "#include <linux/membarrier.h>" \
-" /* Definition of " MEMBARRIER_* " constants */"
-.BR "#include <sys/syscall.h>" " /* Definition of " SYS_* " constants */"
-.B #include <unistd.h>
-.P
-.BI "int syscall(SYS_membarrier, int " cmd ", unsigned int " flags \
-", int " cpu_id );
-.fi
-.P
-.IR Note :
-glibc provides no wrapper for
-.BR membarrier (),
-necessitating the use of
-.BR syscall (2).
-.SH DESCRIPTION
-The
-.BR membarrier ()
-system call helps reducing the overhead of the memory barrier
-instructions required to order memory accesses on multi-core systems.
-However, this system call is heavier than a memory barrier, so using it
-effectively is
-.I not
-as simple as replacing memory barriers with this
-system call, but requires understanding of the details below.
-.P
-Use of memory barriers needs to be done taking into account that a
-memory barrier always needs to be either matched with its memory barrier
-counterparts, or that the architecture's memory model doesn't require the
-matching barriers.
-.P
-There are cases where one side of the matching barriers (which we will
-refer to as "fast side") is executed much more often than the other
-(which we will refer to as "slow side").
-This is a prime target for the use of
-.BR membarrier ().
-The key idea is to replace, for these matching
-barriers, the fast-side memory barriers by simple compiler barriers,
-for example:
-.P
-.in +4n
-.EX
-asm volatile ("" : : : "memory")
-.EE
-.in
-.P
-and replace the slow-side memory barriers by calls to
-.BR membarrier ().
-.P
-This will add overhead to the slow side, and remove overhead from the
-fast side, thus resulting in an overall performance increase as long as
-the slow side is infrequent enough that the overhead of the
-.BR membarrier ()
-calls does not outweigh the performance gain on the fast side.
-.P
-The
-.I cmd
-argument is one of the following:
-.TP
-.BR MEMBARRIER_CMD_QUERY " (since Linux 4.3)"
-Query the set of supported commands.
-The return value of the call is a bit mask of supported
-commands.
-.BR MEMBARRIER_CMD_QUERY ,
-which has the value 0,
-is not itself included in this bit mask.
-This command is always supported (on kernels where
-.BR membarrier ()
-is provided).
-.TP
-.BR MEMBARRIER_CMD_GLOBAL " (since Linux 4.16)"
-Ensure that all threads from all processes on the system pass through a
-state where all memory accesses to user-space addresses match program
-order between entry to and return from the
-.BR membarrier ()
-system call.
-All threads on the system are targeted by this command.
-.TP
-.BR MEMBARRIER_CMD_GLOBAL_EXPEDITED " (since Linux 4.16)"
-Execute a memory barrier on all running threads of all processes that
-previously registered with
-.BR MEMBARRIER_CMD_REGISTER_GLOBAL_EXPEDITED .
-.IP
-Upon return from the system call, the calling thread has a guarantee that all
-running threads have passed through a state where all memory accesses to
-user-space addresses match program order between entry to and return
-from the system call (non-running threads are de facto in such a state).
-This guarantee is provided only for the threads of processes that
-previously registered with
-.BR MEMBARRIER_CMD_REGISTER_GLOBAL_EXPEDITED .
-.IP
-Given that registration is about the intent to receive the barriers, it
-is valid to invoke
-.B MEMBARRIER_CMD_GLOBAL_EXPEDITED
-from a process that has not employed
-.BR MEMBARRIER_CMD_REGISTER_GLOBAL_EXPEDITED .
-.IP
-The "expedited" commands complete faster than the non-expedited ones;
-they never block, but have the downside of causing extra overhead.
-.TP
-.BR MEMBARRIER_CMD_REGISTER_GLOBAL_EXPEDITED " (since Linux 4.16)"
-Register the process's intent to receive
-.B MEMBARRIER_CMD_GLOBAL_EXPEDITED
-memory barriers.
-.TP
-.BR MEMBARRIER_CMD_PRIVATE_EXPEDITED " (since Linux 4.14)"
-Execute a memory barrier on each running thread belonging to the same
-process as the calling thread.
-.IP
-Upon return from the system call, the calling
-thread has a guarantee that all its running thread siblings have passed
-through a state where all memory accesses to user-space addresses match
-program order between entry to and return from the system call
-(non-running threads are de facto in such a state).
-This guarantee is provided only for threads in
-the same process as the calling thread.
-.IP
-The "expedited" commands complete faster than the non-expedited ones;
-they never block, but have the downside of causing extra overhead.
-.IP
-A process must register its intent to use the private
-expedited command prior to using it.
-.TP
-.BR MEMBARRIER_CMD_REGISTER_PRIVATE_EXPEDITED " (since Linux 4.14)"
-Register the process's intent to use
-.BR MEMBARRIER_CMD_PRIVATE_EXPEDITED .
-.TP
-.BR MEMBARRIER_CMD_PRIVATE_EXPEDITED_SYNC_CORE " (since Linux 4.16)"
-In addition to providing the memory ordering guarantees described in
-.BR MEMBARRIER_CMD_PRIVATE_EXPEDITED ,
-upon return from system call the calling thread has a guarantee that all its
-running thread siblings have executed a core serializing instruction.
-This guarantee is provided only for threads in
-the same process as the calling thread.
-.IP
-The "expedited" commands complete faster than the non-expedited ones,
-they never block, but have the downside of causing extra overhead.
-.IP
-A process must register its intent to use the private expedited sync
-core command prior to using it.
-.TP
-.BR MEMBARRIER_CMD_REGISTER_PRIVATE_EXPEDITED_SYNC_CORE " (since Linux 4.16)"
-Register the process's intent to use
-.BR MEMBARRIER_CMD_PRIVATE_EXPEDITED_SYNC_CORE .
-.TP
-.BR MEMBARRIER_CMD_PRIVATE_EXPEDITED_RSEQ " (since Linux 5.10)"
-Ensure the caller thread, upon return from system call, that all its
-running thread siblings have any currently running rseq critical sections
-restarted if
-.I flags
-parameter is 0; if
-.I flags
-parameter is
-.BR MEMBARRIER_CMD_FLAG_CPU ,
-then this operation is performed only on CPU indicated by
-.IR cpu_id .
-This guarantee is provided only for threads in
-the same process as the calling thread.
-.IP
-RSEQ membarrier is only available in the "private expedited" form.
-.IP
-A process must register its intent to use the private expedited rseq
-command prior to using it.
-.TP
-.BR MEMBARRIER_CMD_REGISTER_PRIVATE_EXPEDITED_RSEQ " (since Linux 5.10)"
-Register the process's intent to use
-.BR MEMBARRIER_CMD_PRIVATE_EXPEDITED_RSEQ .
-.TP
-.BR MEMBARRIER_CMD_SHARED " (since Linux 4.3)"
-This is an alias for
-.B MEMBARRIER_CMD_GLOBAL
-that exists for header backward compatibility.
-.P
-The
-.I flags
-argument must be specified as 0 unless the command is
-.BR MEMBARRIER_CMD_PRIVATE_EXPEDITED_RSEQ ,
-in which case
-.I flags
-can be either 0 or
-.BR MEMBARRIER_CMD_FLAG_CPU .
-.P
-The
-.I cpu_id
-argument is ignored unless
-.I flags
-is
-.BR MEMBARRIER_CMD_FLAG_CPU ,
-in which case it must specify the CPU targeted by this membarrier
-command.
-.P
-All memory accesses performed in program order from each targeted thread
-are guaranteed to be ordered with respect to
-.BR membarrier ().
-.P
-If we use the semantic
-.I barrier()
-to represent a compiler barrier forcing memory
-accesses to be performed in program order across the barrier, and
-.I smp_mb()
-to represent explicit memory barriers forcing full memory
-ordering across the barrier, we have the following ordering table for
-each pairing of
-.IR barrier() ,
-.BR membarrier (),
-and
-.IR smp_mb() .
-The pair ordering is detailed as (O: ordered, X: not ordered):
-.P
-.RS
-.TS
-l c c c.
-\& barrier() smp_mb() membarrier()
-barrier() X X O
-smp_mb() X O O
-membarrier() O O O
-.TE
-.RE
-.SH RETURN VALUE
-On success, the
-.B MEMBARRIER_CMD_QUERY
-operation returns a bit mask of supported commands, and the
-.BR MEMBARRIER_CMD_GLOBAL ,
-.BR MEMBARRIER_CMD_GLOBAL_EXPEDITED ,
-.BR MEMBARRIER_CMD_REGISTER_GLOBAL_EXPEDITED ,
-.BR MEMBARRIER_CMD_PRIVATE_EXPEDITED ,
-.BR MEMBARRIER_CMD_REGISTER_PRIVATE_EXPEDITED ,
-.BR MEMBARRIER_CMD_PRIVATE_EXPEDITED_SYNC_CORE ,
-and
-.B MEMBARRIER_CMD_REGISTER_PRIVATE_EXPEDITED_SYNC_CORE
-operations return zero.
-On error, \-1 is returned,
-and
-.I errno
-is set to indicate the error.
-.P
-For a given command, with
-.I flags
-set to 0, this system call is
-guaranteed to always return the same value until reboot.
-Further calls with the same arguments will lead to the same result.
-Therefore, with
-.I flags
-set to 0, error handling is required only for the first call to
-.BR membarrier ().
-.SH ERRORS
-.TP
-.B EINVAL
-.I cmd
-is invalid, or
-.I flags
-is nonzero, or the
-.B MEMBARRIER_CMD_GLOBAL
-command is disabled because the
-.I nohz_full
-CPU parameter has been set, or the
-.B MEMBARRIER_CMD_PRIVATE_EXPEDITED_SYNC_CORE
-and
-.B MEMBARRIER_CMD_REGISTER_PRIVATE_EXPEDITED_SYNC_CORE
-commands are not implemented by the architecture.
-.TP
-.B ENOSYS
-The
-.BR membarrier ()
-system call is not implemented by this kernel.
-.TP
-.B EPERM
-The current process was not registered prior to using private expedited
-commands.
-.SH STANDARDS
-Linux.
-.SH HISTORY
-Linux 4.3.
-.P
-Before Linux 5.10, the prototype was:
-.P
-.in +4n
-.EX
-.BI "int membarrier(int " cmd ", int " flags );
-.EE
-.in
-.SH NOTES
-A memory barrier instruction is part of the instruction set of
-architectures with weakly ordered memory models.
-It orders memory
-accesses prior to the barrier and after the barrier with respect to
-matching barriers on other cores.
-For instance, a load fence can order
-loads prior to and following that fence with respect to stores ordered
-by store fences.
-.P
-Program order is the order in which instructions are ordered in the
-program assembly code.
-.P
-Examples where
-.BR membarrier ()
-can be useful include implementations
-of Read-Copy-Update libraries and garbage collectors.
-.SH EXAMPLES
-Assuming a multithreaded application where "fast_path()" is executed
-very frequently, and where "slow_path()" is executed infrequently, the
-following code (x86) can be transformed using
-.BR membarrier ():
-.P
-.in +4n
-.\" SRC BEGIN (membarrier.c)
-.EX
-#include <stdlib.h>
-\&
-static volatile int a, b;
-\&
-static void
-fast_path(int *read_b)
-{
- a = 1;
- asm volatile ("mfence" : : : "memory");
- *read_b = b;
-}
-\&
-static void
-slow_path(int *read_a)
-{
- b = 1;
- asm volatile ("mfence" : : : "memory");
- *read_a = a;
-}
-\&
-int
-main(void)
-{
- int read_a, read_b;
-\&
- /*
- * Real applications would call fast_path() and slow_path()
- * from different threads. Call those from main() to keep
- * this example short.
- */
-\&
- slow_path(&read_a);
- fast_path(&read_b);
-\&
- /*
- * read_b == 0 implies read_a == 1 and
- * read_a == 0 implies read_b == 1.
- */
-\&
- if (read_b == 0 && read_a == 0)
- abort();
-\&
- exit(EXIT_SUCCESS);
-}
-.EE
-.\" SRC END
-.in
-.P
-The code above transformed to use
-.BR membarrier ()
-becomes:
-.P
-.in +4n
-.EX
-#define _GNU_SOURCE
-#include <stdlib.h>
-#include <stdio.h>
-#include <unistd.h>
-#include <sys/syscall.h>
-#include <linux/membarrier.h>
-\&
-static volatile int a, b;
-\&
-static int
-membarrier(int cmd, unsigned int flags, int cpu_id)
-{
- return syscall(__NR_membarrier, cmd, flags, cpu_id);
-}
-\&
-static int
-init_membarrier(void)
-{
- int ret;
-\&
- /* Check that membarrier() is supported. */
-\&
- ret = membarrier(MEMBARRIER_CMD_QUERY, 0, 0);
- if (ret < 0) {
- perror("membarrier");
- return \-1;
- }
-\&
- if (!(ret & MEMBARRIER_CMD_GLOBAL)) {
- fprintf(stderr,
- "membarrier does not support MEMBARRIER_CMD_GLOBAL\en");
- return \-1;
- }
-\&
- return 0;
-}
-\&
-static void
-fast_path(int *read_b)
-{
- a = 1;
- asm volatile ("" : : : "memory");
- *read_b = b;
-}
-\&
-static void
-slow_path(int *read_a)
-{
- b = 1;
- membarrier(MEMBARRIER_CMD_GLOBAL, 0, 0);
- *read_a = a;
-}
-\&
-int
-main(int argc, char *argv[])
-{
- int read_a, read_b;
-\&
- if (init_membarrier())
- exit(EXIT_FAILURE);
-\&
- /*
- * Real applications would call fast_path() and slow_path()
- * from different threads. Call those from main() to keep
- * this example short.
- */
-\&
- slow_path(&read_a);
- fast_path(&read_b);
-\&
- /*
- * read_b == 0 implies read_a == 1 and
- * read_a == 0 implies read_b == 1.
- */
-\&
- if (read_b == 0 && read_a == 0)
- abort();
-\&
- exit(EXIT_SUCCESS);
-}
-.EE
-.in
-.\" .SH SEE ALSO
-.\" FIXME See if the following syscalls make it into Linux 4.15 or later
-.\" .BR cpu_opv (2),
-.\" .BR rseq (2)