diff options
Diffstat (limited to 'man2/sched_setaffinity.2')
-rw-r--r-- | man2/sched_setaffinity.2 | 427 |
1 files changed, 427 insertions, 0 deletions
diff --git a/man2/sched_setaffinity.2 b/man2/sched_setaffinity.2 new file mode 100644 index 0000000..9389e09 --- /dev/null +++ b/man2/sched_setaffinity.2 @@ -0,0 +1,427 @@ +.\" Copyright (C) 2002 Robert Love +.\" and Copyright (C) 2006, 2015 Michael Kerrisk +.\" +.\" SPDX-License-Identifier: GPL-2.0-or-later +.\" +.\" 2002-11-19 Robert Love <rml@tech9.net> - initial version +.\" 2004-04-20 mtk - fixed description of return value +.\" 2004-04-22 aeb - added glibc prototype history +.\" 2005-05-03 mtk - noted that sched_setaffinity may cause thread +.\" migration and that CPU affinity is a per-thread attribute. +.\" 2006-02-03 mtk -- Major rewrite +.\" 2008-11-12, mtk, removed CPU_*() macro descriptions to a +.\" separate CPU_SET(3) page. +.\" +.TH sched_setaffinity 2 2023-05-03 "Linux man-pages 6.05.01" +.SH NAME +sched_setaffinity, sched_getaffinity \- \ +set and get a thread's CPU affinity mask +.SH LIBRARY +Standard C library +.RI ( libc ", " \-lc ) +.SH SYNOPSIS +.nf +.BR "#define _GNU_SOURCE" " /* See feature_test_macros(7) */" +.B #include <sched.h> +.PP +.BI "int sched_setaffinity(pid_t " pid ", size_t " cpusetsize , +.BI " const cpu_set_t *" mask ); +.BI "int sched_getaffinity(pid_t " pid ", size_t " cpusetsize , +.BI " cpu_set_t *" mask ); +.fi +.SH DESCRIPTION +A thread's CPU affinity mask determines the set of CPUs on which +it is eligible to run. +On a multiprocessor system, setting the CPU affinity mask +can be used to obtain performance benefits. +For example, +by dedicating one CPU to a particular thread +(i.e., setting the affinity mask of that thread to specify a single CPU, +and setting the affinity mask of all other threads to exclude that CPU), +it is possible to ensure maximum execution speed for that thread. +Restricting a thread to run on a single CPU also avoids +the performance cost caused by the cache invalidation that occurs +when a thread ceases to execute on one CPU and then +recommences execution on a different CPU. +.PP +A CPU affinity mask is represented by the +.I cpu_set_t +structure, a "CPU set", pointed to by +.IR mask . +A set of macros for manipulating CPU sets is described in +.BR CPU_SET (3). +.PP +.BR sched_setaffinity () +sets the CPU affinity mask of the thread whose ID is +.I pid +to the value specified by +.IR mask . +If +.I pid +is zero, then the calling thread is used. +The argument +.I cpusetsize +is the length (in bytes) of the data pointed to by +.IR mask . +Normally this argument would be specified as +.IR "sizeof(cpu_set_t)" . +.PP +If the thread specified by +.I pid +is not currently running on one of the CPUs specified in +.IR mask , +then that thread is migrated to one of the CPUs specified in +.IR mask . +.PP +.BR sched_getaffinity () +writes the affinity mask of the thread whose ID is +.I pid +into the +.I cpu_set_t +structure pointed to by +.IR mask . +The +.I cpusetsize +argument specifies the size (in bytes) of +.IR mask . +If +.I pid +is zero, then the mask of the calling thread is returned. +.SH RETURN VALUE +On success, +.BR sched_setaffinity () +and +.BR sched_getaffinity () +return 0 (but see "C library/kernel differences" below, +which notes that the underlying +.BR sched_getaffinity () +differs in its return value). +On failure, \-1 is returned, and +.I errno +is set to indicate the error. +.SH ERRORS +.TP +.B EFAULT +A supplied memory address was invalid. +.TP +.B EINVAL +The affinity bit mask +.I mask +contains no processors that are currently physically on the system +and permitted to the thread according to any restrictions that +may be imposed by +.I cpuset +cgroups or the "cpuset" mechanism described in +.BR cpuset (7). +.TP +.B EINVAL +.RB ( sched_getaffinity () +and, before Linux 2.6.9, +.BR sched_setaffinity ()) +.I cpusetsize +is smaller than the size of the affinity mask used by the kernel. +.TP +.B EPERM +.RB ( sched_setaffinity ()) +The calling thread does not have appropriate privileges. +The caller needs an effective user ID equal to the real user ID +or effective user ID of the thread identified by +.IR pid , +or it must possess the +.B CAP_SYS_NICE +capability in the user namespace of the thread +.IR pid . +.TP +.B ESRCH +The thread whose ID is \fIpid\fP could not be found. +.SH STANDARDS +Linux. +.SH HISTORY +Linux 2.5.8, +glibc 2.3. +.PP +Initially, the glibc interfaces included a +.I cpusetsize +argument, typed as +.IR "unsigned int" . +In glibc 2.3.3, the +.I cpusetsize +argument was removed, but was then restored in glibc 2.3.4, with type +.IR size_t . +.SH NOTES +After a call to +.BR sched_setaffinity (), +the set of CPUs on which the thread will actually run is +the intersection of the set specified in the +.I mask +argument and the set of CPUs actually present on the system. +The system may further restrict the set of CPUs on which the thread +runs if the "cpuset" mechanism described in +.BR cpuset (7) +is being used. +These restrictions on the actual set of CPUs on which the thread +will run are silently imposed by the kernel. +.PP +There are various ways of determining the number of CPUs +available on the system, including: inspecting the contents of +.IR /proc/cpuinfo ; +using +.BR sysconf (3) +to obtain the values of the +.B _SC_NPROCESSORS_CONF +and +.B _SC_NPROCESSORS_ONLN +parameters; and inspecting the list of CPU directories under +.IR /sys/devices/system/cpu/ . +.PP +.BR sched (7) +has a description of the Linux scheduling scheme. +.PP +The affinity mask is a per-thread attribute that can be +adjusted independently for each of the threads in a thread group. +The value returned from a call to +.BR gettid (2) +can be passed in the argument +.IR pid . +Specifying +.I pid +as 0 will set the attribute for the calling thread, +and passing the value returned from a call to +.BR getpid (2) +will set the attribute for the main thread of the thread group. +(If you are using the POSIX threads API, then use +.BR pthread_setaffinity_np (3) +instead of +.BR sched_setaffinity ().) +.PP +The +.I isolcpus +boot option can be used to isolate one or more CPUs at boot time, +so that no processes are scheduled onto those CPUs. +Following the use of this boot option, +the only way to schedule processes onto the isolated CPUs is via +.BR sched_setaffinity () +or the +.BR cpuset (7) +mechanism. +For further information, see the kernel source file +.IR Documentation/admin\-guide/kernel\-parameters.txt . +As noted in that file, +.I isolcpus +is the preferred mechanism of isolating CPUs +(versus the alternative of manually setting the CPU affinity +of all processes on the system). +.PP +A child created via +.BR fork (2) +inherits its parent's CPU affinity mask. +The affinity mask is preserved across an +.BR execve (2). +.SS C library/kernel differences +This manual page describes the glibc interface for the CPU affinity calls. +The actual system call interface is slightly different, with the +.I mask +being typed as +.IR "unsigned long\ *" , +reflecting the fact that the underlying implementation of CPU +sets is a simple bit mask. +.PP +On success, the raw +.BR sched_getaffinity () +system call returns the number of bytes placed copied into the +.I mask +buffer; +this will be the minimum of +.I cpusetsize +and the size (in bytes) of the +.I cpumask_t +data type that is used internally by the kernel to +represent the CPU set bit mask. +.SS Handling systems with large CPU affinity masks +The underlying system calls (which represent CPU masks as bit masks of type +.IR "unsigned long\ *" ) +impose no restriction on the size of the CPU mask. +However, the +.I cpu_set_t +data type used by glibc has a fixed size of 128 bytes, +meaning that the maximum CPU number that can be represented is 1023. +.\" FIXME . See https://sourceware.org/bugzilla/show_bug.cgi?id=15630 +.\" and https://sourceware.org/ml/libc-alpha/2013-07/msg00288.html +If the kernel CPU affinity mask is larger than 1024, +then calls of the form: +.PP +.in +4n +.EX +sched_getaffinity(pid, sizeof(cpu_set_t), &mask); +.EE +.in +.PP +fail with the error +.BR EINVAL , +the error produced by the underlying system call for the case where the +.I mask +size specified in +.I cpusetsize +is smaller than the size of the affinity mask used by the kernel. +(Depending on the system CPU topology, the kernel affinity mask can +be substantially larger than the number of active CPUs in the system.) +.PP +When working on systems with large kernel CPU affinity masks, +one must dynamically allocate the +.I mask +argument (see +.BR CPU_ALLOC (3)). +Currently, the only way to do this is by probing for the size +of the required mask using +.BR sched_getaffinity () +calls with increasing mask sizes (until the call does not fail with the error +.BR EINVAL ). +.PP +Be aware that +.BR CPU_ALLOC (3) +may allocate a slightly larger CPU set than requested +(because CPU sets are implemented as bit masks allocated in units of +.IR sizeof(long) ). +Consequently, +.BR sched_getaffinity () +can set bits beyond the requested allocation size, because the kernel +sees a few additional bits. +Therefore, the caller should iterate over the bits in the returned set, +counting those which are set, and stop upon reaching the value returned by +.BR CPU_COUNT (3) +(rather than iterating over the number of bits +requested to be allocated). +.SH EXAMPLES +The program below creates a child process. +The parent and child then each assign themselves to a specified CPU +and execute identical loops that consume some CPU time. +Before terminating, the parent waits for the child to complete. +The program takes three command-line arguments: +the CPU number for the parent, +the CPU number for the child, +and the number of loop iterations that both processes should perform. +.PP +As the sample runs below demonstrate, the amount of real and CPU time +consumed when running the program will depend on intra-core caching effects +and whether the processes are using the same CPU. +.PP +We first employ +.BR lscpu (1) +to determine that this (x86) +system has two cores, each with two CPUs: +.PP +.in +4n +.EX +$ \fBlscpu | egrep \-i \[aq]core.*:|socket\[aq]\fP +Thread(s) per core: 2 +Core(s) per socket: 2 +Socket(s): 1 +.EE +.in +.PP +We then time the operation of the example program for three cases: +both processes running on the same CPU; +both processes running on different CPUs on the same core; +and both processes running on different CPUs on different cores. +.PP +.in +4n +.EX +$ \fBtime \-p ./a.out 0 0 100000000\fP +real 14.75 +user 3.02 +sys 11.73 +$ \fBtime \-p ./a.out 0 1 100000000\fP +real 11.52 +user 3.98 +sys 19.06 +$ \fBtime \-p ./a.out 0 3 100000000\fP +real 7.89 +user 3.29 +sys 12.07 +.EE +.in +.SS Program source +\& +.\" SRC BEGIN (sched_setaffinity.c) +.EX +#define _GNU_SOURCE +#include <err.h> +#include <sched.h> +#include <stdio.h> +#include <stdlib.h> +#include <sys/wait.h> +#include <unistd.h> +\& +int +main(int argc, char *argv[]) +{ + int parentCPU, childCPU; + cpu_set_t set; + unsigned int nloops; +\& + if (argc != 4) { + fprintf(stderr, "Usage: %s parent\-cpu child\-cpu num\-loops\en", + argv[0]); + exit(EXIT_FAILURE); + } +\& + parentCPU = atoi(argv[1]); + childCPU = atoi(argv[2]); + nloops = atoi(argv[3]); +\& + CPU_ZERO(&set); +\& + switch (fork()) { + case \-1: /* Error */ + err(EXIT_FAILURE, "fork"); +\& + case 0: /* Child */ + CPU_SET(childCPU, &set); +\& + if (sched_setaffinity(getpid(), sizeof(set), &set) == \-1) + err(EXIT_FAILURE, "sched_setaffinity"); +\& + for (unsigned int j = 0; j < nloops; j++) + getppid(); +\& + exit(EXIT_SUCCESS); +\& + default: /* Parent */ + CPU_SET(parentCPU, &set); +\& + if (sched_setaffinity(getpid(), sizeof(set), &set) == \-1) + err(EXIT_FAILURE, "sched_setaffinity"); +\& + for (unsigned int j = 0; j < nloops; j++) + getppid(); +\& + wait(NULL); /* Wait for child to terminate */ + exit(EXIT_SUCCESS); + } +} +.EE +.\" SRC END +.SH SEE ALSO +.ad l +.nh +.BR lscpu (1), +.BR nproc (1), +.BR taskset (1), +.BR clone (2), +.BR getcpu (2), +.BR getpriority (2), +.BR gettid (2), +.BR nice (2), +.BR sched_get_priority_max (2), +.BR sched_get_priority_min (2), +.BR sched_getscheduler (2), +.BR sched_setscheduler (2), +.BR setpriority (2), +.BR CPU_SET (3), +.BR get_nprocs (3), +.BR pthread_setaffinity_np (3), +.BR sched_getcpu (3), +.BR capabilities (7), +.BR cpuset (7), +.BR sched (7), +.BR numactl (8) |