diff options
author | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-04-10 20:49:52 +0000 |
---|---|---|
committer | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-04-10 20:49:52 +0000 |
commit | 55944e5e40b1be2afc4855d8d2baf4b73d1876b5 (patch) | |
tree | 33f869f55a1b149e9b7c2b7e201867ca5dd52992 /src/basic/limits-util.c | |
parent | Initial commit. (diff) | |
download | systemd-55944e5e40b1be2afc4855d8d2baf4b73d1876b5.tar.xz systemd-55944e5e40b1be2afc4855d8d2baf4b73d1876b5.zip |
Adding upstream version 255.4.upstream/255.4
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'src/basic/limits-util.c')
-rw-r--r-- | src/basic/limits-util.c | 192 |
1 files changed, 192 insertions, 0 deletions
diff --git a/src/basic/limits-util.c b/src/basic/limits-util.c new file mode 100644 index 0000000..9597c4c --- /dev/null +++ b/src/basic/limits-util.c @@ -0,0 +1,192 @@ +/* SPDX-License-Identifier: LGPL-2.1-or-later */ + +#include <unistd.h> + +#include "alloc-util.h" +#include "cgroup-util.h" +#include "limits-util.h" +#include "memory-util.h" +#include "parse-util.h" +#include "process-util.h" +#include "procfs-util.h" +#include "string-util.h" + +uint64_t physical_memory(void) { + _cleanup_free_ char *root = NULL, *value = NULL; + uint64_t mem, lim; + size_t ps; + long sc; + int r; + + /* We return this as uint64_t in case we are running as 32-bit process on a 64-bit kernel with huge amounts of + * memory. + * + * In order to support containers nicely that have a configured memory limit we'll take the minimum of the + * physically reported amount of memory and the limit configured for the root cgroup, if there is any. */ + + sc = sysconf(_SC_PHYS_PAGES); + assert(sc > 0); + + ps = page_size(); + mem = (uint64_t) sc * (uint64_t) ps; + + r = cg_get_root_path(&root); + if (r < 0) { + log_debug_errno(r, "Failed to determine root cgroup, ignoring cgroup memory limit: %m"); + return mem; + } + + r = cg_all_unified(); + if (r < 0) { + log_debug_errno(r, "Failed to determine root unified mode, ignoring cgroup memory limit: %m"); + return mem; + } + if (r > 0) { + r = cg_get_attribute("memory", root, "memory.max", &value); + if (r == -ENOENT) /* Field does not exist on the system's top-level cgroup, hence don't + * complain. (Note that it might exist on our own root though, if we live + * in a cgroup namespace, hence check anyway instead of not even + * trying.) */ + return mem; + if (r < 0) { + log_debug_errno(r, "Failed to read memory.max cgroup attribute, ignoring cgroup memory limit: %m"); + return mem; + } + + if (streq(value, "max")) + return mem; + } else { + r = cg_get_attribute("memory", root, "memory.limit_in_bytes", &value); + if (r < 0) { + log_debug_errno(r, "Failed to read memory.limit_in_bytes cgroup attribute, ignoring cgroup memory limit: %m"); + return mem; + } + } + + r = safe_atou64(value, &lim); + if (r < 0) { + log_debug_errno(r, "Failed to parse cgroup memory limit '%s', ignoring: %m", value); + return mem; + } + if (lim == UINT64_MAX) + return mem; + + /* Make sure the limit is a multiple of our own page size */ + lim /= ps; + lim *= ps; + + return MIN(mem, lim); +} + +uint64_t physical_memory_scale(uint64_t v, uint64_t max) { + uint64_t p, m, ps; + + /* Shortcut two special cases */ + if (v == 0) + return 0; + if (v == max) + return physical_memory(); + + assert(max > 0); + + /* Returns the physical memory size, multiplied by v divided by max. Returns UINT64_MAX on overflow. On success + * the result is a multiple of the page size (rounds down). */ + + ps = page_size(); + assert(ps > 0); + + p = physical_memory() / ps; + assert(p > 0); + + if (v > UINT64_MAX / p) + return UINT64_MAX; + + m = p * v; + m /= max; + + if (m > UINT64_MAX / ps) + return UINT64_MAX; + + return m * ps; +} + +uint64_t system_tasks_max(void) { + uint64_t a = TASKS_MAX, b = TASKS_MAX, c = TASKS_MAX; + _cleanup_free_ char *root = NULL; + int r; + + /* Determine the maximum number of tasks that may run on this system. We check three sources to + * determine this limit: + * + * a) kernel.threads-max sysctl: the maximum number of tasks (threads) the kernel allows. + * + * This puts a direct limit on the number of concurrent tasks. + * + * b) kernel.pid_max sysctl: the maximum PID value. + * + * This limits the numeric range PIDs can take, and thus indirectly also limits the number of + * concurrent threads. It's primarily a compatibility concept: some crappy old code used a signed + * 16-bit type for PIDs, hence the kernel provides a way to ensure the PIDs never go beyond + * INT16_MAX by default. + * + * Also note the weird definition: PIDs assigned will be kept below this value, which means + * the number of tasks that can be created is one lower, as PID 0 is not a valid process ID. + * + * c) pids.max on the root cgroup: the kernel's configured maximum number of tasks. + * + * and then pick the smallest of the three. + * + * By default pid_max is set to much lower values than threads-max, hence the limit people come into + * contact with first, as it's the lowest boundary they need to bump when they want higher number of + * processes. + */ + + r = procfs_get_threads_max(&a); + if (r < 0) + log_debug_errno(r, "Failed to read kernel.threads-max, ignoring: %m"); + + r = procfs_get_pid_max(&b); + if (r < 0) + log_debug_errno(r, "Failed to read kernel.pid_max, ignoring: %m"); + else if (b > 0) + /* Subtract one from pid_max, since PID 0 is not a valid PID */ + b--; + + r = cg_get_root_path(&root); + if (r < 0) + log_debug_errno(r, "Failed to determine cgroup root path, ignoring: %m"); + else { + /* We'll have the "pids.max" attribute on the our root cgroup only if we are in a + * CLONE_NEWCGROUP namespace. On the top-level namespace this attribute is missing, hence + * suppress any message about that */ + r = cg_get_attribute_as_uint64("pids", root, "pids.max", &c); + if (r < 0 && r != -ENODATA) + log_debug_errno(r, "Failed to read pids.max attribute of root cgroup, ignoring: %m"); + } + + return MIN3(a, b, c); +} + +uint64_t system_tasks_max_scale(uint64_t v, uint64_t max) { + uint64_t t, m; + + /* Shortcut two special cases */ + if (v == 0) + return 0; + if (v == max) + return system_tasks_max(); + + assert(max > 0); + + /* Multiply the system's task value by the fraction v/max. Hence, if max==100 this calculates percentages + * relative to the system's maximum number of tasks. Returns UINT64_MAX on overflow. */ + + t = system_tasks_max(); + assert(t > 0); + + if (v > UINT64_MAX / t) /* overflow? */ + return UINT64_MAX; + + m = t * v; + return m / max; +} |