summaryrefslogtreecommitdiffstats
path: root/src/basic/limits-util.c
diff options
context:
space:
mode:
authorDaniel Baumann <daniel.baumann@progress-linux.org>2024-04-07 15:35:18 +0000
committerDaniel Baumann <daniel.baumann@progress-linux.org>2024-04-07 15:35:18 +0000
commitb750101eb236130cf056c675997decbac904cc49 (patch)
treea5df1a06754bdd014cb975c051c83b01c9a97532 /src/basic/limits-util.c
parentInitial commit. (diff)
downloadsystemd-b750101eb236130cf056c675997decbac904cc49.tar.xz
systemd-b750101eb236130cf056c675997decbac904cc49.zip
Adding upstream version 252.22. (tag: upstream/252.22)
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'src/basic/limits-util.c')
-rw-r--r--src/basic/limits-util.c192
1 files changed, 192 insertions, 0 deletions
diff --git a/src/basic/limits-util.c b/src/basic/limits-util.c
new file mode 100644
index 0000000..35cb066
--- /dev/null
+++ b/src/basic/limits-util.c
@@ -0,0 +1,192 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#include <unistd.h>
+
+#include "alloc-util.h"
+#include "cgroup-util.h"
+#include "limits-util.h"
+#include "memory-util.h"
+#include "parse-util.h"
+#include "process-util.h"
+#include "procfs-util.h"
+#include "string-util.h"
+
+uint64_t physical_memory(void) {
+        _cleanup_free_ char *cgroup_root = NULL, *raw_limit = NULL;
+        uint64_t total, limit;
+        size_t pagesz;
+        long n_pages;
+        int r;
+
+        /* Returns the amount of memory available to us, in bytes. The result is a uint64_t so that it
+         * cannot be truncated when a 32bit process runs on a 64bit kernel with lots of RAM.
+         *
+         * To behave sensibly inside containers that carry a memory limit, the value returned is the
+         * smaller of what the kernel reports as physical memory and the limit set on the root cgroup
+         * (if one is set). Any failure while looking up the cgroup limit is logged at debug level and
+         * the plain physical memory size is returned instead. */
+
+        n_pages = sysconf(_SC_PHYS_PAGES);
+        assert(n_pages > 0);
+
+        pagesz = page_size();
+        total = (uint64_t) n_pages * (uint64_t) pagesz;
+
+        r = cg_get_root_path(&cgroup_root);
+        if (r < 0) {
+                log_debug_errno(r, "Failed to determine root cgroup, ignoring cgroup memory limit: %m");
+                return total;
+        }
+
+        r = cg_all_unified();
+        if (r < 0) {
+                log_debug_errno(r, "Failed to determine root unified mode, ignoring cgroup memory limit: %m");
+                return total;
+        }
+
+        if (r == 0) {
+                /* Not fully unified: use the legacy attribute name */
+                r = cg_get_attribute("memory", cgroup_root, "memory.limit_in_bytes", &raw_limit);
+                if (r < 0) {
+                        log_debug_errno(r, "Failed to read memory.limit_in_bytes cgroup attribute, ignoring cgroup memory limit: %m");
+                        return total;
+                }
+        } else {
+                r = cg_get_attribute("memory", cgroup_root, "memory.max", &raw_limit);
+                if (r < 0) {
+                        /* -ENOENT means the field is absent, as is the case on the system's top-level
+                         * cgroup, so stay quiet about it. (We still try to read it, because our own
+                         * root may well have the field if we run inside a cgroup namespace.) */
+                        if (r != -ENOENT)
+                                log_debug_errno(r, "Failed to read memory.max cgroup attribute, ignoring cgroup memory limit: %m");
+                        return total;
+                }
+
+                /* The literal string "max" means no limit is configured */
+                if (streq(raw_limit, "max"))
+                        return total;
+        }
+
+        r = safe_atou64(raw_limit, &limit);
+        if (r < 0) {
+                log_debug_errno(r, "Failed to parse cgroup memory limit '%s', ignoring: %m", raw_limit);
+                return total;
+        }
+
+        /* UINT64_MAX is treated as "no limit" too */
+        if (limit == UINT64_MAX)
+                return total;
+
+        /* Round the limit down to a multiple of our own page size */
+        limit -= limit % pagesz;
+
+        return MIN(total, limit);
+}
+
+uint64_t physical_memory_scale(uint64_t v, uint64_t max) {
+        uint64_t pagesz, n_pages, scaled_pages;
+
+        /* Computes physical_memory() * v / max, rounded down to a multiple of the page size. If the
+         * computation would overflow, UINT64_MAX is returned instead. */
+
+        /* Two trivial cases that need no arithmetic at all */
+        if (v == 0)
+                return 0;
+        if (v == max)
+                return physical_memory();
+
+        assert(max > 0);
+
+        pagesz = page_size();
+        assert(pagesz > 0);
+
+        /* Do the scaling in units of pages, so that the result is page-aligned */
+        n_pages = physical_memory() / pagesz;
+        assert(n_pages > 0);
+
+        /* Would n_pages * v overflow? */
+        if (v > UINT64_MAX / n_pages)
+                return UINT64_MAX;
+
+        scaled_pages = n_pages * v / max;
+
+        /* Converting back to bytes may overflow as well */
+        return scaled_pages > UINT64_MAX / pagesz ? UINT64_MAX : scaled_pages * pagesz;
+}
+
+uint64_t system_tasks_max(void) {
+        uint64_t threads_limit = TASKS_MAX, pid_limit = TASKS_MAX, cgroup_limit = TASKS_MAX;
+        _cleanup_free_ char *cgroup_root = NULL;
+        uint64_t lowest;
+        int r;
+
+        /* Figures out how many tasks may run on this system at most. Three sources are consulted, and
+         * the smallest value wins:
+         *
+         * 1. The kernel.threads-max sysctl, i.e. the number of tasks (threads) the kernel permits.
+         *    This is a direct cap on the number of concurrent tasks.
+         *
+         * 2. The kernel.pid_max sysctl, i.e. the upper bound for PID values. It restricts the numeric
+         *    range of PIDs and hence indirectly caps the number of concurrent threads. It is mostly a
+         *    compatibility knob: some old code stored PIDs in a signed 16bit type, so the kernel offers
+         *    a way to keep PIDs at or below INT16_MAX by default. PIDs handed out stay below this
+         *    value, and since PID 0 is not a valid process ID the number of tasks that can exist is
+         *    one less.
+         *
+         * 3. The pids.max attribute of the root cgroup, i.e. the kernel's configured maximum number of
+         *    tasks.
+         *
+         * pid_max defaults to a much lower value than threads-max, which makes it the limit people
+         * typically run into first, and the first one they need to raise when they want more
+         * processes.
+         *
+         * A source that cannot be read is logged at debug level and keeps its TASKS_MAX default. */
+
+        r = procfs_get_threads_max(&threads_limit);
+        if (r < 0)
+                log_debug_errno(r, "Failed to read kernel.threads-max, ignoring: %m");
+
+        r = procfs_get_pid_max(&pid_limit);
+        if (r >= 0) {
+                /* PID 0 is not a valid PID, hence one fewer task than pid_max can exist */
+                if (pid_limit > 0)
+                        pid_limit--;
+        } else
+                log_debug_errno(r, "Failed to read kernel.pid_max, ignoring: %m");
+
+        r = cg_get_root_path(&cgroup_root);
+        if (r >= 0) {
+                /* Our root cgroup only carries a "pids.max" attribute if we run inside a
+                 * CLONE_NEWCGROUP namespace. In the top-level namespace the attribute is missing, so
+                 * do not log anything for -ENODATA. */
+                r = cg_get_attribute_as_uint64("pids", cgroup_root, "pids.max", &cgroup_limit);
+                if (!(r >= 0 || r == -ENODATA))
+                        log_debug_errno(r, "Failed to read pids.max attribute of root cgroup, ignoring: %m");
+        } else
+                log_debug_errno(r, "Failed to determine cgroup root path, ignoring: %m");
+
+        lowest = MIN(threads_limit, pid_limit);
+        return MIN(lowest, cgroup_limit);
+}
+
+uint64_t system_tasks_max_scale(uint64_t v, uint64_t max) {
+        uint64_t tasks;
+
+        /* Scales the system's task limit by the fraction v/max. With max == 100 this therefore yields
+         * a percentage of the system's maximum number of tasks. UINT64_MAX is returned if the
+         * multiplication would overflow. */
+
+        /* Two trivial cases that need no arithmetic at all */
+        if (v == 0)
+                return 0;
+        if (v == max)
+                return system_tasks_max();
+
+        assert(max > 0);
+
+        tasks = system_tasks_max();
+        assert(tasks > 0);
+
+        /* Would tasks * v overflow? */
+        if (v > UINT64_MAX / tasks)
+                return UINT64_MAX;
+
+        return tasks * v / max;
+}