summaryrefslogtreecommitdiffstats
path: root/src/core/cgroup.h
diff options
context:
space:
mode:
Diffstat (limited to 'src/core/cgroup.h')
-rw-r--r--src/core/cgroup.h132
1 files changed, 128 insertions, 4 deletions
diff --git a/src/core/cgroup.h b/src/core/cgroup.h
index f1b674b..72fe275 100644
--- a/src/core/cgroup.h
+++ b/src/core/cgroup.h
@@ -3,7 +3,10 @@
#include <stdbool.h>
-#include "bpf-lsm.h"
+#include "sd-event.h"
+
+#include "bpf-program.h"
+#include "bpf-restrict-fs.h"
#include "cgroup-util.h"
#include "cpu-set-util.h"
#include "firewall-util.h"
@@ -35,6 +38,7 @@ typedef struct CGroupBlockIODeviceWeight CGroupBlockIODeviceWeight;
typedef struct CGroupBlockIODeviceBandwidth CGroupBlockIODeviceBandwidth;
typedef struct CGroupBPFForeignProgram CGroupBPFForeignProgram;
typedef struct CGroupSocketBindItem CGroupSocketBindItem;
+typedef struct CGroupRuntime CGroupRuntime;
typedef enum CGroupDevicePolicy {
/* When devices listed, will allow those, plus built-in ones, if none are listed will allow
@@ -53,7 +57,9 @@ typedef enum CGroupDevicePolicy {
typedef enum FreezerAction {
FREEZER_FREEZE,
+ FREEZER_PARENT_FREEZE,
FREEZER_THAW,
+ FREEZER_PARENT_THAW,
_FREEZER_ACTION_MAX,
_FREEZER_ACTION_INVALID = -EINVAL,
@@ -129,6 +135,9 @@ typedef enum CGroupPressureWatch {
_CGROUP_PRESSURE_WATCH_INVALID = -EINVAL,
} CGroupPressureWatch;
+/* The user-supplied cgroup-related configuration options. This remains mostly immutable while the service
+ * manager is running (except for an occasional SetProperty() configuration change), outside of reload
+ * cycles. When adding members make sure to update cgroup_context_copy() accordingly. */
struct CGroupContext {
bool cpu_accounting;
bool io_accounting;
@@ -188,6 +197,8 @@ struct CGroupContext {
bool startup_memory_swap_max_set:1;
bool startup_memory_zswap_max_set:1;
+ bool memory_zswap_writeback;
+
Set *ip_address_allow;
Set *ip_address_deny;
/* These two flags indicate that redundant entries have been removed from
@@ -276,6 +287,95 @@ typedef enum CGroupMemoryAccountingMetric {
_CGROUP_MEMORY_ACCOUNTING_METRIC_INVALID = -EINVAL,
} CGroupMemoryAccountingMetric;
+/* Used for limits whose value sets have infimum */
+typedef enum CGroupLimitType {
+ CGROUP_LIMIT_MEMORY_MAX,
+ CGROUP_LIMIT_MEMORY_HIGH,
+ CGROUP_LIMIT_TASKS_MAX,
+ _CGROUP_LIMIT_TYPE_MAX,
+ _CGROUP_LIMIT_INVALID = -EINVAL,
+} CGroupLimitType;
+
+/* The dynamic, regular updated information about a unit that as a realized cgroup. This is only allocated when a unit is first realized */
+typedef struct CGroupRuntime {
+ /* Where the cpu.stat or cpuacct.usage was at the time the unit was started */
+ nsec_t cpu_usage_base;
+ nsec_t cpu_usage_last; /* the most recently read value */
+
+ /* Most recently read value of memory accounting metrics */
+ uint64_t memory_accounting_last[_CGROUP_MEMORY_ACCOUNTING_METRIC_CACHED_LAST + 1];
+
+ /* The current counter of OOM kills initiated by systemd-oomd */
+ uint64_t managed_oom_kill_last;
+
+ /* The current counter of the oom_kill field in the memory.events cgroup attribute */
+ uint64_t oom_kill_last;
+
+ /* Where the io.stat data was at the time the unit was started */
+ uint64_t io_accounting_base[_CGROUP_IO_ACCOUNTING_METRIC_MAX];
+ uint64_t io_accounting_last[_CGROUP_IO_ACCOUNTING_METRIC_MAX]; /* the most recently read value */
+
+ /* Counterparts in the cgroup filesystem */
+ char *cgroup_path;
+ uint64_t cgroup_id;
+ CGroupMask cgroup_realized_mask; /* In which hierarchies does this unit's cgroup exist? (only relevant on cgroup v1) */
+ CGroupMask cgroup_enabled_mask; /* Which controllers are enabled (or more correctly: enabled for the children) for this unit's cgroup? (only relevant on cgroup v2) */
+ CGroupMask cgroup_invalidated_mask; /* A mask specifying controllers which shall be considered invalidated, and require re-realization */
+ CGroupMask cgroup_members_mask; /* A cache for the controllers required by all children of this cgroup (only relevant for slice units) */
+
+ /* Inotify watch descriptors for watching cgroup.events and memory.events on cgroupv2 */
+ int cgroup_control_inotify_wd;
+ int cgroup_memory_inotify_wd;
+
+ /* Device Controller BPF program */
+ BPFProgram *bpf_device_control_installed;
+
+ /* IP BPF Firewalling/accounting */
+ int ip_accounting_ingress_map_fd;
+ int ip_accounting_egress_map_fd;
+ uint64_t ip_accounting_extra[_CGROUP_IP_ACCOUNTING_METRIC_MAX];
+
+ int ipv4_allow_map_fd;
+ int ipv6_allow_map_fd;
+ int ipv4_deny_map_fd;
+ int ipv6_deny_map_fd;
+ BPFProgram *ip_bpf_ingress, *ip_bpf_ingress_installed;
+ BPFProgram *ip_bpf_egress, *ip_bpf_egress_installed;
+
+ Set *ip_bpf_custom_ingress;
+ Set *ip_bpf_custom_ingress_installed;
+ Set *ip_bpf_custom_egress;
+ Set *ip_bpf_custom_egress_installed;
+
+ /* BPF programs managed (e.g. loaded to kernel) by an entity external to systemd,
+ * attached to unit cgroup by provided program fd and attach type. */
+ Hashmap *bpf_foreign_by_key;
+
+ FDSet *initial_socket_bind_link_fds;
+#if BPF_FRAMEWORK
+ /* BPF links to BPF programs attached to cgroup/bind{4|6} hooks and
+ * responsible for allowing or denying a unit to bind(2) to a socket
+ * address. */
+ struct bpf_link *ipv4_socket_bind_link;
+ struct bpf_link *ipv6_socket_bind_link;
+#endif
+
+ FDSet *initial_restrict_ifaces_link_fds;
+#if BPF_FRAMEWORK
+ struct bpf_link *restrict_ifaces_ingress_bpf_link;
+ struct bpf_link *restrict_ifaces_egress_bpf_link;
+#endif
+
+ bool cgroup_realized:1;
+ bool cgroup_members_mask_valid:1;
+
+ /* Reset cgroup accounting next time we fork something off */
+ bool reset_accounting:1;
+
+ /* Whether we warned about clamping the CPU quota period */
+ bool warned_clamping_cpu_quota_period:1;
+} CGroupRuntime;
+
typedef struct Unit Unit;
typedef struct Manager Manager;
typedef enum ManagerState ManagerState;
@@ -285,6 +385,7 @@ uint64_t cgroup_context_cpu_weight(CGroupContext *c, ManagerState state);
usec_t cgroup_cpu_adjust_period(usec_t period, usec_t quota, usec_t resolution, usec_t max_period);
void cgroup_context_init(CGroupContext *c);
+int cgroup_context_copy(CGroupContext *dst, const CGroupContext *src);
void cgroup_context_done(CGroupContext *c);
void cgroup_context_dump(Unit *u, FILE* f, const char *prefix);
void cgroup_context_dump_socket_bind_item(const CGroupSocketBindItem *item, FILE *f);
@@ -309,6 +410,17 @@ static inline bool cgroup_context_want_memory_pressure(const CGroupContext *c) {
int cgroup_context_add_device_allow(CGroupContext *c, const char *dev, CGroupDevicePermissions p);
int cgroup_context_add_or_update_device_allow(CGroupContext *c, const char *dev, CGroupDevicePermissions p);
int cgroup_context_add_bpf_foreign_program(CGroupContext *c, uint32_t attach_type, const char *path);
+static inline int cgroup_context_add_bpf_foreign_program_dup(CGroupContext *c, const CGroupBPFForeignProgram *p) {
+ return cgroup_context_add_bpf_foreign_program(c, p->attach_type, p->bpffs_path);
+}
+int cgroup_context_add_io_device_limit_dup(CGroupContext *c, const CGroupIODeviceLimit *l);
+int cgroup_context_add_io_device_weight_dup(CGroupContext *c, const CGroupIODeviceWeight *w);
+int cgroup_context_add_io_device_latency_dup(CGroupContext *c, const CGroupIODeviceLatency *l);
+int cgroup_context_add_block_io_device_weight_dup(CGroupContext *c, const CGroupBlockIODeviceWeight *w);
+int cgroup_context_add_block_io_device_bandwidth_dup(CGroupContext *c, const CGroupBlockIODeviceBandwidth *b);
+int cgroup_context_add_device_allow_dup(CGroupContext *c, const CGroupDeviceAllow *a);
+int cgroup_context_add_socket_bind_item_allow_dup(CGroupContext *c, const CGroupSocketBindItem *i);
+int cgroup_context_add_socket_bind_item_deny_dup(CGroupContext *c, const CGroupSocketBindItem *i);
void unit_modify_nft_set(Unit *u, bool add);
@@ -336,6 +448,7 @@ int unit_watch_cgroup(Unit *u);
int unit_watch_cgroup_memory(Unit *u);
void unit_add_to_cgroup_realize_queue(Unit *u);
+int unit_cgroup_is_empty(Unit *u);
void unit_release_cgroup(Unit *u);
/* Releases the cgroup only if it is recursively empty.
* Returns true if the cgroup was released, false otherwise. */
@@ -353,9 +466,9 @@ void manager_shutdown_cgroup(Manager *m, bool delete);
unsigned manager_dispatch_cgroup_realize_queue(Manager *m);
Unit *manager_get_unit_by_cgroup(Manager *m, const char *cgroup);
-Unit *manager_get_unit_by_pidref_cgroup(Manager *m, PidRef *pid);
-Unit *manager_get_unit_by_pidref_watching(Manager *m, PidRef *pid);
-Unit* manager_get_unit_by_pidref(Manager *m, PidRef *pid);
+Unit *manager_get_unit_by_pidref_cgroup(Manager *m, const PidRef *pid);
+Unit *manager_get_unit_by_pidref_watching(Manager *m, const PidRef *pid);
+Unit* manager_get_unit_by_pidref(Manager *m, const PidRef *pid);
Unit* manager_get_unit_by_pid(Manager *m, pid_t pid);
uint64_t unit_get_ancestor_memory_min(Unit *u);
@@ -374,6 +487,7 @@ int unit_get_tasks_current(Unit *u, uint64_t *ret);
int unit_get_cpu_usage(Unit *u, nsec_t *ret);
int unit_get_io_accounting(Unit *u, CGroupIOAccountingMetric metric, bool allow_cache, uint64_t *ret);
int unit_get_ip_accounting(Unit *u, CGroupIPAccountingMetric metric, uint64_t *ret);
+int unit_get_effective_limit(Unit *u, CGroupLimitType type, uint64_t *ret);
int unit_reset_cpu_accounting(Unit *u);
void unit_reset_memory_accounting_last(Unit *u);
@@ -413,6 +527,13 @@ int unit_cgroup_freezer_action(Unit *u, FreezerAction action);
const char* freezer_action_to_string(FreezerAction a) _const_;
FreezerAction freezer_action_from_string(const char *s) _pure_;
+CGroupRuntime *cgroup_runtime_new(void);
+CGroupRuntime *cgroup_runtime_free(CGroupRuntime *crt);
+DEFINE_TRIVIAL_CLEANUP_FUNC(CGroupRuntime*, cgroup_runtime_free);
+
+int cgroup_runtime_serialize(Unit *u, FILE *f, FDSet *fds);
+int cgroup_runtime_deserialize_one(Unit *u, const char *key, const char *value, FDSet *fds);
+
const char* cgroup_pressure_watch_to_string(CGroupPressureWatch a) _const_;
CGroupPressureWatch cgroup_pressure_watch_from_string(const char *s) _pure_;
@@ -425,5 +546,8 @@ CGroupIPAccountingMetric cgroup_ip_accounting_metric_from_string(const char *s)
const char* cgroup_io_accounting_metric_to_string(CGroupIOAccountingMetric m) _const_;
CGroupIOAccountingMetric cgroup_io_accounting_metric_from_string(const char *s) _pure_;
+const char* cgroup_effective_limit_type_to_string(CGroupLimitType m) _const_;
+CGroupLimitType cgroup_effective_limit_type_from_string(const char *s) _pure_;
+
const char* cgroup_memory_accounting_metric_to_string(CGroupMemoryAccountingMetric m) _const_;
CGroupMemoryAccountingMetric cgroup_memory_accounting_metric_from_string(const char *s) _pure_;