summaryrefslogtreecommitdiffstats
path: root/collectors/cgroups.plugin/sys_fs_cgroup.c
diff options
context:
space:
mode:
Diffstat (limited to 'collectors/cgroups.plugin/sys_fs_cgroup.c')
-rw-r--r--collectors/cgroups.plugin/sys_fs_cgroup.c874
1 files changed, 731 insertions, 143 deletions
diff --git a/collectors/cgroups.plugin/sys_fs_cgroup.c b/collectors/cgroups.plugin/sys_fs_cgroup.c
index d9d130f7e..705c51735 100644
--- a/collectors/cgroups.plugin/sys_fs_cgroup.c
+++ b/collectors/cgroups.plugin/sys_fs_cgroup.c
@@ -23,6 +23,11 @@ static int cgroup_enable_blkio_throttle_io = CONFIG_BOOLEAN_AUTO;
static int cgroup_enable_blkio_throttle_ops = CONFIG_BOOLEAN_AUTO;
static int cgroup_enable_blkio_merged_ops = CONFIG_BOOLEAN_AUTO;
static int cgroup_enable_blkio_queued_ops = CONFIG_BOOLEAN_AUTO;
+static int cgroup_enable_pressure_cpu = CONFIG_BOOLEAN_AUTO;
+static int cgroup_enable_pressure_io_some = CONFIG_BOOLEAN_AUTO;
+static int cgroup_enable_pressure_io_full = CONFIG_BOOLEAN_AUTO;
+static int cgroup_enable_pressure_memory_some = CONFIG_BOOLEAN_AUTO;
+static int cgroup_enable_pressure_memory_full = CONFIG_BOOLEAN_AUTO;
static int cgroup_enable_systemd_services = CONFIG_BOOLEAN_YES;
static int cgroup_enable_systemd_services_detailed_memory = CONFIG_BOOLEAN_NO;
@@ -68,6 +73,130 @@ static uint32_t Write_hash = 0;
static uint32_t user_hash = 0;
static uint32_t system_hash = 0;
+enum cgroups_type { CGROUPS_AUTODETECT_FAIL, CGROUPS_V1, CGROUPS_V2 };
+
+enum cgroups_systemd_setting {
+ SYSTEMD_CGROUP_ERR,
+ SYSTEMD_CGROUP_LEGACY,
+ SYSTEMD_CGROUP_HYBRID,
+ SYSTEMD_CGROUP_UNIFIED
+};
+
+struct cgroups_systemd_config_setting {
+ char *name;
+ enum cgroups_systemd_setting setting;
+};
+
+static struct cgroups_systemd_config_setting cgroups_systemd_options[] = {
+ { .name = "legacy", .setting = SYSTEMD_CGROUP_LEGACY },
+ { .name = "hybrid", .setting = SYSTEMD_CGROUP_HYBRID },
+ { .name = "unified", .setting = SYSTEMD_CGROUP_UNIFIED },
+ { .name = NULL, .setting = SYSTEMD_CGROUP_ERR },
+};
+
+/* on Fed systemd is not in PATH for some reason */
+#define SYSTEMD_CMD_RHEL "/usr/lib/systemd/systemd --version"
+#define SYSTEMD_HIERARCHY_STRING "default-hierarchy="
+
+#define MAXSIZE_PROC_CMDLINE 4096
+static enum cgroups_systemd_setting cgroups_detect_systemd(const char *exec)
+{
+ pid_t command_pid;
+ enum cgroups_systemd_setting retval = SYSTEMD_CGROUP_ERR;
+ char buf[MAXSIZE_PROC_CMDLINE];
+ char *begin, *end;
+
+ FILE *f = mypopen(exec, &command_pid);
+
+ if (!f)
+ return retval;
+
+ while (fgets(buf, MAXSIZE_PROC_CMDLINE, f) != NULL) {
+ if ((begin = strstr(buf, SYSTEMD_HIERARCHY_STRING))) {
+ end = begin = begin + strlen(SYSTEMD_HIERARCHY_STRING);
+ if (!*begin)
+ break;
+ while (isalpha(*end))
+ end++;
+ *end = 0;
+ for (int i = 0; cgroups_systemd_options[i].name; i++) {
+ if (!strcmp(begin, cgroups_systemd_options[i].name)) {
+ retval = cgroups_systemd_options[i].setting;
+ break;
+ }
+ }
+ break;
+ }
+ }
+
+ if (mypclose(f, command_pid))
+ return SYSTEMD_CGROUP_ERR;
+
+ return retval;
+}
+
+static enum cgroups_type cgroups_try_detect_version()
+{
+ pid_t command_pid;
+ char buf[MAXSIZE_PROC_CMDLINE];
+ enum cgroups_systemd_setting systemd_setting;
+ int cgroups2_available = 0;
+
+ // 1. check if cgroups2 availible on system at all
+ FILE *f = mypopen("grep cgroup /proc/filesystems", &command_pid);
+ if (!f) {
+ error("popen failed");
+ return CGROUPS_AUTODETECT_FAIL;
+ }
+ while (fgets(buf, MAXSIZE_PROC_CMDLINE, f) != NULL) {
+ if (strstr(buf, "cgroup2")) {
+ cgroups2_available = 1;
+ break;
+ }
+ }
+ if(mypclose(f, command_pid))
+ return CGROUPS_AUTODETECT_FAIL;
+
+ if(!cgroups2_available)
+ return CGROUPS_V1;
+
+ // 2. check systemd compiletime setting
+ if ((systemd_setting = cgroups_detect_systemd("systemd --version")) == SYSTEMD_CGROUP_ERR)
+ systemd_setting = cgroups_detect_systemd(SYSTEMD_CMD_RHEL);
+
+ if(systemd_setting == SYSTEMD_CGROUP_ERR)
+ return CGROUPS_AUTODETECT_FAIL;
+
+ if(systemd_setting == SYSTEMD_CGROUP_LEGACY || systemd_setting == SYSTEMD_CGROUP_HYBRID) {
+ // curently we prefer V1 if HYBRID is set as it seems to be more feature complete
+ // in the future we might want to continue here if SYSTEMD_CGROUP_HYBRID
+ // and go ahead with V2
+ return CGROUPS_V1;
+ }
+
+ // 3. if we are unified as on Fedora (default cgroups2 only mode)
+ // check kernel command line flag that can override that setting
+ f = fopen("/proc/cmdline", "r");
+ if (!f) {
+ error("Error reading kernel boot commandline parameters");
+ return CGROUPS_AUTODETECT_FAIL;
+ }
+
+ if (!fgets(buf, MAXSIZE_PROC_CMDLINE, f)) {
+ error("couldn't read all cmdline params into buffer");
+ fclose(f);
+ return CGROUPS_AUTODETECT_FAIL;
+ }
+
+ fclose(f);
+
+ if (strstr(buf, "systemd.unified_cgroup_hierarchy=0")) {
+ info("cgroups v2 (unified cgroups) is available but are disabled on this system.");
+ return CGROUPS_V1;
+ }
+ return CGROUPS_V2;
+}
+
void read_cgroup_plugin_configuration() {
system_page_size = sysconf(_SC_PAGESIZE);
@@ -84,7 +213,11 @@ void read_cgroup_plugin_configuration() {
if(cgroup_check_for_new_every < cgroup_update_every)
cgroup_check_for_new_every = cgroup_update_every;
- cgroup_use_unified_cgroups = config_get_boolean_ondemand("plugin:cgroups", "use unified cgroups", cgroup_use_unified_cgroups);
+ cgroup_use_unified_cgroups = config_get_boolean_ondemand("plugin:cgroups", "use unified cgroups", CONFIG_BOOLEAN_AUTO);
+ if(cgroup_use_unified_cgroups == CONFIG_BOOLEAN_AUTO)
+ cgroup_use_unified_cgroups = (cgroups_try_detect_version() == CGROUPS_V2);
+
+ info("use unified cgroups %s", cgroup_use_unified_cgroups ? "true" : "false");
cgroup_containers_chart_priority = (int)config_get_number("plugin:cgroups", "containers priority", cgroup_containers_chart_priority);
if(cgroup_containers_chart_priority < 1)
@@ -105,6 +238,12 @@ void read_cgroup_plugin_configuration() {
cgroup_enable_blkio_queued_ops = config_get_boolean_ondemand("plugin:cgroups", "enable blkio queued operations", cgroup_enable_blkio_queued_ops);
cgroup_enable_blkio_merged_ops = config_get_boolean_ondemand("plugin:cgroups", "enable blkio merged operations", cgroup_enable_blkio_merged_ops);
+ cgroup_enable_pressure_cpu = config_get_boolean_ondemand("plugin:cgroups", "enable cpu pressure", cgroup_enable_pressure_cpu);
+ cgroup_enable_pressure_io_some = config_get_boolean_ondemand("plugin:cgroups", "enable io some pressure", cgroup_enable_pressure_io_some);
+ cgroup_enable_pressure_io_full = config_get_boolean_ondemand("plugin:cgroups", "enable io full pressure", cgroup_enable_pressure_io_full);
+ cgroup_enable_pressure_memory_some = config_get_boolean_ondemand("plugin:cgroups", "enable memory some pressure", cgroup_enable_pressure_memory_some);
+ cgroup_enable_pressure_memory_full = config_get_boolean_ondemand("plugin:cgroups", "enable memory full pressure", cgroup_enable_pressure_memory_full);
+
cgroup_recheck_zero_blkio_every_iterations = (int)config_get_number("plugin:cgroups", "recheck zero blkio every iterations", cgroup_recheck_zero_blkio_every_iterations);
cgroup_recheck_zero_mem_failcnt_every_iterations = (int)config_get_number("plugin:cgroups", "recheck zero memory failcnt every iterations", cgroup_recheck_zero_mem_failcnt_every_iterations);
cgroup_recheck_zero_mem_detailed_every_iterations = (int)config_get_number("plugin:cgroups", "recheck zero detailed memory every iterations", cgroup_recheck_zero_mem_detailed_every_iterations);
@@ -116,6 +255,13 @@ void read_cgroup_plugin_configuration() {
char filename[FILENAME_MAX + 1], *s;
struct mountinfo *mi, *root = mountinfo_read(0);
if(!cgroup_use_unified_cgroups) {
+ // cgroup v1 does not have pressure metrics
+ cgroup_enable_pressure_cpu =
+ cgroup_enable_pressure_io_some =
+ cgroup_enable_pressure_io_full =
+ cgroup_enable_pressure_memory_some =
+ cgroup_enable_pressure_memory_full = CONFIG_BOOLEAN_NO;
+
mi = mountinfo_find_by_filesystem_super_option(root, "cgroup", "cpuacct");
if(!mi) mi = mountinfo_find_by_filesystem_mount_source(root, "cgroup", "cpuacct");
if(!mi) {
@@ -216,9 +362,12 @@ void read_cgroup_plugin_configuration() {
// ----------------------------------------------------------------
" /machine.slice/*.service " // #3367 systemd-nspawn
+ " /kubepods/pod*/* " // k8s containers
+ " /kubepods/*/pod*/* " // k8s containers
// ----------------------------------------------------------------
+ " !/kubepods* " // all other k8s cgroups
" !*/vcpu* " // libvirtd adds these sub-cgroups
" !*/emulator " // libvirtd adds these sub-cgroups
" !*.mount "
@@ -233,6 +382,9 @@ void read_cgroup_plugin_configuration() {
" !/libvirt "
" !/lxc "
" !/lxc/*/* " // #1397 #2649
+ " !/lxc.monitor* "
+ " !/lxc.pivot "
+ " !/lxc.payload "
" !/machine "
" !/qemu "
" !/system "
@@ -252,6 +404,9 @@ void read_cgroup_plugin_configuration() {
" !/user "
" !/user.slice "
" !/lxc/*/* " // #2161 #2649
+ " !/lxc.monitor "
+ " !/lxc.payload/*/* "
+ " !/lxc.payload.* "
" * "
), NULL, SIMPLE_PATTERN_EXACT);
@@ -429,6 +584,7 @@ struct cgroup_network_interface {
#define CGROUP_OPTIONS_SYSTEM_SLICE_SERVICE 0x00000002
#define CGROUP_OPTIONS_IS_UNIFIED 0x00000004
+// *** WARNING *** The fields are not thread safe. Take care of safe usage.
struct cgroup {
uint32_t options;
@@ -445,6 +601,8 @@ struct cgroup {
char *chart_title;
+ struct label *chart_labels;
+
struct cpuacct_stat cpuacct_stat;
struct cpuacct_usage cpuacct_usage;
@@ -461,6 +619,10 @@ struct cgroup {
struct cgroup_network_interface *interfaces;
+ struct pressure cpu_pressure;
+ struct pressure io_pressure;
+ struct pressure memory_pressure;
+
// per cgroup charts
RRDSET *st_cpu;
RRDSET *st_cpu_limit;
@@ -530,9 +692,22 @@ struct cgroup {
RRDDIM *rd_io_merged_write;
struct cgroup *next;
+ struct cgroup *discovered_next;
} *cgroup_root = NULL;
+uv_mutex_t cgroup_root_mutex;
+
+struct cgroup *discovered_cgroup_root = NULL;
+
+struct discovery_thread {
+ uv_thread_t thread;
+ uv_mutex_t mutex;
+ uv_cond_t cond_var;
+ int start_discovery;
+ int exited;
+} discovery_thread;
+
// ----------------------------------------------------------------------------
// read values from /sys
@@ -798,6 +973,54 @@ static inline void cgroup2_read_blkio(struct blkio *io, unsigned int word_offset
}
}
+static inline void cgroup2_read_pressure(struct pressure *res) {
+ static procfile *ff = NULL;
+
+ if (likely(res->filename)) {
+ ff = procfile_reopen(ff, res->filename, " =", PROCFILE_FLAG_DEFAULT);
+ if (unlikely(!ff)) {
+ res->updated = 0;
+ cgroups_check = 1;
+ return;
+ }
+
+ ff = procfile_readall(ff);
+ if (unlikely(!ff)) {
+ res->updated = 0;
+ cgroups_check = 1;
+ return;
+ }
+
+ size_t lines = procfile_lines(ff);
+ if (lines < 1) {
+ error("CGROUP: file '%s' should have 1+ lines.", res->filename);
+ res->updated = 0;
+ return;
+ }
+
+ res->some.value10 = strtod(procfile_lineword(ff, 0, 2), NULL);
+ res->some.value60 = strtod(procfile_lineword(ff, 0, 4), NULL);
+ res->some.value300 = strtod(procfile_lineword(ff, 0, 6), NULL);
+
+ if (lines > 2) {
+ res->full.value10 = strtod(procfile_lineword(ff, 1, 2), NULL);
+ res->full.value60 = strtod(procfile_lineword(ff, 1, 4), NULL);
+ res->full.value300 = strtod(procfile_lineword(ff, 1, 6), NULL);
+ }
+
+ res->updated = 1;
+
+ if (unlikely(res->some.enabled == CONFIG_BOOLEAN_AUTO)) {
+ res->some.enabled = CONFIG_BOOLEAN_YES;
+ if (lines > 2) {
+ res->full.enabled = CONFIG_BOOLEAN_YES;
+ } else {
+ res->full.enabled = CONFIG_BOOLEAN_NO;
+ }
+ }
+ }
+}
+
static inline void cgroup_read_memory(struct memory *mem, char parent_cg_is_unified) {
static procfile *ff = NULL;
@@ -946,6 +1169,9 @@ static inline void cgroup_read(struct cgroup *cg) {
cgroup2_read_blkio(&cg->io_service_bytes, 0);
cgroup2_read_blkio(&cg->io_serviced, 4);
cgroup2_read_cpuacct_stat(&cg->cpuacct_stat);
+ cgroup2_read_pressure(&cg->cpu_pressure);
+ cgroup2_read_pressure(&cg->io_pressure);
+ cgroup2_read_pressure(&cg->memory_pressure);
cgroup_read_memory(&cg->memory, 1);
}
}
@@ -956,7 +1182,7 @@ static inline void read_all_cgroups(struct cgroup *root) {
struct cgroup *cg;
for(cg = root; cg ; cg = cg->next)
- if(cg->enabled && cg->available && !cg->pending_renames)
+ if(cg->enabled && !cg->pending_renames)
cgroup_read(cg);
}
@@ -1016,7 +1242,7 @@ static inline void read_cgroup_network_interfaces(struct cgroup *cg) {
info("CGROUP: cgroup '%s' has network interface '%s' as '%s'", cg->id, i->host_device, i->container_device);
// register a device rename to proc_net_dev.c
- netdev_rename_device_add(i->host_device, i->container_device, cg->chart_id);
+ netdev_rename_device_add(i->host_device, i->container_device, cg->chart_id, cg->chart_labels);
}
}
@@ -1065,6 +1291,35 @@ static inline char *cgroup_chart_id_strdupz(const char *s) {
return r;
}
+char *parse_k8s_data(struct label **labels, char *data)
+{
+ char *name = mystrsep(&data, " ");
+
+ if (!data) {
+ return name;
+ }
+
+ while (data) {
+ char *key = mystrsep(&data, "=");
+
+ char *value;
+ if (data && *data == ',') {
+ value = "";
+ *data++ = '\0';
+ } else {
+ value = mystrsep(&data, ",");
+ }
+ value = strip_double_quotes(value, 1);
+
+ if (!key || *key == '\0' || !value || *value == '\0')
+ continue;
+
+ *labels = add_label_to_list(*labels, key, value, LABEL_SOURCE_KUBERNETES);
+ }
+
+ return name;
+}
+
static inline void cgroup_get_chart_name(struct cgroup *cg) {
debug(D_CGROUP, "looking for the name of cgroup '%s' with chart id '%s' and title '%s'", cg->id, cg->chart_id, cg->chart_title);
@@ -1095,12 +1350,19 @@ static inline void cgroup_get_chart_name(struct cgroup *cg) {
cg->enabled = 0;
}
- if(likely(cg->pending_renames < 2)) {
+ if (likely(cg->pending_renames < 2)) {
+ char *name = s;
+
+ if (!strncmp(s, "k8s_", 4)) {
+ free_label_list(cg->chart_labels);
+ name = parse_k8s_data(&cg->chart_labels, s);
+ }
+
freez(cg->chart_title);
- cg->chart_title = cgroup_title_strdupz(s);
+ cg->chart_title = cgroup_title_strdupz(name);
freez(cg->chart_id);
- cg->chart_id = cgroup_chart_id_strdupz(s);
+ cg->chart_id = cgroup_chart_id_strdupz(name);
cg->hash_chart = simple_hash(cg->chart_id);
}
}
@@ -1132,13 +1394,13 @@ static inline struct cgroup *cgroup_add(const char *id) {
if(cgroup_use_unified_cgroups) cg->options |= CGROUP_OPTIONS_IS_UNIFIED;
- if(!cgroup_root)
- cgroup_root = cg;
+ if(!discovered_cgroup_root)
+ discovered_cgroup_root = cg;
else {
// append it
struct cgroup *e;
- for(e = cgroup_root; e->next ;e = e->next) ;
- e->next = cg;
+ for(e = discovered_cgroup_root; e->discovered_next ;e = e->discovered_next) ;
+ e->discovered_next = cg;
}
cgroup_root_count++;
@@ -1204,24 +1466,23 @@ static inline struct cgroup *cgroup_add(const char *id) {
// detect duplicate cgroups
if(cg->enabled) {
struct cgroup *t;
- for (t = cgroup_root; t; t = t->next) {
+ for (t = discovered_cgroup_root; t; t = t->discovered_next) {
if (t != cg && t->enabled && t->hash_chart == cg->hash_chart && !strcmp(t->chart_id, cg->chart_id)) {
- if (!strncmp(t->chart_id, "/system.slice/", 14) && !strncmp(cg->chart_id, "/init.scope/system.slice/", 25)) {
- error("CGROUP: chart id '%s' already exists with id '%s' and is enabled. Swapping them by enabling cgroup with id '%s' and disabling cgroup with id '%s'.",
- cg->chart_id, t->id, cg->id, t->id);
- debug(D_CGROUP, "Control group with chart id '%s' already exists with id '%s' and is enabled. Swapping them by enabling cgroup with id '%s' and disabling cgroup with id '%s'.",
- cg->chart_id, t->id, cg->id, t->id);
- t->enabled = 0;
- t->options |= CGROUP_OPTIONS_DISABLED_DUPLICATE;
- }
- else {
- error("CGROUP: chart id '%s' already exists with id '%s' and is enabled and available. Disabling cgroup with id '%s'.",
- cg->chart_id, t->id, cg->id);
- debug(D_CGROUP, "Control group with chart id '%s' already exists with id '%s' and is enabled and available. Disabling cgroup with id '%s'.",
- cg->chart_id, t->id, cg->id);
- cg->enabled = 0;
- cg->options |= CGROUP_OPTIONS_DISABLED_DUPLICATE;
- }
+ // TODO: use it after refactoring if system.slice might be scanned before init.scope/system.slice
+ //
+ // if (!strncmp(t->id, "/system.slice/", 14) && !strncmp(cg->id, "/init.scope/system.slice/", 25)) {
+ // error("CGROUP: chart id '%s' already exists with id '%s' and is enabled. Swapping them by enabling cgroup with id '%s' and disabling cgroup with id '%s'.",
+ // cg->chart_id, t->id, cg->id, t->id);
+ // t->enabled = 0;
+ // t->options |= CGROUP_OPTIONS_DISABLED_DUPLICATE;
+ // }
+ // else {}
+ //
+ // https://github.com/netdata/netdata/issues/797#issuecomment-241248884
+ error("CGROUP: chart id '%s' already exists with id '%s' and is enabled and available. Disabling cgroup with id '%s'.",
+ cg->chart_id, t->id, cg->id);
+ cg->enabled = 0;
+ cg->options |= CGROUP_OPTIONS_DISABLED_DUPLICATE;
break;
}
@@ -1236,6 +1497,12 @@ static inline struct cgroup *cgroup_add(const char *id) {
return cg;
}
+static inline void free_pressure(struct pressure *res) {
+ if (res->some.st) rrdset_is_obsolete(res->some.st);
+ if (res->full.st) rrdset_is_obsolete(res->full.st);
+ freez(res->filename);
+}
+
static inline void cgroup_free(struct cgroup *cg) {
debug(D_CGROUP, "Removing cgroup '%s' with chart id '%s' (was %s and %s)", cg->id, cg->chart_id, (cg->enabled)?"enabled":"disabled", (cg->available)?"available":"not available");
@@ -1284,10 +1551,16 @@ static inline void cgroup_free(struct cgroup *cg) {
freez(cg->io_merged.filename);
freez(cg->io_queued.filename);
+ free_pressure(&cg->cpu_pressure);
+ free_pressure(&cg->io_pressure);
+ free_pressure(&cg->memory_pressure);
+
freez(cg->id);
freez(cg->chart_id);
freez(cg->chart_title);
+ free_label_list(cg->chart_labels);
+
freez(cg);
cgroup_root_count--;
@@ -1300,7 +1573,7 @@ static inline struct cgroup *cgroup_find(const char *id) {
uint32_t hash = simple_hash(id);
struct cgroup *cg;
- for(cg = cgroup_root; cg ; cg = cg->next) {
+ for(cg = discovered_cgroup_root; cg ; cg = cg->discovered_next) {
if(hash == cg->hash && strcmp(id, cg->id) == 0)
break;
}
@@ -1426,103 +1699,16 @@ static inline void mark_all_cgroups_as_not_available() {
struct cgroup *cg;
// mark all as not available
- for(cg = cgroup_root; cg ; cg = cg->next) {
+ for(cg = discovered_cgroup_root; cg ; cg = cg->discovered_next) {
cg->available = 0;
}
}
-static inline void cleanup_all_cgroups() {
- struct cgroup *cg = cgroup_root, *last = NULL;
-
- for(; cg ;) {
- if(!cg->available) {
- // enable the first duplicate cgroup
- {
- struct cgroup *t;
- for(t = cgroup_root; t ; t = t->next) {
- if(t != cg && t->available && !t->enabled && t->options & CGROUP_OPTIONS_DISABLED_DUPLICATE && t->hash_chart == cg->hash_chart && !strcmp(t->chart_id, cg->chart_id)) {
- debug(D_CGROUP, "Enabling duplicate of cgroup '%s' with id '%s', because the original with id '%s' stopped.", t->chart_id, t->id, cg->id);
- t->enabled = 1;
- t->options &= ~CGROUP_OPTIONS_DISABLED_DUPLICATE;
- break;
- }
- }
- }
-
- if(!last)
- cgroup_root = cg->next;
- else
- last->next = cg->next;
-
- cgroup_free(cg);
-
- if(!last)
- cg = cgroup_root;
- else
- cg = last->next;
- }
- else {
- last = cg;
- cg = cg->next;
- }
- }
-}
-
-static inline void find_all_cgroups() {
- debug(D_CGROUP, "searching for cgroups");
-
- mark_all_cgroups_as_not_available();
- if(!cgroup_use_unified_cgroups) {
- if(cgroup_enable_cpuacct_stat || cgroup_enable_cpuacct_usage) {
- if(find_dir_in_subdirs(cgroup_cpuacct_base, NULL, found_subdir_in_dir) == -1) {
- cgroup_enable_cpuacct_stat =
- cgroup_enable_cpuacct_usage = CONFIG_BOOLEAN_NO;
- error("CGROUP: disabled cpu statistics.");
- }
- }
-
- if(cgroup_enable_blkio_io || cgroup_enable_blkio_ops || cgroup_enable_blkio_throttle_io || cgroup_enable_blkio_throttle_ops || cgroup_enable_blkio_merged_ops || cgroup_enable_blkio_queued_ops) {
- if(find_dir_in_subdirs(cgroup_blkio_base, NULL, found_subdir_in_dir) == -1) {
- cgroup_enable_blkio_io =
- cgroup_enable_blkio_ops =
- cgroup_enable_blkio_throttle_io =
- cgroup_enable_blkio_throttle_ops =
- cgroup_enable_blkio_merged_ops =
- cgroup_enable_blkio_queued_ops = CONFIG_BOOLEAN_NO;
- error("CGROUP: disabled blkio statistics.");
- }
- }
-
- if(cgroup_enable_memory || cgroup_enable_detailed_memory || cgroup_enable_swap || cgroup_enable_memory_failcnt) {
- if(find_dir_in_subdirs(cgroup_memory_base, NULL, found_subdir_in_dir) == -1) {
- cgroup_enable_memory =
- cgroup_enable_detailed_memory =
- cgroup_enable_swap =
- cgroup_enable_memory_failcnt = CONFIG_BOOLEAN_NO;
- error("CGROUP: disabled memory statistics.");
- }
- }
-
- if(cgroup_search_in_devices) {
- if(find_dir_in_subdirs(cgroup_devices_base, NULL, found_subdir_in_dir) == -1) {
- cgroup_search_in_devices = 0;
- error("CGROUP: disabled devices statistics.");
- }
- }
- }
- else {
- if (find_dir_in_subdirs(cgroup_unified_base, NULL, found_subdir_in_dir) == -1) {
- cgroup_unified_exist = CONFIG_BOOLEAN_NO;
- error("CGROUP: disabled unified cgroups statistics.");
- }
- }
-
- // remove any non-existing cgroups
- cleanup_all_cgroups();
-
+static inline void update_filenames()
+{
struct cgroup *cg;
struct stat buf;
- for(cg = cgroup_root; cg ; cg = cg->next) {
+ for(cg = discovered_cgroup_root; cg ; cg = cg->discovered_next) {
// fprintf(stderr, " >>> CGROUP '%s' (%u - %s) with name '%s'\n", cg->id, cg->hash, cg->available?"available":"stopped", cg->name);
if(unlikely(cg->pending_renames))
@@ -1748,12 +1934,175 @@ static inline void find_all_cgroups() {
else
debug(D_CGROUP, "memory.swap file for cgroup '%s': '%s' does not exist.", cg->id, filename);
}
+
+ if (unlikely(cgroup_enable_pressure_cpu && !cg->cpu_pressure.filename)) {
+ snprintfz(filename, FILENAME_MAX, "%s%s/cpu.pressure", cgroup_unified_base, cg->id);
+ if (likely(stat(filename, &buf) != -1)) {
+ cg->cpu_pressure.filename = strdupz(filename);
+ cg->cpu_pressure.some.enabled = cgroup_enable_pressure_cpu;
+ cg->cpu_pressure.full.enabled = CONFIG_BOOLEAN_NO;
+ debug(D_CGROUP, "cpu.pressure filename for cgroup '%s': '%s'", cg->id, cg->cpu_pressure.filename);
+ } else {
+ debug(D_CGROUP, "cpu.pressure file for cgroup '%s': '%s' does not exist", cg->id, filename);
+ }
+ }
+
+ if (unlikely((cgroup_enable_pressure_io_some || cgroup_enable_pressure_io_full) && !cg->io_pressure.filename)) {
+ snprintfz(filename, FILENAME_MAX, "%s%s/io.pressure", cgroup_unified_base, cg->id);
+ if (likely(stat(filename, &buf) != -1)) {
+ cg->io_pressure.filename = strdupz(filename);
+ cg->io_pressure.some.enabled = cgroup_enable_pressure_io_some;
+ cg->io_pressure.full.enabled = cgroup_enable_pressure_io_full;
+ debug(D_CGROUP, "io.pressure filename for cgroup '%s': '%s'", cg->id, cg->io_pressure.filename);
+ } else {
+ debug(D_CGROUP, "io.pressure file for cgroup '%s': '%s' does not exist", cg->id, filename);
+ }
+ }
+
+ if (unlikely((cgroup_enable_pressure_memory_some || cgroup_enable_pressure_memory_full) && !cg->memory_pressure.filename)) {
+ snprintfz(filename, FILENAME_MAX, "%s%s/memory.pressure", cgroup_unified_base, cg->id);
+ if (likely(stat(filename, &buf) != -1)) {
+ cg->memory_pressure.filename = strdupz(filename);
+ cg->memory_pressure.some.enabled = cgroup_enable_pressure_memory_some;
+ cg->memory_pressure.full.enabled = cgroup_enable_pressure_memory_full;
+ debug(D_CGROUP, "memory.pressure filename for cgroup '%s': '%s'", cg->id, cg->memory_pressure.filename);
+ } else {
+ debug(D_CGROUP, "memory.pressure file for cgroup '%s': '%s' does not exist", cg->id, filename);
+ }
+ }
}
}
+}
+
+static inline void cleanup_all_cgroups() {
+ struct cgroup *cg = discovered_cgroup_root, *last = NULL;
+
+ for(; cg ;) {
+ if(!cg->available) {
+ // enable the first duplicate cgroup
+ {
+ struct cgroup *t;
+ for(t = discovered_cgroup_root; t ; t = t->discovered_next) {
+ if(t != cg && t->available && !t->enabled && t->options & CGROUP_OPTIONS_DISABLED_DUPLICATE && t->hash_chart == cg->hash_chart && !strcmp(t->chart_id, cg->chart_id)) {
+ debug(D_CGROUP, "Enabling duplicate of cgroup '%s' with id '%s', because the original with id '%s' stopped.", t->chart_id, t->id, cg->id);
+ t->enabled = 1;
+ t->options &= ~CGROUP_OPTIONS_DISABLED_DUPLICATE;
+ break;
+ }
+ }
+ }
+
+ if(!last)
+ discovered_cgroup_root = cg->discovered_next;
+ else
+ last->discovered_next = cg->discovered_next;
+
+ cgroup_free(cg);
+
+ if(!last)
+ cg = discovered_cgroup_root;
+ else
+ cg = last->discovered_next;
+ }
+ else {
+ last = cg;
+ cg = cg->discovered_next;
+ }
+ }
+}
+
+static inline void copy_discovered_cgroups()
+{
+ debug(D_CGROUP, "copy discovered cgroups to the main group list");
+
+ struct cgroup *cg;
+
+ for(cg = discovered_cgroup_root; cg ; cg = cg->discovered_next) {
+ cg->next = cg->discovered_next;
+ }
+
+ cgroup_root = discovered_cgroup_root;
+}
+
+static inline void find_all_cgroups() {
+ debug(D_CGROUP, "searching for cgroups");
+
+ mark_all_cgroups_as_not_available();
+ if(!cgroup_use_unified_cgroups) {
+ if(cgroup_enable_cpuacct_stat || cgroup_enable_cpuacct_usage) {
+ if(find_dir_in_subdirs(cgroup_cpuacct_base, NULL, found_subdir_in_dir) == -1) {
+ cgroup_enable_cpuacct_stat =
+ cgroup_enable_cpuacct_usage = CONFIG_BOOLEAN_NO;
+ error("CGROUP: disabled cpu statistics.");
+ }
+ }
+
+ if(cgroup_enable_blkio_io || cgroup_enable_blkio_ops || cgroup_enable_blkio_throttle_io || cgroup_enable_blkio_throttle_ops || cgroup_enable_blkio_merged_ops || cgroup_enable_blkio_queued_ops) {
+ if(find_dir_in_subdirs(cgroup_blkio_base, NULL, found_subdir_in_dir) == -1) {
+ cgroup_enable_blkio_io =
+ cgroup_enable_blkio_ops =
+ cgroup_enable_blkio_throttle_io =
+ cgroup_enable_blkio_throttle_ops =
+ cgroup_enable_blkio_merged_ops =
+ cgroup_enable_blkio_queued_ops = CONFIG_BOOLEAN_NO;
+ error("CGROUP: disabled blkio statistics.");
+ }
+ }
+
+ if(cgroup_enable_memory || cgroup_enable_detailed_memory || cgroup_enable_swap || cgroup_enable_memory_failcnt) {
+ if(find_dir_in_subdirs(cgroup_memory_base, NULL, found_subdir_in_dir) == -1) {
+ cgroup_enable_memory =
+ cgroup_enable_detailed_memory =
+ cgroup_enable_swap =
+ cgroup_enable_memory_failcnt = CONFIG_BOOLEAN_NO;
+ error("CGROUP: disabled memory statistics.");
+ }
+ }
+
+ if(cgroup_search_in_devices) {
+ if(find_dir_in_subdirs(cgroup_devices_base, NULL, found_subdir_in_dir) == -1) {
+ cgroup_search_in_devices = 0;
+ error("CGROUP: disabled devices statistics.");
+ }
+ }
+ }
+ else {
+ if (find_dir_in_subdirs(cgroup_unified_base, NULL, found_subdir_in_dir) == -1) {
+ cgroup_unified_exist = CONFIG_BOOLEAN_NO;
+ error("CGROUP: disabled unified cgroups statistics.");
+ }
+ }
+
+ update_filenames();
+
+ uv_mutex_lock(&cgroup_root_mutex);
+ cleanup_all_cgroups();
+ copy_discovered_cgroups();
+ uv_mutex_unlock(&cgroup_root_mutex);
debug(D_CGROUP, "done searching for cgroups");
}
+void cgroup_discovery_worker(void *ptr)
+{
+ UNUSED(ptr);
+
+ while (!netdata_exit) {
+ uv_mutex_lock(&discovery_thread.mutex);
+ while (!discovery_thread.start_discovery)
+ uv_cond_wait(&discovery_thread.cond_var, &discovery_thread.mutex);
+ discovery_thread.start_discovery = 0;
+ uv_mutex_unlock(&discovery_thread.mutex);
+
+ if (unlikely(netdata_exit))
+ break;
+
+ find_all_cgroups();
+ }
+
+ discovery_thread.exited = 1;
+}
+
// ----------------------------------------------------------------------------
// generate charts
@@ -1807,7 +2156,7 @@ void update_systemd_services_charts(
if(likely(do_cpu)) {
if(unlikely(!st_cpu)) {
char title[CHART_TITLE_MAX + 1];
- snprintfz(title, CHART_TITLE_MAX, "Systemd Services CPU utilization (%d%% = %d core%s)", (processors * 100), processors, (processors > 1) ? "s" : "");
+ snprintfz(title, CHART_TITLE_MAX, "Systemd Services CPU utilization (100%% = 1 core)");
st_cpu = rrdset_create_localhost(
"services"
@@ -2335,7 +2684,7 @@ void update_systemd_services_charts(
// update the values
struct cgroup *cg;
for(cg = cgroup_root; cg ; cg = cg->next) {
- if(unlikely(!cg->available || !cg->enabled || cg->pending_renames || !(cg->options & CGROUP_OPTIONS_SYSTEM_SLICE_SERVICE)))
+ if(unlikely(!cg->enabled || cg->pending_renames || !(cg->options & CGROUP_OPTIONS_SYSTEM_SLICE_SERVICE)))
continue;
if(likely(do_cpu && cg->cpuacct_stat.updated)) {
@@ -2726,7 +3075,7 @@ void update_cgroup_charts(int update_every) {
struct cgroup *cg;
for(cg = cgroup_root; cg ; cg = cg->next) {
- if(unlikely(!cg->available || !cg->enabled || cg->pending_renames))
+ if(unlikely(!cg->enabled || cg->pending_renames))
continue;
if(likely(cgroup_enable_systemd_services && cg->options & CGROUP_OPTIONS_SYSTEM_SLICE_SERVICE)) {
@@ -2750,7 +3099,7 @@ void update_cgroup_charts(int update_every) {
if(likely(cg->cpuacct_stat.updated && cg->cpuacct_stat.enabled == CONFIG_BOOLEAN_YES)) {
if(unlikely(!cg->st_cpu)) {
- snprintfz(title, CHART_TITLE_MAX, "CPU Usage (%d%% = %d core%s) for cgroup %s", (processors * 100), processors, (processors > 1) ? "s" : "", cg->chart_title);
+ snprintfz(title, CHART_TITLE_MAX, "CPU Usage (100%% = 1 core)");
cg->st_cpu = rrdset_create_localhost(
cgroup_chart_type(type, cg->chart_id, RRD_ID_LENGTH_MAX)
@@ -2766,6 +3115,9 @@ void update_cgroup_charts(int update_every) {
, update_every
, RRDSET_TYPE_STACKED
);
+
+ rrdset_update_labels(cg->st_cpu, cg->chart_labels);
+
if(!(cg->options & CGROUP_OPTIONS_IS_UNIFIED)) {
rrddim_add(cg->st_cpu, "user", NULL, 100, system_hz, RRD_ALGORITHM_INCREMENTAL);
rrddim_add(cg->st_cpu, "system", NULL, 100, system_hz, RRD_ALGORITHM_INCREMENTAL);
@@ -2820,7 +3172,7 @@ void update_cgroup_charts(int update_every) {
rrdsetvar_custom_chart_variable_set(cg->chart_var_cpu_limit, value);
if(unlikely(!cg->st_cpu_limit)) {
- snprintfz(title, CHART_TITLE_MAX, "CPU Usage within the limits for cgroup %s", cg->chart_title);
+ snprintfz(title, CHART_TITLE_MAX, "CPU Usage within the limits");
cg->st_cpu_limit = rrdset_create_localhost(
cgroup_chart_type(type, cg->chart_id, RRD_ID_LENGTH_MAX)
@@ -2837,6 +3189,8 @@ void update_cgroup_charts(int update_every) {
, RRDSET_TYPE_LINE
);
+ rrdset_update_labels(cg->st_cpu_limit, cg->chart_labels);
+
if(!(cg->options & CGROUP_OPTIONS_IS_UNIFIED))
rrddim_add(cg->st_cpu_limit, "used", NULL, 1, system_hz, RRD_ALGORITHM_ABSOLUTE);
else
@@ -2873,7 +3227,7 @@ void update_cgroup_charts(int update_every) {
unsigned int i;
if(unlikely(!cg->st_cpu_per_core)) {
- snprintfz(title, CHART_TITLE_MAX, "CPU Usage (%d%% = %d core%s) Per Core for cgroup %s", (processors * 100), processors, (processors > 1) ? "s" : "", cg->chart_title);
+ snprintfz(title, CHART_TITLE_MAX, "CPU Usage (100%% = 1 core) Per Core");
cg->st_cpu_per_core = rrdset_create_localhost(
cgroup_chart_type(type, cg->chart_id, RRD_ID_LENGTH_MAX)
@@ -2890,6 +3244,8 @@ void update_cgroup_charts(int update_every) {
, RRDSET_TYPE_STACKED
);
+ rrdset_update_labels(cg->st_cpu_per_core, cg->chart_labels);
+
for(i = 0; i < cg->cpuacct_usage.cpus; i++) {
snprintfz(id, RRD_ID_LENGTH_MAX, "cpu%u", i);
rrddim_add(cg->st_cpu_per_core, id, NULL, 100, 1000000000, RRD_ALGORITHM_INCREMENTAL);
@@ -2907,7 +3263,7 @@ void update_cgroup_charts(int update_every) {
if(likely(cg->memory.updated_detailed && cg->memory.enabled_detailed == CONFIG_BOOLEAN_YES)) {
if(unlikely(!cg->st_mem)) {
- snprintfz(title, CHART_TITLE_MAX, "Memory Usage for cgroup %s", cg->chart_title);
+ snprintfz(title, CHART_TITLE_MAX, "Memory Usage");
cg->st_mem = rrdset_create_localhost(
cgroup_chart_type(type, cg->chart_id, RRD_ID_LENGTH_MAX)
@@ -2923,6 +3279,9 @@ void update_cgroup_charts(int update_every) {
, update_every
, RRDSET_TYPE_STACKED
);
+
+ rrdset_update_labels(cg->st_mem, cg->chart_labels);
+
if(!(cg->options & CGROUP_OPTIONS_IS_UNIFIED)) {
rrddim_add(cg->st_mem, "cache", NULL, 1, 1024 * 1024, RRD_ALGORITHM_ABSOLUTE);
rrddim_add(cg->st_mem, "rss", NULL, 1, 1024 * 1024, RRD_ALGORITHM_ABSOLUTE);
@@ -2964,7 +3323,7 @@ void update_cgroup_charts(int update_every) {
rrdset_done(cg->st_mem);
if(unlikely(!cg->st_writeback)) {
- snprintfz(title, CHART_TITLE_MAX, "Writeback Memory for cgroup %s", cg->chart_title);
+ snprintfz(title, CHART_TITLE_MAX, "Writeback Memory");
cg->st_writeback = rrdset_create_localhost(
cgroup_chart_type(type, cg->chart_id, RRD_ID_LENGTH_MAX)
@@ -2981,6 +3340,8 @@ void update_cgroup_charts(int update_every) {
, RRDSET_TYPE_AREA
);
+ rrdset_update_labels(cg->st_writeback, cg->chart_labels);
+
if(cg->memory.detailed_has_dirty)
rrddim_add(cg->st_writeback, "dirty", NULL, 1, 1024 * 1024, RRD_ALGORITHM_ABSOLUTE);
@@ -2997,7 +3358,7 @@ void update_cgroup_charts(int update_every) {
if(!(cg->options & CGROUP_OPTIONS_IS_UNIFIED)) {
if(unlikely(!cg->st_mem_activity)) {
- snprintfz(title, CHART_TITLE_MAX, "Memory Activity for cgroup %s", cg->chart_title);
+ snprintfz(title, CHART_TITLE_MAX, "Memory Activity");
cg->st_mem_activity = rrdset_create_localhost(
cgroup_chart_type(type, cg->chart_id, RRD_ID_LENGTH_MAX)
@@ -3014,6 +3375,8 @@ void update_cgroup_charts(int update_every) {
, RRDSET_TYPE_LINE
);
+ rrdset_update_labels(cg->st_mem_activity, cg->chart_labels);
+
rrddim_add(cg->st_mem_activity, "pgpgin", "in", system_page_size, 1024 * 1024, RRD_ALGORITHM_INCREMENTAL);
rrddim_add(cg->st_mem_activity, "pgpgout", "out", -system_page_size, 1024 * 1024, RRD_ALGORITHM_INCREMENTAL);
}
@@ -3026,7 +3389,7 @@ void update_cgroup_charts(int update_every) {
}
if(unlikely(!cg->st_pgfaults)) {
- snprintfz(title, CHART_TITLE_MAX, "Memory Page Faults for cgroup %s", cg->chart_title);
+ snprintfz(title, CHART_TITLE_MAX, "Memory Page Faults");
cg->st_pgfaults = rrdset_create_localhost(
cgroup_chart_type(type, cg->chart_id, RRD_ID_LENGTH_MAX)
@@ -3043,6 +3406,8 @@ void update_cgroup_charts(int update_every) {
, RRDSET_TYPE_LINE
);
+ rrdset_update_labels(cg->st_pgfaults, cg->chart_labels);
+
rrddim_add(cg->st_pgfaults, "pgfault", NULL, system_page_size, 1024 * 1024, RRD_ALGORITHM_INCREMENTAL);
rrddim_add(cg->st_pgfaults, "pgmajfault", "swap", -system_page_size, 1024 * 1024, RRD_ALGORITHM_INCREMENTAL);
}
@@ -3056,7 +3421,7 @@ void update_cgroup_charts(int update_every) {
if(likely(cg->memory.updated_usage_in_bytes && cg->memory.enabled_usage_in_bytes == CONFIG_BOOLEAN_YES)) {
if(unlikely(!cg->st_mem_usage)) {
- snprintfz(title, CHART_TITLE_MAX, "Used Memory %sfor cgroup %s", (cgroup_used_memory_without_cache && cg->memory.updated_detailed)?"without Cache ":"", cg->chart_title);
+ snprintfz(title, CHART_TITLE_MAX, "Used Memory %s", (cgroup_used_memory_without_cache && cg->memory.updated_detailed)?"without Cache ":"");
cg->st_mem_usage = rrdset_create_localhost(
cgroup_chart_type(type, cg->chart_id, RRD_ID_LENGTH_MAX)
@@ -3073,6 +3438,8 @@ void update_cgroup_charts(int update_every) {
, RRDSET_TYPE_STACKED
);
+ rrdset_update_labels(cg->st_mem_usage, cg->chart_labels);
+
rrddim_add(cg->st_mem_usage, "ram", NULL, 1, 1024 * 1024, RRD_ALGORITHM_ABSOLUTE);
rrddim_add(cg->st_mem_usage, "swap", NULL, 1, 1024 * 1024, RRD_ALGORITHM_ABSOLUTE);
}
@@ -3117,7 +3484,7 @@ void update_cgroup_charts(int update_every) {
memory_limit = cg->memory_limit;
if(unlikely(!cg->st_mem_usage_limit)) {
- snprintfz(title, CHART_TITLE_MAX, "Used RAM without Cache within the limits for cgroup %s", cg->chart_title);
+ snprintfz(title, CHART_TITLE_MAX, "Used RAM without Cache within the limits");
cg->st_mem_usage_limit = rrdset_create_localhost(
cgroup_chart_type(type, cg->chart_id, RRD_ID_LENGTH_MAX)
@@ -3134,6 +3501,8 @@ void update_cgroup_charts(int update_every) {
, RRDSET_TYPE_STACKED
);
+ rrdset_update_labels(cg->st_mem_usage_limit, cg->chart_labels);
+
rrddim_add(cg->st_mem_usage_limit, "available", NULL, 1, 1024 * 1024, RRD_ALGORITHM_ABSOLUTE);
rrddim_add(cg->st_mem_usage_limit, "used", NULL, 1, 1024 * 1024, RRD_ALGORITHM_ABSOLUTE);
}
@@ -3159,7 +3528,7 @@ void update_cgroup_charts(int update_every) {
if(likely(cg->memory.updated_failcnt && cg->memory.enabled_failcnt == CONFIG_BOOLEAN_YES)) {
if(unlikely(!cg->st_mem_failcnt)) {
- snprintfz(title, CHART_TITLE_MAX, "Memory Limit Failures for cgroup %s", cg->chart_title);
+ snprintfz(title, CHART_TITLE_MAX, "Memory Limit Failures");
cg->st_mem_failcnt = rrdset_create_localhost(
cgroup_chart_type(type, cg->chart_id, RRD_ID_LENGTH_MAX)
@@ -3175,6 +3544,8 @@ void update_cgroup_charts(int update_every) {
, update_every
, RRDSET_TYPE_LINE
);
+
+ rrdset_update_labels(cg->st_mem_failcnt, cg->chart_labels);
rrddim_add(cg->st_mem_failcnt, "failures", NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL);
}
@@ -3187,7 +3558,7 @@ void update_cgroup_charts(int update_every) {
if(likely(cg->io_service_bytes.updated && cg->io_service_bytes.enabled == CONFIG_BOOLEAN_YES)) {
if(unlikely(!cg->st_io)) {
- snprintfz(title, CHART_TITLE_MAX, "I/O Bandwidth (all disks) for cgroup %s", cg->chart_title);
+ snprintfz(title, CHART_TITLE_MAX, "I/O Bandwidth (all disks)");
cg->st_io = rrdset_create_localhost(
cgroup_chart_type(type, cg->chart_id, RRD_ID_LENGTH_MAX)
@@ -3204,6 +3575,8 @@ void update_cgroup_charts(int update_every) {
, RRDSET_TYPE_AREA
);
+ rrdset_update_labels(cg->st_io, cg->chart_labels);
+
rrddim_add(cg->st_io, "read", NULL, 1, 1024, RRD_ALGORITHM_INCREMENTAL);
rrddim_add(cg->st_io, "write", NULL, -1, 1024, RRD_ALGORITHM_INCREMENTAL);
}
@@ -3217,7 +3590,7 @@ void update_cgroup_charts(int update_every) {
if(likely(cg->io_serviced.updated && cg->io_serviced.enabled == CONFIG_BOOLEAN_YES)) {
if(unlikely(!cg->st_serviced_ops)) {
- snprintfz(title, CHART_TITLE_MAX, "Serviced I/O Operations (all disks) for cgroup %s", cg->chart_title);
+ snprintfz(title, CHART_TITLE_MAX, "Serviced I/O Operations (all disks)");
cg->st_serviced_ops = rrdset_create_localhost(
cgroup_chart_type(type, cg->chart_id, RRD_ID_LENGTH_MAX)
@@ -3234,6 +3607,8 @@ void update_cgroup_charts(int update_every) {
, RRDSET_TYPE_LINE
);
+ rrdset_update_labels(cg->st_serviced_ops, cg->chart_labels);
+
rrddim_add(cg->st_serviced_ops, "read", NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL);
rrddim_add(cg->st_serviced_ops, "write", NULL, -1, 1, RRD_ALGORITHM_INCREMENTAL);
}
@@ -3247,7 +3622,7 @@ void update_cgroup_charts(int update_every) {
if(likely(cg->throttle_io_service_bytes.updated && cg->throttle_io_service_bytes.enabled == CONFIG_BOOLEAN_YES)) {
if(unlikely(!cg->st_throttle_io)) {
- snprintfz(title, CHART_TITLE_MAX, "Throttle I/O Bandwidth (all disks) for cgroup %s", cg->chart_title);
+ snprintfz(title, CHART_TITLE_MAX, "Throttle I/O Bandwidth (all disks)");
cg->st_throttle_io = rrdset_create_localhost(
cgroup_chart_type(type, cg->chart_id, RRD_ID_LENGTH_MAX)
@@ -3263,6 +3638,8 @@ void update_cgroup_charts(int update_every) {
, update_every
, RRDSET_TYPE_AREA
);
+
+ rrdset_update_labels(cg->st_throttle_io, cg->chart_labels);
rrddim_add(cg->st_throttle_io, "read", NULL, 1, 1024, RRD_ALGORITHM_INCREMENTAL);
rrddim_add(cg->st_throttle_io, "write", NULL, -1, 1024, RRD_ALGORITHM_INCREMENTAL);
@@ -3277,7 +3654,7 @@ void update_cgroup_charts(int update_every) {
if(likely(cg->throttle_io_serviced.updated && cg->throttle_io_serviced.enabled == CONFIG_BOOLEAN_YES)) {
if(unlikely(!cg->st_throttle_serviced_ops)) {
- snprintfz(title, CHART_TITLE_MAX, "Throttle Serviced I/O Operations (all disks) for cgroup %s", cg->chart_title);
+ snprintfz(title, CHART_TITLE_MAX, "Throttle Serviced I/O Operations (all disks)");
cg->st_throttle_serviced_ops = rrdset_create_localhost(
cgroup_chart_type(type, cg->chart_id, RRD_ID_LENGTH_MAX)
@@ -3293,6 +3670,8 @@ void update_cgroup_charts(int update_every) {
, update_every
, RRDSET_TYPE_LINE
);
+
+ rrdset_update_labels(cg->st_throttle_serviced_ops, cg->chart_labels);
rrddim_add(cg->st_throttle_serviced_ops, "read", NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL);
rrddim_add(cg->st_throttle_serviced_ops, "write", NULL, -1, 1, RRD_ALGORITHM_INCREMENTAL);
@@ -3307,7 +3686,7 @@ void update_cgroup_charts(int update_every) {
if(likely(cg->io_queued.updated && cg->io_queued.enabled == CONFIG_BOOLEAN_YES)) {
if(unlikely(!cg->st_queued_ops)) {
- snprintfz(title, CHART_TITLE_MAX, "Queued I/O Operations (all disks) for cgroup %s", cg->chart_title);
+ snprintfz(title, CHART_TITLE_MAX, "Queued I/O Operations (all disks)");
cg->st_queued_ops = rrdset_create_localhost(
cgroup_chart_type(type, cg->chart_id, RRD_ID_LENGTH_MAX)
@@ -3323,6 +3702,8 @@ void update_cgroup_charts(int update_every) {
, update_every
, RRDSET_TYPE_LINE
);
+
+ rrdset_update_labels(cg->st_queued_ops, cg->chart_labels);
rrddim_add(cg->st_queued_ops, "read", NULL, 1, 1, RRD_ALGORITHM_ABSOLUTE);
rrddim_add(cg->st_queued_ops, "write", NULL, -1, 1, RRD_ALGORITHM_ABSOLUTE);
@@ -3337,7 +3718,7 @@ void update_cgroup_charts(int update_every) {
if(likely(cg->io_merged.updated && cg->io_merged.enabled == CONFIG_BOOLEAN_YES)) {
if(unlikely(!cg->st_merged_ops)) {
- snprintfz(title, CHART_TITLE_MAX, "Merged I/O Operations (all disks) for cgroup %s", cg->chart_title);
+ snprintfz(title, CHART_TITLE_MAX, "Merged I/O Operations (all disks)");
cg->st_merged_ops = rrdset_create_localhost(
cgroup_chart_type(type, cg->chart_id, RRD_ID_LENGTH_MAX)
@@ -3353,6 +3734,8 @@ void update_cgroup_charts(int update_every) {
, update_every
, RRDSET_TYPE_LINE
);
+
+ rrdset_update_labels(cg->st_merged_ops, cg->chart_labels);
rrddim_add(cg->st_merged_ops, "read", NULL, 1, 1024, RRD_ALGORITHM_INCREMENTAL);
rrddim_add(cg->st_merged_ops, "write", NULL, -1, 1024, RRD_ALGORITHM_INCREMENTAL);
@@ -3364,6 +3747,171 @@ void update_cgroup_charts(int update_every) {
rrddim_set(cg->st_merged_ops, "write", cg->io_merged.Write);
rrdset_done(cg->st_merged_ops);
}
+
+ if (cg->options & CGROUP_OPTIONS_IS_UNIFIED) {
+ struct pressure *res = &cg->cpu_pressure;
+ if (likely(res->updated && res->some.enabled)) {
+ if (unlikely(!res->some.st)) {
+ RRDSET *chart;
+ snprintfz(title, CHART_TITLE_MAX, "CPU pressure");
+
+ chart = res->some.st = rrdset_create_localhost(
+ cgroup_chart_type(type, cg->chart_id, RRD_ID_LENGTH_MAX)
+ , "cpu_pressure"
+ , NULL
+ , "cpu"
+ , "cgroup.cpu_pressure"
+ , title
+ , "percentage"
+ , PLUGIN_CGROUPS_NAME
+ , PLUGIN_CGROUPS_MODULE_CGROUPS_NAME
+ , cgroup_containers_chart_priority + 2200
+ , update_every
+ , RRDSET_TYPE_LINE
+ );
+
+ rrdset_update_labels(chart = res->some.st, cg->chart_labels);
+
+ res->some.rd10 = rrddim_add(chart, "some 10", NULL, 1, 100, RRD_ALGORITHM_ABSOLUTE);
+ res->some.rd60 = rrddim_add(chart, "some 60", NULL, 1, 100, RRD_ALGORITHM_ABSOLUTE);
+ res->some.rd300 = rrddim_add(chart, "some 300", NULL, 1, 100, RRD_ALGORITHM_ABSOLUTE);
+ } else {
+ rrdset_next(res->some.st);
+ }
+
+ update_pressure_chart(&res->some);
+ }
+
+ res = &cg->memory_pressure;
+ if (likely(res->updated && res->some.enabled)) {
+ if (unlikely(!res->some.st)) {
+ RRDSET *chart;
+ snprintfz(title, CHART_TITLE_MAX, "Memory pressure");
+
+ chart = res->some.st = rrdset_create_localhost(
+ cgroup_chart_type(type, cg->chart_id, RRD_ID_LENGTH_MAX)
+ , "mem_pressure"
+ , NULL
+ , "mem"
+ , "cgroup.memory_pressure"
+ , title
+ , "percentage"
+ , PLUGIN_CGROUPS_NAME
+ , PLUGIN_CGROUPS_MODULE_CGROUPS_NAME
+ , cgroup_containers_chart_priority + 2300
+ , update_every
+ , RRDSET_TYPE_LINE
+ );
+
+ rrdset_update_labels(chart = res->some.st, cg->chart_labels);
+
+ res->some.rd10 = rrddim_add(chart, "some 10", NULL, 1, 100, RRD_ALGORITHM_ABSOLUTE);
+ res->some.rd60 = rrddim_add(chart, "some 60", NULL, 1, 100, RRD_ALGORITHM_ABSOLUTE);
+ res->some.rd300 = rrddim_add(chart, "some 300", NULL, 1, 100, RRD_ALGORITHM_ABSOLUTE);
+ } else {
+ rrdset_next(res->some.st);
+ }
+
+ update_pressure_chart(&res->some);
+ }
+
+ if (likely(res->updated && res->full.enabled)) {
+ if (unlikely(!res->full.st)) {
+ RRDSET *chart;
+ snprintfz(title, CHART_TITLE_MAX, "Memory full pressure");
+
+ chart = res->full.st = rrdset_create_localhost(
+ cgroup_chart_type(type, cg->chart_id, RRD_ID_LENGTH_MAX)
+ , "mem_full_pressure"
+ , NULL
+ , "mem"
+ , "cgroup.memory_full_pressure"
+ , title
+ , "percentage"
+ , PLUGIN_CGROUPS_NAME
+ , PLUGIN_CGROUPS_MODULE_CGROUPS_NAME
+ , cgroup_containers_chart_priority + 2350
+ , update_every
+ , RRDSET_TYPE_LINE
+ );
+
+ rrdset_update_labels(chart = res->full.st, cg->chart_labels);
+
+ res->full.rd10 = rrddim_add(chart, "full 10", NULL, 1, 100, RRD_ALGORITHM_ABSOLUTE);
+ res->full.rd60 = rrddim_add(chart, "full 60", NULL, 1, 100, RRD_ALGORITHM_ABSOLUTE);
+ res->full.rd300 = rrddim_add(chart, "full 300", NULL, 1, 100, RRD_ALGORITHM_ABSOLUTE);
+ } else {
+ rrdset_next(res->full.st);
+ }
+
+ update_pressure_chart(&res->full);
+ }
+
+ res = &cg->io_pressure;
+ if (likely(res->updated && res->some.enabled)) {
+ if (unlikely(!res->some.st)) {
+ RRDSET *chart;
+ snprintfz(title, CHART_TITLE_MAX, "I/O pressure");
+
+ chart = res->some.st = rrdset_create_localhost(
+ cgroup_chart_type(type, cg->chart_id, RRD_ID_LENGTH_MAX)
+ , "io_pressure"
+ , NULL
+ , "disk"
+ , "cgroup.io_pressure"
+ , title
+ , "percentage"
+ , PLUGIN_CGROUPS_NAME
+ , PLUGIN_CGROUPS_MODULE_CGROUPS_NAME
+ , cgroup_containers_chart_priority + 2400
+ , update_every
+ , RRDSET_TYPE_LINE
+ );
+
+ rrdset_update_labels(chart = res->some.st, cg->chart_labels);
+
+ res->some.rd10 = rrddim_add(chart, "some 10", NULL, 1, 100, RRD_ALGORITHM_ABSOLUTE);
+ res->some.rd60 = rrddim_add(chart, "some 60", NULL, 1, 100, RRD_ALGORITHM_ABSOLUTE);
+ res->some.rd300 = rrddim_add(chart, "some 300", NULL, 1, 100, RRD_ALGORITHM_ABSOLUTE);
+ } else {
+ rrdset_next(res->some.st);
+ }
+
+ update_pressure_chart(&res->some);
+ }
+
+ if (likely(res->updated && res->full.enabled)) {
+ if (unlikely(!res->full.st)) {
+ RRDSET *chart;
+ snprintfz(title, CHART_TITLE_MAX, "I/O full pressure");
+
+ chart = res->full.st = rrdset_create_localhost(
+ cgroup_chart_type(type, cg->chart_id, RRD_ID_LENGTH_MAX)
+ , "io_full_pressure"
+ , NULL
+ , "disk"
+ , "cgroup.io_full_pressure"
+ , title
+ , "percentage"
+ , PLUGIN_CGROUPS_NAME
+ , PLUGIN_CGROUPS_MODULE_CGROUPS_NAME
+ , cgroup_containers_chart_priority + 2450
+ , update_every
+ , RRDSET_TYPE_LINE
+ );
+
+ rrdset_update_labels(chart = res->full.st, cg->chart_labels);
+
+ res->full.rd10 = rrddim_add(chart, "full 10", NULL, 1, 100, RRD_ALGORITHM_ABSOLUTE);
+ res->full.rd60 = rrddim_add(chart, "full 60", NULL, 1, 100, RRD_ALGORITHM_ABSOLUTE);
+ res->full.rd300 = rrddim_add(chart, "full 300", NULL, 1, 100, RRD_ALGORITHM_ABSOLUTE);
+ } else {
+ rrdset_next(res->full.st);
+ }
+
+ update_pressure_chart(&res->full);
+ }
+ }
}
if(likely(cgroup_enable_systemd_services))
@@ -3385,6 +3933,21 @@ static void cgroup_main_cleanup(void *ptr) {
info("cleaning up...");
+ usec_t max = 2 * USEC_PER_SEC, step = 50000;
+
+ if (!discovery_thread.exited) {
+ info("stopping discovery thread worker");
+ uv_mutex_unlock(&discovery_thread.mutex);
+ discovery_thread.start_discovery = 1;
+ uv_cond_signal(&discovery_thread.cond_var);
+ }
+
+ while (!discovery_thread.exited && max > 0) {
+ max -= step;
+ info("waiting for discovery thread to finish...");
+ sleep_usec(step);
+ }
+
static_thread->enabled = NETDATA_MAIN_THREAD_EXITED;
}
@@ -3400,6 +3963,31 @@ void *cgroups_main(void *ptr) {
RRDSET *stcpu_thread = NULL;
+ if (uv_mutex_init(&cgroup_root_mutex)) {
+ error("CGROUP: cannot initialize mutex for the main cgroup list");
+ goto exit;
+ }
+
+ // dispatch a discovery worker thread
+ discovery_thread.start_discovery = 0;
+ discovery_thread.exited = 0;
+
+ if (uv_mutex_init(&discovery_thread.mutex)) {
+ error("CGROUP: cannot initialize mutex for discovery thread");
+ goto exit;
+ }
+ if (uv_cond_init(&discovery_thread.cond_var)) {
+ error("CGROUP: cannot initialize conditional variable for discovery thread");
+ goto exit;
+ }
+
+ int error = uv_thread_create(&discovery_thread.thread, cgroup_discovery_worker, NULL);
+ if (error) {
+ error("CGROUP: cannot create tread worker. uv_thread_create(): %s", uv_strerror(error));
+ goto exit;
+ }
+ uv_thread_set_name_np(discovery_thread.thread, "PLUGIN[cgroups]");
+
heartbeat_t hb;
heartbeat_init(&hb);
usec_t step = cgroup_update_every * USEC_PER_SEC;
@@ -3409,19 +3997,18 @@ void *cgroups_main(void *ptr) {
usec_t hb_dt = heartbeat_next(&hb, step);
if(unlikely(netdata_exit)) break;
- // BEGIN -- the job to be done
-
find_dt += hb_dt;
if(unlikely(find_dt >= find_every || cgroups_check)) {
- find_all_cgroups();
+ uv_cond_signal(&discovery_thread.cond_var);
+ discovery_thread.start_discovery = 1;
find_dt = 0;
cgroups_check = 0;
}
+ uv_mutex_lock(&cgroup_root_mutex);
read_all_cgroups(cgroup_root);
update_cgroup_charts(cgroup_update_every);
-
- // END -- the job is done
+ uv_mutex_unlock(&cgroup_root_mutex);
// --------------------------------------------------------------------
@@ -3457,6 +4044,7 @@ void *cgroups_main(void *ptr) {
}
}
+exit:
netdata_thread_cleanup_pop(1);
return NULL;
}