diff options
Diffstat (limited to 'collectors/cgroups.plugin/sys_fs_cgroup.c')
-rw-r--r-- | collectors/cgroups.plugin/sys_fs_cgroup.c | 874 |
1 files changed, 731 insertions, 143 deletions
diff --git a/collectors/cgroups.plugin/sys_fs_cgroup.c b/collectors/cgroups.plugin/sys_fs_cgroup.c index d9d130f7e..705c51735 100644 --- a/collectors/cgroups.plugin/sys_fs_cgroup.c +++ b/collectors/cgroups.plugin/sys_fs_cgroup.c @@ -23,6 +23,11 @@ static int cgroup_enable_blkio_throttle_io = CONFIG_BOOLEAN_AUTO; static int cgroup_enable_blkio_throttle_ops = CONFIG_BOOLEAN_AUTO; static int cgroup_enable_blkio_merged_ops = CONFIG_BOOLEAN_AUTO; static int cgroup_enable_blkio_queued_ops = CONFIG_BOOLEAN_AUTO; +static int cgroup_enable_pressure_cpu = CONFIG_BOOLEAN_AUTO; +static int cgroup_enable_pressure_io_some = CONFIG_BOOLEAN_AUTO; +static int cgroup_enable_pressure_io_full = CONFIG_BOOLEAN_AUTO; +static int cgroup_enable_pressure_memory_some = CONFIG_BOOLEAN_AUTO; +static int cgroup_enable_pressure_memory_full = CONFIG_BOOLEAN_AUTO; static int cgroup_enable_systemd_services = CONFIG_BOOLEAN_YES; static int cgroup_enable_systemd_services_detailed_memory = CONFIG_BOOLEAN_NO; @@ -68,6 +73,130 @@ static uint32_t Write_hash = 0; static uint32_t user_hash = 0; static uint32_t system_hash = 0; +enum cgroups_type { CGROUPS_AUTODETECT_FAIL, CGROUPS_V1, CGROUPS_V2 }; + +enum cgroups_systemd_setting { + SYSTEMD_CGROUP_ERR, + SYSTEMD_CGROUP_LEGACY, + SYSTEMD_CGROUP_HYBRID, + SYSTEMD_CGROUP_UNIFIED +}; + +struct cgroups_systemd_config_setting { + char *name; + enum cgroups_systemd_setting setting; +}; + +static struct cgroups_systemd_config_setting cgroups_systemd_options[] = { + { .name = "legacy", .setting = SYSTEMD_CGROUP_LEGACY }, + { .name = "hybrid", .setting = SYSTEMD_CGROUP_HYBRID }, + { .name = "unified", .setting = SYSTEMD_CGROUP_UNIFIED }, + { .name = NULL, .setting = SYSTEMD_CGROUP_ERR }, +}; + +/* on Fed systemd is not in PATH for some reason */ +#define SYSTEMD_CMD_RHEL "/usr/lib/systemd/systemd --version" +#define SYSTEMD_HIERARCHY_STRING "default-hierarchy=" + +#define MAXSIZE_PROC_CMDLINE 4096 +static enum cgroups_systemd_setting cgroups_detect_systemd(const char *exec) +{ + pid_t command_pid; + enum cgroups_systemd_setting retval = SYSTEMD_CGROUP_ERR; + char buf[MAXSIZE_PROC_CMDLINE]; + char *begin, *end; + + FILE *f = mypopen(exec, &command_pid); + + if (!f) + return retval; + + while (fgets(buf, MAXSIZE_PROC_CMDLINE, f) != NULL) { + if ((begin = strstr(buf, SYSTEMD_HIERARCHY_STRING))) { + end = begin = begin + strlen(SYSTEMD_HIERARCHY_STRING); + if (!*begin) + break; + while (isalpha(*end)) + end++; + *end = 0; + for (int i = 0; cgroups_systemd_options[i].name; i++) { + if (!strcmp(begin, cgroups_systemd_options[i].name)) { + retval = cgroups_systemd_options[i].setting; + break; + } + } + break; + } + } + + if (mypclose(f, command_pid)) + return SYSTEMD_CGROUP_ERR; + + return retval; +} + +static enum cgroups_type cgroups_try_detect_version() +{ + pid_t command_pid; + char buf[MAXSIZE_PROC_CMDLINE]; + enum cgroups_systemd_setting systemd_setting; + int cgroups2_available = 0; + + // 1. check if cgroups2 availible on system at all + FILE *f = mypopen("grep cgroup /proc/filesystems", &command_pid); + if (!f) { + error("popen failed"); + return CGROUPS_AUTODETECT_FAIL; + } + while (fgets(buf, MAXSIZE_PROC_CMDLINE, f) != NULL) { + if (strstr(buf, "cgroup2")) { + cgroups2_available = 1; + break; + } + } + if(mypclose(f, command_pid)) + return CGROUPS_AUTODETECT_FAIL; + + if(!cgroups2_available) + return CGROUPS_V1; + + // 2. check systemd compiletime setting + if ((systemd_setting = cgroups_detect_systemd("systemd --version")) == SYSTEMD_CGROUP_ERR) + systemd_setting = cgroups_detect_systemd(SYSTEMD_CMD_RHEL); + + if(systemd_setting == SYSTEMD_CGROUP_ERR) + return CGROUPS_AUTODETECT_FAIL; + + if(systemd_setting == SYSTEMD_CGROUP_LEGACY || systemd_setting == SYSTEMD_CGROUP_HYBRID) { + // curently we prefer V1 if HYBRID is set as it seems to be more feature complete + // in the future we might want to continue here if SYSTEMD_CGROUP_HYBRID + // and go ahead with V2 + return CGROUPS_V1; + } + + // 3. if we are unified as on Fedora (default cgroups2 only mode) + // check kernel command line flag that can override that setting + f = fopen("/proc/cmdline", "r"); + if (!f) { + error("Error reading kernel boot commandline parameters"); + return CGROUPS_AUTODETECT_FAIL; + } + + if (!fgets(buf, MAXSIZE_PROC_CMDLINE, f)) { + error("couldn't read all cmdline params into buffer"); + fclose(f); + return CGROUPS_AUTODETECT_FAIL; + } + + fclose(f); + + if (strstr(buf, "systemd.unified_cgroup_hierarchy=0")) { + info("cgroups v2 (unified cgroups) is available but are disabled on this system."); + return CGROUPS_V1; + } + return CGROUPS_V2; +} + void read_cgroup_plugin_configuration() { system_page_size = sysconf(_SC_PAGESIZE); @@ -84,7 +213,11 @@ void read_cgroup_plugin_configuration() { if(cgroup_check_for_new_every < cgroup_update_every) cgroup_check_for_new_every = cgroup_update_every; - cgroup_use_unified_cgroups = config_get_boolean_ondemand("plugin:cgroups", "use unified cgroups", cgroup_use_unified_cgroups); + cgroup_use_unified_cgroups = config_get_boolean_ondemand("plugin:cgroups", "use unified cgroups", CONFIG_BOOLEAN_AUTO); + if(cgroup_use_unified_cgroups == CONFIG_BOOLEAN_AUTO) + cgroup_use_unified_cgroups = (cgroups_try_detect_version() == CGROUPS_V2); + + info("use unified cgroups %s", cgroup_use_unified_cgroups ? "true" : "false"); cgroup_containers_chart_priority = (int)config_get_number("plugin:cgroups", "containers priority", cgroup_containers_chart_priority); if(cgroup_containers_chart_priority < 1) @@ -105,6 +238,12 @@ void read_cgroup_plugin_configuration() { cgroup_enable_blkio_queued_ops = config_get_boolean_ondemand("plugin:cgroups", "enable blkio queued operations", cgroup_enable_blkio_queued_ops); cgroup_enable_blkio_merged_ops = config_get_boolean_ondemand("plugin:cgroups", "enable blkio merged operations", cgroup_enable_blkio_merged_ops); + cgroup_enable_pressure_cpu = config_get_boolean_ondemand("plugin:cgroups", "enable cpu pressure", cgroup_enable_pressure_cpu); + cgroup_enable_pressure_io_some = config_get_boolean_ondemand("plugin:cgroups", "enable io some pressure", cgroup_enable_pressure_io_some); + cgroup_enable_pressure_io_full = config_get_boolean_ondemand("plugin:cgroups", "enable io full pressure", cgroup_enable_pressure_io_full); + cgroup_enable_pressure_memory_some = config_get_boolean_ondemand("plugin:cgroups", "enable memory some pressure", cgroup_enable_pressure_memory_some); + cgroup_enable_pressure_memory_full = config_get_boolean_ondemand("plugin:cgroups", "enable memory full pressure", cgroup_enable_pressure_memory_full); + cgroup_recheck_zero_blkio_every_iterations = (int)config_get_number("plugin:cgroups", "recheck zero blkio every iterations", cgroup_recheck_zero_blkio_every_iterations); cgroup_recheck_zero_mem_failcnt_every_iterations = (int)config_get_number("plugin:cgroups", "recheck zero memory failcnt every iterations", cgroup_recheck_zero_mem_failcnt_every_iterations); cgroup_recheck_zero_mem_detailed_every_iterations = (int)config_get_number("plugin:cgroups", "recheck zero detailed memory every iterations", cgroup_recheck_zero_mem_detailed_every_iterations); @@ -116,6 +255,13 @@ void read_cgroup_plugin_configuration() { char filename[FILENAME_MAX + 1], *s; struct mountinfo *mi, *root = mountinfo_read(0); if(!cgroup_use_unified_cgroups) { + // cgroup v1 does not have pressure metrics + cgroup_enable_pressure_cpu = + cgroup_enable_pressure_io_some = + cgroup_enable_pressure_io_full = + cgroup_enable_pressure_memory_some = + cgroup_enable_pressure_memory_full = CONFIG_BOOLEAN_NO; + mi = mountinfo_find_by_filesystem_super_option(root, "cgroup", "cpuacct"); if(!mi) mi = mountinfo_find_by_filesystem_mount_source(root, "cgroup", "cpuacct"); if(!mi) { @@ -216,9 +362,12 @@ void read_cgroup_plugin_configuration() { // ---------------------------------------------------------------- " /machine.slice/*.service " // #3367 systemd-nspawn + " /kubepods/pod*/* " // k8s containers + " /kubepods/*/pod*/* " // k8s containers // ---------------------------------------------------------------- + " !/kubepods* " // all other k8s cgroups " !*/vcpu* " // libvirtd adds these sub-cgroups " !*/emulator " // libvirtd adds these sub-cgroups " !*.mount " @@ -233,6 +382,9 @@ void read_cgroup_plugin_configuration() { " !/libvirt " " !/lxc " " !/lxc/*/* " // #1397 #2649 + " !/lxc.monitor* " + " !/lxc.pivot " + " !/lxc.payload " " !/machine " " !/qemu " " !/system " @@ -252,6 +404,9 @@ void read_cgroup_plugin_configuration() { " !/user " " !/user.slice " " !/lxc/*/* " // #2161 #2649 + " !/lxc.monitor " + " !/lxc.payload/*/* " + " !/lxc.payload.* " " * " ), NULL, SIMPLE_PATTERN_EXACT); @@ -429,6 +584,7 @@ struct cgroup_network_interface { #define CGROUP_OPTIONS_SYSTEM_SLICE_SERVICE 0x00000002 #define CGROUP_OPTIONS_IS_UNIFIED 0x00000004 +// *** WARNING *** The fields are not thread safe. Take care of safe usage. struct cgroup { uint32_t options; @@ -445,6 +601,8 @@ struct cgroup { char *chart_title; + struct label *chart_labels; + struct cpuacct_stat cpuacct_stat; struct cpuacct_usage cpuacct_usage; @@ -461,6 +619,10 @@ struct cgroup { struct cgroup_network_interface *interfaces; + struct pressure cpu_pressure; + struct pressure io_pressure; + struct pressure memory_pressure; + // per cgroup charts RRDSET *st_cpu; RRDSET *st_cpu_limit; @@ -530,9 +692,22 @@ struct cgroup { RRDDIM *rd_io_merged_write; struct cgroup *next; + struct cgroup *discovered_next; } *cgroup_root = NULL; +uv_mutex_t cgroup_root_mutex; + +struct cgroup *discovered_cgroup_root = NULL; + +struct discovery_thread { + uv_thread_t thread; + uv_mutex_t mutex; + uv_cond_t cond_var; + int start_discovery; + int exited; +} discovery_thread; + // ---------------------------------------------------------------------------- // read values from /sys @@ -798,6 +973,54 @@ static inline void cgroup2_read_blkio(struct blkio *io, unsigned int word_offset } } +static inline void cgroup2_read_pressure(struct pressure *res) { + static procfile *ff = NULL; + + if (likely(res->filename)) { + ff = procfile_reopen(ff, res->filename, " =", PROCFILE_FLAG_DEFAULT); + if (unlikely(!ff)) { + res->updated = 0; + cgroups_check = 1; + return; + } + + ff = procfile_readall(ff); + if (unlikely(!ff)) { + res->updated = 0; + cgroups_check = 1; + return; + } + + size_t lines = procfile_lines(ff); + if (lines < 1) { + error("CGROUP: file '%s' should have 1+ lines.", res->filename); + res->updated = 0; + return; + } + + res->some.value10 = strtod(procfile_lineword(ff, 0, 2), NULL); + res->some.value60 = strtod(procfile_lineword(ff, 0, 4), NULL); + res->some.value300 = strtod(procfile_lineword(ff, 0, 6), NULL); + + if (lines > 2) { + res->full.value10 = strtod(procfile_lineword(ff, 1, 2), NULL); + res->full.value60 = strtod(procfile_lineword(ff, 1, 4), NULL); + res->full.value300 = strtod(procfile_lineword(ff, 1, 6), NULL); + } + + res->updated = 1; + + if (unlikely(res->some.enabled == CONFIG_BOOLEAN_AUTO)) { + res->some.enabled = CONFIG_BOOLEAN_YES; + if (lines > 2) { + res->full.enabled = CONFIG_BOOLEAN_YES; + } else { + res->full.enabled = CONFIG_BOOLEAN_NO; + } + } + } +} + static inline void cgroup_read_memory(struct memory *mem, char parent_cg_is_unified) { static procfile *ff = NULL; @@ -946,6 +1169,9 @@ static inline void cgroup_read(struct cgroup *cg) { cgroup2_read_blkio(&cg->io_service_bytes, 0); cgroup2_read_blkio(&cg->io_serviced, 4); cgroup2_read_cpuacct_stat(&cg->cpuacct_stat); + cgroup2_read_pressure(&cg->cpu_pressure); + cgroup2_read_pressure(&cg->io_pressure); + cgroup2_read_pressure(&cg->memory_pressure); cgroup_read_memory(&cg->memory, 1); } } @@ -956,7 +1182,7 @@ static inline void read_all_cgroups(struct cgroup *root) { struct cgroup *cg; for(cg = root; cg ; cg = cg->next) - if(cg->enabled && cg->available && !cg->pending_renames) + if(cg->enabled && !cg->pending_renames) cgroup_read(cg); } @@ -1016,7 +1242,7 @@ static inline void read_cgroup_network_interfaces(struct cgroup *cg) { info("CGROUP: cgroup '%s' has network interface '%s' as '%s'", cg->id, i->host_device, i->container_device); // register a device rename to proc_net_dev.c - netdev_rename_device_add(i->host_device, i->container_device, cg->chart_id); + netdev_rename_device_add(i->host_device, i->container_device, cg->chart_id, cg->chart_labels); } } @@ -1065,6 +1291,35 @@ static inline char *cgroup_chart_id_strdupz(const char *s) { return r; } +char *parse_k8s_data(struct label **labels, char *data) +{ + char *name = mystrsep(&data, " "); + + if (!data) { + return name; + } + + while (data) { + char *key = mystrsep(&data, "="); + + char *value; + if (data && *data == ',') { + value = ""; + *data++ = '\0'; + } else { + value = mystrsep(&data, ","); + } + value = strip_double_quotes(value, 1); + + if (!key || *key == '\0' || !value || *value == '\0') + continue; + + *labels = add_label_to_list(*labels, key, value, LABEL_SOURCE_KUBERNETES); + } + + return name; +} + static inline void cgroup_get_chart_name(struct cgroup *cg) { debug(D_CGROUP, "looking for the name of cgroup '%s' with chart id '%s' and title '%s'", cg->id, cg->chart_id, cg->chart_title); @@ -1095,12 +1350,19 @@ static inline void cgroup_get_chart_name(struct cgroup *cg) { cg->enabled = 0; } - if(likely(cg->pending_renames < 2)) { + if (likely(cg->pending_renames < 2)) { + char *name = s; + + if (!strncmp(s, "k8s_", 4)) { + free_label_list(cg->chart_labels); + name = parse_k8s_data(&cg->chart_labels, s); + } + freez(cg->chart_title); - cg->chart_title = cgroup_title_strdupz(s); + cg->chart_title = cgroup_title_strdupz(name); freez(cg->chart_id); - cg->chart_id = cgroup_chart_id_strdupz(s); + cg->chart_id = cgroup_chart_id_strdupz(name); cg->hash_chart = simple_hash(cg->chart_id); } } @@ -1132,13 +1394,13 @@ static inline struct cgroup *cgroup_add(const char *id) { if(cgroup_use_unified_cgroups) cg->options |= CGROUP_OPTIONS_IS_UNIFIED; - if(!cgroup_root) - cgroup_root = cg; + if(!discovered_cgroup_root) + discovered_cgroup_root = cg; else { // append it struct cgroup *e; - for(e = cgroup_root; e->next ;e = e->next) ; - e->next = cg; + for(e = discovered_cgroup_root; e->discovered_next ;e = e->discovered_next) ; + e->discovered_next = cg; } cgroup_root_count++; @@ -1204,24 +1466,23 @@ static inline struct cgroup *cgroup_add(const char *id) { // detect duplicate cgroups if(cg->enabled) { struct cgroup *t; - for (t = cgroup_root; t; t = t->next) { + for (t = discovered_cgroup_root; t; t = t->discovered_next) { if (t != cg && t->enabled && t->hash_chart == cg->hash_chart && !strcmp(t->chart_id, cg->chart_id)) { - if (!strncmp(t->chart_id, "/system.slice/", 14) && !strncmp(cg->chart_id, "/init.scope/system.slice/", 25)) { - error("CGROUP: chart id '%s' already exists with id '%s' and is enabled. Swapping them by enabling cgroup with id '%s' and disabling cgroup with id '%s'.", - cg->chart_id, t->id, cg->id, t->id); - debug(D_CGROUP, "Control group with chart id '%s' already exists with id '%s' and is enabled. Swapping them by enabling cgroup with id '%s' and disabling cgroup with id '%s'.", - cg->chart_id, t->id, cg->id, t->id); - t->enabled = 0; - t->options |= CGROUP_OPTIONS_DISABLED_DUPLICATE; - } - else { - error("CGROUP: chart id '%s' already exists with id '%s' and is enabled and available. Disabling cgroup with id '%s'.", - cg->chart_id, t->id, cg->id); - debug(D_CGROUP, "Control group with chart id '%s' already exists with id '%s' and is enabled and available. Disabling cgroup with id '%s'.", - cg->chart_id, t->id, cg->id); - cg->enabled = 0; - cg->options |= CGROUP_OPTIONS_DISABLED_DUPLICATE; - } + // TODO: use it after refactoring if system.slice might be scanned before init.scope/system.slice + // + // if (!strncmp(t->id, "/system.slice/", 14) && !strncmp(cg->id, "/init.scope/system.slice/", 25)) { + // error("CGROUP: chart id '%s' already exists with id '%s' and is enabled. Swapping them by enabling cgroup with id '%s' and disabling cgroup with id '%s'.", + // cg->chart_id, t->id, cg->id, t->id); + // t->enabled = 0; + // t->options |= CGROUP_OPTIONS_DISABLED_DUPLICATE; + // } + // else {} + // + // https://github.com/netdata/netdata/issues/797#issuecomment-241248884 + error("CGROUP: chart id '%s' already exists with id '%s' and is enabled and available. Disabling cgroup with id '%s'.", + cg->chart_id, t->id, cg->id); + cg->enabled = 0; + cg->options |= CGROUP_OPTIONS_DISABLED_DUPLICATE; break; } @@ -1236,6 +1497,12 @@ static inline struct cgroup *cgroup_add(const char *id) { return cg; } +static inline void free_pressure(struct pressure *res) { + if (res->some.st) rrdset_is_obsolete(res->some.st); + if (res->full.st) rrdset_is_obsolete(res->full.st); + freez(res->filename); +} + static inline void cgroup_free(struct cgroup *cg) { debug(D_CGROUP, "Removing cgroup '%s' with chart id '%s' (was %s and %s)", cg->id, cg->chart_id, (cg->enabled)?"enabled":"disabled", (cg->available)?"available":"not available"); @@ -1284,10 +1551,16 @@ static inline void cgroup_free(struct cgroup *cg) { freez(cg->io_merged.filename); freez(cg->io_queued.filename); + free_pressure(&cg->cpu_pressure); + free_pressure(&cg->io_pressure); + free_pressure(&cg->memory_pressure); + freez(cg->id); freez(cg->chart_id); freez(cg->chart_title); + free_label_list(cg->chart_labels); + freez(cg); cgroup_root_count--; @@ -1300,7 +1573,7 @@ static inline struct cgroup *cgroup_find(const char *id) { uint32_t hash = simple_hash(id); struct cgroup *cg; - for(cg = cgroup_root; cg ; cg = cg->next) { + for(cg = discovered_cgroup_root; cg ; cg = cg->discovered_next) { if(hash == cg->hash && strcmp(id, cg->id) == 0) break; } @@ -1426,103 +1699,16 @@ static inline void mark_all_cgroups_as_not_available() { struct cgroup *cg; // mark all as not available - for(cg = cgroup_root; cg ; cg = cg->next) { + for(cg = discovered_cgroup_root; cg ; cg = cg->discovered_next) { cg->available = 0; } } -static inline void cleanup_all_cgroups() { - struct cgroup *cg = cgroup_root, *last = NULL; - - for(; cg ;) { - if(!cg->available) { - // enable the first duplicate cgroup - { - struct cgroup *t; - for(t = cgroup_root; t ; t = t->next) { - if(t != cg && t->available && !t->enabled && t->options & CGROUP_OPTIONS_DISABLED_DUPLICATE && t->hash_chart == cg->hash_chart && !strcmp(t->chart_id, cg->chart_id)) { - debug(D_CGROUP, "Enabling duplicate of cgroup '%s' with id '%s', because the original with id '%s' stopped.", t->chart_id, t->id, cg->id); - t->enabled = 1; - t->options &= ~CGROUP_OPTIONS_DISABLED_DUPLICATE; - break; - } - } - } - - if(!last) - cgroup_root = cg->next; - else - last->next = cg->next; - - cgroup_free(cg); - - if(!last) - cg = cgroup_root; - else - cg = last->next; - } - else { - last = cg; - cg = cg->next; - } - } -} - -static inline void find_all_cgroups() { - debug(D_CGROUP, "searching for cgroups"); - - mark_all_cgroups_as_not_available(); - if(!cgroup_use_unified_cgroups) { - if(cgroup_enable_cpuacct_stat || cgroup_enable_cpuacct_usage) { - if(find_dir_in_subdirs(cgroup_cpuacct_base, NULL, found_subdir_in_dir) == -1) { - cgroup_enable_cpuacct_stat = - cgroup_enable_cpuacct_usage = CONFIG_BOOLEAN_NO; - error("CGROUP: disabled cpu statistics."); - } - } - - if(cgroup_enable_blkio_io || cgroup_enable_blkio_ops || cgroup_enable_blkio_throttle_io || cgroup_enable_blkio_throttle_ops || cgroup_enable_blkio_merged_ops || cgroup_enable_blkio_queued_ops) { - if(find_dir_in_subdirs(cgroup_blkio_base, NULL, found_subdir_in_dir) == -1) { - cgroup_enable_blkio_io = - cgroup_enable_blkio_ops = - cgroup_enable_blkio_throttle_io = - cgroup_enable_blkio_throttle_ops = - cgroup_enable_blkio_merged_ops = - cgroup_enable_blkio_queued_ops = CONFIG_BOOLEAN_NO; - error("CGROUP: disabled blkio statistics."); - } - } - - if(cgroup_enable_memory || cgroup_enable_detailed_memory || cgroup_enable_swap || cgroup_enable_memory_failcnt) { - if(find_dir_in_subdirs(cgroup_memory_base, NULL, found_subdir_in_dir) == -1) { - cgroup_enable_memory = - cgroup_enable_detailed_memory = - cgroup_enable_swap = - cgroup_enable_memory_failcnt = CONFIG_BOOLEAN_NO; - error("CGROUP: disabled memory statistics."); - } - } - - if(cgroup_search_in_devices) { - if(find_dir_in_subdirs(cgroup_devices_base, NULL, found_subdir_in_dir) == -1) { - cgroup_search_in_devices = 0; - error("CGROUP: disabled devices statistics."); - } - } - } - else { - if (find_dir_in_subdirs(cgroup_unified_base, NULL, found_subdir_in_dir) == -1) { - cgroup_unified_exist = CONFIG_BOOLEAN_NO; - error("CGROUP: disabled unified cgroups statistics."); - } - } - - // remove any non-existing cgroups - cleanup_all_cgroups(); - +static inline void update_filenames() +{ struct cgroup *cg; struct stat buf; - for(cg = cgroup_root; cg ; cg = cg->next) { + for(cg = discovered_cgroup_root; cg ; cg = cg->discovered_next) { // fprintf(stderr, " >>> CGROUP '%s' (%u - %s) with name '%s'\n", cg->id, cg->hash, cg->available?"available":"stopped", cg->name); if(unlikely(cg->pending_renames)) @@ -1748,12 +1934,175 @@ static inline void find_all_cgroups() { else debug(D_CGROUP, "memory.swap file for cgroup '%s': '%s' does not exist.", cg->id, filename); } + + if (unlikely(cgroup_enable_pressure_cpu && !cg->cpu_pressure.filename)) { + snprintfz(filename, FILENAME_MAX, "%s%s/cpu.pressure", cgroup_unified_base, cg->id); + if (likely(stat(filename, &buf) != -1)) { + cg->cpu_pressure.filename = strdupz(filename); + cg->cpu_pressure.some.enabled = cgroup_enable_pressure_cpu; + cg->cpu_pressure.full.enabled = CONFIG_BOOLEAN_NO; + debug(D_CGROUP, "cpu.pressure filename for cgroup '%s': '%s'", cg->id, cg->cpu_pressure.filename); + } else { + debug(D_CGROUP, "cpu.pressure file for cgroup '%s': '%s' does not exist", cg->id, filename); + } + } + + if (unlikely((cgroup_enable_pressure_io_some || cgroup_enable_pressure_io_full) && !cg->io_pressure.filename)) { + snprintfz(filename, FILENAME_MAX, "%s%s/io.pressure", cgroup_unified_base, cg->id); + if (likely(stat(filename, &buf) != -1)) { + cg->io_pressure.filename = strdupz(filename); + cg->io_pressure.some.enabled = cgroup_enable_pressure_io_some; + cg->io_pressure.full.enabled = cgroup_enable_pressure_io_full; + debug(D_CGROUP, "io.pressure filename for cgroup '%s': '%s'", cg->id, cg->io_pressure.filename); + } else { + debug(D_CGROUP, "io.pressure file for cgroup '%s': '%s' does not exist", cg->id, filename); + } + } + + if (unlikely((cgroup_enable_pressure_memory_some || cgroup_enable_pressure_memory_full) && !cg->memory_pressure.filename)) { + snprintfz(filename, FILENAME_MAX, "%s%s/memory.pressure", cgroup_unified_base, cg->id); + if (likely(stat(filename, &buf) != -1)) { + cg->memory_pressure.filename = strdupz(filename); + cg->memory_pressure.some.enabled = cgroup_enable_pressure_memory_some; + cg->memory_pressure.full.enabled = cgroup_enable_pressure_memory_full; + debug(D_CGROUP, "memory.pressure filename for cgroup '%s': '%s'", cg->id, cg->memory_pressure.filename); + } else { + debug(D_CGROUP, "memory.pressure file for cgroup '%s': '%s' does not exist", cg->id, filename); + } + } } } +} + +static inline void cleanup_all_cgroups() { + struct cgroup *cg = discovered_cgroup_root, *last = NULL; + + for(; cg ;) { + if(!cg->available) { + // enable the first duplicate cgroup + { + struct cgroup *t; + for(t = discovered_cgroup_root; t ; t = t->discovered_next) { + if(t != cg && t->available && !t->enabled && t->options & CGROUP_OPTIONS_DISABLED_DUPLICATE && t->hash_chart == cg->hash_chart && !strcmp(t->chart_id, cg->chart_id)) { + debug(D_CGROUP, "Enabling duplicate of cgroup '%s' with id '%s', because the original with id '%s' stopped.", t->chart_id, t->id, cg->id); + t->enabled = 1; + t->options &= ~CGROUP_OPTIONS_DISABLED_DUPLICATE; + break; + } + } + } + + if(!last) + discovered_cgroup_root = cg->discovered_next; + else + last->discovered_next = cg->discovered_next; + + cgroup_free(cg); + + if(!last) + cg = discovered_cgroup_root; + else + cg = last->discovered_next; + } + else { + last = cg; + cg = cg->discovered_next; + } + } +} + +static inline void copy_discovered_cgroups() +{ + debug(D_CGROUP, "copy discovered cgroups to the main group list"); + + struct cgroup *cg; + + for(cg = discovered_cgroup_root; cg ; cg = cg->discovered_next) { + cg->next = cg->discovered_next; + } + + cgroup_root = discovered_cgroup_root; +} + +static inline void find_all_cgroups() { + debug(D_CGROUP, "searching for cgroups"); + + mark_all_cgroups_as_not_available(); + if(!cgroup_use_unified_cgroups) { + if(cgroup_enable_cpuacct_stat || cgroup_enable_cpuacct_usage) { + if(find_dir_in_subdirs(cgroup_cpuacct_base, NULL, found_subdir_in_dir) == -1) { + cgroup_enable_cpuacct_stat = + cgroup_enable_cpuacct_usage = CONFIG_BOOLEAN_NO; + error("CGROUP: disabled cpu statistics."); + } + } + + if(cgroup_enable_blkio_io || cgroup_enable_blkio_ops || cgroup_enable_blkio_throttle_io || cgroup_enable_blkio_throttle_ops || cgroup_enable_blkio_merged_ops || cgroup_enable_blkio_queued_ops) { + if(find_dir_in_subdirs(cgroup_blkio_base, NULL, found_subdir_in_dir) == -1) { + cgroup_enable_blkio_io = + cgroup_enable_blkio_ops = + cgroup_enable_blkio_throttle_io = + cgroup_enable_blkio_throttle_ops = + cgroup_enable_blkio_merged_ops = + cgroup_enable_blkio_queued_ops = CONFIG_BOOLEAN_NO; + error("CGROUP: disabled blkio statistics."); + } + } + + if(cgroup_enable_memory || cgroup_enable_detailed_memory || cgroup_enable_swap || cgroup_enable_memory_failcnt) { + if(find_dir_in_subdirs(cgroup_memory_base, NULL, found_subdir_in_dir) == -1) { + cgroup_enable_memory = + cgroup_enable_detailed_memory = + cgroup_enable_swap = + cgroup_enable_memory_failcnt = CONFIG_BOOLEAN_NO; + error("CGROUP: disabled memory statistics."); + } + } + + if(cgroup_search_in_devices) { + if(find_dir_in_subdirs(cgroup_devices_base, NULL, found_subdir_in_dir) == -1) { + cgroup_search_in_devices = 0; + error("CGROUP: disabled devices statistics."); + } + } + } + else { + if (find_dir_in_subdirs(cgroup_unified_base, NULL, found_subdir_in_dir) == -1) { + cgroup_unified_exist = CONFIG_BOOLEAN_NO; + error("CGROUP: disabled unified cgroups statistics."); + } + } + + update_filenames(); + + uv_mutex_lock(&cgroup_root_mutex); + cleanup_all_cgroups(); + copy_discovered_cgroups(); + uv_mutex_unlock(&cgroup_root_mutex); debug(D_CGROUP, "done searching for cgroups"); } +void cgroup_discovery_worker(void *ptr) +{ + UNUSED(ptr); + + while (!netdata_exit) { + uv_mutex_lock(&discovery_thread.mutex); + while (!discovery_thread.start_discovery) + uv_cond_wait(&discovery_thread.cond_var, &discovery_thread.mutex); + discovery_thread.start_discovery = 0; + uv_mutex_unlock(&discovery_thread.mutex); + + if (unlikely(netdata_exit)) + break; + + find_all_cgroups(); + } + + discovery_thread.exited = 1; +} + // ---------------------------------------------------------------------------- // generate charts @@ -1807,7 +2156,7 @@ void update_systemd_services_charts( if(likely(do_cpu)) { if(unlikely(!st_cpu)) { char title[CHART_TITLE_MAX + 1]; - snprintfz(title, CHART_TITLE_MAX, "Systemd Services CPU utilization (%d%% = %d core%s)", (processors * 100), processors, (processors > 1) ? "s" : ""); + snprintfz(title, CHART_TITLE_MAX, "Systemd Services CPU utilization (100%% = 1 core)"); st_cpu = rrdset_create_localhost( "services" @@ -2335,7 +2684,7 @@ void update_systemd_services_charts( // update the values struct cgroup *cg; for(cg = cgroup_root; cg ; cg = cg->next) { - if(unlikely(!cg->available || !cg->enabled || cg->pending_renames || !(cg->options & CGROUP_OPTIONS_SYSTEM_SLICE_SERVICE))) + if(unlikely(!cg->enabled || cg->pending_renames || !(cg->options & CGROUP_OPTIONS_SYSTEM_SLICE_SERVICE))) continue; if(likely(do_cpu && cg->cpuacct_stat.updated)) { @@ -2726,7 +3075,7 @@ void update_cgroup_charts(int update_every) { struct cgroup *cg; for(cg = cgroup_root; cg ; cg = cg->next) { - if(unlikely(!cg->available || !cg->enabled || cg->pending_renames)) + if(unlikely(!cg->enabled || cg->pending_renames)) continue; if(likely(cgroup_enable_systemd_services && cg->options & CGROUP_OPTIONS_SYSTEM_SLICE_SERVICE)) { @@ -2750,7 +3099,7 @@ void update_cgroup_charts(int update_every) { if(likely(cg->cpuacct_stat.updated && cg->cpuacct_stat.enabled == CONFIG_BOOLEAN_YES)) { if(unlikely(!cg->st_cpu)) { - snprintfz(title, CHART_TITLE_MAX, "CPU Usage (%d%% = %d core%s) for cgroup %s", (processors * 100), processors, (processors > 1) ? "s" : "", cg->chart_title); + snprintfz(title, CHART_TITLE_MAX, "CPU Usage (100%% = 1 core)"); cg->st_cpu = rrdset_create_localhost( cgroup_chart_type(type, cg->chart_id, RRD_ID_LENGTH_MAX) @@ -2766,6 +3115,9 @@ void update_cgroup_charts(int update_every) { , update_every , RRDSET_TYPE_STACKED ); + + rrdset_update_labels(cg->st_cpu, cg->chart_labels); + if(!(cg->options & CGROUP_OPTIONS_IS_UNIFIED)) { rrddim_add(cg->st_cpu, "user", NULL, 100, system_hz, RRD_ALGORITHM_INCREMENTAL); rrddim_add(cg->st_cpu, "system", NULL, 100, system_hz, RRD_ALGORITHM_INCREMENTAL); @@ -2820,7 +3172,7 @@ void update_cgroup_charts(int update_every) { rrdsetvar_custom_chart_variable_set(cg->chart_var_cpu_limit, value); if(unlikely(!cg->st_cpu_limit)) { - snprintfz(title, CHART_TITLE_MAX, "CPU Usage within the limits for cgroup %s", cg->chart_title); + snprintfz(title, CHART_TITLE_MAX, "CPU Usage within the limits"); cg->st_cpu_limit = rrdset_create_localhost( cgroup_chart_type(type, cg->chart_id, RRD_ID_LENGTH_MAX) @@ -2837,6 +3189,8 @@ void update_cgroup_charts(int update_every) { , RRDSET_TYPE_LINE ); + rrdset_update_labels(cg->st_cpu_limit, cg->chart_labels); + if(!(cg->options & CGROUP_OPTIONS_IS_UNIFIED)) rrddim_add(cg->st_cpu_limit, "used", NULL, 1, system_hz, RRD_ALGORITHM_ABSOLUTE); else @@ -2873,7 +3227,7 @@ void update_cgroup_charts(int update_every) { unsigned int i; if(unlikely(!cg->st_cpu_per_core)) { - snprintfz(title, CHART_TITLE_MAX, "CPU Usage (%d%% = %d core%s) Per Core for cgroup %s", (processors * 100), processors, (processors > 1) ? "s" : "", cg->chart_title); + snprintfz(title, CHART_TITLE_MAX, "CPU Usage (100%% = 1 core) Per Core"); cg->st_cpu_per_core = rrdset_create_localhost( cgroup_chart_type(type, cg->chart_id, RRD_ID_LENGTH_MAX) @@ -2890,6 +3244,8 @@ void update_cgroup_charts(int update_every) { , RRDSET_TYPE_STACKED ); + rrdset_update_labels(cg->st_cpu_per_core, cg->chart_labels); + for(i = 0; i < cg->cpuacct_usage.cpus; i++) { snprintfz(id, RRD_ID_LENGTH_MAX, "cpu%u", i); rrddim_add(cg->st_cpu_per_core, id, NULL, 100, 1000000000, RRD_ALGORITHM_INCREMENTAL); @@ -2907,7 +3263,7 @@ void update_cgroup_charts(int update_every) { if(likely(cg->memory.updated_detailed && cg->memory.enabled_detailed == CONFIG_BOOLEAN_YES)) { if(unlikely(!cg->st_mem)) { - snprintfz(title, CHART_TITLE_MAX, "Memory Usage for cgroup %s", cg->chart_title); + snprintfz(title, CHART_TITLE_MAX, "Memory Usage"); cg->st_mem = rrdset_create_localhost( cgroup_chart_type(type, cg->chart_id, RRD_ID_LENGTH_MAX) @@ -2923,6 +3279,9 @@ void update_cgroup_charts(int update_every) { , update_every , RRDSET_TYPE_STACKED ); + + rrdset_update_labels(cg->st_mem, cg->chart_labels); + if(!(cg->options & CGROUP_OPTIONS_IS_UNIFIED)) { rrddim_add(cg->st_mem, "cache", NULL, 1, 1024 * 1024, RRD_ALGORITHM_ABSOLUTE); rrddim_add(cg->st_mem, "rss", NULL, 1, 1024 * 1024, RRD_ALGORITHM_ABSOLUTE); @@ -2964,7 +3323,7 @@ void update_cgroup_charts(int update_every) { rrdset_done(cg->st_mem); if(unlikely(!cg->st_writeback)) { - snprintfz(title, CHART_TITLE_MAX, "Writeback Memory for cgroup %s", cg->chart_title); + snprintfz(title, CHART_TITLE_MAX, "Writeback Memory"); cg->st_writeback = rrdset_create_localhost( cgroup_chart_type(type, cg->chart_id, RRD_ID_LENGTH_MAX) @@ -2981,6 +3340,8 @@ void update_cgroup_charts(int update_every) { , RRDSET_TYPE_AREA ); + rrdset_update_labels(cg->st_writeback, cg->chart_labels); + if(cg->memory.detailed_has_dirty) rrddim_add(cg->st_writeback, "dirty", NULL, 1, 1024 * 1024, RRD_ALGORITHM_ABSOLUTE); @@ -2997,7 +3358,7 @@ void update_cgroup_charts(int update_every) { if(!(cg->options & CGROUP_OPTIONS_IS_UNIFIED)) { if(unlikely(!cg->st_mem_activity)) { - snprintfz(title, CHART_TITLE_MAX, "Memory Activity for cgroup %s", cg->chart_title); + snprintfz(title, CHART_TITLE_MAX, "Memory Activity"); cg->st_mem_activity = rrdset_create_localhost( cgroup_chart_type(type, cg->chart_id, RRD_ID_LENGTH_MAX) @@ -3014,6 +3375,8 @@ void update_cgroup_charts(int update_every) { , RRDSET_TYPE_LINE ); + rrdset_update_labels(cg->st_mem_activity, cg->chart_labels); + rrddim_add(cg->st_mem_activity, "pgpgin", "in", system_page_size, 1024 * 1024, RRD_ALGORITHM_INCREMENTAL); rrddim_add(cg->st_mem_activity, "pgpgout", "out", -system_page_size, 1024 * 1024, RRD_ALGORITHM_INCREMENTAL); } @@ -3026,7 +3389,7 @@ void update_cgroup_charts(int update_every) { } if(unlikely(!cg->st_pgfaults)) { - snprintfz(title, CHART_TITLE_MAX, "Memory Page Faults for cgroup %s", cg->chart_title); + snprintfz(title, CHART_TITLE_MAX, "Memory Page Faults"); cg->st_pgfaults = rrdset_create_localhost( cgroup_chart_type(type, cg->chart_id, RRD_ID_LENGTH_MAX) @@ -3043,6 +3406,8 @@ void update_cgroup_charts(int update_every) { , RRDSET_TYPE_LINE ); + rrdset_update_labels(cg->st_pgfaults, cg->chart_labels); + rrddim_add(cg->st_pgfaults, "pgfault", NULL, system_page_size, 1024 * 1024, RRD_ALGORITHM_INCREMENTAL); rrddim_add(cg->st_pgfaults, "pgmajfault", "swap", -system_page_size, 1024 * 1024, RRD_ALGORITHM_INCREMENTAL); } @@ -3056,7 +3421,7 @@ void update_cgroup_charts(int update_every) { if(likely(cg->memory.updated_usage_in_bytes && cg->memory.enabled_usage_in_bytes == CONFIG_BOOLEAN_YES)) { if(unlikely(!cg->st_mem_usage)) { - snprintfz(title, CHART_TITLE_MAX, "Used Memory %sfor cgroup %s", (cgroup_used_memory_without_cache && cg->memory.updated_detailed)?"without Cache ":"", cg->chart_title); + snprintfz(title, CHART_TITLE_MAX, "Used Memory %s", (cgroup_used_memory_without_cache && cg->memory.updated_detailed)?"without Cache ":""); cg->st_mem_usage = rrdset_create_localhost( cgroup_chart_type(type, cg->chart_id, RRD_ID_LENGTH_MAX) @@ -3073,6 +3438,8 @@ void update_cgroup_charts(int update_every) { , RRDSET_TYPE_STACKED ); + rrdset_update_labels(cg->st_mem_usage, cg->chart_labels); + rrddim_add(cg->st_mem_usage, "ram", NULL, 1, 1024 * 1024, RRD_ALGORITHM_ABSOLUTE); rrddim_add(cg->st_mem_usage, "swap", NULL, 1, 1024 * 1024, RRD_ALGORITHM_ABSOLUTE); } @@ -3117,7 +3484,7 @@ void update_cgroup_charts(int update_every) { memory_limit = cg->memory_limit; if(unlikely(!cg->st_mem_usage_limit)) { - snprintfz(title, CHART_TITLE_MAX, "Used RAM without Cache within the limits for cgroup %s", cg->chart_title); + snprintfz(title, CHART_TITLE_MAX, "Used RAM without Cache within the limits"); cg->st_mem_usage_limit = rrdset_create_localhost( cgroup_chart_type(type, cg->chart_id, RRD_ID_LENGTH_MAX) @@ -3134,6 +3501,8 @@ void update_cgroup_charts(int update_every) { , RRDSET_TYPE_STACKED ); + rrdset_update_labels(cg->st_mem_usage_limit, cg->chart_labels); + rrddim_add(cg->st_mem_usage_limit, "available", NULL, 1, 1024 * 1024, RRD_ALGORITHM_ABSOLUTE); rrddim_add(cg->st_mem_usage_limit, "used", NULL, 1, 1024 * 1024, RRD_ALGORITHM_ABSOLUTE); } @@ -3159,7 +3528,7 @@ void update_cgroup_charts(int update_every) { if(likely(cg->memory.updated_failcnt && cg->memory.enabled_failcnt == CONFIG_BOOLEAN_YES)) { if(unlikely(!cg->st_mem_failcnt)) { - snprintfz(title, CHART_TITLE_MAX, "Memory Limit Failures for cgroup %s", cg->chart_title); + snprintfz(title, CHART_TITLE_MAX, "Memory Limit Failures"); cg->st_mem_failcnt = rrdset_create_localhost( cgroup_chart_type(type, cg->chart_id, RRD_ID_LENGTH_MAX) @@ -3175,6 +3544,8 @@ void update_cgroup_charts(int update_every) { , update_every , RRDSET_TYPE_LINE ); + + rrdset_update_labels(cg->st_mem_failcnt, cg->chart_labels); rrddim_add(cg->st_mem_failcnt, "failures", NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL); } @@ -3187,7 +3558,7 @@ void update_cgroup_charts(int update_every) { if(likely(cg->io_service_bytes.updated && cg->io_service_bytes.enabled == CONFIG_BOOLEAN_YES)) { if(unlikely(!cg->st_io)) { - snprintfz(title, CHART_TITLE_MAX, "I/O Bandwidth (all disks) for cgroup %s", cg->chart_title); + snprintfz(title, CHART_TITLE_MAX, "I/O Bandwidth (all disks)"); cg->st_io = rrdset_create_localhost( cgroup_chart_type(type, cg->chart_id, RRD_ID_LENGTH_MAX) @@ -3204,6 +3575,8 @@ void update_cgroup_charts(int update_every) { , RRDSET_TYPE_AREA ); + rrdset_update_labels(cg->st_io, cg->chart_labels); + rrddim_add(cg->st_io, "read", NULL, 1, 1024, RRD_ALGORITHM_INCREMENTAL); rrddim_add(cg->st_io, "write", NULL, -1, 1024, RRD_ALGORITHM_INCREMENTAL); } @@ -3217,7 +3590,7 @@ void update_cgroup_charts(int update_every) { if(likely(cg->io_serviced.updated && cg->io_serviced.enabled == CONFIG_BOOLEAN_YES)) { if(unlikely(!cg->st_serviced_ops)) { - snprintfz(title, CHART_TITLE_MAX, "Serviced I/O Operations (all disks) for cgroup %s", cg->chart_title); + snprintfz(title, CHART_TITLE_MAX, "Serviced I/O Operations (all disks)"); cg->st_serviced_ops = rrdset_create_localhost( cgroup_chart_type(type, cg->chart_id, RRD_ID_LENGTH_MAX) @@ -3234,6 +3607,8 @@ void update_cgroup_charts(int update_every) { , RRDSET_TYPE_LINE ); + rrdset_update_labels(cg->st_serviced_ops, cg->chart_labels); + rrddim_add(cg->st_serviced_ops, "read", NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL); rrddim_add(cg->st_serviced_ops, "write", NULL, -1, 1, RRD_ALGORITHM_INCREMENTAL); } @@ -3247,7 +3622,7 @@ void update_cgroup_charts(int update_every) { if(likely(cg->throttle_io_service_bytes.updated && cg->throttle_io_service_bytes.enabled == CONFIG_BOOLEAN_YES)) { if(unlikely(!cg->st_throttle_io)) { - snprintfz(title, CHART_TITLE_MAX, "Throttle I/O Bandwidth (all disks) for cgroup %s", cg->chart_title); + snprintfz(title, CHART_TITLE_MAX, "Throttle I/O Bandwidth (all disks)"); cg->st_throttle_io = rrdset_create_localhost( cgroup_chart_type(type, cg->chart_id, RRD_ID_LENGTH_MAX) @@ -3263,6 +3638,8 @@ void update_cgroup_charts(int update_every) { , update_every , RRDSET_TYPE_AREA ); + + rrdset_update_labels(cg->st_throttle_io, cg->chart_labels); rrddim_add(cg->st_throttle_io, "read", NULL, 1, 1024, RRD_ALGORITHM_INCREMENTAL); rrddim_add(cg->st_throttle_io, "write", NULL, -1, 1024, RRD_ALGORITHM_INCREMENTAL); @@ -3277,7 +3654,7 @@ void update_cgroup_charts(int update_every) { if(likely(cg->throttle_io_serviced.updated && cg->throttle_io_serviced.enabled == CONFIG_BOOLEAN_YES)) { if(unlikely(!cg->st_throttle_serviced_ops)) { - snprintfz(title, CHART_TITLE_MAX, "Throttle Serviced I/O Operations (all disks) for cgroup %s", cg->chart_title); + snprintfz(title, CHART_TITLE_MAX, "Throttle Serviced I/O Operations (all disks)"); cg->st_throttle_serviced_ops = rrdset_create_localhost( cgroup_chart_type(type, cg->chart_id, RRD_ID_LENGTH_MAX) @@ -3293,6 +3670,8 @@ void update_cgroup_charts(int update_every) { , update_every , RRDSET_TYPE_LINE ); + + rrdset_update_labels(cg->st_throttle_serviced_ops, cg->chart_labels); rrddim_add(cg->st_throttle_serviced_ops, "read", NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL); rrddim_add(cg->st_throttle_serviced_ops, "write", NULL, -1, 1, RRD_ALGORITHM_INCREMENTAL); @@ -3307,7 +3686,7 @@ void update_cgroup_charts(int update_every) { if(likely(cg->io_queued.updated && cg->io_queued.enabled == CONFIG_BOOLEAN_YES)) { if(unlikely(!cg->st_queued_ops)) { - snprintfz(title, CHART_TITLE_MAX, "Queued I/O Operations (all disks) for cgroup %s", cg->chart_title); + snprintfz(title, CHART_TITLE_MAX, "Queued I/O Operations (all disks)"); cg->st_queued_ops = rrdset_create_localhost( cgroup_chart_type(type, cg->chart_id, RRD_ID_LENGTH_MAX) @@ -3323,6 +3702,8 @@ void update_cgroup_charts(int update_every) { , update_every , RRDSET_TYPE_LINE ); + + rrdset_update_labels(cg->st_queued_ops, cg->chart_labels); rrddim_add(cg->st_queued_ops, "read", NULL, 1, 1, RRD_ALGORITHM_ABSOLUTE); rrddim_add(cg->st_queued_ops, "write", NULL, -1, 1, RRD_ALGORITHM_ABSOLUTE); @@ -3337,7 +3718,7 @@ void update_cgroup_charts(int update_every) { if(likely(cg->io_merged.updated && cg->io_merged.enabled == CONFIG_BOOLEAN_YES)) { if(unlikely(!cg->st_merged_ops)) { - snprintfz(title, CHART_TITLE_MAX, "Merged I/O Operations (all disks) for cgroup %s", cg->chart_title); + snprintfz(title, CHART_TITLE_MAX, "Merged I/O Operations (all disks)"); cg->st_merged_ops = rrdset_create_localhost( cgroup_chart_type(type, cg->chart_id, RRD_ID_LENGTH_MAX) @@ -3353,6 +3734,8 @@ void update_cgroup_charts(int update_every) { , update_every , RRDSET_TYPE_LINE ); + + rrdset_update_labels(cg->st_merged_ops, cg->chart_labels); rrddim_add(cg->st_merged_ops, "read", NULL, 1, 1024, RRD_ALGORITHM_INCREMENTAL); rrddim_add(cg->st_merged_ops, "write", NULL, -1, 1024, RRD_ALGORITHM_INCREMENTAL); @@ -3364,6 +3747,171 @@ void update_cgroup_charts(int update_every) { rrddim_set(cg->st_merged_ops, "write", cg->io_merged.Write); rrdset_done(cg->st_merged_ops); } + + if (cg->options & CGROUP_OPTIONS_IS_UNIFIED) { + struct pressure *res = &cg->cpu_pressure; + if (likely(res->updated && res->some.enabled)) { + if (unlikely(!res->some.st)) { + RRDSET *chart; + snprintfz(title, CHART_TITLE_MAX, "CPU pressure"); + + chart = res->some.st = rrdset_create_localhost( + cgroup_chart_type(type, cg->chart_id, RRD_ID_LENGTH_MAX) + , "cpu_pressure" + , NULL + , "cpu" + , "cgroup.cpu_pressure" + , title + , "percentage" + , PLUGIN_CGROUPS_NAME + , PLUGIN_CGROUPS_MODULE_CGROUPS_NAME + , cgroup_containers_chart_priority + 2200 + , update_every + , RRDSET_TYPE_LINE + ); + + rrdset_update_labels(chart = res->some.st, cg->chart_labels); + + res->some.rd10 = rrddim_add(chart, "some 10", NULL, 1, 100, RRD_ALGORITHM_ABSOLUTE); + res->some.rd60 = rrddim_add(chart, "some 60", NULL, 1, 100, RRD_ALGORITHM_ABSOLUTE); + res->some.rd300 = rrddim_add(chart, "some 300", NULL, 1, 100, RRD_ALGORITHM_ABSOLUTE); + } else { + rrdset_next(res->some.st); + } + + update_pressure_chart(&res->some); + } + + res = &cg->memory_pressure; + if (likely(res->updated && res->some.enabled)) { + if (unlikely(!res->some.st)) { + RRDSET *chart; + snprintfz(title, CHART_TITLE_MAX, "Memory pressure"); + + chart = res->some.st = rrdset_create_localhost( + cgroup_chart_type(type, cg->chart_id, RRD_ID_LENGTH_MAX) + , "mem_pressure" + , NULL + , "mem" + , "cgroup.memory_pressure" + , title + , "percentage" + , PLUGIN_CGROUPS_NAME + , PLUGIN_CGROUPS_MODULE_CGROUPS_NAME + , cgroup_containers_chart_priority + 2300 + , update_every + , RRDSET_TYPE_LINE + ); + + rrdset_update_labels(chart = res->some.st, cg->chart_labels); + + res->some.rd10 = rrddim_add(chart, "some 10", NULL, 1, 100, RRD_ALGORITHM_ABSOLUTE); + res->some.rd60 = rrddim_add(chart, "some 60", NULL, 1, 100, RRD_ALGORITHM_ABSOLUTE); + res->some.rd300 = rrddim_add(chart, "some 300", NULL, 1, 100, RRD_ALGORITHM_ABSOLUTE); + } else { + rrdset_next(res->some.st); + } + + update_pressure_chart(&res->some); + } + + if (likely(res->updated && res->full.enabled)) { + if (unlikely(!res->full.st)) { + RRDSET *chart; + snprintfz(title, CHART_TITLE_MAX, "Memory full pressure"); + + chart = res->full.st = rrdset_create_localhost( + cgroup_chart_type(type, cg->chart_id, RRD_ID_LENGTH_MAX) + , "mem_full_pressure" + , NULL + , "mem" + , "cgroup.memory_full_pressure" + , title + , "percentage" + , PLUGIN_CGROUPS_NAME + , PLUGIN_CGROUPS_MODULE_CGROUPS_NAME + , cgroup_containers_chart_priority + 2350 + , update_every + , RRDSET_TYPE_LINE + ); + + rrdset_update_labels(chart = res->full.st, cg->chart_labels); + + res->full.rd10 = rrddim_add(chart, "full 10", NULL, 1, 100, RRD_ALGORITHM_ABSOLUTE); + res->full.rd60 = rrddim_add(chart, "full 60", NULL, 1, 100, RRD_ALGORITHM_ABSOLUTE); + res->full.rd300 = rrddim_add(chart, "full 300", NULL, 1, 100, RRD_ALGORITHM_ABSOLUTE); + } else { + rrdset_next(res->full.st); + } + + update_pressure_chart(&res->full); + } + + res = &cg->io_pressure; + if (likely(res->updated && res->some.enabled)) { + if (unlikely(!res->some.st)) { + RRDSET *chart; + snprintfz(title, CHART_TITLE_MAX, "I/O pressure"); + + chart = res->some.st = rrdset_create_localhost( + cgroup_chart_type(type, cg->chart_id, RRD_ID_LENGTH_MAX) + , "io_pressure" + , NULL + , "disk" + , "cgroup.io_pressure" + , title + , "percentage" + , PLUGIN_CGROUPS_NAME + , PLUGIN_CGROUPS_MODULE_CGROUPS_NAME + , cgroup_containers_chart_priority + 2400 + , update_every + , RRDSET_TYPE_LINE + ); + + rrdset_update_labels(chart = res->some.st, cg->chart_labels); + + res->some.rd10 = rrddim_add(chart, "some 10", NULL, 1, 100, RRD_ALGORITHM_ABSOLUTE); + res->some.rd60 = rrddim_add(chart, "some 60", NULL, 1, 100, RRD_ALGORITHM_ABSOLUTE); + res->some.rd300 = rrddim_add(chart, "some 300", NULL, 1, 100, RRD_ALGORITHM_ABSOLUTE); + } else { + rrdset_next(res->some.st); + } + + update_pressure_chart(&res->some); + } + + if (likely(res->updated && res->full.enabled)) { + if (unlikely(!res->full.st)) { + RRDSET *chart; + snprintfz(title, CHART_TITLE_MAX, "I/O full pressure"); + + chart = res->full.st = rrdset_create_localhost( + cgroup_chart_type(type, cg->chart_id, RRD_ID_LENGTH_MAX) + , "io_full_pressure" + , NULL + , "disk" + , "cgroup.io_full_pressure" + , title + , "percentage" + , PLUGIN_CGROUPS_NAME + , PLUGIN_CGROUPS_MODULE_CGROUPS_NAME + , cgroup_containers_chart_priority + 2450 + , update_every + , RRDSET_TYPE_LINE + ); + + rrdset_update_labels(chart = res->full.st, cg->chart_labels); + + res->full.rd10 = rrddim_add(chart, "full 10", NULL, 1, 100, RRD_ALGORITHM_ABSOLUTE); + res->full.rd60 = rrddim_add(chart, "full 60", NULL, 1, 100, RRD_ALGORITHM_ABSOLUTE); + res->full.rd300 = rrddim_add(chart, "full 300", NULL, 1, 100, RRD_ALGORITHM_ABSOLUTE); + } else { + rrdset_next(res->full.st); + } + + update_pressure_chart(&res->full); + } + } } if(likely(cgroup_enable_systemd_services)) @@ -3385,6 +3933,21 @@ static void cgroup_main_cleanup(void *ptr) { info("cleaning up..."); + usec_t max = 2 * USEC_PER_SEC, step = 50000; + + if (!discovery_thread.exited) { + info("stopping discovery thread worker"); + uv_mutex_unlock(&discovery_thread.mutex); + discovery_thread.start_discovery = 1; + uv_cond_signal(&discovery_thread.cond_var); + } + + while (!discovery_thread.exited && max > 0) { + max -= step; + info("waiting for discovery thread to finish..."); + sleep_usec(step); + } + static_thread->enabled = NETDATA_MAIN_THREAD_EXITED; } @@ -3400,6 +3963,31 @@ void *cgroups_main(void *ptr) { RRDSET *stcpu_thread = NULL; + if (uv_mutex_init(&cgroup_root_mutex)) { + error("CGROUP: cannot initialize mutex for the main cgroup list"); + goto exit; + } + + // dispatch a discovery worker thread + discovery_thread.start_discovery = 0; + discovery_thread.exited = 0; + + if (uv_mutex_init(&discovery_thread.mutex)) { + error("CGROUP: cannot initialize mutex for discovery thread"); + goto exit; + } + if (uv_cond_init(&discovery_thread.cond_var)) { + error("CGROUP: cannot initialize conditional variable for discovery thread"); + goto exit; + } + + int error = uv_thread_create(&discovery_thread.thread, cgroup_discovery_worker, NULL); + if (error) { + error("CGROUP: cannot create tread worker. uv_thread_create(): %s", uv_strerror(error)); + goto exit; + } + uv_thread_set_name_np(discovery_thread.thread, "PLUGIN[cgroups]"); + heartbeat_t hb; heartbeat_init(&hb); usec_t step = cgroup_update_every * USEC_PER_SEC; @@ -3409,19 +3997,18 @@ void *cgroups_main(void *ptr) { usec_t hb_dt = heartbeat_next(&hb, step); if(unlikely(netdata_exit)) break; - // BEGIN -- the job to be done - find_dt += hb_dt; if(unlikely(find_dt >= find_every || cgroups_check)) { - find_all_cgroups(); + uv_cond_signal(&discovery_thread.cond_var); + discovery_thread.start_discovery = 1; find_dt = 0; cgroups_check = 0; } + uv_mutex_lock(&cgroup_root_mutex); read_all_cgroups(cgroup_root); update_cgroup_charts(cgroup_update_every); - - // END -- the job is done + uv_mutex_unlock(&cgroup_root_mutex); // -------------------------------------------------------------------- @@ -3457,6 +4044,7 @@ void *cgroups_main(void *ptr) { } } +exit: netdata_thread_cleanup_pop(1); return NULL; } |