diff options
Diffstat (limited to 'src/sys_fs_cgroup.c')
-rw-r--r-- | src/sys_fs_cgroup.c | 129 |
1 files changed, 119 insertions, 10 deletions
diff --git a/src/sys_fs_cgroup.c b/src/sys_fs_cgroup.c index 892f737c1..298f38a38 100644 --- a/src/sys_fs_cgroup.c +++ b/src/sys_fs_cgroup.c @@ -138,6 +138,18 @@ struct memory { unsigned long long total_active_file; unsigned long long total_unevictable; */ + + int usage_in_bytes_updated; + char *filename_usage_in_bytes; + unsigned long long usage_in_bytes; + + int msw_usage_in_bytes_updated; + char *filename_msw_usage_in_bytes; + unsigned long long msw_usage_in_bytes; + + int failcnt_updated; + char *filename_failcnt; + unsigned long long failcnt; }; // https://www.kernel.org/doc/Documentation/cgroup-v1/cpuacct.txt @@ -160,9 +172,13 @@ struct cpuacct_usage { unsigned long long *cpu_percpu; }; +#define CGROUP_OPTIONS_DISABLED_DUPLICATE 0x00000001 + struct cgroup { - int available; // found in the filesystem - int enabled; // enabled in the config + uint32_t options; + + char available; // found in the filesystem + char enabled; // enabled in the config char *id; uint32_t hash; @@ -553,6 +569,24 @@ void cgroup_read_memory(struct memory *mem) { mem->updated = 1; } + + mem->usage_in_bytes_updated = 0; + if(mem->filename_usage_in_bytes) { + if(likely(!read_single_number_file(mem->filename_usage_in_bytes, &mem->usage_in_bytes))) + mem->usage_in_bytes_updated = 1; + } + + mem->msw_usage_in_bytes_updated = 0; + if(mem->filename_msw_usage_in_bytes) { + if(likely(!read_single_number_file(mem->filename_msw_usage_in_bytes, &mem->msw_usage_in_bytes))) + mem->msw_usage_in_bytes_updated = 1; + } + + mem->failcnt_updated = 0; + if(mem->filename_failcnt) { + if(likely(!read_single_number_file(mem->filename_failcnt, &mem->failcnt))) + mem->failcnt_updated = 1; + } } void cgroup_read(struct cgroup *cg) { @@ -714,11 +748,18 @@ struct cgroup *cgroup_add(const char *id) { if (!strncmp(t->chart_id, "/system.slice/", 14) && !strncmp(cg->chart_id, "/init.scope/system.slice/", 25)) { error("Control group with chart id '%s' already exists with id '%s' and is enabled. Swapping them by enabling cgroup with id '%s' and disabling cgroup with id '%s'.", cg->chart_id, t->id, cg->id, t->id); + debug(D_CGROUP, "Control group with chart id '%s' already exists with id '%s' and is enabled. Swapping them by enabling cgroup with id '%s' and disabling cgroup with id '%s'.", + cg->chart_id, t->id, cg->id, t->id); t->enabled = 0; - } else { - error("Control group with chart id '%s' already exists with id '%s' and is enabled. Disabling cgroup with id '%s'.", + t->options |= CGROUP_OPTIONS_DISABLED_DUPLICATE; + } + else { + error("Control group with chart id '%s' already exists with id '%s' and is enabled and available. Disabling cgroup with id '%s'.", + cg->chart_id, t->id, cg->id); + debug(D_CGROUP, "Control group with chart id '%s' already exists with id '%s' and is enabled and available. Disabling cgroup with id '%s'.", cg->chart_id, t->id, cg->id); cg->enabled = 0; + cg->options |= CGROUP_OPTIONS_DISABLED_DUPLICATE; } break; @@ -865,8 +906,9 @@ void mark_all_cgroups_as_not_available() { struct cgroup *cg; // mark all as not available - for(cg = cgroup_root; cg ; cg = cg->next) + for(cg = cgroup_root; cg ; cg = cg->next) { cg->available = 0; + } } void cleanup_all_cgroups() { @@ -874,6 +916,18 @@ void cleanup_all_cgroups() { for(; cg ;) { if(!cg->available) { + // enable the first duplicate cgroup + { + struct cgroup *t; + for(t = cgroup_root; t ; t = t->next) { + if(t != cg && t->available && !t->enabled && t->options & CGROUP_OPTIONS_DISABLED_DUPLICATE && t->hash_chart == cg->hash_chart && !strcmp(t->chart_id, cg->chart_id)) { + debug(D_CGROUP, "Enabling duplicate of cgroup '%s' with id '%s', because the original with id '%s' stopped.", t->chart_id, t->id, cg->id); + t->enabled = 1; + t->options &= ~CGROUP_OPTIONS_DISABLED_DUPLICATE; + break; + } + } + } if(!last) cgroup_root = cg->next; @@ -966,6 +1020,27 @@ void find_all_cgroups() { debug(D_CGROUP, "memory.stat filename for cgroup '%s': '%s'", cg->id, cg->memory.filename); } else debug(D_CGROUP, "memory.stat file for cgroup '%s': '%s' does not exist.", cg->id, filename); + + snprintfz(filename, FILENAME_MAX, "%s%s/memory.usage_in_bytes", cgroup_memory_base, cg->id); + if(stat(filename, &buf) != -1) { + cg->memory.filename_usage_in_bytes = strdupz(filename); + debug(D_CGROUP, "memory.usage_in_bytes filename for cgroup '%s': '%s'", cg->id, cg->memory.filename_usage_in_bytes); + } + else debug(D_CGROUP, "memory.usage_in_bytes file for cgroup '%s': '%s' does not exist.", cg->id, filename); + + snprintfz(filename, FILENAME_MAX, "%s%s/memory.msw_usage_in_bytes", cgroup_memory_base, cg->id); + if(stat(filename, &buf) != -1) { + cg->memory.filename_msw_usage_in_bytes = strdupz(filename); + debug(D_CGROUP, "memory.msw_usage_in_bytes filename for cgroup '%s': '%s'", cg->id, cg->memory.filename_msw_usage_in_bytes); + } + else debug(D_CGROUP, "memory.msw_usage_in_bytes file for cgroup '%s': '%s' does not exist.", cg->id, filename); + + snprintfz(filename, FILENAME_MAX, "%s%s/memory.failcnt", cgroup_memory_base, cg->id); + if(stat(filename, &buf) != -1) { + cg->memory.filename_failcnt = strdupz(filename); + debug(D_CGROUP, "memory.failcnt filename for cgroup '%s': '%s'", cg->id, cg->memory.filename_failcnt); + } + else debug(D_CGROUP, "memory.failcnt file for cgroup '%s': '%s' does not exist.", cg->id, filename); } if(cgroup_enable_blkio) { if(!cg->io_service_bytes.filename) { @@ -1053,7 +1128,7 @@ void update_cgroup_charts(int update_every) { if(cg->cpuacct_stat.updated) { st = rrdset_find_bytype(type, "cpu"); if(!st) { - snprintfz(title, CHART_TITLE_MAX, "CPU Usage for cgroup %s", cg->chart_title); + snprintfz(title, CHART_TITLE_MAX, "CPU Usage (%d%% = %d core%s) for cgroup %s", (processors * 100), processors, (processors>1)?"s":"", cg->chart_title); st = rrdset_create(type, "cpu", NULL, "cpu", "cgroup.cpu", title, "%", 40000, update_every, RRDSET_TYPE_STACKED); rrddim_add(st, "user", NULL, 100, hz, RRDDIM_INCREMENTAL); @@ -1072,7 +1147,7 @@ void update_cgroup_charts(int update_every) { st = rrdset_find_bytype(type, "cpu_per_core"); if(!st) { - snprintfz(title, CHART_TITLE_MAX, "CPU Usage Per Core for cgroup %s", cg->chart_title); + snprintfz(title, CHART_TITLE_MAX, "CPU Usage (%d%% = %d core%s) Per Core for cgroup %s", (processors * 100), processors, (processors>1)?"s":"", cg->chart_title); st = rrdset_create(type, "cpu_per_core", NULL, "cpu", "cgroup.cpu_per_core", title, "%", 40100, update_every, RRDSET_TYPE_STACKED); for(i = 0; i < cg->cpuacct_usage.cpus ;i++) { @@ -1094,7 +1169,7 @@ void update_cgroup_charts(int update_every) { st = rrdset_find_bytype(type, "mem"); if(!st) { snprintfz(title, CHART_TITLE_MAX, "Memory Usage for cgroup %s", cg->chart_title); - st = rrdset_create(type, "mem", NULL, "mem", "cgroup.mem", title, "MB", 40200, update_every, + st = rrdset_create(type, "mem", NULL, "mem", "cgroup.mem", title, "MB", 40210, update_every, RRDSET_TYPE_STACKED); rrddim_add(st, "cache", NULL, 1, 1024 * 1024, RRDDIM_ABSOLUTE); @@ -1167,6 +1242,38 @@ void update_cgroup_charts(int update_every) { } } + if(cg->memory.usage_in_bytes_updated) { + st = rrdset_find_bytype(type, "mem_usage"); + if(!st) { + snprintfz(title, CHART_TITLE_MAX, "Total Memory for cgroup %s", cg->chart_title); + st = rrdset_create(type, "mem_usage", NULL, "mem", "cgroup.mem_usage", title, "MB", 40200, + update_every, RRDSET_TYPE_STACKED); + + rrddim_add(st, "ram", NULL, 1, 1024 * 1024, RRDDIM_ABSOLUTE); + rrddim_add(st, "swap", NULL, 1, 1024 * 1024, RRDDIM_ABSOLUTE); + } + else rrdset_next(st); + + rrddim_set(st, "ram", cg->memory.usage_in_bytes); + rrddim_set(st, "swap", (cg->memory.msw_usage_in_bytes > cg->memory.usage_in_bytes)?cg->memory.msw_usage_in_bytes - cg->memory.usage_in_bytes:0); + rrdset_done(st); + } + + if(cg->memory.failcnt_updated && cg->memory.failcnt > 0) { + st = rrdset_find_bytype(type, "mem_failcnt"); + if(!st) { + snprintfz(title, CHART_TITLE_MAX, "Memory Limit Failures for cgroup %s", cg->chart_title); + st = rrdset_create(type, "mem_failcnt", NULL, "mem", "cgroup.mem_failcnt", title, "MB", 40250, + update_every, RRDSET_TYPE_LINE); + + rrddim_add(st, "failures", NULL, 1, 1, RRDDIM_INCREMENTAL); + } + else rrdset_next(st); + + rrddim_set(st, "failures", cg->memory.failcnt); + rrdset_done(st); + } + if(cg->io_service_bytes.updated && cg->io_service_bytes.Read + cg->io_service_bytes.Write > 0) { st = rrdset_find_bytype(type, "io"); if(!st) { @@ -1302,7 +1409,7 @@ int do_sys_fs_cgroup(int update_every, unsigned long long dt) { void *cgroups_main(void *ptr) { - if(ptr) { ; } + (void)ptr; info("CGROUP Plugin thread created with task id %d", gettid()); @@ -1327,7 +1434,7 @@ void *cgroups_main(void *ptr) RRDSET *stcpu_thread = NULL; - for(;1;) { + for(;;) { if(unlikely(netdata_exit)) break; // delay until it is our time to run @@ -1372,6 +1479,8 @@ void *cgroups_main(void *ptr) } } + info("CGROUP thread exiting"); + pthread_exit(NULL); return NULL; } |