summaryrefslogtreecommitdiffstats
path: root/collectors/cgroups.plugin/sys_fs_cgroup.c
diff options
context:
space:
mode:
Diffstat (limited to 'collectors/cgroups.plugin/sys_fs_cgroup.c')
-rw-r--r--collectors/cgroups.plugin/sys_fs_cgroup.c310
1 files changed, 299 insertions, 11 deletions
diff --git a/collectors/cgroups.plugin/sys_fs_cgroup.c b/collectors/cgroups.plugin/sys_fs_cgroup.c
index f8e5167ff..41a5af31e 100644
--- a/collectors/cgroups.plugin/sys_fs_cgroup.c
+++ b/collectors/cgroups.plugin/sys_fs_cgroup.c
@@ -39,6 +39,7 @@ static int cgroup_recheck_zero_mem_failcnt_every_iterations = 10;
static int cgroup_recheck_zero_mem_detailed_every_iterations = 10;
static char *cgroup_cpuacct_base = NULL;
+static char *cgroup_cpuset_base = NULL;
static char *cgroup_blkio_base = NULL;
static char *cgroup_memory_base = NULL;
static char *cgroup_devices_base = NULL;
@@ -114,6 +115,16 @@ void read_cgroup_plugin_configuration() {
snprintfz(filename, FILENAME_MAX, "%s%s", netdata_configured_host_prefix, s);
cgroup_cpuacct_base = config_get("plugin:cgroups", "path to /sys/fs/cgroup/cpuacct", filename);
+ mi = mountinfo_find_by_filesystem_super_option(root, "cgroup", "cpuset");
+ if(!mi) mi = mountinfo_find_by_filesystem_mount_source(root, "cgroup", "cpuset");
+ if(!mi) {
+ error("CGROUP: cannot find cpuset mountinfo. Assuming default: /sys/fs/cgroup/cpuset");
+ s = "/sys/fs/cgroup/cpuset";
+ }
+ else s = mi->mount_point;
+ snprintfz(filename, FILENAME_MAX, "%s%s", netdata_configured_host_prefix, s);
+ cgroup_cpuset_base = config_get("plugin:cgroups", "path to /sys/fs/cgroup/cpuset", filename);
+
mi = mountinfo_find_by_filesystem_super_option(root, "cgroup", "blkio");
if(!mi) mi = mountinfo_find_by_filesystem_mount_source(root, "cgroup", "blkio");
if(!mi) {
@@ -391,12 +402,14 @@ struct cgroup {
// per cgroup charts
RRDSET *st_cpu;
+ RRDSET *st_cpu_limit;
RRDSET *st_cpu_per_core;
RRDSET *st_mem;
RRDSET *st_writeback;
RRDSET *st_mem_activity;
RRDSET *st_pgfaults;
RRDSET *st_mem_usage;
+ RRDSET *st_mem_usage_limit;
RRDSET *st_mem_failcnt;
RRDSET *st_io;
RRDSET *st_serviced_ops;
@@ -405,6 +418,27 @@ struct cgroup {
RRDSET *st_queued_ops;
RRDSET *st_merged_ops;
+ // per cgroup chart variables
+ char *filename_cpuset_cpus;
+ unsigned long long cpuset_cpus;
+
+ char *filename_cpu_cfs_period;
+ unsigned long long cpu_cfs_period;
+
+ char *filename_cpu_cfs_quota;
+ unsigned long long cpu_cfs_quota;
+
+ RRDSETVAR *chart_var_cpu_limit;
+ calculated_number prev_cpu_usage;
+
+ char *filename_memory_limit;
+ unsigned long long memory_limit;
+ RRDSETVAR *chart_var_memory_limit;
+
+ char *filename_memoryswap_limit;
+ unsigned long long memoryswap_limit;
+ RRDSETVAR *chart_var_memoryswap_limit;
+
// services
RRDDIM *rd_cpu;
RRDDIM *rd_mem_usage;
@@ -867,14 +901,15 @@ static inline void cgroup_get_chart_name(struct cgroup *cg) {
if(s && *s && *s != '\n') {
debug(D_CGROUP, "cgroup '%s' should be renamed to '%s'", cg->id, s);
- trim(s);
+ s = trim(s);
+ if (s) {
+ freez(cg->chart_title);
+ cg->chart_title = cgroup_title_strdupz(s);
- freez(cg->chart_title);
- cg->chart_title = cgroup_title_strdupz(s);
-
- freez(cg->chart_id);
- cg->chart_id = cgroup_chart_id_strdupz(s);
- cg->hash_chart = simple_hash(cg->chart_id);
+ freez(cg->chart_id);
+ cg->chart_id = cgroup_chart_id_strdupz(s);
+ cg->hash_chart = simple_hash(cg->chart_id);
+ }
}
}
else
@@ -1008,12 +1043,14 @@ static inline void cgroup_free(struct cgroup *cg) {
debug(D_CGROUP, "Removing cgroup '%s' with chart id '%s' (was %s and %s)", cg->id, cg->chart_id, (cg->enabled)?"enabled":"disabled", (cg->available)?"available":"not available");
if(cg->st_cpu) rrdset_is_obsolete(cg->st_cpu);
+ if(cg->st_cpu_limit) rrdset_is_obsolete(cg->st_cpu_limit);
if(cg->st_cpu_per_core) rrdset_is_obsolete(cg->st_cpu_per_core);
if(cg->st_mem) rrdset_is_obsolete(cg->st_mem);
if(cg->st_writeback) rrdset_is_obsolete(cg->st_writeback);
if(cg->st_mem_activity) rrdset_is_obsolete(cg->st_mem_activity);
if(cg->st_pgfaults) rrdset_is_obsolete(cg->st_pgfaults);
if(cg->st_mem_usage) rrdset_is_obsolete(cg->st_mem_usage);
+ if (cg->st_mem_usage_limit) rrdset_is_obsolete(cg->st_mem_usage_limit);
if(cg->st_mem_failcnt) rrdset_is_obsolete(cg->st_mem_failcnt);
if(cg->st_io) rrdset_is_obsolete(cg->st_io);
if(cg->st_serviced_ops) rrdset_is_obsolete(cg->st_serviced_ops);
@@ -1022,6 +1059,12 @@ static inline void cgroup_free(struct cgroup *cg) {
if(cg->st_queued_ops) rrdset_is_obsolete(cg->st_queued_ops);
if(cg->st_merged_ops) rrdset_is_obsolete(cg->st_merged_ops);
+ freez(cg->filename_cpuset_cpus);
+ freez(cg->filename_cpu_cfs_period);
+ freez(cg->filename_cpu_cfs_quota);
+ freez(cg->filename_memory_limit);
+ freez(cg->filename_memoryswap_limit);
+
free_cgroup_network_interfaces(cg);
freez(cg->cpuacct_usage.cpu_percpu);
@@ -1272,6 +1315,12 @@ static inline void find_all_cgroups() {
if(likely(stat(filename, &buf) != -1)) {
cg->cpuacct_stat.filename = strdupz(filename);
cg->cpuacct_stat.enabled = cgroup_enable_cpuacct_stat;
+ snprintfz(filename, FILENAME_MAX, "%s%s/cpuset.cpus", cgroup_cpuset_base, cg->id);
+ cg->filename_cpuset_cpus = strdupz(filename);
+ snprintfz(filename, FILENAME_MAX, "%s%s/cpu.cfs_period_us", cgroup_cpuacct_base, cg->id);
+ cg->filename_cpu_cfs_period = strdupz(filename);
+ snprintfz(filename, FILENAME_MAX, "%s%s/cpu.cfs_quota_us", cgroup_cpuacct_base, cg->id);
+ cg->filename_cpu_cfs_quota = strdupz(filename);
debug(D_CGROUP, "cpuacct.stat filename for cgroup '%s': '%s'", cg->id, cg->cpuacct_stat.filename);
}
else
@@ -1306,16 +1355,20 @@ static inline void find_all_cgroups() {
cg->memory.filename_usage_in_bytes = strdupz(filename);
cg->memory.enabled_usage_in_bytes = cgroup_enable_memory;
debug(D_CGROUP, "memory.usage_in_bytes filename for cgroup '%s': '%s'", cg->id, cg->memory.filename_usage_in_bytes);
+ snprintfz(filename, FILENAME_MAX, "%s%s/memory.limit_in_bytes", cgroup_memory_base, cg->id);
+ cg->filename_memory_limit = strdupz(filename);
}
else
debug(D_CGROUP, "memory.usage_in_bytes file for cgroup '%s': '%s' does not exist.", cg->id, filename);
}
if(unlikely(cgroup_enable_swap && !cg->memory.filename_msw_usage_in_bytes)) {
- snprintfz(filename, FILENAME_MAX, "%s%s/memory.msw_usage_in_bytes", cgroup_memory_base, cg->id);
+ snprintfz(filename, FILENAME_MAX, "%s%s/memory.memsw.usage_in_bytes", cgroup_memory_base, cg->id);
if(likely(stat(filename, &buf) != -1)) {
cg->memory.filename_msw_usage_in_bytes = strdupz(filename);
cg->memory.enabled_msw_usage_in_bytes = cgroup_enable_swap;
+ snprintfz(filename, FILENAME_MAX, "%s%s/memory.memsw.limit_in_bytes", cgroup_memory_base, cg->id);
+ cg->filename_memoryswap_limit = strdupz(filename);
debug(D_CGROUP, "memory.msw_usage_in_bytes filename for cgroup '%s': '%s'", cg->id, cg->memory.filename_msw_usage_in_bytes);
}
else
@@ -1465,7 +1518,7 @@ void update_systemd_services_charts(
, "cpu"
, "services.cpu"
, title
- , "%"
+ , "percentage"
, PLUGIN_CGROUPS_NAME
, PLUGIN_CGROUPS_MODULE_SYSTEMD_NAME
, NETDATA_CHART_PRIO_CGROUPS_SYSTEMD
@@ -2197,6 +2250,96 @@ static inline char *cgroup_chart_type(char *buffer, const char *id, size_t len)
return buffer;
}
+static inline unsigned long long cpuset_str2ull(char **s) {
+ unsigned long long n = 0;
+ char c;
+ for(c = **s; c >= '0' && c <= '9' ; c = *(++*s)) {
+ n *= 10;
+ n += c - '0';
+ }
+ return n;
+}
+
+static inline void update_cpu_limits(char **filename, unsigned long long *value, struct cgroup *cg) {
+ if(*filename) {
+ int ret = -1;
+
+ if(value == &cg->cpuset_cpus) {
+ static char *buf = NULL;
+ static size_t buf_size = 0;
+
+ if(!buf) {
+ buf_size = 100U + 6 * get_system_cpus(); // taken from kernel/cgroup/cpuset.c
+ buf = mallocz(buf_size + 1);
+ }
+
+ ret = read_file(*filename, buf, buf_size);
+
+ if(!ret) {
+ char *s = buf;
+ unsigned long long ncpus = 0;
+
+ // parse the cpuset string and calculate the number of cpus the cgroup is allowed to use
+ while(*s) {
+ unsigned long long n = cpuset_str2ull(&s);
+ if(*s == ',') {
+ s++;
+ ncpus++;
+ continue;
+ }
+ if(*s == '-') {
+ s++;
+ unsigned long long m = cpuset_str2ull(&s);
+ ncpus += m - n + 1; // calculate the number of cpus in the region
+ }
+ s++;
+ }
+
+ if(likely(ncpus)) *value = ncpus;
+ }
+ }
+ else if(value == &cg->cpu_cfs_period) {
+ ret = read_single_number_file(*filename, value);
+ }
+ else if(value == &cg->cpu_cfs_quota) {
+ ret = read_single_number_file(*filename, value);
+ }
+ else ret = -1;
+
+ if(ret) {
+ error("Cannot refresh cgroup %s cpu limit by reading '%s'. Will not update its limit anymore.", cg->id, *filename);
+ freez(*filename);
+ *filename = NULL;
+ }
+ }
+}
+
+static inline int update_memory_limits(char **filename, RRDSETVAR **chart_var, unsigned long long *value, const char *chart_var_name, struct cgroup *cg) {
+ if(*filename) {
+ if(unlikely(!*chart_var)) {
+ *chart_var = rrdsetvar_custom_chart_variable_create(cg->st_mem_usage, chart_var_name);
+ if(!*chart_var) {
+ error("Cannot create cgroup %s chart variable '%s'. Will not update its limit anymore.", cg->id, chart_var_name);
+ freez(*filename);
+ *filename = NULL;
+ }
+ }
+
+ if(*filename && *chart_var) {
+ if(read_single_number_file(*filename, value)) {
+ error("Cannot refresh cgroup %s memory limit by reading '%s'. Will not update its limit anymore.", cg->id, *filename);
+ freez(*filename);
+ *filename = NULL;
+ }
+ else {
+ rrdsetvar_custom_chart_variable_set(*chart_var, (calculated_number)(*value / (1024 * 1024)));
+ return 1;
+ }
+ }
+ }
+ return 0;
+}
+
void update_cgroup_charts(int update_every) {
debug(D_CGROUP, "updating cgroups charts");
@@ -2250,7 +2393,7 @@ void update_cgroup_charts(int update_every) {
, "cpu"
, "cgroup.cpu"
, title
- , "%"
+ , "percentage"
, PLUGIN_CGROUPS_NAME
, PLUGIN_CGROUPS_MODULE_CGROUPS_NAME
, NETDATA_CHART_PRIO_CGROUPS_CONTAINERS
@@ -2267,6 +2410,82 @@ void update_cgroup_charts(int update_every) {
rrddim_set(cg->st_cpu, "user", cg->cpuacct_stat.user);
rrddim_set(cg->st_cpu, "system", cg->cpuacct_stat.system);
rrdset_done(cg->st_cpu);
+
+ if(likely(cg->filename_cpuset_cpus || cg->filename_cpu_cfs_period || cg->filename_cpu_cfs_quota)) {
+ update_cpu_limits(&cg->filename_cpuset_cpus, &cg->cpuset_cpus, cg);
+ update_cpu_limits(&cg->filename_cpu_cfs_period, &cg->cpu_cfs_period, cg);
+ update_cpu_limits(&cg->filename_cpu_cfs_quota, &cg->cpu_cfs_quota, cg);
+
+ if(unlikely(!cg->chart_var_cpu_limit)) {
+ cg->chart_var_cpu_limit = rrdsetvar_custom_chart_variable_create(cg->st_cpu, "cpu_limit");
+ if(!cg->chart_var_cpu_limit) {
+ error("Cannot create cgroup %s chart variable 'cpu_limit'. Will not update its limit anymore.", cg->id);
+ freez(cg->filename_cpuset_cpus);
+ cg->filename_cpuset_cpus = NULL;
+ freez(cg->filename_cpu_cfs_period);
+ cg->filename_cpu_cfs_period = NULL;
+ freez(cg->filename_cpu_cfs_quota);
+ cg->filename_cpu_cfs_quota = NULL;
+ }
+ }
+ else {
+ calculated_number value = 0, quota = 0;
+
+ if(likely(cg->filename_cpuset_cpus || (cg->filename_cpu_cfs_period && cg->filename_cpu_cfs_quota))) {
+ if(unlikely(cg->cpu_cfs_quota > 0))
+ quota = (calculated_number)cg->cpu_cfs_quota / (calculated_number)cg->cpu_cfs_period;
+
+ if(unlikely(quota > 0 && quota < cg->cpuset_cpus))
+ value = quota * 100;
+ else
+ value = (calculated_number)cg->cpuset_cpus * 100;
+ }
+ if(likely(value)) {
+ rrdsetvar_custom_chart_variable_set(cg->chart_var_cpu_limit, value);
+
+ if(unlikely(!cg->st_cpu_limit)) {
+ snprintfz(title, CHART_TITLE_MAX, "CPU Usage within the limits for cgroup %s", cg->chart_title);
+
+ cg->st_cpu_limit = rrdset_create_localhost(
+ cgroup_chart_type(type, cg->chart_id, RRD_ID_LENGTH_MAX)
+ , "cpu_limit"
+ , NULL
+ , "cpu"
+ , "cgroup.cpu_limit"
+ , title
+ , "percentage"
+ , PLUGIN_CGROUPS_NAME
+ , PLUGIN_CGROUPS_MODULE_CGROUPS_NAME
+ , NETDATA_CHART_PRIO_CGROUPS_CONTAINERS - 1
+ , update_every
+ , RRDSET_TYPE_LINE
+ );
+
+ rrddim_add(cg->st_cpu_limit, "used", NULL, 1, 1, RRD_ALGORITHM_ABSOLUTE);
+ }
+ else
+ rrdset_next(cg->st_cpu_limit);
+
+ calculated_number cpu_usage = (calculated_number)(cg->cpuacct_stat.user + cg->cpuacct_stat.system) * 100 / system_hz;
+ calculated_number cpu_used = 100 * (cpu_usage - cg->prev_cpu_usage) / (value * update_every);
+
+ rrdset_isnot_obsolete(cg->st_cpu_limit);
+
+ rrddim_set(cg->st_cpu_limit, "used", (cpu_used > 0)?cpu_used:0);
+
+ cg->prev_cpu_usage = cpu_usage;
+
+ rrdset_done(cg->st_cpu_limit);
+ }
+ else {
+ rrdsetvar_custom_chart_variable_set(cg->chart_var_cpu_limit, NAN);
+ if(unlikely(cg->st_cpu_limit)) {
+ rrdset_is_obsolete(cg->st_cpu_limit);
+ cg->st_cpu_limit = NULL;
+ }
+ }
+ }
+ }
}
if(likely(cg->cpuacct_usage.updated && cg->cpuacct_usage.enabled == CONFIG_BOOLEAN_YES)) {
@@ -2283,7 +2502,7 @@ void update_cgroup_charts(int update_every) {
, "cpu"
, "cgroup.cpu_per_core"
, title
- , "%"
+ , "percentage"
, PLUGIN_CGROUPS_NAME
, PLUGIN_CGROUPS_MODULE_CGROUPS_NAME
, NETDATA_CHART_PRIO_CGROUPS_CONTAINERS + 100
@@ -2464,6 +2683,75 @@ void update_cgroup_charts(int update_every) {
rrddim_set(cg->st_mem_usage, "ram", cg->memory.usage_in_bytes - ((cgroup_used_memory_without_cache)?cg->memory.cache:0));
rrddim_set(cg->st_mem_usage, "swap", (cg->memory.msw_usage_in_bytes > cg->memory.usage_in_bytes)?cg->memory.msw_usage_in_bytes - cg->memory.usage_in_bytes:0);
rrdset_done(cg->st_mem_usage);
+
+ if (likely(update_memory_limits(&cg->filename_memory_limit, &cg->chart_var_memory_limit, &cg->memory_limit, "memory_limit", cg))) {
+ static unsigned long long ram_total = 0;
+
+ if(unlikely(!ram_total)) {
+ procfile *ff = NULL;
+
+ char filename[FILENAME_MAX + 1];
+ snprintfz(filename, FILENAME_MAX, "%s%s", netdata_configured_host_prefix, "/proc/meminfo");
+ ff = procfile_open(config_get("plugin:cgroups", "meminfo filename to monitor", filename), " \t:", PROCFILE_FLAG_DEFAULT);
+
+ if(likely(ff))
+ ff = procfile_readall(ff);
+ if(likely(ff && procfile_lines(ff) && !strncmp(procfile_word(ff, 0), "MemTotal", 8)))
+ ram_total = str2ull(procfile_word(ff, 1)) * 1024;
+ else {
+ error("Cannot read file %s. Will not update cgroup %s RAM limit anymore.", filename, cg->id);
+ freez(cg->filename_memory_limit);
+ cg->filename_memory_limit = NULL;
+ }
+
+ procfile_close(ff);
+ }
+
+ if(likely(ram_total)) {
+ unsigned long long memory_limit = ram_total;
+
+ if(unlikely(cg->memory_limit < ram_total))
+ memory_limit = cg->memory_limit;
+
+ if(unlikely(!cg->st_mem_usage_limit)) {
+ snprintfz(title, CHART_TITLE_MAX, "Used RAM without Cache within the limits for cgroup %s", cg->chart_title);
+
+ cg->st_mem_usage_limit = rrdset_create_localhost(
+ cgroup_chart_type(type, cg->chart_id, RRD_ID_LENGTH_MAX)
+ , "mem_usage_limit"
+ , NULL
+ , "mem"
+ , "cgroup.mem_usage_limit"
+ , title
+ , "MiB"
+ , PLUGIN_CGROUPS_NAME
+ , PLUGIN_CGROUPS_MODULE_CGROUPS_NAME
+ , NETDATA_CHART_PRIO_CGROUPS_CONTAINERS + 199
+ , update_every
+ , RRDSET_TYPE_STACKED
+ );
+
+ rrddim_add(cg->st_mem_usage_limit, "available", NULL, 1, 1024 * 1024, RRD_ALGORITHM_ABSOLUTE);
+ rrddim_add(cg->st_mem_usage_limit, "used", NULL, 1, 1024 * 1024, RRD_ALGORITHM_ABSOLUTE);
+ }
+ else
+ rrdset_next(cg->st_mem_usage_limit);
+
+ rrdset_isnot_obsolete(cg->st_mem_usage_limit);
+
+ rrddim_set(cg->st_mem_usage_limit, "available", memory_limit - (cg->memory.usage_in_bytes - ((cgroup_used_memory_without_cache)?cg->memory.cache:0)));
+ rrddim_set(cg->st_mem_usage_limit, "used", cg->memory.usage_in_bytes - ((cgroup_used_memory_without_cache)?cg->memory.cache:0));
+ rrdset_done(cg->st_mem_usage_limit);
+ }
+ }
+ else {
+ if(unlikely(cg->st_mem_usage_limit)) {
+ rrdset_is_obsolete(cg->st_mem_usage_limit);
+ cg->st_mem_usage_limit = NULL;
+ }
+ }
+
+ update_memory_limits(&cg->filename_memoryswap_limit, &cg->chart_var_memoryswap_limit, &cg->memoryswap_limit, "memory_and_swap_limit", cg);
}
if(likely(cg->memory.updated_failcnt && cg->memory.enabled_failcnt == CONFIG_BOOLEAN_YES)) {