diff options
Diffstat (limited to 'collectors/proc.plugin/proc_stat.c')
-rw-r--r--[-rwxr-xr-x] | collectors/proc.plugin/proc_stat.c | 347 |
1 files changed, 342 insertions, 5 deletions
diff --git a/collectors/proc.plugin/proc_stat.c b/collectors/proc.plugin/proc_stat.c index 931b415a5..f345a39d6 100755..100644 --- a/collectors/proc.plugin/proc_stat.c +++ b/collectors/proc.plugin/proc_stat.c @@ -52,6 +52,7 @@ struct cpu_chart { }; static int keep_per_core_fds_open = CONFIG_BOOLEAN_YES; +static int keep_cpuidle_fds_open = CONFIG_BOOLEAN_YES; static int read_per_core_files(struct cpu_chart *all_cpu_charts, size_t len, size_t index) { char buf[50 + 1]; @@ -161,7 +162,7 @@ static int read_per_core_time_in_state_files(struct cpu_chart *all_cpu_charts, s // the whole period under schedutil governor? // freez(tsf->last_ticks); // tsf->last_ticks = NULL; - // tsf->last_ticks_len = 0; + // tsf->last_ticks_len = 0; continue; } @@ -237,15 +238,249 @@ static void chart_per_core_files(struct cpu_chart *all_cpu_charts, size_t len, s } } +struct cpuidle_state { + char *name; + + char *time_filename; + int time_fd; + + collected_number value; + + RRDDIM *rd; +}; + +struct per_core_cpuidle_chart { + RRDSET *st; + + RRDDIM *active_time_rd; + collected_number active_time; + collected_number last_active_time; + + struct cpuidle_state *cpuidle_state; + size_t cpuidle_state_len; + int rescan_cpu_states; +}; + +static void* wake_cpu_thread(void* core) { + pthread_t thread; + cpu_set_t cpu_set; + static size_t cpu_wakeups = 0; + static int errors = 0; + + CPU_ZERO(&cpu_set); + CPU_SET(*(int*)core, &cpu_set); + + thread = pthread_self(); + if(unlikely(pthread_setaffinity_np(thread, sizeof(cpu_set_t), &cpu_set))) { + if(unlikely(errors < 8)) { + error("Cannot set CPU affinity for core %d", *(int*)core); + errors++; + } + else if(unlikely(errors < 9)) { + error("CPU affinity errors are disabled"); + errors++; + } + } + + // Make the CPU core do something to force it to update its idle counters + cpu_wakeups++; + + return 0; +} + +static int read_schedstat(char *schedstat_filename, struct per_core_cpuidle_chart **cpuidle_charts_address, size_t *schedstat_cores_found) { + static size_t cpuidle_charts_len = 0; + static procfile *ff = NULL; + struct per_core_cpuidle_chart *cpuidle_charts = *cpuidle_charts_address; + size_t cores_found = 0; + + if(unlikely(!ff)) { + ff = procfile_open(schedstat_filename, " \t:", PROCFILE_FLAG_DEFAULT); + if(unlikely(!ff)) return 1; + } + + ff = procfile_readall(ff); + if(unlikely(!ff)) return 1; + + size_t lines = procfile_lines(ff), l; + size_t words; + + for(l = 0; l < lines ;l++) { + char *row_key = procfile_lineword(ff, l, 0); + + // faster strncmp(row_key, "cpu", 3) == 0 + if(likely(row_key[0] == 'c' && row_key[1] == 'p' && row_key[2] == 'u')) { + words = procfile_linewords(ff, l); + if(unlikely(words < 10)) { + error("Cannot read /proc/schedstat cpu line. Expected 9 params, read %zu.", words); + return 1; + } + cores_found++; + + size_t core = str2ul(&row_key[3]); + if(unlikely(core >= cores_found)) { + error("Core %zu found but no more than %zu cores were expected.", core, cores_found); + return 1; + } + + if(unlikely(cpuidle_charts_len < cores_found)) { + cpuidle_charts = reallocz(cpuidle_charts, sizeof(struct per_core_cpuidle_chart) * cores_found); + *cpuidle_charts_address = cpuidle_charts; + memset(cpuidle_charts + cpuidle_charts_len, 0, sizeof(struct per_core_cpuidle_chart) * (cores_found - cpuidle_charts_len)); + cpuidle_charts_len = cores_found; + } + + cpuidle_charts[core].active_time = str2ull(procfile_lineword(ff, l, 7)) / 1000; + } + } + + *schedstat_cores_found = cores_found; + return 0; +} + +static int read_one_state(char *buf, const char *filename, int *fd) { + ssize_t ret = read(*fd, buf, 50); + + if(unlikely(ret <= 0)) { + // cannot read that file + error("Cannot read file '%s'", filename); + close(*fd); + *fd = -1; + return 0; + } + else { + // successful read + + // terminate the buffer + buf[ret - 1] = '\0'; + + if(unlikely(keep_cpuidle_fds_open != CONFIG_BOOLEAN_YES)) { + close(*fd); + *fd = -1; + } + else if(lseek(*fd, 0, SEEK_SET) == -1) { + error("Cannot seek in file '%s'", filename); + close(*fd); + *fd = -1; + } + } + + return 1; +} + +static int read_cpuidle_states(char *cpuidle_name_filename , char *cpuidle_time_filename, struct per_core_cpuidle_chart *cpuidle_charts, size_t core) { + char filename[FILENAME_MAX + 1]; + static char next_state_filename[FILENAME_MAX + 1]; + struct stat stbuf; + struct per_core_cpuidle_chart *cc = &cpuidle_charts[core]; + size_t state; + + if(unlikely(!cc->cpuidle_state_len || cc->rescan_cpu_states)) { + int state_file_found = 1; // check at least one state + + if(cc->cpuidle_state_len) { + for(state = 0; state < cc->cpuidle_state_len; state++) { + freez(cc->cpuidle_state[state].name); + + freez(cc->cpuidle_state[state].time_filename); + close(cc->cpuidle_state[state].time_fd); + cc->cpuidle_state[state].time_fd = -1; + } + + freez(cc->cpuidle_state); + cc->cpuidle_state = NULL; + cc->cpuidle_state_len = 0; + + cc->active_time_rd = NULL; + cc->st = NULL; + } + + while(likely(state_file_found)) { + snprintfz(filename, FILENAME_MAX, cpuidle_name_filename, core, cc->cpuidle_state_len); + if (stat(filename, &stbuf) == 0) + cc->cpuidle_state_len++; + else + state_file_found = 0; + } + snprintfz(next_state_filename, FILENAME_MAX, cpuidle_name_filename, core, cc->cpuidle_state_len); + + cc->cpuidle_state = callocz(cc->cpuidle_state_len, sizeof(struct cpuidle_state)); + memset(cc->cpuidle_state, 0, sizeof(struct cpuidle_state) * cc->cpuidle_state_len); + + for(state = 0; state < cc->cpuidle_state_len; state++) { + char name_buf[50 + 1]; + snprintfz(filename, FILENAME_MAX, cpuidle_name_filename, core, state); + + int fd = open(filename, O_RDONLY, 0666); + if(unlikely(fd == -1)) { + error("Cannot open file '%s'", filename); + cc->rescan_cpu_states = 1; + return 1; + } + + ssize_t r = read(fd, name_buf, 50); + if(unlikely(r < 1)) { + error("Cannot read file '%s'", filename); + close(fd); + cc->rescan_cpu_states = 1; + return 1; + } + + name_buf[r - 1] = '\0'; // erase extra character + cc->cpuidle_state[state].name = strdupz(name_buf); + close(fd); + + snprintfz(filename, FILENAME_MAX, cpuidle_time_filename, core, state); + cc->cpuidle_state[state].time_filename = strdupz(filename); + cc->cpuidle_state[state].time_fd = -1; + } + + cc->rescan_cpu_states = 0; + } + + for(state = 0; state < cc->cpuidle_state_len; state++) { + + struct cpuidle_state *cs = &cc->cpuidle_state[state]; + + if(unlikely(cs->time_fd == -1)) { + cs->time_fd = open(cs->time_filename, O_RDONLY); + if (unlikely(cs->time_fd == -1)) { + error("Cannot open file '%s'", cs->time_filename); + cc->rescan_cpu_states = 1; + return 1; + } + } + + char time_buf[50 + 1]; + if(likely(read_one_state(time_buf, cs->time_filename, &cs->time_fd))) { + cs->value = str2ll(time_buf, NULL); + } + else { + cc->rescan_cpu_states = 1; + return 1; + } + } + + // check if the number of states was increased + if(unlikely(stat(next_state_filename, &stbuf) == 0)) { + cc->rescan_cpu_states = 1; + return 1; + } + + return 0; +} + int do_proc_stat(int update_every, usec_t dt) { (void)dt; static struct cpu_chart *all_cpu_charts = NULL; static size_t all_cpu_charts_size = 0; static procfile *ff = NULL; - static int do_cpu = -1, do_cpu_cores = -1, do_interrupts = -1, do_context = -1, do_forks = -1, do_processes = -1, do_core_throttle_count = -1, do_package_throttle_count = -1, do_cpu_freq = -1; + static int do_cpu = -1, do_cpu_cores = -1, do_interrupts = -1, do_context = -1, do_forks = -1, do_processes = -1, + do_core_throttle_count = -1, do_package_throttle_count = -1, do_cpu_freq = -1, do_cpuidle = -1; static uint32_t hash_intr, hash_ctxt, hash_processes, hash_procs_running, hash_procs_blocked; - static char *core_throttle_count_filename = NULL, *package_throttle_count_filename = NULL, *scaling_cur_freq_filename = NULL, *time_in_state_filename = NULL; + static char *core_throttle_count_filename = NULL, *package_throttle_count_filename = NULL, *scaling_cur_freq_filename = NULL, + *time_in_state_filename = NULL, *schedstat_filename = NULL, *cpuidle_name_filename = NULL, *cpuidle_time_filename = NULL; static RRDVAR *cpus_var = NULL; static int accurate_freq_avail = 0, accurate_freq_is_used = 0; size_t cores_found = (size_t)processors; @@ -265,6 +500,7 @@ int do_proc_stat(int update_every, usec_t dt) { do_core_throttle_count = CONFIG_BOOLEAN_NO; do_package_throttle_count = CONFIG_BOOLEAN_NO; do_cpu_freq = CONFIG_BOOLEAN_NO; + do_cpuidle = CONFIG_BOOLEAN_NO; } else { // the system has a reasonable number of processors @@ -272,12 +508,23 @@ int do_proc_stat(int update_every, usec_t dt) { do_core_throttle_count = CONFIG_BOOLEAN_AUTO; do_package_throttle_count = CONFIG_BOOLEAN_NO; do_cpu_freq = CONFIG_BOOLEAN_YES; + do_cpuidle = CONFIG_BOOLEAN_YES; + } + if(unlikely(processors > 24)) { + // the system has too many processors + keep_cpuidle_fds_open = CONFIG_BOOLEAN_NO; + } + else { + // the system has a reasonable number of processors + keep_cpuidle_fds_open = CONFIG_BOOLEAN_YES; } keep_per_core_fds_open = config_get_boolean("plugin:proc:/proc/stat", "keep per core files open", keep_per_core_fds_open); + keep_cpuidle_fds_open = config_get_boolean("plugin:proc:/proc/stat", "keep cpuidle files open", keep_cpuidle_fds_open); do_core_throttle_count = config_get_boolean_ondemand("plugin:proc:/proc/stat", "core_throttle_count", do_core_throttle_count); do_package_throttle_count = config_get_boolean_ondemand("plugin:proc:/proc/stat", "package_throttle_count", do_package_throttle_count); do_cpu_freq = config_get_boolean_ondemand("plugin:proc:/proc/stat", "cpu frequency", do_cpu_freq); + do_cpuidle = config_get_boolean_ondemand("plugin:proc:/proc/stat", "cpu idle states", do_cpuidle); hash_intr = simple_hash("intr"); hash_ctxt = simple_hash("ctxt"); @@ -297,6 +544,22 @@ int do_proc_stat(int update_every, usec_t dt) { snprintfz(filename, FILENAME_MAX, "%s%s", netdata_configured_host_prefix, "/sys/devices/system/cpu/%s/cpufreq/stats/time_in_state"); time_in_state_filename = config_get("plugin:proc:/proc/stat", "time_in_state filename to monitor", filename); + + snprintfz(filename, FILENAME_MAX, "%s%s", netdata_configured_host_prefix, "/proc/schedstat"); + schedstat_filename = config_get("plugin:proc:/proc/stat", "schedstat filename to monitor", filename); + + if(do_cpuidle != CONFIG_BOOLEAN_NO) { + struct stat stbuf; + + if (stat(schedstat_filename, &stbuf)) + do_cpuidle = CONFIG_BOOLEAN_NO; + } + + snprintfz(filename, FILENAME_MAX, "%s%s", netdata_configured_host_prefix, "/sys/devices/system/cpu/cpu%zu/cpuidle/state%zu/name"); + cpuidle_name_filename = config_get("plugin:proc:/proc/stat", "cpuidle name filename to monitor", filename); + + snprintfz(filename, FILENAME_MAX, "%s%s", netdata_configured_host_prefix, "/sys/devices/system/cpu/cpu%zu/cpuidle/state%zu/time"); + cpuidle_time_filename = config_get("plugin:proc:/proc/stat", "cpuidle time filename to monitor", filename); } if(unlikely(!ff)) { @@ -407,7 +670,7 @@ int do_proc_stat(int update_every, usec_t dt) { cpu_chart->files[CPU_FREQ_INDEX].fd = -1; do_cpu_freq = CONFIG_BOOLEAN_YES; } - + snprintfz(filename, FILENAME_MAX, time_in_state_filename, id); if (stat(filename, &stbuf) == 0) { @@ -702,7 +965,7 @@ int do_proc_stat(int update_every, usec_t dt) { , "MHz" , PLUGIN_PROC_NAME , PLUGIN_PROC_MODULE_STAT_NAME - , 5003 + , NETDATA_CHART_PRIO_CPUFREQ_SCALING_CUR_FREQ , update_every , RRDSET_TYPE_LINE ); @@ -715,6 +978,80 @@ int do_proc_stat(int update_every, usec_t dt) { } } + // -------------------------------------------------------------------- + + static struct per_core_cpuidle_chart *cpuidle_charts = NULL; + size_t schedstat_cores_found = 0; + + if(likely(do_cpuidle != CONFIG_BOOLEAN_NO && !read_schedstat(schedstat_filename, &cpuidle_charts, &schedstat_cores_found))) { + int cpu_states_updated = 0; + size_t core, state; + + + // proc.plugin runs on Linux systems only. Multi-platform compatibility is not needed here, + // so bare pthread functions are used to avoid unneeded overheads. + for(core = 0; core < schedstat_cores_found; core++) { + if(unlikely(!(cpuidle_charts[core].active_time - cpuidle_charts[core].last_active_time))) { + pthread_t thread; + + if(unlikely(pthread_create(&thread, NULL, wake_cpu_thread, (void *)&core))) + error("Cannot create wake_cpu_thread"); + else if(unlikely(pthread_join(thread, NULL))) + error("Cannot join wake_cpu_thread"); + cpu_states_updated = 1; + } + } + + if(unlikely(!cpu_states_updated || !read_schedstat(schedstat_filename, &cpuidle_charts, &schedstat_cores_found))) { + for(core = 0; core < schedstat_cores_found; core++) { + cpuidle_charts[core].last_active_time = cpuidle_charts[core].active_time; + + int r = read_cpuidle_states(cpuidle_name_filename, cpuidle_time_filename, cpuidle_charts, core); + if(likely(r != -1 && (do_cpuidle == CONFIG_BOOLEAN_YES || r > 0))) { + do_cpuidle = CONFIG_BOOLEAN_YES; + + char cpuidle_chart_id[RRD_ID_LENGTH_MAX + 1]; + snprintfz(cpuidle_chart_id, RRD_ID_LENGTH_MAX, "cpu%zu_cpuidle", core); + + if(unlikely(!cpuidle_charts[core].st)) { + cpuidle_charts[core].st = rrdset_create_localhost( + "cpu" + , cpuidle_chart_id + , NULL + , "cpuidle" + , "cpuidle.cpuidle" + , "C-state residency time" + , "percentage" + , PLUGIN_PROC_NAME + , PLUGIN_PROC_MODULE_STAT_NAME + , NETDATA_CHART_PRIO_CPUIDLE + core + , update_every + , RRDSET_TYPE_STACKED + ); + + char cpuidle_dim_id[RRD_ID_LENGTH_MAX + 1]; + snprintfz(cpuidle_dim_id, RRD_ID_LENGTH_MAX, "cpu%zu_active_time", core); + cpuidle_charts[core].active_time_rd = rrddim_add(cpuidle_charts[core].st, cpuidle_dim_id, "C0 (active)", 1, 1, RRD_ALGORITHM_PCENT_OVER_DIFF_TOTAL); + for(state = 0; state < cpuidle_charts[core].cpuidle_state_len; state++) { + snprintfz(cpuidle_dim_id, RRD_ID_LENGTH_MAX, "cpu%zu_cpuidle_state%zu_time", core, state); + cpuidle_charts[core].cpuidle_state[state].rd = rrddim_add(cpuidle_charts[core].st, cpuidle_dim_id, + cpuidle_charts[core].cpuidle_state[state].name, + 1, 1, RRD_ALGORITHM_PCENT_OVER_DIFF_TOTAL); + } + } + else + rrdset_next(cpuidle_charts[core].st); + + rrddim_set_by_pointer(cpuidle_charts[core].st, cpuidle_charts[core].active_time_rd, cpuidle_charts[core].active_time); + for(state = 0; state < cpuidle_charts[core].cpuidle_state_len; state++) { + rrddim_set_by_pointer(cpuidle_charts[core].st, cpuidle_charts[core].cpuidle_state[state].rd, cpuidle_charts[core].cpuidle_state[state].value); + } + rrdset_done(cpuidle_charts[core].st); + } + } + } + } + if(cpus_var) rrdvar_custom_host_variable_set(localhost, cpus_var, cores_found); |