diff options
Diffstat (limited to '')
44 files changed, 4269 insertions, 1091 deletions
diff --git a/src/Makefile.am b/src/Makefile.am index 8fa6d5bd..86b9a9fe 100644 --- a/src/Makefile.am +++ b/src/Makefile.am @@ -52,9 +52,11 @@ netdata_SOURCES = \ proc_net_dev.c \ proc_net_ip_vs_stats.c \ proc_net_netstat.c \ + proc_net_rpc_nfs.c \ proc_net_rpc_nfsd.c \ proc_net_snmp.c \ proc_net_snmp6.c \ + proc_net_softnet_stat.c \ proc_net_stat_conntrack.c \ proc_net_stat_synproxy.c \ proc_stat.c \ diff --git a/src/Makefile.in b/src/Makefile.in index 1a721a04..6645f40d 100644 --- a/src/Makefile.in +++ b/src/Makefile.in @@ -86,8 +86,13 @@ DIST_COMMON = $(srcdir)/Makefile.in $(srcdir)/Makefile.am \ $(top_srcdir)/depcomp $(dist_cache_DATA) $(dist_log_DATA) \ $(dist_registry_DATA) $(dist_varlib_DATA) ACLOCAL_M4 = $(top_srcdir)/aclocal.m4 -am__aclocal_m4_deps = $(top_srcdir)/m4/ax_pthread.m4 \ - $(top_srcdir)/configure.ac +am__aclocal_m4_deps = $(top_srcdir)/m4/ax_c___atomic.m4 \ + $(top_srcdir)/m4/ax_c__generic.m4 \ + $(top_srcdir)/m4/ax_c_mallinfo.m4 \ + $(top_srcdir)/m4/ax_c_mallopt.m4 \ + $(top_srcdir)/m4/ax_check_compile_flag.m4 \ + $(top_srcdir)/m4/ax_pthread.m4 $(top_srcdir)/m4/jemalloc.m4 \ + $(top_srcdir)/m4/tcmalloc.m4 $(top_srcdir)/configure.ac am__configure_deps = $(am__aclocal_m4_deps) $(CONFIGURE_DEPENDENCIES) \ $(ACLOCAL_M4) mkinstalldirs = $(install_sh) -d @@ -113,8 +118,9 @@ am_netdata_OBJECTS = appconfig.$(OBJEXT) avl.$(OBJEXT) \ proc_interrupts.$(OBJEXT) proc_softirqs.$(OBJEXT) \ proc_loadavg.$(OBJEXT) proc_meminfo.$(OBJEXT) \ proc_net_dev.$(OBJEXT) proc_net_ip_vs_stats.$(OBJEXT) \ - proc_net_netstat.$(OBJEXT) proc_net_rpc_nfsd.$(OBJEXT) \ - proc_net_snmp.$(OBJEXT) proc_net_snmp6.$(OBJEXT) \ + proc_net_netstat.$(OBJEXT) proc_net_rpc_nfs.$(OBJEXT) \ + proc_net_rpc_nfsd.$(OBJEXT) proc_net_snmp.$(OBJEXT) \ + proc_net_snmp6.$(OBJEXT) proc_net_softnet_stat.$(OBJEXT) \ proc_net_stat_conntrack.$(OBJEXT) \ proc_net_stat_synproxy.$(OBJEXT) proc_stat.$(OBJEXT) \ proc_self_mountinfo.$(OBJEXT) \ @@ -279,6 +285,7 @@ PTHREAD_CFLAGS = @PTHREAD_CFLAGS@ PTHREAD_LIBS 
= @PTHREAD_LIBS@ SET_MAKE = @SET_MAKE@ SHELL = @SHELL@ +SSE_CANDIDATE = @SSE_CANDIDATE@ STRIP = @STRIP@ UUID_CFLAGS = @UUID_CFLAGS@ UUID_LIBS = @UUID_LIBS@ @@ -311,6 +318,8 @@ datarootdir = @datarootdir@ docdir = @docdir@ dvidir = @dvidir@ exec_prefix = @exec_prefix@ +has_jemalloc = @has_jemalloc@ +has_tcmalloc = @has_tcmalloc@ host = @host@ host_alias = @host_alias@ host_cpu = @host_cpu@ @@ -398,9 +407,11 @@ netdata_SOURCES = \ proc_net_dev.c \ proc_net_ip_vs_stats.c \ proc_net_netstat.c \ + proc_net_rpc_nfs.c \ proc_net_rpc_nfsd.c \ proc_net_snmp.c \ proc_net_snmp6.c \ + proc_net_softnet_stat.c \ proc_net_stat_conntrack.c \ proc_net_stat_synproxy.c \ proc_stat.c \ @@ -596,9 +607,11 @@ distclean-compile: @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/proc_net_dev.Po@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/proc_net_ip_vs_stats.Po@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/proc_net_netstat.Po@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/proc_net_rpc_nfs.Po@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/proc_net_rpc_nfsd.Po@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/proc_net_snmp.Po@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/proc_net_snmp6.Po@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/proc_net_softnet_stat.Po@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/proc_net_stat_conntrack.Po@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/proc_net_stat_synproxy.Po@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/proc_self_mountinfo.Po@am__quote@ diff --git a/src/appconfig.c b/src/appconfig.c index 34fb6d7d..94740748 100644 --- a/src/appconfig.c +++ b/src/appconfig.c @@ -464,9 +464,17 @@ void generate_config(BUFFER *wb, int only_changed) switch(i) { case 0: buffer_strcat(wb, - "# NetData Configuration\n" + "# netdata configuration\n" + "#\n" + "# You can download the latest version of this file, using:\n" + "#\n" + "# wget 
-O /etc/netdata/netdata.conf http://localhost:19999/netdata.conf\n" + "# or\n" + "# curl -o /etc/netdata/netdata.conf http://localhost:19999/netdata.conf\n" + "#\n" "# You can uncomment and change any of the options below.\n" "# The value shown in the commented settings, is the default value.\n" + "#\n" "\n# global netdata configuration\n"); break; diff --git a/src/apps_plugin.c b/src/apps_plugin.c index ee400b72..f22a575b 100644 --- a/src/apps_plugin.c +++ b/src/apps_plugin.c @@ -13,8 +13,6 @@ // etc. #define RATES_DETAIL 10000ULL -int processors = 1; -pid_t pid_max = 32768; int debug = 0; int update_every = 1; @@ -22,7 +20,6 @@ unsigned long long global_iterations_counter = 1; unsigned long long file_counter = 0; int proc_pid_cmdline_is_needed = 0; int include_exited_childs = 1; -char *host_prefix = ""; char *config_dir = CONFIG_DIR; pid_t *all_pids_sortlist = NULL; @@ -42,58 +39,6 @@ void netdata_cleanup_and_exit(int ret) { // ---------------------------------------------------------------------------- -// system functions -// to retrieve settings of the system - -long get_system_cpus(void) { - procfile *ff = NULL; - - int processors = 0; - - char filename[FILENAME_MAX + 1]; - snprintfz(filename, FILENAME_MAX, "%s/proc/stat", host_prefix); - - ff = procfile_open(filename, NULL, PROCFILE_FLAG_DEFAULT); - if(!ff) return 1; - - ff = procfile_readall(ff); - if(!ff) - return 1; - - unsigned int i; - for(i = 0; i < procfile_lines(ff); i++) { - if(!procfile_linewords(ff, i)) continue; - - if(strncmp(procfile_lineword(ff, i, 0), "cpu", 3) == 0) processors++; - } - processors--; - if(processors < 1) processors = 1; - - procfile_close(ff); - return processors; -} - -pid_t get_system_pid_max(void) { - procfile *ff = NULL; - pid_t mpid = 32768; - - char filename[FILENAME_MAX + 1]; - snprintfz(filename, FILENAME_MAX, "%s/proc/sys/kernel/pid_max", host_prefix); - ff = procfile_open(filename, NULL, PROCFILE_FLAG_DEFAULT); - if(!ff) return mpid; - - ff = procfile_readall(ff); - 
if(!ff) - return mpid; - - mpid = (pid_t)atoi(procfile_lineword(ff, 0, 0)); - if(!mpid) mpid = 32768; - - procfile_close(ff); - return mpid; -} - -// ---------------------------------------------------------------------------- // target // target is the structure that process data are aggregated @@ -121,23 +66,23 @@ struct target { unsigned long long cstime; unsigned long long cgtime; unsigned long long num_threads; - unsigned long long rss; + // unsigned long long rss; unsigned long long statm_size; unsigned long long statm_resident; unsigned long long statm_share; - unsigned long long statm_text; - unsigned long long statm_lib; - unsigned long long statm_data; - unsigned long long statm_dirty; + // unsigned long long statm_text; + // unsigned long long statm_lib; + // unsigned long long statm_data; + // unsigned long long statm_dirty; unsigned long long io_logical_bytes_read; unsigned long long io_logical_bytes_written; - unsigned long long io_read_calls; - unsigned long long io_write_calls; + // unsigned long long io_read_calls; + // unsigned long long io_write_calls; unsigned long long io_storage_bytes_read; unsigned long long io_storage_bytes_written; - unsigned long long io_cancelled_write_bytes; + // unsigned long long io_cancelled_write_bytes; int *fds; unsigned long long openfiles; @@ -440,7 +385,7 @@ struct pid_stat { // int64_t itrealvalue; // unsigned long long starttime; // unsigned long long vsize; - unsigned long long rss; + // unsigned long long rss; // unsigned long long rsslim; // unsigned long long starcode; // unsigned long long endcode; @@ -466,26 +411,26 @@ struct pid_stat { unsigned long long statm_size; unsigned long long statm_resident; unsigned long long statm_share; - unsigned long long statm_text; - unsigned long long statm_lib; - unsigned long long statm_data; - unsigned long long statm_dirty; + // unsigned long long statm_text; + // unsigned long long statm_lib; + // unsigned long long statm_data; + // unsigned long long statm_dirty; 
unsigned long long io_logical_bytes_read_raw; unsigned long long io_logical_bytes_written_raw; - unsigned long long io_read_calls_raw; - unsigned long long io_write_calls_raw; + // unsigned long long io_read_calls_raw; + // unsigned long long io_write_calls_raw; unsigned long long io_storage_bytes_read_raw; unsigned long long io_storage_bytes_written_raw; - unsigned long long io_cancelled_write_bytes_raw; + // unsigned long long io_cancelled_write_bytes_raw; unsigned long long io_logical_bytes_read; unsigned long long io_logical_bytes_written; - unsigned long long io_read_calls; - unsigned long long io_write_calls; + // unsigned long long io_read_calls; + // unsigned long long io_write_calls; unsigned long long io_storage_bytes_read; unsigned long long io_storage_bytes_written; - unsigned long long io_cancelled_write_bytes; + // unsigned long long io_cancelled_write_bytes; int *fds; // array of fds it uses int fds_size; // the size of the fds array @@ -573,10 +518,10 @@ void del_pid_entry(pid_t pid) { // update pids from proc int read_proc_pid_cmdline(struct pid_stat *p) { - + if(unlikely(!p->cmdline_filename)) { char filename[FILENAME_MAX + 1]; - snprintfz(filename, FILENAME_MAX, "%s/proc/%d/cmdline", host_prefix, p->pid); + snprintfz(filename, FILENAME_MAX, "%s/proc/%d/cmdline", global_host_prefix, p->pid); p->cmdline_filename = strdupz(filename); } @@ -629,7 +574,7 @@ int read_proc_pid_stat(struct pid_stat *p) { if(unlikely(!p->stat_filename)) { char filename[FILENAME_MAX + 1]; - snprintfz(filename, FILENAME_MAX, "%s/proc/%d/stat", host_prefix, p->pid); + snprintfz(filename, FILENAME_MAX, "%s/proc/%d/stat", global_host_prefix, p->pid); p->stat_filename = strdupz(filename); } @@ -700,7 +645,7 @@ int read_proc_pid_stat(struct pid_stat *p) { // p->itrealvalue = strtoull(procfile_lineword(ff, 0, 20), NULL, 10); // p->starttime = strtoull(procfile_lineword(ff, 0, 21), NULL, 10); // p->vsize = strtoull(procfile_lineword(ff, 0, 22), NULL, 10); - p->rss = 
strtoull(procfile_lineword(ff, 0, 23), NULL, 10); + // p->rss = strtoull(procfile_lineword(ff, 0, 23), NULL, 10); // p->rsslim = strtoull(procfile_lineword(ff, 0, 24), NULL, 10); // p->starcode = strtoull(procfile_lineword(ff, 0, 25), NULL, 10); // p->endcode = strtoull(procfile_lineword(ff, 0, 26), NULL, 10); @@ -737,7 +682,7 @@ int read_proc_pid_stat(struct pid_stat *p) { } if(unlikely(debug || (p->target && p->target->debug))) - fprintf(stderr, "apps.plugin: READ PROC/PID/STAT: %s/proc/%d/stat, process: '%s' on target '%s' (dt=%llu) VALUES: utime=%llu, stime=%llu, cutime=%llu, cstime=%llu, minflt=%llu, majflt=%llu, cminflt=%llu, cmajflt=%llu, threads=%d\n", host_prefix, p->pid, p->comm, (p->target)?p->target->name:"UNSET", p->stat_collected_usec - p->last_stat_collected_usec, p->utime, p->stime, p->cutime, p->cstime, p->minflt, p->majflt, p->cminflt, p->cmajflt, p->num_threads); + fprintf(stderr, "apps.plugin: READ PROC/PID/STAT: %s/proc/%d/stat, process: '%s' on target '%s' (dt=%llu) VALUES: utime=%llu, stime=%llu, cutime=%llu, cstime=%llu, minflt=%llu, majflt=%llu, cminflt=%llu, cmajflt=%llu, threads=%d\n", global_host_prefix, p->pid, p->comm, (p->target)?p->target->name:"UNSET", p->stat_collected_usec - p->last_stat_collected_usec, p->utime, p->stime, p->cutime, p->cstime, p->minflt, p->majflt, p->cminflt, p->cmajflt, p->num_threads); if(unlikely(global_iterations_counter == 1)) { p->minflt = 0; @@ -766,7 +711,7 @@ cleanup: p->cstime = 0; p->cgtime = 0; p->num_threads = 0; - p->rss = 0; + // p->rss = 0; return 0; } @@ -775,7 +720,7 @@ int read_proc_pid_statm(struct pid_stat *p) { if(unlikely(!p->statm_filename)) { char filename[FILENAME_MAX + 1]; - snprintfz(filename, FILENAME_MAX, "%s/proc/%d/statm", host_prefix, p->pid); + snprintfz(filename, FILENAME_MAX, "%s/proc/%d/statm", global_host_prefix, p->pid); p->statm_filename = strdupz(filename); } @@ -790,10 +735,10 @@ int read_proc_pid_statm(struct pid_stat *p) { p->statm_size = strtoull(procfile_lineword(ff, 
0, 0), NULL, 10); p->statm_resident = strtoull(procfile_lineword(ff, 0, 1), NULL, 10); p->statm_share = strtoull(procfile_lineword(ff, 0, 2), NULL, 10); - p->statm_text = strtoull(procfile_lineword(ff, 0, 3), NULL, 10); - p->statm_lib = strtoull(procfile_lineword(ff, 0, 4), NULL, 10); - p->statm_data = strtoull(procfile_lineword(ff, 0, 5), NULL, 10); - p->statm_dirty = strtoull(procfile_lineword(ff, 0, 6), NULL, 10); + // p->statm_text = strtoull(procfile_lineword(ff, 0, 3), NULL, 10); + // p->statm_lib = strtoull(procfile_lineword(ff, 0, 4), NULL, 10); + // p->statm_data = strtoull(procfile_lineword(ff, 0, 5), NULL, 10); + // p->statm_dirty = strtoull(procfile_lineword(ff, 0, 6), NULL, 10); return 1; @@ -801,10 +746,10 @@ cleanup: p->statm_size = 0; p->statm_resident = 0; p->statm_share = 0; - p->statm_text = 0; - p->statm_lib = 0; - p->statm_data = 0; - p->statm_dirty = 0; + // p->statm_text = 0; + // p->statm_lib = 0; + // p->statm_data = 0; + // p->statm_dirty = 0; return 0; } @@ -813,7 +758,7 @@ int read_proc_pid_io(struct pid_stat *p) { if(unlikely(!p->io_filename)) { char filename[FILENAME_MAX + 1]; - snprintfz(filename, FILENAME_MAX, "%s/proc/%d/io", host_prefix, p->pid); + snprintfz(filename, FILENAME_MAX, "%s/proc/%d/io", global_host_prefix, p->pid); p->io_filename = strdupz(filename); } @@ -839,13 +784,13 @@ int read_proc_pid_io(struct pid_stat *p) { p->io_logical_bytes_written_raw = strtoull(procfile_lineword(ff, 1, 1), NULL, 10); p->io_logical_bytes_written = (p->io_logical_bytes_written_raw - last) * (1000000ULL * RATES_DETAIL) / (p->io_collected_usec - p->last_io_collected_usec); - last = p->io_read_calls_raw; - p->io_read_calls_raw = strtoull(procfile_lineword(ff, 2, 1), NULL, 10); - p->io_read_calls = (p->io_read_calls_raw - last) * (1000000ULL * RATES_DETAIL) / (p->io_collected_usec - p->last_io_collected_usec); + // last = p->io_read_calls_raw; + // p->io_read_calls_raw = strtoull(procfile_lineword(ff, 2, 1), NULL, 10); + // p->io_read_calls = 
(p->io_read_calls_raw - last) * (1000000ULL * RATES_DETAIL) / (p->io_collected_usec - p->last_io_collected_usec); - last = p->io_write_calls_raw; - p->io_write_calls_raw = strtoull(procfile_lineword(ff, 3, 1), NULL, 10); - p->io_write_calls = (p->io_write_calls_raw - last) * (1000000ULL * RATES_DETAIL) / (p->io_collected_usec - p->last_io_collected_usec); + // last = p->io_write_calls_raw; + // p->io_write_calls_raw = strtoull(procfile_lineword(ff, 3, 1), NULL, 10); + // p->io_write_calls = (p->io_write_calls_raw - last) * (1000000ULL * RATES_DETAIL) / (p->io_collected_usec - p->last_io_collected_usec); last = p->io_storage_bytes_read_raw; p->io_storage_bytes_read_raw = strtoull(procfile_lineword(ff, 4, 1), NULL, 10); @@ -855,18 +800,18 @@ int read_proc_pid_io(struct pid_stat *p) { p->io_storage_bytes_written_raw = strtoull(procfile_lineword(ff, 5, 1), NULL, 10); p->io_storage_bytes_written = (p->io_storage_bytes_written_raw - last) * (1000000ULL * RATES_DETAIL) / (p->io_collected_usec - p->last_io_collected_usec); - last = p->io_cancelled_write_bytes_raw; - p->io_cancelled_write_bytes_raw = strtoull(procfile_lineword(ff, 6, 1), NULL, 10); - p->io_cancelled_write_bytes = (p->io_cancelled_write_bytes_raw - last) * (1000000ULL * RATES_DETAIL) / (p->io_collected_usec - p->last_io_collected_usec); + // last = p->io_cancelled_write_bytes_raw; + // p->io_cancelled_write_bytes_raw = strtoull(procfile_lineword(ff, 6, 1), NULL, 10); + // p->io_cancelled_write_bytes = (p->io_cancelled_write_bytes_raw - last) * (1000000ULL * RATES_DETAIL) / (p->io_collected_usec - p->last_io_collected_usec); if(unlikely(global_iterations_counter == 1)) { p->io_logical_bytes_read = 0; p->io_logical_bytes_written = 0; - p->io_read_calls = 0; - p->io_write_calls = 0; + // p->io_read_calls = 0; + // p->io_write_calls = 0; p->io_storage_bytes_read = 0; p->io_storage_bytes_written = 0; - p->io_cancelled_write_bytes = 0; + // p->io_cancelled_write_bytes = 0; } return 1; @@ -874,11 +819,11 @@ int 
read_proc_pid_io(struct pid_stat *p) { cleanup: p->io_logical_bytes_read = 0; p->io_logical_bytes_written = 0; - p->io_read_calls = 0; - p->io_write_calls = 0; + // p->io_read_calls = 0; + // p->io_write_calls = 0; p->io_storage_bytes_read = 0; p->io_storage_bytes_written = 0; - p->io_cancelled_write_bytes = 0; + // p->io_cancelled_write_bytes = 0; return 0; } @@ -892,7 +837,7 @@ int read_proc_stat() { static unsigned long long utime_raw = 0, stime_raw = 0, gtime_raw = 0, gntime_raw = 0, ntime_raw = 0, collected_usec = 0, last_collected_usec = 0; if(unlikely(!ff)) { - snprintfz(filename, FILENAME_MAX, "%s/proc/stat", host_prefix); + snprintfz(filename, FILENAME_MAX, "%s/proc/stat", global_host_prefix); ff = procfile_open(filename, " \t:", PROCFILE_FLAG_DEFAULT); if(unlikely(!ff)) goto cleanup; } @@ -1194,7 +1139,7 @@ int file_descriptor_find_or_add(const char *name) int read_pid_file_descriptors(struct pid_stat *p) { char dirname[FILENAME_MAX+1]; - snprintfz(dirname, FILENAME_MAX, "%s/proc/%d/fd", host_prefix, p->pid); + snprintfz(dirname, FILENAME_MAX, "%s/proc/%d/fd", global_host_prefix, p->pid); DIR *fds = opendir(dirname); if(fds) { int c; @@ -1232,7 +1177,7 @@ int read_pid_file_descriptors(struct pid_stat *p) { if(p->fds[fdid] == 0) { // we don't know this fd, get it - sprintf(fdname, "%s/proc/%d/fd/%s", host_prefix, p->pid, de->d_name); + sprintf(fdname, "%s/proc/%d/fd/%s", global_host_prefix, p->pid, de->d_name); ssize_t l = readlink(fdname, linkname, FILENAME_MAX); if(l == -1) { if(debug || (p->target && p->target->debug)) { @@ -1328,14 +1273,14 @@ void find_lost_child_debug(struct pid_stat *pe, unsigned long long lost, int typ found++; } break; - + case 2: if(p->cmajflt > lost) { fprintf(stderr, " > process %d (%s) could use the lost exited child majflt %llu of process %d (%s)\n", p->pid, p->comm, lost, pe->pid, pe->comm); found++; } break; - + case 3: if(p->cutime > lost) { fprintf(stderr, " > process %d (%s) could use the lost exited child utime %llu of 
process %d (%s)\n", p->pid, p->comm, lost, pe->pid, pe->comm); @@ -1364,11 +1309,11 @@ void find_lost_child_debug(struct pid_stat *pe, unsigned long long lost, int typ case 1: fprintf(stderr, " > cannot find any process to use the lost exited child minflt %llu of process %d (%s)\n", lost, pe->pid, pe->comm); break; - + case 2: fprintf(stderr, " > cannot find any process to use the lost exited child majflt %llu of process %d (%s)\n", lost, pe->pid, pe->comm); break; - + case 3: fprintf(stderr, " > cannot find any process to use the lost exited child utime %llu of process %d (%s)\n", lost, pe->pid, pe->comm); break; @@ -1583,19 +1528,19 @@ static inline int managed_log(struct pid_stat *p, uint32_t log, int status) { p->log_thrown |= log; switch(log) { case PID_LOG_IO: - error("Cannot process %s/proc/%d/io (command '%s')", host_prefix, p->pid, p->comm); + error("Cannot process %s/proc/%d/io (command '%s')", global_host_prefix, p->pid, p->comm); break; case PID_LOG_STATM: - error("Cannot process %s/proc/%d/statm (command '%s')", host_prefix, p->pid, p->comm); + error("Cannot process %s/proc/%d/statm (command '%s')", global_host_prefix, p->pid, p->comm); break; case PID_LOG_CMDLINE: - error("Cannot process %s/proc/%d/cmdline (command '%s')", host_prefix, p->pid, p->comm); + error("Cannot process %s/proc/%d/cmdline (command '%s')", global_host_prefix, p->pid, p->comm); break; case PID_LOG_FDS: - error("Cannot process entries in %s/proc/%d/fd (command '%s')", host_prefix, p->pid, p->comm); + error("Cannot process entries in %s/proc/%d/fd (command '%s')", global_host_prefix, p->pid, p->comm); break; case PID_LOG_STAT: @@ -1751,7 +1696,7 @@ int collect_data_for_all_processes_from_proc(void) { char dirname[FILENAME_MAX + 1]; - snprintfz(dirname, FILENAME_MAX, "%s/proc", host_prefix); + snprintfz(dirname, FILENAME_MAX, "%s/proc", global_host_prefix); DIR *dir = opendir(dirname); if(!dir) return 0; @@ -1957,24 +1902,24 @@ long zero_all_targets(struct target *root) { w->cstime 
= 0; w->cgtime = 0; w->num_threads = 0; - w->rss = 0; + // w->rss = 0; w->processes = 0; w->statm_size = 0; w->statm_resident = 0; w->statm_share = 0; - w->statm_text = 0; - w->statm_lib = 0; - w->statm_data = 0; - w->statm_dirty = 0; + // w->statm_text = 0; + // w->statm_lib = 0; + // w->statm_data = 0; + // w->statm_dirty = 0; w->io_logical_bytes_read = 0; w->io_logical_bytes_written = 0; - w->io_read_calls = 0; - w->io_write_calls = 0; + // w->io_read_calls = 0; + // w->io_write_calls = 0; w->io_storage_bytes_read = 0; w->io_storage_bytes_written = 0; - w->io_cancelled_write_bytes = 0; + // w->io_cancelled_write_bytes = 0; } return count; @@ -1999,23 +1944,23 @@ void aggregate_pid_on_target(struct target *w, struct pid_stat *p, struct target w->minflt += p->minflt; w->majflt += p->majflt; - w->rss += p->rss; + // w->rss += p->rss; w->statm_size += p->statm_size; w->statm_resident += p->statm_resident; w->statm_share += p->statm_share; - w->statm_text += p->statm_text; - w->statm_lib += p->statm_lib; - w->statm_data += p->statm_data; - w->statm_dirty += p->statm_dirty; + // w->statm_text += p->statm_text; + // w->statm_lib += p->statm_lib; + // w->statm_data += p->statm_data; + // w->statm_dirty += p->statm_dirty; w->io_logical_bytes_read += p->io_logical_bytes_read; w->io_logical_bytes_written += p->io_logical_bytes_written; - w->io_read_calls += p->io_read_calls; - w->io_write_calls += p->io_write_calls; + // w->io_read_calls += p->io_read_calls; + // w->io_write_calls += p->io_write_calls; w->io_storage_bytes_read += p->io_storage_bytes_read; w->io_storage_bytes_written += p->io_storage_bytes_written; - w->io_cancelled_write_bytes += p->io_cancelled_write_bytes; + // w->io_cancelled_write_bytes += p->io_cancelled_write_bytes; w->processes++; w->num_threads += p->num_threads; @@ -2230,8 +2175,8 @@ unsigned long long send_resource_usage_to_netdata() { cpuuser = me.ru_utime.tv_sec * 1000000ULL + me.ru_utime.tv_usec; cpusyst = me.ru_stime.tv_sec * 1000000ULL + 
me.ru_stime.tv_usec; - bcopy(&now, &last, sizeof(struct timeval)); - bcopy(&me, &me_last, sizeof(struct rusage)); + memmove(&last, &now, sizeof(struct timeval)); + memmove(&me_last, &me, sizeof(struct rusage)); } buffer_sprintf(output, @@ -2488,6 +2433,13 @@ void send_collected_data_to_netdata(struct target *root, const char *type, unsig } send_END(); + send_BEGIN(type, "vmem", usec); + for (w = root; w ; w = w->next) { + if(unlikely(w->exposed)) + send_SET(w->name, w->statm_size); + } + send_END(); + send_BEGIN(type, "minor_faults", usec); for (w = root; w ; w = w->next) { if(unlikely(w->exposed)) @@ -2584,7 +2536,13 @@ void send_charts_updates_to_netdata(struct target *root, const char *type, const buffer_sprintf(output, "DIMENSION %s '' absolute 1 %llu %s\n", w->name, hz * RATES_DETAIL / 100, w->hidden ? "hidden" : ""); } - buffer_sprintf(output, "CHART %s.mem '' '%s Dedicated Memory (w/o shared)' 'MB' mem %s.mem stacked 20003 %d\n", type, title, type, update_every); + buffer_sprintf(output, "CHART %s.mem '' '%s Real Memory (w/o shared)' 'MB' mem %s.mem stacked 20003 %d\n", type, title, type, update_every); + for (w = root; w ; w = w->next) { + if(unlikely(w->exposed)) + buffer_sprintf(output, "DIMENSION %s '' absolute %ld %ld\n", w->name, sysconf(_SC_PAGESIZE), 1024L*1024L); + } + + buffer_sprintf(output, "CHART %s.vmem '' '%s Virtual Memory Size' 'MB' mem %s.vmem stacked 20004 %d\n", type, title, type, update_every); for (w = root; w ; w = w->next) { if(unlikely(w->exposed)) buffer_sprintf(output, "DIMENSION %s '' absolute %ld %ld\n", w->name, sysconf(_SC_PAGESIZE), 1024L*1024L); @@ -2803,12 +2761,12 @@ int main(int argc, char **argv) error_log_errors_per_period = 100; error_log_throttle_period = 3600; - host_prefix = getenv("NETDATA_HOST_PREFIX"); - if(host_prefix == NULL) { + global_host_prefix = getenv("NETDATA_HOST_PREFIX"); + if(global_host_prefix == NULL) { // info("NETDATA_HOST_PREFIX is not passed from netdata"); - host_prefix = ""; + 
global_host_prefix = ""; } - // else info("Found NETDATA_HOST_PREFIX='%s'", host_prefix); + // else info("Found NETDATA_HOST_PREFIX='%s'", global_host_prefix); config_dir = getenv("NETDATA_CONFIG_DIR"); if(config_dir == NULL) { @@ -2830,9 +2788,9 @@ int main(int argc, char **argv) time_t started_t = time(NULL); time_t current_t; - get_HZ(); - pid_max = get_system_pid_max(); - processors = get_system_cpus(); + get_system_HZ(); + get_system_pid_max(); + get_system_cpus(); parse_args(argc, argv); diff --git a/src/common.c b/src/common.c index 7d0fac9a..e1925ff5 100644 --- a/src/common.c +++ b/src/common.c @@ -14,31 +14,181 @@ volatile sig_atomic_t netdata_exit = 0; // its lifetime), these can be used to override the default system allocation // routines. +#ifdef NETDATA_LOG_ALLOCATIONS +static struct memory_statistics { + volatile size_t malloc_calls_made; + volatile size_t calloc_calls_made; + volatile size_t realloc_calls_made; + volatile size_t strdup_calls_made; + volatile size_t free_calls_made; + volatile size_t memory_calls_made; + volatile size_t allocated_memory; + volatile size_t mmapped_memory; +} memory_statistics; + +static inline void print_allocations(const char *file, const char *function, const unsigned long line) { + static struct memory_statistics old = { 0, 0, 0, 0, 0, 0, 0, 0 }; + + //if(unlikely(!(memory_statistics.memory_calls_made % 5))) { + fprintf(stderr, "(%04lu@%-10.10s:%-15.15s): Allocated %zu KB (+%zu B), mmapped %zu KB (+%zu B): malloc %zu (+%zu), calloc %zu (+%zu), realloc %zu (+%zu), strdup %zu (+%zu), free %zu (+%zu)\n", + line, file, function, + (memory_statistics.allocated_memory + 512) / 1024, memory_statistics.allocated_memory - old.allocated_memory, + (memory_statistics.mmapped_memory + 512) / 1024, memory_statistics.mmapped_memory - old.mmapped_memory, + memory_statistics.malloc_calls_made, memory_statistics.malloc_calls_made - old.malloc_calls_made, + memory_statistics.calloc_calls_made, memory_statistics.calloc_calls_made - 
old.calloc_calls_made, + memory_statistics.realloc_calls_made, memory_statistics.realloc_calls_made - old.realloc_calls_made, + memory_statistics.strdup_calls_made, memory_statistics.strdup_calls_made - old.strdup_calls_made, + memory_statistics.free_calls_made, memory_statistics.free_calls_made - old.free_calls_made + ); + + memcpy(&old, &memory_statistics, sizeof(struct memory_statistics)); + //} +} + +static inline void malloc_accounting(const char *file, const char *function, const unsigned long line, size_t size) { +#if defined(HAVE_C___ATOMIC) && !defined(NETDATA_NO_ATOMIC_INSTRUCTIONS) + __atomic_fetch_add(&memory_statistics.memory_calls_made, 1, __ATOMIC_SEQ_CST); + __atomic_fetch_add(&memory_statistics.malloc_calls_made, 1, __ATOMIC_SEQ_CST); + __atomic_fetch_add(&memory_statistics.allocated_memory, size, __ATOMIC_SEQ_CST); +#else + // this is for debugging - we don't care locking it + memory_statistics.memory_calls_made++; + memory_statistics.malloc_calls_made++; + memory_statistics.allocated_memory += size; +#endif + print_allocations(file, function, line); +} + +static inline void mmap_accounting(size_t size) { +#if defined(HAVE_C___ATOMIC) && !defined(NETDATA_NO_ATOMIC_INSTRUCTIONS) + __atomic_fetch_add(&memory_statistics.malloc_calls_made, 1, __ATOMIC_SEQ_CST); + __atomic_fetch_add(&memory_statistics.mmapped_memory, size, __ATOMIC_SEQ_CST); +#else + // this is for debugging - we don't care locking it + memory_statistics.memory_calls_made++; + memory_statistics.mmapped_memory += size; +#endif +} + +static inline void calloc_accounting(const char *file, const char *function, const unsigned long line, size_t size) { +#if defined(HAVE_C___ATOMIC) && !defined(NETDATA_NO_ATOMIC_INSTRUCTIONS) + __atomic_fetch_add(&memory_statistics.memory_calls_made, 1, __ATOMIC_SEQ_CST); + __atomic_fetch_add(&memory_statistics.calloc_calls_made, 1, __ATOMIC_SEQ_CST); + __atomic_fetch_add(&memory_statistics.allocated_memory, size, __ATOMIC_SEQ_CST); +#else + // this is for 
debugging - we don't care locking it + memory_statistics.memory_calls_made++; + memory_statistics.calloc_calls_made++; + memory_statistics.allocated_memory += size; +#endif + print_allocations(file, function, line); +} + +static inline void realloc_accounting(const char *file, const char *function, const unsigned long line, void *ptr, size_t size) { + (void)ptr; + +#if defined(HAVE_C___ATOMIC) && !defined(NETDATA_NO_ATOMIC_INSTRUCTIONS) + __atomic_fetch_add(&memory_statistics.memory_calls_made, 1, __ATOMIC_SEQ_CST); + __atomic_fetch_add(&memory_statistics.realloc_calls_made, 1, __ATOMIC_SEQ_CST); + __atomic_fetch_add(&memory_statistics.allocated_memory, size, __ATOMIC_SEQ_CST); +#else + // this is for debugging - we don't care locking it + memory_statistics.memory_calls_made++; + memory_statistics.realloc_calls_made++; + memory_statistics.allocated_memory += size; +#endif + print_allocations(file, function, line); +} + +static inline void strdup_accounting(const char *file, const char *function, const unsigned long line, const char *s) { + size_t size = strlen(s) + 1; + +#if defined(HAVE_C___ATOMIC) && !defined(NETDATA_NO_ATOMIC_INSTRUCTIONS) + __atomic_fetch_add(&memory_statistics.memory_calls_made, 1, __ATOMIC_SEQ_CST); + __atomic_fetch_add(&memory_statistics.strdup_calls_made, 1, __ATOMIC_SEQ_CST); + __atomic_fetch_add(&memory_statistics.allocated_memory, size, __ATOMIC_SEQ_CST); +#else + // this is for debugging - we don't care locking it + memory_statistics.memory_calls_made++; + memory_statistics.strdup_calls_made++; + memory_statistics.allocated_memory += size; +#endif + print_allocations(file, function, line); +} + +static inline void free_accounting(const char *file, const char *function, const unsigned long line, void *ptr) { + (void)file; + (void)function; + (void)line; + + if(likely(ptr)) { +#if defined(HAVE_C___ATOMIC) && !defined(NETDATA_NO_ATOMIC_INSTRUCTIONS) + __atomic_fetch_add(&memory_statistics.memory_calls_made, 1, __ATOMIC_SEQ_CST); + 
__atomic_fetch_add(&memory_statistics.free_calls_made, 1, __ATOMIC_SEQ_CST); +#else + // this is for debugging - we don't care locking it + memory_statistics.memory_calls_made++; + memory_statistics.free_calls_made++; +#endif + } +} +#endif + +#ifdef NETDATA_LOG_ALLOCATIONS +char *strdupz_int(const char *file, const char *function, const unsigned long line, const char *s) { + strdup_accounting(file, function, line, s); +#else char *strdupz(const char *s) { +#endif + char *t = strdup(s); if (unlikely(!t)) fatal("Cannot strdup() string '%s'", s); return t; } +#ifdef NETDATA_LOG_ALLOCATIONS +void *mallocz_int(const char *file, const char *function, const unsigned long line, size_t size) { + malloc_accounting(file, function, line, size); +#else void *mallocz(size_t size) { +#endif + void *p = malloc(size); if (unlikely(!p)) fatal("Cannot allocate %zu bytes of memory.", size); return p; } +#ifdef NETDATA_LOG_ALLOCATIONS +void *callocz_int(const char *file, const char *function, const unsigned long line, size_t nmemb, size_t size) { + calloc_accounting(file, function, line, nmemb * size); +#else void *callocz(size_t nmemb, size_t size) { +#endif + void *p = calloc(nmemb, size); if (unlikely(!p)) fatal("Cannot allocate %zu bytes of memory.", nmemb * size); return p; } +#ifdef NETDATA_LOG_ALLOCATIONS +void *reallocz_int(const char *file, const char *function, const unsigned long line, void *ptr, size_t size) { + realloc_accounting(file, function, line, ptr, size); +#else void *reallocz(void *ptr, size_t size) { +#endif + void *p = realloc(ptr, size); if (unlikely(!p)) fatal("Cannot re-allocate memory to %zu bytes.", size); return p; } +#ifdef NETDATA_LOG_ALLOCATIONS +void freez_int(const char *file, const char *function, const unsigned long line, void *ptr) { + free_accounting(file, function, line, ptr); +#else void freez(void *ptr) { +#endif + free(ptr); } @@ -770,7 +920,14 @@ void *mymmap(const char *filename, size_t size, int flags, int ksm) { if (flags & MAP_SHARED || 
!enable_ksm || !ksm) { #endif mem = mmap(NULL, size, PROT_READ | PROT_WRITE, flags, fd, 0); - if (mem != MAP_FAILED) { + if (mem == MAP_FAILED) { + error("Cannot allocate SHARED memory for file '%s'.", filename); + mem = NULL; + } + else { +#ifdef NETDATA_LOG_ALLOCATIONS + mmap_accounting(size); +#endif int advise = MADV_SEQUENTIAL | MADV_DONTFORK; if (flags & MAP_SHARED) advise |= MADV_WILLNEED; @@ -780,7 +937,8 @@ void *mymmap(const char *filename, size_t size, int flags, int ksm) { } } #ifdef MADV_MERGEABLE - } else { + } + else { /* // test - load the file into memory mem = calloc(1, size); @@ -794,7 +952,14 @@ void *mymmap(const char *filename, size_t size, int flags, int ksm) { } */ mem = mmap(NULL, size, PROT_READ | PROT_WRITE, flags | MAP_ANONYMOUS, -1, 0); - if (mem != MAP_FAILED) { + if (mem == MAP_FAILED) { + error("Cannot allocate PRIVATE ANONYMOUS memory for KSM for file '%s'.", filename); + mem = NULL; + } + else { +#ifdef NETDATA_LOG_ALLOCATIONS + mmap_accounting(size); +#endif if (lseek(fd, 0, SEEK_SET) == 0) { if (read(fd, mem, size) != (ssize_t) size) error("Cannot read from file '%s'", filename); @@ -813,17 +978,19 @@ void *mymmap(const char *filename, size_t size, int flags, int ksm) { filename); log_madvise_3--; } - } else - error("Cannot allocate PRIVATE ANONYMOUS memory for KSM for file '%s'.", filename); + } } #endif - } else + } + else error("Cannot write to file '%s' at position %zu.", filename, size); - } else + } + else error("Cannot seek file '%s' to size %zu.", filename, size); close(fd); - } else + } + else error("Cannot create/open file '%s'.", filename); return mem; @@ -860,23 +1027,6 @@ int fd_is_valid(int fd) { return fcntl(fd, F_GETFD) != -1 || errno != EBADF; } -/* - *************************************************************************** - * Get number of clock ticks per second. 
- *************************************************************************** - */ -unsigned int hz; - -void get_HZ(void) { - long ticks; - - if ((ticks = sysconf(_SC_CLK_TCK)) == -1) { - perror("sysconf"); - } - - hz = (unsigned int) ticks; -} - pid_t gettid(void) { return (pid_t)syscall(SYS_gettid); } @@ -933,3 +1083,103 @@ int snprintfz(char *dst, size_t n, const char *fmt, ...) { return ret; } + +// ---------------------------------------------------------------------------- +// system functions +// to retrieve settings of the system + +int processors = 1; +long get_system_cpus(void) { + procfile *ff = NULL; + + processors = 1; + + char filename[FILENAME_MAX + 1]; + snprintfz(filename, FILENAME_MAX, "%s/proc/stat", global_host_prefix); + + ff = procfile_open(filename, NULL, PROCFILE_FLAG_DEFAULT); + if(!ff) { + error("Cannot open file '%s'. Assuming system has %d processors.", filename, processors); + return processors; + } + + ff = procfile_readall(ff); + if(!ff) { + error("Cannot open file '%s'. Assuming system has %d processors.", filename, processors); + return processors; + } + + processors = 0; + unsigned int i; + for(i = 0; i < procfile_lines(ff); i++) { + if(!procfile_linewords(ff, i)) continue; + + if(strncmp(procfile_lineword(ff, i, 0), "cpu", 3) == 0) processors++; + } + processors--; + if(processors < 1) processors = 1; + + procfile_close(ff); + + info("System has %d processors.", processors); + return processors; +} + +pid_t pid_max = 32768; +pid_t get_system_pid_max(void) { + procfile *ff = NULL; + + char filename[FILENAME_MAX + 1]; + snprintfz(filename, FILENAME_MAX, "%s/proc/sys/kernel/pid_max", global_host_prefix); + ff = procfile_open(filename, NULL, PROCFILE_FLAG_DEFAULT); + if(!ff) { + error("Cannot open file '%s'. Assuming system supports %d pids.", filename, pid_max); + return pid_max; + } + + ff = procfile_readall(ff); + if(!ff) { + error("Cannot read file '%s'. 
Assuming system supports %d pids.", filename, pid_max); + return pid_max; + } + + pid_max = (pid_t)atoi(procfile_lineword(ff, 0, 0)); + if(!pid_max) { + procfile_close(ff); + pid_max = 32768; + error("Cannot parse file '%s'. Assuming system supports %d pids.", filename, pid_max); + return pid_max; + } + + procfile_close(ff); + info("System supports %d pids.", pid_max); + return pid_max; +} + +unsigned int hz; +void get_system_HZ(void) { + long ticks; + + if ((ticks = sysconf(_SC_CLK_TCK)) == -1) { + perror("sysconf"); + } + + hz = (unsigned int) ticks; +} + +int read_single_number_file(const char *filename, unsigned long long *result) { + char buffer[1024 + 1]; + + int fd = open(filename, O_RDONLY, 0666); + if(unlikely(fd == -1)) return 1; + + ssize_t r = read(fd, buffer, 1024); + if(unlikely(r == -1)) { + close(fd); + return 2; + } + + close(fd); + *result = strtoull(buffer, NULL, 0); + return 0; +} diff --git a/src/common.h b/src/common.h index a6e85034..9ffa8c8b 100644 --- a/src/common.h +++ b/src/common.h @@ -5,6 +5,25 @@ #include <config.h> #endif +/* select the memory allocator, based on autoconf findings */ +#if defined(ENABLE_JEMALLOC) + +#if defined(HAVE_JEMALLOC_JEMALLOC_H) +#include <jemalloc/jemalloc.h> +#else +#include <malloc.h> +#endif + +#elif defined(ENABLE_TCMALLOC) + +#include <google/tcmalloc.h> + +#else /* !defined(ENABLE_JEMALLOC) && !defined(ENABLE_TCMALLOC) */ + +#include <malloc.h> + +#endif + #include <pthread.h> #include <errno.h> @@ -27,7 +46,7 @@ #include <grp.h> #include <pwd.h> #include <locale.h> -#include <malloc.h> + #include <netdb.h> #include <poll.h> #include <signal.h> @@ -60,24 +79,16 @@ #include <zlib.h> #endif -#ifndef __ATOMIC_SEQ_CST -#define NETDATA_NO_ATOMIC_INSTRUCTIONS 1 -#endif - -#ifdef __GNUC__ -#define GCC_VERSION (__GNUC__ * 10000 \ - + __GNUC_MINOR__ * 100 \ - + __GNUC_PATCHLEVEL__) - -#if __x86_64__ || __ppc64__ +#if (SIZEOF_VOID_P == 8) #define ENVIRONMENT64 -#else +#elif (SIZEOF_VOID_P == 4) #define 
ENVIRONMENT32 +#else +#error "Cannot detect if this is a 32 or 64 bit CPU" #endif -#else // !__GNUC__ -#define NETDATA_NO_ATOMIC_INSTRUCTIONS 1 -#define ENVIRONMENT32 +#ifdef __GNUC__ +#define GCC_VERSION (__GNUC__ * 10000 + __GNUC_MINOR__ * 100 + __GNUC_PATCHLEVEL__) #endif // __GNUC__ #include "avl.h" @@ -139,11 +150,25 @@ extern int vsnprintfz(char *dst, size_t n, const char *fmt, va_list args); extern int snprintfz(char *dst, size_t n, const char *fmt, ...) __attribute__ (( format (printf, 3, 4))); // memory allocation functions that handle failures +#ifdef NETDATA_LOG_ALLOCATIONS +#define strdupz(s) strdupz_int(__FILE__, __FUNCTION__, __LINE__, s) +#define callocz(nmemb, size) callocz_int(__FILE__, __FUNCTION__, __LINE__, nmemb, size) +#define mallocz(size) mallocz_int(__FILE__, __FUNCTION__, __LINE__, size) +#define reallocz(ptr, size) reallocz_int(__FILE__, __FUNCTION__, __LINE__, ptr, size) +#define freez(ptr) freez_int(__FILE__, __FUNCTION__, __LINE__, ptr) + +extern char *strdupz_int(const char *file, const char *function, const unsigned long line, const char *s); +extern void *callocz_int(const char *file, const char *function, const unsigned long line, size_t nmemb, size_t size); +extern void *mallocz_int(const char *file, const char *function, const unsigned long line, size_t size); +extern void *reallocz_int(const char *file, const char *function, const unsigned long line, void *ptr, size_t size); +extern void freez_int(const char *file, const char *function, const unsigned long line, void *ptr); +#else extern char *strdupz(const char *s); extern void *callocz(size_t nmemb, size_t size); extern void *mallocz(size_t size); -extern void freez(void *ptr); extern void *reallocz(void *ptr, size_t size); +extern void freez(void *ptr); +#endif extern void *mymmap(const char *filename, size_t size, int flags, int ksm); extern int savememory(const char *filename, void *mem, size_t size); @@ -153,10 +178,6 @@ extern int fd_is_valid(int fd); extern char 
*global_host_prefix; extern int enable_ksm; -/* Number of ticks per second */ -extern unsigned int hz; -extern void get_HZ(void); - extern pid_t gettid(void); extern unsigned long long time_usec(void); @@ -164,6 +185,17 @@ extern int sleep_usec(unsigned long long usec); extern char *fgets_trim_len(char *buf, size_t buf_size, FILE *fp, size_t *len); +extern int processors; +extern long get_system_cpus(void); + +extern pid_t pid_max; +extern pid_t get_system_pid_max(void); + +/* Number of ticks per second */ +extern unsigned int hz; +extern void get_system_HZ(void); + + /* fix for alpine linux */ #ifndef RUSAGE_THREAD #ifdef RUSAGE_CHILDREN @@ -171,4 +203,6 @@ extern char *fgets_trim_len(char *buf, size_t buf_size, FILE *fp, size_t *len); #endif #endif +extern int read_single_number_file(const char *filename, unsigned long long *result); + #endif /* NETDATA_COMMON_H */ diff --git a/src/daemon.c b/src/daemon.c index bc4614f0..1c34405d 100644 --- a/src/daemon.c +++ b/src/daemon.c @@ -15,27 +15,30 @@ void sig_handler_exit(int signo) void sig_handler_logrotate(int signo) { if(signo) { - error_log_limit_reset(); + error_log_limit_unlimited(); info("Received signal %d to re-open the log files", signo); reopen_all_log_files(); + error_log_limit_reset(); } } void sig_handler_save(int signo) { if(signo) { - error_log_limit_reset(); + error_log_limit_unlimited(); info("Received signal %d to save the database...", signo); rrdset_save_all(); + error_log_limit_reset(); } } void sig_handler_reload_health(int signo) { if(signo) { - error_log_limit_reset(); + error_log_limit_unlimited(); info("Received signal %d to reload health configuration...", signo); health_reload(); + error_log_limit_reset(); } } @@ -74,7 +74,7 @@ static inline calculated_number eval_check_number(calculated_number n, int *erro } static inline calculated_number eval_variable(EVAL_EXPRESSION *exp, EVAL_VARIABLE *v, int *error) { - static uint32_t this_hash = 0, now_hash = 0, after_hash = 0, before_hash = 0; + 
static uint32_t this_hash = 0, now_hash = 0, after_hash = 0, before_hash = 0, status_hash = 0, removed_hash = 0, uninitialized_hash = 0, undefined_hash = 0, clear_hash = 0, warning_hash = 0, critical_hash = 0; calculated_number n; if(unlikely(this_hash == 0)) { @@ -82,9 +82,16 @@ static inline calculated_number eval_variable(EVAL_EXPRESSION *exp, EVAL_VARIABL now_hash = simple_hash("now"); after_hash = simple_hash("after"); before_hash = simple_hash("before"); + status_hash = simple_hash("status"); + removed_hash = simple_hash("REMOVED"); + uninitialized_hash = simple_hash("UNINITIALIZED"); + undefined_hash = simple_hash("UNDEFINED"); + clear_hash = simple_hash("CLEAR"); + warning_hash = simple_hash("WARNING"); + critical_hash = simple_hash("CRITICAL"); } - if(v->hash == this_hash && !strcmp(v->name, "this")) { + if(unlikely(v->hash == this_hash && !strcmp(v->name, "this"))) { n = (exp->this)?*exp->this:NAN; buffer_strcat(exp->error_msg, "[ $this = "); print_parsed_as_constant(exp->error_msg, n); @@ -92,7 +99,7 @@ static inline calculated_number eval_variable(EVAL_EXPRESSION *exp, EVAL_VARIABL return n; } - if(v->hash == after_hash && !strcmp(v->name, "after")) { + if(unlikely(v->hash == after_hash && !strcmp(v->name, "after"))) { n = (exp->after && *exp->after)?*exp->after:NAN; buffer_strcat(exp->error_msg, "[ $after = "); print_parsed_as_constant(exp->error_msg, n); @@ -100,7 +107,7 @@ static inline calculated_number eval_variable(EVAL_EXPRESSION *exp, EVAL_VARIABL return n; } - if(v->hash == before_hash && !strcmp(v->name, "before")) { + if(unlikely(v->hash == before_hash && !strcmp(v->name, "before"))) { n = (exp->before && *exp->before)?*exp->before:NAN; buffer_strcat(exp->error_msg, "[ $before = "); print_parsed_as_constant(exp->error_msg, n); @@ -108,7 +115,7 @@ static inline calculated_number eval_variable(EVAL_EXPRESSION *exp, EVAL_VARIABL return n; } - if(v->hash == now_hash && !strcmp(v->name, "now")) { + if(unlikely(v->hash == now_hash && 
!strcmp(v->name, "now"))) { n = time(NULL); buffer_strcat(exp->error_msg, "[ $now = "); print_parsed_as_constant(exp->error_msg, n); @@ -116,6 +123,62 @@ static inline calculated_number eval_variable(EVAL_EXPRESSION *exp, EVAL_VARIABL return n; } + if(unlikely(v->hash == status_hash && !strcmp(v->name, "status"))) { + n = (exp->status)?*exp->status:RRDCALC_STATUS_UNINITIALIZED; + buffer_strcat(exp->error_msg, "[ $status = "); + print_parsed_as_constant(exp->error_msg, n); + buffer_strcat(exp->error_msg, " ] "); + return n; + } + + if(unlikely(v->hash == removed_hash && !strcmp(v->name, "REMOVED"))) { + n = RRDCALC_STATUS_REMOVED; + buffer_strcat(exp->error_msg, "[ $REMOVED = "); + print_parsed_as_constant(exp->error_msg, n); + buffer_strcat(exp->error_msg, " ] "); + return n; + } + + if(unlikely(v->hash == uninitialized_hash && !strcmp(v->name, "UNINITIALIZED"))) { + n = RRDCALC_STATUS_UNINITIALIZED; + buffer_strcat(exp->error_msg, "[ $UNINITIALIZED = "); + print_parsed_as_constant(exp->error_msg, n); + buffer_strcat(exp->error_msg, " ] "); + return n; + } + + if(unlikely(v->hash == undefined_hash && !strcmp(v->name, "UNDEFINED"))) { + n = RRDCALC_STATUS_UNDEFINED; + buffer_strcat(exp->error_msg, "[ $UNDEFINED = "); + print_parsed_as_constant(exp->error_msg, n); + buffer_strcat(exp->error_msg, " ] "); + return n; + } + + if(unlikely(v->hash == clear_hash && !strcmp(v->name, "CLEAR"))) { + n = RRDCALC_STATUS_CLEAR; + buffer_strcat(exp->error_msg, "[ $CLEAR = "); + print_parsed_as_constant(exp->error_msg, n); + buffer_strcat(exp->error_msg, " ] "); + return n; + } + + if(unlikely(v->hash == warning_hash && !strcmp(v->name, "WARNING"))) { + n = RRDCALC_STATUS_WARNING; + buffer_strcat(exp->error_msg, "[ $WARNING = "); + print_parsed_as_constant(exp->error_msg, n); + buffer_strcat(exp->error_msg, " ] "); + return n; + } + + if(unlikely(v->hash == critical_hash && !strcmp(v->name, "CRITICAL"))) { + n = RRDCALC_STATUS_CRITICAL; + buffer_strcat(exp->error_msg, "[ $CRITICAL 
= "); + print_parsed_as_constant(exp->error_msg, n); + buffer_strcat(exp->error_msg, " ] "); + return n; + } + if(exp->rrdcalc && health_variable_lookup(v->name, v->hash, exp->rrdcalc, &n)) { buffer_sprintf(exp->error_msg, "[ $%s = ", v->name); print_parsed_as_constant(exp->error_msg, n); @@ -265,7 +328,6 @@ static struct operator { // this is a random access array // we always access it with a known EVAL_OPERATOR_X - [EVAL_OPERATOR_IF_THEN_ELSE] = { "?", 1, 3, 0, eval_if_then_else }, [EVAL_OPERATOR_AND] = { "&&", 2, 2, 0, eval_and }, [EVAL_OPERATOR_OR] = { "||", 2, 2, 0, eval_or }, [EVAL_OPERATOR_GREATER_THAN_OR_EQUAL] = { ">=", 3, 2, 0, eval_greater_than_or_equal }, @@ -282,8 +344,9 @@ static struct operator { [EVAL_OPERATOR_SIGN_PLUS] = { "+", 6, 1, 0, eval_sign_plus }, [EVAL_OPERATOR_SIGN_MINUS] = { "-", 6, 1, 0, eval_sign_minus }, [EVAL_OPERATOR_ABS] = { "abs(",6,1, 1, eval_abs }, - [EVAL_OPERATOR_NOP] = { NULL, 7, 1, 0, eval_nop }, - [EVAL_OPERATOR_EXPRESSION_OPEN] = { NULL, 7, 1, 0, eval_nop }, + [EVAL_OPERATOR_IF_THEN_ELSE] = { "?", 7, 3, 0, eval_if_then_else }, + [EVAL_OPERATOR_NOP] = { NULL, 8, 1, 0, eval_nop }, + [EVAL_OPERATOR_EXPRESSION_OPEN] = { NULL, 8, 1, 0, eval_nop }, // this should exist in our evaluation list [EVAL_OPERATOR_EXPRESSION_CLOSE] = { NULL, 99, 1, 0, eval_nop } @@ -14,6 +14,7 @@ typedef struct eval_expression { const char *source; const char *parsed_as; + int *status; calculated_number *this; time_t *after; time_t *before; diff --git a/src/global_statistics.c b/src/global_statistics.c index f39a4cf2..bb2b1f08 100644 --- a/src/global_statistics.c +++ b/src/global_statistics.c @@ -25,7 +25,7 @@ void finished_web_request_statistics(uint64_t dt, uint64_t bytes_sent, uint64_t content_size, uint64_t compressed_content_size) { -#ifndef NETDATA_NO_ATOMIC_INSTRUCTIONS +#if defined(HAVE_C___ATOMIC) && !defined(NETDATA_NO_ATOMIC_INSTRUCTIONS) uint64_t old_web_usec_max = global_statistics.web_usec_max; while(dt > old_web_usec_max) 
__atomic_compare_exchange(&global_statistics.web_usec_max, &old_web_usec_max, &dt, 1, __ATOMIC_SEQ_CST, __ATOMIC_SEQ_CST); @@ -57,7 +57,7 @@ void finished_web_request_statistics(uint64_t dt, } void web_client_connected(void) { -#ifndef NETDATA_NO_ATOMIC_INSTRUCTIONS +#if defined(HAVE_C___ATOMIC) && !defined(NETDATA_NO_ATOMIC_INSTRUCTIONS) __atomic_fetch_add(&global_statistics.connected_clients, 1, __ATOMIC_SEQ_CST); #else if (web_server_mode == WEB_SERVER_MODE_MULTI_THREADED) @@ -71,7 +71,7 @@ void web_client_connected(void) { } void web_client_disconnected(void) { -#ifndef NETDATA_NO_ATOMIC_INSTRUCTIONS +#if defined(HAVE_C___ATOMIC) && !defined(NETDATA_NO_ATOMIC_INSTRUCTIONS) __atomic_fetch_sub(&global_statistics.connected_clients, 1, __ATOMIC_SEQ_CST); #else if (web_server_mode == WEB_SERVER_MODE_MULTI_THREADED) @@ -86,7 +86,7 @@ void web_client_disconnected(void) { inline void global_statistics_copy(struct global_statistics *gs, uint8_t options) { -#ifndef NETDATA_NO_ATOMIC_INSTRUCTIONS +#if defined(HAVE_C___ATOMIC) && !defined(NETDATA_NO_ATOMIC_INSTRUCTIONS) gs->connected_clients = __atomic_fetch_add(&global_statistics.connected_clients, 0, __ATOMIC_SEQ_CST); gs->web_requests = __atomic_fetch_add(&global_statistics.web_requests, 0, __ATOMIC_SEQ_CST); gs->web_usec = __atomic_fetch_add(&global_statistics.web_usec, 0, __ATOMIC_SEQ_CST); @@ -259,4 +259,4 @@ void global_statistics_charts(void) { rrddim_set(stcompression, "savings", compression_ratio); rrdset_done(stcompression); -}
\ No newline at end of file +} diff --git a/src/health.c b/src/health.c index 3156cd08..596b143a 100644 --- a/src/health.c +++ b/src/health.c @@ -2,10 +2,179 @@ #define RRDVAR_MAX_LENGTH 1024 -static const char *health_default_exec = PLUGINS_DIR "/alarm-email.sh"; +struct health_options { + const char *health_default_exec; + const char *health_default_recipient; + const char *log_filename; + FILE *log_fp; +}; + +static struct health_options health = { + .health_default_exec = PLUGINS_DIR "/alarm-notify.sh", + .health_default_recipient = "root", + .log_filename = VARLIB_DIR "/health/alarm_log.db", + .log_fp = NULL +}; + int health_enabled = 1; // ---------------------------------------------------------------------------- +// health alarm log load/save +// no need for locking - only one thread is reading / writing the alarms log + +static inline int health_alarm_log_open(void) { + if(health.log_fp) + fclose(health.log_fp); + + health.log_fp = fopen(health.log_filename, "a"); + + if(health.log_fp) { + if (setvbuf(health.log_fp, NULL, _IOLBF, 0) != 0) + error("Cannot set line buffering on health log file."); + return 0; + } + + error("Cannot open health log file '%s'. 
Health data will be lost in case of netdata or server crash.", health.log_filename); + return -1; +} + +static inline void health_alarm_log_close(void) { + if(health.log_fp) { + fclose(health.log_fp); + health.log_fp = NULL; + } +} + +static inline void health_log_recreate(void) { + if(health.log_fp != NULL) { + health_alarm_log_close(); + + // open it with truncate + health.log_fp = fopen(health.log_filename, "w"); + if(health.log_fp) fclose(health.log_fp); + else error("Cannot truncate health log '%s'", health.log_filename); + + health.log_fp = NULL; + + health_alarm_log_open(); + } +} + +static inline void health_alarm_log_save(RRDHOST *host, ALARM_ENTRY *ae) { + (void)host; + (void)ae; + +/* if(likely(health.log_fp)) { + if(unlikely(fprintf(health.log_fp, "A\t%s\t%08x\t%08x\t%08x\t%08x\t%08x\t%08x\t%s\t%s\t%s\t%s\t%s\t%08x\n", + host->hostname, + ae->unique_id, + ae->alarm_id, + ae->alarm_event_id, + (uint32_t)ae->when, + (uint32_t)ae->duration, + (uint32_t)ae->non_clear_duration, + (uint32_t)ae->exec_run_timestamp, + ae->name, + ae->chart, + ae->family, + ae->exec, + ae->recipient + ) < 0)) + error("Health: failed to save alarm log entry. 
Health data may be lost in case of abnormal restart."); + } +*/ +} + +static inline void health_alarm_log_load(RRDHOST *host) { + (void)host; + +} + +// ---------------------------------------------------------------------------- +// health alarm log management + +static inline void health_alarm_log(RRDHOST *host, + uint32_t alarm_id, uint32_t alarm_event_id, + time_t when, + const char *name, const char *chart, const char *family, + const char *exec, const char *recipient, time_t duration, + calculated_number old_value, calculated_number new_value, + int old_status, int new_status, + const char *source, + const char *units, + const char *info, + int delay +) { + debug(D_HEALTH, "Health adding alarm log entry with id: %u", host->health_log.next_log_id); + + ALARM_ENTRY *ae = callocz(1, sizeof(ALARM_ENTRY)); + ae->name = strdupz(name); + ae->hash_name = simple_hash(ae->name); + + if(chart) { + ae->chart = strdupz(chart); + ae->hash_chart = simple_hash(ae->chart); + } + + if(family) + ae->family = strdupz(family); + + if(exec) ae->exec = strdupz(exec); + if(recipient) ae->recipient = strdupz(recipient); + if(source) ae->source = strdupz(source); + if(units) ae->units = strdupz(units); + if(info) ae->info = strdupz(info); + + ae->unique_id = host->health_log.next_log_id++; + ae->alarm_id = alarm_id; + ae->alarm_event_id = alarm_event_id; + ae->when = when; + ae->old_value = old_value; + ae->new_value = new_value; + ae->old_status = old_status; + ae->new_status = new_status; + ae->duration = duration; + ae->delay = delay; + ae->delay_up_to_timestamp = when + delay; + + if(ae->old_status == RRDCALC_STATUS_WARNING || ae->old_status == RRDCALC_STATUS_CRITICAL) + ae->non_clear_duration += ae->duration; + + // link it + pthread_rwlock_wrlock(&host->health_log.alarm_log_rwlock); + ae->next = host->health_log.alarms; + host->health_log.alarms = ae; + host->health_log.count++; + pthread_rwlock_unlock(&host->health_log.alarm_log_rwlock); + + // match previous alarms + 
pthread_rwlock_rdlock(&host->health_log.alarm_log_rwlock); + ALARM_ENTRY *t; + for(t = host->health_log.alarms ; t ; t = t->next) { + if(t != ae && t->alarm_id == ae->alarm_id) { + if(!(t->notifications & HEALTH_ENTRY_NOTIFICATIONS_UPDATED) && !t->updated_by_id) { + t->notifications |= HEALTH_ENTRY_NOTIFICATIONS_UPDATED; + t->updated_by_id = ae->unique_id; + ae->updates_id = t->unique_id; + + if((t->new_status == RRDCALC_STATUS_WARNING || t->new_status == RRDCALC_STATUS_CRITICAL) && + (t->old_status == RRDCALC_STATUS_WARNING || t->old_status == RRDCALC_STATUS_CRITICAL)) + ae->non_clear_duration += t->non_clear_duration; + + health_alarm_log_save(host, t); + } + else { + // no need to continue + break; + } + } + } + pthread_rwlock_unlock(&host->health_log.alarm_log_rwlock); + + health_alarm_log_save(host, ae); +} + +// ---------------------------------------------------------------------------- // RRDVAR management static inline int rrdvar_fix_name(char *variable) { @@ -410,12 +579,15 @@ void rrddimvar_free(RRDDIMVAR *rs) { static inline const char *rrdcalc_status2string(int status) { switch(status) { - case RRDCALC_STATUS_UNINITIALIZED: - return "UNINITIALIZED"; + case RRDCALC_STATUS_REMOVED: + return "REMOVED"; case RRDCALC_STATUS_UNDEFINED: return "UNDEFINED"; + case RRDCALC_STATUS_UNINITIALIZED: + return "UNINITIALIZED"; + case RRDCALC_STATUS_CLEAR: return "CLEAR"; @@ -429,6 +601,7 @@ static inline const char *rrdcalc_status2string(int status) { return "CRITICAL"; default: + error("Unknown alarm status %d", status); return "UNKNOWN"; } } @@ -441,6 +614,10 @@ static void rrdsetcalc_link(RRDSET *st, RRDCALC *rc) { rc->rrdset_next = st->alarms; rc->rrdset_prev = NULL; + + if(rc->rrdset_next) + rc->rrdset_next->rrdset_prev = rc; + st->alarms = rc; if(rc->update_every < rc->rrdset->update_every) { @@ -469,6 +646,11 @@ static void rrdsetcalc_link(RRDSET *st, RRDCALC *rc) { rc->hostname = rrdvar_create_and_index("host", &st->rrdhost->variables_root_index, fullname, 
RRDVAR_TYPE_CALCULATED, &rc->value); if(!rc->units) rc->units = strdupz(st->units); + + { + time_t now = time(NULL); + health_alarm_log(st->rrdhost, rc->id, rc->next_event_id++, now, rc->name, rc->rrdset->id, rc->rrdset->family, rc->exec, rc->recipient, now - rc->last_status_change, rc->old_value, rc->value, rc->status, RRDCALC_STATUS_UNINITIALIZED, rc->source, rc->units, rc->info, 0); + } } static inline int rrdcalc_is_matching_this_rrdset(RRDCALC *rc, RRDSET *st) { @@ -485,9 +667,10 @@ inline void rrdsetcalc_link_matching(RRDSET *st) { RRDCALC *rc; for(rc = st->rrdhost->alarms; rc ; rc = rc->next) { - if(rc->rrdset) continue; + if(unlikely(rc->rrdset)) + continue; - if(rrdcalc_is_matching_this_rrdset(rc, st)) + if(unlikely(rrdcalc_is_matching_this_rrdset(rc, st))) rrdsetcalc_link(st, rc); } } @@ -497,10 +680,16 @@ inline void rrdsetcalc_unlink(RRDCALC *rc) { RRDSET *st = rc->rrdset; if(!st) { + debug(D_HEALTH, "Requested to unlink RRDCALC '%s.%s' which is not linked to any RRDSET", rc->chart?rc->chart:"NOCHART", rc->name); error("Requested to unlink RRDCALC '%s.%s' which is not linked to any RRDSET", rc->chart?rc->chart:"NOCHART", rc->name); return; } + { + time_t now = time(NULL); + health_alarm_log(st->rrdhost, rc->id, rc->next_event_id++, now, rc->name, rc->rrdset->id, rc->rrdset->family, rc->exec, rc->recipient, now - rc->last_status_change, rc->old_value, rc->value, rc->status, RRDCALC_STATUS_REMOVED, rc->source, rc->units, rc->info, 0); + } + RRDHOST *host = st->rrdhost; debug(D_HEALTH, "Health unlinking alarm '%s.%s' from chart '%s' of host '%s'", rc->chart?rc->chart:"NOCHART", rc->name, st->id, host->hostname); @@ -541,20 +730,29 @@ RRDCALC *rrdcalc_find(RRDSET *st, const char *name) { uint32_t hash = simple_hash(name); for( rc = st->alarms; rc ; rc = rc->rrdset_next ) { - if(rc->hash == hash && !strcmp(rc->name, name)) + if(unlikely(rc->hash == hash && !strcmp(rc->name, name))) return rc; } return NULL; } -static inline int rrdcalc_exists(RRDHOST *host, 
const char *name, uint32_t hash) { +static inline int rrdcalc_exists(RRDHOST *host, const char *chart, const char *name, uint32_t hash_chart, uint32_t hash_name) { RRDCALC *rc; + if(unlikely(!chart)) { + error("attempt to find RRDCALC '%s' without giving a chart name", name); + return 1; + } + + if(unlikely(!hash_chart)) hash_chart = simple_hash(chart); + if(unlikely(!hash_name)) hash_name = simple_hash(name); + // make sure it does not already exist for(rc = host->alarms; rc ; rc = rc->next) { - if (rc->hash == hash && !strcmp(name, rc->name)) { - error("Health alarm '%s' already exists in host '%s'.", name, host->hostname); + if (unlikely(rc->chart && rc->hash == hash_name && rc->hash_chart == hash_chart && !strcmp(name, rc->name) && !strcmp(chart, rc->chart))) { + debug(D_HEALTH, "Health alarm '%s.%s' already exists in host '%s'.", chart, name, host->hostname); + error("Health alarm '%s.%s' already exists in host '%s'.", chart, name, host->hostname); return 1; } } @@ -562,10 +760,29 @@ static inline int rrdcalc_exists(RRDHOST *host, const char *name, uint32_t hash) return 0; } +static inline uint32_t rrdcalc_get_unique_id(RRDHOST *host, const char *chart, const char *name, uint32_t *next_event_id) { + if(chart && name) { + uint32_t hash_chart = simple_hash(chart); + uint32_t hash_name = simple_hash(name); + + // re-use old IDs, by looking them up in the alarm log + ALARM_ENTRY *ae; + for(ae = host->health_log.alarms; ae ;ae = ae->next) { + if(unlikely(ae->hash_name == hash_name && ae->hash_chart == hash_chart && !strcmp(name, ae->name) && !strcmp(chart, ae->chart))) { + if(next_event_id) *next_event_id = ae->alarm_event_id + 1; + return ae->alarm_id; + } + } + } + + return host->health_log.next_alarm_id++; +} + static inline void rrdcalc_create_part2(RRDHOST *host, RRDCALC *rc) { rrdhost_check_rdlock(host); if(rc->calculation) { + rc->calculation->status = &rc->status; rc->calculation->this = &rc->value; rc->calculation->after = &rc->db_after; 
rc->calculation->before = &rc->db_before; @@ -573,6 +790,7 @@ static inline void rrdcalc_create_part2(RRDHOST *host, RRDCALC *rc) { } if(rc->warning) { + rc->warning->status = &rc->status; rc->warning->this = &rc->value; rc->warning->after = &rc->db_after; rc->warning->before = &rc->db_before; @@ -580,6 +798,7 @@ static inline void rrdcalc_create_part2(RRDHOST *host, RRDCALC *rc) { } if(rc->critical) { + rc->critical->status = &rc->status; rc->critical->this = &rc->value; rc->critical->after = &rc->db_after; rc->critical->before = &rc->db_before; @@ -587,8 +806,15 @@ static inline void rrdcalc_create_part2(RRDHOST *host, RRDCALC *rc) { } // link it to the host - rc->next = host->alarms; - host->alarms = rc; + if(likely(host->alarms)) { + // append it + RRDCALC *t; + for(t = host->alarms; t && t->next ; t = t->next) ; + t->next = rc; + } + else { + host->alarms = rc; + } // link it to its chart RRDSET *st; @@ -600,71 +826,66 @@ static inline void rrdcalc_create_part2(RRDHOST *host, RRDCALC *rc) { } } -static inline uint32_t rrdcalc_fullname(char *fullname, size_t len, const char *chart, const char *name) { - snprintfz(fullname, len - 1, "%s%s%s", chart?chart:"", chart?".":"", name); - rrdvar_fix_name(fullname); - return simple_hash(fullname); -} - -static inline RRDCALC *rrdcalc_create(RRDHOST *host, const char *name, const char *chart, const char *dimensions, - const char *units, const char *info, - int group_method, int after, int before, int update_every, uint32_t options, - calculated_number green, calculated_number red, - const char *exec, const char *source, - const char *calc, const char *warn, const char *crit) { +static inline RRDCALC *rrdcalc_create(RRDHOST *host, RRDCALCTEMPLATE *rt, const char *chart) { - char fullname[RRDVAR_MAX_LENGTH + 1]; - uint32_t hash = rrdcalc_fullname(fullname, RRDVAR_MAX_LENGTH + 1, chart, name); + debug(D_HEALTH, "Health creating dynamic alarm (from template) '%s.%s'", chart, rt->name); - if(rrdcalc_exists(host, fullname, 
hash)) + if(rrdcalc_exists(host, chart, rt->name, 0, 0)) return NULL; RRDCALC *rc = callocz(1, sizeof(RRDCALC)); - - rc->name = strdupz(name); + rc->next_event_id = 1; + rc->id = rrdcalc_get_unique_id(host, chart, rt->name, &rc->next_event_id); + rc->name = strdupz(rt->name); rc->hash = simple_hash(rc->name); - rc->chart = strdupz(chart); rc->hash_chart = simple_hash(rc->chart); - if(dimensions) rc->dimensions = strdupz(dimensions); + if(rt->dimensions) rc->dimensions = strdupz(rt->dimensions); - rc->green = green; - rc->red = red; + rc->green = rt->green; + rc->red = rt->red; rc->value = NAN; rc->old_value = NAN; - rc->group = group_method; - rc->after = after; - rc->before = before; - rc->update_every = update_every; - rc->options = options; - - if(exec) rc->exec = strdupz(exec); - if(source) rc->source = strdupz(source); - if(units) rc->units = strdupz(units); - if(info) rc->info = strdupz(info); - - if(calc) { - rc->calculation = expression_parse(calc, NULL, NULL); + rc->delay_up_duration = rt->delay_up_duration; + rc->delay_down_duration = rt->delay_down_duration; + rc->delay_max_duration = rt->delay_max_duration; + rc->delay_multiplier = rt->delay_multiplier; + + rc->group = rt->group; + rc->after = rt->after; + rc->before = rt->before; + rc->update_every = rt->update_every; + rc->options = rt->options; + + if(rt->exec) rc->exec = strdupz(rt->exec); + if(rt->recipient) rc->recipient = strdupz(rt->recipient); + if(rt->source) rc->source = strdupz(rt->source); + if(rt->units) rc->units = strdupz(rt->units); + if(rt->info) rc->info = strdupz(rt->info); + + if(rt->calculation) { + rc->calculation = expression_parse(rt->calculation->source, NULL, NULL); if(!rc->calculation) - error("Health alarm '%s.%s': failed to parse calculation expression '%s'", chart, name, calc); + error("Health alarm '%s.%s': failed to parse calculation expression '%s'", chart, rt->name, rt->calculation->source); } - if(warn) { - rc->warning = expression_parse(warn, NULL, NULL); + 
if(rt->warning) { + rc->warning = expression_parse(rt->warning->source, NULL, NULL); if(!rc->warning) - error("Health alarm '%s.%s': failed to re-parse warning expression '%s'", chart, name, warn); + error("Health alarm '%s.%s': failed to re-parse warning expression '%s'", chart, rt->name, rt->warning->source); } - if(crit) { - rc->critical = expression_parse(crit, NULL, NULL); + if(rt->critical) { + rc->critical = expression_parse(rt->critical->source, NULL, NULL); if(!rc->critical) - error("Health alarm '%s.%s': failed to re-parse critical expression '%s'", chart, name, crit); + error("Health alarm '%s.%s': failed to re-parse critical expression '%s'", chart, rt->name, rt->critical->source); } - debug(D_HEALTH, "Health runtime added alarm '%s.%s': exec '%s', green %Lf, red %Lf, lookup: group %d, after %d, before %d, options %u, dimensions '%s', update every %d, calculation '%s', warning '%s', critical '%s', source '%s", + debug(D_HEALTH, "Health runtime added alarm '%s.%s': exec '%s', recipient '%s', green %Lf, red %Lf, lookup: group %d, after %d, before %d, options %u, dimensions '%s', update every %d, calculation '%s', warning '%s', critical '%s', source '%s', delay up %d, delay down %d, delay max %d, delay_multiplier %f", (rc->chart)?rc->chart:"NOCHART", rc->name, (rc->exec)?rc->exec:"DEFAULT", + (rc->recipient)?rc->recipient:"DEFAULT", rc->green, rc->red, rc->group, @@ -676,7 +897,11 @@ static inline RRDCALC *rrdcalc_create(RRDHOST *host, const char *name, const cha (rc->calculation)?rc->calculation->parsed_as:"NONE", (rc->warning)?rc->warning->parsed_as:"NONE", (rc->critical)?rc->critical->parsed_as:"NONE", - rc->source + rc->source, + rc->delay_up_duration, + rc->delay_down_duration, + rc->delay_max_duration, + rc->delay_multiplier ); rrdcalc_create_part2(host, rc); @@ -692,14 +917,13 @@ void rrdcalc_free(RRDHOST *host, RRDCALC *rc) { if(rc->rrdset) rrdsetcalc_unlink(rc); // unlink it from RRDHOST - if(rc == host->alarms) + if(unlikely(rc == host->alarms)) 
host->alarms = rc->next; - else if(host->alarms) { + else if(likely(host->alarms)) { RRDCALC *t, *last = host->alarms; - for(t = last->next; t && t != rc; last = t, t = t->next) ; - if(last && last->next == rc) + if(last->next == rc) last->next = rc->next; else error("Cannot unlink alarm '%s.%s' from host '%s': not found", rc->chart?rc->chart:"NOCHART", rc->name, host->hostname); @@ -716,6 +940,7 @@ void rrdcalc_free(RRDHOST *host, RRDCALC *rc) { freez(rc->family); freez(rc->dimensions); freez(rc->exec); + freez(rc->recipient); freez(rc->source); freez(rc->units); freez(rc->info); @@ -730,22 +955,13 @@ void rrdcalctemplate_link_matching(RRDSET *st) { for(rt = st->rrdhost->templates; rt ; rt = rt->next) { if(rt->hash_context == st->hash_context && !strcmp(rt->context, st->context)) { - - RRDCALC *rc = rrdcalc_create(st->rrdhost, rt->name, st->id, - rt->dimensions, rt->units, rt->info, rt->group, rt->after, rt->before, rt->update_every, rt->options, - rt->green, rt->red, rt->exec, rt->source, - (rt->calculation)?rt->calculation->source:NULL, - (rt->warning)?rt->warning->source:NULL, - (rt->critical)?rt->critical->source:NULL); - - if(!rc) + RRDCALC *rc = rrdcalc_create(st->rrdhost, rt, st->id); + if(unlikely(!rc)) error("Health tried to create alarm from template '%s', but it failed", rt->name); #ifdef NETDATA_INTERNAL_CHECKS else if(rc->rrdset != st) error("Health alarm '%s.%s' should be linked to chart '%s', but it is not", rc->chart?rc->chart:"NOCHART", rc->name, st->id); -#else - (void)rc; #endif } } @@ -776,6 +992,7 @@ static inline void rrdcalctemplate_free(RRDHOST *host, RRDCALCTEMPLATE *rt) { freez(rt->name); freez(rt->exec); + freez(rt->recipient); freez(rt->context); freez(rt->source); freez(rt->units); @@ -800,18 +1017,12 @@ static inline void rrdcalctemplate_free(RRDHOST *host, RRDCALCTEMPLATE *rt) { #define HEALTH_WARN_KEY "warn" #define HEALTH_CRIT_KEY "crit" #define HEALTH_EXEC_KEY "exec" +#define HEALTH_RECIPIENT_KEY "to" #define HEALTH_UNITS_KEY 
"units" #define HEALTH_INFO_KEY "info" +#define HEALTH_DELAY_KEY "delay" static inline int rrdcalc_add_alarm_from_config(RRDHOST *host, RRDCALC *rc) { - { - char fullname[RRDVAR_MAX_LENGTH + 1]; - uint32_t hash = rrdcalc_fullname(fullname, RRDVAR_MAX_LENGTH + 1, rc->chart, rc->name); - - if (rrdcalc_exists(host, fullname, hash)) - return 0; - } - if(!rc->chart) { error("Health configuration for alarm '%s' does not have a chart", rc->name); return 0; @@ -827,10 +1038,17 @@ static inline int rrdcalc_add_alarm_from_config(RRDHOST *host, RRDCALC *rc) { return 0; } - debug(D_HEALTH, "Health configuration adding alarm '%s.%s': exec '%s', green %Lf, red %Lf, lookup: group %d, after %d, before %d, options %u, dimensions '%s', update every %d, calculation '%s', warning '%s', critical '%s', source '%s", + if (rrdcalc_exists(host, rc->chart, rc->name, rc->hash_chart, rc->hash)) + return 0; + + rc->id = rrdcalc_get_unique_id(&localhost, rc->chart, rc->name, &rc->next_event_id); + + debug(D_HEALTH, "Health configuration adding alarm '%s.%s' (%u): exec '%s', recipient '%s', green %Lf, red %Lf, lookup: group %d, after %d, before %d, options %u, dimensions '%s', update every %d, calculation '%s', warning '%s', critical '%s', source '%s', delay up %d, delay down %d, delay max %d, delay_multiplier %f", rc->chart?rc->chart:"NOCHART", rc->name, + rc->id, (rc->exec)?rc->exec:"DEFAULT", + (rc->recipient)?rc->recipient:"DEFAULT", rc->green, rc->red, rc->group, @@ -842,7 +1060,11 @@ static inline int rrdcalc_add_alarm_from_config(RRDHOST *host, RRDCALC *rc) { (rc->calculation)?rc->calculation->parsed_as:"NONE", (rc->warning)?rc->warning->parsed_as:"NONE", (rc->critical)?rc->critical->parsed_as:"NONE", - rc->source + rc->source, + rc->delay_up_duration, + rc->delay_down_duration, + rc->delay_max_duration, + rc->delay_multiplier ); rrdcalc_create_part2(host, rc); @@ -850,33 +1072,34 @@ static inline int rrdcalc_add_alarm_from_config(RRDHOST *host, RRDCALC *rc) { } static inline int 
rrdcalctemplate_add_template_from_config(RRDHOST *host, RRDCALCTEMPLATE *rt) { - if(!rt->context) { + if(unlikely(!rt->context)) { error("Health configuration for template '%s' does not have a context", rt->name); return 0; } - if(!rt->update_every) { + if(unlikely(!rt->update_every)) { error("Health configuration for template '%s' has no frequency (parameter 'every'). Ignoring it.", rt->name); return 0; } - if(!RRDCALCTEMPLATE_HAS_CALCULATION(rt) && !rt->warning && !rt->critical) { + if(unlikely(!RRDCALCTEMPLATE_HAS_CALCULATION(rt) && !rt->warning && !rt->critical)) { error("Health configuration for template '%s' is useless (no calculation, no warning and no critical evaluation)", rt->name); return 0; } - RRDCALCTEMPLATE *t; - for (t = host->templates; t ; t = t->next) { - if(t->hash_name == rt->hash_name && !strcmp(t->name, rt->name)) { + RRDCALCTEMPLATE *t, *last = NULL; + for (t = host->templates; t ; last = t, t = t->next) { + if(unlikely(t->hash_name == rt->hash_name && !strcmp(t->name, rt->name))) { error("Health configuration template '%s' already exists for host '%s'.", rt->name, host->hostname); return 0; } } - debug(D_HEALTH, "Health configuration adding template '%s': context '%s', exec '%s', green %Lf, red %Lf, lookup: group %d, after %d, before %d, options %u, dimensions '%s', update every %d, calculation '%s', warning '%s', critical '%s', source '%s'", + debug(D_HEALTH, "Health configuration adding template '%s': context '%s', exec '%s', recipient '%s', green %Lf, red %Lf, lookup: group %d, after %d, before %d, options %u, dimensions '%s', update every %d, calculation '%s', warning '%s', critical '%s', source '%s', delay up %d, delay down %d, delay max %d, delay_multiplier %f", rt->name, (rt->context)?rt->context:"NONE", (rt->exec)?rt->exec:"DEFAULT", + (rt->recipient)?rt->recipient:"DEFAULT", rt->green, rt->red, rt->group, @@ -888,11 +1111,21 @@ static inline int rrdcalctemplate_add_template_from_config(RRDHOST *host, RRDCAL 
(rt->calculation)?rt->calculation->parsed_as:"NONE", (rt->warning)?rt->warning->parsed_as:"NONE", (rt->critical)?rt->critical->parsed_as:"NONE", - rt->source + rt->source, + rt->delay_up_duration, + rt->delay_down_duration, + rt->delay_max_duration, + rt->delay_multiplier ); - rt->next = host->templates; - host->templates = rt; + if(likely(last)) { + last->next = rt; + } + else { + rt->next = host->templates; + host->templates = rt; + } + return 1; } @@ -938,6 +1171,86 @@ static inline int health_parse_duration(char *string, int *result) { return 1; } +static inline int health_parse_delay( + size_t line, const char *path, const char *file, char *string, + int *delay_up_duration, + int *delay_down_duration, + int *delay_max_duration, + float *delay_multiplier) { + + char given_up = 0; + char given_down = 0; + char given_max = 0; + char given_multiplier = 0; + + char *s = string; + while(*s) { + char *key = s; + + while(*s && !isspace(*s)) s++; + while(*s && isspace(*s)) *s++ = '\0'; + + if(!*key) break; + + char *value = s; + while(*s && !isspace(*s)) s++; + while(*s && isspace(*s)) *s++ = '\0'; + + if(!strcasecmp(key, "up")) { + if (!health_parse_duration(value, delay_up_duration)) { + error("Health configuration at line %zu of file '%s/%s': invalid value '%s' for '%s' keyword", + line, path, file, value, key); + } + else given_up = 1; + } + else if(!strcasecmp(key, "down")) { + if (!health_parse_duration(value, delay_down_duration)) { + error("Health configuration at line %zu of file '%s/%s': invalid value '%s' for '%s' keyword", + line, path, file, value, key); + } + else given_down = 1; + } + else if(!strcasecmp(key, "multiplier")) { + *delay_multiplier = strtof(value, NULL); + if(isnan(*delay_multiplier) || isinf(*delay_multiplier) || islessequal(*delay_multiplier, 0)) { + error("Health configuration at line %zu of file '%s/%s': invalid value '%s' for '%s' keyword", + line, path, file, value, key); + } + else given_multiplier = 1; + } + else if(!strcasecmp(key, 
"max")) { + if (!health_parse_duration(value, delay_max_duration)) { + error("Health configuration at line %zu of file '%s/%s': invalid value '%s' for '%s' keyword", + line, path, file, value, key); + } + else given_max = 1; + } + else { + error("Health configuration at line %zu of file '%s/%s': unknown keyword '%s'", + line, path, file, key); + } + } + + if(!given_up) + *delay_up_duration = 0; + + if(!given_down) + *delay_down_duration = 0; + + if(!given_multiplier) + *delay_multiplier = 1.0; + + if(!given_max) { + if((*delay_max_duration) < (*delay_up_duration) * (*delay_multiplier)) + *delay_max_duration = (*delay_up_duration) * (*delay_multiplier); + + if((*delay_max_duration) < (*delay_down_duration) * (*delay_multiplier)) + *delay_max_duration = (*delay_down_duration) * (*delay_multiplier); + } + + return 1; +} + static inline int health_parse_db_lookup( size_t line, const char *path, const char *file, char *string, int *group_method, int *after, int *before, int *every, @@ -1056,7 +1369,7 @@ static inline void strip_quotes(char *s) { int health_readfile(const char *path, const char *filename) { debug(D_HEALTH, "Health configuration reading file '%s/%s'", path, filename); - static uint32_t hash_alarm = 0, hash_template = 0, hash_on = 0, hash_calc = 0, hash_green = 0, hash_red = 0, hash_warn = 0, hash_crit = 0, hash_exec = 0, hash_every = 0, hash_lookup = 0, hash_units = 0, hash_info = 0; + static uint32_t hash_alarm = 0, hash_template = 0, hash_on = 0, hash_calc = 0, hash_green = 0, hash_red = 0, hash_warn = 0, hash_crit = 0, hash_exec = 0, hash_every = 0, hash_lookup = 0, hash_units = 0, hash_info = 0, hash_recipient = 0, hash_delay = 0; char buffer[HEALTH_CONF_MAX_LINE + 1]; if(unlikely(!hash_alarm)) { @@ -1073,6 +1386,8 @@ int health_readfile(const char *path, const char *filename) { hash_every = simple_uhash(HEALTH_EVERY_KEY); hash_units = simple_hash(HEALTH_UNITS_KEY); hash_info = simple_hash(HEALTH_INFO_KEY); + hash_recipient = 
simple_hash(HEALTH_RECIPIENT_KEY); + hash_delay = simple_uhash(HEALTH_DELAY_KEY); } snprintfz(buffer, HEALTH_CONF_MAX_LINE, "%s/%s", path, filename); @@ -1144,6 +1459,7 @@ int health_readfile(const char *path, const char *filename) { } rc = callocz(1, sizeof(RRDCALC)); + rc->next_event_id = 1; rc->name = strdupz(value); rc->hash = simple_hash(rc->name); rc->source = health_source_file(line, path, filename); @@ -1151,6 +1467,7 @@ int health_readfile(const char *path, const char *filename) { rc->red = NAN; rc->value = NAN; rc->old_value = NAN; + rc->delay_multiplier = 1.0; if(rrdvar_fix_name(rc->name)) error("Health configuration renamed alarm '%s' to '%s'", value, rc->name); @@ -1171,6 +1488,7 @@ int health_readfile(const char *path, const char *filename) { rt->source = health_source_file(line, path, filename); rt->green = NAN; rt->red = NAN; + rt->delay_multiplier = 1.0; if(rrdvar_fix_name(rt->name)) error("Health configuration renamed template '%s' to '%s'", value, rt->name); @@ -1250,6 +1568,16 @@ int health_readfile(const char *path, const char *filename) { } rc->exec = strdupz(value); } + else if(hash == hash_recipient && !strcasecmp(key, HEALTH_RECIPIENT_KEY)) { + if(rc->recipient) { + if(strcmp(rc->recipient, value)) + info("Health configuration at line %zu of file '%s/%s' for alarm '%s' has key '%s' twice, once with value '%s' and later with value '%s'. 
Using ('%s').", + line, path, filename, rc->name, key, rc->recipient, value, value); + + freez(rc->recipient); + } + rc->recipient = strdupz(value); + } else if(hash == hash_units && !strcasecmp(key, HEALTH_UNITS_KEY)) { if(rc->units) { if(strcmp(rc->units, value)) @@ -1272,6 +1600,9 @@ int health_readfile(const char *path, const char *filename) { rc->info = strdupz(value); strip_quotes(rc->info); } + else if(hash == hash_delay && !strcasecmp(key, HEALTH_DELAY_KEY)) { + health_parse_delay(line, path, filename, value, &rc->delay_up_duration, &rc->delay_down_duration, &rc->delay_max_duration, &rc->delay_multiplier); + } else { error("Health configuration at line %zu of file '%s/%s' for alarm '%s' has unknown key '%s'.", line, path, filename, rc->name, key); @@ -1352,6 +1683,16 @@ int health_readfile(const char *path, const char *filename) { } rt->exec = strdupz(value); } + else if(hash == hash_recipient && !strcasecmp(key, HEALTH_RECIPIENT_KEY)) { + if(rt->recipient) { + if(strcmp(rt->recipient, value)) + info("Health configuration at line %zu of file '%s/%s' for template '%s' has key '%s' twice, once with value '%s' and later with value '%s'. 
Using ('%s').", + line, path, filename, rt->name, key, rt->recipient, value, value); + + freez(rt->recipient); + } + rt->recipient = strdupz(value); + } else if(hash == hash_units && !strcasecmp(key, HEALTH_UNITS_KEY)) { if(rt->units) { if(strcmp(rt->units, value)) @@ -1374,6 +1715,9 @@ int health_readfile(const char *path, const char *filename) { rt->info = strdupz(value); strip_quotes(rt->info); } + else if(hash == hash_delay && !strcasecmp(key, HEALTH_DELAY_KEY)) { + health_parse_delay(line, path, filename, value, &rt->delay_up_duration, &rt->delay_down_duration, &rt->delay_max_duration, &rt->delay_multiplier); + } else { error("Health configuration at line %zu of file '%s/%s' for template '%s' has unknown key '%s'.", line, path, filename, rt->name, key); @@ -1414,8 +1758,10 @@ void health_readdir(const char *path) { && ( (de->d_name[0] == '.' && de->d_name[1] == '\0') || (de->d_name[0] == '.' && de->d_name[1] == '.' && de->d_name[2] == '\0') - )) + )) { + debug(D_HEALTH, "Ignoring directory '%s'", de->d_name); continue; + } else if(de->d_type == DT_DIR) { char *s = mallocz(pathlen + strlen(de->d_name) + 2); @@ -1427,10 +1773,12 @@ void health_readdir(const char *path) { continue; } - else if((de->d_type == DT_LNK || de->d_type == DT_REG) && + else if((de->d_type == DT_LNK || de->d_type == DT_REG || de->d_type == DT_UNKNOWN) && len > 5 && !strcmp(&de->d_name[len - 5], ".conf")) { health_readfile(path, de->d_name); } + + else debug(D_HEALTH, "Ignoring file '%s'", de->d_name); } closedir(dir); @@ -1450,16 +1798,18 @@ void health_init(void) { return; } + health_alarm_log_load(&localhost); + char *path = health_config_dir(); { char buffer[FILENAME_MAX + 1]; - snprintfz(buffer, FILENAME_MAX, "%s/alarm-email.sh", config_get("global", "plugins directory", PLUGINS_DIR)); - health_default_exec = config_get("health", "script to execute on alarm", buffer); + snprintfz(buffer, FILENAME_MAX, "%s/alarm-notify.sh", config_get("global", "plugins directory", PLUGINS_DIR)); + 
health.health_default_exec = config_get("health", "script to execute on alarm", buffer); } long n = config_get_number("health", "in memory max health log entries", (long)localhost.health_log.max); - if(n < 2) { + if(n < 10) { error("Health configuration has invalid max log entries %ld. Using default %u", n, localhost.health_log.max); config_set_number("health", "in memory max health log entries", (long)localhost.health_log.max); } @@ -1480,35 +1830,47 @@ static inline void health_string2json(BUFFER *wb, const char *prefix, const char buffer_sprintf(wb, "%s\"%s\":null%s", prefix, label, suffix); } -static inline void health_alarm_entry2json_nolock(BUFFER *wb, ALARM_ENTRY *ae) { +static inline void health_alarm_entry2json_nolock(BUFFER *wb, ALARM_ENTRY *ae, RRDHOST *host) { buffer_sprintf(wb, "\n\t{\n" - "\t\t\"id\":%u,\n" - "\t\t\"name\":\"%s\",\n" - "\t\t\"chart\":\"%s\",\n" - "\t\t\"family\":\"%s\",\n" - "\t\t\"processed\":%s,\n" - "\t\t\"updated\":%s,\n" - "\t\t\"exec_run\":%s,\n" - "\t\t\"exec_failed\":%s,\n" - "\t\t\"exec\":\"%s\",\n" - "\t\t\"exec_code\":%d,\n" - "\t\t\"source\":\"%s\",\n" - "\t\t\"units\":\"%s\",\n" - "\t\t\"info\":\"%s\",\n" - "\t\t\"when\":%lu,\n" - "\t\t\"duration\":%lu,\n" - "\t\t\"non_clear_duration\":%lu,\n" - "\t\t\"status\":\"%s\",\n" - "\t\t\"old_status\":\"%s\",\n", - ae->id, + "\t\t\"hostname\": \"%s\",\n" + "\t\t\"unique_id\": %u,\n" + "\t\t\"alarm_id\": %u,\n" + "\t\t\"alarm_event_id\": %u,\n" + "\t\t\"name\": \"%s\",\n" + "\t\t\"chart\": \"%s\",\n" + "\t\t\"family\": \"%s\",\n" + "\t\t\"processed\": %s,\n" + "\t\t\"updated\": %s,\n" + "\t\t\"exec_run\": %lu,\n" + "\t\t\"exec_failed\": %s,\n" + "\t\t\"exec\": \"%s\",\n" + "\t\t\"recipient\": \"%s\",\n" + "\t\t\"exec_code\": %d,\n" + "\t\t\"source\": \"%s\",\n" + "\t\t\"units\": \"%s\",\n" + "\t\t\"info\": \"%s\",\n" + "\t\t\"when\": %lu,\n" + "\t\t\"duration\": %lu,\n" + "\t\t\"non_clear_duration\": %lu,\n" + "\t\t\"status\": \"%s\",\n" + "\t\t\"old_status\": \"%s\",\n" + 
"\t\t\"delay\": %d,\n" + "\t\t\"delay_up_to_timestamp\": %lu,\n" + "\t\t\"updated_by_id\": %u,\n" + "\t\t\"updates_id\": %u,\n", + host->hostname, + ae->unique_id, + ae->alarm_id, + ae->alarm_event_id, ae->name, ae->chart, ae->family, (ae->notifications & HEALTH_ENTRY_NOTIFICATIONS_PROCESSED)?"true":"false", (ae->notifications & HEALTH_ENTRY_NOTIFICATIONS_UPDATED)?"true":"false", - (ae->notifications & HEALTH_ENTRY_NOTIFICATIONS_EXEC_RUN)?"true":"false", + (unsigned long)ae->exec_run_timestamp, (ae->notifications & HEALTH_ENTRY_NOTIFICATIONS_EXEC_FAILED)?"true":"false", - ae->exec?ae->exec:health_default_exec, + ae->exec?ae->exec:health.health_default_exec, + ae->recipient?ae->recipient:health.health_default_recipient, ae->exec_code, ae->source, ae->units?ae->units:"", @@ -1517,7 +1879,11 @@ static inline void health_alarm_entry2json_nolock(BUFFER *wb, ALARM_ENTRY *ae) { (unsigned long)ae->duration, (unsigned long)ae->non_clear_duration, rrdcalc_status2string(ae->new_status), - rrdcalc_status2string(ae->old_status) + rrdcalc_status2string(ae->old_status), + ae->delay, + (unsigned long)ae->delay_up_to_timestamp, + ae->updated_by_id, + ae->updates_id ); buffer_strcat(wb, "\t\t\"value\":"); @@ -1531,7 +1897,7 @@ static inline void health_alarm_entry2json_nolock(BUFFER *wb, ALARM_ENTRY *ae) { buffer_strcat(wb, "\t}"); } -void health_alarm_log2json(RRDHOST *host, BUFFER *wb) { +void health_alarm_log2json(RRDHOST *host, BUFFER *wb, uint32_t after) { pthread_rwlock_rdlock(&host->health_log.alarm_log_rwlock); buffer_strcat(wb, "["); @@ -1540,8 +1906,10 @@ void health_alarm_log2json(RRDHOST *host, BUFFER *wb) { unsigned int count = 0; ALARM_ENTRY *ae; for(ae = host->health_log.alarms; ae && count < max ; count++, ae = ae->next) { - if(likely(count)) buffer_strcat(wb, ","); - health_alarm_entry2json_nolock(wb, ae); + if(ae->unique_id > after) { + if(likely(count)) buffer_strcat(wb, ","); + health_alarm_entry2json_nolock(wb, ae, host); + } } buffer_strcat(wb, "\n]\n"); @@ 
-1552,11 +1920,13 @@ void health_alarm_log2json(RRDHOST *host, BUFFER *wb) { static inline void health_rrdcalc2json_nolock(BUFFER *wb, RRDCALC *rc) { buffer_sprintf(wb, "\t\t\"%s.%s\": {\n" + "\t\t\t\"id\": %lu,\n" "\t\t\t\"name\": \"%s\",\n" "\t\t\t\"chart\": \"%s\",\n" "\t\t\t\"family\": \"%s\",\n" "\t\t\t\"active\": %s,\n" "\t\t\t\"exec\": \"%s\",\n" + "\t\t\t\"recipient\": \"%s\",\n" "\t\t\t\"source\": \"%s\",\n" "\t\t\t\"units\": \"%s\",\n" "\t\t\t\"info\": \"%s\",\n" @@ -1565,12 +1935,20 @@ static inline void health_rrdcalc2json_nolock(BUFFER *wb, RRDCALC *rc) { "\t\t\t\"last_updated\": %lu,\n" "\t\t\t\"next_update\": %lu,\n" "\t\t\t\"update_every\": %d,\n" + "\t\t\t\"delay_up_duration\": %d,\n" + "\t\t\t\"delay_down_duration\": %d,\n" + "\t\t\t\"delay_max_duration\": %d,\n" + "\t\t\t\"delay_multiplier\": %f,\n" + "\t\t\t\"delay\": %d,\n" + "\t\t\t\"delay_up_to_timestamp\": %lu,\n" , rc->chart, rc->name + , (unsigned long)rc->id , rc->name , rc->chart , (rc->rrdset && rc->rrdset->family)?rc->rrdset->family:"" , (rc->rrdset)?"true":"false" - , rc->exec?rc->exec:health_default_exec + , rc->exec?rc->exec:health.health_default_exec + , rc->recipient?rc->recipient:health.health_default_recipient , rc->source , rc->units?rc->units:"" , rc->info?rc->info:"" @@ -1579,6 +1957,12 @@ static inline void health_rrdcalc2json_nolock(BUFFER *wb, RRDCALC *rc) { , (unsigned long)rc->last_updated , (unsigned long)rc->next_update , rc->update_every + , rc->delay_up_duration + , rc->delay_down_duration + , rc->delay_max_duration + , rc->delay_multiplier + , rc->delay_last + , (unsigned long)rc->delay_up_to_timestamp ); if(RRDCALC_HAS_DB_LOOKUP(rc)) { @@ -1638,15 +2022,24 @@ static inline void health_rrdcalc2json_nolock(BUFFER *wb, RRDCALC *rc) { void health_alarms2json(RRDHOST *host, BUFFER *wb, int all) { int i; + rrdhost_rdlock(&localhost); + buffer_sprintf(wb, "{\n\t\"hostname\": \"%s\"," + "\n\t\"latest_alarm_log_unique_id\": %u," + "\n\t\"status\": %s," + "\n\t\"now\": %lu," 
+ "\n\t\"alarms\": {\n", + host->hostname, + (host->health_log.next_log_id > 0)?(host->health_log.next_log_id - 1):0, + health_enabled?"true":"false", + (unsigned long)time(NULL)); - buffer_strcat(wb, "{\n\t\"alarms\": {\n"); RRDCALC *rc; for(i = 0, rc = host->alarms; rc ; rc = rc->next) { - if(!rc->rrdset) + if(unlikely(!rc->rrdset || !rc->rrdset->last_collected_time.tv_sec)) continue; - if(!all && !(rc->status == RRDCALC_STATUS_WARNING || rc->status == RRDCALC_STATUS_CRITICAL)) + if(likely(!all && !(rc->status == RRDCALC_STATUS_WARNING || rc->status == RRDCALC_STATUS_CRITICAL))) continue; if(likely(i)) buffer_strcat(wb, ",\n"); @@ -1655,12 +2048,11 @@ void health_alarms2json(RRDHOST *host, BUFFER *wb, int all) { } // buffer_strcat(wb, "\n\t},\n\t\"templates\": {"); - // RRDCALCTEMPLATE *rt; // for(rt = host->templates; rt ; rt = rt->next) // health_rrdcalctemplate2json_nolock(wb, rt); - buffer_sprintf(wb, "\n\t},\n\t\"now\": %lu\n}\n", (unsigned long)time(NULL)); + buffer_strcat(wb, "\n\t}\n}\n"); rrdhost_unlock(&localhost); } @@ -1684,20 +2076,31 @@ void health_reload(void) { char *path = health_config_dir(); + // free all running alarms rrdhost_rwlock(&localhost); health_free_all_nolock(&localhost); rrdhost_unlock(&localhost); + // invalidate all previous entries in the alarm log + ALARM_ENTRY *t; + for(t = localhost.health_log.alarms ; t ; t = t->next) { + if(t->new_status != RRDCALC_STATUS_REMOVED) + t->notifications |= HEALTH_ENTRY_NOTIFICATIONS_UPDATED; + } + + // reset all thresholds to all charts RRDSET *st; for(st = localhost.rrdset_root; st ; st = st->next) { st->green = NAN; st->red = NAN; } + // load the new alarms rrdhost_rwlock(&localhost); health_readdir(path); rrdhost_unlock(&localhost); + // link the loaded alarms to their charts for(st = localhost.rrdset_root; st ; st = st->next) { rrdhost_rwlock(&localhost); @@ -1708,50 +2111,54 @@ void health_reload(void) { } } - // ---------------------------------------------------------------------------- 
// health main thread and friends -static inline int rrdcalc_isrunnable(RRDCALC *rc, time_t now, time_t *next_run) { - if (unlikely(!rc->rrdset)) { - debug(D_HEALTH, "Health not running alarm '%s.%s'. It is not linked to a chart.", rc->chart?rc->chart:"NOCHART", rc->name); - return 0; - } - - if (unlikely(!rc->update_every)) { - debug(D_HEALTH, "Health not running alarm '%s.%s'. It does not have an update frequency", rc->chart?rc->chart:"NOCHART", rc->name); - return 0; - } - - if (unlikely(rc->next_update > now)) { - if (*next_run > rc->next_update) - *next_run = rc->next_update; - - debug(D_HEALTH, "Health not examining alarm '%s.%s' yet (will do in %d secs).", rc->chart?rc->chart:"NOCHART", rc->name, (int) (rc->next_update - now)); - return 0; - } - - return 1; -} - static inline int rrdcalc_value2status(calculated_number n) { if(isnan(n)) return RRDCALC_STATUS_UNDEFINED; if(n) return RRDCALC_STATUS_RAISED; return RRDCALC_STATUS_CLEAR; } -static inline void health_alarm_execute(ALARM_ENTRY *ae) { - if(ae->old_status == RRDCALC_STATUS_UNINITIALIZED && ae->new_status == RRDCALC_STATUS_CLEAR) - return; +static inline void health_alarm_execute(RRDHOST *host, ALARM_ENTRY *ae) { + ae->notifications |= HEALTH_ENTRY_NOTIFICATIONS_PROCESSED; + + // find the previous notification for the same alarm + ALARM_ENTRY *t; + for(t = ae->next; t ;t = t->next) { + if(t->alarm_id == ae->alarm_id && t->notifications & HEALTH_ENTRY_NOTIFICATIONS_EXEC_RUN) + break; + } + + if(t && t->new_status == ae->new_status) { + // don't send the same notification again + info("Health not sending again notification for alarm '%s.%s' status %s", ae->chart, ae->name, rrdcalc_status2string(ae->new_status)); + goto done; + } + + if((ae->old_status == RRDCALC_STATUS_UNDEFINED && ae->new_status == RRDCALC_STATUS_UNINITIALIZED) + || (ae->old_status == RRDCALC_STATUS_UNINITIALIZED && ae->new_status == RRDCALC_STATUS_CLEAR)) { + info("Health not sending notification for first initialization of alarm 
'%s.%s' status %s", ae->chart, ae->name, rrdcalc_status2string(ae->new_status)); + goto done; + } char buffer[FILENAME_MAX + 1]; pid_t command_pid; const char *exec = ae->exec; - if(!exec) exec = health_default_exec; + if(!exec) exec = health.health_default_exec; - snprintfz(buffer, FILENAME_MAX, "exec %s '%s' '%s' '%s' '%s' '%s' '%0.0Lf' '%0.0Lf' '%s' '%u' '%u' '%s' '%s'", + const char *recipient = ae->recipient; + if(!recipient) recipient = health.health_default_recipient; + + snprintfz(buffer, FILENAME_MAX, "exec %s '%s' '%s' '%u' '%u' '%u' '%lu' '%s' '%s' '%s' '%s' '%s' '%0.0Lf' '%0.0Lf' '%s' '%u' '%u' '%s' '%s'", exec, + recipient, + host->hostname, + ae->unique_id, + ae->alarm_id, + ae->alarm_event_id, + (unsigned long)ae->when, ae->name, ae->chart?ae->chart:"NOCAHRT", ae->family?ae->family:"NOFAMILY", @@ -1767,25 +2174,29 @@ static inline void health_alarm_execute(ALARM_ENTRY *ae) { ); ae->notifications |= HEALTH_ENTRY_NOTIFICATIONS_EXEC_RUN; + ae->exec_run_timestamp = time(NULL); debug(D_HEALTH, "executing command '%s'", buffer); FILE *fp = mypopen(buffer, &command_pid); if(!fp) { error("HEALTH: Cannot popen(\"%s\", \"r\").", buffer); - return; + goto done; } debug(D_HEALTH, "HEALTH reading from command"); char *s = fgets(buffer, FILENAME_MAX, fp); (void)s; - debug(D_HEALTH, "HEALTH closing command"); ae->exec_code = mypclose(fp, command_pid); debug(D_HEALTH, "done executing command - returned with code %d", ae->exec_code); if(ae->exec_code != 0) ae->notifications |= HEALTH_ENTRY_NOTIFICATIONS_EXEC_FAILED; + +done: + health_alarm_log_save(host, ae); + return; } -static inline void health_process_notifications(ALARM_ENTRY *ae) { +static inline void health_process_notifications(RRDHOST *host, ALARM_ENTRY *ae) { info("Health alarm '%s.%s' = %0.2Lf - changed status from %s to %s", ae->chart?ae->chart:"NOCHART", ae->name, ae->new_value, @@ -1793,98 +2204,33 @@ static inline void health_process_notifications(ALARM_ENTRY *ae) { 
rrdcalc_status2string(ae->new_status) ); - health_alarm_execute(ae); + health_alarm_execute(host, ae); } -static inline void health_alarm_log(RRDHOST *host, time_t when, - const char *name, const char *chart, const char *family, - const char *exec, time_t duration, - calculated_number old_value, calculated_number new_value, - int old_status, int new_status, - const char *source, - const char *units, - const char *info -) { - ALARM_ENTRY *ae = callocz(1, sizeof(ALARM_ENTRY)); - ae->name = strdupz(name); - ae->hash_name = simple_hash(ae->name); - - if(chart) { - ae->chart = strdupz(chart); - ae->hash_chart = simple_hash(ae->chart); - } - - if(family) - ae->family = strdupz(family); - - if(exec) ae->exec = strdupz(exec); - if(source) ae->source = strdupz(source); - if(units) ae->units = strdupz(units); - if(info) ae->info = strdupz(info); - - ae->id = host->health_log.nextid++; - ae->when = when; - ae->old_value = old_value; - ae->new_value = new_value; - ae->old_status = old_status; - ae->new_status = new_status; - ae->duration = duration; - - if(ae->old_status == RRDCALC_STATUS_WARNING || ae->old_status == RRDCALC_STATUS_CRITICAL) - ae->non_clear_duration += ae->duration; - - // link it - pthread_rwlock_wrlock(&host->health_log.alarm_log_rwlock); - ae->next = host->health_log.alarms; - host->health_log.alarms = ae; - host->health_log.count++; - pthread_rwlock_unlock(&host->health_log.alarm_log_rwlock); +static inline void health_alarm_log_process(RRDHOST *host) { + static uint32_t stop_at_id = 0; + uint32_t first_waiting = (host->health_log.alarms)?host->health_log.alarms->unique_id:0; + time_t now = time(NULL); - // match previous alarms pthread_rwlock_rdlock(&host->health_log.alarm_log_rwlock); - ALARM_ENTRY *t; - for(t = host->health_log.alarms ; t ; t = t->next) { - if(t != ae && - t->hash_name == ae->hash_name && - t->hash_chart == ae->hash_chart && - !strcmp(t->name, ae->name) && - t->chart && ae->chart && !strcmp(t->chart, ae->chart)) { - - 
if(!(t->notifications & HEALTH_ENTRY_NOTIFICATIONS_UPDATED) && !t->updated_by) { - t->notifications |= HEALTH_ENTRY_NOTIFICATIONS_UPDATED; - t->updated_by = ae; - if((t->new_status == RRDCALC_STATUS_WARNING || t->new_status == RRDCALC_STATUS_CRITICAL) && - (t->old_status == RRDCALC_STATUS_WARNING || t->old_status == RRDCALC_STATUS_CRITICAL)) - ae->non_clear_duration += t->non_clear_duration; - } - else { - // no need to continue - break; - } - } - } - pthread_rwlock_unlock(&host->health_log.alarm_log_rwlock); -} - -static inline void health_alarm_log_process(RRDHOST *host) { - static uint32_t last_processed = 0; ALARM_ENTRY *ae; + for(ae = host->health_log.alarms; ae && ae->unique_id >= stop_at_id ; ae = ae->next) { + if(unlikely( + !(ae->notifications & HEALTH_ENTRY_NOTIFICATIONS_PROCESSED) && + !(ae->notifications & HEALTH_ENTRY_NOTIFICATIONS_UPDATED) + )) { - pthread_rwlock_rdlock(&host->health_log.alarm_log_rwlock); - - for(ae = host->health_log.alarms; ae ;ae = ae->next) { - if(last_processed >= ae->id) break; + if(unlikely(ae->unique_id < first_waiting)) + first_waiting = ae->unique_id; - if(!(ae->notifications & HEALTH_ENTRY_NOTIFICATIONS_PROCESSED) && - !(ae->notifications & HEALTH_ENTRY_NOTIFICATIONS_UPDATED)) { - ae->notifications |= HEALTH_ENTRY_NOTIFICATIONS_PROCESSED; - health_process_notifications(ae); + if(likely(now >= ae->delay_up_to_timestamp)) + health_process_notifications(host, ae); } } - if(host->health_log.alarms) - last_processed = host->health_log.alarms->id; + // remember this for the next iteration + stop_at_id = first_waiting; pthread_rwlock_unlock(&host->health_log.alarm_log_rwlock); @@ -1895,7 +2241,7 @@ static inline void health_alarm_log_process(RRDHOST *host) { pthread_rwlock_wrlock(&host->health_log.alarm_log_rwlock); ALARM_ENTRY *last = NULL; - unsigned int count = host->health_log.max; + unsigned int count = host->health_log.max * 2 / 3; for(ae = host->health_log.alarms; ae && count ; count--, last = ae, ae = ae->next) ; if(ae && 
last && last->next == ae) @@ -1904,23 +2250,60 @@ static inline void health_alarm_log_process(RRDHOST *host) { ae = NULL; while(ae) { + debug(D_HEALTH, "Health removing alarm log entry with id: %u", ae->unique_id); + ALARM_ENTRY *t = ae->next; freez(ae->name); freez(ae->chart); freez(ae->family); freez(ae->exec); + freez(ae->recipient); freez(ae->source); freez(ae->units); freez(ae->info); freez(ae); ae = t; + host->health_log.count--; } pthread_rwlock_unlock(&host->health_log.alarm_log_rwlock); } +static inline int rrdcalc_isrunnable(RRDCALC *rc, time_t now, time_t *next_run) { + if (unlikely(!rc->rrdset)) { + debug(D_HEALTH, "Health not running alarm '%s.%s'. It is not linked to a chart.", rc->chart?rc->chart:"NOCHART", rc->name); + return 0; + } + + if (unlikely(!rc->rrdset->last_collected_time.tv_sec)) { + debug(D_HEALTH, "Health not running alarm '%s.%s'. Chart is not yet collected.", rc->chart?rc->chart:"NOCHART", rc->name); + return 0; + } + + if (unlikely(!rc->update_every)) { + debug(D_HEALTH, "Health not running alarm '%s.%s'. It does not have an update frequency", rc->chart?rc->chart:"NOCHART", rc->name); + return 0; + } + + if (unlikely(rc->next_update > now)) { + if (unlikely(*next_run > rc->next_update)) + *next_run = rc->next_update; + + debug(D_HEALTH, "Health not examining alarm '%s.%s' yet (will do in %d secs).", rc->chart?rc->chart:"NOCHART", rc->name, (int) (rc->next_update - now)); + return 0; + } + + // FIXME + // we should check that the DB lookup is possible + // i.e. + // - the duration of the chart includes the required timeframe + // we SHOULD NOT check the dimensions - there might be alarms that refer non-existing dimensions (e.g. 
cpu steal) + + return 1; +} + void *health_main(void *ptr) { (void)ptr; @@ -1938,7 +2321,7 @@ void *health_main(void *ptr) { BUFFER *wb = buffer_create(100); unsigned int loop = 0; - while(health_enabled) { + while(health_enabled && !netdata_exit) { loop++; debug(D_HEALTH, "Health monitoring iteration no %u started", loop); @@ -2053,7 +2436,7 @@ void *health_main(void *ptr) { } rrdhost_unlock(&localhost); - if (runnable) { + if (unlikely(runnable && !netdata_exit)) { rrdhost_rdlock(&localhost); for (rc = localhost.alarms; rc; rc = rc->next) { @@ -2063,7 +2446,7 @@ void *health_main(void *ptr) { int warning_status = RRDCALC_STATUS_UNDEFINED; int critical_status = RRDCALC_STATUS_UNDEFINED; - if(unlikely(rc->warning)) { + if(likely(rc->warning)) { if(unlikely(!expression_evaluate(rc->warning))) { // calculation failed @@ -2093,7 +2476,7 @@ void *health_main(void *ptr) { } } - if(unlikely(rc->critical)) { + if(likely(rc->critical)) { if(unlikely(!expression_evaluate(rc->critical))) { // calculation failed @@ -2153,7 +2536,34 @@ void *health_main(void *ptr) { } if(status != rc->status) { - health_alarm_log(&localhost, time(NULL), rc->name, rc->rrdset->id, rc->rrdset->family, rc->exec, now - rc->last_status_change, rc->old_value, rc->value, rc->status, status, rc->source, rc->units, rc->info); + int delay = 0; + + if(now > rc->delay_up_to_timestamp) { + rc->delay_up_current = rc->delay_up_duration; + rc->delay_down_current = rc->delay_down_duration; + rc->delay_last = 0; + rc->delay_up_to_timestamp = 0; + } + else { + rc->delay_up_current = (int)(rc->delay_up_current * rc->delay_multiplier); + if(rc->delay_up_current > rc->delay_max_duration) rc->delay_up_current = rc->delay_max_duration; + + rc->delay_down_current = (int)(rc->delay_down_current * rc->delay_multiplier); + if(rc->delay_down_current > rc->delay_max_duration) rc->delay_down_current = rc->delay_max_duration; + } + + if(status > rc->status) + delay = rc->delay_up_current; + else + delay = 
rc->delay_down_current; + + // COMMENTED: because we do need to send raising alarms + // if(now + delay < rc->delay_up_to_timestamp) + // delay = (int)(rc->delay_up_to_timestamp - now); + + rc->delay_last = delay; + rc->delay_up_to_timestamp = now + delay; + health_alarm_log(&localhost, rc->id, rc->next_event_id++, now, rc->name, rc->rrdset->id, rc->rrdset->family, rc->exec, rc->recipient, now - rc->last_status_change, rc->old_value, rc->value, rc->status, status, rc->source, rc->units, rc->info, rc->delay_last); rc->last_status_change = now; rc->status = status; } @@ -2171,10 +2581,16 @@ void *health_main(void *ptr) { if (unlikely(pthread_setcancelstate(oldstate, NULL) != 0)) error("Cannot set pthread cancel state to RESTORE (%d).", oldstate); + if(unlikely(netdata_exit)) + break; + // execute notifications // and cleanup health_alarm_log_process(&localhost); + if(unlikely(netdata_exit)) + break; + now = time(NULL); if(now < next_run) { debug(D_HEALTH, "Health monitoring iteration no %u done. Next iteration in %d secs", diff --git a/src/health.h b/src/health.h index ef1158a2..9d5834fc 100644 --- a/src/health.h +++ b/src/health.h @@ -105,8 +105,9 @@ typedef struct rrddimvar { // having as RRDSET.calculations the RRDCALC to be processed // next. 
-#define RRDCALC_STATUS_UNINITIALIZED 0 +#define RRDCALC_STATUS_REMOVED -2 #define RRDCALC_STATUS_UNDEFINED -1 +#define RRDCALC_STATUS_UNINITIALIZED 0 #define RRDCALC_STATUS_CLEAR 1 #define RRDCALC_STATUS_RAISED 2 #define RRDCALC_STATUS_WARNING 3 @@ -120,52 +121,89 @@ typedef struct rrddimvar { #define RRDCALC_FLAG_CRIT_ERROR 0x00000020 typedef struct rrdcalc { - char *name; - uint32_t hash; + uint32_t id; // the unique id of this alarm + uint32_t next_event_id; // the next event id that will be used for this alarm - char *exec; + char *name; // the name of this alarm + uint32_t hash; - char *chart; // the chart id this should be linked to + char *exec; // the command to execute when this alarm switches state + char *recipient; // the recipient of the alarm (the first parameter to exec) + + char *chart; // the chart id this should be linked to uint32_t hash_chart; - char *source; // the source of this calculation - char *units; - char *info; + char *source; // the source of this alarm + char *units; // the units of the alarm + char *info; // a short description of the alarm + + int update_every; // update frequency for the alarm + + // the red and green threshold of this alarm (to be set to the chart) + calculated_number green; + calculated_number red; - char *dimensions; // the chart dimensions + // ------------------------------------------------------------------------ + // database lookup settings - int group; // grouping method: average, max, etc. - int before; // ending point in time-series - int after; // starting point in time-series - uint32_t options; // calculation options - int update_every; // update frequency for the calculation + char *dimensions; // the chart dimensions + int group; // grouping method: average, max, etc. 
+ int before; // ending point in time-series + int after; // starting point in time-series + uint32_t options; // calculation options - time_t last_updated; - time_t next_update; + // ------------------------------------------------------------------------ + // expressions related to the alarm - EVAL_EXPRESSION *calculation; - EVAL_EXPRESSION *warning; - EVAL_EXPRESSION *critical; + EVAL_EXPRESSION *calculation; // expression to calculate the value of the alarm + EVAL_EXPRESSION *warning; // expression to check the warning condition + EVAL_EXPRESSION *critical; // expression to check the critical condition - uint32_t rrdcalc_flags; - int status; + // ------------------------------------------------------------------------ + // notification delay settings - time_t db_after; - time_t db_before; - time_t last_status_change; + int delay_up_duration; // duration to delay notifications when alarm raises + int delay_down_duration; // duration to delay notifications when alarm lowers + int delay_max_duration; // the absolute max delay to apply to this alarm + float delay_multiplier; // multiplier for all delays when alarms switch status + // while now < delay_up_to - calculated_number value; - calculated_number old_value; + // ------------------------------------------------------------------------ + // runtime information - calculated_number green; - calculated_number red; + int status; // the current status of the alarm + + calculated_number value; // the current value of the alarm + calculated_number old_value; // the previous value of the alarm + + uint32_t rrdcalc_flags; // check RRDCALC_FLAG_* + + time_t last_updated; // the last update timestamp of the alarm + time_t next_update; // the next update timestamp of the alarm + time_t last_status_change; // the timestamp of the last time this alarm changed status + + time_t db_after; // the first timestamp evaluated by the db lookup + time_t db_before; // the last timestamp evaluated by the db lookup + + time_t 
delay_up_to_timestamp; // the timestamp up to which we should delay notifications + int delay_up_current; // the current up notification delay duration + int delay_down_current; // the current down notification delay duration + int delay_last; // the last delay we used + + // ------------------------------------------------------------------------ + // variables this alarm exposes to the rest of the alarms RRDVAR *local; RRDVAR *family; RRDVAR *hostid; RRDVAR *hostname; + // ------------------------------------------------------------------------ + // the chart this alarm it is linked to + struct rrdset *rrdset; + + // linking of this alarm on its chart struct rrdcalc *rrdset_next; struct rrdcalc *rrdset_prev; @@ -182,29 +220,45 @@ typedef struct rrdcalctemplate { uint32_t hash_name; char *exec; + char *recipient; char *context; uint32_t hash_context; - char *source; // the source of this template - char *units; - char *info; + char *source; // the source of this alarm + char *units; // the units of the alarm + char *info; // a short description of the alarm + + int update_every; // update frequency for the alarm - char *dimensions; + // the red and green threshold of this alarm (to be set to the chart) + calculated_number green; + calculated_number red; + + // ------------------------------------------------------------------------ + // database lookup settings + + char *dimensions; // the chart dimensions + int group; // grouping method: average, max, etc. + int before; // ending point in time-series + int after; // starting point in time-series + uint32_t options; // calculation options + + // ------------------------------------------------------------------------ + // notification delay settings - int group; // grouping method: average, max, etc. 
- int before; // ending point in time-series - int after; // starting point in time-series - uint32_t options; // calculation options - int update_every; // update frequency for the calculation + int delay_up_duration; // duration to delay notifications when alarm raises + int delay_down_duration; // duration to delay notifications when alarm lowers + int delay_max_duration; // the absolute max delay to apply to this alarm + float delay_multiplier; // multiplier for all delays when alarms switch status + + // ------------------------------------------------------------------------ + // expressions related to the alarm EVAL_EXPRESSION *calculation; EVAL_EXPRESSION *warning; EVAL_EXPRESSION *critical; - calculated_number green; - calculated_number red; - struct rrdcalctemplate *next; } RRDCALCTEMPLATE; @@ -216,7 +270,9 @@ typedef struct rrdcalctemplate { #define HEALTH_ENTRY_NOTIFICATIONS_EXEC_FAILED 0x00000008 typedef struct alarm_entry { - uint32_t id; + uint32_t unique_id; + uint32_t alarm_id; + uint32_t alarm_event_id; time_t when; time_t duration; @@ -231,6 +287,8 @@ typedef struct alarm_entry { char *family; char *exec; + char *recipient; + time_t exec_run_timestamp; int exec_code; char *source; @@ -244,12 +302,18 @@ typedef struct alarm_entry { uint32_t notifications; - struct alarm_entry *updated_by; + int delay; + time_t delay_up_to_timestamp; + + uint32_t updated_by_id; + uint32_t updates_id; + struct alarm_entry *next; } ALARM_ENTRY; typedef struct alarm_log { - uint32_t nextid; + uint32_t next_log_id; + uint32_t next_alarm_id; unsigned int count; unsigned int max; ALARM_ENTRY *alarms; @@ -278,6 +342,6 @@ extern void health_reload(void); extern int health_variable_lookup(const char *variable, uint32_t hash, RRDCALC *rc, calculated_number *result); extern void health_alarms2json(RRDHOST *host, BUFFER *wb, int all); -extern void health_alarm_log2json(RRDHOST *host, BUFFER *wb); +extern void health_alarm_log2json(RRDHOST *host, BUFFER *wb, uint32_t after); 
#endif //NETDATA_HEALTH_H @@ -90,27 +90,28 @@ int open_log_file(int fd, FILE **fp, const char *filename, int *enabled_syslog) void reopen_all_log_files() { if(stdout_filename) - open_log_file(STDOUT_FILENO, &stdout, stdout_filename, &output_log_syslog); + open_log_file(STDOUT_FILENO, (FILE **)&stdout, stdout_filename, &output_log_syslog); if(stderr_filename) - open_log_file(STDERR_FILENO, &stderr, stderr_filename, &error_log_syslog); + open_log_file(STDERR_FILENO, (FILE **)&stderr, stderr_filename, &error_log_syslog); if(stdaccess_filename) - stdaccess_fd = open_log_file(stdaccess_fd, &stdaccess, stdaccess_filename, &access_log_syslog); + stdaccess_fd = open_log_file(stdaccess_fd, (FILE **)&stdaccess, stdaccess_filename, &access_log_syslog); } void open_all_log_files() { // disable stdin - open_log_file(STDIN_FILENO, &stdin, "/dev/null", NULL); + open_log_file(STDIN_FILENO, (FILE **)&stdin, "/dev/null", NULL); - open_log_file(STDOUT_FILENO, &stdout, stdout_filename, &output_log_syslog); - open_log_file(STDERR_FILENO, &stderr, stderr_filename, &error_log_syslog); - stdaccess_fd = open_log_file(stdaccess_fd, &stdaccess, stdaccess_filename, &access_log_syslog); + open_log_file(STDOUT_FILENO, (FILE **)&stdout, stdout_filename, &output_log_syslog); + open_log_file(STDERR_FILENO, (FILE **)&stderr, stderr_filename, &error_log_syslog); + stdaccess_fd = open_log_file(stdaccess_fd, (FILE **)&stdaccess, stdaccess_filename, &access_log_syslog); } // ---------------------------------------------------------------------------- // error log throttling +time_t error_log_throttle_period_backup = 0; time_t error_log_throttle_period = 1200; unsigned long error_log_errors_per_period = 200; @@ -265,6 +266,28 @@ void info_int( const char *file, const char *function, const unsigned long line, // ---------------------------------------------------------------------------- // error log +#if defined(STRERROR_R_CHAR_P) +// GLIBC version of strerror_r +static const char *strerror_result(const 
char *a, const char *b) { (void)b; return a; } +#elif defined(HAVE_STRERROR_R) +// POSIX version of strerror_r +static const char *strerror_result(int a, const char *b) { (void)a; return b; } +#elif defined(HAVE_C__GENERIC) + +// what a trick! +// http://stackoverflow.com/questions/479207/function-overloading-in-c +static const char *strerror_result_int(int a, const char *b) { (void)a; return b; } +static const char *strerror_result_string(const char *a, const char *b) { (void)b; return a; } + +#define strerror_result(a, b) _Generic((a), \ + int: strerror_result_int, \ + char *: strerror_result_string \ + )(a, b) + +#else +#error "cannot detect the format of function strerror_r()" +#endif + void error_int( const char *prefix, const char *file, const char *function, const unsigned long line, const char *fmt, ... ) { va_list args; @@ -282,7 +305,7 @@ void error_int( const char *prefix, const char *file, const char *function, cons if(errno) { char buf[1024]; - fprintf(stderr, " (errno %d, %s)\n", errno, strerror_r(errno, buf, 1023)); + fprintf(stderr, " (errno %d, %s)\n", errno, strerror_result(strerror_r(errno, buf, 1023), buf)); errno = 0; } else @@ -45,14 +45,14 @@ extern int access_log_syslog; extern int error_log_syslog; extern int output_log_syslog; -extern time_t error_log_throttle_period; +extern time_t error_log_throttle_period, error_log_throttle_period_backup; extern unsigned long error_log_errors_per_period; extern int error_log_limit(int reset); extern void open_all_log_files(); extern void reopen_all_log_files(); -#define error_log_limit_reset() do { error_log_limit(1); } while(0) +#define error_log_limit_reset() do { error_log_throttle_period = error_log_throttle_period_backup; error_log_limit(1); } while(0) #define error_log_limit_unlimited() do { error_log_throttle_period = 0; } while(0) #define debug(type, args...) 
do { if(unlikely(debug_flags & type)) debug_int(__FILE__, __FUNCTION__, __LINE__, ##args); } while(0) @@ -276,20 +276,16 @@ static const char *verify_required_directory(const char *dir) { int main(int argc, char **argv) { + char *hostname = "localhost"; int i, check_config = 0; int config_loaded = 0; int dont_fork = 0; size_t wanted_stacksize = 0, stacksize = 0; pthread_attr_t attr; - // global initialization - get_HZ(); - // set the name for logging program_name = "netdata"; - // parse command line. - // parse depercated options // TODO: Remove this block with the next major release. { @@ -378,7 +374,7 @@ int main(int argc, char **argv) break; case 'v': // TODO: Outsource version to makefile which can compute version from git. - printf("netdata 1.3.0\n"); + printf("netdata %s\n", VERSION); return 0; case 'W': { @@ -411,6 +407,16 @@ int main(int argc, char **argv) load_config(NULL, 0); { + char *pmax = config_get("global", "glibc malloc arena max for plugins", "1"); + if(pmax && *pmax) + setenv("MALLOC_ARENA_MAX", pmax, 1); + +#if defined(HAVE_C_MALLOPT) + int i = config_get_number("global", "glibc malloc arena max for netdata", 1); + if(i > 0) + mallopt(M_ARENA_MAX, 1); +#endif + char *config_dir = config_get("global", "config directory", CONFIG_DIR); // prepare configuration environment variables for the plugins @@ -471,13 +477,18 @@ int main(int argc, char **argv) global_host_prefix = config_get("global", "host access prefix", ""); setenv("NETDATA_HOST_PREFIX", global_host_prefix, 1); + get_system_HZ(); + get_system_cpus(); + get_system_pid_max(); + // -------------------------------------------------------------------- stdout_filename = config_get("global", "debug log", LOG_DIR "/debug.log"); stderr_filename = config_get("global", "error log", LOG_DIR "/error.log"); stdaccess_filename = config_get("global", "access log", LOG_DIR "/access.log"); - error_log_throttle_period = config_get_number("global", "errors flood protection period", 
error_log_throttle_period); + error_log_throttle_period_backup = + error_log_throttle_period = config_get_number("global", "errors flood protection period", error_log_throttle_period); setenv("NETDATA_ERRORS_THROTTLE_PERIOD", config_get("global", "errors flood protection period" , ""), 1); error_log_errors_per_period = (unsigned long)config_get_number("global", "errors to trigger flood protection", error_log_errors_per_period); @@ -488,6 +499,7 @@ int main(int argc, char **argv) error_log_throttle_period = 0; error_log_errors_per_period = 0; } + error_log_limit_unlimited(); // -------------------------------------------------------------------- @@ -647,6 +659,11 @@ int main(int argc, char **argv) } // ------------------------------------------------------------------------ + // initialize rrd host + + rrdhost_init(hostname); + + // ------------------------------------------------------------------------ // initialize the registry registry_init(); @@ -660,6 +677,11 @@ int main(int argc, char **argv) exit(1); // ------------------------------------------------------------------------ + // enable log flood protection + + error_log_limit_reset(); + + // ------------------------------------------------------------------------ // spawn the threads web_server_threading_selection(); diff --git a/src/plugin_nfacct.c b/src/plugin_nfacct.c index b2396fac..7843161d 100644 --- a/src/plugin_nfacct.c +++ b/src/plugin_nfacct.c @@ -27,11 +27,7 @@ static int nfacct_callback(const struct nlmsghdr *nlh, void *data) { info("nfacct.plugin: increasing nfacct_list to size %d", size); - nfacct_list = realloc(nfacct_list, sizeof(struct nfacct_list) + (sizeof(struct mynfacct) * size)); - if(!nfacct_list) { - error("nfacct.plugin: cannot allocate nfacct_list."); - return MNL_CB_OK; - } + nfacct_list = reallocz(nfacct_list, sizeof(struct nfacct_list) + (sizeof(struct mynfacct) * size)); nfacct_list->data[len].nfacct = nfacct_alloc(); if(!nfacct_list->data[size - 1].nfacct) { @@ -150,7 +146,7 
@@ void *nfacct_main(void *ptr) { st = rrdset_find_bytype("netfilter", "nfacct_packets"); if(!st) { - st = rrdset_create("netfilter", "nfacct_packets", NULL, "nfacct", NULL, "Netfilter Accounting Packets", "packets/s", 1006, rrd_update_every, RRDSET_TYPE_STACKED); + st = rrdset_create("netfilter", "nfacct_packets", NULL, "nfacct", NULL, "Netfilter Accounting Packets", "packets/s", 3206, rrd_update_every, RRDSET_TYPE_STACKED); for(i = 0; i < nfacct_list->len ; i++) rrddim_add(st, nfacct_list->data[i].name, NULL, 1, rrd_update_every, RRDDIM_INCREMENTAL); @@ -170,7 +166,7 @@ void *nfacct_main(void *ptr) { st = rrdset_find_bytype("netfilter", "nfacct_bytes"); if(!st) { - st = rrdset_create("netfilter", "nfacct_bytes", NULL, "nfacct", NULL, "Netfilter Accounting Bandwidth", "kilobytes/s", 1007, rrd_update_every, RRDSET_TYPE_STACKED); + st = rrdset_create("netfilter", "nfacct_bytes", NULL, "nfacct", NULL, "Netfilter Accounting Bandwidth", "kilobytes/s", 3207, rrd_update_every, RRDSET_TYPE_STACKED); for(i = 0; i < nfacct_list->len ; i++) rrddim_add(st, nfacct_list->data[i].name, NULL, 1, 1000 * rrd_update_every, RRDDIM_INCREMENTAL); @@ -192,7 +188,7 @@ void *nfacct_main(void *ptr) { usleep(susec); // copy current to last - bcopy(&now, &last, sizeof(struct timeval)); + memmove(&last, &now, sizeof(struct timeval)); } mnl_socket_close(nl); diff --git a/src/plugin_proc.c b/src/plugin_proc.c index a1bf314d..a50a2251 100644 --- a/src/plugin_proc.c +++ b/src/plugin_proc.c @@ -28,10 +28,12 @@ void *proc_main(void *ptr) int vdo_proc_stat = !config_get_boolean("plugin:proc", "/proc/stat", 1); int vdo_proc_meminfo = !config_get_boolean("plugin:proc", "/proc/meminfo", 1); int vdo_proc_vmstat = !config_get_boolean("plugin:proc", "/proc/vmstat", 1); + int vdo_proc_net_rpc_nfs = !config_get_boolean("plugin:proc", "/proc/net/rpc/nfs", 1); int vdo_proc_net_rpc_nfsd = !config_get_boolean("plugin:proc", "/proc/net/rpc/nfsd", 1); int vdo_proc_sys_kernel_random_entropy_avail = 
!config_get_boolean("plugin:proc", "/proc/sys/kernel/random/entropy_avail", 1); int vdo_proc_interrupts = !config_get_boolean("plugin:proc", "/proc/interrupts", 1); int vdo_proc_softirqs = !config_get_boolean("plugin:proc", "/proc/softirqs", 1); + int vdo_proc_net_softnet_stat = !config_get_boolean("plugin:proc", "/proc/net/softnet_stat", 1); int vdo_proc_loadavg = !config_get_boolean("plugin:proc", "/proc/loadavg", 1); int vdo_sys_kernel_mm_ksm = !config_get_boolean("plugin:proc", "/sys/kernel/mm/ksm", 1); int vdo_cpu_netdata = !config_get_boolean("plugin:proc", "netdata server resources", 1); @@ -48,10 +50,12 @@ void *proc_main(void *ptr) unsigned long long sutime_proc_stat = 0ULL; unsigned long long sutime_proc_meminfo = 0ULL; unsigned long long sutime_proc_vmstat = 0ULL; + unsigned long long sutime_proc_net_rpc_nfs = 0ULL; unsigned long long sutime_proc_net_rpc_nfsd = 0ULL; unsigned long long sutime_proc_sys_kernel_random_entropy_avail = 0ULL; unsigned long long sutime_proc_interrupts = 0ULL; unsigned long long sutime_proc_softirqs = 0ULL; + unsigned long long sutime_proc_net_softnet_stat = 0ULL; unsigned long long sutime_proc_loadavg = 0ULL; unsigned long long sutime_sys_kernel_mm_ksm = 0ULL; @@ -59,7 +63,7 @@ void *proc_main(void *ptr) unsigned long long sunext = (time(NULL) - (time(NULL) % rrd_update_every) + rrd_update_every) * 1000000ULL; unsigned long long sunow; - for(;1;) { + for(;;) { if(unlikely(netdata_exit)) break; // delay until it is our time to run @@ -107,6 +111,14 @@ void *proc_main(void *ptr) } if(unlikely(netdata_exit)) break; + if(!vdo_proc_net_softnet_stat) { + debug(D_PROCNETDEV_LOOP, "PROCNETDEV: calling do_proc_net_softnet_stat()."); + sunow = time_usec(); + vdo_proc_net_softnet_stat = do_proc_net_softnet_stat(rrd_update_every, (sutime_proc_net_softnet_stat > 0)?sunow - sutime_proc_net_softnet_stat:0ULL); + sutime_proc_net_softnet_stat = sunow; + } + if(unlikely(netdata_exit)) break; + if(!vdo_proc_sys_kernel_random_entropy_avail) { 
debug(D_PROCNETDEV_LOOP, "PROCNETDEV: calling do_proc_sys_kernel_random_entropy_avail()."); sunow = time_usec(); @@ -211,6 +223,14 @@ void *proc_main(void *ptr) } if(unlikely(netdata_exit)) break; + if(!vdo_proc_net_rpc_nfs) { + debug(D_PROCNETDEV_LOOP, "PROCNETDEV: calling do_proc_net_rpc_nfs()."); + sunow = time_usec(); + vdo_proc_net_rpc_nfs = do_proc_net_rpc_nfs(rrd_update_every, (sutime_proc_net_rpc_nfs > 0)?sunow - sutime_proc_net_rpc_nfs:0ULL); + sutime_proc_net_rpc_nfs = sunow; + } + if(unlikely(netdata_exit)) break; + // END -- the job is done // -------------------------------------------------------------------- @@ -221,6 +241,8 @@ void *proc_main(void *ptr) } } + info("PROC thread exiting"); + pthread_exit(NULL); return NULL; } diff --git a/src/plugin_proc.h b/src/plugin_proc.h index a512e1cd..f72a9970 100644 --- a/src/plugin_proc.h +++ b/src/plugin_proc.h @@ -13,6 +13,7 @@ extern int do_proc_net_ip_vs_stats(int update_every, unsigned long long dt); extern int do_proc_stat(int update_every, unsigned long long dt); extern int do_proc_meminfo(int update_every, unsigned long long dt); extern int do_proc_vmstat(int update_every, unsigned long long dt); +extern int do_proc_net_rpc_nfs(int update_every, unsigned long long dt); extern int do_proc_net_rpc_nfsd(int update_every, unsigned long long dt); extern int do_proc_sys_kernel_random_entropy_avail(int update_every, unsigned long long dt); extern int do_proc_interrupts(int update_every, unsigned long long dt); @@ -20,5 +21,6 @@ extern int do_proc_softirqs(int update_every, unsigned long long dt); extern int do_sys_kernel_mm_ksm(int update_every, unsigned long long dt); extern int do_proc_loadavg(int update_every, unsigned long long dt); extern int do_proc_net_stat_synproxy(int update_every, unsigned long long dt); +extern int do_proc_net_softnet_stat(int update_every, unsigned long long dt); #endif /* NETDATA_PLUGIN_PROC_H */ diff --git a/src/plugin_tc.c b/src/plugin_tc.c index 408069db..399fcd6d 100644 --- 
a/src/plugin_tc.c +++ b/src/plugin_tc.c @@ -39,6 +39,8 @@ struct tc_class { RRDDIM *rd_bytes; RRDDIM *rd_packets; RRDDIM *rd_dropped; + RRDDIM *rd_tokens; + RRDDIM *rd_ctokens; char name_updated; char updated; // updated bytes @@ -64,10 +66,14 @@ struct tc_device { char enabled_bytes; char enabled_packets; char enabled_dropped; + char enabled_tokens; + char enabled_ctokens; RRDSET *st_bytes; RRDSET *st_packets; RRDSET *st_dropped; + RRDSET *st_tokens; + RRDSET *st_ctokens; avl_tree classes_index; @@ -177,18 +183,20 @@ static inline void tc_device_classes_cleanup(struct tc_device *d) { } static inline void tc_device_commit(struct tc_device *d) { - static int enable_new_interfaces = -1, enable_bytes = -1, enable_packets = -1, enable_dropped = -1; + static int enable_new_interfaces = -1, enable_bytes = -1, enable_packets = -1, enable_dropped = -1, enable_tokens = -1, enable_ctokens = -1; if(unlikely(enable_new_interfaces == -1)) { enable_new_interfaces = config_get_boolean_ondemand("plugin:tc", "enable new interfaces detected at runtime", CONFIG_ONDEMAND_YES); enable_bytes = config_get_boolean_ondemand("plugin:tc", "enable traffic charts for all interfaces", CONFIG_ONDEMAND_ONDEMAND); enable_packets = config_get_boolean_ondemand("plugin:tc", "enable packets charts for all interfaces", CONFIG_ONDEMAND_ONDEMAND); enable_dropped = config_get_boolean_ondemand("plugin:tc", "enable dropped charts for all interfaces", CONFIG_ONDEMAND_ONDEMAND); + enable_tokens = config_get_boolean_ondemand("plugin:tc", "enable tokens charts for all interfaces", CONFIG_ONDEMAND_NO); + enable_ctokens = config_get_boolean_ondemand("plugin:tc", "enable ctokens charts for all interfaces", CONFIG_ONDEMAND_NO); } // we only need to add leaf classes struct tc_class *c, *x; - unsigned long long bytes_sum = 0, packets_sum = 0, dropped_sum = 0; + unsigned long long bytes_sum = 0, packets_sum = 0, dropped_sum = 0, tokens_sum = 0, ctokens_sum = 0; int active_classes = 0; // set all classes @@ -216,23 
+224,26 @@ static inline void tc_device_commit(struct tc_device *d) { } } - // debugging: + // debugging only /* - for ( c = d->classes ; c ; c = c->next) { - if(c->isleaf && c->hasparent) debug(D_TC_LOOP, "TC: Device %s, class %s, OK", d->name, c->id); - else debug(D_TC_LOOP, "TC: Device %s, class %s, IGNORE (isleaf: %d, hasparent: %d, parent: %s)", d->name, c->id, c->isleaf, c->hasparent, c->parentid); + if(unlikely(debug_flags & D_TC_LOOP)) { + for(c = d->classes ; c ; c = c->next) { + if(c->isleaf && c->hasparent) debug(D_TC_LOOP, "TC: Device '%s', class %s, OK", d->name, c->id); + else debug(D_TC_LOOP, "TC: Device '%s', class %s, IGNORE (isleaf: %d, hasparent: %d, parent: %s)", d->name?d->name:d->id, c->id, c->isleaf, c->hasparent, c->parentid?c->parentid:"(unset)"); + } } */ // we need at least a class for(c = d->classes ; c ; c = c->next) { // debug(D_TC_LOOP, "TC: Device '%s', class '%s', isLeaf=%d, HasParent=%d, Seen=%d", d->name?d->name:d->id, c->name?c->name:c->id, c->isleaf, c->hasparent, c->seen); - if(!c->updated) continue; - if(c->isleaf && c->hasparent) { + if(unlikely(c->updated && c->isleaf && c->hasparent)) { active_classes++; bytes_sum += c->bytes; packets_sum += c->packets; dropped_sum += c->dropped; + tokens_sum += c->tokens; + ctokens_sum += c->ctokens; } } @@ -242,7 +253,7 @@ static inline void tc_device_commit(struct tc_device *d) { return; } - if(unlikely(d->enabled == -1)) { + if(unlikely(d->enabled == (char)-1)) { char var_name[CONFIG_MAX_NAME + 1]; snprintfz(var_name, CONFIG_MAX_NAME, "qos for %s", d->id); d->enabled = config_get_boolean_ondemand("plugin:tc", var_name, enable_new_interfaces); @@ -255,8 +266,30 @@ static inline void tc_device_commit(struct tc_device *d) { snprintfz(var_name, CONFIG_MAX_NAME, "dropped packets chart for %s", d->id); d->enabled_dropped = config_get_boolean_ondemand("plugin:tc", var_name, enable_dropped); + + snprintfz(var_name, CONFIG_MAX_NAME, "tokens chart for %s", d->id); + d->enabled_tokens = 
config_get_boolean_ondemand("plugin:tc", var_name, enable_tokens); + + snprintfz(var_name, CONFIG_MAX_NAME, "ctokens chart for %s", d->id); + d->enabled_ctokens = config_get_boolean_ondemand("plugin:tc", var_name, enable_ctokens); } + debug(D_TC_LOOP, "TC: evaluating TC device '%s'. enabled = %d/%d (bytes: %d/%d, packets: %d/%d, dropped: %d/%d, tokens: %d/%d, ctokens: %d/%d), classes = %d (bytes = %llu, packets = %llu, dropped = %llu, tokens = %llu, ctokens = %llu).", + d->name?d->name:d->id, + d->enabled, enable_new_interfaces, + d->enabled_bytes, enable_bytes, + d->enabled_packets, enable_packets, + d->enabled_dropped, enable_dropped, + d->enabled_tokens, enable_tokens, + d->enabled_ctokens, enable_ctokens, + active_classes, + bytes_sum, + packets_sum, + dropped_sum, + tokens_sum, + ctokens_sum + ); + if(likely(d->enabled)) { // -------------------------------------------------------------------- // bytes @@ -421,6 +454,114 @@ static inline void tc_device_commit(struct tc_device *d) { } rrdset_done(d->st_dropped); } + + // -------------------------------------------------------------------- + // tokens + + if(d->enabled_tokens == CONFIG_ONDEMAND_YES || (d->enabled_tokens == CONFIG_ONDEMAND_ONDEMAND && tokens_sum)) { + d->enabled_tokens = CONFIG_ONDEMAND_YES; + + if(unlikely(!d->st_tokens)) { + char id[RRD_ID_LENGTH_MAX + 1]; + char name[RRD_ID_LENGTH_MAX + 1]; + snprintfz(id, RRD_ID_LENGTH_MAX, "%s_tokens", d->id); + snprintfz(name, RRD_ID_LENGTH_MAX, "%s_tokens", d->name?d->name:d->id); + + d->st_tokens = rrdset_find_bytype(RRD_TYPE_TC, id); + if(unlikely(!d->st_tokens)) { + debug(D_TC_LOOP, "TC: Creating new _tokens chart for device '%s'", d->name?d->name:d->id); + d->st_tokens = rrdset_create(RRD_TYPE_TC, id, name, d->family?d->family:d->id, RRD_TYPE_TC ".qos_tokens", "Class Tokens", "tokens", 7030, rrd_update_every, RRDSET_TYPE_LINE); + } + } + else { + debug(D_TC_LOOP, "TC: Updating _tokens chart for device '%s'", d->name?d->name:d->id); + 
rrdset_next_plugins(d->st_tokens); + + // FIXME + // update the family + } + + for(c = d->classes ; c ; c = c->next) { + if(unlikely(!c->updated)) continue; + + if(c->isleaf && c->hasparent) { + if(unlikely(!c->rd_tokens)) { + c->rd_tokens = rrddim_find(d->st_tokens, c->id); + if(unlikely(!c->rd_tokens)) { + debug(D_TC_LOOP, "TC: Adding to chart '%s', dimension '%s' (name: '%s')", d->st_tokens->id, c->id, c->name); + + // new class, we have to add it + c->rd_tokens = rrddim_add(d->st_tokens, c->id, c->name?c->name:c->id, 1, 1, RRDDIM_ABSOLUTE); + } + else debug(D_TC_LOOP, "TC: Updating chart '%s', dimension '%s'", d->st_tokens->id, c->id); + } + + rrddim_set_by_pointer(d->st_tokens, c->rd_tokens, c->tokens); + + // if it has a name, different to the id + if(unlikely(c->name_updated && c->name && strcmp(c->id, c->name) != 0)) { + // update the rrd dimension with the new name + debug(D_TC_LOOP, "TC: Setting chart '%s', dimension '%s' name to '%s'", d->st_tokens->id, c->rd_tokens->id, c->name); + rrddim_set_name(d->st_tokens, c->rd_tokens, c->name); + } + } + } + rrdset_done(d->st_tokens); + } + + // -------------------------------------------------------------------- + // ctokens + + if(d->enabled_ctokens == CONFIG_ONDEMAND_YES || (d->enabled_ctokens == CONFIG_ONDEMAND_ONDEMAND && ctokens_sum)) { + d->enabled_ctokens = CONFIG_ONDEMAND_YES; + + if(unlikely(!d->st_ctokens)) { + char id[RRD_ID_LENGTH_MAX + 1]; + char name[RRD_ID_LENGTH_MAX + 1]; + snprintfz(id, RRD_ID_LENGTH_MAX, "%s_ctokens", d->id); + snprintfz(name, RRD_ID_LENGTH_MAX, "%s_ctokens", d->name?d->name:d->id); + + d->st_ctokens = rrdset_find_bytype(RRD_TYPE_TC, id); + if(unlikely(!d->st_ctokens)) { + debug(D_TC_LOOP, "TC: Creating new _ctokens chart for device '%s'", d->name?d->name:d->id); + d->st_ctokens = rrdset_create(RRD_TYPE_TC, id, name, d->family?d->family:d->id, RRD_TYPE_TC ".qos_ctokens", "Class cTokens", "ctokens", 7040, rrd_update_every, RRDSET_TYPE_LINE); + } + } + else { + debug(D_TC_LOOP, 
"TC: Updating _ctokens chart for device '%s'", d->name?d->name:d->id); + rrdset_next_plugins(d->st_ctokens); + + // FIXME + // update the family + } + + for(c = d->classes ; c ; c = c->next) { + if(unlikely(!c->updated)) continue; + + if(c->isleaf && c->hasparent) { + if(unlikely(!c->rd_ctokens)) { + c->rd_ctokens = rrddim_find(d->st_ctokens, c->id); + if(unlikely(!c->rd_ctokens)) { + debug(D_TC_LOOP, "TC: Adding to chart '%s', dimension '%s' (name: '%s')", d->st_ctokens->id, c->id, c->name); + + // new class, we have to add it + c->rd_ctokens = rrddim_add(d->st_ctokens, c->id, c->name?c->name:c->id, 1, 1, RRDDIM_ABSOLUTE); + } + else debug(D_TC_LOOP, "TC: Updating chart '%s', dimension '%s'", d->st_ctokens->id, c->id); + } + + rrddim_set_by_pointer(d->st_ctokens, c->rd_ctokens, c->ctokens); + + // if it has a name, different to the id + if(unlikely(c->name_updated && c->name && strcmp(c->id, c->name) != 0)) { + // update the rrd dimension with the new name + debug(D_TC_LOOP, "TC: Setting chart '%s', dimension '%s' name to '%s'", d->st_ctokens->id, c->rd_ctokens->id, c->name); + rrddim_set_name(d->st_ctokens, c->rd_ctokens, c->name); + } + } + } + rrdset_done(d->st_ctokens); + } } tc_device_classes_cleanup(d); @@ -475,7 +616,7 @@ static inline struct tc_device *tc_device_create(char *id) d->id = strdupz(id); d->hash = simple_hash(d->id); - d->enabled = -1; + d->enabled = (char)-1; avl_init(&d->classes_index, tc_class_compare); tc_device_index_add(d); diff --git a/src/plugins_d.c b/src/plugins_d.c index 627cc90e..0030e221 100644 --- a/src/plugins_d.c +++ b/src/plugins_d.c @@ -109,7 +109,7 @@ void *pluginsd_worker_thread(void *arg) size_t count = 0; - while(likely(1)) { + for(;;) { if(unlikely(netdata_exit)) break; FILE *fp = mypopen(cd->cmd, &cd->pid); @@ -358,7 +358,7 @@ void *pluginsd_worker_thread(void *arg) error("PLUGINSD: %s sleeping for %llu. 
Will kill with SIGCONT pid %d to wake it up.\n", cd->fullfilename, susec, cd->pid); usleep(susec); killpid(cd->pid, SIGCONT); - bcopy(&now, &last, sizeof(struct timeval)); + memmove(&last, &now, sizeof(struct timeval)); break; } #endif @@ -380,16 +380,9 @@ void *pluginsd_worker_thread(void *arg) // get the return code int code = mypclose(fp, cd->pid); - - if(netdata_exit) { - cd->pid = 0; - cd->enabled = 0; - cd->obsolete = 1; - pthread_exit(NULL); - return NULL; - } - - if(code != 0) { + + if(unlikely(netdata_exit)) break; + else if(code != 0) { // the plugin reports failure if(likely(!cd->successful_collections)) { @@ -430,10 +423,11 @@ void *pluginsd_worker_thread(void *arg) } cd->pid = 0; - if(unlikely(!cd->enabled)) - break; + if(unlikely(!cd->enabled)) break; } + info("PLUGINSD: '%s' thread exiting", cd->fullfilename); + cd->obsolete = 1; pthread_exit(NULL); return NULL; @@ -462,7 +456,7 @@ void *pluginsd_main(void *ptr) { if(scan_frequency < 1) scan_frequency = 1; - while(likely(1)) { + for(;;) { if(unlikely(netdata_exit)) break; dir = opendir(dir_name); @@ -542,6 +536,8 @@ void *pluginsd_main(void *ptr) { sleep((unsigned int) scan_frequency); } + info("PLUGINS.D thread exiting"); + pthread_exit(NULL); return NULL; } diff --git a/src/popen.c b/src/popen.c index 193efc0f..ad8d7596 100644 --- a/src/popen.c +++ b/src/popen.c @@ -147,6 +147,13 @@ int mypclose(FILE *fp, pid_t pid) { debug(D_EXIT, "Request to mypclose() on pid %d", pid); /*mypopen_del(fp);*/ + + // close the pipe fd + // this is required in musl + // without it the childs do not exit + close(fileno(fp)); + + // close the pipe file pointer fclose(fp); siginfo_t info; diff --git a/src/proc_interrupts.c b/src/proc_interrupts.c index fe0e9b1a..f277a5a9 100644 --- a/src/proc_interrupts.c +++ b/src/proc_interrupts.c @@ -21,8 +21,7 @@ static inline struct interrupt *get_interrupts_array(int lines, int cpus) { static struct interrupt *irrs = NULL; static int allocated = 0; - if(lines < allocated) return 
irrs; - else { + if(lines > allocated) { irrs = (struct interrupt *)reallocz(irrs, lines * recordsize(cpus)); allocated = lines; } @@ -143,15 +142,14 @@ int do_proc_interrupts(int update_every, unsigned long long dt) { int c; for(c = 0; c < cpus ; c++) { - char id[256+1]; - snprintfz(id, 256, "cpu%d_interrupts", c); + char id[50+1]; + snprintfz(id, 50, "cpu%d_interrupts", c); st = rrdset_find_bytype("cpu", id); if(!st) { - char name[256+1], title[256+1]; - snprintfz(name, 256, "cpu%d_interrupts", c); - snprintfz(title, 256, "CPU%d Interrupts", c); - st = rrdset_create("cpu", id, name, "interrupts", "cpu.interrupts", title, "interrupts/s", 2000 + c, update_every, RRDSET_TYPE_STACKED); + char title[100+1]; + snprintfz(title, 100, "CPU%d Interrupts", c); + st = rrdset_create("cpu", id, NULL, "interrupts", "cpu.interrupts", title, "interrupts/s", 1100 + c, update_every, RRDSET_TYPE_STACKED); for(l = 0; l < lines ;l++) { struct interrupt *irr = irrindex(irrs, l, cpus); diff --git a/src/proc_meminfo.c b/src/proc_meminfo.c index 4295cd6d..999c9538 100644 --- a/src/proc_meminfo.c +++ b/src/proc_meminfo.c @@ -9,14 +9,14 @@ int do_proc_meminfo(int update_every, unsigned long long dt) { static int do_ram = -1, do_swap = -1, do_hwcorrupt = -1, do_committed = -1, do_writeback = -1, do_kernel = -1, do_slab = -1; if(do_ram == -1) do_ram = config_get_boolean("plugin:proc:/proc/meminfo", "system ram", 1); - if(do_swap == -1) do_swap = config_get_boolean("plugin:proc:/proc/meminfo", "system swap", 1); + if(do_swap == -1) do_swap = config_get_boolean_ondemand("plugin:proc:/proc/meminfo", "system swap", CONFIG_ONDEMAND_ONDEMAND); if(do_hwcorrupt == -1) do_hwcorrupt = config_get_boolean_ondemand("plugin:proc:/proc/meminfo", "hardware corrupted ECC", CONFIG_ONDEMAND_ONDEMAND); if(do_committed == -1) do_committed = config_get_boolean("plugin:proc:/proc/meminfo", "committed memory", 1); if(do_writeback == -1) do_writeback = config_get_boolean("plugin:proc:/proc/meminfo", "writeback 
memory", 1); if(do_kernel == -1) do_kernel = config_get_boolean("plugin:proc:/proc/meminfo", "kernel memory", 1); if(do_slab == -1) do_slab = config_get_boolean("plugin:proc:/proc/meminfo", "slab memory", 1); - if(dt) {}; + (void)dt; if(!ff) { char filename[FILENAME_MAX + 1]; @@ -105,15 +105,15 @@ int do_proc_meminfo(int update_every, unsigned long long dt) { if(!st) { st = rrdset_create("system", "ram", NULL, "ram", NULL, "System RAM", "MB", 200, update_every, RRDSET_TYPE_STACKED); - rrddim_add(st, "buffers", NULL, 1, 1024, RRDDIM_ABSOLUTE); + rrddim_add(st, "free", NULL, 1, 1024, RRDDIM_ABSOLUTE); rrddim_add(st, "used", NULL, 1, 1024, RRDDIM_ABSOLUTE); rrddim_add(st, "cached", NULL, 1, 1024, RRDDIM_ABSOLUTE); - rrddim_add(st, "free", NULL, 1, 1024, RRDDIM_ABSOLUTE); + rrddim_add(st, "buffers", NULL, 1, 1024, RRDDIM_ABSOLUTE); } else rrdset_next(st); - rrddim_set(st, "used", MemUsed); rrddim_set(st, "free", MemFree); + rrddim_set(st, "used", MemUsed); rrddim_set(st, "cached", Cached); rrddim_set(st, "buffers", Buffers); rrdset_done(st); @@ -123,7 +123,9 @@ int do_proc_meminfo(int update_every, unsigned long long dt) { unsigned long long SwapUsed = SwapTotal - SwapFree; - if(do_swap) { + if(SwapTotal || SwapUsed || SwapFree || do_swap == CONFIG_ONDEMAND_YES) { + do_swap = CONFIG_ONDEMAND_YES; + st = rrdset_find("system.swap"); if(!st) { st = rrdset_create("system", "swap", NULL, "swap", NULL, "System Swap", "MB", 201, update_every, RRDSET_TYPE_STACKED); diff --git a/src/proc_net_ip_vs_stats.c b/src/proc_net_ip_vs_stats.c index 96efd792..de3e0e46 100644 --- a/src/proc_net_ip_vs_stats.c +++ b/src/proc_net_ip_vs_stats.c @@ -44,7 +44,7 @@ int do_proc_net_ip_vs_stats(int update_every, unsigned long long dt) { if(do_sockets) { st = rrdset_find(RRD_TYPE_NET_IPVS ".sockets"); if(!st) { - st = rrdset_create(RRD_TYPE_NET_IPVS, "sockets", NULL, RRD_TYPE_NET_IPVS, NULL, "IPVS New Connections", "connections/s", 1001, update_every, RRDSET_TYPE_LINE); + st = 
rrdset_create(RRD_TYPE_NET_IPVS, "sockets", NULL, RRD_TYPE_NET_IPVS, NULL, "IPVS New Connections", "connections/s", 3101, update_every, RRDSET_TYPE_LINE); rrddim_add(st, "connections", NULL, 1, 1, RRDDIM_INCREMENTAL); } @@ -59,7 +59,7 @@ int do_proc_net_ip_vs_stats(int update_every, unsigned long long dt) { if(do_packets) { st = rrdset_find(RRD_TYPE_NET_IPVS ".packets"); if(!st) { - st = rrdset_create(RRD_TYPE_NET_IPVS, "packets", NULL, RRD_TYPE_NET_IPVS, NULL, "IPVS Packets", "packets/s", 1002, update_every, RRDSET_TYPE_LINE); + st = rrdset_create(RRD_TYPE_NET_IPVS, "packets", NULL, RRD_TYPE_NET_IPVS, NULL, "IPVS Packets", "packets/s", 3102, update_every, RRDSET_TYPE_LINE); rrddim_add(st, "received", NULL, 1, 1, RRDDIM_INCREMENTAL); rrddim_add(st, "sent", NULL, -1, 1, RRDDIM_INCREMENTAL); @@ -76,7 +76,7 @@ int do_proc_net_ip_vs_stats(int update_every, unsigned long long dt) { if(do_bandwidth) { st = rrdset_find(RRD_TYPE_NET_IPVS ".net"); if(!st) { - st = rrdset_create(RRD_TYPE_NET_IPVS, "net", NULL, RRD_TYPE_NET_IPVS, NULL, "IPVS Bandwidth", "kilobits/s", 1000, update_every, RRDSET_TYPE_AREA); + st = rrdset_create(RRD_TYPE_NET_IPVS, "net", NULL, RRD_TYPE_NET_IPVS, NULL, "IPVS Bandwidth", "kilobits/s", 3100, update_every, RRDSET_TYPE_AREA); rrddim_add(st, "received", NULL, 8, 1024, RRDDIM_INCREMENTAL); rrddim_add(st, "sent", NULL, -8, 1024, RRDDIM_INCREMENTAL); diff --git a/src/proc_net_netstat.c b/src/proc_net_netstat.c index fe1c4d97..ea38acf2 100644 --- a/src/proc_net_netstat.c +++ b/src/proc_net_netstat.c @@ -1,177 +1,838 @@ #include "common.h" + +struct netstat_columns { + char *name; + uint32_t hash; + unsigned long long value; + int multiplier; // not needed everywhere + char *label; // not needed everywhere +}; + +static struct netstat_columns tcpext_data[] = { + { "SyncookiesSent", 0, 0, 1, NULL }, + { "SyncookiesRecv", 0, 0, 1, NULL }, + { "SyncookiesFailed", 0, 0, 1, NULL }, + { "EmbryonicRsts", 0, 0, 1, NULL }, + { "PruneCalled", 0, 0, 1, NULL }, + { 
"RcvPruned", 0, 0, 1, NULL }, + { "OfoPruned", 0, 0, 1, NULL }, + { "OutOfWindowIcmps", 0, 0, 1, NULL }, + { "LockDroppedIcmps", 0, 0, 1, NULL }, + { "ArpFilter", 0, 0, 1, NULL }, + { "TW", 0, 0, 1, NULL }, + { "TWRecycled", 0, 0, 1, NULL }, + { "TWKilled", 0, 0, 1, NULL }, + { "PAWSPassive", 0, 0, 1, NULL }, + { "PAWSActive", 0, 0, 1, NULL }, + { "PAWSEstab", 0, 0, 1, NULL }, + { "DelayedACKs", 0, 0, 1, NULL }, + { "DelayedACKLocked", 0, 0, 1, NULL }, + { "DelayedACKLost", 0, 0, 1, NULL }, + { "ListenOverflows", 0, 0, 1, NULL }, + { "ListenDrops", 0, 0, 1, NULL }, + { "TCPPrequeued", 0, 0, 1, NULL }, + { "TCPDirectCopyFromBacklog", 0, 0, 1, NULL }, + { "TCPDirectCopyFromPrequeue", 0, 0, 1, NULL }, + { "TCPPrequeueDropped", 0, 0, 1, NULL }, + { "TCPHPHits", 0, 0, 1, NULL }, + { "TCPHPHitsToUser", 0, 0, 1, NULL }, + { "TCPPureAcks", 0, 0, 1, NULL }, + { "TCPHPAcks", 0, 0, 1, NULL }, + { "TCPRenoRecovery", 0, 0, 1, NULL }, + { "TCPSackRecovery", 0, 0, 1, NULL }, + { "TCPSACKReneging", 0, 0, 1, NULL }, + { "TCPFACKReorder", 0, 0, 1, NULL }, + { "TCPSACKReorder", 0, 0, 1, NULL }, + { "TCPRenoReorder", 0, 0, 1, NULL }, + { "TCPTSReorder", 0, 0, 1, NULL }, + { "TCPFullUndo", 0, 0, 1, NULL }, + { "TCPPartialUndo", 0, 0, 1, NULL }, + { "TCPDSACKUndo", 0, 0, 1, NULL }, + { "TCPLossUndo", 0, 0, 1, NULL }, + { "TCPLostRetransmit", 0, 0, 1, NULL }, + { "TCPRenoFailures", 0, 0, 1, NULL }, + { "TCPSackFailures", 0, 0, 1, NULL }, + { "TCPLossFailures", 0, 0, 1, NULL }, + { "TCPFastRetrans", 0, 0, 1, NULL }, + { "TCPForwardRetrans", 0, 0, 1, NULL }, + { "TCPSlowStartRetrans", 0, 0, 1, NULL }, + { "TCPTimeouts", 0, 0, 1, NULL }, + { "TCPLossProbes", 0, 0, 1, NULL }, + { "TCPLossProbeRecovery", 0, 0, 1, NULL }, + { "TCPRenoRecoveryFail", 0, 0, 1, NULL }, + { "TCPSackRecoveryFail", 0, 0, 1, NULL }, + { "TCPSchedulerFailed", 0, 0, 1, NULL }, + { "TCPRcvCollapsed", 0, 0, 1, NULL }, + { "TCPDSACKOldSent", 0, 0, 1, NULL }, + { "TCPDSACKOfoSent", 0, 0, 1, NULL }, + { "TCPDSACKRecv", 0, 0, 
1, NULL }, + { "TCPDSACKOfoRecv", 0, 0, 1, NULL }, + { "TCPAbortOnData", 0, 0, 1, NULL }, + { "TCPAbortOnClose", 0, 0, 1, NULL }, + { "TCPAbortOnMemory", 0, 0, 1, NULL }, + { "TCPAbortOnTimeout", 0, 0, 1, NULL }, + { "TCPAbortOnLinger", 0, 0, 1, NULL }, + { "TCPAbortFailed", 0, 0, 1, NULL }, + { "TCPMemoryPressures", 0, 0, 1, NULL }, + { "TCPSACKDiscard", 0, 0, 1, NULL }, + { "TCPDSACKIgnoredOld", 0, 0, 1, NULL }, + { "TCPDSACKIgnoredNoUndo", 0, 0, 1, NULL }, + { "TCPSpuriousRTOs", 0, 0, 1, NULL }, + { "TCPMD5NotFound", 0, 0, 1, NULL }, + { "TCPMD5Unexpected", 0, 0, 1, NULL }, + { "TCPSackShifted", 0, 0, 1, NULL }, + { "TCPSackMerged", 0, 0, 1, NULL }, + { "TCPSackShiftFallback", 0, 0, 1, NULL }, + { "TCPBacklogDrop", 0, 0, 1, NULL }, + { "TCPMinTTLDrop", 0, 0, 1, NULL }, + { "TCPDeferAcceptDrop", 0, 0, 1, NULL }, + { "IPReversePathFilter", 0, 0, 1, NULL }, + { "TCPTimeWaitOverflow", 0, 0, 1, NULL }, + { "TCPReqQFullDoCookies", 0, 0, 1, NULL }, + { "TCPReqQFullDrop", 0, 0, 1, NULL }, + { "TCPRetransFail", 0, 0, 1, NULL }, + { "TCPRcvCoalesce", 0, 0, 1, NULL }, + { "TCPOFOQueue", 0, 0, 1, NULL }, + { "TCPOFODrop", 0, 0, 1, NULL }, + { "TCPOFOMerge", 0, 0, 1, NULL }, + { "TCPChallengeACK", 0, 0, 1, NULL }, + { "TCPSYNChallenge", 0, 0, 1, NULL }, + { "TCPFastOpenActive", 0, 0, 1, NULL }, + { "TCPFastOpenActiveFail", 0, 0, 1, NULL }, + { "TCPFastOpenPassive", 0, 0, 1, NULL }, + { "TCPFastOpenPassiveFail", 0, 0, 1, NULL }, + { "TCPFastOpenListenOverflow", 0, 0, 1, NULL }, + { "TCPFastOpenCookieReqd", 0, 0, 1, NULL }, + { "TCPSpuriousRtxHostQueues", 0, 0, 1, NULL }, + { "BusyPollRxPackets", 0, 0, 1, NULL }, + { "TCPAutoCorking", 0, 0, 1, NULL }, + { "TCPFromZeroWindowAdv", 0, 0, 1, NULL }, + { "TCPToZeroWindowAdv", 0, 0, 1, NULL }, + { "TCPWantZeroWindowAdv", 0, 0, 1, NULL }, + { "TCPSynRetrans", 0, 0, 1, NULL }, + { "TCPOrigDataSent", 0, 0, 1, NULL }, + { "TCPHystartTrainDetect", 0, 0, 1, NULL }, + { "TCPHystartTrainCwnd", 0, 0, 1, NULL }, + { "TCPHystartDelayDetect", 
0, 0, 1, NULL }, + { "TCPHystartDelayCwnd", 0, 0, 1, NULL }, + { "TCPACKSkippedSynRecv", 0, 0, 1, NULL }, + { "TCPACKSkippedPAWS", 0, 0, 1, NULL }, + { "TCPACKSkippedSeq", 0, 0, 1, NULL }, + { "TCPACKSkippedFinWait2", 0, 0, 1, NULL }, + { "TCPACKSkippedTimeWait", 0, 0, 1, NULL }, + { "TCPACKSkippedChallenge", 0, 0, 1, NULL }, + { "TCPWinProbe", 0, 0, 1, NULL }, + { "TCPKeepAlive", 0, 0, 1, NULL }, + { "TCPMTUPFail", 0, 0, 1, NULL }, + { "TCPMTUPSuccess", 0, 0, 1, NULL }, + { NULL, 0, 0, 0, NULL } +}; + +static struct netstat_columns ipext_data[] = { + { "InNoRoutes", 0, 0, 1, NULL }, + { "InTruncatedPkts", 0, 0, 1, NULL }, + { "InMcastPkts", 0, 0, 1, NULL }, + { "OutMcastPkts", 0, 0, 1, NULL }, + { "InBcastPkts", 0, 0, 1, NULL }, + { "OutBcastPkts", 0, 0, 1, NULL }, + { "InOctets", 0, 0, 1, NULL }, + { "OutOctets", 0, 0, 1, NULL }, + { "InMcastOctets", 0, 0, 1, NULL }, + { "OutMcastOctets", 0, 0, 1, NULL }, + { "InBcastOctets", 0, 0, 1, NULL }, + { "OutBcastOctets", 0, 0, 1, NULL }, + { "InCsumErrors", 0, 0, 1, NULL }, + { "InNoECTPkts", 0, 0, 1, NULL }, + { "InECT1Pkts", 0, 0, 1, NULL }, + { "InECT0Pkts", 0, 0, 1, NULL }, + { "InCEPkts", 0, 0, 1, NULL }, + { NULL, 0, 0, 0, NULL } +}; + +static void hash_array(struct netstat_columns *nc) { + int i; + + for(i = 0; nc[i].name ;i++) + nc[i].hash = simple_hash(nc[i].name); +} + +static unsigned long long *netstat_columns_find(struct netstat_columns *nc, const char *name) { + uint32_t i, hash = simple_hash(name); + + for(i = 0; nc[i].name ;i++) + if(unlikely(nc[i].hash == hash && !strcmp(nc[i].name, name))) + return &nc[i].value; + + fatal("Cannot find key '%s' in /proc/net/netstat internal array.", name); +} + +static void parse_line_pair(procfile *ff, struct netstat_columns *nc, uint32_t header_line, uint32_t values_line) { + uint32_t hwords = procfile_linewords(ff, header_line); + uint32_t vwords = procfile_linewords(ff, values_line); + uint32_t w, i; + + if(unlikely(vwords > hwords)) { + error("File 
/proc/net/netstat on header line %u has %u words, but on value line %u has %u words.", header_line, hwords, values_line, vwords); + vwords = hwords; + } + + for(w = 1; w < vwords ;w++) { + char *key = procfile_lineword(ff, header_line, w); + uint32_t hash = simple_hash(key); + + for(i = 0 ; nc[i].name ;i++) { + if(unlikely(hash == nc[i].hash && !strcmp(key, nc[i].name))) { + nc[i].value = strtoull(procfile_lineword(ff, values_line, w), NULL, 10); + break; + } + } + } +} + + int do_proc_net_netstat(int update_every, unsigned long long dt) { - static int do_bandwidth = -1, do_inerrors = -1, do_mcast = -1, do_bcast = -1, do_mcast_p = -1, do_bcast_p = -1; + (void)dt; + + static int do_bandwidth = -1, do_inerrors = -1, do_mcast = -1, do_bcast = -1, do_mcast_p = -1, do_bcast_p = -1, do_ecn = -1, \ + do_tcpext_reorder = -1, do_tcpext_syscookies = -1, do_tcpext_ofo = -1, do_tcpext_connaborts = -1, do_tcpext_memory = -1; + static uint32_t hash_ipext = 0, hash_tcpext = 0; static procfile *ff = NULL; - if(do_bandwidth == -1) do_bandwidth = config_get_boolean_ondemand("plugin:proc:/proc/net/netstat", "bandwidth", CONFIG_ONDEMAND_ONDEMAND); - if(do_inerrors == -1) do_inerrors = config_get_boolean_ondemand("plugin:proc:/proc/net/netstat", "input errors", CONFIG_ONDEMAND_ONDEMAND); - if(do_mcast == -1) do_mcast = config_get_boolean_ondemand("plugin:proc:/proc/net/netstat", "multicast bandwidth", CONFIG_ONDEMAND_ONDEMAND); - if(do_bcast == -1) do_bcast = config_get_boolean_ondemand("plugin:proc:/proc/net/netstat", "broadcast bandwidth", CONFIG_ONDEMAND_ONDEMAND); - if(do_mcast_p == -1) do_mcast_p = config_get_boolean_ondemand("plugin:proc:/proc/net/netstat", "multicast packets", CONFIG_ONDEMAND_ONDEMAND); - if(do_bcast_p == -1) do_bcast_p = config_get_boolean_ondemand("plugin:proc:/proc/net/netstat", "broadcast packets", CONFIG_ONDEMAND_ONDEMAND); + static unsigned long long *tcpext_TCPRenoReorder = NULL; + static unsigned long long *tcpext_TCPFACKReorder = NULL; + static unsigned 
long long *tcpext_TCPSACKReorder = NULL; + static unsigned long long *tcpext_TCPTSReorder = NULL; + + static unsigned long long *tcpext_SyncookiesSent = NULL; + static unsigned long long *tcpext_SyncookiesRecv = NULL; + static unsigned long long *tcpext_SyncookiesFailed = NULL; - if(dt) {}; + static unsigned long long *tcpext_TCPOFOQueue = NULL; // Number of packets queued in OFO queue + static unsigned long long *tcpext_TCPOFODrop = NULL; // Number of packets meant to be queued in OFO but dropped because socket rcvbuf limit hit. + static unsigned long long *tcpext_TCPOFOMerge = NULL; // Number of packets in OFO that were merged with other packets. + static unsigned long long *tcpext_OfoPruned = NULL; // packets dropped from out-of-order queue because of socket buffer overrun + + static unsigned long long *tcpext_TCPAbortOnData = NULL; // connections reset due to unexpected data + static unsigned long long *tcpext_TCPAbortOnClose = NULL; // connections reset due to early user close + static unsigned long long *tcpext_TCPAbortOnMemory = NULL; // connections aborted due to memory pressure + static unsigned long long *tcpext_TCPAbortOnTimeout = NULL; // connections aborted due to timeout + static unsigned long long *tcpext_TCPAbortOnLinger = NULL; // connections aborted after user close in linger timeout + static unsigned long long *tcpext_TCPAbortFailed = NULL; // times unable to send RST due to no memory + + static unsigned long long *tcpext_TCPMemoryPressures = NULL; + +/* + // connection rejects + static unsigned long long *tcpext_PAWSActive = NULL; // active connections rejected because of time stamp + static unsigned long long *tcpext_PAWSPassive = NULL; // passive connections rejected because of time stamp + + static unsigned long long *tcpext_TCPTimeouts = NULL; + + static unsigned long long *tcpext_TCPDSACKUndo = NULL; + static unsigned long long *tcpext_TCPDSACKOldSent = NULL; + static unsigned long long *tcpext_TCPDSACKOfoSent = NULL; + static unsigned long 
long *tcpext_TCPDSACKRecv = NULL; + static unsigned long long *tcpext_TCPDSACKOfoRecv = NULL; + static unsigned long long *tcpext_TCPDSACKIgnoredOld = NULL; + static unsigned long long *tcpext_TCPDSACKIgnoredNoUndo = NULL; + + + static unsigned long long *tcpext_EmbryonicRsts = NULL; + + static unsigned long long *tcpext_PruneCalled = NULL; + static unsigned long long *tcpext_RcvPruned = NULL; + static unsigned long long *tcpext_OutOfWindowIcmps = NULL; + static unsigned long long *tcpext_LockDroppedIcmps = NULL; + static unsigned long long *tcpext_ArpFilter = NULL; + + static unsigned long long *tcpext_TW = NULL; + static unsigned long long *tcpext_TWRecycled = NULL; + static unsigned long long *tcpext_TWKilled = NULL; + + static unsigned long long *tcpext_PAWSEstab = NULL; + + static unsigned long long *tcpext_DelayedACKs = NULL; + static unsigned long long *tcpext_DelayedACKLocked = NULL; + static unsigned long long *tcpext_DelayedACKLost = NULL; + + static unsigned long long *tcpext_ListenOverflows = NULL; + static unsigned long long *tcpext_ListenDrops = NULL; + + static unsigned long long *tcpext_TCPPrequeued = NULL; + + static unsigned long long *tcpext_TCPDirectCopyFromBacklog = NULL; + static unsigned long long *tcpext_TCPDirectCopyFromPrequeue = NULL; + static unsigned long long *tcpext_TCPPrequeueDropped = NULL; + + static unsigned long long *tcpext_TCPHPHits = NULL; + static unsigned long long *tcpext_TCPHPHitsToUser = NULL; + static unsigned long long *tcpext_TCPHPAcks = NULL; + + static unsigned long long *tcpext_TCPPureAcks = NULL; + static unsigned long long *tcpext_TCPRenoRecovery = NULL; + + static unsigned long long *tcpext_TCPSackRecovery = NULL; + static unsigned long long *tcpext_TCPSackFailures = NULL; + static unsigned long long *tcpext_TCPSACKReneging = NULL; + static unsigned long long *tcpext_TCPSackRecoveryFail = NULL; + static unsigned long long *tcpext_TCPSACKDiscard = NULL; + static unsigned long long *tcpext_TCPSackShifted = NULL; + 
static unsigned long long *tcpext_TCPSackMerged = NULL; + static unsigned long long *tcpext_TCPSackShiftFallback = NULL; + + + static unsigned long long *tcpext_TCPFullUndo = NULL; + static unsigned long long *tcpext_TCPPartialUndo = NULL; + + static unsigned long long *tcpext_TCPLossUndo = NULL; + static unsigned long long *tcpext_TCPLostRetransmit = NULL; + + static unsigned long long *tcpext_TCPRenoFailures = NULL; + + static unsigned long long *tcpext_TCPLossFailures = NULL; + static unsigned long long *tcpext_TCPFastRetrans = NULL; + static unsigned long long *tcpext_TCPForwardRetrans = NULL; + static unsigned long long *tcpext_TCPSlowStartRetrans = NULL; + static unsigned long long *tcpext_TCPLossProbes = NULL; + static unsigned long long *tcpext_TCPLossProbeRecovery = NULL; + static unsigned long long *tcpext_TCPRenoRecoveryFail = NULL; + static unsigned long long *tcpext_TCPSchedulerFailed = NULL; + static unsigned long long *tcpext_TCPRcvCollapsed = NULL; + + static unsigned long long *tcpext_TCPSpuriousRTOs = NULL; + static unsigned long long *tcpext_TCPMD5NotFound = NULL; + static unsigned long long *tcpext_TCPMD5Unexpected = NULL; + + static unsigned long long *tcpext_TCPBacklogDrop = NULL; + static unsigned long long *tcpext_TCPMinTTLDrop = NULL; + static unsigned long long *tcpext_TCPDeferAcceptDrop = NULL; + static unsigned long long *tcpext_IPReversePathFilter = NULL; + static unsigned long long *tcpext_TCPTimeWaitOverflow = NULL; + static unsigned long long *tcpext_TCPReqQFullDoCookies = NULL; + static unsigned long long *tcpext_TCPReqQFullDrop = NULL; + static unsigned long long *tcpext_TCPRetransFail = NULL; + static unsigned long long *tcpext_TCPRcvCoalesce = NULL; + + static unsigned long long *tcpext_TCPChallengeACK = NULL; + static unsigned long long *tcpext_TCPSYNChallenge = NULL; + + static unsigned long long *tcpext_TCPFastOpenActive = NULL; + static unsigned long long *tcpext_TCPFastOpenActiveFail = NULL; + static unsigned long long 
*tcpext_TCPFastOpenPassive = NULL; + static unsigned long long *tcpext_TCPFastOpenPassiveFail = NULL; + static unsigned long long *tcpext_TCPFastOpenListenOverflow = NULL; + static unsigned long long *tcpext_TCPFastOpenCookieReqd = NULL; + + static unsigned long long *tcpext_TCPSpuriousRtxHostQueues = NULL; + static unsigned long long *tcpext_BusyPollRxPackets = NULL; + static unsigned long long *tcpext_TCPAutoCorking = NULL; + static unsigned long long *tcpext_TCPFromZeroWindowAdv = NULL; + static unsigned long long *tcpext_TCPToZeroWindowAdv = NULL; + static unsigned long long *tcpext_TCPWantZeroWindowAdv = NULL; + static unsigned long long *tcpext_TCPSynRetrans = NULL; + static unsigned long long *tcpext_TCPOrigDataSent = NULL; + + static unsigned long long *tcpext_TCPHystartTrainDetect = NULL; + static unsigned long long *tcpext_TCPHystartTrainCwnd = NULL; + static unsigned long long *tcpext_TCPHystartDelayDetect = NULL; + static unsigned long long *tcpext_TCPHystartDelayCwnd = NULL; + + static unsigned long long *tcpext_TCPACKSkippedSynRecv = NULL; + static unsigned long long *tcpext_TCPACKSkippedPAWS = NULL; + static unsigned long long *tcpext_TCPACKSkippedSeq = NULL; + static unsigned long long *tcpext_TCPACKSkippedFinWait2 = NULL; + static unsigned long long *tcpext_TCPACKSkippedTimeWait = NULL; + static unsigned long long *tcpext_TCPACKSkippedChallenge = NULL; + + static unsigned long long *tcpext_TCPWinProbe = NULL; + static unsigned long long *tcpext_TCPKeepAlive = NULL; + + static unsigned long long *tcpext_TCPMTUPFail = NULL; + static unsigned long long *tcpext_TCPMTUPSuccess = NULL; +*/ + + static unsigned long long *ipext_InNoRoutes = NULL; + static unsigned long long *ipext_InTruncatedPkts = NULL; + static unsigned long long *ipext_InMcastPkts = NULL; + static unsigned long long *ipext_OutMcastPkts = NULL; + static unsigned long long *ipext_InBcastPkts = NULL; + static unsigned long long *ipext_OutBcastPkts = NULL; + static unsigned long long 
*ipext_InOctets = NULL; + static unsigned long long *ipext_OutOctets = NULL; + static unsigned long long *ipext_InMcastOctets = NULL; + static unsigned long long *ipext_OutMcastOctets = NULL; + static unsigned long long *ipext_InBcastOctets = NULL; + static unsigned long long *ipext_OutBcastOctets = NULL; + static unsigned long long *ipext_InCsumErrors = NULL; + static unsigned long long *ipext_InNoECTPkts = NULL; + static unsigned long long *ipext_InECT1Pkts = NULL; + static unsigned long long *ipext_InECT0Pkts = NULL; + static unsigned long long *ipext_InCEPkts = NULL; + + if(unlikely(do_bandwidth == -1)) { + do_bandwidth = config_get_boolean_ondemand("plugin:proc:/proc/net/netstat", "bandwidth", CONFIG_ONDEMAND_ONDEMAND); + do_inerrors = config_get_boolean_ondemand("plugin:proc:/proc/net/netstat", "input errors", CONFIG_ONDEMAND_ONDEMAND); + do_mcast = config_get_boolean_ondemand("plugin:proc:/proc/net/netstat", "multicast bandwidth", CONFIG_ONDEMAND_ONDEMAND); + do_bcast = config_get_boolean_ondemand("plugin:proc:/proc/net/netstat", "broadcast bandwidth", CONFIG_ONDEMAND_ONDEMAND); + do_mcast_p = config_get_boolean_ondemand("plugin:proc:/proc/net/netstat", "multicast packets", CONFIG_ONDEMAND_ONDEMAND); + do_bcast_p = config_get_boolean_ondemand("plugin:proc:/proc/net/netstat", "broadcast packets", CONFIG_ONDEMAND_ONDEMAND); + do_ecn = config_get_boolean_ondemand("plugin:proc:/proc/net/netstat", "ECN packets", CONFIG_ONDEMAND_ONDEMAND); + + do_tcpext_reorder = config_get_boolean_ondemand("plugin:proc:/proc/net/netstat", "TCP reorders", CONFIG_ONDEMAND_ONDEMAND); + do_tcpext_syscookies = config_get_boolean_ondemand("plugin:proc:/proc/net/netstat", "TCP SYN cookies", CONFIG_ONDEMAND_ONDEMAND); + do_tcpext_ofo = config_get_boolean_ondemand("plugin:proc:/proc/net/netstat", "TCP out-of-order queue", CONFIG_ONDEMAND_ONDEMAND); + do_tcpext_connaborts = config_get_boolean_ondemand("plugin:proc:/proc/net/netstat", "TCP connection aborts", CONFIG_ONDEMAND_ONDEMAND); + 
do_tcpext_memory = config_get_boolean_ondemand("plugin:proc:/proc/net/netstat", "TCP memory pressures", CONFIG_ONDEMAND_ONDEMAND); + + hash_ipext = simple_hash("IpExt"); + hash_tcpext = simple_hash("TcpExt"); + + hash_array(ipext_data); + hash_array(tcpext_data); + + // Reordering + tcpext_TCPFACKReorder = netstat_columns_find(tcpext_data, "TCPFACKReorder"); + tcpext_TCPSACKReorder = netstat_columns_find(tcpext_data, "TCPSACKReorder"); + tcpext_TCPRenoReorder = netstat_columns_find(tcpext_data, "TCPRenoReorder"); + tcpext_TCPTSReorder = netstat_columns_find(tcpext_data, "TCPTSReorder"); + + // SYN Cookies + tcpext_SyncookiesSent = netstat_columns_find(tcpext_data, "SyncookiesSent"); + tcpext_SyncookiesRecv = netstat_columns_find(tcpext_data, "SyncookiesRecv"); + tcpext_SyncookiesFailed = netstat_columns_find(tcpext_data, "SyncookiesFailed"); + + // Out Of Order Queue + // http://www.spinics.net/lists/netdev/msg204696.html + tcpext_TCPOFOQueue = netstat_columns_find(tcpext_data, "TCPOFOQueue"); // Number of packets queued in OFO queue + tcpext_TCPOFODrop = netstat_columns_find(tcpext_data, "TCPOFODrop"); // Number of packets meant to be queued in OFO but dropped because socket rcvbuf limit hit. + tcpext_TCPOFOMerge = netstat_columns_find(tcpext_data, "TCPOFOMerge"); // Number of packets in OFO that were merged with other packets. 
+ tcpext_OfoPruned = netstat_columns_find(tcpext_data, "OfoPruned"); // packets dropped from out-of-order queue because of socket buffer overrun + + // connection resets + // https://github.com/ecki/net-tools/blob/bd8bceaed2311651710331a7f8990c3e31be9840/statistics.c + tcpext_TCPAbortOnData = netstat_columns_find(tcpext_data, "TCPAbortOnData"); // connections reset due to unexpected data + tcpext_TCPAbortOnClose = netstat_columns_find(tcpext_data, "TCPAbortOnClose"); // connections reset due to early user close + tcpext_TCPAbortOnMemory = netstat_columns_find(tcpext_data, "TCPAbortOnMemory"); // connections aborted due to memory pressure + tcpext_TCPAbortOnTimeout = netstat_columns_find(tcpext_data, "TCPAbortOnTimeout"); // connections aborted due to timeout + tcpext_TCPAbortOnLinger = netstat_columns_find(tcpext_data, "TCPAbortOnLinger"); // connections aborted after user close in linger timeout + tcpext_TCPAbortFailed = netstat_columns_find(tcpext_data, "TCPAbortFailed"); // times unable to send RST due to no memory + + tcpext_TCPMemoryPressures = netstat_columns_find(tcpext_data, "TCPMemoryPressures"); + + /* + tcpext_EmbryonicRsts = netstat_columns_find(tcpext_data, "EmbryonicRsts"); + tcpext_PruneCalled = netstat_columns_find(tcpext_data, "PruneCalled"); + tcpext_RcvPruned = netstat_columns_find(tcpext_data, "RcvPruned"); + tcpext_OutOfWindowIcmps = netstat_columns_find(tcpext_data, "OutOfWindowIcmps"); + tcpext_LockDroppedIcmps = netstat_columns_find(tcpext_data, "LockDroppedIcmps"); + tcpext_ArpFilter = netstat_columns_find(tcpext_data, "ArpFilter"); + tcpext_TW = netstat_columns_find(tcpext_data, "TW"); + tcpext_TWRecycled = netstat_columns_find(tcpext_data, "TWRecycled"); + tcpext_TWKilled = netstat_columns_find(tcpext_data, "TWKilled"); + tcpext_PAWSPassive = netstat_columns_find(tcpext_data, "PAWSPassive"); + tcpext_PAWSActive = netstat_columns_find(tcpext_data, "PAWSActive"); + tcpext_PAWSEstab = netstat_columns_find(tcpext_data, "PAWSEstab"); + 
tcpext_DelayedACKs = netstat_columns_find(tcpext_data, "DelayedACKs"); + tcpext_DelayedACKLocked = netstat_columns_find(tcpext_data, "DelayedACKLocked"); + tcpext_DelayedACKLost = netstat_columns_find(tcpext_data, "DelayedACKLost"); + tcpext_ListenOverflows = netstat_columns_find(tcpext_data, "ListenOverflows"); + tcpext_ListenDrops = netstat_columns_find(tcpext_data, "ListenDrops"); + tcpext_TCPPrequeued = netstat_columns_find(tcpext_data, "TCPPrequeued"); + tcpext_TCPDirectCopyFromBacklog = netstat_columns_find(tcpext_data, "TCPDirectCopyFromBacklog"); + tcpext_TCPDirectCopyFromPrequeue = netstat_columns_find(tcpext_data, "TCPDirectCopyFromPrequeue"); + tcpext_TCPPrequeueDropped = netstat_columns_find(tcpext_data, "TCPPrequeueDropped"); + tcpext_TCPHPHits = netstat_columns_find(tcpext_data, "TCPHPHits"); + tcpext_TCPHPHitsToUser = netstat_columns_find(tcpext_data, "TCPHPHitsToUser"); + tcpext_TCPPureAcks = netstat_columns_find(tcpext_data, "TCPPureAcks"); + tcpext_TCPHPAcks = netstat_columns_find(tcpext_data, "TCPHPAcks"); + tcpext_TCPRenoRecovery = netstat_columns_find(tcpext_data, "TCPRenoRecovery"); + tcpext_TCPSackRecovery = netstat_columns_find(tcpext_data, "TCPSackRecovery"); + tcpext_TCPSACKReneging = netstat_columns_find(tcpext_data, "TCPSACKReneging"); + tcpext_TCPFullUndo = netstat_columns_find(tcpext_data, "TCPFullUndo"); + tcpext_TCPPartialUndo = netstat_columns_find(tcpext_data, "TCPPartialUndo"); + tcpext_TCPDSACKUndo = netstat_columns_find(tcpext_data, "TCPDSACKUndo"); + tcpext_TCPLossUndo = netstat_columns_find(tcpext_data, "TCPLossUndo"); + tcpext_TCPLostRetransmit = netstat_columns_find(tcpext_data, "TCPLostRetransmit"); + tcpext_TCPRenoFailures = netstat_columns_find(tcpext_data, "TCPRenoFailures"); + tcpext_TCPSackFailures = netstat_columns_find(tcpext_data, "TCPSackFailures"); + tcpext_TCPLossFailures = netstat_columns_find(tcpext_data, "TCPLossFailures"); + tcpext_TCPFastRetrans = netstat_columns_find(tcpext_data, "TCPFastRetrans"); + 
tcpext_TCPForwardRetrans = netstat_columns_find(tcpext_data, "TCPForwardRetrans"); + tcpext_TCPSlowStartRetrans = netstat_columns_find(tcpext_data, "TCPSlowStartRetrans"); + tcpext_TCPTimeouts = netstat_columns_find(tcpext_data, "TCPTimeouts"); + tcpext_TCPLossProbes = netstat_columns_find(tcpext_data, "TCPLossProbes"); + tcpext_TCPLossProbeRecovery = netstat_columns_find(tcpext_data, "TCPLossProbeRecovery"); + tcpext_TCPRenoRecoveryFail = netstat_columns_find(tcpext_data, "TCPRenoRecoveryFail"); + tcpext_TCPSackRecoveryFail = netstat_columns_find(tcpext_data, "TCPSackRecoveryFail"); + tcpext_TCPSchedulerFailed = netstat_columns_find(tcpext_data, "TCPSchedulerFailed"); + tcpext_TCPRcvCollapsed = netstat_columns_find(tcpext_data, "TCPRcvCollapsed"); + tcpext_TCPDSACKOldSent = netstat_columns_find(tcpext_data, "TCPDSACKOldSent"); + tcpext_TCPDSACKOfoSent = netstat_columns_find(tcpext_data, "TCPDSACKOfoSent"); + tcpext_TCPDSACKRecv = netstat_columns_find(tcpext_data, "TCPDSACKRecv"); + tcpext_TCPDSACKOfoRecv = netstat_columns_find(tcpext_data, "TCPDSACKOfoRecv"); + tcpext_TCPSACKDiscard = netstat_columns_find(tcpext_data, "TCPSACKDiscard"); + tcpext_TCPDSACKIgnoredOld = netstat_columns_find(tcpext_data, "TCPDSACKIgnoredOld"); + tcpext_TCPDSACKIgnoredNoUndo = netstat_columns_find(tcpext_data, "TCPDSACKIgnoredNoUndo"); + tcpext_TCPSpuriousRTOs = netstat_columns_find(tcpext_data, "TCPSpuriousRTOs"); + tcpext_TCPMD5NotFound = netstat_columns_find(tcpext_data, "TCPMD5NotFound"); + tcpext_TCPMD5Unexpected = netstat_columns_find(tcpext_data, "TCPMD5Unexpected"); + tcpext_TCPSackShifted = netstat_columns_find(tcpext_data, "TCPSackShifted"); + tcpext_TCPSackMerged = netstat_columns_find(tcpext_data, "TCPSackMerged"); + tcpext_TCPSackShiftFallback = netstat_columns_find(tcpext_data, "TCPSackShiftFallback"); + tcpext_TCPBacklogDrop = netstat_columns_find(tcpext_data, "TCPBacklogDrop"); + tcpext_TCPMinTTLDrop = netstat_columns_find(tcpext_data, "TCPMinTTLDrop"); + 
tcpext_TCPDeferAcceptDrop = netstat_columns_find(tcpext_data, "TCPDeferAcceptDrop"); + tcpext_IPReversePathFilter = netstat_columns_find(tcpext_data, "IPReversePathFilter"); + tcpext_TCPTimeWaitOverflow = netstat_columns_find(tcpext_data, "TCPTimeWaitOverflow"); + tcpext_TCPReqQFullDoCookies = netstat_columns_find(tcpext_data, "TCPReqQFullDoCookies"); + tcpext_TCPReqQFullDrop = netstat_columns_find(tcpext_data, "TCPReqQFullDrop"); + tcpext_TCPRetransFail = netstat_columns_find(tcpext_data, "TCPRetransFail"); + tcpext_TCPRcvCoalesce = netstat_columns_find(tcpext_data, "TCPRcvCoalesce"); + tcpext_TCPChallengeACK = netstat_columns_find(tcpext_data, "TCPChallengeACK"); + tcpext_TCPSYNChallenge = netstat_columns_find(tcpext_data, "TCPSYNChallenge"); + tcpext_TCPFastOpenActive = netstat_columns_find(tcpext_data, "TCPFastOpenActive"); + tcpext_TCPFastOpenActiveFail = netstat_columns_find(tcpext_data, "TCPFastOpenActiveFail"); + tcpext_TCPFastOpenPassive = netstat_columns_find(tcpext_data, "TCPFastOpenPassive"); + tcpext_TCPFastOpenPassiveFail = netstat_columns_find(tcpext_data, "TCPFastOpenPassiveFail"); + tcpext_TCPFastOpenListenOverflow = netstat_columns_find(tcpext_data, "TCPFastOpenListenOverflow"); + tcpext_TCPFastOpenCookieReqd = netstat_columns_find(tcpext_data, "TCPFastOpenCookieReqd"); + tcpext_TCPSpuriousRtxHostQueues = netstat_columns_find(tcpext_data, "TCPSpuriousRtxHostQueues"); + tcpext_BusyPollRxPackets = netstat_columns_find(tcpext_data, "BusyPollRxPackets"); + tcpext_TCPAutoCorking = netstat_columns_find(tcpext_data, "TCPAutoCorking"); + tcpext_TCPFromZeroWindowAdv = netstat_columns_find(tcpext_data, "TCPFromZeroWindowAdv"); + tcpext_TCPToZeroWindowAdv = netstat_columns_find(tcpext_data, "TCPToZeroWindowAdv"); + tcpext_TCPWantZeroWindowAdv = netstat_columns_find(tcpext_data, "TCPWantZeroWindowAdv"); + tcpext_TCPSynRetrans = netstat_columns_find(tcpext_data, "TCPSynRetrans"); + tcpext_TCPOrigDataSent = netstat_columns_find(tcpext_data, "TCPOrigDataSent"); 
+ tcpext_TCPHystartTrainDetect = netstat_columns_find(tcpext_data, "TCPHystartTrainDetect"); + tcpext_TCPHystartTrainCwnd = netstat_columns_find(tcpext_data, "TCPHystartTrainCwnd"); + tcpext_TCPHystartDelayDetect = netstat_columns_find(tcpext_data, "TCPHystartDelayDetect"); + tcpext_TCPHystartDelayCwnd = netstat_columns_find(tcpext_data, "TCPHystartDelayCwnd"); + tcpext_TCPACKSkippedSynRecv = netstat_columns_find(tcpext_data, "TCPACKSkippedSynRecv"); + tcpext_TCPACKSkippedPAWS = netstat_columns_find(tcpext_data, "TCPACKSkippedPAWS"); + tcpext_TCPACKSkippedSeq = netstat_columns_find(tcpext_data, "TCPACKSkippedSeq"); + tcpext_TCPACKSkippedFinWait2 = netstat_columns_find(tcpext_data, "TCPACKSkippedFinWait2"); + tcpext_TCPACKSkippedTimeWait = netstat_columns_find(tcpext_data, "TCPACKSkippedTimeWait"); + tcpext_TCPACKSkippedChallenge = netstat_columns_find(tcpext_data, "TCPACKSkippedChallenge"); + tcpext_TCPWinProbe = netstat_columns_find(tcpext_data, "TCPWinProbe"); + tcpext_TCPKeepAlive = netstat_columns_find(tcpext_data, "TCPKeepAlive"); + tcpext_TCPMTUPFail = netstat_columns_find(tcpext_data, "TCPMTUPFail"); + tcpext_TCPMTUPSuccess = netstat_columns_find(tcpext_data, "TCPMTUPSuccess"); +*/ + ipext_InNoRoutes = netstat_columns_find(ipext_data, "InNoRoutes"); + ipext_InTruncatedPkts = netstat_columns_find(ipext_data, "InTruncatedPkts"); + ipext_InMcastPkts = netstat_columns_find(ipext_data, "InMcastPkts"); + ipext_OutMcastPkts = netstat_columns_find(ipext_data, "OutMcastPkts"); + ipext_InBcastPkts = netstat_columns_find(ipext_data, "InBcastPkts"); + ipext_OutBcastPkts = netstat_columns_find(ipext_data, "OutBcastPkts"); + ipext_InOctets = netstat_columns_find(ipext_data, "InOctets"); + ipext_OutOctets = netstat_columns_find(ipext_data, "OutOctets"); + ipext_InMcastOctets = netstat_columns_find(ipext_data, "InMcastOctets"); + ipext_OutMcastOctets = netstat_columns_find(ipext_data, "OutMcastOctets"); + ipext_InBcastOctets = netstat_columns_find(ipext_data, 
"InBcastOctets"); + ipext_OutBcastOctets = netstat_columns_find(ipext_data, "OutBcastOctets"); + ipext_InCsumErrors = netstat_columns_find(ipext_data, "InCsumErrors"); + ipext_InNoECTPkts = netstat_columns_find(ipext_data, "InNoECTPkts"); + ipext_InECT1Pkts = netstat_columns_find(ipext_data, "InECT1Pkts"); + ipext_InECT0Pkts = netstat_columns_find(ipext_data, "InECT0Pkts"); + ipext_InCEPkts = netstat_columns_find(ipext_data, "InCEPkts"); + } - if(!ff) { + if(unlikely(!ff)) { char filename[FILENAME_MAX + 1]; snprintfz(filename, FILENAME_MAX, "%s%s", global_host_prefix, "/proc/net/netstat"); ff = procfile_open(config_get("plugin:proc:/proc/net/netstat", "filename to monitor", filename), " \t:", PROCFILE_FLAG_DEFAULT); } - if(!ff) return 1; + if(unlikely(!ff)) return 1; ff = procfile_readall(ff); - if(!ff) return 0; // we return 0, so that we will retry to open it next time + if(unlikely(!ff)) return 0; // we return 0, so that we will retry to open it next time uint32_t lines = procfile_lines(ff), l; uint32_t words; for(l = 0; l < lines ;l++) { - if(strcmp(procfile_lineword(ff, l, 0), "IpExt") == 0) { - l++; // we need the next line + char *key = procfile_lineword(ff, l, 0); + uint32_t hash = simple_hash(key); + + if(unlikely(hash == hash_ipext && strcmp(key, "IpExt") == 0)) { + uint32_t h = l++; if(strcmp(procfile_lineword(ff, l, 0), "IpExt") != 0) { error("Cannot read IpExt line from /proc/net/netstat."); break; } words = procfile_linewords(ff, l); - if(words < 12) { - error("Cannot read /proc/net/netstat IpExt line. Expected 12 params, read %u.", words); + if(words < 2) { + error("Cannot read /proc/net/netstat IpExt line. 
Expected 2+ params, read %u.", words); continue; } - unsigned long long - InNoRoutes = 0, InTruncatedPkts = 0, - InOctets = 0, InMcastPkts = 0, InBcastPkts = 0, InMcastOctets = 0, InBcastOctets = 0, - OutOctets = 0, OutMcastPkts = 0, OutBcastPkts = 0, OutMcastOctets = 0, OutBcastOctets = 0; - - InNoRoutes = strtoull(procfile_lineword(ff, l, 1), NULL, 10); - InTruncatedPkts = strtoull(procfile_lineword(ff, l, 2), NULL, 10); - InMcastPkts = strtoull(procfile_lineword(ff, l, 3), NULL, 10); - OutMcastPkts = strtoull(procfile_lineword(ff, l, 4), NULL, 10); - InBcastPkts = strtoull(procfile_lineword(ff, l, 5), NULL, 10); - OutBcastPkts = strtoull(procfile_lineword(ff, l, 6), NULL, 10); - InOctets = strtoull(procfile_lineword(ff, l, 7), NULL, 10); - OutOctets = strtoull(procfile_lineword(ff, l, 8), NULL, 10); - InMcastOctets = strtoull(procfile_lineword(ff, l, 9), NULL, 10); - OutMcastOctets = strtoull(procfile_lineword(ff, l, 10), NULL, 10); - InBcastOctets = strtoull(procfile_lineword(ff, l, 11), NULL, 10); - OutBcastOctets = strtoull(procfile_lineword(ff, l, 12), NULL, 10); + parse_line_pair(ff, ipext_data, h, l); RRDSET *st; // -------------------------------------------------------------------- - if(do_bandwidth == CONFIG_ONDEMAND_YES || (do_bandwidth == CONFIG_ONDEMAND_ONDEMAND && (InOctets || OutOctets))) { + if(do_bandwidth == CONFIG_ONDEMAND_YES || (do_bandwidth == CONFIG_ONDEMAND_ONDEMAND && (*ipext_InOctets || *ipext_OutOctets))) { do_bandwidth = CONFIG_ONDEMAND_YES; st = rrdset_find("system.ipv4"); if(!st) { st = rrdset_create("system", "ipv4", NULL, "network", NULL, "IPv4 Bandwidth", "kilobits/s", 500, update_every, RRDSET_TYPE_AREA); - rrddim_add(st, "received", NULL, 8, 1024, RRDDIM_INCREMENTAL); - rrddim_add(st, "sent", NULL, -8, 1024, RRDDIM_INCREMENTAL); + rrddim_add(st, "InOctets", "received", 8, 1024, RRDDIM_INCREMENTAL); + rrddim_add(st, "OutOctets", "sent", -8, 1024, RRDDIM_INCREMENTAL); } else rrdset_next(st); - rrddim_set(st, "sent", OutOctets); - 
rrddim_set(st, "received", InOctets); + rrddim_set(st, "InOctets", *ipext_InOctets); + rrddim_set(st, "OutOctets", *ipext_OutOctets); rrdset_done(st); } // -------------------------------------------------------------------- - if(do_inerrors == CONFIG_ONDEMAND_YES || (do_inerrors == CONFIG_ONDEMAND_ONDEMAND && (InNoRoutes || InTruncatedPkts))) { + if(do_inerrors == CONFIG_ONDEMAND_YES || (do_inerrors == CONFIG_ONDEMAND_ONDEMAND && (*ipext_InNoRoutes || *ipext_InTruncatedPkts))) { do_inerrors = CONFIG_ONDEMAND_YES; st = rrdset_find("ipv4.inerrors"); if(!st) { st = rrdset_create("ipv4", "inerrors", NULL, "errors", NULL, "IPv4 Input Errors", "packets/s", 4000, update_every, RRDSET_TYPE_LINE); st->isdetail = 1; - rrddim_add(st, "noroutes", NULL, 1, 1, RRDDIM_INCREMENTAL); - rrddim_add(st, "truncated", NULL, 1, 1, RRDDIM_INCREMENTAL); + rrddim_add(st, "InNoRoutes", "noroutes", 1, 1, RRDDIM_INCREMENTAL); + rrddim_add(st, "InTruncatedPkts", "truncated", 1, 1, RRDDIM_INCREMENTAL); + rrddim_add(st, "InCsumErrors", "checksum", 1, 1, RRDDIM_INCREMENTAL); } else rrdset_next(st); - rrddim_set(st, "noroutes", InNoRoutes); - rrddim_set(st, "truncated", InTruncatedPkts); + rrddim_set(st, "InNoRoutes", *ipext_InNoRoutes); + rrddim_set(st, "InTruncatedPkts", *ipext_InTruncatedPkts); + rrddim_set(st, "InCsumErrors", *ipext_InCsumErrors); rrdset_done(st); } // -------------------------------------------------------------------- - if(do_mcast == CONFIG_ONDEMAND_YES || (do_mcast == CONFIG_ONDEMAND_ONDEMAND && (InMcastOctets || OutMcastOctets))) { + if(do_mcast == CONFIG_ONDEMAND_YES || (do_mcast == CONFIG_ONDEMAND_ONDEMAND && (*ipext_InMcastOctets || *ipext_OutMcastOctets))) { do_mcast = CONFIG_ONDEMAND_YES; st = rrdset_find("ipv4.mcast"); if(!st) { st = rrdset_create("ipv4", "mcast", NULL, "multicast", NULL, "IPv4 Multicast Bandwidth", "kilobits/s", 9000, update_every, RRDSET_TYPE_AREA); st->isdetail = 1; - rrddim_add(st, "received", NULL, 8, 1024, RRDDIM_INCREMENTAL); - rrddim_add(st, 
"sent", NULL, -8, 1024, RRDDIM_INCREMENTAL); + rrddim_add(st, "InMcastOctets", "received", 8, 1024, RRDDIM_INCREMENTAL); + rrddim_add(st, "OutMcastOctets", "sent", -8, 1024, RRDDIM_INCREMENTAL); } else rrdset_next(st); - rrddim_set(st, "sent", OutMcastOctets); - rrddim_set(st, "received", InMcastOctets); + rrddim_set(st, "InMcastOctets", *ipext_InMcastOctets); + rrddim_set(st, "OutMcastOctets", *ipext_OutMcastOctets); rrdset_done(st); } // -------------------------------------------------------------------- - if(do_bcast == CONFIG_ONDEMAND_YES || (do_bcast == CONFIG_ONDEMAND_ONDEMAND && (InBcastOctets || OutBcastOctets))) { + if(do_bcast == CONFIG_ONDEMAND_YES || (do_bcast == CONFIG_ONDEMAND_ONDEMAND && (*ipext_InBcastOctets || *ipext_OutBcastOctets))) { do_bcast = CONFIG_ONDEMAND_YES; st = rrdset_find("ipv4.bcast"); if(!st) { st = rrdset_create("ipv4", "bcast", NULL, "broadcast", NULL, "IPv4 Broadcast Bandwidth", "kilobits/s", 8000, update_every, RRDSET_TYPE_AREA); st->isdetail = 1; - rrddim_add(st, "received", NULL, 8, 1024, RRDDIM_INCREMENTAL); - rrddim_add(st, "sent", NULL, -8, 1024, RRDDIM_INCREMENTAL); + rrddim_add(st, "InBcastOctets", "received", 8, 1024, RRDDIM_INCREMENTAL); + rrddim_add(st, "OutBcastOctets", "sent", -8, 1024, RRDDIM_INCREMENTAL); } else rrdset_next(st); - rrddim_set(st, "sent", OutBcastOctets); - rrddim_set(st, "received", InBcastOctets); + rrddim_set(st, "InBcastOctets", *ipext_InBcastOctets); + rrddim_set(st, "OutBcastOctets", *ipext_OutBcastOctets); rrdset_done(st); } // -------------------------------------------------------------------- - if(do_mcast_p == CONFIG_ONDEMAND_YES || (do_mcast_p == CONFIG_ONDEMAND_ONDEMAND && (InMcastPkts || OutMcastPkts))) { + if(do_mcast_p == CONFIG_ONDEMAND_YES || (do_mcast_p == CONFIG_ONDEMAND_ONDEMAND && (*ipext_InMcastPkts || *ipext_OutMcastPkts))) { do_mcast_p = CONFIG_ONDEMAND_YES; st = rrdset_find("ipv4.mcastpkts"); if(!st) { - st = rrdset_create("ipv4", "mcastpkts", NULL, "multicast", NULL, "IPv4 
Multicast Packets", "packets/s", 9500, update_every, RRDSET_TYPE_LINE); + st = rrdset_create("ipv4", "mcastpkts", NULL, "multicast", NULL, "IPv4 Multicast Packets", "packets/s", 8600, update_every, RRDSET_TYPE_LINE); st->isdetail = 1; - rrddim_add(st, "received", NULL, 1, 1, RRDDIM_INCREMENTAL); - rrddim_add(st, "sent", NULL, -1, 1, RRDDIM_INCREMENTAL); + rrddim_add(st, "InMcastPkts", "received", 1, 1, RRDDIM_INCREMENTAL); + rrddim_add(st, "OutMcastPkts", "sent", -1, 1, RRDDIM_INCREMENTAL); } else rrdset_next(st); - rrddim_set(st, "sent", OutMcastPkts); - rrddim_set(st, "received", InMcastPkts); + rrddim_set(st, "InMcastPkts", *ipext_InMcastPkts); + rrddim_set(st, "OutMcastPkts", *ipext_OutMcastPkts); rrdset_done(st); } // -------------------------------------------------------------------- - if(do_bcast_p == CONFIG_ONDEMAND_YES || (do_bcast_p == CONFIG_ONDEMAND_ONDEMAND && (InBcastPkts || OutBcastPkts))) { + if(do_bcast_p == CONFIG_ONDEMAND_YES || (do_bcast_p == CONFIG_ONDEMAND_ONDEMAND && (*ipext_InBcastPkts || *ipext_OutBcastPkts))) { do_bcast_p = CONFIG_ONDEMAND_YES; st = rrdset_find("ipv4.bcastpkts"); if(!st) { st = rrdset_create("ipv4", "bcastpkts", NULL, "broadcast", NULL, "IPv4 Broadcast Packets", "packets/s", 8500, update_every, RRDSET_TYPE_LINE); st->isdetail = 1; - rrddim_add(st, "received", NULL, 1, 1, RRDDIM_INCREMENTAL); - rrddim_add(st, "sent", NULL, -1, 1, RRDDIM_INCREMENTAL); + rrddim_add(st, "InBcastPkts", "received", 1, 1, RRDDIM_INCREMENTAL); + rrddim_add(st, "OutBcastPkts", "sent", -1, 1, RRDDIM_INCREMENTAL); + } + else rrdset_next(st); + + rrddim_set(st, "InBcastPkts", *ipext_InBcastPkts); + rrddim_set(st, "OutBcastPkts", *ipext_OutBcastPkts); + rrdset_done(st); + } + + // -------------------------------------------------------------------- + + if(do_ecn == CONFIG_ONDEMAND_YES || (do_ecn == CONFIG_ONDEMAND_ONDEMAND && (*ipext_InCEPkts || *ipext_InECT0Pkts || *ipext_InECT1Pkts || *ipext_InNoECTPkts))) { + do_ecn = CONFIG_ONDEMAND_YES; + st = 
rrdset_find("ipv4.ecnpkts"); + if(!st) { + st = rrdset_create("ipv4", "ecnpkts", NULL, "ecn", NULL, "IPv4 ECN Statistics", "packets/s", 8700, update_every, RRDSET_TYPE_LINE); + st->isdetail = 1; + + rrddim_add(st, "InCEPkts", "CEP", 1, 1, RRDDIM_INCREMENTAL); + rrddim_add(st, "InNoECTPkts", "NoECTP", -1, 1, RRDDIM_INCREMENTAL); + rrddim_add(st, "InECT0Pkts", "ECTP0", 1, 1, RRDDIM_INCREMENTAL); + rrddim_add(st, "InECT1Pkts", "ECTP1", 1, 1, RRDDIM_INCREMENTAL); + } + else rrdset_next(st); + + rrddim_set(st, "InCEPkts", *ipext_InCEPkts); + rrddim_set(st, "InNoECTPkts", *ipext_InNoECTPkts); + rrddim_set(st, "InECT0Pkts", *ipext_InECT0Pkts); + rrddim_set(st, "InECT1Pkts", *ipext_InECT1Pkts); + rrdset_done(st); + } + } + else if(unlikely(hash == hash_tcpext && strcmp(key, "TcpExt") == 0)) { + uint32_t h = l++; + + if(strcmp(procfile_lineword(ff, l, 0), "TcpExt") != 0) { + error("Cannot read TcpExt line from /proc/net/netstat."); + break; + } + words = procfile_linewords(ff, l); + if(words < 2) { + error("Cannot read /proc/net/netstat TcpExt line. 
Expected 2+ params, read %u.", words); + continue; + } + + parse_line_pair(ff, tcpext_data, h, l); + + RRDSET *st; + + // -------------------------------------------------------------------- + + if(do_tcpext_memory == CONFIG_ONDEMAND_YES || (do_tcpext_memory == CONFIG_ONDEMAND_ONDEMAND && (*tcpext_TCPMemoryPressures))) { + do_tcpext_memory = CONFIG_ONDEMAND_YES; + st = rrdset_find("ipv4.tcpmemorypressures"); + if(!st) { + st = rrdset_create("ipv4", "tcpmemorypressures", NULL, "tcp", NULL, "TCP Memory Pressures", "events/s", 3000, update_every, RRDSET_TYPE_LINE); + + rrddim_add(st, "TCPMemoryPressures", "pressures", 1, 1, RRDDIM_INCREMENTAL); + } + else rrdset_next(st); + + rrddim_set(st, "TCPMemoryPressures", *tcpext_TCPMemoryPressures); + rrdset_done(st); + } + + // -------------------------------------------------------------------- + + if(do_tcpext_connaborts == CONFIG_ONDEMAND_YES || (do_tcpext_connaborts == CONFIG_ONDEMAND_ONDEMAND && (*tcpext_TCPAbortOnData || *tcpext_TCPAbortOnClose || *tcpext_TCPAbortOnMemory || *tcpext_TCPAbortOnTimeout || *tcpext_TCPAbortOnLinger || *tcpext_TCPAbortFailed))) { + do_tcpext_connaborts = CONFIG_ONDEMAND_YES; + st = rrdset_find("ipv4.tcpconnaborts"); + if(!st) { + st = rrdset_create("ipv4", "tcpconnaborts", NULL, "tcp", NULL, "TCP Connection Aborts", "connections/s", 3010, update_every, RRDSET_TYPE_LINE); + + rrddim_add(st, "TCPAbortOnData", "baddata", 1, 1, RRDDIM_INCREMENTAL); + rrddim_add(st, "TCPAbortOnClose", "userclosed", 1, 1, RRDDIM_INCREMENTAL); + rrddim_add(st, "TCPAbortOnMemory", "nomemory", 1, 1, RRDDIM_INCREMENTAL); + rrddim_add(st, "TCPAbortOnTimeout", "timeout", 1, 1, RRDDIM_INCREMENTAL); + rrddim_add(st, "TCPAbortOnLinger", "linger", 1, 1, RRDDIM_INCREMENTAL); + rrddim_add(st, "TCPAbortFailed", "failed", -1, 1, RRDDIM_INCREMENTAL); + } + else rrdset_next(st); + + rrddim_set(st, "TCPAbortOnData", *tcpext_TCPAbortOnData); + rrddim_set(st, "TCPAbortOnClose", *tcpext_TCPAbortOnClose); + rrddim_set(st, 
"TCPAbortOnMemory", *tcpext_TCPAbortOnMemory); + rrddim_set(st, "TCPAbortOnTimeout", *tcpext_TCPAbortOnTimeout); + rrddim_set(st, "TCPAbortOnLinger", *tcpext_TCPAbortOnLinger); + rrddim_set(st, "TCPAbortFailed", *tcpext_TCPAbortFailed); + rrdset_done(st); + } + // -------------------------------------------------------------------- + + if(do_tcpext_reorder == CONFIG_ONDEMAND_YES || (do_tcpext_reorder == CONFIG_ONDEMAND_ONDEMAND && (*tcpext_TCPRenoReorder || *tcpext_TCPFACKReorder || *tcpext_TCPSACKReorder || *tcpext_TCPTSReorder))) { + do_tcpext_reorder = CONFIG_ONDEMAND_YES; + st = rrdset_find("ipv4.tcpreorders"); + if(!st) { + st = rrdset_create("ipv4", "tcpreorders", NULL, "tcp", NULL, "TCP Reordered Packets by Detection Method", "packets/s", 3020, update_every, RRDSET_TYPE_LINE); + + rrddim_add(st, "TCPTSReorder", "timestamp", 1, 1, RRDDIM_INCREMENTAL); + rrddim_add(st, "TCPSACKReorder", "sack", 1, 1, RRDDIM_INCREMENTAL); + rrddim_add(st, "TCPFACKReorder", "fack", 1, 1, RRDDIM_INCREMENTAL); + rrddim_add(st, "TCPRenoReorder", "reno", 1, 1, RRDDIM_INCREMENTAL); + } + else rrdset_next(st); + + rrddim_set(st, "TCPTSReorder", *tcpext_TCPTSReorder); + rrddim_set(st, "TCPSACKReorder", *tcpext_TCPSACKReorder); + rrddim_set(st, "TCPFACKReorder", *tcpext_TCPFACKReorder); + rrddim_set(st, "TCPRenoReorder", *tcpext_TCPRenoReorder); + rrdset_done(st); + } + + // -------------------------------------------------------------------- + + if(do_tcpext_ofo == CONFIG_ONDEMAND_YES || (do_tcpext_ofo == CONFIG_ONDEMAND_ONDEMAND && (*tcpext_TCPOFOQueue || *tcpext_TCPOFODrop || *tcpext_TCPOFOMerge))) { + do_tcpext_ofo = CONFIG_ONDEMAND_YES; + st = rrdset_find("ipv4.tcpofo"); + if(!st) { + st = rrdset_create("ipv4", "tcpofo", NULL, "tcp", NULL, "TCP Out-Of-Order Queue", "packets/s", 3050, update_every, RRDSET_TYPE_LINE); + + rrddim_add(st, "TCPOFOQueue", "inqueue", 1, 1, RRDDIM_INCREMENTAL); + rrddim_add(st, "TCPOFODrop", "dropped", -1, 1, RRDDIM_INCREMENTAL); + rrddim_add(st, 
"TCPOFOMerge", "merged", 1, 1, RRDDIM_INCREMENTAL); + rrddim_add(st, "OfoPruned", "pruned", -1, 1, RRDDIM_INCREMENTAL); } else rrdset_next(st); - rrddim_set(st, "sent", OutBcastPkts); - rrddim_set(st, "received", InBcastPkts); + rrddim_set(st, "TCPOFOQueue", *tcpext_TCPOFOQueue); + rrddim_set(st, "TCPOFODrop", *tcpext_TCPOFODrop); + rrddim_set(st, "TCPOFOMerge", *tcpext_TCPOFOMerge); + rrddim_set(st, "OfoPruned", *tcpext_OfoPruned); rrdset_done(st); } + + // -------------------------------------------------------------------- + + if(do_tcpext_syscookies == CONFIG_ONDEMAND_YES || (do_tcpext_syscookies == CONFIG_ONDEMAND_ONDEMAND && (*tcpext_SyncookiesSent || *tcpext_SyncookiesRecv || *tcpext_SyncookiesFailed))) { + do_tcpext_syscookies = CONFIG_ONDEMAND_YES; + st = rrdset_find("ipv4.tcpsyncookies"); + if(!st) { + st = rrdset_create("ipv4", "tcpsyncookies", NULL, "tcp", NULL, "TCP SYN Cookies", "packets/s", 3100, update_every, RRDSET_TYPE_LINE); + + rrddim_add(st, "SyncookiesRecv", "received", 1, 1, RRDDIM_INCREMENTAL); + rrddim_add(st, "SyncookiesSent", "sent", -1, 1, RRDDIM_INCREMENTAL); + rrddim_add(st, "SyncookiesFailed", "failed", -1, 1, RRDDIM_INCREMENTAL); + } + else rrdset_next(st); + + rrddim_set(st, "SyncookiesRecv", *tcpext_SyncookiesRecv); + rrddim_set(st, "SyncookiesSent", *tcpext_SyncookiesSent); + rrddim_set(st, "SyncookiesFailed", *tcpext_SyncookiesFailed); + rrdset_done(st); + } + } } diff --git a/src/proc_net_rpc_nfs.c b/src/proc_net_rpc_nfs.c new file mode 100644 index 00000000..98acdd81 --- /dev/null +++ b/src/proc_net_rpc_nfs.c @@ -0,0 +1,370 @@ +#include "common.h" + +struct nfs_procs { + char name[30]; + unsigned long long value; + int present; +}; + +struct nfs_procs nfs_proc2_values[] = { + { "null", 0ULL, 0 }, + { "getattr", 0ULL, 0 }, + { "setattr", 0ULL, 0 }, + { "root", 0ULL, 0 }, + { "lookup", 0ULL, 0 }, + { "readlink", 0ULL, 0 }, + { "read", 0ULL, 0 }, + { "wrcache", 0ULL, 0 }, + { "write", 0ULL, 0 }, + { "create", 0ULL, 0 }, + { 
"remove", 0ULL, 0 }, + { "rename", 0ULL, 0 }, + { "link", 0ULL, 0 }, + { "symlink", 0ULL, 0 }, + { "mkdir", 0ULL, 0 }, + { "rmdir", 0ULL, 0 }, + { "readdir", 0ULL, 0 }, + { "fsstat", 0ULL, 0 }, + + /* termination */ + { "", 0ULL, 0 } +}; + +struct nfs_procs nfs_proc3_values[] = { + { "null", 0ULL, 0 }, + { "getattr", 0ULL, 0 }, + { "setattr", 0ULL, 0 }, + { "lookup", 0ULL, 0 }, + { "access", 0ULL, 0 }, + { "readlink", 0ULL, 0 }, + { "read", 0ULL, 0 }, + { "write", 0ULL, 0 }, + { "create", 0ULL, 0 }, + { "mkdir", 0ULL, 0 }, + { "symlink", 0ULL, 0 }, + { "mknod", 0ULL, 0 }, + { "remove", 0ULL, 0 }, + { "rmdir", 0ULL, 0 }, + { "rename", 0ULL, 0 }, + { "link", 0ULL, 0 }, + { "readdir", 0ULL, 0 }, + { "readdirplus", 0ULL, 0 }, + { "fsstat", 0ULL, 0 }, + { "fsinfo", 0ULL, 0 }, + { "pathconf", 0ULL, 0 }, + { "commit", 0ULL, 0 }, + + /* termination */ + { "", 0ULL, 0 } +}; + +struct nfs_procs nfs_proc4_values[] = { + { "null", 0ULL, 0 }, + { "read", 0ULL, 0 }, + { "write", 0ULL, 0 }, + { "commit", 0ULL, 0 }, + { "open", 0ULL, 0 }, + { "open_conf", 0ULL, 0 }, + { "open_noat", 0ULL, 0 }, + { "open_dgrd", 0ULL, 0 }, + { "close", 0ULL, 0 }, + { "setattr", 0ULL, 0 }, + { "fsinfo", 0ULL, 0 }, + { "renew", 0ULL, 0 }, + { "setclntid", 0ULL, 0 }, + { "confirm", 0ULL, 0 }, + { "lock", 0ULL, 0 }, + { "lockt", 0ULL, 0 }, + { "locku", 0ULL, 0 }, + { "access", 0ULL, 0 }, + { "getattr", 0ULL, 0 }, + { "lookup", 0ULL, 0 }, + { "lookup_root", 0ULL, 0 }, + { "remove", 0ULL, 0 }, + { "rename", 0ULL, 0 }, + { "link", 0ULL, 0 }, + { "symlink", 0ULL, 0 }, + { "create", 0ULL, 0 }, + { "pathconf", 0ULL, 0 }, + { "statfs", 0ULL, 0 }, + { "readlink", 0ULL, 0 }, + { "readdir", 0ULL, 0 }, + { "server_caps", 0ULL, 0 }, + { "delegreturn", 0ULL, 0 }, + { "getacl", 0ULL, 0 }, + { "setacl", 0ULL, 0 }, + { "fs_locations", 0ULL, 0 }, + { "rel_lkowner", 0ULL, 0 }, + { "secinfo", 0ULL, 0 }, + { "fsid_present", 0ULL, 0 }, + + /* nfsv4.1 client ops */ + { "exchange_id", 0ULL, 0 }, + { "create_session", 0ULL, 0 
}, + { "destroy_session", 0ULL, 0 }, + { "sequence", 0ULL, 0 }, + { "get_lease_time", 0ULL, 0 }, + { "reclaim_comp", 0ULL, 0 }, + { "layoutget", 0ULL, 0 }, + { "getdevinfo", 0ULL, 0 }, + { "layoutcommit", 0ULL, 0 }, + { "layoutreturn", 0ULL, 0 }, + { "secinfo_no", 0ULL, 0 }, + { "test_stateid", 0ULL, 0 }, + { "free_stateid", 0ULL, 0 }, + { "getdevicelist", 0ULL, 0 }, + { "bind_conn_to_ses", 0ULL, 0 }, + { "destroy_clientid", 0ULL, 0 }, + + /* nfsv4.2 client ops */ + { "seek", 0ULL, 0 }, + { "allocate", 0ULL, 0 }, + { "deallocate", 0ULL, 0 }, + { "layoutstats", 0ULL, 0 }, + { "clone", 0ULL, 0 }, + + /* termination */ + { "", 0ULL, 0 } +}; + +int do_proc_net_rpc_nfs(int update_every, unsigned long long dt) { + (void)dt; + + static procfile *ff = NULL; + static int do_net = -1, do_rpc = -1, do_proc2 = -1, do_proc3 = -1, do_proc4 = -1; + static int proc2_warning = 0, proc3_warning = 0, proc4_warning = 0; + + if(!ff) { + char filename[FILENAME_MAX + 1]; + snprintfz(filename, FILENAME_MAX, "%s%s", global_host_prefix, "/proc/net/rpc/nfs"); + ff = procfile_open(config_get("plugin:proc:/proc/net/rpc/nfs", "filename to monitor", filename), " \t", PROCFILE_FLAG_DEFAULT); + } + if(!ff) return 1; + + ff = procfile_readall(ff); + if(!ff) return 0; // we return 0, so that we will retry to open it next time + + if(do_net == -1) do_net = config_get_boolean("plugin:proc:/proc/net/rpc/nfs", "network", 1); + if(do_rpc == -1) do_rpc = config_get_boolean("plugin:proc:/proc/net/rpc/nfs", "rpc", 1); + if(do_proc2 == -1) do_proc2 = config_get_boolean("plugin:proc:/proc/net/rpc/nfs", "NFS v2 procedures", 1); + if(do_proc3 == -1) do_proc3 = config_get_boolean("plugin:proc:/proc/net/rpc/nfs", "NFS v3 procedures", 1); + if(do_proc4 == -1) do_proc4 = config_get_boolean("plugin:proc:/proc/net/rpc/nfs", "NFS v4 procedures", 1); + + // if they are enabled, reset them to 1 + // later we do them =2 to avoid doing strcmp for all lines + if(do_net) do_net = 1; + if(do_rpc) do_rpc = 1; + if(do_proc2) 
do_proc2 = 1; + if(do_proc3) do_proc3 = 1; + if(do_proc4) do_proc4 = 1; + + uint32_t lines = procfile_lines(ff), l; + uint32_t words; + + char *type; + unsigned long long net_count = 0, net_udp_count = 0, net_tcp_count = 0, net_tcp_connections = 0; + unsigned long long rpc_calls = 0, rpc_retransmits = 0, rpc_auth_refresh = 0; + + for(l = 0; l < lines ;l++) { + words = procfile_linewords(ff, l); + if(!words) continue; + + type = procfile_lineword(ff, l, 0); + + if(do_net == 1 && strcmp(type, "net") == 0) { + if(words < 5) { + error("%s line of /proc/net/rpc/nfs has %u words, expected %d", type, words, 5); + continue; + } + + net_count = strtoull(procfile_lineword(ff, l, 1), NULL, 10); + net_udp_count = strtoull(procfile_lineword(ff, l, 2), NULL, 10); + net_tcp_count = strtoull(procfile_lineword(ff, l, 3), NULL, 10); + net_tcp_connections = strtoull(procfile_lineword(ff, l, 4), NULL, 10); + + unsigned long long sum = net_count + net_udp_count + net_tcp_count + net_tcp_connections; + if(sum == 0ULL) do_net = -1; + else do_net = 2; + } + else if(do_rpc == 1 && strcmp(type, "rpc") == 0) { + if(words < 4) { + error("%s line of /proc/net/rpc/nfs has %u words, expected %d", type, words, 6); + continue; + } + + rpc_calls = strtoull(procfile_lineword(ff, l, 1), NULL, 10); + rpc_retransmits = strtoull(procfile_lineword(ff, l, 2), NULL, 10); + rpc_auth_refresh = strtoull(procfile_lineword(ff, l, 3), NULL, 10); + + unsigned long long sum = rpc_calls + rpc_retransmits + rpc_auth_refresh; + if(sum == 0ULL) do_rpc = -1; + else do_rpc = 2; + } + else if(do_proc2 == 1 && strcmp(type, "proc2") == 0) { + // the first number is the count of numbers present + // so we start for word 2 + + unsigned long long sum = 0; + unsigned int i, j; + for(i = 0, j = 2; j < words && nfs_proc2_values[i].name[0] ; i++, j++) { + nfs_proc2_values[i].value = strtoull(procfile_lineword(ff, l, j), NULL, 10); + nfs_proc2_values[i].present = 1; + sum += nfs_proc2_values[i].value; + } + + if(sum == 0ULL) { + 
if(!proc2_warning) { + error("Disabling /proc/net/rpc/nfs v2 procedure calls chart. It seems unused on this machine. It will be enabled automatically when found with data in it."); + proc2_warning = 1; + } + do_proc2 = 0; + } + else do_proc2 = 2; + } + else if(do_proc3 == 1 && strcmp(type, "proc3") == 0) { + // the first number is the count of numbers present + // so we start for word 2 + + unsigned long long sum = 0; + unsigned int i, j; + for(i = 0, j = 2; j < words && nfs_proc3_values[i].name[0] ; i++, j++) { + nfs_proc3_values[i].value = strtoull(procfile_lineword(ff, l, j), NULL, 10); + nfs_proc3_values[i].present = 1; + sum += nfs_proc3_values[i].value; + } + + if(sum == 0ULL) { + if(!proc3_warning) { + info("Disabling /proc/net/rpc/nfs v3 procedure calls chart. It seems unused on this machine. It will be enabled automatically when found with data in it."); + proc3_warning = 1; + } + do_proc3 = 0; + } + else do_proc3 = 2; + } + else if(do_proc4 == 1 && strcmp(type, "proc4") == 0) { + // the first number is the count of numbers present + // so we start for word 2 + + unsigned long long sum = 0; + unsigned int i, j; + for(i = 0, j = 2; j < words && nfs_proc4_values[i].name[0] ; i++, j++) { + nfs_proc4_values[i].value = strtoull(procfile_lineword(ff, l, j), NULL, 10); + nfs_proc4_values[i].present = 1; + sum += nfs_proc4_values[i].value; + } + + if(sum == 0ULL) { + if(!proc4_warning) { + info("Disabling /proc/net/rpc/nfs v4 procedure calls chart. It seems unused on this machine. 
It will be enabled automatically when found with data in it."); + proc4_warning = 1; + } + do_proc4 = 0; + } + else do_proc4 = 2; + } + } + + RRDSET *st; + + // -------------------------------------------------------------------- + + if(do_net == 2) { + st = rrdset_find_bytype("nfs", "net"); + if(!st) { + st = rrdset_create("nfs", "net", NULL, "network", NULL, "NFS Client Network", "operations/s", 5007, update_every, RRDSET_TYPE_STACKED); + st->isdetail = 1; + + rrddim_add(st, "udp", NULL, 1, 1, RRDDIM_INCREMENTAL); + rrddim_add(st, "tcp", NULL, 1, 1, RRDDIM_INCREMENTAL); + } + else rrdset_next(st); + + // ignore net_count, net_tcp_connections + if(net_count) {}; + if(net_tcp_connections) {}; + + rrddim_set(st, "udp", net_udp_count); + rrddim_set(st, "tcp", net_tcp_count); + rrdset_done(st); + } + + // -------------------------------------------------------------------- + + if(do_rpc == 2) { + st = rrdset_find_bytype("nfs", "rpc"); + if(!st) { + st = rrdset_create("nfs", "rpc", NULL, "rpc", NULL, "NFS Client Remote Procedure Calls Statistics", "calls/s", 5008, update_every, RRDSET_TYPE_LINE); + st->isdetail = 1; + + rrddim_add(st, "calls", NULL, 1, 1, RRDDIM_INCREMENTAL); + rrddim_add(st, "retransmits", NULL, -1, 1, RRDDIM_INCREMENTAL); + rrddim_add(st, "auth_refresh", NULL, -1, 1, RRDDIM_INCREMENTAL); + } + else rrdset_next(st); + + rrddim_set(st, "calls", rpc_calls); + rrddim_set(st, "retransmits", rpc_retransmits); + rrddim_set(st, "auth_refresh", rpc_auth_refresh); + rrdset_done(st); + } + + // -------------------------------------------------------------------- + + if(do_proc2 == 2) { + unsigned int i; + st = rrdset_find_bytype("nfs", "proc2"); + if(!st) { + st = rrdset_create("nfs", "proc2", NULL, "nfsv2rpc", NULL, "NFS v2 Client Remote Procedure Calls", "calls/s", 5009, update_every, RRDSET_TYPE_STACKED); + + for(i = 0; nfs_proc2_values[i].present ; i++) + rrddim_add(st, nfs_proc2_values[i].name, NULL, 1, 1, RRDDIM_INCREMENTAL); + } + else rrdset_next(st); + 
+ for(i = 0; nfs_proc2_values[i].present ; i++) + rrddim_set(st, nfs_proc2_values[i].name, nfs_proc2_values[i].value); + + rrdset_done(st); + } + + // -------------------------------------------------------------------- + + if(do_proc3 == 2) { + unsigned int i; + st = rrdset_find_bytype("nfs", "proc3"); + if(!st) { + st = rrdset_create("nfs", "proc3", NULL, "nfsv3rpc", NULL, "NFS v3 Client Remote Procedure Calls", "calls/s", 5010, update_every, RRDSET_TYPE_STACKED); + + for(i = 0; nfs_proc3_values[i].present ; i++) + rrddim_add(st, nfs_proc3_values[i].name, NULL, 1, 1, RRDDIM_INCREMENTAL); + } + else rrdset_next(st); + + for(i = 0; nfs_proc3_values[i].present ; i++) + rrddim_set(st, nfs_proc3_values[i].name, nfs_proc3_values[i].value); + + rrdset_done(st); + } + + // -------------------------------------------------------------------- + + if(do_proc4 == 2) { + unsigned int i; + st = rrdset_find_bytype("nfs", "proc4"); + if(!st) { + st = rrdset_create("nfs", "proc4", NULL, "nfsv4rpc", NULL, "NFS v4 Client Remote Procedure Calls", "calls/s", 5011, update_every, RRDSET_TYPE_STACKED); + + for(i = 0; nfs_proc4_values[i].present ; i++) + rrddim_add(st, nfs_proc4_values[i].name, NULL, 1, 1, RRDDIM_INCREMENTAL); + } + else rrdset_next(st); + + for(i = 0; nfs_proc4_values[i].present ; i++) + rrddim_set(st, nfs_proc4_values[i].name, nfs_proc4_values[i].value); + + rrdset_done(st); + } + + return 0; +} diff --git a/src/proc_net_rpc_nfsd.c b/src/proc_net_rpc_nfsd.c index 0323b4df..817e6c86 100644 --- a/src/proc_net_rpc_nfsd.c +++ b/src/proc_net_rpc_nfsd.c @@ -2,47 +2,135 @@ struct nfsd_procs { char name[30]; - unsigned long long proc2; - unsigned long long proc3; - unsigned long long proc4; - int present2; - int present3; - int present4; + unsigned long long value; + int present; }; -struct nfsd_procs nfsd_proc_values[] = { - { "null", 0ULL, 0ULL, 0ULL, 0, 0, 0 }, - { "getattr", 0ULL, 0ULL, 0ULL, 0, 0, 0 }, - { "setattr", 0ULL, 0ULL, 0ULL, 0, 0, 0 }, - { "lookup", 0ULL, 0ULL, 
0ULL, 0, 0, 0 }, - { "access", 0ULL, 0ULL, 0ULL, 0, 0, 0 }, - { "readlink", 0ULL, 0ULL, 0ULL, 0, 0, 0 }, - { "read", 0ULL, 0ULL, 0ULL, 0, 0, 0 }, - { "write", 0ULL, 0ULL, 0ULL, 0, 0, 0 }, - { "create", 0ULL, 0ULL, 0ULL, 0, 0, 0 }, - { "mkdir", 0ULL, 0ULL, 0ULL, 0, 0, 0 }, - { "symlink", 0ULL, 0ULL, 0ULL, 0, 0, 0 }, - { "mknod", 0ULL, 0ULL, 0ULL, 0, 0, 0 }, - { "remove", 0ULL, 0ULL, 0ULL, 0, 0, 0 }, - { "rmdir", 0ULL, 0ULL, 0ULL, 0, 0, 0 }, - { "rename", 0ULL, 0ULL, 0ULL, 0, 0, 0 }, - { "link", 0ULL, 0ULL, 0ULL, 0, 0, 0 }, - { "readdir", 0ULL, 0ULL, 0ULL, 0, 0, 0 }, - { "readdirplus", 0ULL, 0ULL, 0ULL, 0, 0, 0 }, - { "fsstat", 0ULL, 0ULL, 0ULL, 0, 0, 0 }, - { "fsinfo", 0ULL, 0ULL, 0ULL, 0, 0, 0 }, - { "pathconf", 0ULL, 0ULL, 0ULL, 0, 0, 0 }, - { "commit", 0ULL, 0ULL, 0ULL, 0, 0, 0 }, - { "", 0ULL, 0ULL, 0ULL, 0, 0, 0 }, +struct nfsd_procs nfsd_proc2_values[] = { + { "null", 0ULL, 0 }, + { "getattr", 0ULL, 0 }, + { "setattr", 0ULL, 0 }, + { "root", 0ULL, 0 }, + { "lookup", 0ULL, 0 }, + { "readlink", 0ULL, 0 }, + { "read", 0ULL, 0 }, + { "wrcache", 0ULL, 0 }, + { "write", 0ULL, 0 }, + { "create", 0ULL, 0 }, + { "remove", 0ULL, 0 }, + { "rename", 0ULL, 0 }, + { "link", 0ULL, 0 }, + { "symlink", 0ULL, 0 }, + { "mkdir", 0ULL, 0 }, + { "rmdir", 0ULL, 0 }, + { "readdir", 0ULL, 0 }, + { "fsstat", 0ULL, 0 }, + + /* termination */ + { "", 0ULL, 0 } }; -struct nfsd4_ops { - char name[30]; - unsigned long long value; - int present; +struct nfsd_procs nfsd_proc3_values[] = { + { "null", 0ULL, 0 }, + { "getattr", 0ULL, 0 }, + { "setattr", 0ULL, 0 }, + { "lookup", 0ULL, 0 }, + { "access", 0ULL, 0 }, + { "readlink", 0ULL, 0 }, + { "read", 0ULL, 0 }, + { "write", 0ULL, 0 }, + { "create", 0ULL, 0 }, + { "mkdir", 0ULL, 0 }, + { "symlink", 0ULL, 0 }, + { "mknod", 0ULL, 0 }, + { "remove", 0ULL, 0 }, + { "rmdir", 0ULL, 0 }, + { "rename", 0ULL, 0 }, + { "link", 0ULL, 0 }, + { "readdir", 0ULL, 0 }, + { "readdirplus", 0ULL, 0 }, + { "fsstat", 0ULL, 0 }, + { "fsinfo", 0ULL, 0 }, + { 
"pathconf", 0ULL, 0 }, + { "commit", 0ULL, 0 }, + + /* termination */ + { "", 0ULL, 0 } +}; + +struct nfsd_procs nfsd_proc4_values[] = { + { "null", 0ULL, 0 }, + { "read", 0ULL, 0 }, + { "write", 0ULL, 0 }, + { "commit", 0ULL, 0 }, + { "open", 0ULL, 0 }, + { "open_conf", 0ULL, 0 }, + { "open_noat", 0ULL, 0 }, + { "open_dgrd", 0ULL, 0 }, + { "close", 0ULL, 0 }, + { "setattr", 0ULL, 0 }, + { "fsinfo", 0ULL, 0 }, + { "renew", 0ULL, 0 }, + { "setclntid", 0ULL, 0 }, + { "confirm", 0ULL, 0 }, + { "lock", 0ULL, 0 }, + { "lockt", 0ULL, 0 }, + { "locku", 0ULL, 0 }, + { "access", 0ULL, 0 }, + { "getattr", 0ULL, 0 }, + { "lookup", 0ULL, 0 }, + { "lookup_root", 0ULL, 0 }, + { "remove", 0ULL, 0 }, + { "rename", 0ULL, 0 }, + { "link", 0ULL, 0 }, + { "symlink", 0ULL, 0 }, + { "create", 0ULL, 0 }, + { "pathconf", 0ULL, 0 }, + { "statfs", 0ULL, 0 }, + { "readlink", 0ULL, 0 }, + { "readdir", 0ULL, 0 }, + { "server_caps", 0ULL, 0 }, + { "delegreturn", 0ULL, 0 }, + { "getacl", 0ULL, 0 }, + { "setacl", 0ULL, 0 }, + { "fs_locations", 0ULL, 0 }, + { "rel_lkowner", 0ULL, 0 }, + { "secinfo", 0ULL, 0 }, + { "fsid_present", 0ULL, 0 }, + + /* nfsv4.1 client ops */ + { "exchange_id", 0ULL, 0 }, + { "create_session", 0ULL, 0 }, + { "destroy_session", 0ULL, 0 }, + { "sequence", 0ULL, 0 }, + { "get_lease_time", 0ULL, 0 }, + { "reclaim_comp", 0ULL, 0 }, + { "layoutget", 0ULL, 0 }, + { "getdevinfo", 0ULL, 0 }, + { "layoutcommit", 0ULL, 0 }, + { "layoutreturn", 0ULL, 0 }, + { "secinfo_no", 0ULL, 0 }, + { "test_stateid", 0ULL, 0 }, + { "free_stateid", 0ULL, 0 }, + { "getdevicelist", 0ULL, 0 }, + { "bind_conn_to_ses", 0ULL, 0 }, + { "destroy_clientid", 0ULL, 0 }, + + /* nfsv4.2 client ops */ + { "seek", 0ULL, 0 }, + { "allocate", 0ULL, 0 }, + { "deallocate", 0ULL, 0 }, + { "layoutstats", 0ULL, 0 }, + { "clone", 0ULL, 0 }, + + /* termination */ + { "", 0ULL, 0 } }; -struct nfsd4_ops nfsd4_ops_values[] = { +struct nfsd_procs nfsd4_ops_values[] = { + { "unused_op0", 0ULL, 0}, + { "unused_op1", 0ULL, 0}, 
+ { "future_op2", 0ULL, 0}, { "access", 0ULL, 0}, { "close", 0ULL, 0}, { "commit", 0ULL, 0}, @@ -56,7 +144,7 @@ struct nfsd4_ops nfsd4_ops_values[] = { { "lockt", 0ULL, 0}, { "locku", 0ULL, 0}, { "lookup", 0ULL, 0}, - { "lookupp", 0ULL, 0}, + { "lookup_root", 0ULL, 0}, { "nverify", 0ULL, 0}, { "open", 0ULL, 0}, { "openattr", 0ULL, 0}, @@ -107,7 +195,7 @@ struct nfsd4_ops nfsd4_ops_values[] = { { "copy", 0ULL, 0}, { "copy_notify", 0ULL, 0}, { "deallocate", 0ULL, 0}, - { "io_advise", 0ULL, 0}, + { "ioadvise", 0ULL, 0}, { "layouterror", 0ULL, 0}, { "layoutstats", 0ULL, 0}, { "offload_cancel", 0ULL, 0}, @@ -174,13 +262,13 @@ int do_proc_net_rpc_nfsd(int update_every, unsigned long long dt) { unsigned long long th_threads = 0, th_fullcnt = 0, th_hist10 = 0, th_hist20 = 0, th_hist30 = 0, th_hist40 = 0, th_hist50 = 0, th_hist60 = 0, th_hist70 = 0, th_hist80 = 0, th_hist90 = 0, th_hist100 = 0; unsigned long long ra_size = 0, ra_hist10 = 0, ra_hist20 = 0, ra_hist30 = 0, ra_hist40 = 0, ra_hist50 = 0, ra_hist60 = 0, ra_hist70 = 0, ra_hist80 = 0, ra_hist90 = 0, ra_hist100 = 0, ra_none = 0; unsigned long long net_count = 0, net_udp_count = 0, net_tcp_count = 0, net_tcp_connections = 0; - unsigned long long rpc_count = 0, rpc_bad_format = 0, rpc_bad_auth = 0, rpc_bad_client = 0; + unsigned long long rpc_calls = 0, rpc_bad_format = 0, rpc_bad_auth = 0, rpc_bad_client = 0; for(l = 0; l < lines ;l++) { words = procfile_linewords(ff, l); if(!words) continue; - type = procfile_lineword(ff, l, 0); + type = procfile_lineword(ff, l, 0); if(do_rc == 1 && strcmp(type, "rc") == 0) { if(words < 4) { @@ -306,12 +394,12 @@ int do_proc_net_rpc_nfsd(int update_every, unsigned long long dt) { continue; } - rpc_count = strtoull(procfile_lineword(ff, l, 1), NULL, 10); + rpc_calls = strtoull(procfile_lineword(ff, l, 1), NULL, 10); rpc_bad_format = strtoull(procfile_lineword(ff, l, 2), NULL, 10); rpc_bad_auth = strtoull(procfile_lineword(ff, l, 3), NULL, 10); rpc_bad_client = 
strtoull(procfile_lineword(ff, l, 4), NULL, 10); - unsigned long long sum = rpc_count + rpc_bad_format + rpc_bad_auth + rpc_bad_client; + unsigned long long sum = rpc_calls + rpc_bad_format + rpc_bad_auth + rpc_bad_client; if(sum == 0ULL) do_rpc = -1; else do_rpc = 2; } @@ -321,10 +409,10 @@ int do_proc_net_rpc_nfsd(int update_every, unsigned long long dt) { unsigned long long sum = 0; unsigned int i, j; - for(i = 0, j = 2; j < words && nfsd_proc_values[i].name[0] ; i++, j++) { - nfsd_proc_values[i].proc2 = strtoull(procfile_lineword(ff, l, j), NULL, 10); - nfsd_proc_values[i].present2 = 1; - sum += nfsd_proc_values[i].proc2; + for(i = 0, j = 2; j < words && nfsd_proc2_values[i].name[0] ; i++, j++) { + nfsd_proc2_values[i].value = strtoull(procfile_lineword(ff, l, j), NULL, 10); + nfsd_proc2_values[i].present = 1; + sum += nfsd_proc2_values[i].value; } if(sum == 0ULL) { @@ -342,10 +430,10 @@ int do_proc_net_rpc_nfsd(int update_every, unsigned long long dt) { unsigned long long sum = 0; unsigned int i, j; - for(i = 0, j = 2; j < words && nfsd_proc_values[i].name[0] ; i++, j++) { - nfsd_proc_values[i].proc3 = strtoull(procfile_lineword(ff, l, j), NULL, 10); - nfsd_proc_values[i].present3 = 1; - sum += nfsd_proc_values[i].proc3; + for(i = 0, j = 2; j < words && nfsd_proc3_values[i].name[0] ; i++, j++) { + nfsd_proc3_values[i].value = strtoull(procfile_lineword(ff, l, j), NULL, 10); + nfsd_proc3_values[i].present = 1; + sum += nfsd_proc3_values[i].value; } if(sum == 0ULL) { @@ -363,10 +451,10 @@ int do_proc_net_rpc_nfsd(int update_every, unsigned long long dt) { unsigned long long sum = 0; unsigned int i, j; - for(i = 0, j = 2; j < words && nfsd_proc_values[i].name[0] ; i++, j++) { - nfsd_proc_values[i].proc4 = strtoull(procfile_lineword(ff, l, j), NULL, 10); - nfsd_proc_values[i].present4 = 1; - sum += nfsd_proc_values[i].proc4; + for(i = 0, j = 2; j < words && nfsd_proc4_values[i].name[0] ; i++, j++) { + nfsd_proc4_values[i].value = strtoull(procfile_lineword(ff, l, 
j), NULL, 10); + nfsd_proc4_values[i].present = 1; + sum += nfsd_proc4_values[i].value; } if(sum == 0ULL) { @@ -408,7 +496,7 @@ int do_proc_net_rpc_nfsd(int update_every, unsigned long long dt) { if(do_rc == 2) { st = rrdset_find_bytype("nfsd", "readcache"); if(!st) { - st = rrdset_create("nfsd", "readcache", NULL, "nfsd", NULL, "Read Cache", "reads/s", 5000, update_every, RRDSET_TYPE_STACKED); + st = rrdset_create("nfsd", "readcache", NULL, "cache", NULL, "NFS Server Read Cache", "reads/s", 5000, update_every, RRDSET_TYPE_STACKED); rrddim_add(st, "hits", NULL, 1, 1, RRDDIM_INCREMENTAL); rrddim_add(st, "misses", NULL, 1, 1, RRDDIM_INCREMENTAL); @@ -427,7 +515,7 @@ int do_proc_net_rpc_nfsd(int update_every, unsigned long long dt) { if(do_fh == 2) { st = rrdset_find_bytype("nfsd", "filehandles"); if(!st) { - st = rrdset_create("nfsd", "filehandles", NULL, "nfsd", NULL, "File Handles", "handles/s", 5001, update_every, RRDSET_TYPE_LINE); + st = rrdset_create("nfsd", "filehandles", NULL, "filehandles", NULL, "NFS Server File Handles", "handles/s", 5001, update_every, RRDSET_TYPE_LINE); st->isdetail = 1; rrddim_add(st, "stale", NULL, 1, 1, RRDDIM_ABSOLUTE); @@ -451,7 +539,7 @@ int do_proc_net_rpc_nfsd(int update_every, unsigned long long dt) { if(do_io == 2) { st = rrdset_find_bytype("nfsd", "io"); if(!st) { - st = rrdset_create("nfsd", "io", NULL, "nfsd", NULL, "I/O", "kilobytes/s", 5002, update_every, RRDSET_TYPE_AREA); + st = rrdset_create("nfsd", "io", NULL, "io", NULL, "NFS Server I/O", "kilobytes/s", 5002, update_every, RRDSET_TYPE_AREA); rrddim_add(st, "read", NULL, 1, 1000, RRDDIM_INCREMENTAL); rrddim_add(st, "write", NULL, -1, 1000, RRDDIM_INCREMENTAL); @@ -468,7 +556,7 @@ int do_proc_net_rpc_nfsd(int update_every, unsigned long long dt) { if(do_th == 2) { st = rrdset_find_bytype("nfsd", "threads"); if(!st) { - st = rrdset_create("nfsd", "threads", NULL, "nfsd", NULL, "Threads", "threads", 5003, update_every, RRDSET_TYPE_LINE); + st = rrdset_create("nfsd", 
"threads", NULL, "threads", NULL, "NFS Server Threads", "threads", 5003, update_every, RRDSET_TYPE_LINE); rrddim_add(st, "threads", NULL, 1, 1, RRDDIM_ABSOLUTE); } @@ -479,7 +567,7 @@ int do_proc_net_rpc_nfsd(int update_every, unsigned long long dt) { st = rrdset_find_bytype("nfsd", "threads_fullcnt"); if(!st) { - st = rrdset_create("nfsd", "threads_fullcnt", NULL, "nfsd", NULL, "Threads Full Count", "ops/s", 5004, update_every, RRDSET_TYPE_LINE); + st = rrdset_create("nfsd", "threads_fullcnt", NULL, "threads", NULL, "NFS Server Threads Full Count", "ops/s", 5004, update_every, RRDSET_TYPE_LINE); rrddim_add(st, "full_count", NULL, 1, 1, RRDDIM_INCREMENTAL); } @@ -490,7 +578,7 @@ int do_proc_net_rpc_nfsd(int update_every, unsigned long long dt) { st = rrdset_find_bytype("nfsd", "threads_histogram"); if(!st) { - st = rrdset_create("nfsd", "threads_histogram", NULL, "nfsd", NULL, "Threads Usage Histogram", "percentage", 5005, update_every, RRDSET_TYPE_LINE); + st = rrdset_create("nfsd", "threads_histogram", NULL, "threads", NULL, "NFS Server Threads Usage Histogram", "percentage", 5005, update_every, RRDSET_TYPE_LINE); rrddim_add(st, "0%-10%", NULL, 1, 1000, RRDDIM_ABSOLUTE); rrddim_add(st, "10%-20%", NULL, 1, 1000, RRDDIM_ABSOLUTE); @@ -523,7 +611,7 @@ int do_proc_net_rpc_nfsd(int update_every, unsigned long long dt) { if(do_ra == 2) { st = rrdset_find_bytype("nfsd", "readahead"); if(!st) { - st = rrdset_create("nfsd", "readahead", NULL, "nfsd", NULL, "Read Ahead Depth", "percentage", 5005, update_every, RRDSET_TYPE_STACKED); + st = rrdset_create("nfsd", "readahead", NULL, "readahead", NULL, "NFS Server Read Ahead Depth", "percentage", 5005, update_every, RRDSET_TYPE_STACKED); rrddim_add(st, "10%", NULL, 1, 1, RRDDIM_PCENT_OVER_DIFF_TOTAL); rrddim_add(st, "20%", NULL, 1, 1, RRDDIM_PCENT_OVER_DIFF_TOTAL); @@ -561,7 +649,7 @@ int do_proc_net_rpc_nfsd(int update_every, unsigned long long dt) { if(do_net == 2) { st = rrdset_find_bytype("nfsd", "net"); if(!st) { - st = 
rrdset_create("nfsd", "net", NULL, "nfsd", NULL, "Network Reads", "reads/s", 5007, update_every, RRDSET_TYPE_STACKED); + st = rrdset_create("nfsd", "net", NULL, "network", NULL, "NFS Server Network Statistics", "packets/s", 5007, update_every, RRDSET_TYPE_STACKED); st->isdetail = 1; rrddim_add(st, "udp", NULL, 1, 1, RRDDIM_INCREMENTAL); @@ -583,10 +671,10 @@ int do_proc_net_rpc_nfsd(int update_every, unsigned long long dt) { if(do_rpc == 2) { st = rrdset_find_bytype("nfsd", "rpc"); if(!st) { - st = rrdset_create("nfsd", "rpc", NULL, "nfsd", NULL, "Remote Procedure Calls", "calls/s", 5008, update_every, RRDSET_TYPE_LINE); + st = rrdset_create("nfsd", "rpc", NULL, "rpc", NULL, "NFS Server Remote Procedure Calls Statistics", "calls/s", 5008, update_every, RRDSET_TYPE_LINE); st->isdetail = 1; - rrddim_add(st, "all", NULL, 1, 1, RRDDIM_INCREMENTAL); + rrddim_add(st, "calls", NULL, 1, 1, RRDDIM_INCREMENTAL); rrddim_add(st, "bad_format", NULL, -1, 1, RRDDIM_INCREMENTAL); rrddim_add(st, "bad_auth", NULL, -1, 1, RRDDIM_INCREMENTAL); } @@ -595,7 +683,7 @@ int do_proc_net_rpc_nfsd(int update_every, unsigned long long dt) { // ignore rpc_bad_client if(rpc_bad_client) {}; - rrddim_set(st, "all", rpc_count); + rrddim_set(st, "calls", rpc_calls); rrddim_set(st, "bad_format", rpc_bad_format); rrddim_set(st, "bad_auth", rpc_bad_auth); rrdset_done(st); @@ -607,15 +695,15 @@ int do_proc_net_rpc_nfsd(int update_every, unsigned long long dt) { unsigned int i; st = rrdset_find_bytype("nfsd", "proc2"); if(!st) { - st = rrdset_create("nfsd", "proc2", NULL, "nfsd", NULL, "NFS v2 Calls", "calls/s", 5009, update_every, RRDSET_TYPE_STACKED); + st = rrdset_create("nfsd", "proc2", NULL, "nfsv2rpc", NULL, "NFS v2 Server Remote Procedure Calls", "calls/s", 5009, update_every, RRDSET_TYPE_STACKED); - for(i = 0; nfsd_proc_values[i].present2 ; i++) - rrddim_add(st, nfsd_proc_values[i].name, NULL, 1, 1, RRDDIM_INCREMENTAL); + for(i = 0; nfsd_proc2_values[i].present ; i++) + rrddim_add(st, 
nfsd_proc2_values[i].name, NULL, 1, 1, RRDDIM_INCREMENTAL); } else rrdset_next(st); - for(i = 0; nfsd_proc_values[i].present2 ; i++) - rrddim_set(st, nfsd_proc_values[i].name, nfsd_proc_values[i].proc2); + for(i = 0; nfsd_proc2_values[i].present ; i++) + rrddim_set(st, nfsd_proc2_values[i].name, nfsd_proc2_values[i].value); rrdset_done(st); } @@ -626,15 +714,15 @@ int do_proc_net_rpc_nfsd(int update_every, unsigned long long dt) { unsigned int i; st = rrdset_find_bytype("nfsd", "proc3"); if(!st) { - st = rrdset_create("nfsd", "proc3", NULL, "nfsd", NULL, "NFS v3 Calls", "calls/s", 5010, update_every, RRDSET_TYPE_STACKED); + st = rrdset_create("nfsd", "proc3", NULL, "nfsv3rpc", NULL, "NFS v3 Server Remote Procedure Calls", "calls/s", 5010, update_every, RRDSET_TYPE_STACKED); - for(i = 0; nfsd_proc_values[i].present3 ; i++) - rrddim_add(st, nfsd_proc_values[i].name, NULL, 1, 1, RRDDIM_INCREMENTAL); + for(i = 0; nfsd_proc3_values[i].present ; i++) + rrddim_add(st, nfsd_proc3_values[i].name, NULL, 1, 1, RRDDIM_INCREMENTAL); } else rrdset_next(st); - for(i = 0; nfsd_proc_values[i].present3 ; i++) - rrddim_set(st, nfsd_proc_values[i].name, nfsd_proc_values[i].proc3); + for(i = 0; nfsd_proc3_values[i].present ; i++) + rrddim_set(st, nfsd_proc3_values[i].name, nfsd_proc3_values[i].value); rrdset_done(st); } @@ -645,15 +733,15 @@ int do_proc_net_rpc_nfsd(int update_every, unsigned long long dt) { unsigned int i; st = rrdset_find_bytype("nfsd", "proc4"); if(!st) { - st = rrdset_create("nfsd", "proc4", NULL, "nfsd", NULL, "NFS v4 Calls", "calls/s", 5011, update_every, RRDSET_TYPE_STACKED); + st = rrdset_create("nfsd", "proc4", NULL, "nfsv4rpc", NULL, "NFS v4 Server Remote Procedure Calls", "calls/s", 5011, update_every, RRDSET_TYPE_STACKED); - for(i = 0; nfsd_proc_values[i].present4 ; i++) - rrddim_add(st, nfsd_proc_values[i].name, NULL, 1, 1, RRDDIM_INCREMENTAL); + for(i = 0; nfsd_proc4_values[i].present ; i++) + rrddim_add(st, nfsd_proc4_values[i].name, NULL, 1, 1, 
RRDDIM_INCREMENTAL); } else rrdset_next(st); - for(i = 0; nfsd_proc_values[i].present4 ; i++) - rrddim_set(st, nfsd_proc_values[i].name, nfsd_proc_values[i].proc4); + for(i = 0; nfsd_proc4_values[i].present ; i++) + rrddim_set(st, nfsd_proc4_values[i].name, nfsd_proc4_values[i].value); rrdset_done(st); } @@ -664,7 +752,7 @@ int do_proc_net_rpc_nfsd(int update_every, unsigned long long dt) { unsigned int i; st = rrdset_find_bytype("nfsd", "proc4ops"); if(!st) { - st = rrdset_create("nfsd", "proc4ops", NULL, "nfsd", NULL, "NFS v4 Operations", "operations/s", 5012, update_every, RRDSET_TYPE_STACKED); + st = rrdset_create("nfsd", "proc4ops", NULL, "nfsv4ops", NULL, "NFS v4 Server Operations", "operations/s", 5012, update_every, RRDSET_TYPE_STACKED); for(i = 0; nfsd4_ops_values[i].present ; i++) rrddim_add(st, nfsd4_ops_values[i].name, NULL, 1, 1, RRDDIM_INCREMENTAL); diff --git a/src/proc_net_snmp.c b/src/proc_net_snmp.c index a773f55f..a75c0a96 100644 --- a/src/proc_net_snmp.c +++ b/src/proc_net_snmp.c @@ -3,34 +3,368 @@ #define RRD_TYPE_NET_SNMP "ipv4" #define RRD_TYPE_NET_SNMP_LEN strlen(RRD_TYPE_NET_SNMP) +#define NETSTAT_PRESENT 0x00000001 + +struct netstat_columns { + char *name; + uint32_t hash; + unsigned long long value; + int multiplier; // not needed everywhere + char *label; // not needed everywhere +}; + +static struct netstat_columns ip_data[] = { +// { "Forwarding", 0, 0, 1, NULL }, +// { "DefaultTTL", 0, 0, 1, NULL }, + { "InReceives", 0, 0, 1, NULL }, + { "InHdrErrors", 0, 0, 1, NULL }, + { "InAddrErrors", 0, 0, 1, NULL }, + { "ForwDatagrams", 0, 0, 1, NULL }, + { "InUnknownProtos", 0, 0, 1, NULL }, + { "InDiscards", 0, 0, 1, NULL }, + { "InDelivers", 0, 0, 1, NULL }, + { "OutRequests", 0, 0, 1, NULL }, + { "OutDiscards", 0, 0, 1, NULL }, + { "OutNoRoutes", 0, 0, 1, NULL }, +// { "ReasmTimeout", 0, 0, 1, NULL }, + { "ReasmReqds", 0, 0, 1, NULL }, + { "ReasmOKs", 0, 0, 1, NULL }, + { "ReasmFails", 0, 0, 1, NULL }, + { "FragOKs", 0, 0, 1, NULL }, + {
"FragFails", 0, 0, 1, NULL }, + { "FragCreates", 0, 0, 1, NULL }, + { NULL, 0, 0, 0, NULL } +}; + +static struct netstat_columns icmp_data[] = { + { "InMsgs", 0, 0, 1, NULL }, + { "OutMsgs", 0, 0, -1, NULL }, + { "InErrors", 0, 0, 1, NULL }, + { "OutErrors", 0, 0, -1, NULL }, + { "InCsumErrors", 0, 0, 1, NULL }, + + // all these are available in icmpmsg +// { "InDestUnreachs", 0, 0, 1, NULL }, +// { "OutDestUnreachs", 0, 0, -1, NULL }, +// { "InTimeExcds", 0, 0, 1, NULL }, +// { "OutTimeExcds", 0, 0, -1, NULL }, +// { "InParmProbs", 0, 0, 1, NULL }, +// { "OutParmProbs", 0, 0, -1, NULL }, +// { "InSrcQuenchs", 0, 0, 1, NULL }, +// { "OutSrcQuenchs", 0, 0, -1, NULL }, +// { "InRedirects", 0, 0, 1, NULL }, +// { "OutRedirects", 0, 0, -1, NULL }, +// { "InEchos", 0, 0, 1, NULL }, +// { "OutEchos", 0, 0, -1, NULL }, +// { "InEchoReps", 0, 0, 1, NULL }, +// { "OutEchoReps", 0, 0, -1, NULL }, +// { "InTimestamps", 0, 0, 1, NULL }, +// { "OutTimestamps", 0, 0, -1, NULL }, +// { "InTimestampReps", 0, 0, 1, NULL }, +// { "OutTimestampReps", 0, 0, -1, NULL }, +// { "InAddrMasks", 0, 0, 1, NULL }, +// { "OutAddrMasks", 0, 0, -1, NULL }, +// { "InAddrMaskReps", 0, 0, 1, NULL }, +// { "OutAddrMaskReps", 0, 0, -1, NULL }, + + { NULL, 0, 0, 0, NULL } +}; + +static struct netstat_columns icmpmsg_data[] = { + { "InType0", 0, 0, 1, "InEchoReps" }, + { "OutType0", 0, 0, -1, "OutEchoReps" }, +// { "InType1", 0, 0, 1, NULL }, // unassigned +// { "OutType1", 0, 0, -1, NULL }, // unassigned +// { "InType2", 0, 0, 1, NULL }, // unassigned +// { "OutType2", 0, 0, -1, NULL }, // unassigned + { "InType3", 0, 0, 1, "InDestUnreachs" }, + { "OutType3", 0, 0, -1, "OutDestUnreachs" }, +// { "InType4", 0, 0, 1, "InSrcQuenchs" }, // deprecated +// { "OutType4", 0, 0, -1, "OutSrcQuenchs" }, // deprecated + { "InType5", 0, 0, 1, "InRedirects" }, + { "OutType5", 0, 0, -1, "OutRedirects" }, +// { "InType6", 0, 0, 1, "InAlterHostAddr" }, // deprecated +// { "OutType6", 0, 0, -1, "OutAlterHostAddr" }, // 
deprecated +// { "InType7", 0, 0, 1, NULL }, // unassigned +// { "OutType7", 0, 0, -1, NULL }, // unassigned + { "InType8", 0, 0, 1, "InEchos" }, + { "OutType8", 0, 0, -1, "OutEchos" }, + { "InType9", 0, 0, 1, "InRouterAdvert" }, + { "OutType9", 0, 0, -1, "OutRouterAdvert" }, + { "InType10", 0, 0, 1, "InRouterSelect" }, + { "OutType10", 0, 0, -1, "OutRouterSelect" }, + { "InType11", 0, 0, 1, "InTimeExcds" }, + { "OutType11", 0, 0, -1, "OutTimeExcds" }, + { "InType12", 0, 0, 1, "InParmProbs" }, + { "OutType12", 0, 0, -1, "OutParmProbs" }, + { "InType13", 0, 0, 1, "InTimestamps" }, + { "OutType13", 0, 0, -1, "OutTimestamps" }, + { "InType14", 0, 0, 1, "InTimestampReps" }, + { "OutType14", 0, 0, -1, "OutTimestampReps" }, +// { "InType15", 0, 0, 1, "InInfos" }, // deprecated +// { "OutType15", 0, 0, -1, "OutInfos" }, // deprecated +// { "InType16", 0, 0, 1, "InInfoReps" }, // deprecated +// { "OutType16", 0, 0, -1, "OutInfoReps" }, // deprecated +// { "InType17", 0, 0, 1, "InAddrMasks" }, // deprecated +// { "OutType17", 0, 0, -1, "OutAddrMasks" }, // deprecated +// { "InType18", 0, 0, 1, "InAddrMaskReps" }, // deprecated +// { "OutType18", 0, 0, -1, "OutAddrMaskReps" }, // deprecated +// { "InType30", 0, 0, 1, "InTraceroute" }, // deprecated +// { "OutType30", 0, 0, -1, "OutTraceroute" }, // deprecated + { NULL, 0, 0, 0, NULL } +}; + +static struct netstat_columns tcp_data[] = { +// { "RtoAlgorithm", 0, 0, 1, NULL }, +// { "RtoMin", 0, 0, 1, NULL }, +// { "RtoMax", 0, 0, 1, NULL }, +// { "MaxConn", 0, 0, 1, NULL }, + { "ActiveOpens", 0, 0, 1, NULL }, + { "PassiveOpens", 0, 0, 1, NULL }, + { "AttemptFails", 0, 0, 1, NULL }, + { "EstabResets", 0, 0, 1, NULL }, + { "CurrEstab", 0, 0, 1, NULL }, + { "InSegs", 0, 0, 1, NULL }, + { "OutSegs", 0, 0, 1, NULL }, + { "RetransSegs", 0, 0, 1, NULL }, + { "InErrs", 0, 0, 1, NULL }, + { "OutRsts", 0, 0, 1, NULL }, + { "InCsumErrors", 0, 0, 1, NULL }, + { NULL, 0, 0, 0, NULL } +}; + +static struct netstat_columns udp_data[] = { + { 
"InDatagrams", 0, 0, 1, NULL }, + { "NoPorts", 0, 0, 1, NULL }, + { "InErrors", 0, 0, 1, NULL }, + { "OutDatagrams", 0, 0, 1, NULL }, + { "RcvbufErrors", 0, 0, 1, NULL }, + { "SndbufErrors", 0, 0, 1, NULL }, + { "InCsumErrors", 0, 0, 1, NULL }, + { "IgnoredMulti", 0, 0, 1, NULL }, + { NULL, 0, 0, 0, NULL } +}; + +static struct netstat_columns udplite_data[] = { + { "InDatagrams", 0, 0, 1, NULL }, + { "NoPorts", 0, 0, 1, NULL }, + { "InErrors", 0, 0, 1, NULL }, + { "OutDatagrams", 0, 0, 1, NULL }, + { "RcvbufErrors", 0, 0, 1, NULL }, + { "SndbufErrors", 0, 0, 1, NULL }, + { "InCsumErrors", 0, 0, 1, NULL }, + { "IgnoredMulti", 0, 0, 1, NULL }, + { NULL, 0, 0, 0, NULL } +}; + +static void hash_array(struct netstat_columns *nc) { + int i; + + for(i = 0; nc[i].name ;i++) + nc[i].hash = simple_hash(nc[i].name); +} + +static unsigned long long *netstat_columns_find(struct netstat_columns *nc, const char *name) { + uint32_t i, hash = simple_hash(name); + + for(i = 0; nc[i].name ;i++) + if(unlikely(nc[i].hash == hash && !strcmp(nc[i].name, name))) + return &nc[i].value; + + fatal("Cannot find key '%s' in /proc/net/snmp internal array.", name); +} + +static void parse_line_pair(procfile *ff, struct netstat_columns *nc, uint32_t header_line, uint32_t values_line) { + uint32_t hwords = procfile_linewords(ff, header_line); + uint32_t vwords = procfile_linewords(ff, values_line); + uint32_t w, i; + + if(unlikely(vwords > hwords)) { + error("File /proc/net/snmp on header line %u has %u words, but on value line %u has %u words.", header_line, hwords, values_line, vwords); + vwords = hwords; + } + + for(w = 1; w < vwords ;w++) { + char *key = procfile_lineword(ff, header_line, w); + uint32_t hash = simple_hash(key); + + for(i = 0 ; nc[i].name ;i++) { + if(unlikely(hash == nc[i].hash && !strcmp(key, nc[i].name))) { + nc[i].value = strtoull(procfile_lineword(ff, values_line, w), NULL, 10); + break; + } + } + } +} + int do_proc_net_snmp(int update_every, unsigned long long dt) { + 
(void)dt; + static procfile *ff = NULL; static int do_ip_packets = -1, do_ip_fragsout = -1, do_ip_fragsin = -1, do_ip_errors = -1, do_tcp_sockets = -1, do_tcp_packets = -1, do_tcp_errors = -1, do_tcp_handshake = -1, - do_udp_packets = -1, do_udp_errors = -1; - - if(do_ip_packets == -1) do_ip_packets = config_get_boolean("plugin:proc:/proc/net/snmp", "ipv4 packets", 1); - if(do_ip_fragsout == -1) do_ip_fragsout = config_get_boolean("plugin:proc:/proc/net/snmp", "ipv4 fragments sent", 1); - if(do_ip_fragsin == -1) do_ip_fragsin = config_get_boolean("plugin:proc:/proc/net/snmp", "ipv4 fragments assembly", 1); - if(do_ip_errors == -1) do_ip_errors = config_get_boolean("plugin:proc:/proc/net/snmp", "ipv4 errors", 1); - if(do_tcp_sockets == -1) do_tcp_sockets = config_get_boolean("plugin:proc:/proc/net/snmp", "ipv4 TCP connections", 1); - if(do_tcp_packets == -1) do_tcp_packets = config_get_boolean("plugin:proc:/proc/net/snmp", "ipv4 TCP packets", 1); - if(do_tcp_errors == -1) do_tcp_errors = config_get_boolean("plugin:proc:/proc/net/snmp", "ipv4 TCP errors", 1); - if(do_tcp_handshake == -1) do_tcp_handshake = config_get_boolean("plugin:proc:/proc/net/snmp", "ipv4 TCP handshake issues", 1); - if(do_udp_packets == -1) do_udp_packets = config_get_boolean("plugin:proc:/proc/net/snmp", "ipv4 UDP packets", 1); - if(do_udp_errors == -1) do_udp_errors = config_get_boolean("plugin:proc:/proc/net/snmp", "ipv4 UDP errors", 1); - - if(dt) {}; - - if(!ff) { + do_udp_packets = -1, do_udp_errors = -1, do_icmp_packets = -1, do_icmpmsg = -1, do_udplite_packets = -1; + static uint32_t hash_ip = 0, hash_icmp = 0, hash_tcp = 0, hash_udp = 0, hash_icmpmsg = 0, hash_udplite = 0; + + //static unsigned long long *ip_Forwarding = NULL; + //static unsigned long long *ip_DefaultTTL = NULL; + static unsigned long long *ip_InReceives = NULL; + static unsigned long long *ip_InHdrErrors = NULL; + static unsigned long long *ip_InAddrErrors = NULL; + static unsigned long long *ip_ForwDatagrams = NULL; 
+ static unsigned long long *ip_InUnknownProtos = NULL; + static unsigned long long *ip_InDiscards = NULL; + static unsigned long long *ip_InDelivers = NULL; + static unsigned long long *ip_OutRequests = NULL; + static unsigned long long *ip_OutDiscards = NULL; + static unsigned long long *ip_OutNoRoutes = NULL; + //static unsigned long long *ip_ReasmTimeout = NULL; + static unsigned long long *ip_ReasmReqds = NULL; + static unsigned long long *ip_ReasmOKs = NULL; + static unsigned long long *ip_ReasmFails = NULL; + static unsigned long long *ip_FragOKs = NULL; + static unsigned long long *ip_FragFails = NULL; + static unsigned long long *ip_FragCreates = NULL; + + static unsigned long long *icmp_InMsgs = NULL; + static unsigned long long *icmp_OutMsgs = NULL; + static unsigned long long *icmp_InErrors = NULL; + static unsigned long long *icmp_OutErrors = NULL; + static unsigned long long *icmp_InCsumErrors = NULL; + + //static unsigned long long *tcp_RtoAlgorithm = NULL; + //static unsigned long long *tcp_RtoMin = NULL; + //static unsigned long long *tcp_RtoMax = NULL; + //static unsigned long long *tcp_MaxConn = NULL; + static unsigned long long *tcp_ActiveOpens = NULL; + static unsigned long long *tcp_PassiveOpens = NULL; + static unsigned long long *tcp_AttemptFails = NULL; + static unsigned long long *tcp_EstabResets = NULL; + static unsigned long long *tcp_CurrEstab = NULL; + static unsigned long long *tcp_InSegs = NULL; + static unsigned long long *tcp_OutSegs = NULL; + static unsigned long long *tcp_RetransSegs = NULL; + static unsigned long long *tcp_InErrs = NULL; + static unsigned long long *tcp_OutRsts = NULL; + static unsigned long long *tcp_InCsumErrors = NULL; + + static unsigned long long *udp_InDatagrams = NULL; + static unsigned long long *udp_NoPorts = NULL; + static unsigned long long *udp_InErrors = NULL; + static unsigned long long *udp_OutDatagrams = NULL; + static unsigned long long *udp_RcvbufErrors = NULL; + static unsigned long long 
*udp_SndbufErrors = NULL; + static unsigned long long *udp_InCsumErrors = NULL; + static unsigned long long *udp_IgnoredMulti = NULL; + + static unsigned long long *udplite_InDatagrams = NULL; + static unsigned long long *udplite_NoPorts = NULL; + static unsigned long long *udplite_InErrors = NULL; + static unsigned long long *udplite_OutDatagrams = NULL; + static unsigned long long *udplite_RcvbufErrors = NULL; + static unsigned long long *udplite_SndbufErrors = NULL; + static unsigned long long *udplite_InCsumErrors = NULL; + static unsigned long long *udplite_IgnoredMulti = NULL; + + if(unlikely(do_ip_packets == -1)) { + do_ip_packets = config_get_boolean("plugin:proc:/proc/net/snmp", "ipv4 packets", 1); + do_ip_fragsout = config_get_boolean("plugin:proc:/proc/net/snmp", "ipv4 fragments sent", 1); + do_ip_fragsin = config_get_boolean("plugin:proc:/proc/net/snmp", "ipv4 fragments assembly", 1); + do_ip_errors = config_get_boolean("plugin:proc:/proc/net/snmp", "ipv4 errors", 1); + do_tcp_sockets = config_get_boolean("plugin:proc:/proc/net/snmp", "ipv4 TCP connections", 1); + do_tcp_packets = config_get_boolean("plugin:proc:/proc/net/snmp", "ipv4 TCP packets", 1); + do_tcp_errors = config_get_boolean("plugin:proc:/proc/net/snmp", "ipv4 TCP errors", 1); + do_tcp_handshake = config_get_boolean("plugin:proc:/proc/net/snmp", "ipv4 TCP handshake issues", 1); + do_udp_packets = config_get_boolean("plugin:proc:/proc/net/snmp", "ipv4 UDP packets", 1); + do_udp_errors = config_get_boolean("plugin:proc:/proc/net/snmp", "ipv4 UDP errors", 1); + do_icmp_packets = config_get_boolean("plugin:proc:/proc/net/snmp", "ipv4 ICMP packets", 1); + do_icmpmsg = config_get_boolean("plugin:proc:/proc/net/snmp", "ipv4 ICMP messages", 1); + do_udplite_packets = config_get_boolean("plugin:proc:/proc/net/snmp", "ipv4 UDPLite packets", 1); + + hash_ip = simple_hash("Ip"); + hash_tcp = simple_hash("Tcp"); + hash_udp = simple_hash("Udp"); + hash_icmp = simple_hash("Icmp"); + hash_icmpmsg = 
simple_hash("IcmpMsg"); + hash_udplite = simple_hash("UdpLite"); + + hash_array(ip_data); + hash_array(tcp_data); + hash_array(udp_data); + hash_array(icmp_data); + hash_array(icmpmsg_data); + hash_array(udplite_data); + + //ip_Forwarding = netstat_columns_find(ip_data, "Forwarding"); + //ip_DefaultTTL = netstat_columns_find(ip_data, "DefaultTTL"); + ip_InReceives = netstat_columns_find(ip_data, "InReceives"); + ip_InHdrErrors = netstat_columns_find(ip_data, "InHdrErrors"); + ip_InAddrErrors = netstat_columns_find(ip_data, "InAddrErrors"); + ip_ForwDatagrams = netstat_columns_find(ip_data, "ForwDatagrams"); + ip_InUnknownProtos = netstat_columns_find(ip_data, "InUnknownProtos"); + ip_InDiscards = netstat_columns_find(ip_data, "InDiscards"); + ip_InDelivers = netstat_columns_find(ip_data, "InDelivers"); + ip_OutRequests = netstat_columns_find(ip_data, "OutRequests"); + ip_OutDiscards = netstat_columns_find(ip_data, "OutDiscards"); + ip_OutNoRoutes = netstat_columns_find(ip_data, "OutNoRoutes"); + //ip_ReasmTimeout = netstat_columns_find(ip_data, "ReasmTimeout"); + ip_ReasmReqds = netstat_columns_find(ip_data, "ReasmReqds"); + ip_ReasmOKs = netstat_columns_find(ip_data, "ReasmOKs"); + ip_ReasmFails = netstat_columns_find(ip_data, "ReasmFails"); + ip_FragOKs = netstat_columns_find(ip_data, "FragOKs"); + ip_FragFails = netstat_columns_find(ip_data, "FragFails"); + ip_FragCreates = netstat_columns_find(ip_data, "FragCreates"); + + icmp_InMsgs = netstat_columns_find(icmp_data, "InMsgs"); + icmp_OutMsgs = netstat_columns_find(icmp_data, "OutMsgs"); + icmp_InErrors = netstat_columns_find(icmp_data, "InErrors"); + icmp_OutErrors = netstat_columns_find(icmp_data, "OutErrors"); + icmp_InCsumErrors = netstat_columns_find(icmp_data, "InCsumErrors"); + + //tcp_RtoAlgorithm = netstat_columns_find(tcp_data, "RtoAlgorithm"); + //tcp_RtoMin = netstat_columns_find(tcp_data, "RtoMin"); + //tcp_RtoMax = netstat_columns_find(tcp_data, "RtoMax"); + //tcp_MaxConn = 
netstat_columns_find(tcp_data, "MaxConn"); + tcp_ActiveOpens = netstat_columns_find(tcp_data, "ActiveOpens"); + tcp_PassiveOpens = netstat_columns_find(tcp_data, "PassiveOpens"); + tcp_AttemptFails = netstat_columns_find(tcp_data, "AttemptFails"); + tcp_EstabResets = netstat_columns_find(tcp_data, "EstabResets"); + tcp_CurrEstab = netstat_columns_find(tcp_data, "CurrEstab"); + tcp_InSegs = netstat_columns_find(tcp_data, "InSegs"); + tcp_OutSegs = netstat_columns_find(tcp_data, "OutSegs"); + tcp_RetransSegs = netstat_columns_find(tcp_data, "RetransSegs"); + tcp_InErrs = netstat_columns_find(tcp_data, "InErrs"); + tcp_OutRsts = netstat_columns_find(tcp_data, "OutRsts"); + tcp_InCsumErrors = netstat_columns_find(tcp_data, "InCsumErrors"); + + udp_InDatagrams = netstat_columns_find(udp_data, "InDatagrams"); + udp_NoPorts = netstat_columns_find(udp_data, "NoPorts"); + udp_InErrors = netstat_columns_find(udp_data, "InErrors"); + udp_OutDatagrams = netstat_columns_find(udp_data, "OutDatagrams"); + udp_RcvbufErrors = netstat_columns_find(udp_data, "RcvbufErrors"); + udp_SndbufErrors = netstat_columns_find(udp_data, "SndbufErrors"); + udp_InCsumErrors = netstat_columns_find(udp_data, "InCsumErrors"); + udp_IgnoredMulti = netstat_columns_find(udp_data, "IgnoredMulti"); + + udplite_InDatagrams = netstat_columns_find(udplite_data, "InDatagrams"); + udplite_NoPorts = netstat_columns_find(udplite_data, "NoPorts"); + udplite_InErrors = netstat_columns_find(udplite_data, "InErrors"); + udplite_OutDatagrams = netstat_columns_find(udplite_data, "OutDatagrams"); + udplite_RcvbufErrors = netstat_columns_find(udplite_data, "RcvbufErrors"); + udplite_SndbufErrors = netstat_columns_find(udplite_data, "SndbufErrors"); + udplite_InCsumErrors = netstat_columns_find(udplite_data, "InCsumErrors"); + udplite_IgnoredMulti = netstat_columns_find(udplite_data, "IgnoredMulti"); + } + + if(unlikely(!ff)) { char filename[FILENAME_MAX + 1]; snprintfz(filename, FILENAME_MAX, "%s%s", 
global_host_prefix, "/proc/net/snmp"); ff = procfile_open(config_get("plugin:proc:/proc/net/snmp", "filename to monitor", filename), " \t:", PROCFILE_FLAG_DEFAULT); } - if(!ff) return 1; + if(unlikely(!ff)) return 1; ff = procfile_readall(ff); - if(!ff) return 0; // we return 0, so that we will retry to open it next time + if(unlikely(!ff)) return 0; // we return 0, so that we will retry to open it next time uint32_t lines = procfile_lines(ff), l; uint32_t words; @@ -38,8 +372,11 @@ int do_proc_net_snmp(int update_every, unsigned long long dt) { RRDSET *st; for(l = 0; l < lines ;l++) { - if(strcmp(procfile_lineword(ff, l, 0), "Ip") == 0) { - l++; + char *key = procfile_lineword(ff, l, 0); + uint32_t hash = simple_hash(key); + + if(unlikely(hash == hash_ip && strcmp(key, "Ip") == 0)) { + uint32_t h = l++; if(strcmp(procfile_lineword(ff, l, 0), "Ip") != 0) { error("Cannot read Ip line from /proc/net/snmp."); @@ -47,42 +384,13 @@ int do_proc_net_snmp(int update_every, unsigned long long dt) { } words = procfile_linewords(ff, l); - if(words < 20) { - error("Cannot read /proc/net/snmp Ip line. Expected 20 params, read %u.", words); + if(words < 3) { + error("Cannot read /proc/net/snmp Ip line. 
Expected 3+ params, read %u.", words); continue; } // see also http://net-snmp.sourceforge.net/docs/mibs/ip.html - unsigned long long Forwarding, DefaultTTL, InReceives, InHdrErrors, InAddrErrors, ForwDatagrams, InUnknownProtos, InDiscards, InDelivers, - OutRequests, OutDiscards, OutNoRoutes, ReasmTimeout, ReasmReqds, ReasmOKs, ReasmFails, FragOKs, FragFails, FragCreates; - - Forwarding = strtoull(procfile_lineword(ff, l, 1), NULL, 10); - DefaultTTL = strtoull(procfile_lineword(ff, l, 2), NULL, 10); - InReceives = strtoull(procfile_lineword(ff, l, 3), NULL, 10); - InHdrErrors = strtoull(procfile_lineword(ff, l, 4), NULL, 10); - InAddrErrors = strtoull(procfile_lineword(ff, l, 5), NULL, 10); - ForwDatagrams = strtoull(procfile_lineword(ff, l, 6), NULL, 10); - InUnknownProtos = strtoull(procfile_lineword(ff, l, 7), NULL, 10); - InDiscards = strtoull(procfile_lineword(ff, l, 8), NULL, 10); - InDelivers = strtoull(procfile_lineword(ff, l, 9), NULL, 10); - OutRequests = strtoull(procfile_lineword(ff, l, 10), NULL, 10); - OutDiscards = strtoull(procfile_lineword(ff, l, 11), NULL, 10); - OutNoRoutes = strtoull(procfile_lineword(ff, l, 12), NULL, 10); - ReasmTimeout = strtoull(procfile_lineword(ff, l, 13), NULL, 10); - ReasmReqds = strtoull(procfile_lineword(ff, l, 14), NULL, 10); - ReasmOKs = strtoull(procfile_lineword(ff, l, 15), NULL, 10); - ReasmFails = strtoull(procfile_lineword(ff, l, 16), NULL, 10); - FragOKs = strtoull(procfile_lineword(ff, l, 17), NULL, 10); - FragFails = strtoull(procfile_lineword(ff, l, 18), NULL, 10); - FragCreates = strtoull(procfile_lineword(ff, l, 19), NULL, 10); - - // these are not counters - if(Forwarding) {}; // is forwarding enabled? 
- if(DefaultTTL) {}; // the default ttl on packets - if(ReasmTimeout) {}; // Reassembly timeout - - // this counter is not used - if(InDelivers) {}; // total number of packets delivered to IP user-protocols + parse_line_pair(ff, ip_data, h, l); // -------------------------------------------------------------------- @@ -91,15 +399,17 @@ int do_proc_net_snmp(int update_every, unsigned long long dt) { if(!st) { st = rrdset_create(RRD_TYPE_NET_SNMP, "packets", NULL, "packets", NULL, "IPv4 Packets", "packets/s", 3000, update_every, RRDSET_TYPE_LINE); - rrddim_add(st, "received", NULL, 1, 1, RRDDIM_INCREMENTAL); - rrddim_add(st, "sent", NULL, -1, 1, RRDDIM_INCREMENTAL); - rrddim_add(st, "forwarded", NULL, 1, 1, RRDDIM_INCREMENTAL); + rrddim_add(st, "InReceives", "received", 1, 1, RRDDIM_INCREMENTAL); + rrddim_add(st, "OutRequests", "sent", -1, 1, RRDDIM_INCREMENTAL); + rrddim_add(st, "ForwDatagrams", "forwarded", 1, 1, RRDDIM_INCREMENTAL); + rrddim_add(st, "InDelivers", "delivered", 1, 1, RRDDIM_INCREMENTAL); } else rrdset_next(st); - rrddim_set(st, "sent", OutRequests); - rrddim_set(st, "received", InReceives); - rrddim_set(st, "forwarded", ForwDatagrams); + rrddim_set(st, "OutRequests", *ip_OutRequests); + rrddim_set(st, "InReceives", *ip_InReceives); + rrddim_set(st, "ForwDatagrams", *ip_ForwDatagrams); + rrddim_set(st, "InDelivers", *ip_InDelivers); rrdset_done(st); } @@ -111,15 +421,15 @@ int do_proc_net_snmp(int update_every, unsigned long long dt) { st = rrdset_create(RRD_TYPE_NET_SNMP, "fragsout", NULL, "fragments", NULL, "IPv4 Fragments Sent", "packets/s", 3010, update_every, RRDSET_TYPE_LINE); st->isdetail = 1; - rrddim_add(st, "ok", NULL, 1, 1, RRDDIM_INCREMENTAL); - rrddim_add(st, "failed", NULL, -1, 1, RRDDIM_INCREMENTAL); - rrddim_add(st, "all", NULL, 1, 1, RRDDIM_INCREMENTAL); + rrddim_add(st, "FragOKs", "ok", 1, 1, RRDDIM_INCREMENTAL); + rrddim_add(st, "FragFails", "failed", -1, 1, RRDDIM_INCREMENTAL); + rrddim_add(st, "FragCreates", "created", 1, 1, 
RRDDIM_INCREMENTAL); } else rrdset_next(st); - rrddim_set(st, "ok", FragOKs); - rrddim_set(st, "failed", FragFails); - rrddim_set(st, "all", FragCreates); + rrddim_set(st, "FragOKs", *ip_FragOKs); + rrddim_set(st, "FragFails", *ip_FragFails); + rrddim_set(st, "FragCreates", *ip_FragCreates); rrdset_done(st); } @@ -131,15 +441,15 @@ int do_proc_net_snmp(int update_every, unsigned long long dt) { st = rrdset_create(RRD_TYPE_NET_SNMP, "fragsin", NULL, "fragments", NULL, "IPv4 Fragments Reassembly", "packets/s", 3011, update_every, RRDSET_TYPE_LINE); st->isdetail = 1; - rrddim_add(st, "ok", NULL, 1, 1, RRDDIM_INCREMENTAL); - rrddim_add(st, "failed", NULL, -1, 1, RRDDIM_INCREMENTAL); - rrddim_add(st, "all", NULL, 1, 1, RRDDIM_INCREMENTAL); + rrddim_add(st, "ReasmOKs", "ok", 1, 1, RRDDIM_INCREMENTAL); + rrddim_add(st, "ReasmFails", "failed", -1, 1, RRDDIM_INCREMENTAL); + rrddim_add(st, "ReasmReqds", "all", 1, 1, RRDDIM_INCREMENTAL); } else rrdset_next(st); - rrddim_set(st, "ok", ReasmOKs); - rrddim_set(st, "failed", ReasmFails); - rrddim_set(st, "all", ReasmReqds); + rrddim_set(st, "ReasmOKs", *ip_ReasmOKs); + rrddim_set(st, "ReasmFails", *ip_ReasmFails); + rrddim_set(st, "ReasmReqds", *ip_ReasmReqds); rrdset_done(st); } @@ -151,28 +461,108 @@ int do_proc_net_snmp(int update_every, unsigned long long dt) { st = rrdset_create(RRD_TYPE_NET_SNMP, "errors", NULL, "errors", NULL, "IPv4 Errors", "packets/s", 3002, update_every, RRDSET_TYPE_LINE); st->isdetail = 1; - rrddim_add(st, "InDiscards", NULL, 1, 1, RRDDIM_INCREMENTAL); - rrddim_add(st, "OutDiscards", NULL, -1, 1, RRDDIM_INCREMENTAL); + rrddim_add(st, "InDiscards", NULL, 1, 1, RRDDIM_INCREMENTAL); + rrddim_add(st, "OutDiscards", NULL, -1, 1, RRDDIM_INCREMENTAL); - rrddim_add(st, "InHdrErrors", NULL, 1, 1, RRDDIM_INCREMENTAL); - rrddim_add(st, "InAddrErrors", NULL, 1, 1, RRDDIM_INCREMENTAL); - rrddim_add(st, "InUnknownProtos", NULL, 1, 1, RRDDIM_INCREMENTAL); + rrddim_add(st, "InHdrErrors", NULL, 1, 1, 
RRDDIM_INCREMENTAL); + rrddim_add(st, "OutNoRoutes", NULL, -1, 1, RRDDIM_INCREMENTAL); - rrddim_add(st, "OutNoRoutes", NULL, -1, 1, RRDDIM_INCREMENTAL); + rrddim_add(st, "InAddrErrors", NULL, 1, 1, RRDDIM_INCREMENTAL); + rrddim_add(st, "InUnknownProtos", NULL, 1, 1, RRDDIM_INCREMENTAL); } else rrdset_next(st); - rrddim_set(st, "InDiscards", InDiscards); - rrddim_set(st, "OutDiscards", OutDiscards); - rrddim_set(st, "InHdrErrors", InHdrErrors); - rrddim_set(st, "InAddrErrors", InAddrErrors); - rrddim_set(st, "InUnknownProtos", InUnknownProtos); - rrddim_set(st, "OutNoRoutes", OutNoRoutes); + rrddim_set(st, "InDiscards", *ip_InDiscards); + rrddim_set(st, "OutDiscards", *ip_OutDiscards); + rrddim_set(st, "InHdrErrors", *ip_InHdrErrors); + rrddim_set(st, "InAddrErrors", *ip_InAddrErrors); + rrddim_set(st, "InUnknownProtos", *ip_InUnknownProtos); + rrddim_set(st, "OutNoRoutes", *ip_OutNoRoutes); rrdset_done(st); } } - else if(strcmp(procfile_lineword(ff, l, 0), "Tcp") == 0) { - l++; + else if(unlikely(hash == hash_icmp && strcmp(key, "Icmp") == 0)) { + uint32_t h = l++; + + if(strcmp(procfile_lineword(ff, l, 0), "Icmp") != 0) { + error("Cannot read Icmp line from /proc/net/snmp."); + break; + } + + words = procfile_linewords(ff, l); + if(words < 3) { + error("Cannot read /proc/net/snmp Icmp line. 
Expected 3+ params, read %u.", words); + continue; + } + + parse_line_pair(ff, icmp_data, h, l); + + // -------------------------------------------------------------------- + + if(do_icmp_packets) { + st = rrdset_find(RRD_TYPE_NET_SNMP ".icmp"); + if(!st) { + st = rrdset_create(RRD_TYPE_NET_SNMP, "icmp", NULL, "icmp", NULL, "IPv4 ICMP Packets", "packets/s", 2602, update_every, RRDSET_TYPE_LINE); + + rrddim_add(st, "InMsgs", "received", 1, 1, RRDDIM_INCREMENTAL); + rrddim_add(st, "OutMsgs", "sent", -1, 1, RRDDIM_INCREMENTAL); + } + else rrdset_next(st); + + rrddim_set(st, "InMsgs", *icmp_InMsgs); + rrddim_set(st, "OutMsgs", *icmp_OutMsgs); + + rrdset_done(st); + + st = rrdset_find(RRD_TYPE_NET_SNMP ".icmp_errors"); + if(!st) { + st = rrdset_create(RRD_TYPE_NET_SNMP, "icmp_errors", NULL, "icmp", NULL, "IPv4 ICMP Errors", "packets/s", 2603, update_every, RRDSET_TYPE_LINE); + + rrddim_add(st, "InErrors", NULL, 1, 1, RRDDIM_INCREMENTAL); + rrddim_add(st, "OutErrors", NULL, -1, 1, RRDDIM_INCREMENTAL); + rrddim_add(st, "InCsumErrors", NULL, 1, 1, RRDDIM_INCREMENTAL); + } + else rrdset_next(st); + + rrddim_set(st, "InErrors", *icmp_InErrors); + rrddim_set(st, "OutErrors", *icmp_OutErrors); + rrddim_set(st, "InCsumErrors", *icmp_InCsumErrors); + + rrdset_done(st); + } + } + else if(unlikely(hash == hash_icmpmsg && strcmp(key, "IcmpMsg") == 0)) { + uint32_t h = l++; + + if(strcmp(procfile_lineword(ff, l, 0), "IcmpMsg") != 0) { + error("Cannot read IcmpMsg line from /proc/net/snmp."); + break; + } + + parse_line_pair(ff, icmpmsg_data, h, l); + + // -------------------------------------------------------------------- + + if(do_icmpmsg) { + int i; + + st = rrdset_find(RRD_TYPE_NET_SNMP ".icmpmsg"); + if(!st) { + st = rrdset_create(RRD_TYPE_NET_SNMP, "icmpmsg", NULL, "icmp", NULL, "IPv4 ICMP Messsages", "packets/s", 2604, update_every, RRDSET_TYPE_LINE); + + for(i = 0; icmpmsg_data[i].name ;i++) + rrddim_add(st, icmpmsg_data[i].name, icmpmsg_data[i].label, 
icmpmsg_data[i].multiplier, 1, RRDDIM_INCREMENTAL); + } + else rrdset_next(st); + + for(i = 0; icmpmsg_data[i].name ;i++) + rrddim_set(st, icmpmsg_data[i].name, icmpmsg_data[i].value); + + rrdset_done(st); + } + } + else if(unlikely(hash == hash_tcp && strcmp(key, "Tcp") == 0)) { + uint32_t h = l++; if(strcmp(procfile_lineword(ff, l, 0), "Tcp") != 0) { error("Cannot read Tcp line from /proc/net/snmp."); @@ -180,34 +570,12 @@ int do_proc_net_snmp(int update_every, unsigned long long dt) { } words = procfile_linewords(ff, l); - if(words < 15) { - error("Cannot read /proc/net/snmp Tcp line. Expected 15 params, read %u.", words); + if(words < 3) { + error("Cannot read /proc/net/snmp Tcp line. Expected 3+ params, read %u.", words); continue; } - unsigned long long RtoAlgorithm, RtoMin, RtoMax, MaxConn, ActiveOpens, PassiveOpens, AttemptFails, EstabResets, - CurrEstab, InSegs, OutSegs, RetransSegs, InErrs, OutRsts; - - RtoAlgorithm = strtoull(procfile_lineword(ff, l, 1), NULL, 10); - RtoMin = strtoull(procfile_lineword(ff, l, 2), NULL, 10); - RtoMax = strtoull(procfile_lineword(ff, l, 3), NULL, 10); - MaxConn = strtoull(procfile_lineword(ff, l, 4), NULL, 10); - ActiveOpens = strtoull(procfile_lineword(ff, l, 5), NULL, 10); - PassiveOpens = strtoull(procfile_lineword(ff, l, 6), NULL, 10); - AttemptFails = strtoull(procfile_lineword(ff, l, 7), NULL, 10); - EstabResets = strtoull(procfile_lineword(ff, l, 8), NULL, 10); - CurrEstab = strtoull(procfile_lineword(ff, l, 9), NULL, 10); - InSegs = strtoull(procfile_lineword(ff, l, 10), NULL, 10); - OutSegs = strtoull(procfile_lineword(ff, l, 11), NULL, 10); - RetransSegs = strtoull(procfile_lineword(ff, l, 12), NULL, 10); - InErrs = strtoull(procfile_lineword(ff, l, 13), NULL, 10); - OutRsts = strtoull(procfile_lineword(ff, l, 14), NULL, 10); - - // these are not counters - if(RtoAlgorithm) {}; - if(RtoMin) {}; - if(RtoMax) {}; - if(MaxConn) {}; + parse_line_pair(ff, tcp_data, h, l); // 
-------------------------------------------------------------------- @@ -217,11 +585,11 @@ int do_proc_net_snmp(int update_every, unsigned long long dt) { if(!st) { st = rrdset_create(RRD_TYPE_NET_SNMP, "tcpsock", NULL, "tcp", NULL, "IPv4 TCP Connections", "active connections", 2500, update_every, RRDSET_TYPE_LINE); - rrddim_add(st, "connections", NULL, 1, 1, RRDDIM_ABSOLUTE); + rrddim_add(st, "CurrEstab", "connections", 1, 1, RRDDIM_ABSOLUTE); } else rrdset_next(st); - rrddim_set(st, "connections", CurrEstab); + rrddim_set(st, "CurrEstab", *tcp_CurrEstab); rrdset_done(st); } @@ -232,13 +600,13 @@ int do_proc_net_snmp(int update_every, unsigned long long dt) { if(!st) { st = rrdset_create(RRD_TYPE_NET_SNMP, "tcppackets", NULL, "tcp", NULL, "IPv4 TCP Packets", "packets/s", 2600, update_every, RRDSET_TYPE_LINE); - rrddim_add(st, "received", NULL, 1, 1, RRDDIM_INCREMENTAL); - rrddim_add(st, "sent", NULL, -1, 1, RRDDIM_INCREMENTAL); + rrddim_add(st, "InSegs", "received", 1, 1, RRDDIM_INCREMENTAL); + rrddim_add(st, "OutSegs", "sent", -1, 1, RRDDIM_INCREMENTAL); } else rrdset_next(st); - rrddim_set(st, "received", InSegs); - rrddim_set(st, "sent", OutSegs); + rrddim_set(st, "InSegs", *tcp_InSegs); + rrddim_set(st, "OutSegs", *tcp_OutSegs); rrdset_done(st); } @@ -250,13 +618,15 @@ int do_proc_net_snmp(int update_every, unsigned long long dt) { st = rrdset_create(RRD_TYPE_NET_SNMP, "tcperrors", NULL, "tcp", NULL, "IPv4 TCP Errors", "packets/s", 2700, update_every, RRDSET_TYPE_LINE); st->isdetail = 1; - rrddim_add(st, "InErrs", NULL, 1, 1, RRDDIM_INCREMENTAL); - rrddim_add(st, "RetransSegs", NULL, -1, 1, RRDDIM_INCREMENTAL); + rrddim_add(st, "InErrs", NULL, 1, 1, RRDDIM_INCREMENTAL); + rrddim_add(st, "InCsumErrors", NULL, 1, 1, RRDDIM_INCREMENTAL); + rrddim_add(st, "RetransSegs", NULL, -1, 1, RRDDIM_INCREMENTAL); } else rrdset_next(st); - rrddim_set(st, "InErrs", InErrs); - rrddim_set(st, "RetransSegs", RetransSegs); + rrddim_set(st, "InErrs", *tcp_InErrs); + rrddim_set(st, 
"InCsumErrors", *tcp_InCsumErrors); + rrddim_set(st, "RetransSegs", *tcp_RetransSegs); rrdset_done(st); } @@ -268,24 +638,24 @@ int do_proc_net_snmp(int update_every, unsigned long long dt) { st = rrdset_create(RRD_TYPE_NET_SNMP, "tcphandshake", NULL, "tcp", NULL, "IPv4 TCP Handshake Issues", "events/s", 2900, update_every, RRDSET_TYPE_LINE); st->isdetail = 1; - rrddim_add(st, "EstabResets", NULL, 1, 1, RRDDIM_INCREMENTAL); - rrddim_add(st, "OutRsts", NULL, -1, 1, RRDDIM_INCREMENTAL); - rrddim_add(st, "ActiveOpens", NULL, 1, 1, RRDDIM_INCREMENTAL); - rrddim_add(st, "PassiveOpens", NULL, 1, 1, RRDDIM_INCREMENTAL); - rrddim_add(st, "AttemptFails", NULL, 1, 1, RRDDIM_INCREMENTAL); + rrddim_add(st, "EstabResets", NULL, 1, 1, RRDDIM_INCREMENTAL); + rrddim_add(st, "OutRsts", NULL, -1, 1, RRDDIM_INCREMENTAL); + rrddim_add(st, "ActiveOpens", NULL, 1, 1, RRDDIM_INCREMENTAL); + rrddim_add(st, "PassiveOpens", NULL, 1, 1, RRDDIM_INCREMENTAL); + rrddim_add(st, "AttemptFails", NULL, 1, 1, RRDDIM_INCREMENTAL); } else rrdset_next(st); - rrddim_set(st, "EstabResets", EstabResets); - rrddim_set(st, "OutRsts", OutRsts); - rrddim_set(st, "ActiveOpens", ActiveOpens); - rrddim_set(st, "PassiveOpens", PassiveOpens); - rrddim_set(st, "AttemptFails", AttemptFails); + rrddim_set(st, "EstabResets", *tcp_EstabResets); + rrddim_set(st, "OutRsts", *tcp_OutRsts); + rrddim_set(st, "ActiveOpens", *tcp_ActiveOpens); + rrddim_set(st, "PassiveOpens", *tcp_PassiveOpens); + rrddim_set(st, "AttemptFails", *tcp_AttemptFails); rrdset_done(st); } } - else if(strcmp(procfile_lineword(ff, l, 0), "Udp") == 0) { - l++; + else if(unlikely(hash == hash_udp && strcmp(key, "Udp") == 0)) { + uint32_t h = l++; if(strcmp(procfile_lineword(ff, l, 0), "Udp") != 0) { error("Cannot read Udp line from /proc/net/snmp."); @@ -293,19 +663,12 @@ int do_proc_net_snmp(int update_every, unsigned long long dt) { } words = procfile_linewords(ff, l); - if(words < 7) { - error("Cannot read /proc/net/snmp Udp line. 
Expected 7 params, read %u.", words); + if(words < 3) { + error("Cannot read /proc/net/snmp Udp line. Expected 3+ params, read %u.", words); continue; } - unsigned long long InDatagrams, NoPorts, InErrors, OutDatagrams, RcvbufErrors, SndbufErrors; - - InDatagrams = strtoull(procfile_lineword(ff, l, 1), NULL, 10); - NoPorts = strtoull(procfile_lineword(ff, l, 2), NULL, 10); - InErrors = strtoull(procfile_lineword(ff, l, 3), NULL, 10); - OutDatagrams = strtoull(procfile_lineword(ff, l, 4), NULL, 10); - RcvbufErrors = strtoull(procfile_lineword(ff, l, 5), NULL, 10); - SndbufErrors = strtoull(procfile_lineword(ff, l, 6), NULL, 10); + parse_line_pair(ff, udp_data, h, l); // -------------------------------------------------------------------- @@ -315,13 +678,13 @@ int do_proc_net_snmp(int update_every, unsigned long long dt) { if(!st) { st = rrdset_create(RRD_TYPE_NET_SNMP, "udppackets", NULL, "udp", NULL, "IPv4 UDP Packets", "packets/s", 2601, update_every, RRDSET_TYPE_LINE); - rrddim_add(st, "received", NULL, 1, 1, RRDDIM_INCREMENTAL); - rrddim_add(st, "sent", NULL, -1, 1, RRDDIM_INCREMENTAL); + rrddim_add(st, "InDatagrams", "received", 1, 1, RRDDIM_INCREMENTAL); + rrddim_add(st, "OutDatagrams", "sent", -1, 1, RRDDIM_INCREMENTAL); } else rrdset_next(st); - rrddim_set(st, "received", InDatagrams); - rrddim_set(st, "sent", OutDatagrams); + rrddim_set(st, "InDatagrams", *udp_InDatagrams); + rrddim_set(st, "OutDatagrams", *udp_OutDatagrams); rrdset_done(st); } @@ -333,17 +696,75 @@ int do_proc_net_snmp(int update_every, unsigned long long dt) { st = rrdset_create(RRD_TYPE_NET_SNMP, "udperrors", NULL, "udp", NULL, "IPv4 UDP Errors", "events/s", 2701, update_every, RRDSET_TYPE_LINE); st->isdetail = 1; - rrddim_add(st, "RcvbufErrors", NULL, 1, 1, RRDDIM_INCREMENTAL); + rrddim_add(st, "RcvbufErrors", NULL, 1, 1, RRDDIM_INCREMENTAL); + rrddim_add(st, "SndbufErrors", NULL, -1, 1, RRDDIM_INCREMENTAL); + rrddim_add(st, "InErrors", NULL, 1, 1, RRDDIM_INCREMENTAL); + rrddim_add(st, 
"NoPorts", NULL, 1, 1, RRDDIM_INCREMENTAL); + rrddim_add(st, "InCsumErrors", NULL, 1, 1, RRDDIM_INCREMENTAL); + rrddim_add(st, "IgnoredMulti", NULL, 1, 1, RRDDIM_INCREMENTAL); + } + else rrdset_next(st); + + rrddim_set(st, "InErrors", *udp_InErrors); + rrddim_set(st, "NoPorts", *udp_NoPorts); + rrddim_set(st, "RcvbufErrors", *udp_RcvbufErrors); + rrddim_set(st, "SndbufErrors", *udp_SndbufErrors); + rrddim_set(st, "InCsumErrors", *udp_InCsumErrors); + rrddim_set(st, "IgnoredMulti", *udp_IgnoredMulti); + rrdset_done(st); + } + } + else if(unlikely(hash == hash_udplite && strcmp(key, "UdpLite") == 0)) { + uint32_t h = l++; + + if(strcmp(procfile_lineword(ff, l, 0), "UdpLite") != 0) { + error("Cannot read UdpLite line from /proc/net/snmp."); + break; + } + + words = procfile_linewords(ff, l); + if(words < 3) { + error("Cannot read /proc/net/snmp UdpLite line. Expected 3+ params, read %u.", words); + continue; + } + + parse_line_pair(ff, udplite_data, h, l); + + // -------------------------------------------------------------------- + + if(do_udplite_packets) { + st = rrdset_find(RRD_TYPE_NET_SNMP ".udplite"); + if(!st) { + st = rrdset_create(RRD_TYPE_NET_SNMP, "udplite", NULL, "udplite", NULL, "IPv4 UDPLite Packets", "packets/s", 2603, update_every, RRDSET_TYPE_LINE); + + rrddim_add(st, "InDatagrams", "received", 1, 1, RRDDIM_INCREMENTAL); + rrddim_add(st, "OutDatagrams", "sent", -1, 1, RRDDIM_INCREMENTAL); + } + else rrdset_next(st); + + rrddim_set(st, "InDatagrams", *udplite_InDatagrams); + rrddim_set(st, "OutDatagrams", *udplite_OutDatagrams); + rrdset_done(st); + + st = rrdset_find(RRD_TYPE_NET_SNMP ".udplite_errors"); + if(!st) { + st = rrdset_create(RRD_TYPE_NET_SNMP, "udplite_errors", NULL, "udplite", NULL, "IPv4 UDPLite Errors", "packets/s", 2604, update_every, RRDSET_TYPE_LINE); + + rrddim_add(st, "RcvbufErrors", NULL, 1, 1, RRDDIM_INCREMENTAL); rrddim_add(st, "SndbufErrors", NULL, -1, 1, RRDDIM_INCREMENTAL); - rrddim_add(st, "InErrors", NULL, 1, 1, 
RRDDIM_INCREMENTAL); - rrddim_add(st, "NoPorts", NULL, 1, 1, RRDDIM_INCREMENTAL); + rrddim_add(st, "NoPorts", NULL, 1, 1, RRDDIM_INCREMENTAL); + rrddim_add(st, "IgnoredMulti", NULL, 1, 1, RRDDIM_INCREMENTAL); + rrddim_add(st, "InErrors", NULL, 1, 1, RRDDIM_INCREMENTAL); + rrddim_add(st, "InCsumErrors", NULL, 1, 1, RRDDIM_INCREMENTAL); } else rrdset_next(st); - rrddim_set(st, "InErrors", InErrors); - rrddim_set(st, "NoPorts", NoPorts); - rrddim_set(st, "RcvbufErrors", RcvbufErrors); - rrddim_set(st, "SndbufErrors", SndbufErrors); + rrddim_set(st, "NoPorts", *udplite_NoPorts); + rrddim_set(st, "InErrors", *udplite_InErrors); + rrddim_set(st, "InCsumErrors", *udplite_InCsumErrors); + rrddim_set(st, "RcvbufErrors", *udplite_RcvbufErrors); + rrddim_set(st, "SndbufErrors", *udplite_SndbufErrors); + rrddim_set(st, "IgnoredMulti", *udplite_IgnoredMulti); rrdset_done(st); } } diff --git a/src/proc_net_softnet_stat.c b/src/proc_net_softnet_stat.c new file mode 100644 index 00000000..b0131586 --- /dev/null +++ b/src/proc_net_softnet_stat.c @@ -0,0 +1,119 @@ +#include "common.h" + +static inline char *softnet_column_name(uint32_t column) { + switch(column) { + // https://github.com/torvalds/linux/blob/a7fd20d1c476af4563e66865213474a2f9f473a4/net/core/net-procfs.c#L161-L166 + case 0: return "processed"; + case 1: return "dropped"; + case 2: return "squeezed"; + case 9: return "received_rps"; + case 10: return "flow_limit_count"; + default: return NULL; + } +} + +int do_proc_net_softnet_stat(int update_every, unsigned long long dt) { + (void)dt; + + static procfile *ff = NULL; + static int do_per_core = -1; + static uint32_t allocated_lines = 0, allocated_columns = 0, *data = NULL; + + if(do_per_core == -1) do_per_core = config_get_boolean("plugin:proc:/proc/net/softnet_stat", "softnet_stat per core", 1); + + if(!ff) { + char filename[FILENAME_MAX + 1]; + snprintfz(filename, FILENAME_MAX, "%s%s", global_host_prefix, "/proc/net/softnet_stat"); + ff = 
procfile_open(config_get("plugin:proc:/proc/net/softnet_stat", "filename to monitor", filename), " \t", PROCFILE_FLAG_DEFAULT); + } + if(!ff) return 1; + + ff = procfile_readall(ff); + if(!ff) return 0; // we return 0, so that we will retry to open it next time + + uint32_t lines = procfile_lines(ff), l; + uint32_t words = procfile_linewords(ff, 0), w; + + if(!lines || !words) { + error("Cannot read /proc/net/softnet_stat, %u lines and %u columns reported.", lines, words); + return 1; + } + + if(lines > 200) lines = 200; + if(words > 50) words = 50; + + if(unlikely(!data || lines > allocated_lines || words > allocated_columns)) { + freez(data); + allocated_lines = lines; + allocated_columns = words; + data = mallocz((allocated_lines + 1) * allocated_columns * sizeof(uint32_t)); + } + + // initialize to zero + memset(data, 0, (allocated_lines + 1) * allocated_columns * sizeof(uint32_t)); + + // parse the values + for(l = 0; l < lines ;l++) { + words = procfile_linewords(ff, l); + if(!words) continue; + + if(words > allocated_columns) words = allocated_columns; + + for(w = 0; w < words ; w++) { + if(unlikely(softnet_column_name(w))) { + uint32_t t = strtoul(procfile_lineword(ff, l, w), NULL, 16); + data[w] += t; + data[((l + 1) * allocated_columns) + w] = t; + } + } + } + + if(data[(lines * allocated_columns)] == 0) + lines--; + + RRDSET *st; + + // -------------------------------------------------------------------- + + st = rrdset_find_bytype("system", "softnet_stat"); + if(!st) { + st = rrdset_create("system", "softnet_stat", NULL, "softnet_stat", NULL, "System softnet_stat", "events/s", 955, update_every, RRDSET_TYPE_LINE); + for(w = 0; w < allocated_columns ;w++) + if(unlikely(softnet_column_name(w))) + rrddim_add(st, softnet_column_name(w), NULL, 1, 1, RRDDIM_INCREMENTAL); + } + else rrdset_next(st); + + for(w = 0; w < allocated_columns ;w++) + if(unlikely(softnet_column_name(w))) + rrddim_set(st, softnet_column_name(w), data[w]); + + rrdset_done(st); + + 
if(do_per_core) { + for(l = 0; l < lines ;l++) { + char id[50+1]; + snprintfz(id, 50, "cpu%u_softnet_stat", l); + + st = rrdset_find_bytype("cpu", id); + if(!st) { + char title[100+1]; + snprintfz(title, 100, "CPU%u softnet_stat", l); + + st = rrdset_create("cpu", id, NULL, "softnet_stat", NULL, title, "events/s", 4101 + l, update_every, RRDSET_TYPE_LINE); + for(w = 0; w < allocated_columns ;w++) + if(unlikely(softnet_column_name(w))) + rrddim_add(st, softnet_column_name(w), NULL, 1, 1, RRDDIM_INCREMENTAL); + } + else rrdset_next(st); + + for(w = 0; w < allocated_columns ;w++) + if(unlikely(softnet_column_name(w))) + rrddim_set(st, softnet_column_name(w), data[((l + 1) * allocated_columns) + w]); + + rrdset_done(st); + } + } + + return 0; +} diff --git a/src/proc_net_stat_conntrack.c b/src/proc_net_stat_conntrack.c index 8234b20d..54e250bf 100644 --- a/src/proc_net_stat_conntrack.c +++ b/src/proc_net_stat_conntrack.c @@ -88,7 +88,7 @@ int do_proc_net_stat_conntrack(int update_every, unsigned long long dt) { if(do_sockets) { st = rrdset_find(RRD_TYPE_NET_STAT_NETFILTER "." RRD_TYPE_NET_STAT_CONNTRACK "_sockets"); if(!st) { - st = rrdset_create(RRD_TYPE_NET_STAT_NETFILTER, RRD_TYPE_NET_STAT_CONNTRACK "_sockets", NULL, RRD_TYPE_NET_STAT_CONNTRACK, NULL, "Connection Tracker Connections", "active connections", 1000, update_every, RRDSET_TYPE_LINE); + st = rrdset_create(RRD_TYPE_NET_STAT_NETFILTER, RRD_TYPE_NET_STAT_CONNTRACK "_sockets", NULL, RRD_TYPE_NET_STAT_CONNTRACK, NULL, "Connection Tracker Connections", "active connections", 3000, update_every, RRDSET_TYPE_LINE); rrddim_add(st, "connections", NULL, 1, 1, RRDDIM_ABSOLUTE); } @@ -103,7 +103,7 @@ int do_proc_net_stat_conntrack(int update_every, unsigned long long dt) { if(do_new) { st = rrdset_find(RRD_TYPE_NET_STAT_NETFILTER "." 
RRD_TYPE_NET_STAT_CONNTRACK "_new"); if(!st) { - st = rrdset_create(RRD_TYPE_NET_STAT_NETFILTER, RRD_TYPE_NET_STAT_CONNTRACK "_new", NULL, RRD_TYPE_NET_STAT_CONNTRACK, NULL, "Connection Tracker New Connections", "connections/s", 1001, update_every, RRDSET_TYPE_LINE); + st = rrdset_create(RRD_TYPE_NET_STAT_NETFILTER, RRD_TYPE_NET_STAT_CONNTRACK "_new", NULL, RRD_TYPE_NET_STAT_CONNTRACK, NULL, "Connection Tracker New Connections", "connections/s", 3001, update_every, RRDSET_TYPE_LINE); rrddim_add(st, "new", NULL, 1, 1, RRDDIM_INCREMENTAL); rrddim_add(st, "ignore", NULL, -1, 1, RRDDIM_INCREMENTAL); @@ -122,7 +122,7 @@ int do_proc_net_stat_conntrack(int update_every, unsigned long long dt) { if(do_changes) { st = rrdset_find(RRD_TYPE_NET_STAT_NETFILTER "." RRD_TYPE_NET_STAT_CONNTRACK "_changes"); if(!st) { - st = rrdset_create(RRD_TYPE_NET_STAT_NETFILTER, RRD_TYPE_NET_STAT_CONNTRACK "_changes", NULL, RRD_TYPE_NET_STAT_CONNTRACK, NULL, "Connection Tracker Changes", "changes/s", 1002, update_every, RRDSET_TYPE_LINE); + st = rrdset_create(RRD_TYPE_NET_STAT_NETFILTER, RRD_TYPE_NET_STAT_CONNTRACK "_changes", NULL, RRD_TYPE_NET_STAT_CONNTRACK, NULL, "Connection Tracker Changes", "changes/s", 3002, update_every, RRDSET_TYPE_LINE); st->isdetail = 1; rrddim_add(st, "inserted", NULL, 1, 1, RRDDIM_INCREMENTAL); @@ -142,7 +142,7 @@ int do_proc_net_stat_conntrack(int update_every, unsigned long long dt) { if(do_expect) { st = rrdset_find(RRD_TYPE_NET_STAT_NETFILTER "." 
RRD_TYPE_NET_STAT_CONNTRACK "_expect"); if(!st) { - st = rrdset_create(RRD_TYPE_NET_STAT_NETFILTER, RRD_TYPE_NET_STAT_CONNTRACK "_expect", NULL, RRD_TYPE_NET_STAT_CONNTRACK, NULL, "Connection Tracker Expectations", "expectations/s", 1003, update_every, RRDSET_TYPE_LINE); + st = rrdset_create(RRD_TYPE_NET_STAT_NETFILTER, RRD_TYPE_NET_STAT_CONNTRACK "_expect", NULL, RRD_TYPE_NET_STAT_CONNTRACK, NULL, "Connection Tracker Expectations", "expectations/s", 3003, update_every, RRDSET_TYPE_LINE); st->isdetail = 1; rrddim_add(st, "created", NULL, 1, 1, RRDDIM_INCREMENTAL); @@ -162,7 +162,7 @@ int do_proc_net_stat_conntrack(int update_every, unsigned long long dt) { if(do_search) { st = rrdset_find(RRD_TYPE_NET_STAT_NETFILTER "." RRD_TYPE_NET_STAT_CONNTRACK "_search"); if(!st) { - st = rrdset_create(RRD_TYPE_NET_STAT_NETFILTER, RRD_TYPE_NET_STAT_CONNTRACK "_search", NULL, RRD_TYPE_NET_STAT_CONNTRACK, NULL, "Connection Tracker Searches", "searches/s", 1010, update_every, RRDSET_TYPE_LINE); + st = rrdset_create(RRD_TYPE_NET_STAT_NETFILTER, RRD_TYPE_NET_STAT_CONNTRACK "_search", NULL, RRD_TYPE_NET_STAT_CONNTRACK, NULL, "Connection Tracker Searches", "searches/s", 3010, update_every, RRDSET_TYPE_LINE); st->isdetail = 1; rrddim_add(st, "searched", NULL, 1, 1, RRDDIM_INCREMENTAL); @@ -182,7 +182,7 @@ int do_proc_net_stat_conntrack(int update_every, unsigned long long dt) { if(do_errors) { st = rrdset_find(RRD_TYPE_NET_STAT_NETFILTER "." 
RRD_TYPE_NET_STAT_CONNTRACK "_errors"); if(!st) { - st = rrdset_create(RRD_TYPE_NET_STAT_NETFILTER, RRD_TYPE_NET_STAT_CONNTRACK "_errors", NULL, RRD_TYPE_NET_STAT_CONNTRACK, NULL, "Connection Tracker Errors", "events/s", 1005, update_every, RRDSET_TYPE_LINE); + st = rrdset_create(RRD_TYPE_NET_STAT_NETFILTER, RRD_TYPE_NET_STAT_CONNTRACK "_errors", NULL, RRD_TYPE_NET_STAT_CONNTRACK, NULL, "Connection Tracker Errors", "events/s", 3005, update_every, RRDSET_TYPE_LINE); st->isdetail = 1; rrddim_add(st, "icmp_error", NULL, 1, 1, RRDDIM_INCREMENTAL); diff --git a/src/proc_net_stat_synproxy.c b/src/proc_net_stat_synproxy.c index 758c35de..102805f7 100644 --- a/src/proc_net_stat_synproxy.c +++ b/src/proc_net_stat_synproxy.c @@ -58,7 +58,7 @@ int do_proc_net_stat_synproxy(int update_every, unsigned long long dt) { st = rrdset_find(RRD_TYPE_NET_STAT_NETFILTER "." RRD_TYPE_NET_STAT_SYNPROXY "_entries"); if(!st) { - st = rrdset_create(RRD_TYPE_NET_STAT_NETFILTER, RRD_TYPE_NET_STAT_SYNPROXY "_entries", NULL, RRD_TYPE_NET_STAT_SYNPROXY, NULL, "SYNPROXY Entries Used", "entries", 1004, update_every, RRDSET_TYPE_LINE); + st = rrdset_create(RRD_TYPE_NET_STAT_NETFILTER, RRD_TYPE_NET_STAT_SYNPROXY "_entries", NULL, RRD_TYPE_NET_STAT_SYNPROXY, NULL, "SYNPROXY Entries Used", "entries", 3304, update_every, RRDSET_TYPE_LINE); rrddim_add(st, "entries", NULL, 1, 1, RRDDIM_ABSOLUTE); } @@ -75,7 +75,7 @@ int do_proc_net_stat_synproxy(int update_every, unsigned long long dt) { st = rrdset_find(RRD_TYPE_NET_STAT_NETFILTER "." 
RRD_TYPE_NET_STAT_SYNPROXY "_syn_received"); if(!st) { - st = rrdset_create(RRD_TYPE_NET_STAT_NETFILTER, RRD_TYPE_NET_STAT_SYNPROXY "_syn_received", NULL, RRD_TYPE_NET_STAT_SYNPROXY, NULL, "SYNPROXY SYN Packets received", "SYN/s", 1001, update_every, RRDSET_TYPE_LINE); + st = rrdset_create(RRD_TYPE_NET_STAT_NETFILTER, RRD_TYPE_NET_STAT_SYNPROXY "_syn_received", NULL, RRD_TYPE_NET_STAT_SYNPROXY, NULL, "SYNPROXY SYN Packets received", "SYN/s", 3301, update_every, RRDSET_TYPE_LINE); rrddim_add(st, "received", NULL, 1, 1, RRDDIM_INCREMENTAL); } @@ -92,7 +92,7 @@ int do_proc_net_stat_synproxy(int update_every, unsigned long long dt) { st = rrdset_find(RRD_TYPE_NET_STAT_NETFILTER "." RRD_TYPE_NET_STAT_SYNPROXY "_conn_reopened"); if(!st) { - st = rrdset_create(RRD_TYPE_NET_STAT_NETFILTER, RRD_TYPE_NET_STAT_SYNPROXY "_conn_reopened", NULL, RRD_TYPE_NET_STAT_SYNPROXY, NULL, "SYNPROXY Connections Reopened", "connections/s", 1003, update_every, RRDSET_TYPE_LINE); + st = rrdset_create(RRD_TYPE_NET_STAT_NETFILTER, RRD_TYPE_NET_STAT_SYNPROXY "_conn_reopened", NULL, RRD_TYPE_NET_STAT_SYNPROXY, NULL, "SYNPROXY Connections Reopened", "connections/s", 3303, update_every, RRDSET_TYPE_LINE); rrddim_add(st, "reopened", NULL, 1, 1, RRDDIM_INCREMENTAL); } @@ -109,7 +109,7 @@ int do_proc_net_stat_synproxy(int update_every, unsigned long long dt) { st = rrdset_find(RRD_TYPE_NET_STAT_NETFILTER "." 
RRD_TYPE_NET_STAT_SYNPROXY "_cookies"); if(!st) { - st = rrdset_create(RRD_TYPE_NET_STAT_NETFILTER, RRD_TYPE_NET_STAT_SYNPROXY "_cookies", NULL, RRD_TYPE_NET_STAT_SYNPROXY, NULL, "SYNPROXY TCP Cookies", "cookies/s", 1002, update_every, RRDSET_TYPE_LINE); + st = rrdset_create(RRD_TYPE_NET_STAT_NETFILTER, RRD_TYPE_NET_STAT_SYNPROXY "_cookies", NULL, RRD_TYPE_NET_STAT_SYNPROXY, NULL, "SYNPROXY TCP Cookies", "cookies/s", 3302, update_every, RRDSET_TYPE_LINE); rrddim_add(st, "valid", NULL, 1, 1, RRDDIM_INCREMENTAL); rrddim_add(st, "invalid", NULL, -1, 1, RRDDIM_INCREMENTAL); diff --git a/src/proc_softirqs.c b/src/proc_softirqs.c index a5165040..ebbbf2ae 100644 --- a/src/proc_softirqs.c +++ b/src/proc_softirqs.c @@ -21,8 +21,7 @@ static inline struct interrupt *get_interrupts_array(int lines, int cpus) { static struct interrupt *irrs = NULL; static int allocated = 0; - if(lines < allocated) return irrs; - else { + if(lines > allocated) { irrs = (struct interrupt *)reallocz(irrs, lines * recordsize(cpus)); allocated = lines; } @@ -134,8 +133,8 @@ int do_proc_softirqs(int update_every, unsigned long long dt) { int c; for(c = 0; c < cpus ; c++) { - char id[256+1]; - snprintfz(id, 256, "cpu%d_softirqs", c); + char id[50+1]; + snprintfz(id, 50, "cpu%d_softirqs", c); st = rrdset_find_bytype("cpu", id); if(!st) { @@ -148,10 +147,9 @@ int do_proc_softirqs(int update_every, unsigned long long dt) { } if(core_sum == 0) continue; // try next core - char name[256+1], title[256+1]; - snprintfz(name, 256, "cpu%d_softirqs", c); - snprintfz(title, 256, "CPU%d softirqs", c); - st = rrdset_create("cpu", id, name, "softirqs", "cpu.softirqs", title, "softirqs/s", 3000 + c, update_every, RRDSET_TYPE_STACKED); + char title[100+1]; + snprintfz(title, 100, "CPU%d softirqs", c); + st = rrdset_create("cpu", id, NULL, "softirqs", "cpu.softirqs", title, "softirqs/s", 3000 + c, update_every, RRDSET_TYPE_STACKED); for(l = 0; l < lines ;l++) { struct interrupt *irr = irrindex(irrs, l, cpus); diff 
--git a/src/proc_vmstat.c b/src/proc_vmstat.c index f25d50c5..5f4e5aad 100644 --- a/src/proc_vmstat.c +++ b/src/proc_vmstat.c @@ -193,11 +193,11 @@ int do_proc_vmstat(int update_every, unsigned long long dt) { // hash_unevictable_pgs_stranded = simple_hash("unevictable_pgs_stranded"); } - if(do_swapio == -1) do_swapio = config_get_boolean("plugin:proc:/proc/vmstat", "swap i/o", 1); - if(do_io == -1) do_io = config_get_boolean("plugin:proc:/proc/vmstat", "disk i/o", 1); - if(do_pgfaults == -1) do_pgfaults = config_get_boolean("plugin:proc:/proc/vmstat", "memory page faults", 1); + if(do_swapio == -1) do_swapio = config_get_boolean_ondemand("plugin:proc:/proc/vmstat", "swap i/o", CONFIG_ONDEMAND_ONDEMAND); + if(do_io == -1) do_io = config_get_boolean("plugin:proc:/proc/vmstat", "disk i/o", 1); + if(do_pgfaults == -1) do_pgfaults = config_get_boolean("plugin:proc:/proc/vmstat", "memory page faults", 1); - if(dt) {}; + (void)dt; if(!ff) { char filename[FILENAME_MAX + 1]; @@ -415,7 +415,9 @@ int do_proc_vmstat(int update_every, unsigned long long dt) { // -------------------------------------------------------------------- - if(do_swapio) { + if(pswpin || pswpout || do_swapio == CONFIG_ONDEMAND_YES) { + do_swapio = CONFIG_ONDEMAND_YES; + static RRDSET *st_swapio = NULL; if(!st_swapio) { st_swapio = rrdset_create("system", "swapio", NULL, "swap", NULL, "Swap I/O", "kilobytes/s", 250, update_every, RRDSET_TYPE_AREA); diff --git a/src/registry.c b/src/registry.c index f2319c47..a0fb629a 100644 --- a/src/registry.c +++ b/src/registry.c @@ -27,8 +27,15 @@ // - [DONE] limit the size of PERSON_URL names // - limit the number of requests that add data to the registry, // per client IP per hour - - +// +// 3. 
lower memory requirements +// +// - embed avl structures directly into registry objects, instead of DICTIONARY +// - store GUIDs in memory as UUID instead of char * +// (this will also remove the index hash, since UUIDs can be compared directly) +// - do not track persons using the demo machines only +// (i.e. start tracking them only when they access a non-demo machine) +// - [DONE] do not track custom dashboards by default #define REGISTRY_URL_FLAGS_DEFAULT 0x00 #define REGISTRY_URL_FLAGS_EXPIRED 0x01 @@ -328,10 +335,8 @@ static inline URL *registry_url_allocate_nolock(const char *url, size_t urllen) return u; } -static inline URL *registry_url_get(const char *url, size_t urllen) { - debug(D_REGISTRY, "Registry: registry_url_get('%s')", url); - - registry_urls_lock(); +static inline URL *registry_url_get_nolock(const char *url, size_t urllen) { + debug(D_REGISTRY, "Registry: registry_url_get_nolock('%s')", url); URL *u = dictionary_get(registry.urls, url); if(!u) { @@ -339,6 +344,16 @@ static inline URL *registry_url_get(const char *url, size_t urllen) { registry.urls_count++; } + return u; +} + +static inline URL *registry_url_get(const char *url, size_t urllen) { + debug(D_REGISTRY, "Registry: registry_url_get('%s')", url); + + registry_urls_lock(); + + URL *u = registry_url_get_nolock(url, urllen); + registry_urls_unlock(); return u; @@ -555,7 +570,6 @@ static inline PERSON *registry_person_get(const char *person_guid, time_t when) else { person_guid = buf; p = registry_person_find(person_guid); - if(!p) person_guid = NULL; } } @@ -782,6 +796,7 @@ int registry_log_load(void) { else registry_request_delete(p->guid, machine_guid, url, name, when); + registry.log_count++; break; default: @@ -789,7 +804,7 @@ int registry_log_load(void) { break; } } - + fclose(fp); } @@ -955,7 +970,7 @@ MACHINE *registry_request_machine(char *person_guid, char *machine_guid, char *u // make sure the machine exists m = registry_machine_find(request_machine); if(!m) { - 
info("Registry Machine URLs request: machine not found, person: '%s', machine '%s', url '%s', request machine '%s'", p->guid, m->guid, pu->url->url, request_machine); + info("Registry Machine URLs request: machine not found, person: '%s', machine '%s', url '%s', request machine '%s'", p->guid, machine_guid, pu->url->url, request_machine); return NULL; } @@ -1227,6 +1242,21 @@ int registry_request_switch_json(struct web_client *w, char *person_guid, char * // ---------------------------------------------------------------------------- // REGISTRY THIS MACHINE UNIQUE ID +static inline int is_machine_guid_blacklisted(const char *guid) { + // these are machine GUIDs that have been included in distribution packages. + // we blacklist them here, so that the next version of netdata will generate + // new ones. + + if(!strcmp(guid, "8a795b0c-2311-11e6-8563-000c295076a6") + || !strcmp(guid, "4aed1458-1c3e-11e6-a53f-000c290fc8f5") + ) { + error("Blacklisted machine GUID '%s' found.", guid); + return 1; + } + + return 0; +} + char *registry_get_this_machine_guid(void) { if(likely(registry.machine_guid[0])) return registry.machine_guid; @@ -1245,6 +1275,8 @@ char *registry_get_this_machine_guid(void) { registry.machine_guid[0] = '\0'; } + else if(is_machine_guid_blacklisted(registry.machine_guid)) + registry.machine_guid[0] = '\0'; } close(fd); } @@ -1376,6 +1408,8 @@ int registry_save(void) { return -2; } + error_log_limit_unlimited(); + char tmp_filename[FILENAME_MAX + 1]; char old_filename[FILENAME_MAX + 1]; @@ -1387,6 +1421,7 @@ int registry_save(void) { if(!fp) { error("Registry: Cannot create file: %s", tmp_filename); registry_log_unlock(); + error_log_limit_reset(); return -1; } @@ -1398,6 +1433,7 @@ int registry_save(void) { error("Registry: Cannot save registry machines - return value %d", bytes1); fclose(fp); registry_log_unlock(); + error_log_limit_reset(); return bytes1; } debug(D_REGISTRY, "Registry: saving machines took %d bytes", bytes1); @@ -1408,6 +1444,7 @@ 
int registry_save(void) { error("Registry: Cannot save registry persons - return value %d", bytes2); fclose(fp); registry_log_unlock(); + error_log_limit_reset(); return bytes2; } debug(D_REGISTRY, "Registry: saving persons took %d bytes", bytes2); @@ -1461,13 +1498,13 @@ int registry_save(void) { // it has been moved successfully // discard the current registry log registry_log_recreate_nolock(); - registry.log_count = 0; } } // continue operations registry_log_unlock(); + error_log_limit_reset(); return -1; } @@ -1559,7 +1596,8 @@ static inline size_t registry_load(void) { } *url++ = '\0'; - u = registry_url_allocate_nolock(url, strlen(url)); + // u = registry_url_allocate_nolock(url, strlen(url)); + u = registry_url_get_nolock(url, strlen(url)); time_t first_t = strtoul(&s[2], NULL, 16); @@ -1586,7 +1624,8 @@ static inline size_t registry_load(void) { } s[1] = s[10] = s[19] = s[28] = s[31] = '\0'; - u = registry_url_allocate_nolock(&s[32], strlen(&s[32])); + // u = registry_url_allocate_nolock(&s[32], strlen(&s[32])); + u = registry_url_get_nolock(&s[32], strlen(&s[32])); MACHINE_URL *mu = registry_machine_url_allocate(m, u, strtoul(&s[2], NULL, 16)); mu->last_t = strtoul(&s[11], NULL, 16); @@ -1635,7 +1674,7 @@ int registry_init(void) { registry.persons_expiration = config_get_number("registry", "registry expire idle persons days", 365) * 86400; registry.registry_domain = config_get("registry", "registry domain", ""); registry.registry_to_announce = config_get("registry", "registry to announce", "https://registry.my-netdata.io"); - registry.hostname = config_get("registry", "registry hostname", config_get("global", "hostname", hostname)); + registry.hostname = config_get("registry", "registry hostname", config_get("global", "hostname", localhost.hostname)); registry.verify_cookies_redirects = config_get_boolean("registry", "verify browser cookies support", 1); setenv("NETDATA_REGISTRY_HOSTNAME", registry.hostname, 1); @@ -1685,6 +1724,9 @@ int 
registry_init(void) { registry_log_open_nolock(); registry_load(); registry_log_load(); + + if(unlikely(registry_should_save_db())) + registry_save(); } return 0; @@ -5,10 +5,12 @@ // ---------------------------------------------------------------------------- // globals +/* // if not zero it gives the time (in seconds) to remove un-updated dimensions // DO NOT ENABLE // if dimensions are removed, the chart generation will have to run again int rrd_delete_unupdated_dimensions = 0; +*/ int rrd_update_every = UPDATE_EVERY; int rrd_default_history_entries = RRD_DEFAULT_HISTORY_ENTRIES; @@ -42,7 +44,8 @@ RRDHOST localhost = { AVL_LOCK_INITIALIZER }, .health_log = { - .nextid = 1, + .next_log_id = 1, + .next_alarm_id = 1, .count = 0, .max = 1000, .alarms = NULL, @@ -50,6 +53,12 @@ RRDHOST localhost = { } }; +void rrdhost_init(char *hostname) { + localhost.hostname = hostname; + localhost.health_log.next_log_id = + localhost.health_log.next_alarm_id = time(NULL); +} + void rrdhost_rwlock(RRDHOST *host) { pthread_rwlock_wrlock(&host->rrdset_root_rwlock); } @@ -413,8 +422,29 @@ void rrdset_reset(RRDSET *st) rd->last_collected_time.tv_sec = 0; rd->last_collected_time.tv_usec = 0; rd->counter = 0; - bzero(rd->values, rd->entries * sizeof(storage_number)); + memset(rd->values, 0, rd->entries * sizeof(storage_number)); + } +} +static long align_entries_to_pagesize(long entries) { + if(entries < 5) entries = 5; + if(entries > RRD_HISTORY_ENTRIES_MAX) entries = RRD_HISTORY_ENTRIES_MAX; + +#ifdef NETDATA_LOG_ALLOCATIONS + long page = (size_t)sysconf(_SC_PAGESIZE); + + long size = sizeof(RRDDIM) + entries * sizeof(storage_number); + if(size % page) { + size -= (size % page); + size += page; + + long n = (size - sizeof(RRDDIM)) / sizeof(storage_number); + return n; } + + return entries; +#else + return entries; +#endif } RRDSET *rrdset_create(const char *type, const char *id, const char *name, const char *family, const char *context, const char *title, const char *units, long 
priority, int update_every, int chart_type) @@ -441,9 +471,9 @@ RRDSET *rrdset_create(const char *type, const char *id, const char *name, const return st; } - long entries = config_get_number(fullid, "history", rrd_default_history_entries); - if(entries < 5) entries = config_set_number(fullid, "history", 5); - if(entries > RRD_HISTORY_ENTRIES_MAX) entries = config_set_number(fullid, "history", RRD_HISTORY_ENTRIES_MAX); + long rentries = config_get_number(fullid, "history", rrd_default_history_entries); + long entries = align_entries_to_pagesize(rentries); + if(entries != rentries) entries = config_set_number(fullid, "history", entries); int enabled = config_get_boolean(fullid, "enabled", 1); if(!enabled) entries = 5; @@ -459,29 +489,29 @@ RRDSET *rrdset_create(const char *type, const char *id, const char *name, const if(strcmp(st->magic, RRDSET_MAGIC) != 0) { errno = 0; info("Initializing file %s.", fullfilename); - bzero(st, size); + memset(st, 0, size); } else if(strcmp(st->id, fullid) != 0) { errno = 0; error("File %s contents are not for chart %s. Clearing it.", fullfilename, fullid); // munmap(st, size); // st = NULL; - bzero(st, size); + memset(st, 0, size); } else if(st->memsize != size || st->entries != entries) { errno = 0; error("File %s does not have the desired size. Clearing it.", fullfilename); - bzero(st, size); + memset(st, 0, size); } else if(st->update_every != update_every) { errno = 0; error("File %s does not have the desired update frequency. Clearing it.", fullfilename); - bzero(st, size); + memset(st, 0, size); } else if((time(NULL) - st->last_updated.tv_sec) > update_every * entries) { errno = 0; error("File %s is too old. 
Clearing it.", fullfilename); - bzero(st, size); + memset(st, 0, size); } } @@ -496,6 +526,7 @@ RRDSET *rrdset_create(const char *type, const char *id, const char *name, const st->next = NULL; st->mapped = rrd_memory_mode; st->variables = NULL; + st->alarms = NULL; } else { st = callocz(1, size); @@ -606,44 +637,44 @@ RRDDIM *rrddim_add(RRDSET *st, const char *id, const char *name, long multiplier if(strcmp(rd->magic, RRDDIMENSION_MAGIC) != 0) { errno = 0; info("Initializing file %s.", fullfilename); - bzero(rd, size); + memset(rd, 0, size); } else if(rd->memsize != size) { errno = 0; error("File %s does not have the desired size. Clearing it.", fullfilename); - bzero(rd, size); + memset(rd, 0, size); } else if(rd->multiplier != multiplier) { errno = 0; error("File %s does not have the same multiplier. Clearing it.", fullfilename); - bzero(rd, size); + memset(rd, 0, size); } else if(rd->divisor != divisor) { errno = 0; error("File %s does not have the same divisor. Clearing it.", fullfilename); - bzero(rd, size); + memset(rd, 0, size); } else if(rd->algorithm != algorithm) { errno = 0; error("File %s does not have the same algorithm. Clearing it.", fullfilename); - bzero(rd, size); + memset(rd, 0, size); } else if(rd->update_every != st->update_every) { errno = 0; error("File %s does not have the same refresh frequency. Clearing it.", fullfilename); - bzero(rd, size); + memset(rd, 0, size); } else if(usec_dt(&now, &rd->last_collected_time) > (rd->entries * rd->update_every * 1000000ULL)) { errno = 0; error("File %s is too old. Clearing it.", fullfilename); - bzero(rd, size); + memset(rd, 0, size); } else if(strcmp(rd->id, id) != 0) { errno = 0; error("File %s contents are not for dimension %s. 
Clearing it.", fullfilename, id); // munmap(rd, size); // rd = NULL; - bzero(rd, size); + memset(rd, 0, size); } } @@ -927,6 +958,8 @@ collected_number rrddim_set_by_pointer(RRDSET *st, RRDDIM *rd, collected_number rd->updated = 1; rd->counter++; + // fprintf(stderr, "%s.%s %llu " COLLECTED_NUMBER_FORMAT " dt %0.6f" " rate " CALCULATED_NUMBER_FORMAT "\n", st->name, rd->name, st->usec_since_last_update, value, (float)((double)st->usec_since_last_update / (double)1000000), (calculated_number)((value - rd->last_collected_value) * (calculated_number)rd->multiplier / (calculated_number)rd->divisor * 1000000.0 / (calculated_number)st->usec_since_last_update)); + return rd->last_collected_value; } @@ -978,25 +1011,42 @@ unsigned long long rrdset_done(RRDSET *st) debug(D_RRD_CALLS, "rrdset_done() for chart %s", st->name); - RRDDIM *rd, *last; - int oldstate, store_this_entry = 1, first_entry = 0; - unsigned long long last_ut, now_ut, next_ut, stored_entries = 0; + RRDDIM *rd; + + int + pthreadoldcancelstate; // store the old cancelable pthread state, to restore it at the end + + char + store_this_entry = 1, // boolean: 1 = store this entry, 0 = don't store this entry + first_entry = 0; // boolean: 1 = this is the first entry seen for this chart, 0 = all other entries + + unsigned int + stored_entries = 0; // the number of entries we have stored in the db, during this call to rrdset_done() + + unsigned long long + last_collect_ut, // the timestamp in microseconds, of the last collected value + now_collect_ut, // the timestamp in microseconds, of this collected value (this is NOW) + last_stored_ut, // the timestamp in microseconds, of the last stored entry in the db + next_store_ut, // the timestamp in microseconds, of the next entry to store in the db + update_every_ut = st->update_every * 1000000ULL; // st->update_every in microseconds - if(unlikely(pthread_setcancelstate(PTHREAD_CANCEL_DISABLE, &oldstate) != 0)) + if(unlikely(pthread_setcancelstate(PTHREAD_CANCEL_DISABLE, 
&pthreadoldcancelstate) != 0)) error("Cannot set pthread cancel state to DISABLE."); // a read lock is OK here pthread_rwlock_rdlock(&st->rwlock); +/* // enable the chart, if it was disabled if(unlikely(rrd_delete_unupdated_dimensions) && !st->enabled) st->enabled = 1; +*/ // check if the chart has a long time to be updated - if(unlikely(st->usec_since_last_update > st->entries * st->update_every * 1000000ULL)) { + if(unlikely(st->usec_since_last_update > st->entries * update_every_ut)) { info("%s: took too long to be updated (%0.3Lf secs). Reseting it.", st->name, (long double)(st->usec_since_last_update / 1000000.0)); rrdset_reset(st); - st->usec_since_last_update = st->update_every * 1000000ULL; + st->usec_since_last_update = update_every_ut; first_entry = 1; } if(unlikely(st->debug)) debug(D_RRD_STATS, "%s: microseconds since last update: %llu", st->name, st->usec_since_last_update); @@ -1006,6 +1056,7 @@ unsigned long long rrdset_done(RRDSET *st) // it is the first entry // set the last_collected_time to now gettimeofday(&st->last_collected_time, NULL); + last_collect_ut = st->last_collected_time.tv_sec * 1000000ULL + st->last_collected_time.tv_usec - update_every_ut; // the first entry should not be stored store_this_entry = 0; @@ -1016,7 +1067,8 @@ unsigned long long rrdset_done(RRDSET *st) else { // it is not the first entry // calculate the proper last_collected_time, using usec_since_last_update - unsigned long long ut = st->last_collected_time.tv_sec * 1000000ULL + st->last_collected_time.tv_usec + st->usec_since_last_update; + last_collect_ut = st->last_collected_time.tv_sec * 1000000ULL + st->last_collected_time.tv_usec; + unsigned long long ut = last_collect_ut + st->usec_since_last_update; st->last_collected_time.tv_sec = (time_t) (ut / 1000000ULL); st->last_collected_time.tv_usec = (suseconds_t) (ut % 1000000ULL); } @@ -1038,11 +1090,11 @@ unsigned long long rrdset_done(RRDSET *st) } // check if we will re-write the entire data set - 
if(unlikely(usec_dt(&st->last_collected_time, &st->last_updated) > st->update_every * st->entries * 1000000ULL)) { + if(unlikely(usec_dt(&st->last_collected_time, &st->last_updated) > st->entries * update_every_ut)) { info("%s: too old data (last updated at %ld.%ld, last collected at %ld.%ld). Reseting it. Will not store the next entry.", st->name, st->last_updated.tv_sec, st->last_updated.tv_usec, st->last_collected_time.tv_sec, st->last_collected_time.tv_usec); rrdset_reset(st); - st->usec_since_last_update = st->update_every * 1000000ULL; + st->usec_since_last_update = update_every_ut; gettimeofday(&st->last_collected_time, NULL); @@ -1056,21 +1108,18 @@ unsigned long long rrdset_done(RRDSET *st) } // these are the 3 variables that will help us in interpolation - // last_ut = the last time we added a value to the storage - // now_ut = the time the current value is taken at - // next_ut = the time of the next interpolation point - last_ut = st->last_updated.tv_sec * 1000000ULL + st->last_updated.tv_usec; - now_ut = st->last_collected_time.tv_sec * 1000000ULL + st->last_collected_time.tv_usec; - next_ut = (st->last_updated.tv_sec + st->update_every) * 1000000ULL; - - if(unlikely(!first_entry && now_ut < next_ut)) { - if(unlikely(st->debug)) debug(D_RRD_STATS, "%s: THIS IS IN THE SAME INTERPOLATION POINT", st->name); - } + // last_stored_ut = the last time we added a value to the storage + // now_collect_ut = the time the current value has been collected + // next_store_ut = the time of the next interpolation point + last_stored_ut = st->last_updated.tv_sec * 1000000ULL + st->last_updated.tv_usec; + now_collect_ut = st->last_collected_time.tv_sec * 1000000ULL + st->last_collected_time.tv_usec; + next_store_ut = (st->last_updated.tv_sec + st->update_every) * 1000000ULL; if(unlikely(st->debug)) { - debug(D_RRD_STATS, "%s: last ut = %0.3Lf (last updated time)", st->name, (long double)last_ut/1000000.0); - debug(D_RRD_STATS, "%s: now ut = %0.3Lf (current update time)", 
st->name, (long double)now_ut/1000000.0); - debug(D_RRD_STATS, "%s: next ut = %0.3Lf (next interpolation point)", st->name, (long double)next_ut/1000000.0); + debug(D_RRD_STATS, "%s: last_collect_ut = %0.3Lf (last collection time)", st->name, (long double)last_collect_ut/1000000.0); + debug(D_RRD_STATS, "%s: now_collect_ut = %0.3Lf (current collection time)", st->name, (long double)now_collect_ut/1000000.0); + debug(D_RRD_STATS, "%s: last_stored_ut = %0.3Lf (last updated time)", st->name, (long double)last_stored_ut/1000000.0); + debug(D_RRD_STATS, "%s: next_store_ut = %0.3Lf (next interpolation point)", st->name, (long double)next_store_ut/1000000.0); } if(unlikely(!st->counter_done)) { @@ -1082,7 +1131,7 @@ unsigned long long rrdset_done(RRDSET *st) // calculate totals and count the dimensions int dimensions; st->collected_total = 0; - for( rd = st->dimensions, dimensions = 0 ; likely(rd) ; rd = rd->next, dimensions++ ) + for( rd = st->dimensions, dimensions = 0 ; rd ; rd = rd->next, dimensions++ ) if(likely(rd->updated)) st->collected_total += rd->collected_value; uint32_t storage_flags = SN_EXISTS; @@ -1090,7 +1139,7 @@ unsigned long long rrdset_done(RRDSET *st) // process all dimensions to calculate their values // based on the collected figures only // at this stage we do not interpolate anything - for( rd = st->dimensions ; likely(rd) ; rd = rd->next ) { + for( rd = st->dimensions ; rd ; rd = rd->next ) { if(unlikely(!rd->updated)) { rd->calculated_value = 0; @@ -1169,7 +1218,7 @@ unsigned long long rrdset_done(RRDSET *st) rd->last_collected_value = rd->collected_value; } - rd->calculated_value = + rd->calculated_value += (calculated_number)(rd->collected_value - rd->last_collected_value) * (calculated_number)rd->multiplier / (calculated_number)rd->divisor; @@ -1259,21 +1308,26 @@ unsigned long long rrdset_done(RRDSET *st) // at this point we have all the calculated values ready // it is now time to interpolate values on a second boundary - unsigned long 
long first_ut = last_ut; - long long iterations = (now_ut - last_ut) / (st->update_every * 1000000ULL); - if((now_ut % (st->update_every * 1000000ULL)) == 0) iterations++; + if(unlikely(now_collect_ut < next_store_ut)) { + // this is collected in the same interpolation point + if(unlikely(st->debug)) debug(D_RRD_STATS, "%s: THIS IS IN THE SAME INTERPOLATION POINT", st->name); + } - for( ; likely(next_ut <= now_ut) ; next_ut += st->update_every * 1000000ULL, iterations-- ) { + unsigned long long first_ut = last_stored_ut; + long long iterations = (now_collect_ut - last_stored_ut) / (update_every_ut); + if((now_collect_ut % (update_every_ut)) == 0) iterations++; + + for( ; next_store_ut <= now_collect_ut ; last_collect_ut = next_store_ut, next_store_ut += update_every_ut, iterations-- ) { #ifdef NETDATA_INTERNAL_CHECKS - if(iterations < 0) { error("%s: iterations calculation wrapped! first_ut = %llu, last_ut = %llu, next_ut = %llu, now_ut = %llu", st->name, first_ut, last_ut, next_ut, now_ut); } + if(iterations < 0) { error("%s: iterations calculation wrapped! 
first_ut = %llu, last_stored_ut = %llu, next_store_ut = %llu, now_collect_ut = %llu", st->name, first_ut, last_stored_ut, next_store_ut, now_collect_ut); } #endif if(unlikely(st->debug)) { - debug(D_RRD_STATS, "%s: last ut = %0.3Lf (last updated time)", st->name, (long double)last_ut/1000000.0); - debug(D_RRD_STATS, "%s: next ut = %0.3Lf (next interpolation point)", st->name, (long double)next_ut/1000000.0); + debug(D_RRD_STATS, "%s: last_stored_ut = %0.3Lf (last updated time)", st->name, (long double)last_stored_ut/1000000.0); + debug(D_RRD_STATS, "%s: next_store_ut = %0.3Lf (next interpolation point)", st->name, (long double)next_store_ut/1000000.0); } - st->last_updated.tv_sec = (time_t) (next_ut / 1000000ULL); + st->last_updated.tv_sec = (time_t) (next_store_ut / 1000000ULL); st->last_updated.tv_usec = 0; for( rd = st->dimensions ; likely(rd) ; rd = rd->next ) { @@ -1283,8 +1337,8 @@ unsigned long long rrdset_done(RRDSET *st) case RRDDIM_INCREMENTAL: new_value = (calculated_number) ( rd->calculated_value - * (calculated_number)(next_ut - last_ut) - / (calculated_number)(now_ut - last_ut) + * (calculated_number)(next_store_ut - last_collect_ut) + / (calculated_number)(now_collect_ut - last_collect_ut) ); if(unlikely(st->debug)) @@ -1296,14 +1350,23 @@ unsigned long long rrdset_done(RRDSET *st) , st->id, rd->name , new_value , rd->calculated_value - , (next_ut - last_ut) - , (now_ut - last_ut) + , (next_store_ut - last_stored_ut) + , (now_collect_ut - last_stored_ut) ); rd->calculated_value -= new_value; new_value += rd->last_calculated_value; rd->last_calculated_value = 0; new_value /= (calculated_number)st->update_every; + + if(unlikely(next_store_ut - last_stored_ut < update_every_ut)) { + if(unlikely(st->debug)) + debug(D_RRD_STATS, "%s/%s: COLLECTION POINT IS SHORT " CALCULATED_NUMBER_FORMAT " - EXTRAPOLATING", + st->id, rd->name + , (calculated_number)(next_store_ut - last_stored_ut) + ); + new_value = new_value * (calculated_number)(st->update_every * 
1000000) / (calculated_number)(next_store_ut - last_stored_ut); + } break; case RRDDIM_ABSOLUTE: @@ -1323,8 +1386,8 @@ unsigned long long rrdset_done(RRDSET *st) new_value = (calculated_number) ( ( (rd->calculated_value - rd->last_calculated_value) - * (calculated_number)(next_ut - first_ut) - / (calculated_number)(now_ut - first_ut) + * (calculated_number)(next_store_ut - last_collect_ut) + / (calculated_number)(now_collect_ut - last_collect_ut) ) + rd->last_calculated_value ); @@ -1338,20 +1401,15 @@ unsigned long long rrdset_done(RRDSET *st) , st->id, rd->name , new_value , rd->calculated_value, rd->last_calculated_value - , (next_ut - first_ut) - , (now_ut - first_ut), rd->last_calculated_value + , (next_store_ut - first_ut) + , (now_collect_ut - first_ut), rd->last_calculated_value ); - - // this is wrong - // it fades the value towards the target - // while we know the calculated value is different - // if(likely(next_ut + st->update_every * 1000000ULL > now_ut)) rd->calculated_value = new_value; } break; } if(unlikely(!store_this_entry)) { - // store_this_entry = 1; + rd->values[st->current_entry] = pack_storage_number(0, SN_NOT_EXISTS); continue; } @@ -1411,25 +1469,34 @@ unsigned long long rrdset_done(RRDSET *st) st->counter++; st->current_entry = ((st->current_entry + 1) >= st->entries) ? 
0 : st->current_entry + 1; - last_ut = next_ut; + last_stored_ut = next_store_ut; } - // align next interpolation to last collection point - if(likely(stored_entries || !store_this_entry)) { - st->last_updated.tv_sec = st->last_collected_time.tv_sec; - st->last_updated.tv_usec = st->last_collected_time.tv_usec; - st->last_collected_total = st->collected_total; - } + st->last_collected_total = st->collected_total; - for( rd = st->dimensions; likely(rd) ; rd = rd->next ) { + for( rd = st->dimensions; rd ; rd = rd->next ) { if(unlikely(!rd->updated)) continue; - if(likely(stored_entries || !store_this_entry)) { - if(unlikely(st->debug)) debug(D_RRD_STATS, "%s/%s: setting last_collected_value (old: " COLLECTED_NUMBER_FORMAT ") to last_collected_value (new: " COLLECTED_NUMBER_FORMAT ")", st->id, rd->name, rd->last_collected_value, rd->collected_value); - rd->last_collected_value = rd->collected_value; + if(unlikely(st->debug)) debug(D_RRD_STATS, "%s/%s: setting last_collected_value (old: " COLLECTED_NUMBER_FORMAT ") to last_collected_value (new: " COLLECTED_NUMBER_FORMAT ")", st->id, rd->name, rd->last_collected_value, rd->collected_value); + rd->last_collected_value = rd->collected_value; + + switch(rd->algorithm) { + case RRDDIM_INCREMENTAL: + if(unlikely(!first_entry)) { + if(unlikely(st->debug)) debug(D_RRD_STATS, "%s/%s: setting last_calculated_value (old: " CALCULATED_NUMBER_FORMAT ") to last_calculated_value (new: " CALCULATED_NUMBER_FORMAT ")", st->id, rd->name, rd->last_calculated_value + rd->calculated_value, rd->calculated_value); + rd->last_calculated_value += rd->calculated_value; + } + else { + if(unlikely(st->debug)) debug(D_RRD_STATS, "%s: THIS IS THE FIRST POINT", st->name); + } + break; - if(unlikely(st->debug)) debug(D_RRD_STATS, "%s/%s: setting last_calculated_value (old: " CALCULATED_NUMBER_FORMAT ") to last_calculated_value (new: " CALCULATED_NUMBER_FORMAT ")", st->id, rd->name, rd->last_calculated_value, rd->calculated_value); - 
rd->last_calculated_value = rd->calculated_value; + case RRDDIM_ABSOLUTE: + case RRDDIM_PCENT_OVER_ROW_TOTAL: + case RRDDIM_PCENT_OVER_DIFF_TOTAL: + if(unlikely(st->debug)) debug(D_RRD_STATS, "%s/%s: setting last_calculated_value (old: " CALCULATED_NUMBER_FORMAT ") to last_calculated_value (new: " CALCULATED_NUMBER_FORMAT ")", st->id, rd->name, rd->last_calculated_value, rd->calculated_value); + rd->last_calculated_value = rd->calculated_value; + break; } rd->calculated_value = 0; @@ -1452,6 +1519,7 @@ unsigned long long rrdset_done(RRDSET *st) // ALL DONE ABOUT THE DATA UPDATE // -------------------------------------------------------------------- +/* // find if there are any obsolete dimensions (not updated recently) if(unlikely(rrd_delete_unupdated_dimensions)) { @@ -1460,6 +1528,7 @@ unsigned long long rrdset_done(RRDSET *st) break; if(unlikely(rd)) { + RRDDIM *last; // there is dimension to free // upgrade our read lock to a write lock pthread_rwlock_unlock(&st->rwlock); @@ -1497,11 +1566,12 @@ unsigned long long rrdset_done(RRDSET *st) } } } +*/ pthread_rwlock_unlock(&st->rwlock); - if(unlikely(pthread_setcancelstate(oldstate, NULL) != 0)) - error("Cannot set pthread cancel state to RESTORE (%d).", oldstate); + if(unlikely(pthread_setcancelstate(pthreadoldcancelstate, NULL) != 0)) + error("Cannot set pthread cancel state to RESTORE (%d).", pthreadoldcancelstate); return(st->usec_since_last_update); } @@ -277,7 +277,7 @@ struct rrdset { // ------------------------------------------------------------------------ // the dimensions - avl_tree_lock dimensions_index; // the root of the dimensions index + avl_tree_lock dimensions_index; // the root of the dimensions index RRDDIM *dimensions; // the actual data for every dimension }; @@ -310,6 +310,7 @@ struct rrdhost { }; typedef struct rrdhost RRDHOST; extern RRDHOST localhost; +extern void rrdhost_init(char *hostname); #ifdef NETDATA_INTERNAL_CHECKS #define rrdhost_check_wrlock(host) rrdhost_check_wrlock_int(host, 
__FILE__, __FUNCTION__, __LINE__) diff --git a/src/rrd2json.c b/src/rrd2json.c index 9009a8b1..474b5915 100644 --- a/src/rrd2json.c +++ b/src/rrd2json.c @@ -1,8 +1,5 @@ #include "common.h" -#define HOSTNAME_MAX 1024 -char *hostname = "unknown"; - void rrd_stats_api_v1_chart(RRDSET *st, BUFFER *wb) { pthread_rwlock_rdlock(&st->rwlock); @@ -84,7 +81,7 @@ void rrd_stats_api_v1_charts(BUFFER *wb) ",\n\t\"update_every\": %d" ",\n\t\"history\": %d" ",\n\t\"charts\": {" - , hostname + , localhost.hostname , rrd_update_every , rrd_default_history_entries ); @@ -246,7 +243,7 @@ void rrd_stats_all_json(BUFFER *wb) "\t\"history\": %d,\n" "\t\"memory\": %lu\n" "}\n" - , hostname + , localhost.hostname , rrd_update_every , rrd_default_history_entries , memory @@ -1217,13 +1214,13 @@ RRDR *rrd2rrdr(RRDSET *st, long points, long long after, long long before, int g absolute_period_requested = 0; } - // allow relative for before and after - if(((before < 0)?-before:before) <= (st->update_every * st->entries)) { + // allow relative for before and after (smaller than 3 years) + if(((before < 0)?-before:before) <= (3 * 365 * 86400)) { before = last_entry_t + before; absolute_period_requested = 0; } - if(((after < 0)?-after:after) <= (st->update_every * st->entries)) { + if(((after < 0)?-after:after) <= (3 * 365 * 86400)) { if(after == 0) after = -st->update_every; after = before + after; absolute_period_requested = 0; @@ -1567,9 +1564,9 @@ int rrd2value(RRDSET *st, BUFFER *wb, calculated_number *n, const char *dimensio } if(r->result_options & RRDR_RESULT_OPTION_RELATIVE) - wb->options |= WB_CONTENT_NO_CACHEABLE; + buffer_no_cacheable(wb); else if(r->result_options & RRDR_RESULT_OPTION_ABSOLUTE) - wb->options |= WB_CONTENT_CACHEABLE; + buffer_cacheable(wb); options = rrdr_check_options(r, options, dimensions); @@ -1595,9 +1592,9 @@ int rrd2format(RRDSET *st, BUFFER *wb, BUFFER *dimensions, uint32_t format, long } if(r->result_options & RRDR_RESULT_OPTION_RELATIVE) - wb->options |= 
WB_CONTENT_NO_CACHEABLE; + buffer_no_cacheable(wb); else if(r->result_options & RRDR_RESULT_OPTION_ABSOLUTE) - wb->options |= WB_CONTENT_CACHEABLE; + buffer_cacheable(wb); options = rrdr_check_options(r, options, (dimensions)?buffer_tostring(dimensions):NULL); diff --git a/src/sys_fs_cgroup.c b/src/sys_fs_cgroup.c index 892f737c..298f38a3 100644 --- a/src/sys_fs_cgroup.c +++ b/src/sys_fs_cgroup.c @@ -138,6 +138,18 @@ struct memory { unsigned long long total_active_file; unsigned long long total_unevictable; */ + + int usage_in_bytes_updated; + char *filename_usage_in_bytes; + unsigned long long usage_in_bytes; + + int msw_usage_in_bytes_updated; + char *filename_msw_usage_in_bytes; + unsigned long long msw_usage_in_bytes; + + int failcnt_updated; + char *filename_failcnt; + unsigned long long failcnt; }; // https://www.kernel.org/doc/Documentation/cgroup-v1/cpuacct.txt @@ -160,9 +172,13 @@ struct cpuacct_usage { unsigned long long *cpu_percpu; }; +#define CGROUP_OPTIONS_DISABLED_DUPLICATE 0x00000001 + struct cgroup { - int available; // found in the filesystem - int enabled; // enabled in the config + uint32_t options; + + char available; // found in the filesystem + char enabled; // enabled in the config char *id; uint32_t hash; @@ -553,6 +569,24 @@ void cgroup_read_memory(struct memory *mem) { mem->updated = 1; } + + mem->usage_in_bytes_updated = 0; + if(mem->filename_usage_in_bytes) { + if(likely(!read_single_number_file(mem->filename_usage_in_bytes, &mem->usage_in_bytes))) + mem->usage_in_bytes_updated = 1; + } + + mem->msw_usage_in_bytes_updated = 0; + if(mem->filename_msw_usage_in_bytes) { + if(likely(!read_single_number_file(mem->filename_msw_usage_in_bytes, &mem->msw_usage_in_bytes))) + mem->msw_usage_in_bytes_updated = 1; + } + + mem->failcnt_updated = 0; + if(mem->filename_failcnt) { + if(likely(!read_single_number_file(mem->filename_failcnt, &mem->failcnt))) + mem->failcnt_updated = 1; + } } void cgroup_read(struct cgroup *cg) { @@ -714,11 +748,18 @@ 
struct cgroup *cgroup_add(const char *id) { if (!strncmp(t->chart_id, "/system.slice/", 14) && !strncmp(cg->chart_id, "/init.scope/system.slice/", 25)) { error("Control group with chart id '%s' already exists with id '%s' and is enabled. Swapping them by enabling cgroup with id '%s' and disabling cgroup with id '%s'.", cg->chart_id, t->id, cg->id, t->id); + debug(D_CGROUP, "Control group with chart id '%s' already exists with id '%s' and is enabled. Swapping them by enabling cgroup with id '%s' and disabling cgroup with id '%s'.", + cg->chart_id, t->id, cg->id, t->id); t->enabled = 0; - } else { - error("Control group with chart id '%s' already exists with id '%s' and is enabled. Disabling cgroup with id '%s'.", + t->options |= CGROUP_OPTIONS_DISABLED_DUPLICATE; + } + else { + error("Control group with chart id '%s' already exists with id '%s' and is enabled and available. Disabling cgroup with id '%s'.", + cg->chart_id, t->id, cg->id); + debug(D_CGROUP, "Control group with chart id '%s' already exists with id '%s' and is enabled and available. 
Disabling cgroup with id '%s'.", cg->chart_id, t->id, cg->id); cg->enabled = 0; + cg->options |= CGROUP_OPTIONS_DISABLED_DUPLICATE; } break; @@ -865,8 +906,9 @@ void mark_all_cgroups_as_not_available() { struct cgroup *cg; // mark all as not available - for(cg = cgroup_root; cg ; cg = cg->next) + for(cg = cgroup_root; cg ; cg = cg->next) { cg->available = 0; + } } void cleanup_all_cgroups() { @@ -874,6 +916,18 @@ void cleanup_all_cgroups() { for(; cg ;) { if(!cg->available) { + // enable the first duplicate cgroup + { + struct cgroup *t; + for(t = cgroup_root; t ; t = t->next) { + if(t != cg && t->available && !t->enabled && t->options & CGROUP_OPTIONS_DISABLED_DUPLICATE && t->hash_chart == cg->hash_chart && !strcmp(t->chart_id, cg->chart_id)) { + debug(D_CGROUP, "Enabling duplicate of cgroup '%s' with id '%s', because the original with id '%s' stopped.", t->chart_id, t->id, cg->id); + t->enabled = 1; + t->options &= ~CGROUP_OPTIONS_DISABLED_DUPLICATE; + break; + } + } + } if(!last) cgroup_root = cg->next; @@ -966,6 +1020,27 @@ void find_all_cgroups() { debug(D_CGROUP, "memory.stat filename for cgroup '%s': '%s'", cg->id, cg->memory.filename); } else debug(D_CGROUP, "memory.stat file for cgroup '%s': '%s' does not exist.", cg->id, filename); + + snprintfz(filename, FILENAME_MAX, "%s%s/memory.usage_in_bytes", cgroup_memory_base, cg->id); + if(stat(filename, &buf) != -1) { + cg->memory.filename_usage_in_bytes = strdupz(filename); + debug(D_CGROUP, "memory.usage_in_bytes filename for cgroup '%s': '%s'", cg->id, cg->memory.filename_usage_in_bytes); + } + else debug(D_CGROUP, "memory.usage_in_bytes file for cgroup '%s': '%s' does not exist.", cg->id, filename); + + snprintfz(filename, FILENAME_MAX, "%s%s/memory.msw_usage_in_bytes", cgroup_memory_base, cg->id); + if(stat(filename, &buf) != -1) { + cg->memory.filename_msw_usage_in_bytes = strdupz(filename); + debug(D_CGROUP, "memory.msw_usage_in_bytes filename for cgroup '%s': '%s'", cg->id, 
cg->memory.filename_msw_usage_in_bytes); + } + else debug(D_CGROUP, "memory.msw_usage_in_bytes file for cgroup '%s': '%s' does not exist.", cg->id, filename); + + snprintfz(filename, FILENAME_MAX, "%s%s/memory.failcnt", cgroup_memory_base, cg->id); + if(stat(filename, &buf) != -1) { + cg->memory.filename_failcnt = strdupz(filename); + debug(D_CGROUP, "memory.failcnt filename for cgroup '%s': '%s'", cg->id, cg->memory.filename_failcnt); + } + else debug(D_CGROUP, "memory.failcnt file for cgroup '%s': '%s' does not exist.", cg->id, filename); } if(cgroup_enable_blkio) { if(!cg->io_service_bytes.filename) { @@ -1053,7 +1128,7 @@ void update_cgroup_charts(int update_every) { if(cg->cpuacct_stat.updated) { st = rrdset_find_bytype(type, "cpu"); if(!st) { - snprintfz(title, CHART_TITLE_MAX, "CPU Usage for cgroup %s", cg->chart_title); + snprintfz(title, CHART_TITLE_MAX, "CPU Usage (%d%% = %d core%s) for cgroup %s", (processors * 100), processors, (processors>1)?"s":"", cg->chart_title); st = rrdset_create(type, "cpu", NULL, "cpu", "cgroup.cpu", title, "%", 40000, update_every, RRDSET_TYPE_STACKED); rrddim_add(st, "user", NULL, 100, hz, RRDDIM_INCREMENTAL); @@ -1072,7 +1147,7 @@ void update_cgroup_charts(int update_every) { st = rrdset_find_bytype(type, "cpu_per_core"); if(!st) { - snprintfz(title, CHART_TITLE_MAX, "CPU Usage Per Core for cgroup %s", cg->chart_title); + snprintfz(title, CHART_TITLE_MAX, "CPU Usage (%d%% = %d core%s) Per Core for cgroup %s", (processors * 100), processors, (processors>1)?"s":"", cg->chart_title); st = rrdset_create(type, "cpu_per_core", NULL, "cpu", "cgroup.cpu_per_core", title, "%", 40100, update_every, RRDSET_TYPE_STACKED); for(i = 0; i < cg->cpuacct_usage.cpus ;i++) { @@ -1094,7 +1169,7 @@ void update_cgroup_charts(int update_every) { st = rrdset_find_bytype(type, "mem"); if(!st) { snprintfz(title, CHART_TITLE_MAX, "Memory Usage for cgroup %s", cg->chart_title); - st = rrdset_create(type, "mem", NULL, "mem", "cgroup.mem", title, "MB", 
40200, update_every, + st = rrdset_create(type, "mem", NULL, "mem", "cgroup.mem", title, "MB", 40210, update_every, RRDSET_TYPE_STACKED); rrddim_add(st, "cache", NULL, 1, 1024 * 1024, RRDDIM_ABSOLUTE); @@ -1167,6 +1242,38 @@ void update_cgroup_charts(int update_every) { } } + if(cg->memory.usage_in_bytes_updated) { + st = rrdset_find_bytype(type, "mem_usage"); + if(!st) { + snprintfz(title, CHART_TITLE_MAX, "Total Memory for cgroup %s", cg->chart_title); + st = rrdset_create(type, "mem_usage", NULL, "mem", "cgroup.mem_usage", title, "MB", 40200, + update_every, RRDSET_TYPE_STACKED); + + rrddim_add(st, "ram", NULL, 1, 1024 * 1024, RRDDIM_ABSOLUTE); + rrddim_add(st, "swap", NULL, 1, 1024 * 1024, RRDDIM_ABSOLUTE); + } + else rrdset_next(st); + + rrddim_set(st, "ram", cg->memory.usage_in_bytes); + rrddim_set(st, "swap", (cg->memory.msw_usage_in_bytes > cg->memory.usage_in_bytes)?cg->memory.msw_usage_in_bytes - cg->memory.usage_in_bytes:0); + rrdset_done(st); + } + + if(cg->memory.failcnt_updated && cg->memory.failcnt > 0) { + st = rrdset_find_bytype(type, "mem_failcnt"); + if(!st) { + snprintfz(title, CHART_TITLE_MAX, "Memory Limit Failures for cgroup %s", cg->chart_title); + st = rrdset_create(type, "mem_failcnt", NULL, "mem", "cgroup.mem_failcnt", title, "MB", 40250, + update_every, RRDSET_TYPE_LINE); + + rrddim_add(st, "failures", NULL, 1, 1, RRDDIM_INCREMENTAL); + } + else rrdset_next(st); + + rrddim_set(st, "failures", cg->memory.failcnt); + rrdset_done(st); + } + if(cg->io_service_bytes.updated && cg->io_service_bytes.Read + cg->io_service_bytes.Write > 0) { st = rrdset_find_bytype(type, "io"); if(!st) { @@ -1302,7 +1409,7 @@ int do_sys_fs_cgroup(int update_every, unsigned long long dt) { void *cgroups_main(void *ptr) { - if(ptr) { ; } + (void)ptr; info("CGROUP Plugin thread created with task id %d", gettid()); @@ -1327,7 +1434,7 @@ void *cgroups_main(void *ptr) RRDSET *stcpu_thread = NULL; - for(;1;) { + for(;;) { if(unlikely(netdata_exit)) break; // delay until 
it is our time to run @@ -1372,6 +1479,8 @@ void *cgroups_main(void *ptr) } } + info("CGROUP thread exiting"); + pthread_exit(NULL); return NULL; } diff --git a/src/unit_test.c b/src/unit_test.c index a77fdb13..d699707a 100644 --- a/src/unit_test.c +++ b/src/unit_test.c @@ -379,7 +379,7 @@ struct feed_values test4_feed[] = { }; calculated_number test4_results[] = { - 5, 10, 10, 10, 10, 10, 10, 10, 10 + 10, 10, 10, 10, 10, 10, 10, 10, 10 }; struct test test4 = { @@ -414,7 +414,7 @@ struct feed_values test5_feed[] = { }; calculated_number test5_results[] = { - 500, 500, 0, 500, 500, 0, 0, 0, 0 + 1000, 500, 0, 500, 500, 0, 0, 0, 0 }; struct test test5 = { @@ -455,7 +455,7 @@ struct feed_values test6_feed[] = { }; calculated_number test6_results[] = { - 3000, 4000, 4000, 4000 + 4000, 4000, 4000, 4000 }; struct test test6 = { @@ -490,7 +490,7 @@ struct feed_values test7_feed[] = { }; calculated_number test7_results[] = { - 250, 500, 500, 500, 500, 500, 500, 500, 500, 500, 500, 500, 500, 500, 500, 500, 500, 500, 500 + 500, 500, 500, 500, 500, 500, 500, 500, 500, 500, 500, 500, 500, 500, 500, 500, 500, 500, 500 }; struct test test7 = { @@ -597,7 +597,7 @@ struct feed_values test10_feed[] = { }; calculated_number test10_results[] = { - 500, 1000, 1000, 1000, 1000, 1000, 1000 + 1000, 1000, 1000, 1000, 1000, 1000, 1000 }; struct test test10 = { @@ -737,6 +737,148 @@ struct test test13 = { }; // -------------------------------------------------------------------------------------------------------------------- +// test14 + +struct feed_values test14_feed[] = { + { 0, 0x015397dc42151c41ULL }, + { 13573000, 0x015397e612e3ff5dULL }, + { 29969000, 0x015397f905ecdaa8ULL }, + { 29958000, 0x0153980c2a6cb5e4ULL }, + { 30054000, 0x0153981f4032fb83ULL }, + { 34952000, 0x015398355efadaccULL }, + { 25046000, 0x01539845ba4b09f8ULL }, + { 29947000, 0x0153985948bf381dULL }, + { 30054000, 0x0153986c5b9c27e2ULL }, + { 29942000, 0x0153987f888982d0ULL }, +}; + +calculated_number 
test14_results[] = { + 23.1383300, 21.8515600, 21.8804600, 21.7788000, 22.0112200, 22.4386100, 22.0906100, 21.9150800 +}; + +struct test test14 = { + "test14", // name + "issue #981 with real data", + 30, // update_every + 8, // multiplier + 1000000000, // divisor + RRDDIM_INCREMENTAL, // algorithm + 10, // feed entries + 8, // result entries + test14_feed, // feed + test14_results, // results + NULL, // feed2 + NULL // results2 +}; + +struct feed_values test14b_feed[] = { + { 0, 0 }, + { 13573000, 13573000 }, + { 29969000, 13573000 + 29969000 }, + { 29958000, 13573000 + 29969000 + 29958000 }, + { 30054000, 13573000 + 29969000 + 29958000 + 30054000 }, + { 34952000, 13573000 + 29969000 + 29958000 + 30054000 + 34952000 }, + { 25046000, 13573000 + 29969000 + 29958000 + 30054000 + 34952000 + 25046000 }, + { 29947000, 13573000 + 29969000 + 29958000 + 30054000 + 34952000 + 25046000 + 29947000 }, + { 30054000, 13573000 + 29969000 + 29958000 + 30054000 + 34952000 + 25046000 + 29947000 + 30054000 }, + { 29942000, 13573000 + 29969000 + 29958000 + 30054000 + 34952000 + 25046000 + 29947000 + 30054000 + 29942000 }, +}; + +calculated_number test14b_results[] = { + 1000000, 1000000, 1000000, 1000000, 1000000, 1000000, 1000000, 1000000 +}; + +struct test test14b = { + "test14b", // name + "issue #981 with dummy data", + 30, // update_every + 1, // multiplier + 1, // divisor + RRDDIM_INCREMENTAL, // algorithm + 10, // feed entries + 8, // result entries + test14b_feed, // feed + test14b_results, // results + NULL, // feed2 + NULL // results2 +}; + +struct feed_values test14c_feed[] = { + { 29000000, 29000000 }, + { 1000000, 29000000 + 1000000 }, + { 30000000, 29000000 + 1000000 + 30000000 }, + { 30000000, 29000000 + 1000000 + 30000000 + 30000000 }, + { 30000000, 29000000 + 1000000 + 30000000 + 30000000 + 30000000 }, + { 30000000, 29000000 + 1000000 + 30000000 + 30000000 + 30000000 + 30000000 }, + { 30000000, 29000000 + 1000000 + 30000000 + 30000000 + 30000000 + 30000000 + 30000000 
}, + { 30000000, 29000000 + 1000000 + 30000000 + 30000000 + 30000000 + 30000000 + 30000000 + 30000000 }, + { 30000000, 29000000 + 1000000 + 30000000 + 30000000 + 30000000 + 30000000 + 30000000 + 30000000 + 30000000 }, + { 30000000, 29000000 + 1000000 + 30000000 + 30000000 + 30000000 + 30000000 + 30000000 + 30000000 + 30000000 + 30000000 }, +}; + +calculated_number test14c_results[] = { + 1000000, 1000000, 1000000, 1000000, 1000000, 1000000, 1000000, 1000000, 1000000 +}; + +struct test test14c = { + "test14c", // name + "issue #981 with dummy data, checking for late start", + 30, // update_every + 1, // multiplier + 1, // divisor + RRDDIM_INCREMENTAL, // algorithm + 10, // feed entries + 9, // result entries + test14c_feed, // feed + test14c_results, // results + NULL, // feed2 + NULL // results2 +}; + +// -------------------------------------------------------------------------------------------------------------------- +// test15 + +struct feed_values test15_feed[] = { + { 0, 1068066388 }, + { 1008752, 1068822698 }, + { 993809, 1069573072 }, + { 995911, 1070324135 }, + { 1014562, 1071078166 }, + { 994684, 1071831349 }, + { 993128, 1072235739 }, + { 1010332, 1072958871 }, + { 1003394, 1073707019 }, + { 995201, 1074460255 }, +}; + +collected_number test15_feed2[] = { + 178825286, 178825286, 178825286, 178825286, 178825498, 178825498, 179165652, 179202964, 179203282, 179204130 +}; + +calculated_number test15_results[] = { + 5857.4080000, 5898.4540000, 5891.6590000, 5806.3160000, 5914.2640000, 3202.2630000, 5589.6560000, 5822.5260000, 5911.7520000 +}; + +calculated_number test15_results2[] = { + 0.0000000, 0.0000000, 0.0024944, 1.6324779, 0.0212777, 2655.1890000, 290.5387000, 5.6733610, 6.5960220 +}; + +struct test test15 = { + "test15", // name + "test incremental with 2 dimensions", + 1, // update_every + 8, // multiplier + 1024, // divisor + RRDDIM_INCREMENTAL, // algorithm + 10, // feed entries + 9, // result entries + test15_feed, // feed + test15_results, // 
results + test15_feed2, // feed2 + test15_results2 // results2 +}; + +// -------------------------------------------------------------------------------------------------------------------- int run_test(struct test *test) { @@ -749,7 +891,7 @@ int run_test(struct test *test) snprintfz(name, 100, "unittest-%s", test->name); // create the chart - RRDSET *st = rrdset_create("netdata", name, name, "netdata", NULL, "Unit Testing", "a value", 1, 1, RRDSET_TYPE_LINE); + RRDSET *st = rrdset_create("netdata", name, name, "netdata", NULL, "Unit Testing", "a value", 1, test->update_every, RRDSET_TYPE_LINE); RRDDIM *rd = rrddim_add(st, "dim1", NULL, test->multiplier, test->divisor, test->algorithm); RRDDIM *rd2 = NULL; @@ -759,12 +901,20 @@ int run_test(struct test *test) st->debug = 1; // feed it with the test data + time_t time_now = 0, time_start = time(NULL); unsigned long c; + collected_number last = 0; for(c = 0; c < test->feed_entries; c++) { if(debug_flags) fprintf(stderr, "\n\n"); if(c) { - fprintf(stderr, " > %s: feeding position %lu, after %llu microseconds\n", test->name, c+1, test->feed[c].microseconds); + time_now += test->feed[c].microseconds; + fprintf(stderr, " > %s: feeding position %lu, after %0.3f seconds (%0.3f seconds from start), delta " CALCULATED_NUMBER_FORMAT ", rate " CALCULATED_NUMBER_FORMAT "\n", + test->name, c+1, + (float)test->feed[c].microseconds / 1000000.0, + (float)time_now / 1000000.0, + ((calculated_number)test->feed[c].value - (calculated_number)last) * (calculated_number)test->multiplier / (calculated_number)test->divisor, + (((calculated_number)test->feed[c].value - (calculated_number)last) * (calculated_number)test->multiplier / (calculated_number)test->divisor) / (calculated_number)test->feed[c].microseconds * (calculated_number)1000000); rrdset_next_usec(st, test->feed[c].microseconds); } else { @@ -773,6 +923,7 @@ int run_test(struct test *test) fprintf(stderr, " >> %s with value " COLLECTED_NUMBER_FORMAT "\n", rd->name, 
test->feed[c].value); rrddim_set(st, "dim1", test->feed[c].value); + last = test->feed[c].value; if(rd2) { fprintf(stderr, " >> %s with value " COLLECTED_NUMBER_FORMAT "\n", rd2->name, test->feed2[c]); @@ -785,6 +936,7 @@ int run_test(struct test *test) if(!c) { fprintf(stderr, " > %s: fixing first collection time to be %llu microseconds to second boundary\n", test->name, test->feed[c].microseconds); rd->last_collected_time.tv_usec = st->last_collected_time.tv_usec = st->last_updated.tv_usec = test->feed[c].microseconds; + // time_start = st->last_collected_time.tv_sec; } } @@ -801,14 +953,21 @@ int run_test(struct test *test) calculated_number v = unpack_storage_number(rd->values[c]); calculated_number n = test->results[c]; int same = (roundl(v * 10000000.0) == roundl(n * 10000000.0))?1:0; - fprintf(stderr, " %s/%s: checking position %lu, expecting value " CALCULATED_NUMBER_FORMAT ", found " CALCULATED_NUMBER_FORMAT ", %s\n", test->name, rd->name, c+1, n, v, (same)?"OK":"### E R R O R ###"); + fprintf(stderr, " %s/%s: checking position %lu (at %lu secs), expecting value " CALCULATED_NUMBER_FORMAT ", found " CALCULATED_NUMBER_FORMAT ", %s\n", + test->name, rd->name, c+1, + (rrdset_first_entry_t(st) + c * st->update_every) - time_start, + n, v, (same)?"OK":"### E R R O R ###"); + if(!same) errors++; if(rd2) { v = unpack_storage_number(rd2->values[c]); n = test->results2[c]; same = (roundl(v * 10000000.0) == roundl(n * 10000000.0))?1:0; - fprintf(stderr, " %s/%s: checking position %lu, expecting value " CALCULATED_NUMBER_FORMAT ", found " CALCULATED_NUMBER_FORMAT ", %s\n", test->name, rd2->name, c+1, n, v, (same)?"OK":"### E R R O R ###"); + fprintf(stderr, " %s/%s: checking position %lu (at %lu secs), expecting value " CALCULATED_NUMBER_FORMAT ", found " CALCULATED_NUMBER_FORMAT ", %s\n", + test->name, rd2->name, c+1, + (rrdset_first_entry_t(st) + c * st->update_every) - time_start, + n, v, (same)?"OK":"### E R R O R ###"); if(!same) errors++; } } @@ -857,6 +1016,18 
@@ int run_all_mockup_tests(void) if(run_test(&test13)) return 1; + if(run_test(&test14)) + return 1; + + if(run_test(&test14b)) + return 1; + + if(run_test(&test14c)) + return 1; + + if(run_test(&test15)) + return 1; + return 0; } diff --git a/src/web_buffer.c b/src/web_buffer.c index 01a97ddc..93ba782a 100644 --- a/src/web_buffer.c +++ b/src/web_buffer.c @@ -35,6 +35,7 @@ void buffer_reset(BUFFER *wb) wb->contenttype = CT_TEXT_PLAIN; wb->options = 0; wb->date = 0; + wb->expires = 0; buffer_overflow_check(wb); } diff --git a/src/web_buffer.h b/src/web_buffer.h index c4cd0563..ee611209 100644 --- a/src/web_buffer.h +++ b/src/web_buffer.h @@ -4,12 +4,13 @@ #define WEB_DATA_LENGTH_INCREASE_STEP 1024 typedef struct web_buffer { - size_t size; // allocation size of buffer - size_t len; // current data length in buffer - char *buffer; // the buffer - uint8_t contenttype; - uint8_t options; - time_t date; // the date this content has been generated + size_t size; // allocation size of buffer, in bytes + size_t len; // current data length in buffer, in bytes + char *buffer; // the buffer itself + uint8_t contenttype; // the content type of the data in the buffer + uint8_t options; // options related to the content + time_t date; // the timestamp this content has been generated + time_t expires; // the timestamp this content expires } BUFFER; // options @@ -39,6 +40,9 @@ typedef struct web_buffer { #define CT_IMAGE_ICNS 20 #define CT_IMAGE_BMP 21 +#define buffer_cacheable(wb) do { (wb)->options |= WB_CONTENT_CACHEABLE; if((wb)->options & WB_CONTENT_NO_CACHEABLE) (wb)->options &= ~WB_CONTENT_NO_CACHEABLE; } while(0) +#define buffer_no_cacheable(wb) do { (wb)->options |= WB_CONTENT_NO_CACHEABLE; if((wb)->options & WB_CONTENT_CACHEABLE) (wb)->options &= ~WB_CONTENT_CACHEABLE; (wb)->expires = 0; } while(0) + #define buffer_strlen(wb) ((wb)->len) extern const char *buffer_tostring(BUFFER *wb); diff --git a/src/web_buffer_svg.c b/src/web_buffer_svg.c index 2f7627cc..3e847b5d 
100644 --- a/src/web_buffer_svg.c +++ b/src/web_buffer_svg.c @@ -529,7 +529,7 @@ void buffer_svg(BUFFER *wb, const char *label, calculated_number value, const ch strcpy(value_string, "-"); else if(precision < 0) { - int len, l, lstop = 0; + int len, lstop = 0, trim_zeros = 1; calculated_number abs = value; if(isless(value, 0)) { @@ -537,27 +537,30 @@ void buffer_svg(BUFFER *wb, const char *label, calculated_number value, const ch abs = -value; } - if(isgreaterequal(abs, 1000)) len = snprintfz(value_string, VALUE_STRING_SIZE, "%0.0Lf", (long double)value); - else if(isgreaterequal(abs, 100)) len = snprintfz(value_string, VALUE_STRING_SIZE, "%0.1Lf", (long double)value); - else if(isgreaterequal(abs, 1)) len = snprintfz(value_string, VALUE_STRING_SIZE, "%0.2Lf", (long double)value); - else if(isgreaterequal(abs, 0.1)) len = snprintfz(value_string, VALUE_STRING_SIZE, "%0.3Lf", (long double)value); - else len = snprintfz(value_string, VALUE_STRING_SIZE, "%0.4Lf", (long double)value); - - // remove trailing zeros - for(l = len - 1; l > lstop ; l--) { - if(likely(value_string[l] == '0')) { - value_string[l] = '\0'; - len--; - } + if(isgreaterequal(abs, 1000)) { len = snprintfz(value_string, VALUE_STRING_SIZE, "%0.0Lf", (long double)value); trim_zeros = 0; } + else if(isgreaterequal(abs, 100)) len = snprintfz(value_string, VALUE_STRING_SIZE, "%0.1Lf", (long double)value); + else if(isgreaterequal(abs, 1)) len = snprintfz(value_string, VALUE_STRING_SIZE, "%0.2Lf", (long double)value); + else if(isgreaterequal(abs, 0.1)) len = snprintfz(value_string, VALUE_STRING_SIZE, "%0.3Lf", (long double)value); + else len = snprintfz(value_string, VALUE_STRING_SIZE, "%0.4Lf", (long double)value); + + if(unlikely(trim_zeros)) { + int l; + // remove trailing zeros from the decimal part + for(l = len - 1; l > lstop ; l--) { + if(likely(value_string[l] == '0')) { + value_string[l] = '\0'; + len--; + } - else if(unlikely(value_string[l] == '.')) { - value_string[l] = '\0'; - len--; - break; 
- } + else if(unlikely(value_string[l] == '.')) { + value_string[l] = '\0'; + len--; + break; + } - else - break; + else + break; + } } if(len >= 0) diff --git a/src/web_client.c b/src/web_client.c index 4036d4c8..0cf9eeb6 100644 --- a/src/web_client.c +++ b/src/web_client.c @@ -407,6 +407,7 @@ int mysendfile(struct web_client *w, char *filename) buffer_flush(w->response.data); w->response.rlen = stat.st_size; w->response.data->date = stat.st_mtim.tv_sec; + buffer_cacheable(w->response.data); return 200; } @@ -665,7 +666,7 @@ int web_client_api_request_v1_alarms(struct web_client *w, char *url) int all = 0; while(url) { - char *value = mystrsep(&url, "?&[]"); + char *value = mystrsep(&url, "?&"); if (!value || !*value) continue; if(!strcmp(value, "all")) all = 1; @@ -680,17 +681,28 @@ int web_client_api_request_v1_alarms(struct web_client *w, char *url) int web_client_api_request_v1_alarm_log(struct web_client *w, char *url) { - (void)url; + uint32_t after = 0; + + while(url) { + char *value = mystrsep(&url, "?&"); + if (!value || !*value) continue; + + char *name = mystrsep(&value, "="); + if(!name || !*name) continue; + if(!value || !*value) continue; + + if(!strcmp(name, "after")) after = strtoul(value, NULL, 0); + } buffer_flush(w->response.data); w->response.data->contenttype = CT_APPLICATION_JSON; - health_alarm_log2json(&localhost, w->response.data); + health_alarm_log2json(&localhost, w->response.data, after); return 200; } int web_client_api_request_v1_charts(struct web_client *w, char *url) { - if(url) { ; } + (void)url; buffer_flush(w->response.data); w->response.data->contenttype = CT_APPLICATION_JSON; @@ -706,7 +718,7 @@ int web_client_api_request_v1_chart(struct web_client *w, char *url) buffer_flush(w->response.data); while(url) { - char *value = mystrsep(&url, "?&[]"); + char *value = mystrsep(&url, "?&"); if(!value || !*value) continue; char *name = mystrsep(&value, "="); @@ -768,7 +780,7 @@ int web_client_api_request_v1_badge(struct web_client *w, 
char *url) { uint32_t options = 0x00000000; while(url) { - char *value = mystrsep(&url, "/?&[]"); + char *value = mystrsep(&url, "/?&"); if(!value || !*value) continue; char *name = mystrsep(&value, "="); @@ -809,6 +821,7 @@ int web_client_api_request_v1_badge(struct web_client *w, char *url) { } if(!chart || !*chart) { + buffer_no_cacheable(w->response.data); buffer_sprintf(w->response.data, "No chart id is given at the request."); goto cleanup; } @@ -816,6 +829,7 @@ int web_client_api_request_v1_badge(struct web_client *w, char *url) { RRDSET *st = rrdset_find(chart); if(!st) st = rrdset_find_byname(chart); if(!st) { + buffer_no_cacheable(w->response.data); buffer_svg(w->response.data, "chart not found", 0, "", NULL, NULL, 1, -1); ret = 200; goto cleanup; @@ -825,6 +839,7 @@ int web_client_api_request_v1_badge(struct web_client *w, char *url) { if(alarm) { rc = rrdcalc_find(st, alarm); if (!rc) { + buffer_no_cacheable(w->response.data); buffer_svg(w->response.data, "alarm not found", 0, "", NULL, NULL, 1, -1); ret = 200; goto cleanup; @@ -848,7 +863,7 @@ int web_client_api_request_v1_badge(struct web_client *w, char *url) { else if(options & RRDR_OPTION_NOT_ALIGNED) refresh = st->update_every; else { - refresh = (before - after); + refresh = (int)(before - after); if(refresh < 0) refresh = -refresh; } } @@ -904,8 +919,11 @@ int web_client_api_request_v1_badge(struct web_client *w, char *url) { calculated_number n = rc->value; if(isnan(n) || isinf(n)) n = 0; - if (refresh > 0) + if (refresh > 0) { buffer_sprintf(w->response.header, "Refresh: %d\r\n", refresh); + w->response.data->expires = time(NULL) + refresh; + } + else buffer_no_cacheable(w->response.data); if(!value_color) { switch(rc->status) { @@ -935,7 +953,14 @@ int web_client_api_request_v1_badge(struct web_client *w, char *url) { } } - buffer_svg(w->response.data, label, rc->value * multiply / divide, units, label_color, value_color, 0, precision); + buffer_svg(w->response.data, + label, + rc->value * 
multiply / divide, + units, + label_color, + value_color, + 0, + precision); ret = 200; } else { @@ -946,20 +971,40 @@ int web_client_api_request_v1_badge(struct web_client *w, char *url) { // if the collected value is too old, don't calculate its value if (rrdset_last_entry_t(st) >= (time(NULL) - (st->update_every * st->gap_when_lost_iterations_above))) - ret = rrd2value(st, w->response.data, &n, (dimensions) ? buffer_tostring(dimensions) : NULL, points, after, - before, group, options, NULL, &latest_timestamp, &value_is_null); + ret = rrd2value(st, + w->response.data, + &n, + (dimensions) ? buffer_tostring(dimensions) : NULL, + points, + after, + before, + group, + options, + NULL, + &latest_timestamp, + &value_is_null); // if the value cannot be calculated, show empty badge if (ret != 200) { + buffer_no_cacheable(w->response.data); value_is_null = 1; n = 0; ret = 200; } - else if (refresh > 0) + else if (refresh > 0) { buffer_sprintf(w->response.header, "Refresh: %d\r\n", refresh); + w->response.data->expires = time(NULL) + refresh; + } + else buffer_no_cacheable(w->response.data); // render the badge - buffer_svg(w->response.data, label, n * multiply / divide, units, label_color, value_color, value_is_null, + buffer_svg(w->response.data, + label, + n * multiply / divide, + units, + label_color, + value_color, + value_is_null, precision); } @@ -998,7 +1043,7 @@ int web_client_api_request_v1_data(struct web_client *w, char *url) uint32_t options = 0x00000000; while(url) { - char *value = mystrsep(&url, "?&[]"); + char *value = mystrsep(&url, "?&"); if(!value || !*value) continue; char *name = mystrsep(&value, "="); @@ -1143,6 +1188,9 @@ cleanup: return ret; } + +#define REGISTRY_VERIFY_COOKIES_GUID "give-me-back-this-cookie-now--please" + int web_client_api_request_v1_registry(struct web_client *w, char *url) { static uint32_t hash_action = 0, hash_access = 0, hash_hello = 0, hash_delete = 0, hash_search = 0, @@ -1190,7 +1238,7 @@ int 
web_client_api_request_v1_registry(struct web_client *w, char *url) */ while(url) { - char *value = mystrsep(&url, "?&[]"); + char *value = mystrsep(&url, "?&"); if (!value || !*value) continue; char *name = mystrsep(&value, "="); @@ -1280,8 +1328,7 @@ int web_client_api_request_v1_registry(struct web_client *w, char *url) w->tracking_required = 1; if(registry_verify_cookies_redirects() > 0 && (!cookie || !person_guid[0])) { buffer_flush(w->response.data); - - registry_set_cookie(w, "give-me-back-this-cookie-please"); + registry_set_cookie(w, REGISTRY_VERIFY_COOKIES_GUID); w->response.data->contenttype = CT_APPLICATION_JSON; buffer_sprintf(w->response.data, "{ \"status\": \"redirect\", \"registry\": \"%s\" }", registry_to_announce()); return 200; @@ -1329,6 +1376,10 @@ int web_client_api_request_v1_registry(struct web_client *w, char *url) return 307 */ } + + if(unlikely(cookie && person_guid[0] && !strcmp(person_guid, REGISTRY_VERIFY_COOKIES_GUID))) + person_guid[0] = '\0'; + return registry_request_access_json(w, person_guid, machine_guid, machine_url, url_name, time(NULL)); case 'D': @@ -1852,8 +1903,18 @@ static inline int http_request_validate(struct web_client *w) { } void web_client_process(struct web_client *w) { - static uint32_t hash_api = 0, hash_netdata_conf = 0, hash_data = 0, hash_datasource = 0, hash_graph = 0, - hash_list = 0, hash_all_json = 0, hash_exit = 0, hash_debug = 0, hash_mirror = 0; + static uint32_t + hash_api = 0, + hash_netdata_conf = 0, + hash_data = 0, + hash_datasource = 0, + hash_graph = 0, + hash_list = 0, + hash_all_json = 0; + +#ifdef NETDATA_INTERNAL_CHECKS + static uint32_t hash_exit = 0, hash_debug = 0, hash_mirror = 0; +#endif // start timing us gettimeofday(&w->tv_in, NULL); @@ -1866,9 +1927,11 @@ void web_client_process(struct web_client *w) { hash_graph = simple_hash(WEB_PATH_GRAPH); hash_list = simple_hash("list"); hash_all_json = simple_hash("all.json"); +#ifdef NETDATA_INTERNAL_CHECKS hash_exit = simple_hash("exit"); 
hash_debug = simple_hash("debug"); hash_mirror = simple_hash("mirror"); +#endif } int code = 500; @@ -2065,19 +2128,41 @@ void web_client_process(struct web_client *w) { } gettimeofday(&w->tv_ready, NULL); - w->response.data->date = time(NULL); w->response.sent = 0; w->response.code = code; + // set a proper last modified date + if(unlikely(!w->response.data->date)) + w->response.data->date = w->tv_ready.tv_sec; + + if(unlikely(code != 200)) + buffer_no_cacheable(w->response.data); + + // set a proper expiration date, if not already set + if(unlikely(!w->response.data->expires)) { + if(w->response.data->options & WB_CONTENT_NO_CACHEABLE) + w->response.data->expires = w->tv_ready.tv_sec + rrd_update_every; + else + w->response.data->expires = w->tv_ready.tv_sec + 86400; + } + // prepare the HTTP response header debug(D_WEB_CLIENT, "%llu: Generating HTTP header with response %d.", w->id, code); const char *content_type_string = web_content_type_to_string(w->response.data->contenttype); const char *code_msg = web_response_code_to_string(code); - char date[32]; - struct tm tmbuf, *tm = gmtime_r(&w->response.data->date, &tmbuf); - strftime(date, sizeof(date), "%a, %d %b %Y %H:%M:%S %Z", tm); + // prepare the last modified and expiration dates + char date[32], edate[32]; + { + struct tm tmbuf, *tm; + + tm = gmtime_r(&w->response.data->date, &tmbuf); + strftime(date, sizeof(date), "%a, %d %b %Y %H:%M:%S %Z", tm); + + tm = gmtime_r(&w->response.data->expires, &tmbuf); + strftime(edate, sizeof(edate), "%a, %d %b %Y %H:%M:%S %Z", tm); + } buffer_sprintf(w->response.header_output, "HTTP/1.1 %d %s\r\n" @@ -2129,44 +2214,37 @@ void web_client_process(struct web_client *w) { "Access-Control-Max-Age: 1209600\r\n" // 86400 * 14 ); } - - if(buffer_strlen(w->response.header)) - buffer_strcat(w->response.header_output, buffer_tostring(w->response.header)); - - if(w->mode == WEB_CLIENT_MODE_NORMAL && (w->response.data->options & WB_CONTENT_NO_CACHEABLE)) { - 
buffer_sprintf(w->response.header_output, - "Expires: %s\r\n" - "Cache-Control: no-cache\r\n" - , date); - } - else if(w->mode != WEB_CLIENT_MODE_OPTIONS) { - char edate[32]; - time_t et = w->response.data->date + (86400 * 14); - struct tm etmbuf, *etm = gmtime_r(&et, &etmbuf); - strftime(edate, sizeof(edate), "%a, %d %b %Y %H:%M:%S %Z", etm); - + else { buffer_sprintf(w->response.header_output, - "Expires: %s\r\n" - "Cache-Control: public\r\n" - , edate); + "Cache-Control: %s\r\n" + "Expires: %s\r\n", + (w->response.data->options & WB_CONTENT_NO_CACHEABLE)?"no-cache":"public", + edate); } - // if we know the content length, put it - if(!w->response.zoutput && (w->response.data->len || w->response.rlen)) - buffer_sprintf(w->response.header_output, - "Content-Length: %zu\r\n" - , w->response.data->len? w->response.data->len: w->response.rlen - ); - else if(!w->response.zoutput) - w->keepalive = 0; // content-length is required for keep-alive + // copy a possibly available custom header + if(unlikely(buffer_strlen(w->response.header))) + buffer_strcat(w->response.header_output, buffer_tostring(w->response.header)); - if(w->response.zoutput) { + // headers related to the transfer method + if(likely(w->response.zoutput)) { buffer_strcat(w->response.header_output, "Content-Encoding: gzip\r\n" "Transfer-Encoding: chunked\r\n" ); } + else { + if(likely((w->response.data->len || w->response.rlen))) { + // we know the content length, put it + buffer_sprintf(w->response.header_output, "Content-Length: %zu\r\n", w->response.data->len? 
w->response.data->len: w->response.rlen); + } + else { + // we don't know the content length, disable keep-alive + w->keepalive = 0; + } + } + // end of HTTP header buffer_strcat(w->response.header_output, "\r\n"); // sent the HTTP header diff --git a/src/web_server.c b/src/web_server.c index cf3687f3..cbbe6bb4 100644 --- a/src/web_server.c +++ b/src/web_server.c @@ -10,21 +10,39 @@ int web_server_mode = WEB_SERVER_MODE_MULTI_THREADED; #ifdef NETDATA_INTERNAL_CHECKS static void log_allocations(void) { - static int mem = 0; +#ifdef HAVE_C_MALLINFO + static int heap = 0, used = 0, mmap = 0; struct mallinfo mi; mi = mallinfo(); - if(mi.uordblks > mem) { + if(mi.uordblks > used) { int clients = 0; struct web_client *w; for(w = web_clients; w ; w = w->next) clients++; - info("Allocated memory increased from %d to %d (increased by %d bytes). There are %d web clients connected.", mem, mi.uordblks, mi.uordblks - mem, clients); - mem = mi.uordblks; + info("Allocated memory: used %d KB (+%d B), mmap %d KB (+%d B), heap %d KB (+%d B). %d web clients connected.", + mi.uordblks / 1024, + mi.uordblks - used, + mi.hblkhd / 1024, + mi.hblkhd - mmap, + mi.arena / 1024, + mi.arena - heap, + clients); + + used = mi.uordblks; + heap = mi.arena; + mmap = mi.hblkhd; } -} +#else /* ! HAVE_C_MALLINFO */ + ; +#endif /* ! HAVE_C_MALLINFO */ + +#ifdef has_jemalloc + malloc_stats_print(NULL, NULL, NULL); #endif +} +#endif /* NETDATA_INTERNAL_CHECKS */ #ifndef HAVE_ACCEPT4 int accept4(int sock, struct sockaddr *addr, socklen_t *addrlen, int flags) { @@ -38,10 +56,14 @@ int accept4(int sock, struct sockaddr *addr, socklen_t *addrlen, int flags) { flags &= ~SOCK_NONBLOCK; } +#ifdef SOCK_CLOEXEC +#ifdef O_CLOEXEC if (flags & SOCK_CLOEXEC) { newflags |= O_CLOEXEC; flags &= ~SOCK_CLOEXEC; } +#endif +#endif if (flags) { errno = -EINVAL; |