diff options
author | Daniel Baumann <daniel.baumann@progress-linux.org> | 2021-02-07 11:45:55 +0000 |
---|---|---|
committer | Daniel Baumann <daniel.baumann@progress-linux.org> | 2021-02-07 11:45:55 +0000 |
commit | a8220ab2d293bb7f4b014b79d16b2fb05090fa93 (patch) | |
tree | 77f0a30f016c0925cf7ee9292e644bba183c2774 /collectors/proc.plugin | |
parent | Adding upstream version 1.19.0. (diff) | |
download | netdata-a8220ab2d293bb7f4b014b79d16b2fb05090fa93.tar.xz netdata-a8220ab2d293bb7f4b014b79d16b2fb05090fa93.zip |
Adding upstream version 1.29.0.upstream/1.29.0
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'collectors/proc.plugin')
-rw-r--r-- | collectors/proc.plugin/Makefile.in | 519 | ||||
-rw-r--r-- | collectors/proc.plugin/README.md | 121 | ||||
-rw-r--r-- | collectors/proc.plugin/plugin_proc.c | 7 | ||||
-rw-r--r-- | collectors/proc.plugin/plugin_proc.h | 7 | ||||
-rw-r--r-- | collectors/proc.plugin/proc_loadavg.c | 7 | ||||
-rw-r--r-- | collectors/proc.plugin/proc_mdstat.c | 1288 | ||||
-rw-r--r-- | collectors/proc.plugin/proc_meminfo.c | 9 | ||||
-rw-r--r-- | collectors/proc.plugin/proc_net_dev.c | 162 | ||||
-rw-r--r-- | collectors/proc.plugin/proc_net_softnet_stat.c | 4 | ||||
-rw-r--r-- | collectors/proc.plugin/proc_net_wireless.c | 453 | ||||
-rw-r--r-- | collectors/proc.plugin/proc_pressure.c | 178 | ||||
-rw-r--r-- | collectors/proc.plugin/proc_pressure.h | 31 | ||||
-rw-r--r-- | collectors/proc.plugin/proc_stat.c | 1 | ||||
-rw-r--r-- | collectors/proc.plugin/sys_class_infiniband.c | 704 |
14 files changed, 2310 insertions, 1181 deletions
diff --git a/collectors/proc.plugin/Makefile.in b/collectors/proc.plugin/Makefile.in deleted file mode 100644 index 47bb61f0..00000000 --- a/collectors/proc.plugin/Makefile.in +++ /dev/null @@ -1,519 +0,0 @@ -# Makefile.in generated by automake 1.15.1 from Makefile.am. -# @configure_input@ - -# Copyright (C) 1994-2017 Free Software Foundation, Inc. - -# This Makefile.in is free software; the Free Software Foundation -# gives unlimited permission to copy and/or distribute it, -# with or without modifications, as long as this notice is preserved. - -# This program is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY, to the extent permitted by law; without -# even the implied warranty of MERCHANTABILITY or FITNESS FOR A -# PARTICULAR PURPOSE. - -@SET_MAKE@ - -# SPDX-License-Identifier: GPL-3.0-or-later - -VPATH = @srcdir@ -am__is_gnu_make = { \ - if test -z '$(MAKELEVEL)'; then \ - false; \ - elif test -n '$(MAKE_HOST)'; then \ - true; \ - elif test -n '$(MAKE_VERSION)' && test -n '$(CURDIR)'; then \ - true; \ - else \ - false; \ - fi; \ -} -am__make_running_with_option = \ - case $${target_option-} in \ - ?) ;; \ - *) echo "am__make_running_with_option: internal error: invalid" \ - "target option '$${target_option-}' specified" >&2; \ - exit 1;; \ - esac; \ - has_opt=no; \ - sane_makeflags=$$MAKEFLAGS; \ - if $(am__is_gnu_make); then \ - sane_makeflags=$$MFLAGS; \ - else \ - case $$MAKEFLAGS in \ - *\\[\ \ ]*) \ - bs=\\; \ - sane_makeflags=`printf '%s\n' "$$MAKEFLAGS" \ - | sed "s/$$bs$$bs[$$bs $$bs ]*//g"`;; \ - esac; \ - fi; \ - skip_next=no; \ - strip_trailopt () \ - { \ - flg=`printf '%s\n' "$$flg" | sed "s/$$1.*$$//"`; \ - }; \ - for flg in $$sane_makeflags; do \ - test $$skip_next = yes && { skip_next=no; continue; }; \ - case $$flg in \ - *=*|--*) continue;; \ - -*I) strip_trailopt 'I'; skip_next=yes;; \ - -*I?*) strip_trailopt 'I';; \ - -*O) strip_trailopt 'O'; skip_next=yes;; \ - -*O?*) strip_trailopt 'O';; \ - -*l) strip_trailopt 'l'; skip_next=yes;; \ - -*l?*) strip_trailopt 'l';; \ - -[dEDm]) skip_next=yes;; \ - -[JT]) skip_next=yes;; \ - esac; \ - case $$flg in \ - *$$target_option*) has_opt=yes; break;; \ - esac; \ - done; \ - test $$has_opt = yes -am__make_dryrun = (target_option=n; $(am__make_running_with_option)) -am__make_keepgoing = (target_option=k; $(am__make_running_with_option)) -pkgdatadir = $(datadir)/@PACKAGE@ -pkgincludedir = $(includedir)/@PACKAGE@ -pkglibdir = $(libdir)/@PACKAGE@ -pkglibexecdir = $(libexecdir)/@PACKAGE@ -am__cd = CDPATH="$${ZSH_VERSION+.}$(PATH_SEPARATOR)" && cd -install_sh_DATA = $(install_sh) -c -m 644 -install_sh_PROGRAM = $(install_sh) -c -install_sh_SCRIPT = $(install_sh) -c -INSTALL_HEADER = $(INSTALL_DATA) -transform = $(program_transform_name) -NORMAL_INSTALL = : -PRE_INSTALL = : -POST_INSTALL = : -NORMAL_UNINSTALL = : -PRE_UNINSTALL = : -POST_UNINSTALL = : -build_triplet = @build@ -host_triplet = @host@ -subdir = collectors/proc.plugin -ACLOCAL_M4 = $(top_srcdir)/aclocal.m4 -am__aclocal_m4_deps = $(top_srcdir)/build/m4/ax_c___atomic.m4 \ - $(top_srcdir)/build/m4/ax_c__generic.m4 \ - $(top_srcdir)/build/m4/ax_c_lto.m4 \ - $(top_srcdir)/build/m4/ax_c_mallinfo.m4 \ - $(top_srcdir)/build/m4/ax_c_mallopt.m4 \ - $(top_srcdir)/build/m4/ax_check_compile_flag.m4 \ - $(top_srcdir)/build/m4/ax_gcc_func_attribute.m4 \ - $(top_srcdir)/build/m4/ax_pthread.m4 \ - $(top_srcdir)/build/m4/jemalloc.m4 \ - $(top_srcdir)/build/m4/tcmalloc.m4 $(top_srcdir)/configure.ac -am__configure_deps = $(am__aclocal_m4_deps) $(CONFIGURE_DEPENDENCIES) \ - $(ACLOCAL_M4) -DIST_COMMON = $(srcdir)/Makefile.am $(dist_noinst_DATA) \ - $(am__DIST_COMMON) -mkinstalldirs = $(install_sh) -d -CONFIG_HEADER = $(top_builddir)/config.h -CONFIG_CLEAN_FILES = -CONFIG_CLEAN_VPATH_FILES = -AM_V_P = $(am__v_P_@AM_V@) -am__v_P_ = $(am__v_P_@AM_DEFAULT_V@) -am__v_P_0 = false -am__v_P_1 = : -AM_V_GEN = $(am__v_GEN_@AM_V@) -am__v_GEN_ = $(am__v_GEN_@AM_DEFAULT_V@) -am__v_GEN_0 = @echo " GEN " $@; -am__v_GEN_1 = -AM_V_at = $(am__v_at_@AM_V@) -am__v_at_ = $(am__v_at_@AM_DEFAULT_V@) -am__v_at_0 = @ -am__v_at_1 = -SOURCES = -DIST_SOURCES = -am__can_run_installinfo = \ - case $$AM_UPDATE_INFO_DIR in \ - n|no|NO) false;; \ - *) (install-info --version) >/dev/null 2>&1;; \ - esac -DATA = $(dist_noinst_DATA) -am__tagged_files = $(HEADERS) $(SOURCES) $(TAGS_FILES) $(LISP) -am__DIST_COMMON = $(srcdir)/Makefile.in -DISTFILES = $(DIST_COMMON) $(DIST_SOURCES) $(TEXINFOS) $(EXTRA_DIST) -ACLOCAL = @ACLOCAL@ -AMTAR = @AMTAR@ -AM_DEFAULT_VERBOSITY = @AM_DEFAULT_VERBOSITY@ -AUTOCONF = @AUTOCONF@ -AUTOHEADER = @AUTOHEADER@ -AUTOMAKE = @AUTOMAKE@ -AWK = @AWK@ -CC = @CC@ -CCDEPMODE = @CCDEPMODE@ -CFLAGS = @CFLAGS@ -CMOCKA_CFLAGS = @CMOCKA_CFLAGS@ -CMOCKA_LIBS = @CMOCKA_LIBS@ -CPP = @CPP@ -CPPFLAGS = @CPPFLAGS@ -CUPSCONFIG = @CUPSCONFIG@ -CXX = @CXX@ -CXXDEPMODE = @CXXDEPMODE@ -CXXFLAGS = @CXXFLAGS@ -CXX_BINARY = @CXX_BINARY@ -CYGPATH_W = @CYGPATH_W@ -DEFS = @DEFS@ -DEPDIR = @DEPDIR@ -ECHO_C = @ECHO_C@ -ECHO_N = @ECHO_N@ -ECHO_T = @ECHO_T@ -EGREP = @EGREP@ -ENABLE_UNITTESTS = @ENABLE_UNITTESTS@ -EXEEXT = @EXEEXT@ -GREP = @GREP@ -INSTALL = @INSTALL@ -INSTALL_DATA = @INSTALL_DATA@ -INSTALL_PROGRAM = @INSTALL_PROGRAM@ -INSTALL_SCRIPT = @INSTALL_SCRIPT@ -INSTALL_STRIP_PROGRAM = @INSTALL_STRIP_PROGRAM@ -IPMIMONITORING_CFLAGS = @IPMIMONITORING_CFLAGS@ -IPMIMONITORING_LIBS = @IPMIMONITORING_LIBS@ -JSON_CFLAGS = @JSON_CFLAGS@ -JSON_LIBS = @JSON_LIBS@ -LDFLAGS = @LDFLAGS@ -LIBCAP_CFLAGS = @LIBCAP_CFLAGS@ -LIBCAP_LIBS = @LIBCAP_LIBS@ -LIBCRYPTO_CFLAGS = @LIBCRYPTO_CFLAGS@ -LIBCRYPTO_LIBS = @LIBCRYPTO_LIBS@ -LIBCURL_CFLAGS = @LIBCURL_CFLAGS@ -LIBCURL_LIBS = @LIBCURL_LIBS@ -LIBMNL_CFLAGS = @LIBMNL_CFLAGS@ -LIBMNL_LIBS = @LIBMNL_LIBS@ -LIBMONGOC_CFLAGS = @LIBMONGOC_CFLAGS@ -LIBMONGOC_LIBS = @LIBMONGOC_LIBS@ -LIBOBJS = @LIBOBJS@ -LIBS = @LIBS@ -LIBSSL_CFLAGS = @LIBSSL_CFLAGS@ -LIBSSL_LIBS = @LIBSSL_LIBS@ -LTLIBOBJS = @LTLIBOBJS@ -MAINT = @MAINT@ -MAKEINFO = @MAKEINFO@ -MATH_CFLAGS = @MATH_CFLAGS@ -MATH_LIBS = @MATH_LIBS@ -MKDIR_P = @MKDIR_P@ -NFACCT_CFLAGS = @NFACCT_CFLAGS@ -NFACCT_LIBS = @NFACCT_LIBS@ -OBJEXT = @OBJEXT@ -OPTIONAL_CUPS_CFLAGS = @OPTIONAL_CUPS_CFLAGS@ -OPTIONAL_CUPS_LIBS = @OPTIONAL_CUPS_LIBS@ -OPTIONAL_IPMIMONITORING_CFLAGS = @OPTIONAL_IPMIMONITORING_CFLAGS@ -OPTIONAL_IPMIMONITORING_LIBS = @OPTIONAL_IPMIMONITORING_LIBS@ -OPTIONAL_JSONC_LIBS = @OPTIONAL_JSONC_LIBS@ -OPTIONAL_JUDY_LIBS = @OPTIONAL_JUDY_LIBS@ -OPTIONAL_KINESIS_CFLAGS = @OPTIONAL_KINESIS_CFLAGS@ -OPTIONAL_KINESIS_LIBS = @OPTIONAL_KINESIS_LIBS@ -OPTIONAL_LIBCAP_CFLAGS = @OPTIONAL_LIBCAP_CFLAGS@ -OPTIONAL_LIBCAP_LIBS = @OPTIONAL_LIBCAP_LIBS@ -OPTIONAL_LZ4_LIBS = @OPTIONAL_LZ4_LIBS@ -OPTIONAL_MATH_CFLAGS = @OPTIONAL_MATH_CFLAGS@ -OPTIONAL_MATH_LIBS = @OPTIONAL_MATH_LIBS@ -OPTIONAL_MONGOC_CFLAGS = @OPTIONAL_MONGOC_CFLAGS@ -OPTIONAL_MONGOC_LIBS = @OPTIONAL_MONGOC_LIBS@ -OPTIONAL_NFACCT_CFLAGS = @OPTIONAL_NFACCT_CFLAGS@ -OPTIONAL_NFACCT_LIBS = @OPTIONAL_NFACCT_LIBS@ -OPTIONAL_PROMETHEUS_REMOTE_WRITE_CFLAGS = @OPTIONAL_PROMETHEUS_REMOTE_WRITE_CFLAGS@ -OPTIONAL_PROMETHEUS_REMOTE_WRITE_LIBS = @OPTIONAL_PROMETHEUS_REMOTE_WRITE_LIBS@ -OPTIONAL_SSL_LIBS = @OPTIONAL_SSL_LIBS@ -OPTIONAL_UUID_CFLAGS = @OPTIONAL_UUID_CFLAGS@ -OPTIONAL_UUID_LIBS = @OPTIONAL_UUID_LIBS@ -OPTIONAL_UV_LIBS = @OPTIONAL_UV_LIBS@ -OPTIONAL_XENSTAT_CFLAGS = @OPTIONAL_XENSTAT_CFLAGS@ -OPTIONAL_XENSTAT_LIBS = @OPTIONAL_XENSTAT_LIBS@ -OPTIONAL_ZLIB_CFLAGS = @OPTIONAL_ZLIB_CFLAGS@ -OPTIONAL_ZLIB_LIBS = @OPTIONAL_ZLIB_LIBS@ -PACKAGE = @PACKAGE@ -PACKAGE_BUGREPORT = @PACKAGE_BUGREPORT@ -PACKAGE_NAME = @PACKAGE_NAME@ -PACKAGE_RPM_VERSION = @PACKAGE_RPM_VERSION@ -PACKAGE_STRING = @PACKAGE_STRING@ -PACKAGE_TARNAME = @PACKAGE_TARNAME@ -PACKAGE_URL = @PACKAGE_URL@ -PACKAGE_VERSION = @PACKAGE_VERSION@ -PATH_SEPARATOR = @PATH_SEPARATOR@ -PKG_CONFIG = @PKG_CONFIG@ -PKG_CONFIG_LIBDIR = @PKG_CONFIG_LIBDIR@ -PKG_CONFIG_PATH = @PKG_CONFIG_PATH@ -PROTOBUF_CFLAGS = @PROTOBUF_CFLAGS@ -PROTOBUF_LIBS = @PROTOBUF_LIBS@ -PROTOC = @PROTOC@ -PTHREAD_CC = @PTHREAD_CC@ -PTHREAD_CFLAGS = @PTHREAD_CFLAGS@ -PTHREAD_LIBS = @PTHREAD_LIBS@ -SET_MAKE = @SET_MAKE@ -SHELL = @SHELL@ -SSE_CANDIDATE = @SSE_CANDIDATE@ -STRIP = @STRIP@ -TEST_CFLAGS = @TEST_CFLAGS@ -TEST_LIBS = @TEST_LIBS@ -UUID_CFLAGS = @UUID_CFLAGS@ -UUID_LIBS = @UUID_LIBS@ -VERSION = @VERSION@ -XENLIGHT_CFLAGS = @XENLIGHT_CFLAGS@ -XENLIGHT_LIBS = @XENLIGHT_LIBS@ -YAJL_CFLAGS = @YAJL_CFLAGS@ -YAJL_LIBS = @YAJL_LIBS@ -ZLIB_CFLAGS = @ZLIB_CFLAGS@ -ZLIB_LIBS = @ZLIB_LIBS@ -abs_builddir = @abs_builddir@ -abs_srcdir = @abs_srcdir@ -abs_top_builddir = @abs_top_builddir@ -abs_top_srcdir = @abs_top_srcdir@ -ac_ct_CC = @ac_ct_CC@ -ac_ct_CXX = @ac_ct_CXX@ -am__include = @am__include@ -am__leading_dot = @am__leading_dot@ -am__quote = @am__quote@ -am__tar = @am__tar@ -am__untar = @am__untar@ -ax_pthread_config = @ax_pthread_config@ -bindir = @bindir@ -build = @build@ -build_alias = @build_alias@ -build_cpu = @build_cpu@ -build_os = @build_os@ -build_target = @build_target@ -build_vendor = @build_vendor@ -builddir = @builddir@ -cachedir = @cachedir@ -chartsdir = @chartsdir@ -configdir = @configdir@ -datadir = @datadir@ -datarootdir = @datarootdir@ -docdir = @docdir@ -dvidir = @dvidir@ -exec_prefix = @exec_prefix@ -has_jemalloc = @has_jemalloc@ -has_tcmalloc = @has_tcmalloc@ -host = @host@ -host_alias = @host_alias@ -host_cpu = @host_cpu@ -host_os = @host_os@ -host_vendor = @host_vendor@ -htmldir = @htmldir@ -includedir = @includedir@ -infodir = @infodir@ -install_sh = @install_sh@ -libconfigdir = @libconfigdir@ -libdir = @libdir@ -libexecdir = @libexecdir@ -localedir = @localedir@ -localstatedir = @localstatedir@ -logdir = @logdir@ -mandir = @mandir@ -mkdir_p = @mkdir_p@ -nodedir = @nodedir@ -oldincludedir = @oldincludedir@ -pdfdir = @pdfdir@ -pluginsdir = @pluginsdir@ -prefix = @prefix@ -program_transform_name = @program_transform_name@ -psdir = @psdir@ -pythondir = @pythondir@ -registrydir = @registrydir@ -runstatedir = @runstatedir@ -sbindir = @sbindir@ -sharedstatedir = @sharedstatedir@ -srcdir = @srcdir@ -sysconfdir = @sysconfdir@ -target_alias = @target_alias@ -top_build_prefix = @top_build_prefix@ -top_builddir = @top_builddir@ -top_srcdir = @top_srcdir@ -varlibdir = @varlibdir@ -webdir = @webdir@ -AUTOMAKE_OPTIONS = subdir-objects -MAINTAINERCLEANFILES = $(srcdir)/Makefile.in -dist_noinst_DATA = \ - README.md \ - $(NULL) - -all: all-am - -.SUFFIXES: -$(srcdir)/Makefile.in: @MAINTAINER_MODE_TRUE@ $(srcdir)/Makefile.am $(am__configure_deps) - @for dep in $?; do \ - case '$(am__configure_deps)' in \ - *$$dep*) \ - ( cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh ) \ - && { if test -f $@; then exit 0; else break; fi; }; \ - exit 1;; \ - esac; \ - done; \ - echo ' cd $(top_srcdir) && $(AUTOMAKE) --gnu collectors/proc.plugin/Makefile'; \ - $(am__cd) $(top_srcdir) && \ - $(AUTOMAKE) --gnu collectors/proc.plugin/Makefile -Makefile: $(srcdir)/Makefile.in $(top_builddir)/config.status - @case '$?' in \ - *config.status*) \ - cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh;; \ - *) \ - echo ' cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__depfiles_maybe)'; \ - cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__depfiles_maybe);; \ - esac; - -$(top_builddir)/config.status: $(top_srcdir)/configure $(CONFIG_STATUS_DEPENDENCIES) - cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh - -$(top_srcdir)/configure: @MAINTAINER_MODE_TRUE@ $(am__configure_deps) - cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh -$(ACLOCAL_M4): @MAINTAINER_MODE_TRUE@ $(am__aclocal_m4_deps) - cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh -$(am__aclocal_m4_deps): -tags TAGS: - -ctags CTAGS: - -cscope cscopelist: - - -distdir: $(DISTFILES) - @srcdirstrip=`echo "$(srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \ - topsrcdirstrip=`echo "$(top_srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \ - list='$(DISTFILES)'; \ - dist_files=`for file in $$list; do echo $$file; done | \ - sed -e "s|^$$srcdirstrip/||;t" \ - -e "s|^$$topsrcdirstrip/|$(top_builddir)/|;t"`; \ - case $$dist_files in \ - */*) $(MKDIR_P) `echo "$$dist_files" | \ - sed '/\//!d;s|^|$(distdir)/|;s,/[^/]*$$,,' | \ - sort -u` ;; \ - esac; \ - for file in $$dist_files; do \ - if test -f $$file || test -d $$file; then d=.; else d=$(srcdir); fi; \ - if test -d $$d/$$file; then \ - dir=`echo "/$$file" | sed -e 's,/[^/]*$$,,'`; \ - if test -d "$(distdir)/$$file"; then \ - find "$(distdir)/$$file" -type d ! -perm -700 -exec chmod u+rwx {} \;; \ - fi; \ - if test -d $(srcdir)/$$file && test $$d != $(srcdir); then \ - cp -fpR $(srcdir)/$$file "$(distdir)$$dir" || exit 1; \ - find "$(distdir)/$$file" -type d ! -perm -700 -exec chmod u+rwx {} \;; \ - fi; \ - cp -fpR $$d/$$file "$(distdir)$$dir" || exit 1; \ - else \ - test -f "$(distdir)/$$file" \ - || cp -p $$d/$$file "$(distdir)/$$file" \ - || exit 1; \ - fi; \ - done -check-am: all-am -check: check-am -all-am: Makefile $(DATA) -installdirs: -install: install-am -install-exec: install-exec-am -install-data: install-data-am -uninstall: uninstall-am - -install-am: all-am - @$(MAKE) $(AM_MAKEFLAGS) install-exec-am install-data-am - -installcheck: installcheck-am -install-strip: - if test -z '$(STRIP)'; then \ - $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \ - install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \ - install; \ - else \ - $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \ - install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \ - "INSTALL_PROGRAM_ENV=STRIPPROG='$(STRIP)'" install; \ - fi -mostlyclean-generic: - -clean-generic: - -distclean-generic: - -test -z "$(CONFIG_CLEAN_FILES)" || rm -f $(CONFIG_CLEAN_FILES) - -test . = "$(srcdir)" || test -z "$(CONFIG_CLEAN_VPATH_FILES)" || rm -f $(CONFIG_CLEAN_VPATH_FILES) - -maintainer-clean-generic: - @echo "This command is intended for maintainers to use" - @echo "it deletes files that may require special tools to rebuild." - -test -z "$(MAINTAINERCLEANFILES)" || rm -f $(MAINTAINERCLEANFILES) -clean: clean-am - -clean-am: clean-generic mostlyclean-am - -distclean: distclean-am - -rm -f Makefile -distclean-am: clean-am distclean-generic - -dvi: dvi-am - -dvi-am: - -html: html-am - -html-am: - -info: info-am - -info-am: - -install-data-am: - -install-dvi: install-dvi-am - -install-dvi-am: - -install-exec-am: - -install-html: install-html-am - -install-html-am: - -install-info: install-info-am - -install-info-am: - -install-man: - -install-pdf: install-pdf-am - -install-pdf-am: - -install-ps: install-ps-am - -install-ps-am: - -installcheck-am: - -maintainer-clean: maintainer-clean-am - -rm -f Makefile -maintainer-clean-am: distclean-am maintainer-clean-generic - -mostlyclean: mostlyclean-am - -mostlyclean-am: mostlyclean-generic - -pdf: pdf-am - -pdf-am: - -ps: ps-am - -ps-am: - -uninstall-am: - -.MAKE: install-am install-strip - -.PHONY: all all-am check check-am clean clean-generic cscopelist-am \ - ctags-am distclean distclean-generic distdir dvi dvi-am html \ - html-am info info-am install install-am install-data \ - install-data-am install-dvi install-dvi-am install-exec \ - install-exec-am install-html install-html-am install-info \ - install-info-am install-man install-pdf install-pdf-am \ - install-ps install-ps-am install-strip installcheck \ - installcheck-am installdirs maintainer-clean \ - maintainer-clean-generic mostlyclean mostlyclean-generic pdf \ - pdf-am ps ps-am tags-am uninstall uninstall-am - -.PRECIOUS: Makefile - - -# Tell versions [3.59,3.63) of GNU make to not export all variables. -# Otherwise a system limit (for SysV at least) may be exceeded. -.NOEXPORT: diff --git a/collectors/proc.plugin/README.md b/collectors/proc.plugin/README.md index 7e2aa109..085afb4f 100644 --- a/collectors/proc.plugin/README.md +++ b/collectors/proc.plugin/README.md @@ -1,3 +1,8 @@ +<!-- +title: "proc.plugin" +custom_edit_url: https://github.com/netdata/netdata/edit/master/collectors/proc.plugin/README.md +--> + # proc.plugin - `/proc/net/dev` (all network interfaces for all their values) @@ -6,6 +11,7 @@ - `/proc/net/snmp` (total IPv4, TCP and UDP usage) - `/proc/net/snmp6` (total IPv6 usage) - `/proc/net/netstat` (more IPv4 usage) +- `/proc/net/wireless` (wireless extension) - `/proc/net/stat/nf_conntrack` (connection tracking performance) - `/proc/net/stat/synproxy` (synproxy performance) - `/proc/net/ip_vs/stats` (IPVS connection statistics) @@ -18,8 +24,10 @@ - `/proc/interrupts` (total and per core hardware interrupts) - `/proc/softirqs` (total and per core software interrupts) - `/proc/loadavg` (system load and total processes running) +- `/proc/pressure/{cpu,memory,io}` (pressure stall information) - `/proc/sys/kernel/random/entropy_avail` (random numbers pool availability - used in cryptography) - `/sys/class/power_supply` (power supply properties) +- `/sys/class/infiniband` (infiniband interconnect) - `ipc` (IPC semaphores and message queues) - `ksm` Kernel Same-Page Merging performance (several files under `/sys/kernel/mm/ksm`). - `netdata` (internal Netdata resources utilization) @@ -78,8 +86,8 @@ By default, Netdata will enable monitoring metrics only when they are not zero. Netdata categorizes all block devices in 3 categories: -1. physical disks (i.e. block devices that does not have slaves and are not partitions) -2. virtual disks (i.e. block devices that have slaves - like RAID devices) +1. physical disks (i.e. block devices that do not have child devices and are not partitions) +2. virtual disks (i.e. block devices that have child devices - like RAID devices) 3. disk partitions (i.e. block devices that are part of a physical disk) Performance metrics are enabled by default for all disk devices, except partitions and not-mounted virtual disks. Of course, you can enable/disable monitoring any block device by editing the Netdata configuration file. @@ -226,13 +234,31 @@ So, to disable performance metrics for all loop devices you could add `performan ## Monitoring CPUs -The `/proc/stat` module monitors CPU utilization, interrupts, context switches, processes started/running, thermal throttling, frequency, and idle states. It gathers this information from multiple files. +The `/proc/stat` module monitors CPU utilization, interrupts, context switches, processes started/running, thermal +throttling, frequency, and idle states. It gathers this information from multiple files. -If more than 50 cores are present in a system then CPU thermal throttling, frequency, and idle state charts are disabled. +If your system has more than 50 processors (`physical processors * cores per processor * threads per core`), the Agent +automatically disables CPU thermal throttling, frequency, and idle state charts. To override this default, see the next +section on configuration. -#### configuration +### Configuration + +The settings for monitoring CPUs is in the `[plugin:proc:/proc/stat]` of your `netdata.conf` file. -`keep per core files open` option in the `[plugin:proc:/proc/stat]` configuration section allows reducing the number of file operations on multiple files. +The `keep per core files open` option lets you reduce the number of file operations on multiple files. + +If your system has more than 50 processors and you would like to see the CPU thermal throttling, frequency, and idle +state charts that are automatically disabled, you can set the following boolean options in the +`[plugin:proc:/proc/stat]` section. + +```conf + keep per core files open = yes + keep cpuidle files open = yes + core_throttle_count = yes + package_throttle_count = yes + cpu frequency = yes + cpu idle states = yes +``` ### CPU frequency @@ -295,11 +321,50 @@ each state. By default Netdata will enable monitoring metrics only when they are not zero. If they are constantly zero they are ignored. Metrics that will start having values, after Netdata is started, will be detected and charts will be automatically added to the dashboard (a refresh of the dashboard is needed for them to appear though). +### Monitoring wireless network interfaces + +The settings for monitoring wireless is in the `[plugin:proc:/proc/net/wireless]` section of your `netdata.conf` file. + +```conf + status for all interfaces = yes + quality for all interfaces = yes + discarded packets for all interfaces = yes + missed beacon for all interface = yes +``` + +You can set the following values for each configuration option: + +- `auto` = enable monitoring if the collected values are not zero +- `yes` = enable monitoring +- `no` = disable monitoring + +#### Monitored wireless interface metrics + +- **Status** + The current state of the interface. This is a device-dependent option. + +- **Link** + Overall quality of the link. + +- **Level** + Received signal strength (RSSI), which indicates how strong the received signal is. + +- **Noise** + Background noise level. + +- **Discarded packets** + Discarded packets for: Number of packets received with a different NWID or ESSID (`nwid`), unable to decrypt (`crypt`), hardware was not able to properly re-assemble the link layer fragments (`frag`), packets failed to deliver (`retry`), and packets lost in relation with specific wireless operations (`misc`). + +- **Missed beacon** + Number of periodic beacons from the cell or the access point the interface has missed. + +#### Wireless configuration + #### alarms There are several alarms defined in `health.d/net.conf`. -The tricky ones are `inbound packets dropped` and `inbound packets dropped ratio`. They have quite a strict policy so that they warn users about possible issues. These alarms can be annoying for some network configurations. It is especially true for some bonding configurations if an interface is a slave or a bonding interface itself. If it is expected to have a certain number of drops on an interface for a certain network configuration, a separate alarm with different triggering thresholds can be created or the existing one can be disabled for this specific interface. It can be done with the help of the [families](../../health/#alarm-line-families) line in the alarm configuration. For example, if you want to disable the `inbound packets dropped` alarm for `eth0`, set `families: !eth0 *` in the alarm definition for `template: inbound_packets_dropped`. +The tricky ones are `inbound packets dropped` and `inbound packets dropped ratio`. They have quite a strict policy so that they warn users about possible issues. These alarms can be annoying for some network configurations. It is especially true for some bonding configurations if an interface is a child or a bonding interface itself. If it is expected to have a certain number of drops on an interface for a certain network configuration, a separate alarm with different triggering thresholds can be created or the existing one can be disabled for this specific interface. It can be done with the help of the [families](/health/REFERENCE.md#alarm-line-families) line in the alarm configuration. For example, if you want to disable the `inbound packets dropped` alarm for `eth0`, set `families: !eth0 *` in the alarm definition for `template: inbound_packets_dropped`. #### configuration @@ -436,6 +501,48 @@ and metrics: the corresponding `min` or `empty`, which will then always read as zero. This way, alerts which match on these will still work. +## Infiniband interconnect + +This module monitors every active Infiniband port. It provides generic counters statistics, and per-vendor hw-counters (if vendor is supported). + +### Monitored interface metrics + +Each port will have its counters metrics monitored, grouped in the following charts: + +- **Bandwidth usage** + Sent/Received data, in KB/s + +- **Packets Statistics** + Sent/Received packets, in 3 categories: total, unicast and multicast. + +- **Errors Statistics** + Many errors counters are provided, presenting statistics for: + - Packets: malformated, sent/received discarded by card/switch, missing ressource + - Link: downed, recovered, integrity error, minor error + - Other events: Tick Wait to send, buffer overrun + +If your vendor is supported, you'll also get HW-Counters statistics. These being vendor specific, please refer to their documentation. + +- Mellanox: [see statistics documentation](https://community.mellanox.com/s/article/understanding-mlx5-linux-counters-and-status-parameters) + +### configuration + +Default configuration will monitor only enabled infiniband ports, and refresh newly activated or created ports every 30 seconds + +``` +[plugin:proc:/sys/class/infiniband] + # dirname to monitor = /sys/class/infiniband + # bandwidth counters = yes + # packets counters = yes + # errors counters = yes + # hardware packets counters = auto + # hardware errors counters = auto + # monitor only ports being active = auto + # disable by default interfaces matching = + # refresh ports state every seconds = 30 +``` + + ## IPC ### Monitored IPC metrics diff --git a/collectors/proc.plugin/plugin_proc.c b/collectors/proc.plugin/plugin_proc.c index fcb1babc..19230c09 100644 --- a/collectors/proc.plugin/plugin_proc.c +++ b/collectors/proc.plugin/plugin_proc.c @@ -21,6 +21,9 @@ static struct proc_module { { .name = "/proc/loadavg", .dim = "loadavg", .func = do_proc_loadavg }, { .name = "/proc/sys/kernel/random/entropy_avail", .dim = "entropy", .func = do_proc_sys_kernel_random_entropy_avail }, + // pressure metrics + { .name = "/proc/pressure", .dim = "pressure", .func = do_proc_pressure }, + // CPU metrics { .name = "/proc/interrupts", .dim = "interrupts", .func = do_proc_interrupts }, { .name = "/proc/softirqs", .dim = "softirqs", .func = do_proc_softirqs }, @@ -36,6 +39,7 @@ static struct proc_module { // network metrics { .name = "/proc/net/dev", .dim = "netdev", .func = do_proc_net_dev }, + { .name = "/proc/net/wireless", .dim = "netwireless", .func = do_proc_net_wireless }, { .name = "/proc/net/sockstat", .dim = "sockstat", .func = do_proc_net_sockstat }, { .name = "/proc/net/sockstat6", .dim = "sockstat6", .func = do_proc_net_sockstat6 }, { .name = "/proc/net/netstat", .dim = "netstat", .func = do_proc_net_netstat }, // this has to be before /proc/net/snmp, because there is a shared metric @@ -44,6 +48,7 @@ static struct proc_module { { .name = "/proc/net/sctp/snmp", .dim = "sctp", .func = do_proc_net_sctp_snmp }, { .name = "/proc/net/softnet_stat", .dim = "softnet", .func = do_proc_net_softnet_stat }, { .name = "/proc/net/ip_vs/stats", .dim = "ipvs", .func = do_proc_net_ip_vs_stats }, + { .name = "/sys/class/infiniband", .dim = "infiniband", .func = do_sys_class_infiniband }, // firewall metrics { .name = "/proc/net/stat/conntrack", .dim = "conntrack", .func = do_proc_net_stat_conntrack }, @@ -145,7 +150,7 @@ void *proc_main(void *ptr) { static RRDSET *st = NULL; if(unlikely(!st)) { - st = rrdset_find_bytype_localhost("netdata", "plugin_proc_modules"); + st = rrdset_find_active_bytype_localhost("netdata", "plugin_proc_modules"); if(!st) { st = rrdset_create_localhost( diff --git a/collectors/proc.plugin/plugin_proc.h b/collectors/proc.plugin/plugin_proc.h index cb9a0c5f..108c026a 100644 --- a/collectors/proc.plugin/plugin_proc.h +++ b/collectors/proc.plugin/plugin_proc.h @@ -25,6 +25,7 @@ extern void *proc_main(void *ptr); extern int do_proc_net_dev(int update_every, usec_t dt); +extern int do_proc_net_wireless(int update_every, usec_t dt); extern int do_proc_diskstats(int update_every, usec_t dt); extern int do_proc_mdstat(int update_every, usec_t dt); extern int do_proc_net_snmp(int update_every, usec_t dt); @@ -40,6 +41,7 @@ extern int do_proc_net_rpc_nfsd(int update_every, usec_t dt); extern int do_proc_sys_kernel_random_entropy_avail(int update_every, usec_t dt); extern int do_proc_interrupts(int update_every, usec_t dt); extern int do_proc_softirqs(int update_every, usec_t dt); +extern int do_proc_pressure(int update_every, usec_t dt); extern int do_sys_kernel_mm_ksm(int update_every, usec_t dt); extern int do_sys_block_zram(int update_every, usec_t dt); extern int do_proc_loadavg(int update_every, usec_t dt); @@ -56,16 +58,19 @@ extern int do_proc_net_sctp_snmp(int update_every, usec_t dt); extern int do_ipc(int update_every, usec_t dt); extern int do_sys_class_power_supply(int update_every, usec_t dt); extern int do_proc_pagetypeinfo(int update_every, usec_t dt); +extern int do_sys_class_infiniband(int update_every, usec_t dt); extern int get_numa_node_count(void); // metrics that need to be shared among data collectors extern unsigned long long tcpext_TCPSynRetrans; // netdev renames -extern void netdev_rename_device_add(const char *host_device, const char *container_device, const char *container_name); +extern void netdev_rename_device_add( + const char *host_device, const char *container_device, const char *container_name, struct label *labels); extern void netdev_rename_device_del(const char *host_device); #include "proc_self_mountinfo.h" +#include "proc_pressure.h" #include "zfs_common.h" #else // (TARGET_OS == OS_LINUX) diff --git a/collectors/proc.plugin/proc_loadavg.c b/collectors/proc.plugin/proc_loadavg.c index db95b168..8b78ecc9 100644 --- a/collectors/proc.plugin/proc_loadavg.c +++ b/collectors/proc.plugin/proc_loadavg.c @@ -46,6 +46,10 @@ int do_proc_loadavg(int update_every, usec_t dt) { //unsigned long long running_processes = str2ull(procfile_lineword(ff, 0, 3)); unsigned long long active_processes = str2ull(procfile_lineword(ff, 0, 4)); + + //get system pid_max + unsigned long long max_processes = get_system_pid_max(); + // //unsigned long long next_pid = str2ull(procfile_lineword(ff, 0, 5)); @@ -95,6 +99,7 @@ int do_proc_loadavg(int update_every, usec_t dt) { if(likely(do_all_processes)) { static RRDSET *processes_chart = NULL; static RRDDIM *rd_active = NULL; + static RRDSETVAR *rd_pidmax; if(unlikely(!processes_chart)) { processes_chart = rrdset_create_localhost( @@ -113,10 +118,12 @@ int do_proc_loadavg(int update_every, usec_t dt) { ); rd_active = rrddim_add(processes_chart, "active", NULL, 1, 1, RRD_ALGORITHM_ABSOLUTE); + rd_pidmax = rrdsetvar_custom_chart_variable_create(processes_chart, "pidmax"); } else rrdset_next(processes_chart); rrddim_set_by_pointer(processes_chart, rd_active, active_processes); + rrdsetvar_custom_chart_variable_set(rd_pidmax, max_processes); rrdset_done(processes_chart); } diff --git a/collectors/proc.plugin/proc_mdstat.c b/collectors/proc.plugin/proc_mdstat.c index abfd2ff1..e932453b 100644 --- a/collectors/proc.plugin/proc_mdstat.c +++ b/collectors/proc.plugin/proc_mdstat.c @@ -1,641 +1,647 @@ -// SPDX-License-Identifier: GPL-3.0-or-later
-
-#include "plugin_proc.h"
-
-#define PLUGIN_PROC_MODULE_MDSTAT_NAME "/proc/mdstat"
-
-struct raid {
- int redundant;
- char *name;
- uint32_t hash;
-
- RRDDIM *rd_health;
- unsigned long long failed_disks;
-
- RRDSET *st_disks;
- RRDDIM *rd_down;
- RRDDIM *rd_inuse;
- unsigned long long total_disks;
- unsigned long long inuse_disks;
-
- RRDSET *st_operation;
- RRDDIM *rd_check;
- RRDDIM *rd_resync;
- RRDDIM *rd_recovery;
- RRDDIM *rd_reshape;
- unsigned long long check;
- unsigned long long resync;
- unsigned long long recovery;
- unsigned long long reshape;
-
- RRDSET *st_finish;
- RRDDIM *rd_finish_in;
- unsigned long long finish_in;
-
- RRDSET *st_speed;
- RRDDIM *rd_speed;
- unsigned long long speed;
-
- char *mismatch_cnt_filename;
- RRDSET *st_mismatch_cnt;
- RRDDIM *rd_mismatch_cnt;
- unsigned long long mismatch_cnt;
-
- RRDSET *st_nonredundant;
- RRDDIM *rd_nonredundant;
-};
-
-struct old_raid {
- int redundant;
- char *name;
- uint32_t hash;
- int found;
-};
-
-static inline char *remove_trailing_chars(char *s, char c) {
- while(*s) {
- if(unlikely(*s == c)) {
- *s = '\0';
- }
- s++;
- }
- return s;
-}
-
-static inline void make_chart_obsolete(char *name, const char *id_modifier) {
- char id[50 + 1];
- RRDSET *st = NULL;
-
- if(likely(name && id_modifier)) {
- snprintfz(id, 50, "mdstat.%s_%s", name, id_modifier);
- st = rrdset_find_byname_localhost(id);
- if(likely(st)) rrdset_is_obsolete(st);
- }
-}
-
-int do_proc_mdstat(int update_every, usec_t dt) {
- (void)dt;
- static procfile *ff = NULL;
- static int do_health = -1, do_nonredundant = -1, do_disks = -1, do_operations = -1, do_mismatch = -1, do_mismatch_config = -1;
- static int make_charts_obsolete = -1;
- static char *mdstat_filename = NULL, *mismatch_cnt_filename = NULL;
- static struct raid *raids = NULL;
- static size_t raids_allocated = 0;
- size_t raids_num = 0, raid_idx = 0, redundant_num = 0;
- static struct old_raid *old_raids = NULL;
- static size_t old_raids_allocated = 0;
- size_t old_raid_idx = 0;
-
- if(unlikely(do_health == -1)){
- do_health = config_get_boolean("plugin:proc:/proc/mdstat", "faulty devices", CONFIG_BOOLEAN_YES);
- do_nonredundant = config_get_boolean("plugin:proc:/proc/mdstat", "nonredundant arrays availability", CONFIG_BOOLEAN_YES);
- do_mismatch_config = config_get_boolean_ondemand("plugin:proc:/proc/mdstat", "mismatch count", CONFIG_BOOLEAN_AUTO);
- do_disks = config_get_boolean("plugin:proc:/proc/mdstat", "disk stats", CONFIG_BOOLEAN_YES);
- do_operations = config_get_boolean("plugin:proc:/proc/mdstat", "operation status", CONFIG_BOOLEAN_YES);
-
- make_charts_obsolete = config_get_boolean("plugin:proc:/proc/mdstat", "make charts obsolete", CONFIG_BOOLEAN_YES);
-
- char filename[FILENAME_MAX + 1];
-
- snprintfz(filename, FILENAME_MAX, "%s%s", netdata_configured_host_prefix, "/proc/mdstat");
- mdstat_filename = config_get("plugin:proc:/proc/mdstat", "filename to monitor", filename);
-
- snprintfz(filename, FILENAME_MAX, "%s%s", netdata_configured_host_prefix, "/sys/block/%s/md/mismatch_cnt");
- mismatch_cnt_filename = config_get("plugin:proc:/proc/mdstat", "mismatch_cnt filename to monitor", filename);
- }
-
- if(unlikely(!ff)) {
- ff = procfile_open(mdstat_filename, " \t:", PROCFILE_FLAG_DEFAULT);
- if(unlikely(!ff)) return 1;
- }
-
- ff = procfile_readall(ff);
- if(unlikely(!ff)) return 0; // we return 0, so that we will retry opening it next time
-
- size_t lines = procfile_lines(ff);
- size_t words = 0;
-
- if(unlikely(lines < 2)) {
- error("Cannot read /proc/mdstat. Expected 2 or more lines, read %zu.", lines);
- return 1;
- }
-
- // find how many raids are there
- size_t l;
- raids_num = 0;
- for(l = 1; l < lines - 2 ; l++) {
- if(unlikely(procfile_lineword(ff, l, 1)[0] == 'a')) // check if the raid is active
- raids_num++;
- }
-
- if(unlikely(!raids_num && !old_raids_allocated)) return 0; // we return 0, so that we will retry searching for raids next time
-
- // allocate the memory we need;
- if(unlikely(raids_num != raids_allocated)) {
- for(raid_idx = 0; raid_idx < raids_allocated; raid_idx++) {
- struct raid *raid = &raids[raid_idx];
- freez(raid->name);
- freez(raid->mismatch_cnt_filename);
- }
- if(raids_num) {
- raids = (struct raid *)reallocz(raids, raids_num * sizeof(struct raid));
- memset(raids, 0, raids_num * sizeof(struct raid));
- }
- else {
- freez(raids);
- raids = NULL;
- }
- raids_allocated = raids_num;
- }
-
- // loop through all lines except the first and the last ones
- for(l = 1, raid_idx = 0; l < (lines - 2) && raid_idx < raids_num; l++) {
- struct raid *raid = &raids[raid_idx];
- raid->redundant = 0;
-
- words = procfile_linewords(ff, l);
- if(unlikely(words < 2)) continue;
-
- if(unlikely(procfile_lineword(ff, l, 1)[0] != 'a')) continue;
- if(unlikely(!raid->name)) {
- raid->name = strdupz(procfile_lineword(ff, l, 0));
- raid->hash = simple_hash(raid->name);
- }
- else if(unlikely(strcmp(raid->name, procfile_lineword(ff, l, 0)))) {
- freez(raid->name);
- freez(raid->mismatch_cnt_filename);
- memset(raid, 0, sizeof(struct raid));
- raid->name = strdupz(procfile_lineword(ff, l, 0));
- raid->hash = simple_hash(raid->name);
- }
- if(unlikely(!raid->name || !raid->name[0])) continue;
- raid_idx++;
-
- // check if raid has disk status
- l++;
- words = procfile_linewords(ff, l);
- if(words < 2 || procfile_lineword(ff, l, words - 1)[0] != '[') continue;
-
- // split inuse and total number of disks
- if(likely(do_health || do_disks)) {
- char *s = NULL, *str_total = NULL, *str_inuse = NULL;
-
- s = procfile_lineword(ff, l, words - 2);
- if(unlikely(s[0] != '[')) {
- error("Cannot read /proc/mdstat raid health status. Unexpected format: missing opening bracket.");
- continue;
- }
- str_total = ++s;
- while(*s) {
- if(unlikely(*s == '/')) {
- *s = '\0';
- str_inuse = s + 1;
- }
- else if(unlikely(*s == ']')) {
- *s = '\0';
- break;
- }
- s++;
- }
- if(unlikely(str_total[0] == '\0' || !str_inuse || str_inuse[0] == '\0')) {
- error("Cannot read /proc/mdstat raid health status. Unexpected format.");
- continue;
- }
-
- raid->inuse_disks = str2ull(str_inuse);
- raid->total_disks = str2ull(str_total);
- raid->failed_disks = raid->total_disks - raid->inuse_disks;
- }
-
- raid->redundant = 1;
- redundant_num++;
- l++;
-
- // check if any operation is performed on the raid
- if(likely(do_operations)) {
- char *s = NULL;
-
- raid->check = 0;
- raid->resync = 0;
- raid->recovery = 0;
- raid->reshape = 0;
- raid->finish_in = 0;
- raid->speed = 0;
-
- words = procfile_linewords(ff, l);
- if(likely(words < 2)) continue;
- if(unlikely(procfile_lineword(ff, l, 0)[0] != '[')) continue;
- if(unlikely(words < 7)) {
- error("Cannot read /proc/mdstat line. Expected 7 params, read %zu.", words);
- continue;
- }
-
- char *word;
- word = procfile_lineword(ff, l, 3);
- remove_trailing_chars(word, '%');
-
- unsigned long long percentage = (unsigned long long)(str2ld(word, NULL) * 100);
- // possible operations: check, resync, recovery, reshape
- // 4-th character is unique for each operation so it is checked
- switch(procfile_lineword(ff, l, 1)[3]) {
- case 'c': // check
- raid->check = percentage;
- break;
- case 'y': // resync
- raid->resync = percentage;
- break;
- case 'o': // recovery
- raid->recovery = percentage;
- break;
- case 'h': // reshape
- raid->reshape = percentage;
- break;
- }
-
- word = procfile_lineword(ff, l, 5);
- s = remove_trailing_chars(word, 'm'); // remove trailing "min"
-
- word += 7; // skip leading "finish="
-
- if(likely(s > word))
- raid->finish_in = (unsigned long long)(str2ld(word, NULL) * 60);
-
- word = procfile_lineword(ff, l, 6);
- s = remove_trailing_chars(word, 'K'); // remove trailing "K/sec"
-
- word += 6; // skip leading "speed="
-
- if(likely(s > word))
- raid->speed = str2ull(word);
- }
- }
-
- // read mismatch_cnt files
- if(do_mismatch == -1) {
- if(do_mismatch_config == CONFIG_BOOLEAN_AUTO) {
- if(raids_num > 50)
- do_mismatch = CONFIG_BOOLEAN_NO;
- else
- do_mismatch = CONFIG_BOOLEAN_YES;
- }
- else
- do_mismatch = do_mismatch_config;
- }
-
- if(likely(do_mismatch)) {
- for(raid_idx = 0; raid_idx < raids_num ; raid_idx++) {
- char filename[FILENAME_MAX + 1];
- struct raid *raid = &raids[raid_idx];
-
- if(likely(raid->redundant)) {
- if(unlikely(!raid->mismatch_cnt_filename)) {
- snprintfz(filename, FILENAME_MAX, mismatch_cnt_filename, raid->name);
- raid->mismatch_cnt_filename = strdupz(filename);
- }
- if(unlikely(read_single_number_file(raid->mismatch_cnt_filename, &raid->mismatch_cnt))) {
- error("Cannot read file '%s'", raid->mismatch_cnt_filename);
- do_mismatch = CONFIG_BOOLEAN_NO;
- error("Monitoring for mismatch count has been disabled");
- break;
- }
- }
- }
- }
-
- // check for disappeared raids
- for(old_raid_idx = 0; old_raid_idx < old_raids_allocated; old_raid_idx++) {
- struct old_raid *old_raid = &old_raids[old_raid_idx];
- int found = 0;
-
- for(raid_idx = 0; raid_idx < raids_num ; raid_idx++) {
- struct raid *raid = &raids[raid_idx];
-
- if(unlikely(raid->hash == old_raid->hash
- && !strcmp(raid->name, old_raid->name)
- && raid->redundant == old_raid->redundant)) found = 1;
- }
-
- old_raid->found = found;
- }
-
- int raid_disappeared = 0;
- for(old_raid_idx = 0; old_raid_idx < old_raids_allocated; old_raid_idx++) {
- struct old_raid *old_raid = &old_raids[old_raid_idx];
-
- if(unlikely(!old_raid->found)) {
- if(likely(make_charts_obsolete)) {
- make_chart_obsolete(old_raid->name, "disks");
- make_chart_obsolete(old_raid->name, "mismatch");
- make_chart_obsolete(old_raid->name, "operation");
- make_chart_obsolete(old_raid->name, "finish");
- make_chart_obsolete(old_raid->name, "speed");
- make_chart_obsolete(old_raid->name, "availability");
- }
- raid_disappeared = 1;
- }
- }
-
- // allocate memory for nonredundant arrays
- if(unlikely(raid_disappeared || old_raids_allocated != raids_num)) {
- for(old_raid_idx = 0; old_raid_idx < old_raids_allocated; old_raid_idx++) {
- freez(old_raids[old_raid_idx].name);
- }
- if(likely(raids_num)) {
- old_raids = reallocz(old_raids, sizeof(struct old_raid) * raids_num);
- memset(old_raids, 0, sizeof(struct old_raid) * raids_num);
- }
- else {
- freez(old_raids);
- old_raids = NULL;
- }
- old_raids_allocated = raids_num;
- for(old_raid_idx = 0; old_raid_idx < old_raids_allocated; old_raid_idx++) {
- struct old_raid *old_raid = &old_raids[old_raid_idx];
- struct raid *raid = &raids[old_raid_idx];
-
- old_raid->name = strdupz(raid->name);
- old_raid->hash = raid->hash;
- old_raid->redundant = raid->redundant;
- }
- }
-
- // --------------------------------------------------------------------
-
- if(likely(do_health && redundant_num)) {
- static RRDSET *st_mdstat_health = NULL;
- if(unlikely(!st_mdstat_health)) {
- st_mdstat_health = rrdset_create_localhost(
- "mdstat"
- , "mdstat_health"
- , NULL
- , "health"
- , "md.health"
- , "Faulty Devices In MD"
- , "failed disks"
- , PLUGIN_PROC_NAME
- , PLUGIN_PROC_MODULE_MDSTAT_NAME
- , NETDATA_CHART_PRIO_MDSTAT_HEALTH
- , update_every
- , RRDSET_TYPE_LINE
- );
-
- rrdset_isnot_obsolete(st_mdstat_health);
- }
- else
- rrdset_next(st_mdstat_health);
-
- if(!redundant_num) {
- if(likely(make_charts_obsolete)) make_chart_obsolete("mdstat", "health");
- }
- else {
- for(raid_idx = 0; raid_idx < raids_num; raid_idx++) {
- struct raid *raid = &raids[raid_idx];
-
- if(likely(raid->redundant)) {
- if(unlikely(!raid->rd_health && !(raid->rd_health = rrddim_find(st_mdstat_health, raid->name))))
- raid->rd_health = rrddim_add(st_mdstat_health, raid->name, NULL, 1, 1, RRD_ALGORITHM_ABSOLUTE);
-
- rrddim_set_by_pointer(st_mdstat_health, raid->rd_health, raid->failed_disks);
- }
- }
-
- rrdset_done(st_mdstat_health);
- }
- }
-
- // --------------------------------------------------------------------
-
- for(raid_idx = 0; raid_idx < raids_num ; raid_idx++) {
- struct raid *raid = &raids[raid_idx];
- char id[50 + 1];
- char family[50 + 1];
-
- if(likely(raid->redundant)) {
- if(likely(do_disks)) {
- snprintfz(id, 50, "%s_disks", raid->name);
-
- if(unlikely(!raid->st_disks && !(raid->st_disks = rrdset_find_byname_localhost(id)))) {
- snprintfz(family, 50, "%s", raid->name);
-
- raid->st_disks = rrdset_create_localhost(
- "mdstat"
- , id
- , NULL
- , family
- , "md.disks"
- , "Disks Stats"
- , "disks"
- , PLUGIN_PROC_NAME
- , PLUGIN_PROC_MODULE_MDSTAT_NAME
- , NETDATA_CHART_PRIO_MDSTAT_DISKS + raid_idx * 10
- , update_every
- , RRDSET_TYPE_STACKED
- );
-
- rrdset_isnot_obsolete(raid->st_disks);
- }
- else
- rrdset_next(raid->st_disks);
-
- if(unlikely(!raid->rd_inuse && !(raid->rd_inuse = rrddim_find(raid->st_disks, "inuse"))))
- raid->rd_inuse = rrddim_add(raid->st_disks, "inuse", NULL, 1, 1, RRD_ALGORITHM_ABSOLUTE);
- if(unlikely(!raid->rd_down && !(raid->rd_down = rrddim_find(raid->st_disks, "down"))))
- raid->rd_down = rrddim_add(raid->st_disks, "down", NULL, 1, 1, RRD_ALGORITHM_ABSOLUTE);
-
- rrddim_set_by_pointer(raid->st_disks, raid->rd_inuse, raid->inuse_disks);
- rrddim_set_by_pointer(raid->st_disks, raid->rd_down, raid->failed_disks);
-
- rrdset_done(raid->st_disks);
- }
-
- // --------------------------------------------------------------------
-
- if(likely(do_mismatch)) {
- snprintfz(id, 50, "%s_mismatch", raid->name);
-
- if(unlikely(!raid->st_mismatch_cnt && !(raid->st_mismatch_cnt = rrdset_find_byname_localhost(id)))) {
- snprintfz(family, 50, "%s", raid->name);
-
- raid->st_mismatch_cnt = rrdset_create_localhost(
- "mdstat"
- , id
- , NULL
- , family
- , "md.mismatch_cnt"
- , "Mismatch Count"
- , "unsynchronized blocks"
- , PLUGIN_PROC_NAME
- , PLUGIN_PROC_MODULE_MDSTAT_NAME
- , NETDATA_CHART_PRIO_MDSTAT_MISMATCH + raid_idx * 10
- , update_every
- , RRDSET_TYPE_LINE
- );
-
- rrdset_isnot_obsolete(raid->st_mismatch_cnt);
- }
- else
- rrdset_next(raid->st_mismatch_cnt);
-
- if(unlikely(!raid->rd_mismatch_cnt && !(raid->rd_mismatch_cnt = rrddim_find(raid->st_mismatch_cnt, "count"))))
- raid->rd_mismatch_cnt = rrddim_add(raid->st_mismatch_cnt, "count", NULL, 1, 1, RRD_ALGORITHM_ABSOLUTE);
-
- rrddim_set_by_pointer(raid->st_mismatch_cnt, raid->rd_mismatch_cnt, raid->mismatch_cnt);
-
- rrdset_done(raid->st_mismatch_cnt);
- }
-
- // --------------------------------------------------------------------
-
- if(likely(do_operations)) {
- snprintfz(id, 50, "%s_operation", raid->name);
-
- if(unlikely(!raid->st_operation && !(raid->st_operation = rrdset_find_byname_localhost(id)))) {
- snprintfz(family, 50, "%s", raid->name);
-
- raid->st_operation = rrdset_create_localhost(
- "mdstat"
- , id
- , NULL
- , family
- , "md.status"
- , "Current Status"
- , "percent"
- , PLUGIN_PROC_NAME
- , PLUGIN_PROC_MODULE_MDSTAT_NAME
- , NETDATA_CHART_PRIO_MDSTAT_OPERATION + raid_idx * 10
- , update_every
- , RRDSET_TYPE_LINE
- );
-
- rrdset_isnot_obsolete(raid->st_operation);
- }
- else
- rrdset_next(raid->st_operation);
-
- if(unlikely(!raid->rd_check && !(raid->rd_check = rrddim_find(raid->st_operation, "check"))))
- raid->rd_check = rrddim_add(raid->st_operation, "check", NULL, 1, 100, RRD_ALGORITHM_ABSOLUTE);
- if(unlikely(!raid->rd_resync && !(raid->rd_resync = rrddim_find(raid->st_operation, "resync"))))
- raid->rd_resync = rrddim_add(raid->st_operation, "resync", NULL, 1, 100, RRD_ALGORITHM_ABSOLUTE);
- if(unlikely(!raid->rd_recovery && !(raid->rd_recovery = rrddim_find(raid->st_operation, "recovery"))))
- raid->rd_recovery = rrddim_add(raid->st_operation, "recovery", NULL, 1, 100, RRD_ALGORITHM_ABSOLUTE);
- if(unlikely(!raid->rd_reshape && !(raid->rd_reshape = rrddim_find(raid->st_operation, "reshape"))))
- raid->rd_reshape = rrddim_add(raid->st_operation, "reshape", NULL, 1, 100, RRD_ALGORITHM_ABSOLUTE);
-
- rrddim_set_by_pointer(raid->st_operation, raid->rd_check, raid->check);
- rrddim_set_by_pointer(raid->st_operation, raid->rd_resync, raid->resync);
- rrddim_set_by_pointer(raid->st_operation, raid->rd_recovery, raid->recovery);
- rrddim_set_by_pointer(raid->st_operation, raid->rd_reshape, raid->reshape);
-
- rrdset_done(raid->st_operation);
-
- // --------------------------------------------------------------------
-
- snprintfz(id, 50, "%s_finish", raid->name);
-
- if(unlikely(!raid->st_finish && !(raid->st_finish = rrdset_find_byname_localhost(id)))) {
- snprintfz(family, 50, "%s", raid->name);
-
- raid->st_finish = rrdset_create_localhost(
- "mdstat"
- , id
- , NULL
- , family
- , "md.rate"
- , "Approximate Time Unit Finish"
- , "seconds"
- , PLUGIN_PROC_NAME
- , PLUGIN_PROC_MODULE_MDSTAT_NAME
- , NETDATA_CHART_PRIO_MDSTAT_FINISH + raid_idx * 10
- , update_every
- , RRDSET_TYPE_LINE
- );
-
- rrdset_isnot_obsolete(raid->st_finish);
- }
- else
- rrdset_next(raid->st_finish);
-
- if(unlikely(!raid->rd_finish_in && !(raid->rd_finish_in = rrddim_find(raid->st_finish, "finish_in"))))
- raid->rd_finish_in = rrddim_add(raid->st_finish, "finish_in", NULL, 1, 1, RRD_ALGORITHM_ABSOLUTE);
-
- rrddim_set_by_pointer(raid->st_finish, raid->rd_finish_in, raid->finish_in);
-
- rrdset_done(raid->st_finish);
-
- // --------------------------------------------------------------------
-
- snprintfz(id, 50, "%s_speed", raid->name);
-
- if(unlikely(!raid->st_speed && !(raid->st_speed = rrdset_find_byname_localhost(id)))) {
- snprintfz(family, 50, "%s", raid->name);
-
- raid->st_speed = rrdset_create_localhost(
- "mdstat"
- , id
- , NULL
- , family
- , "md.rate"
- , "Operation Speed"
- , "KiB/s"
- , PLUGIN_PROC_NAME
- , PLUGIN_PROC_MODULE_MDSTAT_NAME
- , NETDATA_CHART_PRIO_MDSTAT_SPEED + raid_idx * 10
- , update_every
- , RRDSET_TYPE_LINE
- );
-
- rrdset_isnot_obsolete(raid->st_speed);
- }
- else
- rrdset_next(raid->st_speed);
-
- if(unlikely(!raid->rd_speed && !(raid->rd_speed = rrddim_find(raid->st_speed, "speed"))))
- raid->rd_speed = rrddim_add(raid->st_speed, "speed", NULL, 1, 1, RRD_ALGORITHM_ABSOLUTE);
-
- rrddim_set_by_pointer(raid->st_speed, raid->rd_speed, raid->speed);
-
- rrdset_done(raid->st_speed);
- }
- }
- else {
-
- // --------------------------------------------------------------------
-
- if(likely(do_nonredundant)) {
- snprintfz(id, 50, "%s_availability", raid->name);
-
- if(unlikely(!raid->st_nonredundant && !(raid->st_nonredundant = rrdset_find_localhost(id)))) {
- snprintfz(family, 50, "%s", raid->name);
-
- raid->st_nonredundant = rrdset_create_localhost(
- "mdstat"
- , id
- , NULL
- , family
- , "md.nonredundant"
- , "Nonredundant Array Availability"
- , "boolean"
- , PLUGIN_PROC_NAME
- , PLUGIN_PROC_MODULE_MDSTAT_NAME
- , NETDATA_CHART_PRIO_MDSTAT_NONREDUNDANT + raid_idx * 10
- , update_every
- , RRDSET_TYPE_LINE
- );
-
- rrdset_isnot_obsolete(raid->st_nonredundant);
- }
- else
- rrdset_next(raid->st_nonredundant);
-
- if(unlikely(!raid->rd_nonredundant && !(raid->rd_nonredundant = rrddim_find(raid->st_nonredundant, "available"))))
- raid->rd_nonredundant = rrddim_add(raid->st_nonredundant, "available", NULL, 1, 1, RRD_ALGORITHM_ABSOLUTE);
-
- rrddim_set_by_pointer(raid->st_nonredundant, raid->rd_nonredundant, 1);
-
- rrdset_done(raid->st_nonredundant);
- }
- }
- }
-
- return 0;
-}
+// SPDX-License-Identifier: GPL-3.0-or-later + +#include "plugin_proc.h" + +#define PLUGIN_PROC_MODULE_MDSTAT_NAME "/proc/mdstat" + +struct raid { + int redundant; + char *name; + uint32_t hash; + + RRDDIM *rd_health; + unsigned long long failed_disks; + + RRDSET *st_disks; + RRDDIM *rd_down; + RRDDIM *rd_inuse; + unsigned long long total_disks; + unsigned long long inuse_disks; + + RRDSET *st_operation; + RRDDIM *rd_check; + RRDDIM *rd_resync; + RRDDIM *rd_recovery; + RRDDIM *rd_reshape; + unsigned long long check; + unsigned long long resync; + unsigned long long recovery; + unsigned long long reshape; + + RRDSET *st_finish; + RRDDIM *rd_finish_in; + unsigned long long finish_in; + + RRDSET *st_speed; + RRDDIM *rd_speed; + unsigned long long speed; + + char *mismatch_cnt_filename; + RRDSET *st_mismatch_cnt; + RRDDIM *rd_mismatch_cnt; + unsigned long long mismatch_cnt; + + RRDSET *st_nonredundant; + RRDDIM *rd_nonredundant; +}; + +struct old_raid { + int redundant; + char *name; + uint32_t hash; + int found; +}; + +static inline char *remove_trailing_chars(char *s, char c) +{ + while (*s) { + if (unlikely(*s == c)) { + *s = '\0'; + } + s++; + } + return s; +} + +static inline void make_chart_obsolete(char *name, const char *id_modifier) +{ + char id[50 + 1]; + RRDSET *st = NULL; + + if (likely(name && id_modifier)) { + snprintfz(id, 50, "mdstat.%s_%s", name, id_modifier); + st = rrdset_find_active_byname_localhost(id); + if (likely(st)) + rrdset_is_obsolete(st); + } +} + +int do_proc_mdstat(int update_every, usec_t dt) +{ + (void)dt; + static procfile *ff = NULL; + static int do_health = -1, do_nonredundant = -1, do_disks = -1, do_operations = -1, do_mismatch = -1, + do_mismatch_config = -1; + static int make_charts_obsolete = -1; + static char *mdstat_filename = NULL, *mismatch_cnt_filename = NULL; + static struct raid *raids = NULL; + static size_t raids_allocated = 0; + size_t raids_num = 0, raid_idx = 0, redundant_num = 0; + static struct old_raid *old_raids = NULL; + static size_t old_raids_allocated = 0; + size_t old_raid_idx = 0; + + if (unlikely(do_health == -1)) { + do_health = + config_get_boolean("plugin:proc:/proc/mdstat", "faulty devices", CONFIG_BOOLEAN_YES); + do_nonredundant = + config_get_boolean("plugin:proc:/proc/mdstat", "nonredundant arrays availability", CONFIG_BOOLEAN_YES); + do_mismatch_config = + config_get_boolean_ondemand("plugin:proc:/proc/mdstat", "mismatch count", CONFIG_BOOLEAN_AUTO); + do_disks = + config_get_boolean("plugin:proc:/proc/mdstat", "disk stats", CONFIG_BOOLEAN_YES); + do_operations = + config_get_boolean("plugin:proc:/proc/mdstat", "operation status", CONFIG_BOOLEAN_YES); + + make_charts_obsolete = + config_get_boolean("plugin:proc:/proc/mdstat", "make charts obsolete", CONFIG_BOOLEAN_YES); + + char filename[FILENAME_MAX + 1]; + + snprintfz(filename, FILENAME_MAX, "%s%s", netdata_configured_host_prefix, "/proc/mdstat"); + mdstat_filename = config_get("plugin:proc:/proc/mdstat", "filename to monitor", filename); + + snprintfz(filename, FILENAME_MAX, "%s%s", netdata_configured_host_prefix, "/sys/block/%s/md/mismatch_cnt"); + mismatch_cnt_filename = config_get("plugin:proc:/proc/mdstat", "mismatch_cnt filename to monitor", filename); + } + + if (unlikely(!ff)) { + ff = procfile_open(mdstat_filename, " \t:", PROCFILE_FLAG_DEFAULT); + if (unlikely(!ff)) + return 1; + } + + ff = procfile_readall(ff); + if (unlikely(!ff)) + return 0; // we return 0, so that we will retry opening it next time + + size_t lines = procfile_lines(ff); + size_t words = 0; + + if (unlikely(lines < 2)) { + error("Cannot read /proc/mdstat. Expected 2 or more lines, read %zu.", lines); + return 1; + } + + // find how many raids are there + size_t l; + raids_num = 0; + for (l = 1; l < lines - 2; l++) { + if (unlikely(procfile_lineword(ff, l, 1)[0] == 'a')) // check if the raid is active + raids_num++; + } + + if (unlikely(!raids_num && !old_raids_allocated)) + return 0; // we return 0, so that we will retry searching for raids next time + + // allocate the memory we need; + if (unlikely(raids_num != raids_allocated)) { + for (raid_idx = 0; raid_idx < raids_allocated; raid_idx++) { + struct raid *raid = &raids[raid_idx]; + freez(raid->name); + freez(raid->mismatch_cnt_filename); + } + if (raids_num) { + raids = (struct raid *)reallocz(raids, raids_num * sizeof(struct raid)); + memset(raids, 0, raids_num * sizeof(struct raid)); + } else { + freez(raids); + raids = NULL; + } + raids_allocated = raids_num; + } + + // loop through all lines except the first and the last ones + for (l = 1, raid_idx = 0; l < (lines - 2) && raid_idx < raids_num; l++) { + struct raid *raid = &raids[raid_idx]; + raid->redundant = 0; + + words = procfile_linewords(ff, l); + + if (unlikely(words < 2)) + continue; + + if (unlikely(procfile_lineword(ff, l, 1)[0] != 'a')) + continue; + + if (unlikely(!raid->name)) { + raid->name = strdupz(procfile_lineword(ff, l, 0)); + raid->hash = simple_hash(raid->name); + } else if (unlikely(strcmp(raid->name, procfile_lineword(ff, l, 0)))) { + freez(raid->name); + freez(raid->mismatch_cnt_filename); + memset(raid, 0, sizeof(struct raid)); + raid->name = strdupz(procfile_lineword(ff, l, 0)); + raid->hash = simple_hash(raid->name); + } + + if (unlikely(!raid->name || !raid->name[0])) + continue; + + raid_idx++; + + // check if raid has disk status + l++; + words = procfile_linewords(ff, l); + if (words < 2 || procfile_lineword(ff, l, words - 1)[0] != '[') + continue; + + // split inuse and total number of disks + if (likely(do_health || do_disks)) { + char *s = NULL, *str_total = NULL, *str_inuse = NULL; + + s = procfile_lineword(ff, l, words - 2); + if (unlikely(s[0] != '[')) { + error("Cannot read /proc/mdstat raid health status. Unexpected format: missing opening bracket."); + continue; + } + str_total = ++s; + while (*s) { + if (unlikely(*s == '/')) { + *s = '\0'; + str_inuse = s + 1; + } else if (unlikely(*s == ']')) { + *s = '\0'; + break; + } + s++; + } + if (unlikely(str_total[0] == '\0' || !str_inuse || str_inuse[0] == '\0')) { + error("Cannot read /proc/mdstat raid health status. Unexpected format."); + continue; + } + + raid->inuse_disks = str2ull(str_inuse); + raid->total_disks = str2ull(str_total); + raid->failed_disks = raid->total_disks - raid->inuse_disks; + } + + raid->redundant = 1; + redundant_num++; + l++; + + // check if any operation is performed on the raid + if (likely(do_operations)) { + char *s = NULL; + + raid->check = 0; + raid->resync = 0; + raid->recovery = 0; + raid->reshape = 0; + raid->finish_in = 0; + raid->speed = 0; + + words = procfile_linewords(ff, l); + + if (likely(words < 2)) + continue; + + if (unlikely(procfile_lineword(ff, l, 0)[0] != '[')) + continue; + + if (unlikely(words < 7)) { + error("Cannot read /proc/mdstat line. Expected 7 params, read %zu.", words); + continue; + } + + char *word; + word = procfile_lineword(ff, l, 3); + remove_trailing_chars(word, '%'); + + unsigned long long percentage = (unsigned long long)(str2ld(word, NULL) * 100); + // possible operations: check, resync, recovery, reshape + // 4-th character is unique for each operation so it is checked + switch (procfile_lineword(ff, l, 1)[3]) { + case 'c': // check + raid->check = percentage; + break; + case 'y': // resync + raid->resync = percentage; + break; + case 'o': // recovery + raid->recovery = percentage; + break; + case 'h': // reshape + raid->reshape = percentage; + break; + } + + word = procfile_lineword(ff, l, 5); + s = remove_trailing_chars(word, 'm'); // remove trailing "min" + + word += 7; // skip leading "finish=" + + if (likely(s > word)) + raid->finish_in = (unsigned long long)(str2ld(word, NULL) * 60); + + word = procfile_lineword(ff, l, 6); + s = remove_trailing_chars(word, 'K'); // remove trailing "K/sec" + + word += 6; // skip leading "speed=" + + if (likely(s > word)) + raid->speed = str2ull(word); + } + } + + // read mismatch_cnt files + if (do_mismatch == -1) { + if (do_mismatch_config == CONFIG_BOOLEAN_AUTO) { + if (raids_num > 50) + do_mismatch = CONFIG_BOOLEAN_NO; + else + do_mismatch = CONFIG_BOOLEAN_YES; + } else + do_mismatch = do_mismatch_config; + } + + if (likely(do_mismatch)) { + for (raid_idx = 0; raid_idx < raids_num; raid_idx++) { + char filename[FILENAME_MAX + 1]; + struct raid *raid = &raids[raid_idx]; + + if (likely(raid->redundant)) { + if (unlikely(!raid->mismatch_cnt_filename)) { + snprintfz(filename, FILENAME_MAX, mismatch_cnt_filename, raid->name); + raid->mismatch_cnt_filename = strdupz(filename); + } + if (unlikely(read_single_number_file(raid->mismatch_cnt_filename, &raid->mismatch_cnt))) { + error("Cannot read file '%s'", raid->mismatch_cnt_filename); + do_mismatch = CONFIG_BOOLEAN_NO; + error("Monitoring for mismatch count has been disabled"); + break; + } + } + } + } + + // check for disappeared raids + for (old_raid_idx = 0; old_raid_idx < old_raids_allocated; old_raid_idx++) { + struct old_raid *old_raid = &old_raids[old_raid_idx]; + int found = 0; + + for (raid_idx = 0; raid_idx < raids_num; raid_idx++) { + struct raid *raid = &raids[raid_idx]; + + if (unlikely( + raid->hash == old_raid->hash && !strcmp(raid->name, old_raid->name) && + raid->redundant == old_raid->redundant)) + found = 1; + } + + old_raid->found = found; + } + + int raid_disappeared = 0; + for (old_raid_idx = 0; old_raid_idx < old_raids_allocated; old_raid_idx++) { + struct old_raid *old_raid = &old_raids[old_raid_idx]; + + if (unlikely(!old_raid->found)) { + if (likely(make_charts_obsolete)) { + make_chart_obsolete(old_raid->name, "disks"); + make_chart_obsolete(old_raid->name, "mismatch"); + make_chart_obsolete(old_raid->name, "operation"); + make_chart_obsolete(old_raid->name, "finish"); + make_chart_obsolete(old_raid->name, "speed"); + make_chart_obsolete(old_raid->name, "availability"); + } + raid_disappeared = 1; + } + } + + // allocate memory for nonredundant arrays + if (unlikely(raid_disappeared || old_raids_allocated != raids_num)) { + for (old_raid_idx = 0; old_raid_idx < old_raids_allocated; old_raid_idx++) { + freez(old_raids[old_raid_idx].name); + } + if (likely(raids_num)) { + old_raids = reallocz(old_raids, sizeof(struct old_raid) * raids_num); + memset(old_raids, 0, sizeof(struct old_raid) * raids_num); + } else { + freez(old_raids); + old_raids = NULL; + } + old_raids_allocated = raids_num; + for (old_raid_idx = 0; old_raid_idx < old_raids_allocated; old_raid_idx++) { + struct old_raid *old_raid = &old_raids[old_raid_idx]; + struct raid *raid = &raids[old_raid_idx]; + + old_raid->name = strdupz(raid->name); + old_raid->hash = raid->hash; + old_raid->redundant = raid->redundant; + } + } + + // -------------------------------------------------------------------- + + if (likely(do_health && redundant_num)) { + static RRDSET *st_mdstat_health = NULL; + if (unlikely(!st_mdstat_health)) { + st_mdstat_health = rrdset_create_localhost( + "mdstat", + "mdstat_health", + NULL, + "health", + "md.health", + "Faulty Devices In MD", + "failed disks", + PLUGIN_PROC_NAME, + PLUGIN_PROC_MODULE_MDSTAT_NAME, + NETDATA_CHART_PRIO_MDSTAT_HEALTH, + update_every, + RRDSET_TYPE_LINE); + + rrdset_isnot_obsolete(st_mdstat_health); + } else + rrdset_next(st_mdstat_health); + + if (!redundant_num) { + if (likely(make_charts_obsolete)) + make_chart_obsolete("mdstat", "health"); + } else { + for (raid_idx = 0; raid_idx < raids_num; raid_idx++) { + struct raid *raid = &raids[raid_idx]; + + if (likely(raid->redundant)) { + if (unlikely(!raid->rd_health && !(raid->rd_health = rrddim_find_active(st_mdstat_health, raid->name)))) + raid->rd_health = rrddim_add(st_mdstat_health, raid->name, NULL, 1, 1, RRD_ALGORITHM_ABSOLUTE); + + rrddim_set_by_pointer(st_mdstat_health, raid->rd_health, raid->failed_disks); + } + } + + rrdset_done(st_mdstat_health); + } + } + + // -------------------------------------------------------------------- + + for (raid_idx = 0; raid_idx < raids_num; raid_idx++) { + struct raid *raid = &raids[raid_idx]; + char id[50 + 1]; + char family[50 + 1]; + + if (likely(raid->redundant)) { + if (likely(do_disks)) { + snprintfz(id, 50, "%s_disks", raid->name); + + if (unlikely(!raid->st_disks && !(raid->st_disks = rrdset_find_active_byname_localhost(id)))) { + snprintfz(family, 50, "%s", raid->name); + + raid->st_disks = rrdset_create_localhost( + "mdstat", + id, + NULL, + family, + "md.disks", + "Disks Stats", + "disks", + PLUGIN_PROC_NAME, + PLUGIN_PROC_MODULE_MDSTAT_NAME, + NETDATA_CHART_PRIO_MDSTAT_DISKS + raid_idx * 10, + update_every, + RRDSET_TYPE_STACKED); + + rrdset_isnot_obsolete(raid->st_disks); + } else + rrdset_next(raid->st_disks); + + if (unlikely(!raid->rd_inuse && !(raid->rd_inuse = rrddim_find_active(raid->st_disks, "inuse")))) + raid->rd_inuse = rrddim_add(raid->st_disks, "inuse", NULL, 1, 1, RRD_ALGORITHM_ABSOLUTE); + if (unlikely(!raid->rd_down && !(raid->rd_down = rrddim_find_active(raid->st_disks, "down")))) + raid->rd_down = rrddim_add(raid->st_disks, "down", NULL, 1, 1, RRD_ALGORITHM_ABSOLUTE); + + rrddim_set_by_pointer(raid->st_disks, raid->rd_inuse, raid->inuse_disks); + rrddim_set_by_pointer(raid->st_disks, raid->rd_down, raid->failed_disks); + + rrdset_done(raid->st_disks); + } + + // -------------------------------------------------------------------- + + if (likely(do_mismatch)) { + snprintfz(id, 50, "%s_mismatch", raid->name); + + if (unlikely(!raid->st_mismatch_cnt && !(raid->st_mismatch_cnt = rrdset_find_active_byname_localhost(id)))) { + snprintfz(family, 50, "%s", raid->name); + + raid->st_mismatch_cnt = rrdset_create_localhost( + "mdstat", + id, + NULL, + family, + "md.mismatch_cnt", + "Mismatch Count", + "unsynchronized blocks", + PLUGIN_PROC_NAME, + PLUGIN_PROC_MODULE_MDSTAT_NAME, + NETDATA_CHART_PRIO_MDSTAT_MISMATCH + raid_idx * 10, + update_every, + RRDSET_TYPE_LINE); + + rrdset_isnot_obsolete(raid->st_mismatch_cnt); + } else + rrdset_next(raid->st_mismatch_cnt); + + if (unlikely(!raid->rd_mismatch_cnt && !(raid->rd_mismatch_cnt = rrddim_find_active(raid->st_mismatch_cnt, "count")))) + raid->rd_mismatch_cnt = rrddim_add(raid->st_mismatch_cnt, "count", NULL, 1, 1, RRD_ALGORITHM_ABSOLUTE); + + rrddim_set_by_pointer(raid->st_mismatch_cnt, raid->rd_mismatch_cnt, raid->mismatch_cnt); + + rrdset_done(raid->st_mismatch_cnt); + } + + // -------------------------------------------------------------------- + + if (likely(do_operations)) { + snprintfz(id, 50, "%s_operation", raid->name); + + if (unlikely(!raid->st_operation && !(raid->st_operation = rrdset_find_active_byname_localhost(id)))) { + snprintfz(family, 50, "%s", raid->name); + + raid->st_operation = rrdset_create_localhost( + "mdstat", + id, + NULL, + family, + "md.status", + "Current Status", + "percent", + PLUGIN_PROC_NAME, + PLUGIN_PROC_MODULE_MDSTAT_NAME, + NETDATA_CHART_PRIO_MDSTAT_OPERATION + raid_idx * 10, + update_every, + RRDSET_TYPE_LINE); + + rrdset_isnot_obsolete(raid->st_operation); + } else + rrdset_next(raid->st_operation); + + if(unlikely(!raid->rd_check && !(raid->rd_check = rrddim_find_active(raid->st_operation, "check")))) + raid->rd_check = rrddim_add(raid->st_operation, "check", NULL, 1, 100, RRD_ALGORITHM_ABSOLUTE); + if(unlikely(!raid->rd_resync && !(raid->rd_resync = rrddim_find_active(raid->st_operation, "resync")))) + raid->rd_resync = rrddim_add(raid->st_operation, "resync", NULL, 1, 100, RRD_ALGORITHM_ABSOLUTE); + if(unlikely(!raid->rd_recovery && !(raid->rd_recovery = rrddim_find_active(raid->st_operation, "recovery")))) + raid->rd_recovery = rrddim_add(raid->st_operation, "recovery", NULL, 1, 100, RRD_ALGORITHM_ABSOLUTE); + if(unlikely(!raid->rd_reshape && !(raid->rd_reshape = rrddim_find_active(raid->st_operation, "reshape")))) + raid->rd_reshape = rrddim_add(raid->st_operation, "reshape", NULL, 1, 100, RRD_ALGORITHM_ABSOLUTE); + + rrddim_set_by_pointer(raid->st_operation, raid->rd_check, raid->check); + rrddim_set_by_pointer(raid->st_operation, raid->rd_resync, raid->resync); + rrddim_set_by_pointer(raid->st_operation, raid->rd_recovery, raid->recovery); + rrddim_set_by_pointer(raid->st_operation, raid->rd_reshape, raid->reshape); + + rrdset_done(raid->st_operation); + + // -------------------------------------------------------------------- + + snprintfz(id, 50, "%s_finish", raid->name); + + if (unlikely(!raid->st_finish && !(raid->st_finish = rrdset_find_active_byname_localhost(id)))) { + snprintfz(family, 50, "%s", raid->name); + + raid->st_finish = rrdset_create_localhost( + "mdstat", + id, + NULL, + family, + "md.rate", + "Approximate Time Unit Finish", + "seconds", + PLUGIN_PROC_NAME, + PLUGIN_PROC_MODULE_MDSTAT_NAME, + NETDATA_CHART_PRIO_MDSTAT_FINISH + raid_idx * 10, + update_every, RRDSET_TYPE_LINE); + + rrdset_isnot_obsolete(raid->st_finish); + } else + rrdset_next(raid->st_finish); + + if(unlikely(!raid->rd_finish_in && !(raid->rd_finish_in = rrddim_find_active(raid->st_finish, "finish_in")))) + raid->rd_finish_in = rrddim_add(raid->st_finish, "finish_in", NULL, 1, 1, RRD_ALGORITHM_ABSOLUTE); + + rrddim_set_by_pointer(raid->st_finish, raid->rd_finish_in, raid->finish_in); + + rrdset_done(raid->st_finish); + + // -------------------------------------------------------------------- + + snprintfz(id, 50, "%s_speed", raid->name); + + if (unlikely(!raid->st_speed && !(raid->st_speed = rrdset_find_active_byname_localhost(id)))) { + snprintfz(family, 50, "%s", raid->name); + + raid->st_speed = rrdset_create_localhost( + "mdstat", + id, + NULL, + family, + "md.rate", + "Operation Speed", + "KiB/s", + PLUGIN_PROC_NAME, + PLUGIN_PROC_MODULE_MDSTAT_NAME, + NETDATA_CHART_PRIO_MDSTAT_SPEED + raid_idx * 10, + update_every, + RRDSET_TYPE_LINE); + + rrdset_isnot_obsolete(raid->st_speed); + } else + rrdset_next(raid->st_speed); + + if (unlikely(!raid->rd_speed && !(raid->rd_speed = rrddim_find_active(raid->st_speed, "speed")))) + raid->rd_speed = rrddim_add(raid->st_speed, "speed", NULL, 1, 1, RRD_ALGORITHM_ABSOLUTE); + + rrddim_set_by_pointer(raid->st_speed, raid->rd_speed, raid->speed); + + rrdset_done(raid->st_speed); + } + } else { + // -------------------------------------------------------------------- + + if (likely(do_nonredundant)) { + snprintfz(id, 50, "%s_availability", raid->name); + + if (unlikely(!raid->st_nonredundant && !(raid->st_nonredundant = rrdset_find_active_localhost(id)))) { + snprintfz(family, 50, "%s", raid->name); + + raid->st_nonredundant = rrdset_create_localhost( + "mdstat", + id, + NULL, + family, + "md.nonredundant", + "Nonredundant Array Availability", + "boolean", + PLUGIN_PROC_NAME, + PLUGIN_PROC_MODULE_MDSTAT_NAME, + NETDATA_CHART_PRIO_MDSTAT_NONREDUNDANT + raid_idx * 10, + update_every, + RRDSET_TYPE_LINE); + + rrdset_isnot_obsolete(raid->st_nonredundant); + } else + rrdset_next(raid->st_nonredundant); + + if (unlikely(!raid->rd_nonredundant && !(raid->rd_nonredundant = rrddim_find_active(raid->st_nonredundant, "available")))) + raid->rd_nonredundant = rrddim_add(raid->st_nonredundant, "available", NULL, 1, 1, RRD_ALGORITHM_ABSOLUTE); + + rrddim_set_by_pointer(raid->st_nonredundant, raid->rd_nonredundant, 1); + + rrdset_done(raid->st_nonredundant); + } + } + } + + return 0; +} diff --git a/collectors/proc.plugin/proc_meminfo.c b/collectors/proc.plugin/proc_meminfo.c index 92135393..51d77fe0 100644 --- a/collectors/proc.plugin/proc_meminfo.c +++ b/collectors/proc.plugin/proc_meminfo.c @@ -35,7 +35,7 @@ int do_proc_meminfo(int update_every, usec_t dt) { Writeback = 0, //AnonPages = 0, //Mapped = 0, - //Shmem = 0, + Shmem = 0, Slab = 0, SReclaimable = 0, SUnreclaim = 0, @@ -92,7 +92,7 @@ int do_proc_meminfo(int update_every, usec_t dt) { arl_expect(arl_base, "Writeback", &Writeback); //arl_expect(arl_base, "AnonPages", &AnonPages); //arl_expect(arl_base, "Mapped", &Mapped); - //arl_expect(arl_base, "Shmem", &Shmem); + arl_expect(arl_base, "Shmem", &Shmem); arl_expect(arl_base, "Slab", &Slab); arl_expect(arl_base, "SReclaimable", &SReclaimable); arl_expect(arl_base, "SUnreclaim", &SUnreclaim); @@ -145,8 +145,8 @@ int do_proc_meminfo(int update_every, usec_t dt) { // -------------------------------------------------------------------- - // http://stackoverflow.com/questions/3019748/how-to-reliably-measure-available-memory-in-linux - unsigned long long MemCached = Cached + SReclaimable; + // http://calimeroteknik.free.fr/blag/?article20/really-used-memory-on-gnu-linux + unsigned long long MemCached = Cached + SReclaimable - Shmem; unsigned long long MemUsed = MemTotal - MemFree - MemCached - Buffers; if(do_ram) { @@ -526,4 +526,3 @@ int do_proc_meminfo(int update_every, usec_t dt) { return 0; } - diff --git a/collectors/proc.plugin/proc_net_dev.c b/collectors/proc.plugin/proc_net_dev.c index 8d9751d1..a90e3c3e 100644 --- a/collectors/proc.plugin/proc_net_dev.c +++ b/collectors/proc.plugin/proc_net_dev.c @@ -5,6 +5,21 @@ #define PLUGIN_PROC_MODULE_NETDEV_NAME "/proc/net/dev" #define CONFIG_SECTION_PLUGIN_PROC_NETDEV "plugin:" PLUGIN_PROC_CONFIG_NAME ":" PLUGIN_PROC_MODULE_NETDEV_NAME +// As defined in https://www.kernel.org/doc/Documentation/ABI/testing/sysfs-class-net +const char *operstate_names[] = { "unknown", "notpresent", "down", "lowerlayerdown", "testing", "dormant", "up" }; + +static inline int get_operstate(char *operstate) +{ + int i; + + for (i = 0; i < (int) (sizeof(operstate_names) / sizeof(char *)); i++) { + if (!strcmp(operstate, operstate_names[i])) { + return i; + } + } + return 0; +} + // ---------------------------------------------------------------------------- // netdev list @@ -45,6 +60,8 @@ static struct netdev { const char *chart_family; + struct label *chart_labels; + int flipped; unsigned long priority; @@ -67,6 +84,8 @@ static struct netdev { kernel_uint_t tcarrier; kernel_uint_t tcompressed; kernel_uint_t speed; + kernel_uint_t duplex; + kernel_uint_t operstate; // charts RRDSET *st_bandwidth; @@ -97,9 +116,18 @@ static struct netdev { RRDDIM *rd_tcompressed; usec_t speed_last_collected_usec; + usec_t duplex_last_collected_usec; + usec_t operstate_last_collected_usec; + char *filename_speed; RRDSETVAR *chart_var_speed; + char *filename_duplex; + RRDSETVAR *chart_var_duplex; + + char *filename_operstate; + RRDSETVAR *chart_var_operstate; + struct netdev *next; } *netdev_root = NULL, *netdev_last_used = NULL; @@ -166,14 +194,16 @@ static void netdev_free_chart_strings(struct netdev *d) { static void netdev_free(struct netdev *d) { netdev_charts_release(d); netdev_free_chart_strings(d); + free_label_list(d->chart_labels); freez((void *)d->name); freez((void *)d->filename_speed); + freez((void *)d->filename_duplex); + freez((void *)d->filename_operstate); freez((void *)d); netdev_added--; } - // ---------------------------------------------------------------------------- // netdev renames @@ -184,6 +214,8 @@ static struct netdev_rename { const char *container_device; const char *container_name; + struct label *chart_labels; + int processed; struct netdev_rename *next; @@ -203,7 +235,9 @@ static struct netdev_rename *netdev_rename_find(const char *host_device, uint32_ } // other threads can call this function to register a rename to a netdev -void netdev_rename_device_add(const char *host_device, const char *container_device, const char *container_name) { +void netdev_rename_device_add( + const char *host_device, const char *container_device, const char *container_name, struct label *labels) +{ netdata_mutex_lock(&netdev_rename_mutex); uint32_t hash = simple_hash(host_device); @@ -213,6 +247,7 @@ void netdev_rename_device_add(const char *host_device, const char *container_dev r->host_device = strdupz(host_device); r->container_device = strdupz(container_device); r->container_name = strdupz(container_name); + update_label_list(&r->chart_labels, labels); r->hash = hash; r->next = netdev_rename_root; r->processed = 0; @@ -227,6 +262,9 @@ void netdev_rename_device_add(const char *host_device, const char *container_dev r->container_device = strdupz(container_device); r->container_name = strdupz(container_name); + + update_label_list(&r->chart_labels, labels); + r->processed = 0; netdev_pending_renames++; info("CGROUP: altered network interface rename for '%s' as '%s' under '%s'", r->host_device, r->container_device, r->container_name); @@ -258,6 +296,7 @@ void netdev_rename_device_del(const char *host_device) { freez((void *) r->host_device); freez((void *) r->container_name); freez((void *) r->container_device); + free_label_list(r->chart_labels); freez((void *) r); break; } @@ -307,6 +346,8 @@ static inline void netdev_rename_cgroup(struct netdev *d, struct netdev_rename * snprintfz(buffer, RRD_ID_LENGTH_MAX, "net %s", r->container_device); d->chart_family = strdupz(buffer); + update_label_list(&d->chart_labels, r->chart_labels); + d->priority = NETDATA_CHART_PRIO_CGROUP_NET_IFACE; d->flipped = 1; } @@ -439,9 +480,15 @@ int do_proc_net_dev(int update_every, usec_t dt) { static SIMPLE_PATTERN *disabled_list = NULL; static procfile *ff = NULL; static int enable_new_interfaces = -1; - static int do_bandwidth = -1, do_packets = -1, do_errors = -1, do_drops = -1, do_fifo = -1, do_compressed = -1, do_events = -1; - static char *path_to_sys_devices_virtual_net = NULL, *path_to_sys_class_net_speed = NULL, *proc_net_dev_filename = NULL; + static int do_bandwidth = -1, do_packets = -1, do_errors = -1, do_drops = -1, do_fifo = -1, do_compressed = -1, + do_events = -1; + static char *path_to_sys_devices_virtual_net = NULL, *path_to_sys_class_net_speed = NULL, + *proc_net_dev_filename = NULL; + static char *path_to_sys_class_net_duplex = NULL; + static char *path_to_sys_class_net_operstate = NULL; static long long int dt_to_refresh_speed = 0; + static long long int dt_to_refresh_duplex = 0; + static long long int dt_to_refresh_operstate = 0; if(unlikely(enable_new_interfaces == -1)) { char filename[FILENAME_MAX + 1]; @@ -455,6 +502,13 @@ int do_proc_net_dev(int update_every, usec_t dt) { snprintfz(filename, FILENAME_MAX, "%s%s", netdata_configured_host_prefix, "/sys/class/net/%s/speed"); path_to_sys_class_net_speed = config_get(CONFIG_SECTION_PLUGIN_PROC_NETDEV, "path to get net device speed", filename); + snprintfz(filename, FILENAME_MAX, "%s%s", netdata_configured_host_prefix, "/sys/class/net/%s/duplex"); + path_to_sys_class_net_duplex = config_get(CONFIG_SECTION_PLUGIN_PROC_NETDEV, "path to get net device duplex", filename); + + snprintfz(filename, FILENAME_MAX, "%s%s", netdata_configured_host_prefix, "/sys/class/net/%s/operstate"); + path_to_sys_class_net_operstate = config_get(CONFIG_SECTION_PLUGIN_PROC_NETDEV, "path to get net device operstate", filename); + + enable_new_interfaces = config_get_boolean_ondemand(CONFIG_SECTION_PLUGIN_PROC_NETDEV, "enable new interfaces detected at runtime", CONFIG_BOOLEAN_AUTO); do_bandwidth = config_get_boolean_ondemand(CONFIG_SECTION_PLUGIN_PROC_NETDEV, "bandwidth for all interfaces", CONFIG_BOOLEAN_AUTO); @@ -468,7 +522,15 @@ int do_proc_net_dev(int update_every, usec_t dt) { disabled_list = simple_pattern_create(config_get(CONFIG_SECTION_PLUGIN_PROC_NETDEV, "disable by default interfaces matching", "lo fireqos* *-ifb"), NULL, SIMPLE_PATTERN_EXACT); dt_to_refresh_speed = config_get_number(CONFIG_SECTION_PLUGIN_PROC_NETDEV, "refresh interface speed every seconds", 10) * USEC_PER_SEC; - if(dt_to_refresh_speed < 0) dt_to_refresh_speed = 0; + dt_to_refresh_duplex = config_get_number(CONFIG_SECTION_PLUGIN_PROC_NETDEV, "refresh interface duplex every seconds", 10) * USEC_PER_SEC; + dt_to_refresh_operstate = config_get_number(CONFIG_SECTION_PLUGIN_PROC_NETDEV, "refresh interface operstate every seconds", 10) * USEC_PER_SEC; + + if (dt_to_refresh_operstate < 0) + dt_to_refresh_operstate = 0; + if (dt_to_refresh_duplex < 0) + dt_to_refresh_duplex = 0; + if (dt_to_refresh_speed < 0) + dt_to_refresh_speed = 0; } if(unlikely(!ff)) { @@ -525,6 +587,12 @@ int do_proc_net_dev(int update_every, usec_t dt) { // set the filename to get the interface speed snprintfz(buffer, FILENAME_MAX, path_to_sys_class_net_speed, d->name); d->filename_speed = strdupz(buffer); + + snprintfz(buffer, FILENAME_MAX, path_to_sys_class_net_duplex, d->name); + d->filename_duplex = strdupz(buffer); + + snprintfz(buffer, FILENAME_MAX, path_to_sys_class_net_operstate, d->name); + d->filename_operstate = strdupz(buffer); } snprintfz(buffer, FILENAME_MAX, "plugin:proc:/proc/net/dev:%s", d->name); @@ -623,6 +691,8 @@ int do_proc_net_dev(int update_every, usec_t dt) { , RRDSET_TYPE_AREA ); + rrdset_update_labels(d->st_bandwidth, d->chart_labels); + d->rd_rbytes = rrddim_add(d->st_bandwidth, "received", NULL, 8, BITS_IN_A_KILOBIT, RRD_ALGORITHM_INCREMENTAL); d->rd_tbytes = rrddim_add(d->st_bandwidth, "sent", NULL, -8, BITS_IN_A_KILOBIT, RRD_ALGORITHM_INCREMENTAL); @@ -668,6 +738,76 @@ int do_proc_net_dev(int update_every, usec_t dt) { } } } + + if (d->filename_duplex) { + d->duplex_last_collected_usec += dt; + + if (unlikely(d->duplex_last_collected_usec >= (usec_t)dt_to_refresh_duplex)) { + if (unlikely(!d->chart_var_duplex)) { + d->chart_var_duplex = rrdsetvar_custom_chart_variable_create(d->st_bandwidth, "duplex"); + if (!d->chart_var_duplex) { + error("Cannot create interface %s chart variable 'duplex'. Will not update the duplex status anymore.", d->name); + freez(d->filename_duplex); + d->filename_duplex = NULL; + } + } + + if (d->filename_duplex && d->chart_var_duplex) { + char buffer[32 + 1]; + + if (read_file(d->filename_duplex, buffer, 32)) { + error("Cannot refresh interface %s duplex state by reading '%s'. I will stop updating it.", d->name, d->filename_duplex); + freez(d->filename_duplex); + d->filename_duplex = NULL; + } else { + // values can be unknown, half or full -- just check the first letter for speed + if (buffer[0] == 'f') + d->duplex = 2; + else if (buffer[0] == 'h') + d->duplex = 1; + else + d->duplex = 0; + + rrdsetvar_custom_chart_variable_set(d->chart_var_duplex, (calculated_number)d->duplex); + d->duplex_last_collected_usec = 0; + } + } + } + } + + if (d->filename_operstate) { + d->operstate_last_collected_usec += dt; + + if (unlikely(d->operstate_last_collected_usec >= (usec_t)dt_to_refresh_operstate)) { + if (unlikely(!d->chart_var_operstate)) { + d->chart_var_operstate = rrdsetvar_custom_chart_variable_create(d->st_bandwidth, "operstate"); + if (!d->chart_var_operstate) { + error( + "Cannot create interface %s chart variable 'operstate'. I will stop updating it.", + d->name); + freez(d->filename_operstate); + d->filename_operstate = NULL; + } + } + + if (d->filename_operstate && d->chart_var_operstate) { + char buffer[32 + 1], *trimmed_buffer; + + if (read_file(d->filename_operstate, buffer, 32)) { + error( + "Cannot refresh %s operstate by reading '%s'. Will not update its status anymore.", + d->name, d->filename_operstate); + freez(d->filename_operstate); + d->filename_operstate = NULL; + } else { + trimmed_buffer = trim(buffer); + d->operstate = get_operstate(trimmed_buffer); + rrdsetvar_custom_chart_variable_set(d->chart_var_operstate, (calculated_number)d->operstate); + d->operstate_last_collected_usec = 0; + } + } + } + } } // -------------------------------------------------------------------- @@ -696,6 +836,8 @@ int do_proc_net_dev(int update_every, usec_t dt) { rrdset_flag_set(d->st_packets, RRDSET_FLAG_DETAIL); + rrdset_update_labels(d->st_packets, d->chart_labels); + d->rd_rpackets = rrddim_add(d->st_packets, "received", NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL); d->rd_tpackets = rrddim_add(d->st_packets, "sent", NULL, -1, 1, RRD_ALGORITHM_INCREMENTAL); d->rd_rmulticast = rrddim_add(d->st_packets, "multicast", NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL); @@ -742,6 +884,8 @@ int do_proc_net_dev(int update_every, usec_t dt) { rrdset_flag_set(d->st_errors, RRDSET_FLAG_DETAIL); + rrdset_update_labels(d->st_errors, d->chart_labels); + d->rd_rerrors = rrddim_add(d->st_errors, "inbound", NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL); d->rd_terrors = rrddim_add(d->st_errors, "outbound", NULL, -1, 1, RRD_ALGORITHM_INCREMENTAL); @@ -786,6 +930,8 @@ int do_proc_net_dev(int update_every, usec_t dt) { rrdset_flag_set(d->st_drops, RRDSET_FLAG_DETAIL); + rrdset_update_labels(d->st_drops, d->chart_labels); + d->rd_rdrops = rrddim_add(d->st_drops, "inbound", NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL); d->rd_tdrops = rrddim_add(d->st_drops, "outbound", NULL, -1, 1, RRD_ALGORITHM_INCREMENTAL); @@ -830,6 +976,8 @@ int do_proc_net_dev(int update_every, usec_t dt) { rrdset_flag_set(d->st_fifo, RRDSET_FLAG_DETAIL); + rrdset_update_labels(d->st_fifo, d->chart_labels); + d->rd_rfifo = rrddim_add(d->st_fifo, "receive", NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL); d->rd_tfifo = rrddim_add(d->st_fifo, "transmit", NULL, -1, 1, RRD_ALGORITHM_INCREMENTAL); @@ -874,6 +1022,8 @@ int do_proc_net_dev(int update_every, usec_t dt) { rrdset_flag_set(d->st_compressed, RRDSET_FLAG_DETAIL); + rrdset_update_labels(d->st_compressed, d->chart_labels); + d->rd_rcompressed = rrddim_add(d->st_compressed, "received", NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL); d->rd_tcompressed = rrddim_add(d->st_compressed, "sent", NULL, -1, 1, RRD_ALGORITHM_INCREMENTAL); @@ -918,6 +1068,8 @@ int do_proc_net_dev(int update_every, usec_t dt) { rrdset_flag_set(d->st_events, RRDSET_FLAG_DETAIL); + rrdset_update_labels(d->st_events, d->chart_labels); + d->rd_rframe = rrddim_add(d->st_events, "frames", NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL); d->rd_tcollisions = rrddim_add(d->st_events, "collisions", NULL, -1, 1, RRD_ALGORITHM_INCREMENTAL); d->rd_tcarrier = rrddim_add(d->st_events, "carrier", NULL, -1, 1, RRD_ALGORITHM_INCREMENTAL); diff --git a/collectors/proc.plugin/proc_net_softnet_stat.c b/collectors/proc.plugin/proc_net_softnet_stat.c index 7ec783e7..a29ccccd 100644 --- a/collectors/proc.plugin/proc_net_softnet_stat.c +++ b/collectors/proc.plugin/proc_net_softnet_stat.c @@ -81,7 +81,7 @@ int do_proc_net_softnet_stat(int update_every, usec_t dt) { // -------------------------------------------------------------------- - st = rrdset_find_bytype_localhost("system", "softnet_stat"); + st = rrdset_find_active_bytype_localhost("system", "softnet_stat"); if(unlikely(!st)) { st = rrdset_create_localhost( "system" @@ -114,7 +114,7 @@ int do_proc_net_softnet_stat(int update_every, usec_t dt) { char id[50+1]; snprintfz(id, 50, "cpu%zu_softnet_stat", l); - st = rrdset_find_bytype_localhost("cpu", id); + st = rrdset_find_active_bytype_localhost("cpu", id); if(unlikely(!st)) { char title[100+1]; snprintfz(title, 100, "CPU%zu softnet_stat", l); diff --git a/collectors/proc.plugin/proc_net_wireless.c b/collectors/proc.plugin/proc_net_wireless.c new file mode 100644 index 00000000..32a53c68 --- /dev/null +++ b/collectors/proc.plugin/proc_net_wireless.c @@ -0,0 +1,453 @@ +#include <stdbool.h> +#include "plugin_proc.h" + +#define PLUGIN_PROC_MODULE_NETWIRELESS_NAME "/proc/net/wireless" + +#define CONFIG_SECTION_PLUGIN_PROC_NETWIRELESS "plugin:" PLUGIN_PROC_CONFIG_NAME ":" PLUGIN_PROC_MODULE_NETWIRELESS_NAME + + +static struct netwireless { + char *name; + uint32_t hash; + + //flags + bool configured; + struct timeval updated; + + int do_status; + int do_quality; + int do_discarded_packets; + int do_missed_beacon; + + // Data collected + // status + kernel_uint_t status; + + // Quality + calculated_number link; + calculated_number level; + calculated_number noise; + + // Discarded packets + kernel_uint_t nwid; + kernel_uint_t crypt; + kernel_uint_t frag; + kernel_uint_t retry; + kernel_uint_t misc; + + // missed beacon + kernel_uint_t missed_beacon; + + const char *chart_id_net_status; + const char *chart_id_net_link; + const char *chart_id_net_level; + const char *chart_id_net_noise; + const char *chart_id_net_discarded_packets; + const char *chart_id_net_missed_beacon; + + const char *chart_family; + + // charts + // satus + RRDSET *st_status; + + // Quality + RRDSET *st_link; + RRDSET *st_level; + RRDSET *st_noise; + + // Discarded Packets + RRDSET *st_discarded_packets; + // Missed beacon + RRDSET *st_missed_beacon; + + // Dimensions + // status + RRDDIM *rd_status; + + // Quality + RRDDIM *rd_link; + RRDDIM *rd_level; + RRDDIM *rd_noise; + + // Discarded packets + RRDDIM *rd_nwid; + RRDDIM *rd_crypt; + RRDDIM *rd_frag; + RRDDIM *rd_retry; + RRDDIM *rd_misc; + + // missed beacon + RRDDIM *rd_missed_beacon; + + struct netwireless *next; +} *netwireless_root = NULL; + +static void netwireless_free_st(struct netwireless *wireless_dev) +{ + if (wireless_dev->st_status) rrdset_is_obsolete(wireless_dev->st_status); + if (wireless_dev->st_link) rrdset_is_obsolete(wireless_dev->st_link); + if (wireless_dev->st_level) rrdset_is_obsolete(wireless_dev->st_level); + if (wireless_dev->st_noise) rrdset_is_obsolete(wireless_dev->st_noise); + if (wireless_dev->st_discarded_packets) rrdset_is_obsolete(wireless_dev->st_discarded_packets); + if (wireless_dev->st_missed_beacon) rrdset_is_obsolete(wireless_dev->st_missed_beacon); + + wireless_dev->st_status = NULL; + wireless_dev->st_link = NULL; + wireless_dev->st_level = NULL; + wireless_dev->st_noise = NULL; + wireless_dev->st_discarded_packets = NULL; + wireless_dev->st_missed_beacon = NULL; +} + +static void netwireless_free(struct netwireless *wireless_dev) +{ + wireless_dev->next = NULL; + freez((void *)wireless_dev->name); + netwireless_free_st(wireless_dev); + freez((void *)wireless_dev->chart_id_net_status); + freez((void *)wireless_dev->chart_id_net_link); + freez((void *)wireless_dev->chart_id_net_level); + freez((void *)wireless_dev->chart_id_net_noise); + freez((void *)wireless_dev->chart_id_net_discarded_packets); + freez((void *)wireless_dev->chart_id_net_missed_beacon); + + freez((void *)wireless_dev); +} + +static void netwireless_cleanup(struct timeval *timestamp) +{ + struct netwireless *previous = NULL; + struct netwireless *current; + // search it, from begining to the end + for (current = netwireless_root; current;) { + + if (timercmp(¤t->updated, timestamp, <)) { + struct netwireless *to_free = current; + current = current->next; + netwireless_free(to_free); + + if (previous) { + previous->next = current; + } else { + netwireless_root = current; + } + } else { + previous = current; + current = current->next; + } + } +} + +// finds an existing interface or creates a new entry +static struct netwireless *find_or_create_wireless(const char *name) +{ + struct netwireless *wireless; + uint32_t hash = simple_hash(name); + + // search it, from begining to the end + for (wireless = netwireless_root ; wireless ; wireless = wireless->next) { + if (unlikely(hash == wireless->hash && !strcmp(name, wireless->name))) { + return wireless; + } + } + + // create a new one + wireless = callocz(1, sizeof(struct netwireless)); + wireless->name = strdupz(name); + wireless->hash = hash; + + // link it to the end + if (netwireless_root) { + struct netwireless *last_node; + for (last_node = netwireless_root; last_node->next ; last_node = last_node->next); + + last_node->next = wireless; + } else + netwireless_root = wireless; + + return wireless; +} + +static void configure_device(int do_status, int do_quality, int do_discarded_packets, int do_missed, + struct netwireless *wireless_dev) { + wireless_dev->do_status = do_status; + wireless_dev->do_quality = do_quality; + wireless_dev->do_discarded_packets = do_discarded_packets; + wireless_dev->do_missed_beacon = do_missed; + wireless_dev->configured = true; + + char buffer[RRD_ID_LENGTH_MAX + 1]; + + snprintfz(buffer, RRD_ID_LENGTH_MAX, "%s_status", wireless_dev->name); + wireless_dev->chart_id_net_status = strdupz(buffer); + + snprintfz(buffer, RRD_ID_LENGTH_MAX, "%s_link_quality", wireless_dev->name); + wireless_dev->chart_id_net_link = strdupz(buffer); + + snprintfz(buffer, RRD_ID_LENGTH_MAX, "%s_signal_level", wireless_dev->name); + wireless_dev->chart_id_net_level = strdupz(buffer); + + snprintfz(buffer, RRD_ID_LENGTH_MAX, "%s_noise_level", wireless_dev->name); + wireless_dev->chart_id_net_noise = strdupz(buffer); + + snprintfz(buffer, RRD_ID_LENGTH_MAX, "%s_discarded_packets", wireless_dev->name); + wireless_dev->chart_id_net_discarded_packets = strdupz(buffer); + + snprintfz(buffer, RRD_ID_LENGTH_MAX, "%s_missed_beacon", wireless_dev->name); + wireless_dev->chart_id_net_missed_beacon = strdupz(buffer); +} + +int do_proc_net_wireless(int update_every, usec_t dt) +{ + UNUSED(dt); + static procfile *ff = NULL; + static int do_status, do_quality = -1, do_discarded_packets, do_beacon; + static char *proc_net_wireless_filename = NULL; + + if (unlikely(do_quality == -1)) { + char filename[FILENAME_MAX + 1]; + snprintfz(filename, FILENAME_MAX, "%s%s", netdata_configured_host_prefix, "/proc/net/wireless"); + + proc_net_wireless_filename = config_get(CONFIG_SECTION_PLUGIN_PROC_NETWIRELESS,"filename to monitor", + filename); + + do_status = config_get_boolean_ondemand(CONFIG_SECTION_PLUGIN_PROC_NETWIRELESS, + "status for all interfaces", CONFIG_BOOLEAN_AUTO); + + do_quality = config_get_boolean_ondemand(CONFIG_SECTION_PLUGIN_PROC_NETWIRELESS, + "quality for all interfaces", CONFIG_BOOLEAN_AUTO); + + do_discarded_packets = config_get_boolean_ondemand(CONFIG_SECTION_PLUGIN_PROC_NETWIRELESS, + "discarded packets for all interfaces", + CONFIG_BOOLEAN_AUTO); + + do_beacon = config_get_boolean_ondemand(CONFIG_SECTION_PLUGIN_PROC_NETWIRELESS, + "missed beacon for all interface", CONFIG_BOOLEAN_AUTO); + } + + if (unlikely(!ff)) { + ff = procfile_open(proc_net_wireless_filename, " \t,|", PROCFILE_FLAG_DEFAULT); + if (unlikely(!ff)) return 1; + } + + ff = procfile_readall(ff); + if (unlikely(!ff)) return 1; + + size_t lines = procfile_lines(ff); + struct timeval timestamp; + size_t l; + gettimeofday(×tamp, NULL); + for (l = 2; l < lines; l++) { + if (unlikely(procfile_linewords(ff, l) < 11)) continue; + + char *name = procfile_lineword(ff, l, 0); + size_t len = strlen(name); + if (name[len - 1] == ':') name[len - 1] = '\0'; + + struct netwireless *wireless_dev = find_or_create_wireless(name); + + if (unlikely(!wireless_dev->configured)) { + configure_device(do_status, do_quality, do_discarded_packets, do_beacon, wireless_dev); + } + + if (likely(do_status != CONFIG_BOOLEAN_NO)) { + wireless_dev->status = str2kernel_uint_t(procfile_lineword(ff, l, 1)); + + if (unlikely(!wireless_dev->st_status)) { + wireless_dev->st_status = rrdset_create_localhost("wireless", + wireless_dev->chart_id_net_status, + NULL, + wireless_dev->name, + "wireless.status", + "Internal status reported by interface.", + "status", + PLUGIN_PROC_NAME, + PLUGIN_PROC_MODULE_NETWIRELESS_NAME, + NETDATA_CHART_PRIO_WIRELESS_IFACE, + update_every, + RRDSET_TYPE_LINE); + rrdset_flag_set(wireless_dev->st_status, RRDSET_FLAG_DETAIL); + + wireless_dev->rd_status = rrddim_add(wireless_dev->st_status, "status", NULL, 1, + 1, RRD_ALGORITHM_ABSOLUTE); + } else { + rrdset_next(wireless_dev->st_status); + } + + rrddim_set_by_pointer(wireless_dev->st_status, wireless_dev->rd_status, + (collected_number)wireless_dev->status); + rrdset_done(wireless_dev->st_status); + } + + if (likely(do_quality != CONFIG_BOOLEAN_NO)) { + wireless_dev->link = str2ld(procfile_lineword(ff, l, 2), NULL); + wireless_dev->level = str2ld(procfile_lineword(ff, l, 3), NULL); + wireless_dev->noise = str2ld(procfile_lineword(ff, l, 4), NULL); + + if (unlikely(!wireless_dev->st_link)) { + wireless_dev->st_link = rrdset_create_localhost("wireless", + wireless_dev->chart_id_net_link, + NULL, + wireless_dev->name, + "wireless.link_quality", + "Overall quality of the link. This is an aggregate value, and depends on the driver and hardware.", + "value", + PLUGIN_PROC_NAME, + PLUGIN_PROC_MODULE_NETWIRELESS_NAME, + NETDATA_CHART_PRIO_WIRELESS_IFACE + 1, + update_every, + RRDSET_TYPE_LINE); + rrdset_flag_set(wireless_dev->st_link, RRDSET_FLAG_DETAIL); + + wireless_dev->rd_link = rrddim_add(wireless_dev->st_link, "link_quality", NULL, 1, 1, + RRD_ALGORITHM_ABSOLUTE); + } else { + rrdset_next(wireless_dev->st_link); + } + + if (unlikely(!wireless_dev->st_level)) { + wireless_dev->st_level = rrdset_create_localhost("wireless", + wireless_dev->chart_id_net_level, + NULL, + wireless_dev->name, + "wireless.signal_level", + "The signal level is the wireless signal power level received by the wireless client. The closer the value is to 0, the stronger the signal.", + "dBm", + PLUGIN_PROC_NAME, + PLUGIN_PROC_MODULE_NETWIRELESS_NAME, + NETDATA_CHART_PRIO_WIRELESS_IFACE + 2, + update_every, + RRDSET_TYPE_LINE); + rrdset_flag_set(wireless_dev->st_level, RRDSET_FLAG_DETAIL); + + wireless_dev->rd_level = rrddim_add(wireless_dev->st_level, "signal_level", NULL, 1, 1, + RRD_ALGORITHM_ABSOLUTE); + } else { + rrdset_next(wireless_dev->st_level); + } + + if (unlikely(!wireless_dev->st_noise)) { + wireless_dev->st_noise = rrdset_create_localhost("wireless", + wireless_dev->chart_id_net_noise, + NULL, + wireless_dev->name, + "wireless.noise_level", + "The noise level indicates the amount of background noise in your environment. The closer the value to 0, the greater the noise level.", + "dBm", + PLUGIN_PROC_NAME, + PLUGIN_PROC_MODULE_NETWIRELESS_NAME, + NETDATA_CHART_PRIO_WIRELESS_IFACE + 3, + update_every, + RRDSET_TYPE_LINE); + rrdset_flag_set(wireless_dev->st_noise, RRDSET_FLAG_DETAIL); + + wireless_dev->rd_noise = rrddim_add(wireless_dev->st_noise, "noise_level", NULL, 1, 1, + RRD_ALGORITHM_ABSOLUTE); + } else { + rrdset_next(wireless_dev->st_noise); + } + + rrddim_set_by_pointer(wireless_dev->st_link, wireless_dev->rd_link, + (collected_number)wireless_dev->link); + rrdset_done(wireless_dev->st_link); + + rrddim_set_by_pointer(wireless_dev->st_level, wireless_dev->rd_level, + (collected_number)wireless_dev->level); + rrdset_done(wireless_dev->st_level); + + rrddim_set_by_pointer(wireless_dev->st_noise, wireless_dev->rd_noise, + (collected_number)wireless_dev->noise); + rrdset_done(wireless_dev->st_noise); + } + + if (likely(do_discarded_packets)) { + wireless_dev->nwid = str2kernel_uint_t(procfile_lineword(ff, l, 5)); + wireless_dev->crypt = str2kernel_uint_t(procfile_lineword(ff, l, 6)); + wireless_dev->frag = str2kernel_uint_t(procfile_lineword(ff, l, 7)); + wireless_dev->retry = str2kernel_uint_t(procfile_lineword(ff, l, 8)); + wireless_dev->misc = str2kernel_uint_t(procfile_lineword(ff, l, 9)); + + if (unlikely(!wireless_dev->st_discarded_packets)) { + wireless_dev->st_discarded_packets = rrdset_create_localhost("wireless", + wireless_dev->chart_id_net_discarded_packets, + NULL, + wireless_dev->name, + "wireless.discarded_packets", + "Packet discarded in the wireless adapter due to \"wireless\" specific problems.", + "packets/s", + PLUGIN_PROC_NAME, + PLUGIN_PROC_MODULE_NETWIRELESS_NAME, + NETDATA_CHART_PRIO_WIRELESS_IFACE + 4, + update_every, + RRDSET_TYPE_LINE); + + rrdset_flag_set(wireless_dev->st_discarded_packets, RRDSET_FLAG_DETAIL); + + wireless_dev->rd_nwid = rrddim_add(wireless_dev->st_discarded_packets, "nwid", NULL, 1, + 1, RRD_ALGORITHM_INCREMENTAL); + wireless_dev->rd_crypt = rrddim_add(wireless_dev->st_discarded_packets, "crypt", NULL, 1, + 1, RRD_ALGORITHM_INCREMENTAL); + wireless_dev->rd_frag = rrddim_add(wireless_dev->st_discarded_packets, "frag", NULL, 1, + 1, RRD_ALGORITHM_INCREMENTAL); + wireless_dev->rd_retry = rrddim_add(wireless_dev->st_discarded_packets, "retry", NULL, 1, + 1, RRD_ALGORITHM_INCREMENTAL); + wireless_dev->rd_misc = rrddim_add(wireless_dev->st_discarded_packets, "misc", NULL, 1, + 1, RRD_ALGORITHM_INCREMENTAL); + } else { + rrdset_next(wireless_dev->st_discarded_packets); + } + + rrddim_set_by_pointer(wireless_dev->st_discarded_packets, wireless_dev->rd_nwid, + (collected_number)wireless_dev->nwid); + + rrddim_set_by_pointer(wireless_dev->st_discarded_packets, wireless_dev->rd_crypt, + (collected_number)wireless_dev->crypt); + + rrddim_set_by_pointer(wireless_dev->st_discarded_packets, wireless_dev->rd_frag, + (collected_number)wireless_dev->frag); + + rrddim_set_by_pointer(wireless_dev->st_discarded_packets, wireless_dev->rd_retry, + (collected_number)wireless_dev->retry); + + rrddim_set_by_pointer(wireless_dev->st_discarded_packets, wireless_dev->rd_misc, + (collected_number)wireless_dev->misc); + + rrdset_done(wireless_dev->st_discarded_packets); + } + + if (likely(do_beacon)) { + wireless_dev->missed_beacon = str2kernel_uint_t(procfile_lineword(ff, l, 10)); + + if (unlikely(!wireless_dev->st_missed_beacon)) { + wireless_dev->st_missed_beacon = rrdset_create_localhost("wireless", + wireless_dev->chart_id_net_missed_beacon, + NULL, + wireless_dev->name, + "wireless.missed_beacons", + "Number of missed beacons.", + "frames/s", + PLUGIN_PROC_NAME, + PLUGIN_PROC_MODULE_NETWIRELESS_NAME, + NETDATA_CHART_PRIO_WIRELESS_IFACE + 5, + update_every, + RRDSET_TYPE_LINE); + rrdset_flag_set(wireless_dev->st_missed_beacon, RRDSET_FLAG_DETAIL); + + wireless_dev->rd_missed_beacon = rrddim_add(wireless_dev->st_missed_beacon, "missed_beacons", + NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL); + } else { + rrdset_next(wireless_dev->st_missed_beacon); + } + + rrddim_set_by_pointer(wireless_dev->st_missed_beacon, wireless_dev->rd_missed_beacon, + (collected_number)wireless_dev->missed_beacon); + rrdset_done(wireless_dev->st_missed_beacon); + } + + wireless_dev->updated = timestamp; + } + + netwireless_cleanup(×tamp); + return 0; +} diff --git a/collectors/proc.plugin/proc_pressure.c b/collectors/proc.plugin/proc_pressure.c new file mode 100644 index 00000000..4a40b4aa --- /dev/null +++ b/collectors/proc.plugin/proc_pressure.c @@ -0,0 +1,178 @@ +// SPDX-License-Identifier: GPL-3.0-or-later + +#include "plugin_proc.h" + +#define PLUGIN_PROC_MODULE_PRESSURE_NAME "/proc/pressure" +#define CONFIG_SECTION_PLUGIN_PROC_PRESSURE "plugin:" PLUGIN_PROC_CONFIG_NAME ":" PLUGIN_PROC_MODULE_PRESSURE_NAME + +// linux calculates this every 2 seconds, see kernel/sched/psi.c PSI_FREQ +#define MIN_PRESSURE_UPDATE_EVERY 2 + + +static struct pressure resources[PRESSURE_NUM_RESOURCES] = { + { + .some = { .id = "cpu_pressure", .title = "CPU Pressure" }, + }, + { + .some = { .id = "memory_some_pressure", .title = "Memory Pressure" }, + .full = { .id = "memory_full_pressure", .title = "Memory Full Pressure" }, + }, + { + .some = { .id = "io_some_pressure", .title = "I/O Pressure" }, + .full = { .id = "io_full_pressure", .title = "I/O Full Pressure" }, + }, +}; + +static struct { + procfile *pf; + const char *name; // metric file name + const char *family; // webui section name + int section_priority; +} resource_info[PRESSURE_NUM_RESOURCES] = { + { .name = "cpu", .family = "cpu", .section_priority = NETDATA_CHART_PRIO_SYSTEM_CPU }, + { .name = "memory", .family = "ram", .section_priority = NETDATA_CHART_PRIO_SYSTEM_RAM }, + { .name = "io", .family = "disk", .section_priority = NETDATA_CHART_PRIO_SYSTEM_IO }, +}; + +void update_pressure_chart(struct pressure_chart *chart) { + rrddim_set_by_pointer(chart->st, chart->rd10, (collected_number)(chart->value10 * 100)); + rrddim_set_by_pointer(chart->st, chart->rd60, (collected_number) (chart->value60 * 100)); + rrddim_set_by_pointer(chart->st, chart->rd300, (collected_number) (chart->value300 * 100)); + + rrdset_done(chart->st); +} + +int do_proc_pressure(int update_every, usec_t dt) { + int fail_count = 0; + int i; + + static usec_t next_pressure_dt = 0; + static char *base_path = NULL; + + update_every = (update_every < MIN_PRESSURE_UPDATE_EVERY) ? MIN_PRESSURE_UPDATE_EVERY : update_every; + + if (next_pressure_dt <= dt) { + next_pressure_dt = update_every * USEC_PER_SEC; + } else { + next_pressure_dt -= dt; + return 0; + } + + if (unlikely(!base_path)) { + base_path = config_get(CONFIG_SECTION_PLUGIN_PROC_PRESSURE, "base path of pressure metrics", "/proc/pressure"); + } + + for (i = 0; i < PRESSURE_NUM_RESOURCES; i++) { + procfile *ff = resource_info[i].pf; + int do_some = resources[i].some.enabled, do_full = resources[i].full.enabled; + + if (unlikely(!ff)) { + char filename[FILENAME_MAX + 1]; + char config_key[CONFIG_MAX_NAME + 1]; + + snprintfz(filename + , FILENAME_MAX + , "%s%s/%s" + , netdata_configured_host_prefix + , base_path + , resource_info[i].name); + + snprintfz(config_key, CONFIG_MAX_NAME, "enable %s some pressure", resource_info[i].name); + do_some = config_get_boolean(CONFIG_SECTION_PLUGIN_PROC_PRESSURE, config_key, CONFIG_BOOLEAN_YES); + resources[i].some.enabled = do_some; + if (resources[i].full.id) { + snprintfz(config_key, CONFIG_MAX_NAME, "enable %s full pressure", resource_info[i].name); + do_full = config_get_boolean(CONFIG_SECTION_PLUGIN_PROC_PRESSURE, config_key, CONFIG_BOOLEAN_YES); + resources[i].full.enabled = do_full; + } + + ff = procfile_open(filename, " =", PROCFILE_FLAG_DEFAULT); + if (unlikely(!ff)) { + error("Cannot read pressure information from %s.", filename); + fail_count++; + continue; + } + } + + ff = procfile_readall(ff); + resource_info[i].pf = ff; + if (unlikely(!ff)) { + fail_count++; + continue; + } + + size_t lines = procfile_lines(ff); + if (unlikely(lines < 1)) { + error("%s has no lines.", procfile_filename(ff)); + fail_count++; + continue; + } + + struct pressure_chart *chart; + if (do_some) { + chart = &resources[i].some; + if (unlikely(!chart->st)) { + chart->st = rrdset_create_localhost( + "system" + , chart->id + , NULL + , resource_info[i].family + , NULL + , chart->title + , "percentage" + , PLUGIN_PROC_NAME + , PLUGIN_PROC_MODULE_PRESSURE_NAME + , resource_info[i].section_priority + 40 + , update_every + , RRDSET_TYPE_LINE + ); + chart->rd10 = rrddim_add(chart->st, "some 10", NULL, 1, 100, RRD_ALGORITHM_ABSOLUTE); + chart->rd60 = rrddim_add(chart->st, "some 60", NULL, 1, 100, RRD_ALGORITHM_ABSOLUTE); + chart->rd300 = rrddim_add(chart->st, "some 300", NULL, 1, 100, RRD_ALGORITHM_ABSOLUTE); + } else { + rrdset_next(chart->st); + } + + chart->value10 = strtod(procfile_lineword(ff, 0, 2), NULL); + chart->value60 = strtod(procfile_lineword(ff, 0, 4), NULL); + chart->value300 = strtod(procfile_lineword(ff, 0, 6), NULL); + update_pressure_chart(chart); + } + + if (do_full && lines > 2) { + chart = &resources[i].full; + if (unlikely(!chart->st)) { + chart->st = rrdset_create_localhost( + "system" + , chart->id + , NULL + , resource_info[i].family + , NULL + , chart->title + , "percentage" + , PLUGIN_PROC_NAME + , PLUGIN_PROC_MODULE_PRESSURE_NAME + , resource_info[i].section_priority + 45 + , update_every + , RRDSET_TYPE_LINE + ); + chart->rd10 = rrddim_add(chart->st, "full 10", NULL, 1, 100, RRD_ALGORITHM_ABSOLUTE); + chart->rd60 = rrddim_add(chart->st, "full 60", NULL, 1, 100, RRD_ALGORITHM_ABSOLUTE); + chart->rd300 = rrddim_add(chart->st, "full 300", NULL, 1, 100, RRD_ALGORITHM_ABSOLUTE); + } else { + rrdset_next(chart->st); + } + + chart->value10 = strtod(procfile_lineword(ff, 1, 2), NULL); + chart->value60 = strtod(procfile_lineword(ff, 1, 4), NULL); + chart->value300 = strtod(procfile_lineword(ff, 1, 6), NULL); + update_pressure_chart(chart); + } + } + + if (PRESSURE_NUM_RESOURCES == fail_count) { + return 1; + } + + return 0; +} diff --git a/collectors/proc.plugin/proc_pressure.h b/collectors/proc.plugin/proc_pressure.h new file mode 100644 index 00000000..33302186 --- /dev/null +++ b/collectors/proc.plugin/proc_pressure.h @@ -0,0 +1,31 @@ +// SPDX-License-Identifier: GPL-3.0-or-later + +#ifndef NETDATA_PROC_PRESSURE_H +#define NETDATA_PROC_PRESSURE_H + +#define PRESSURE_NUM_RESOURCES 3 + +struct pressure { + int updated; + char *filename; + + struct pressure_chart { + int enabled; + + const char *id; + const char *title; + + double value10; + double value60; + double value300; + + RRDSET *st; + RRDDIM *rd10; + RRDDIM *rd60; + RRDDIM *rd300; + } some, full; +}; + +extern void update_pressure_chart(struct pressure_chart *chart); + +#endif //NETDATA_PROC_PRESSURE_H diff --git a/collectors/proc.plugin/proc_stat.c b/collectors/proc.plugin/proc_stat.c index 5e6b79fc..373a0677 100644 --- a/collectors/proc.plugin/proc_stat.c +++ b/collectors/proc.plugin/proc_stat.c @@ -1003,6 +1003,7 @@ int do_proc_stat(int update_every, usec_t dt) { else error("Cannot read current process affinity"); + // These threads are very ephemeral and don't need to have a specific name if(unlikely(pthread_create(&thread, NULL, wake_cpu_thread, (void *)&core))) error("Cannot create wake_cpu_thread"); else if(unlikely(pthread_join(thread, NULL))) diff --git a/collectors/proc.plugin/sys_class_infiniband.c b/collectors/proc.plugin/sys_class_infiniband.c new file mode 100644 index 00000000..46f40f2c --- /dev/null +++ b/collectors/proc.plugin/sys_class_infiniband.c @@ -0,0 +1,704 @@ +// SPDX-License-Identifier: GPL-3.0-or-later + +// Heavily inspired from proc_net_dev.c +#include "plugin_proc.h" + +#define PLUGIN_PROC_MODULE_INFINIBAND_NAME "/sys/class/infiniband" +#define CONFIG_SECTION_PLUGIN_SYS_CLASS_INFINIBAND \ + "plugin:" PLUGIN_PROC_CONFIG_NAME ":" PLUGIN_PROC_MODULE_INFINIBAND_NAME + +// ib_device::name[IB_DEVICE_NAME_MAX(64)] + "-" + ib_device::phys_port_cnt[u8 = 3 chars] +#define IBNAME_MAX 68 + +// ---------------------------------------------------------------------------- +// infiniband & omnipath standard counters + +// I use macro as there's no single file acting as summary, but a lot of different files, so can't use helpers like +// procfile(). Also, omnipath generates other counters, that are not provided by infiniband +#define FOREACH_COUNTER(GEN, ...) \ + FOREACH_COUNTER_BYTES(GEN, __VA_ARGS__) \ + FOREACH_COUNTER_PACKETS(GEN, __VA_ARGS__) \ + FOREACH_COUNTER_ERRORS(GEN, __VA_ARGS__) + +#define FOREACH_COUNTER_BYTES(GEN, ...) \ + GEN(port_rcv_data, bytes, "Received", 1, __VA_ARGS__) \ + GEN(port_xmit_data, bytes, "Sent", -1, __VA_ARGS__) + +#define FOREACH_COUNTER_PACKETS(GEN, ...) \ + GEN(port_rcv_packets, packets, "Received", 1, __VA_ARGS__) \ + GEN(port_xmit_packets, packets, "Sent", -1, __VA_ARGS__) \ + GEN(multicast_rcv_packets, packets, "Mcast rcvd", 1, __VA_ARGS__) \ + GEN(multicast_xmit_packets, packets, "Mcast sent", -1, __VA_ARGS__) \ + GEN(unicast_rcv_packets, packets, "Ucast rcvd", 1, __VA_ARGS__) \ + GEN(unicast_xmit_packets, packets, "Ucast sent", -1, __VA_ARGS__) + +#define FOREACH_COUNTER_ERRORS(GEN, ...) \ + GEN(port_rcv_errors, errors, "Pkts malformated", 1, __VA_ARGS__) \ + GEN(port_rcv_constraint_errors, errors, "Pkts rcvd discarded ", 1, __VA_ARGS__) \ + GEN(port_xmit_discards, errors, "Pkts sent discarded", 1, __VA_ARGS__) \ + GEN(port_xmit_wait, errors, "Tick Wait to send", 1, __VA_ARGS__) \ + GEN(VL15_dropped, errors, "Pkts missed ressource", 1, __VA_ARGS__) \ + GEN(excessive_buffer_overrun_errors, errors, "Buffer overrun", 1, __VA_ARGS__) \ + GEN(link_downed, errors, "Link Downed", 1, __VA_ARGS__) \ + GEN(link_error_recovery, errors, "Link recovered", 1, __VA_ARGS__) \ + GEN(local_link_integrity_errors, errors, "Link integrity err", 1, __VA_ARGS__) \ + GEN(symbol_error, errors, "Link minor errors", 1, __VA_ARGS__) \ + GEN(port_rcv_remote_physical_errors, errors, "Pkts rcvd with EBP", 1, __VA_ARGS__) \ + GEN(port_rcv_switch_relay_errors, errors, "Pkts rcvd discarded by switch", 1, __VA_ARGS__) \ + GEN(port_xmit_constraint_errors, errors, "Pkts sent discarded by switch", 1, __VA_ARGS__) + +// +// Hardware Counters +// + +// IMPORTANT: These are vendor-specific fields. +// If you want to add a new vendor, search this for for 'VENDORS:' keyword and +// add your definition as 'VENDOR-<key>:' where <key> if the string part that +// is shown in /sys/class/infiniband/<key>X_Y +// EG: for Mellanox, shown as mlx0_1, it's 'mlx' +// for Intel, shown as hfi1_1, it's 'hfi' + +// VENDORS: List of implemented hardware vendors +#define FOREACH_HWCOUNTER_NAME(GEN, ...) GEN(mlx, __VA_ARGS__) + +// VENDOR-MLX: HW Counters for Mellanox ConnectX Devices +#define FOREACH_HWCOUNTER_MLX(GEN, ...) \ + FOREACH_HWCOUNTER_MLX_PACKETS(GEN, __VA_ARGS__) \ + FOREACH_HWCOUNTER_MLX_ERRORS(GEN, __VA_ARGS__) + +#define FOREACH_HWCOUNTER_MLX_PACKETS(GEN, ...) \ + GEN(np_cnp_sent, hwpackets, "RoCEv2 Congestion sent", 1, __VA_ARGS__) \ + GEN(np_ecn_marked_roce_packets, hwpackets, "RoCEv2 Congestion rcvd", -1, __VA_ARGS__) \ + GEN(rp_cnp_handled, hwpackets, "IB Congestion handled", 1, __VA_ARGS__) \ + GEN(rx_atomic_requests, hwpackets, "ATOMIC req. rcvd", 1, __VA_ARGS__) \ + GEN(rx_dct_connect, hwpackets, "Connection req. rcvd", 1, __VA_ARGS__) \ + GEN(rx_read_requests, hwpackets, "Read req. rcvd", 1, __VA_ARGS__) \ + GEN(rx_write_requests, hwpackets, "Write req. rcvd", 1, __VA_ARGS__) \ + GEN(roce_adp_retrans, hwpackets, "RoCE retrans adaptive", 1, __VA_ARGS__) \ + GEN(roce_adp_retrans_to, hwpackets, "RoCE retrans timeout", 1, __VA_ARGS__) \ + GEN(roce_slow_restart, hwpackets, "RoCE slow restart", 1, __VA_ARGS__) \ + GEN(roce_slow_restart_cnps, hwpackets, "RoCE slow restart congestion", 1, __VA_ARGS__) \ + GEN(roce_slow_restart_trans, hwpackets, "RoCE slow restart count", 1, __VA_ARGS__) + +#define FOREACH_HWCOUNTER_MLX_ERRORS(GEN, ...) \ + GEN(duplicate_request, hwerrors, "Duplicated packets", -1, __VA_ARGS__) \ + GEN(implied_nak_seq_err, hwerrors, "Pkt Seq Num gap", 1, __VA_ARGS__) \ + GEN(local_ack_timeout_err, hwerrors, "Ack timer expired", 1, __VA_ARGS__) \ + GEN(out_of_buffer, hwerrors, "Drop missing buffer", 1, __VA_ARGS__) \ + GEN(out_of_sequence, hwerrors, "Drop out of sequence", 1, __VA_ARGS__) \ + GEN(packet_seq_err, hwerrors, "NAK sequence rcvd", 1, __VA_ARGS__) \ + GEN(req_cqe_error, hwerrors, "CQE err Req", 1, __VA_ARGS__) \ + GEN(resp_cqe_error, hwerrors, "CQE err Resp", 1, __VA_ARGS__) \ + GEN(req_cqe_flush_error, hwerrors, "CQE Flushed err Req", 1, __VA_ARGS__) \ + GEN(resp_cqe_flush_error, hwerrors, "CQE Flushed err Resp", 1, __VA_ARGS__) \ + GEN(req_remote_access_errors, hwerrors, "Remote access err Req", 1, __VA_ARGS__) \ + GEN(resp_remote_access_errors, hwerrors, "Remote access err Resp", 1, __VA_ARGS__) \ + GEN(req_remote_invalid_request, hwerrors, "Remote invalid req", 1, __VA_ARGS__) \ + GEN(resp_local_length_error, hwerrors, "Local length err Resp", 1, __VA_ARGS__) \ + GEN(rnr_nak_retry_err, hwerrors, "RNR NAK Packets", 1, __VA_ARGS__) \ + GEN(rp_cnp_ignored, hwerrors, "CNP Pkts ignored", 1, __VA_ARGS__) \ + GEN(rx_icrc_encapsulated, hwerrors, "RoCE ICRC Errors", 1, __VA_ARGS__) + +// Common definitions used more than once +#define GEN_RRD_DIM_ADD(NAME, GRP, DESC, DIR, PORT) \ + GEN_RRD_DIM_ADD_CUSTOM(NAME, GRP, DESC, DIR, PORT, 1, 1, RRD_ALGORITHM_INCREMENTAL) + +#define GEN_RRD_DIM_ADD_CUSTOM(NAME, GRP, DESC, DIR, PORT, MULT, DIV, ALGO) \ + PORT->rd_##NAME = rrddim_add(PORT->st_##GRP, DESC, NULL, DIR * MULT, DIV, ALGO); + +#define GEN_RRD_DIM_ADD_HW(NAME, GRP, DESC, DIR, PORT, HW) \ + HW->rd_##NAME = rrddim_add(PORT->st_##GRP, DESC, NULL, DIR, 1, RRD_ALGORITHM_INCREMENTAL); + +#define GEN_RRD_DIM_SETP(NAME, GRP, DESC, DIR, PORT) \ + rrddim_set_by_pointer(PORT->st_##GRP, PORT->rd_##NAME, (collected_number)PORT->NAME); + +#define GEN_RRD_DIM_SETP_HW(NAME, GRP, DESC, DIR, PORT, HW) \ + rrddim_set_by_pointer(PORT->st_##GRP, HW->rd_##NAME, (collected_number)HW->NAME); + +// https://community.mellanox.com/s/article/understanding-mlx5-linux-counters-and-status-parameters +// https://community.mellanox.com/s/article/infiniband-port-counters +static struct ibport { + char *name; + char *counters_path; + char *hwcounters_path; + int len; + + // flags + int configured; + int enabled; + int discovered; + + int do_bytes; + int do_packets; + int do_errors; + int do_hwpackets; + int do_hwerrors; + + const char *chart_type_bytes; + const char *chart_type_packets; + const char *chart_type_errors; + const char *chart_type_hwpackets; + const char *chart_type_hwerrors; + + const char *chart_id_bytes; + const char *chart_id_packets; + const char *chart_id_errors; + const char *chart_id_hwpackets; + const char *chart_id_hwerrors; + + const char *chart_family; + + unsigned long priority; + + // Port details using drivers/infiniband/core/sysfs.c :: rate_show() + RRDDIM *rd_speed; + uint64_t speed; + uint64_t width; + +// Stats from /$device/ports/$portid/counters +// as drivers/infiniband/hw/qib/qib_verbs.h +// All uint64 except vl15_dropped, local_link_integrity_errors, excessive_buffer_overrun_errors uint32 +// Will generate 2 elements for each counter: +// - uint64_t to store the value +// - char* to store the filename path +// - RRDDIM* to store the RRD Dimension +#define GEN_DEF_COUNTER(NAME, ...) \ + uint64_t NAME; \ + char *file_##NAME; \ + RRDDIM *rd_##NAME; + FOREACH_COUNTER(GEN_DEF_COUNTER) + +// Vendor specific hwcounters from /$device/ports/$portid/hw_counters +// We will generate one struct pointer per vendor to avoid future casting +#define GEN_DEF_HWCOUNTER_PTR(VENDOR, ...) struct ibporthw_##VENDOR *hwcounters_##VENDOR; + FOREACH_HWCOUNTER_NAME(GEN_DEF_HWCOUNTER_PTR) + + // Function pointer to the "infiniband_hwcounters_parse_<vendor>" function + void (*hwcounters_parse)(struct ibport *); + void (*hwcounters_dorrd)(struct ibport *); + + // charts and dim + RRDSET *st_bytes; + RRDSET *st_packets; + RRDSET *st_errors; + RRDSET *st_hwpackets; + RRDSET *st_hwerrors; + + RRDSETVAR *stv_speed; + + usec_t speed_last_collected_usec; + + struct ibport *next; +} *ibport_root = NULL, *ibport_last_used = NULL; + +// VENDORS: reading / calculation functions +#define GEN_DEF_HWCOUNTER(NAME, ...) \ + uint64_t NAME; \ + char *file_##NAME; \ + RRDDIM *rd_##NAME; + +#define GEN_DO_HWCOUNTER_READ(NAME, GRP, DESC, DIR, PORT, HW, ...) \ + if (HW->file_##NAME) { \ + if (read_single_number_file(HW->file_##NAME, (unsigned long long *)&HW->NAME)) { \ + error("cannot read iface '%s' hwcounter '" #HW "'", PORT->name); \ + HW->file_##NAME = NULL; \ + } \ + } + +// VENDOR-MLX: Mellanox +struct ibporthw_mlx { + FOREACH_HWCOUNTER_MLX(GEN_DEF_HWCOUNTER) +}; +void infiniband_hwcounters_parse_mlx(struct ibport *port) +{ + if (port->do_hwerrors != CONFIG_BOOLEAN_NO) + FOREACH_HWCOUNTER_MLX_ERRORS(GEN_DO_HWCOUNTER_READ, port, port->hwcounters_mlx) + if (port->do_hwpackets != CONFIG_BOOLEAN_NO) + FOREACH_HWCOUNTER_MLX_PACKETS(GEN_DO_HWCOUNTER_READ, port, port->hwcounters_mlx) +} +void infiniband_hwcounters_dorrd_mlx(struct ibport *port) +{ + if (port->do_hwerrors != CONFIG_BOOLEAN_NO) { + FOREACH_HWCOUNTER_MLX_ERRORS(GEN_RRD_DIM_SETP_HW, port, port->hwcounters_mlx) + rrdset_done(port->st_hwerrors); + } + if (port->do_hwpackets != CONFIG_BOOLEAN_NO) { + FOREACH_HWCOUNTER_MLX_PACKETS(GEN_RRD_DIM_SETP_HW, port, port->hwcounters_mlx) + rrdset_done(port->st_hwpackets); + } +} + +// ---------------------------------------------------------------------------- + +static struct ibport *get_ibport(const char *dev, const char *port) +{ + struct ibport *p; + + char name[IBNAME_MAX + 1]; + snprintfz(name, IBNAME_MAX, "%s-%s", dev, port); + + // search it, resuming from the last position in sequence + for (p = ibport_last_used; p; p = p->next) { + if (unlikely(!strcmp(name, p->name))) { + ibport_last_used = p->next; + return p; + } + } + + // new round, from the beginning to the last position used this time + for (p = ibport_root; p != ibport_last_used; p = p->next) { + if (unlikely(!strcmp(name, p->name))) { + ibport_last_used = p->next; + return p; + } + } + + // create a new one + p = callocz(1, sizeof(struct ibport)); + p->name = strdupz(name); + p->len = strlen(p->name); + + p->chart_type_bytes = strdupz("infiniband_cnt_bytes"); + p->chart_type_packets = strdupz("infiniband_cnt_packets"); + p->chart_type_errors = strdupz("infiniband_cnt_errors"); + p->chart_type_hwpackets = strdupz("infiniband_hwc_packets"); + p->chart_type_hwerrors = strdupz("infiniband_hwc_errors"); + + char buffer[RRD_ID_LENGTH_MAX + 1]; + snprintfz(buffer, RRD_ID_LENGTH_MAX, "ib_cntbytes_%s", p->name); + p->chart_id_bytes = strdupz(buffer); + + snprintfz(buffer, RRD_ID_LENGTH_MAX, "ib_cntpackets_%s", p->name); + p->chart_id_packets = strdupz(buffer); + + snprintfz(buffer, RRD_ID_LENGTH_MAX, "ib_cnterrors_%s", p->name); + p->chart_id_errors = strdupz(buffer); + + snprintfz(buffer, RRD_ID_LENGTH_MAX, "ib_hwcntpackets_%s", p->name); + p->chart_id_hwpackets = strdupz(buffer); + + snprintfz(buffer, RRD_ID_LENGTH_MAX, "ib_hwcnterrors_%s", p->name); + p->chart_id_hwerrors = strdupz(buffer); + + p->chart_family = strdupz(p->name); + p->priority = NETDATA_CHART_PRIO_INFINIBAND; + + // Link current ibport to last one in the list + if (ibport_root) { + struct ibport *t; + for (t = ibport_root; t->next; t = t->next) + ; + t->next = p; + } else + ibport_root = p; + + return p; +} + +int do_sys_class_infiniband(int update_every, usec_t dt) +{ + (void)dt; + static SIMPLE_PATTERN *disabled_list = NULL; + static int initialized = 0; + static int enable_new_ports = -1, enable_only_active = CONFIG_BOOLEAN_YES; + static int do_bytes = -1, do_packets = -1, do_errors = -1, do_hwpackets = -1, do_hwerrors = -1; + static char *sys_class_infiniband_dirname = NULL; + + static long long int dt_to_refresh_ports = 0, last_refresh_ports_usec = 0; + + if (unlikely(enable_new_ports == -1)) { + char dirname[FILENAME_MAX + 1]; + + snprintfz(dirname, FILENAME_MAX, "%s%s", netdata_configured_host_prefix, "/sys/class/infiniband"); + sys_class_infiniband_dirname = + config_get(CONFIG_SECTION_PLUGIN_SYS_CLASS_INFINIBAND, "dirname to monitor", dirname); + + do_bytes = config_get_boolean_ondemand( + CONFIG_SECTION_PLUGIN_SYS_CLASS_INFINIBAND, "bandwidth counters", CONFIG_BOOLEAN_YES); + do_packets = config_get_boolean_ondemand( + CONFIG_SECTION_PLUGIN_SYS_CLASS_INFINIBAND, "packets counters", CONFIG_BOOLEAN_YES); + do_errors = config_get_boolean_ondemand( + CONFIG_SECTION_PLUGIN_SYS_CLASS_INFINIBAND, "errors counters", CONFIG_BOOLEAN_YES); + do_hwpackets = config_get_boolean_ondemand( + CONFIG_SECTION_PLUGIN_SYS_CLASS_INFINIBAND, "hardware packets counters", CONFIG_BOOLEAN_AUTO); + do_hwerrors = config_get_boolean_ondemand( + CONFIG_SECTION_PLUGIN_SYS_CLASS_INFINIBAND, "hardware errors counters", CONFIG_BOOLEAN_AUTO); + + enable_only_active = config_get_boolean_ondemand( + CONFIG_SECTION_PLUGIN_SYS_CLASS_INFINIBAND, "monitor only active ports", CONFIG_BOOLEAN_AUTO); + disabled_list = simple_pattern_create( + config_get(CONFIG_SECTION_PLUGIN_SYS_CLASS_INFINIBAND, "disable by default interfaces matching", ""), NULL, + SIMPLE_PATTERN_EXACT); + + dt_to_refresh_ports = + config_get_number(CONFIG_SECTION_PLUGIN_SYS_CLASS_INFINIBAND, "refresh ports state every seconds", 30) * + USEC_PER_SEC; + if (dt_to_refresh_ports < 0) + dt_to_refresh_ports = 0; + } + + // init listing of /sys/class/infiniband/ (or rediscovery) + if (unlikely(!initialized) || unlikely(last_refresh_ports_usec >= dt_to_refresh_ports)) { + // If folder does not exists, return 1 to disable + DIR *devices_dir = opendir(sys_class_infiniband_dirname); + if (unlikely(!devices_dir)) + return 1; + + // Work on all device available + struct dirent *dev_dent; + while ((dev_dent = readdir(devices_dir))) { + // Skip special folders + if (!strcmp(dev_dent->d_name, "..") || !strcmp(dev_dent->d_name, ".")) + continue; + + // /sys/class/infiniband/<dev>/ports + char ports_dirname[FILENAME_MAX + 1]; + snprintfz(ports_dirname, FILENAME_MAX, "%s/%s/%s", sys_class_infiniband_dirname, dev_dent->d_name, "ports"); + + DIR *ports_dir = opendir(ports_dirname); + if (unlikely(!ports_dir)) + continue; + + struct dirent *port_dent; + while ((port_dent = readdir(ports_dir))) { + // Skip special folders + if (!strcmp(port_dent->d_name, "..") || !strcmp(port_dent->d_name, ".")) + continue; + + char buffer[FILENAME_MAX + 1]; + + // Check if counters are availablea (mandatory) + // /sys/class/infiniband/<device>/ports/<port>/counters + char counters_dirname[FILENAME_MAX + 1]; + snprintfz(counters_dirname, FILENAME_MAX, "%s/%s/%s", ports_dirname, port_dent->d_name, "counters"); + DIR *counters_dir = opendir(counters_dirname); + // Standard counters are mandatory + if (!counters_dir) + continue; + closedir(counters_dir); + + // Hardware Counters are optionnal, used later + char hwcounters_dirname[FILENAME_MAX + 1]; + snprintfz( + hwcounters_dirname, FILENAME_MAX, "%s/%s/%s", ports_dirname, port_dent->d_name, "hw_counters"); + + // Get new or existing ibport + struct ibport *p = get_ibport(dev_dent->d_name, port_dent->d_name); + if (!p) + continue; + + // Prepare configuration + if (!p->configured) { + p->configured = 1; + + // Enable by default, will be filtered out later + p->enabled = 1; + + p->counters_path = strdupz(counters_dirname); + p->hwcounters_path = strdupz(hwcounters_dirname); + + snprintfz(buffer, FILENAME_MAX, "plugin:proc:/sys/class/infiniband:%s", p->name); + + // Standard counters + p->do_bytes = config_get_boolean_ondemand(buffer, "bytes", do_bytes); + p->do_packets = config_get_boolean_ondemand(buffer, "packets", do_packets); + p->do_errors = config_get_boolean_ondemand(buffer, "errors", do_errors); + +// Gen filename allocation and concatenation +#define GEN_DO_COUNTER_NAME(NAME, GRP, DESC, DIR, PORT, ...) \ + PORT->file_##NAME = callocz(1, strlen(PORT->counters_path) + sizeof(#NAME) + 3); \ + strcat(PORT->file_##NAME, PORT->counters_path); \ + strcat(PORT->file_##NAME, "/" #NAME); + FOREACH_COUNTER(GEN_DO_COUNTER_NAME, p) + + // Check HW Counters vendor dependent + DIR *hwcounters_dir = opendir(hwcounters_dirname); + if (hwcounters_dir) { + // By default set standard + p->do_hwpackets = config_get_boolean_ondemand(buffer, "hwpackets", do_hwpackets); + p->do_hwerrors = config_get_boolean_ondemand(buffer, "hwerrors", do_hwerrors); + +// VENDORS: Set your own + +// Allocate the chars to the filenames +#define GEN_DO_HWCOUNTER_NAME(NAME, GRP, DESC, DIR, PORT, HW, ...) \ + HW->file_##NAME = callocz(1, strlen(PORT->hwcounters_path) + sizeof(#NAME) + 3); \ + strcat(HW->file_##NAME, PORT->hwcounters_path); \ + strcat(HW->file_##NAME, "/" #NAME); + + // VENDOR-MLX: Mellanox + if (strncmp(dev_dent->d_name, "mlx", 3) == 0) { + // Allocate the vendor specific struct + p->hwcounters_mlx = callocz(1, sizeof(struct ibporthw_mlx)); + + FOREACH_HWCOUNTER_MLX(GEN_DO_HWCOUNTER_NAME, p, p->hwcounters_mlx) + + // Set the function pointer for hwcounter parsing + p->hwcounters_parse = &infiniband_hwcounters_parse_mlx; + p->hwcounters_dorrd = &infiniband_hwcounters_dorrd_mlx; + } + + // VENDOR: Unknown + else { + p->do_hwpackets = CONFIG_BOOLEAN_NO; + p->do_hwerrors = CONFIG_BOOLEAN_NO; + } + closedir(hwcounters_dir); + } + } + + // Check port state to keep activation + if (enable_only_active) { + snprintfz(buffer, FILENAME_MAX, "%s/%s/%s", ports_dirname, port_dent->d_name, "state"); + unsigned long long active; + // File is "1: DOWN" or "4: ACTIVE", but str2ull will stop on first non-decimal char + read_single_number_file(buffer, &active); + + // Want "IB_PORT_ACTIVE" == "4", as defined by drivers/infiniband/core/sysfs.c::state_show() + if (active == 4) + p->enabled = 1; + else + p->enabled = 0; + } + + if (p->enabled) + p->enabled = !simple_pattern_matches(disabled_list, p->name); + + // Check / Update the link speed & width frm "rate" file + // Sample output: "100 Gb/sec (4X EDR)" + snprintfz(buffer, FILENAME_MAX, "%s/%s/%s", ports_dirname, port_dent->d_name, "rate"); + char buffer_rate[65]; + if (read_file(buffer, buffer_rate, 64)) { + error("Unable to read '%s'", buffer); + p->width = 1; + } else { + char *buffer_width = strstr(buffer_rate, "("); + buffer_width++; + // str2ull will stop on first non-decimal value + p->speed = str2ull(buffer_rate); + p->width = str2ull(buffer_width); + } + + if (!p->discovered) + info( + "Infiniband card %s port %s at speed %lu width %lu", dev_dent->d_name, port_dent->d_name, + p->speed, p->width); + + p->discovered = 1; + } + closedir(ports_dir); + } + closedir(devices_dir); + + initialized = 1; + last_refresh_ports_usec = 0; + } + last_refresh_ports_usec += dt; + + // Update all ports values + struct ibport *port; + for (port = ibport_root; port; port = port->next) { + if (!port->enabled) + continue; + // + // Read values from system to struct + // + +// counter from file and place it in ibport struct +#define GEN_DO_COUNTER_READ(NAME, GRP, DESC, DIR, PORT, ...) \ + if (PORT->file_##NAME) { \ + if (read_single_number_file(PORT->file_##NAME, (unsigned long long *)&PORT->NAME)) { \ + error("cannot read iface '%s' counter '" #NAME "'", PORT->name); \ + PORT->file_##NAME = NULL; \ + } \ + } + + // Update charts + if (port->do_bytes != CONFIG_BOOLEAN_NO) { + // Read values from sysfs + FOREACH_COUNTER_BYTES(GEN_DO_COUNTER_READ, port) + + // First creation of RRD Set (charts) + if (unlikely(!port->st_bytes)) { + port->st_bytes = rrdset_create_localhost( + "Infiniband", + port->chart_id_bytes, + NULL, + port->chart_family, + "ib.bytes", + "Bandwidth usage", + "kilobits/s", + PLUGIN_PROC_NAME, + PLUGIN_PROC_MODULE_INFINIBAND_NAME, + port->priority + 1, + update_every, + RRDSET_TYPE_AREA); + // Create Dimensions + rrdset_flag_set(port->st_bytes, RRDSET_FLAG_DETAIL); + // On this chart, we want to have a KB/s so the dashboard will autoscale it + // The reported values are also per-lane, so we must multiply it by the width + FOREACH_COUNTER_BYTES(GEN_RRD_DIM_ADD_CUSTOM, port, 8 * port->width, 1024, RRD_ALGORITHM_INCREMENTAL) + + port->stv_speed = rrdsetvar_custom_chart_variable_create(port->st_bytes, "link_speed"); + } else + rrdset_next(port->st_bytes); + + // Link read values to dimensions + FOREACH_COUNTER_BYTES(GEN_RRD_DIM_SETP, port) + + // For link speed set only variable + rrdsetvar_custom_chart_variable_set(port->stv_speed, port->speed); + + rrdset_done(port->st_bytes); + } + + if (port->do_packets != CONFIG_BOOLEAN_NO) { + // Read values from sysfs + FOREACH_COUNTER_PACKETS(GEN_DO_COUNTER_READ, port) + + // First creation of RRD Set (charts) + if (unlikely(!port->st_packets)) { + port->st_packets = rrdset_create_localhost( + "Infiniband", + port->chart_id_packets, + NULL, + port->chart_family, + "ib.packets", + "Packets Statistics", + "packets/s", + PLUGIN_PROC_NAME, + PLUGIN_PROC_MODULE_INFINIBAND_NAME, + port->priority + 2, + update_every, + RRDSET_TYPE_AREA); + // Create Dimensions + rrdset_flag_set(port->st_packets, RRDSET_FLAG_DETAIL); + FOREACH_COUNTER_PACKETS(GEN_RRD_DIM_ADD, port) + } else + rrdset_next(port->st_packets); + + // Link read values to dimensions + FOREACH_COUNTER_PACKETS(GEN_RRD_DIM_SETP, port) + rrdset_done(port->st_packets); + } + + if (port->do_errors != CONFIG_BOOLEAN_NO) { + // Read values from sysfs + FOREACH_COUNTER_ERRORS(GEN_DO_COUNTER_READ, port) + + // First creation of RRD Set (charts) + if (unlikely(!port->st_errors)) { + port->st_errors = rrdset_create_localhost( + "Infiniband", + port->chart_id_errors, + NULL, + port->chart_family, + "ib.errors", + "Error Counters", + "errors/s", + PLUGIN_PROC_NAME, + PLUGIN_PROC_MODULE_INFINIBAND_NAME, + port->priority + 3, + update_every, + RRDSET_TYPE_LINE); + // Create Dimensions + rrdset_flag_set(port->st_errors, RRDSET_FLAG_DETAIL); + FOREACH_COUNTER_ERRORS(GEN_RRD_DIM_ADD, port) + } else + rrdset_next(port->st_errors); + + // Link read values to dimensions + FOREACH_COUNTER_ERRORS(GEN_RRD_DIM_SETP, port) + rrdset_done(port->st_errors); + } + + // + // HW Counters + // + + // Call the function for parsing and setting hwcounters + if (port->hwcounters_parse && port->hwcounters_dorrd) { + // Read all values (done by vendor-specific function) + (*port->hwcounters_parse)(port); + + if (port->do_hwerrors != CONFIG_BOOLEAN_NO) { + // First creation of RRD Set (charts) + if (unlikely(!port->st_hwerrors)) { + port->st_hwerrors = rrdset_create_localhost( + "Infiniband", + port->chart_id_hwerrors, + NULL, + port->chart_family, + "ib.hwerrors", + "Hardware Errors", + "errors/s", + PLUGIN_PROC_NAME, + PLUGIN_PROC_MODULE_INFINIBAND_NAME, + port->priority + 4, + update_every, + RRDSET_TYPE_LINE); + + rrdset_flag_set(port->st_hwerrors, RRDSET_FLAG_DETAIL); + + // VENDORS: Set your selection + + // VENDOR: Mellanox + if (strncmp(port->name, "mlx", 3) == 0) { + FOREACH_HWCOUNTER_MLX_ERRORS(GEN_RRD_DIM_ADD_HW, port, port->hwcounters_mlx) + } + + // Unknown vendor, should not happen + else { + error( + "Unmanaged vendor for '%s', do_hwerrors should have been set to no. Please report this bug", + port->name); + port->do_hwerrors = CONFIG_BOOLEAN_NO; + } + } else + rrdset_next(port->st_hwerrors); + } + + if (port->do_hwpackets != CONFIG_BOOLEAN_NO) { + // First creation of RRD Set (charts) + if (unlikely(!port->st_hwpackets)) { + port->st_hwpackets = rrdset_create_localhost( + "Infiniband", + port->chart_id_hwpackets, + NULL, + port->chart_family, + "ib.hwpackets", + "Hardware Packets Statistics", + "packets/s", + PLUGIN_PROC_NAME, + PLUGIN_PROC_MODULE_INFINIBAND_NAME, + port->priority + 5, + update_every, + RRDSET_TYPE_LINE); + + rrdset_flag_set(port->st_hwpackets, RRDSET_FLAG_DETAIL); + + // VENDORS: Set your selection + + // VENDOR: Mellanox + if (strncmp(port->name, "mlx", 3) == 0) { + FOREACH_HWCOUNTER_MLX_PACKETS(GEN_RRD_DIM_ADD_HW, port, port->hwcounters_mlx) + } + + // Unknown vendor, should not happen + else { + error( + "Unmanaged vendor for '%s', do_hwpackets should have been set to no. Please report this bug", + port->name); + port->do_hwpackets = CONFIG_BOOLEAN_NO; + } + } else + rrdset_next(port->st_hwpackets); + } + + // Update values to rrd (done by vendor-specific function) + (*port->hwcounters_dorrd)(port); + } + } + + return 0; +} |